diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2013-10-09 22:42:09 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2013-10-10 09:06:58 +0200 |
commit | bceabc95c1c85d793200446fa85f1ddc6313ea29 (patch) | |
tree | 973c8bd8deca9fd69913f2895cc91e0e6114d46c /freebsd/sys/netinet6 | |
parent | Add FreeBSD sources as a submodule (diff) | |
download | rtems-libbsd-bceabc95c1c85d793200446fa85f1ddc6313ea29.tar.bz2 |
Move files to match FreeBSD layout
Diffstat (limited to 'freebsd/sys/netinet6')
49 files changed, 39264 insertions, 0 deletions
diff --git a/freebsd/sys/netinet6/dest6.c b/freebsd/sys/netinet6/dest6.c new file mode 100644 index 00000000..6774dc86 --- /dev/null +++ b/freebsd/sys/netinet6/dest6.c @@ -0,0 +1,125 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: dest6.c,v 1.59 2003/07/11 13:21:16 t-momose Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/domain.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/kernel.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/route.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet/icmp6.h> + +/* + * Destination options header processing. + */ +int +dest6_input(struct mbuf **mp, int *offp, int proto) +{ + struct mbuf *m = *mp; + int off = *offp, dstoptlen, optlen; + struct ip6_dest *dstopts; + u_int8_t *opt; + + /* validation of the length of the header */ +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, sizeof(*dstopts), IPPROTO_DONE); + dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, sizeof(*dstopts)); + if (dstopts == NULL) + return IPPROTO_DONE; +#endif + dstoptlen = (dstopts->ip6d_len + 1) << 3; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, dstoptlen, IPPROTO_DONE); + dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, dstoptlen); + if (dstopts == NULL) + return IPPROTO_DONE; +#endif + off += dstoptlen; + dstoptlen -= sizeof(struct ip6_dest); + opt = (u_int8_t *)dstopts + sizeof(struct ip6_dest); + + /* search header for all options. 
*/ + for (optlen = 0; dstoptlen > 0; dstoptlen -= optlen, opt += optlen) { + if (*opt != IP6OPT_PAD1 && + (dstoptlen < IP6OPT_MINLEN || *(opt + 1) + 2 > dstoptlen)) { + V_ip6stat.ip6s_toosmall++; + goto bad; + } + + switch (*opt) { + case IP6OPT_PAD1: + optlen = 1; + break; + case IP6OPT_PADN: + optlen = *(opt + 1) + 2; + break; + default: /* unknown option */ + optlen = ip6_unknown_opt(opt, m, + opt - mtod(m, u_int8_t *)); + if (optlen == -1) + return (IPPROTO_DONE); + optlen += 2; + break; + } + } + + *offp = off; + return (dstopts->ip6d_nxt); + + bad: + m_freem(m); + return (IPPROTO_DONE); +} diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c new file mode 100644 index 00000000..bdb0f198 --- /dev/null +++ b/freebsd/sys/netinet6/frag6.c @@ -0,0 +1,781 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/domain.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/syslog.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/route.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet/in_systm.h> /* for ECN definitions */ +#include <freebsd/netinet/ip.h> /* for ECN definitions */ + +#include <freebsd/security/mac/mac_framework.h> + +/* + * Define it to get a correct behavior on per-interface statistics. + * You will need to perform an extra routing table lookup, per fragment, + * to do it. This may, or may not be, a performance hit. 
+ */ +#define IN6_IFSTAT_STRICT + +static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); +static void frag6_deq(struct ip6asfrag *); +static void frag6_insque(struct ip6q *, struct ip6q *); +static void frag6_remque(struct ip6q *); +static void frag6_freef(struct ip6q *); + +static struct mtx ip6qlock; +/* + * These fields all protected by ip6qlock. + */ +static VNET_DEFINE(u_int, frag6_nfragpackets); +static VNET_DEFINE(u_int, frag6_nfrags); +static VNET_DEFINE(struct ip6q, ip6q); /* ip6 reassemble queue */ + +#define V_frag6_nfragpackets VNET(frag6_nfragpackets) +#define V_frag6_nfrags VNET(frag6_nfrags) +#define V_ip6q VNET(ip6q) + +#define IP6Q_LOCK_INIT() mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF); +#define IP6Q_LOCK() mtx_lock(&ip6qlock) +#define IP6Q_TRYLOCK() mtx_trylock(&ip6qlock) +#define IP6Q_LOCK_ASSERT() mtx_assert(&ip6qlock, MA_OWNED) +#define IP6Q_UNLOCK() mtx_unlock(&ip6qlock) + +static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header"); + +/* + * Initialise reassembly queue and fragment identifier. + */ +static void +frag6_change(void *tag) +{ + + V_ip6_maxfragpackets = nmbclusters / 4; + V_ip6_maxfrags = nmbclusters / 4; +} + +void +frag6_init(void) +{ + + V_ip6_maxfragpackets = nmbclusters / 4; + V_ip6_maxfrags = nmbclusters / 4; + V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q; + + if (!IS_DEFAULT_VNET(curvnet)) + return; + + EVENTHANDLER_REGISTER(nmbclusters_change, + frag6_change, NULL, EVENTHANDLER_PRI_ANY); + + IP6Q_LOCK_INIT(); +} + +/* + * In RFC2460, fragment and reassembly rule do not agree with each other, + * in terms of next header field handling in fragment header. + * While the sender will use the same value for all of the fragmented packets, + * receiver is suggested not to check the consistency. + * + * fragment rule (p20): + * (2) A Fragment header containing: + * The Next Header value that identifies the first header of + * the Fragmentable Part of the original packet. 
+ * -> next header field is same for all fragments + * + * reassembly rule (p21): + * The Next Header field of the last header of the Unfragmentable + * Part is obtained from the Next Header field of the first + * fragment's Fragment header. + * -> should grab it from the first fragment only + * + * The following note also contradicts with fragment rule - noone is going to + * send different fragment with different next header field. + * + * additional note (p22): + * The Next Header values in the Fragment headers of different + * fragments of the same original packet may differ. Only the value + * from the Offset zero fragment packet is used for reassembly. + * -> should grab it from the first fragment only + * + * There is no explicit reason given in the RFC. Historical reason maybe? + */ +/* + * Fragment input + */ +int +frag6_input(struct mbuf **mp, int *offp, int proto) +{ + struct mbuf *m = *mp, *t; + struct ip6_hdr *ip6; + struct ip6_frag *ip6f; + struct ip6q *q6; + struct ip6asfrag *af6, *ip6af, *af6dwn; +#ifdef IN6_IFSTAT_STRICT + struct in6_ifaddr *ia; +#endif + int offset = *offp, nxt, i, next; + int first_frag = 0; + int fragoff, frgpartlen; /* must be larger than u_int16_t */ + struct ifnet *dstifp; + u_int8_t ecn, ecn0; +#if 0 + char ip6buf[INET6_ADDRSTRLEN]; +#endif + + ip6 = mtod(m, struct ip6_hdr *); +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE); + ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); +#else + IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); + if (ip6f == NULL) + return (IPPROTO_DONE); +#endif + + dstifp = NULL; +#ifdef IN6_IFSTAT_STRICT + /* find the destination interface of the packet. 
*/ + if ((ia = ip6_getdstifaddr(m)) != NULL) { + dstifp = ia->ia_ifp; + ifa_free(&ia->ia_ifa); + } +#else + /* we are violating the spec, this is not the destination interface */ + if ((m->m_flags & M_PKTHDR) != 0) + dstifp = m->m_pkthdr.rcvif; +#endif + + /* jumbo payload can't contain a fragment header */ + if (ip6->ip6_plen == 0) { + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); + in6_ifstat_inc(dstifp, ifs6_reass_fail); + return IPPROTO_DONE; + } + + /* + * check whether fragment packet's fragment length is + * multiple of 8 octets. + * sizeof(struct ip6_frag) == 8 + * sizeof(struct ip6_hdr) = 40 + */ + if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && + (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + offsetof(struct ip6_hdr, ip6_plen)); + in6_ifstat_inc(dstifp, ifs6_reass_fail); + return IPPROTO_DONE; + } + + V_ip6stat.ip6s_fragments++; + in6_ifstat_inc(dstifp, ifs6_reass_reqd); + + /* offset now points to data portion */ + offset += sizeof(struct ip6_frag); + + IP6Q_LOCK(); + + /* + * Enforce upper bound on number of fragments. + * If maxfrag is 0, never accept fragments. + * If maxfrag is -1, accept all fragments without limitation. + */ + if (V_ip6_maxfrags < 0) + ; + else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags) + goto dropfrag; + + for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next) + if (ip6f->ip6f_ident == q6->ip6q_ident && + IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && + IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst) +#ifdef MAC + && mac_ip6q_match(m, q6) +#endif + ) + break; + + if (q6 == &V_ip6q) { + /* + * the first fragment to arrive, create a reassembly queue. + */ + first_frag = 1; + + /* + * Enforce upper bound on number of fragmented packets + * for which we attempt reassembly; + * If maxfragpackets is 0, never accept fragments. + * If maxfragpackets is -1, accept all fragments without + * limitation. 
+ */ + if (V_ip6_maxfragpackets < 0) + ; + else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets) + goto dropfrag; + V_frag6_nfragpackets++; + q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE, + M_NOWAIT); + if (q6 == NULL) + goto dropfrag; + bzero(q6, sizeof(*q6)); +#ifdef MAC + if (mac_ip6q_init(q6, M_NOWAIT) != 0) { + free(q6, M_FTABLE); + goto dropfrag; + } + mac_ip6q_create(m, q6); +#endif + frag6_insque(q6, &V_ip6q); + + /* ip6q_nxt will be filled afterwards, from 1st fragment */ + q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; +#ifdef notyet + q6->ip6q_nxtp = (u_char *)nxtp; +#endif + q6->ip6q_ident = ip6f->ip6f_ident; + q6->ip6q_ttl = IPV6_FRAGTTL; + q6->ip6q_src = ip6->ip6_src; + q6->ip6q_dst = ip6->ip6_dst; + q6->ip6q_ecn = + (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; + q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ + + q6->ip6q_nfrag = 0; + } + + /* + * If it's the 1st fragment, record the length of the + * unfragmentable part and the next header of the fragment header. + */ + fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); + if (fragoff == 0) { + q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - + sizeof(struct ip6_frag); + q6->ip6q_nxt = ip6f->ip6f_nxt; + } + + /* + * Check that the reassembled packet would not exceed 65535 bytes + * in size. + * If it would exceed, discard the fragment and return an ICMP error. + */ + frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; + if (q6->ip6q_unfrglen >= 0) { + /* The 1st fragment has already arrived. 
*/ + if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + IP6Q_UNLOCK(); + return (IPPROTO_DONE); + } + } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + IP6Q_UNLOCK(); + return (IPPROTO_DONE); + } + /* + * If it's the first fragment, do the above check for each + * fragment already stored in the reassembly queue. + */ + if (fragoff == 0) { + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; + af6 = af6dwn) { + af6dwn = af6->ip6af_down; + + if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > + IPV6_MAXPACKET) { + struct mbuf *merr = IP6_REASS_MBUF(af6); + struct ip6_hdr *ip6err; + int erroff = af6->ip6af_offset; + + /* dequeue the fragment. */ + frag6_deq(af6); + free(af6, M_FTABLE); + + /* adjust pointer. */ + ip6err = mtod(merr, struct ip6_hdr *); + + /* + * Restore source and destination addresses + * in the erroneous IPv6 header. + */ + ip6err->ip6_src = q6->ip6q_src; + ip6err->ip6_dst = q6->ip6q_dst; + + icmp6_error(merr, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + erroff - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + } + } + } + + ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE, + M_NOWAIT); + if (ip6af == NULL) + goto dropfrag; + bzero(ip6af, sizeof(*ip6af)); + ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; + ip6af->ip6af_off = fragoff; + ip6af->ip6af_frglen = frgpartlen; + ip6af->ip6af_offset = offset; + IP6_REASS_MBUF(ip6af) = m; + + if (first_frag) { + af6 = (struct ip6asfrag *)q6; + goto insert; + } + + /* + * Handle ECN by comparing this segment with the first one; + * if CE is set, do not lose CE. + * drop if CE and not-ECT are mixed for the same packet. 
+ */ + ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; + ecn0 = q6->ip6q_ecn; + if (ecn == IPTOS_ECN_CE) { + if (ecn0 == IPTOS_ECN_NOTECT) { + free(ip6af, M_FTABLE); + goto dropfrag; + } + if (ecn0 != IPTOS_ECN_CE) + q6->ip6q_ecn = IPTOS_ECN_CE; + } + if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { + free(ip6af, M_FTABLE); + goto dropfrag; + } + + /* + * Find a segment which begins after this one does. + */ + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; + af6 = af6->ip6af_down) + if (af6->ip6af_off > ip6af->ip6af_off) + break; + +#if 0 + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if (af6->ip6af_up != (struct ip6asfrag *)q6) { + i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen + - ip6af->ip6af_off; + if (i > 0) { + if (i >= ip6af->ip6af_frglen) + goto dropfrag; + m_adj(IP6_REASS_MBUF(ip6af), i); + ip6af->ip6af_off += i; + ip6af->ip6af_frglen -= i; + } + } + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (af6 != (struct ip6asfrag *)q6 && + ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) { + i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; + if (i < af6->ip6af_frglen) { + af6->ip6af_frglen -= i; + af6->ip6af_off += i; + m_adj(IP6_REASS_MBUF(af6), i); + break; + } + af6 = af6->ip6af_down; + m_freem(IP6_REASS_MBUF(af6->ip6af_up)); + frag6_deq(af6->ip6af_up); + } +#else + /* + * If the incoming framgent overlaps some existing fragments in + * the reassembly queue, drop it, since it is dangerous to override + * existing fragments from a security point of view. + * We don't know which fragment is the bad guy - here we trust + * fragment that came in earlier, with no real reason. 
+ * + * Note: due to changes after disabling this part, mbuf passed to + * m_adj() below now does not meet the requirement. + */ + if (af6->ip6af_up != (struct ip6asfrag *)q6) { + i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen + - ip6af->ip6af_off; + if (i > 0) { +#if 0 /* suppress the noisy log */ + log(LOG_ERR, "%d bytes of a fragment from %s " + "overlaps the previous fragment\n", + i, ip6_sprintf(ip6buf, &q6->ip6q_src)); +#endif + free(ip6af, M_FTABLE); + goto dropfrag; + } + } + if (af6 != (struct ip6asfrag *)q6) { + i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; + if (i > 0) { +#if 0 /* suppress the noisy log */ + log(LOG_ERR, "%d bytes of a fragment from %s " + "overlaps the succeeding fragment", + i, ip6_sprintf(ip6buf, &q6->ip6q_src)); +#endif + free(ip6af, M_FTABLE); + goto dropfrag; + } + } +#endif + +insert: +#ifdef MAC + if (!first_frag) + mac_ip6q_update(m, q6); +#endif + + /* + * Stick new segment in its place; + * check for complete reassembly. + * Move to front of packet queue, as we are + * the most recently active fragmented packet. + */ + frag6_enq(ip6af, af6->ip6af_up); + V_frag6_nfrags++; + q6->ip6q_nfrag++; +#if 0 /* xxx */ + if (q6 != V_ip6q.ip6q_next) { + frag6_remque(q6); + frag6_insque(q6, &V_ip6q); + } +#endif + next = 0; + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; + af6 = af6->ip6af_down) { + if (af6->ip6af_off != next) { + IP6Q_UNLOCK(); + return IPPROTO_DONE; + } + next += af6->ip6af_frglen; + } + if (af6->ip6af_up->ip6af_mff) { + IP6Q_UNLOCK(); + return IPPROTO_DONE; + } + + /* + * Reassembly is complete; concatenate fragments. 
+ */ + ip6af = q6->ip6q_down; + t = m = IP6_REASS_MBUF(ip6af); + af6 = ip6af->ip6af_down; + frag6_deq(ip6af); + while (af6 != (struct ip6asfrag *)q6) { + af6dwn = af6->ip6af_down; + frag6_deq(af6); + while (t->m_next) + t = t->m_next; + t->m_next = IP6_REASS_MBUF(af6); + m_adj(t->m_next, af6->ip6af_offset); + free(af6, M_FTABLE); + af6 = af6dwn; + } + + /* adjust offset to point where the original next header starts */ + offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); + free(ip6af, M_FTABLE); + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); + if (q6->ip6q_ecn == IPTOS_ECN_CE) + ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); + nxt = q6->ip6q_nxt; +#ifdef notyet + *q6->ip6q_nxtp = (u_char)(nxt & 0xff); +#endif + + /* Delete frag6 header */ + if (m->m_len >= offset + sizeof(struct ip6_frag)) { + /* This is the only possible case with !PULLDOWN_TEST */ + ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag), + offset); + m->m_data += sizeof(struct ip6_frag); + m->m_len -= sizeof(struct ip6_frag); + } else { + /* this comes with no copy if the boundary is on cluster */ + if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) { + frag6_remque(q6); + V_frag6_nfrags -= q6->ip6q_nfrag; +#ifdef MAC + mac_ip6q_destroy(q6); +#endif + free(q6, M_FTABLE); + V_frag6_nfragpackets--; + goto dropfrag; + } + m_adj(t, sizeof(struct ip6_frag)); + m_cat(m, t); + } + + /* + * Store NXT to the original. + */ + { + char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */ + *prvnxtp = nxt; + } + + frag6_remque(q6); + V_frag6_nfrags -= q6->ip6q_nfrag; +#ifdef MAC + mac_ip6q_reassemble(q6, m); + mac_ip6q_destroy(q6); +#endif + free(q6, M_FTABLE); + V_frag6_nfragpackets--; + + if (m->m_flags & M_PKTHDR) { /* Isn't it always true? 
*/ + int plen = 0; + for (t = m; t; t = t->m_next) + plen += t->m_len; + m->m_pkthdr.len = plen; + } + + V_ip6stat.ip6s_reassembled++; + in6_ifstat_inc(dstifp, ifs6_reass_ok); + + /* + * Tell launch routine the next header + */ + + *mp = m; + *offp = offset; + + IP6Q_UNLOCK(); + return nxt; + + dropfrag: + IP6Q_UNLOCK(); + in6_ifstat_inc(dstifp, ifs6_reass_fail); + V_ip6stat.ip6s_fragdropped++; + m_freem(m); + return IPPROTO_DONE; +} + +/* + * Free a fragment reassembly header and all + * associated datagrams. + */ +void +frag6_freef(struct ip6q *q6) +{ + struct ip6asfrag *af6, *down6; + + IP6Q_LOCK_ASSERT(); + + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; + af6 = down6) { + struct mbuf *m = IP6_REASS_MBUF(af6); + + down6 = af6->ip6af_down; + frag6_deq(af6); + + /* + * Return ICMP time exceeded error for the 1st fragment. + * Just free other fragments. + */ + if (af6->ip6af_off == 0) { + struct ip6_hdr *ip6; + + /* adjust pointer */ + ip6 = mtod(m, struct ip6_hdr *); + + /* restore source and destination addresses */ + ip6->ip6_src = q6->ip6q_src; + ip6->ip6_dst = q6->ip6q_dst; + + icmp6_error(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_REASSEMBLY, 0); + } else + m_freem(m); + free(af6, M_FTABLE); + } + frag6_remque(q6); + V_frag6_nfrags -= q6->ip6q_nfrag; +#ifdef MAC + mac_ip6q_destroy(q6); +#endif + free(q6, M_FTABLE); + V_frag6_nfragpackets--; +} + +/* + * Put an ip fragment on a reassembly chain. + * Like insque, but pointers in middle of structure. + */ +void +frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) +{ + + IP6Q_LOCK_ASSERT(); + + af6->ip6af_up = up6; + af6->ip6af_down = up6->ip6af_down; + up6->ip6af_down->ip6af_up = af6; + up6->ip6af_down = af6; +} + +/* + * To frag6_enq as remque is to insque. 
+ */ +void +frag6_deq(struct ip6asfrag *af6) +{ + + IP6Q_LOCK_ASSERT(); + + af6->ip6af_up->ip6af_down = af6->ip6af_down; + af6->ip6af_down->ip6af_up = af6->ip6af_up; +} + +void +frag6_insque(struct ip6q *new, struct ip6q *old) +{ + + IP6Q_LOCK_ASSERT(); + + new->ip6q_prev = old; + new->ip6q_next = old->ip6q_next; + old->ip6q_next->ip6q_prev= new; + old->ip6q_next = new; +} + +void +frag6_remque(struct ip6q *p6) +{ + + IP6Q_LOCK_ASSERT(); + + p6->ip6q_prev->ip6q_next = p6->ip6q_next; + p6->ip6q_next->ip6q_prev = p6->ip6q_prev; +} + +/* + * IPv6 reassembling timer processing; + * if a timer expires on a reassembly + * queue, discard it. + */ +void +frag6_slowtimo(void) +{ + VNET_ITERATOR_DECL(vnet_iter); + struct ip6q *q6; + + VNET_LIST_RLOCK_NOSLEEP(); + IP6Q_LOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + q6 = V_ip6q.ip6q_next; + if (q6) + while (q6 != &V_ip6q) { + --q6->ip6q_ttl; + q6 = q6->ip6q_next; + if (q6->ip6q_prev->ip6q_ttl == 0) { + V_ip6stat.ip6s_fragtimeout++; + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(q6->ip6q_prev); + } + } + /* + * If we are over the maximum number of fragments + * (due to the limit being lowered), drain off + * enough to get down to the new limit. + */ + while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets && + V_ip6q.ip6q_prev) { + V_ip6stat.ip6s_fragoverflow++; + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(V_ip6q.ip6q_prev); + } + CURVNET_RESTORE(); + } + IP6Q_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); +} + +/* + * Drain off all datagram fragments. 
+ */ +void +frag6_drain(void) +{ + VNET_ITERATOR_DECL(vnet_iter); + + VNET_LIST_RLOCK_NOSLEEP(); + if (IP6Q_TRYLOCK() == 0) { + VNET_LIST_RUNLOCK_NOSLEEP(); + return; + } + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + while (V_ip6q.ip6q_next != &V_ip6q) { + V_ip6stat.ip6s_fragdropped++; + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(V_ip6q.ip6q_next); + } + CURVNET_RESTORE(); + } + IP6Q_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); +} diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c new file mode 100644 index 00000000..3df7f7b1 --- /dev/null +++ b/freebsd/sys/netinet6/icmp6.c @@ -0,0 +1,2857 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/domain.h> +#include <freebsd/sys/jail.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/signalvar.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/sx.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/time.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/if_llatbl.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/route.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet/tcp_var.h> + +#include <freebsd/netinet6/in6_ifattach.h> +#include <freebsd/netinet6/in6_pcb.h> +#include <freebsd/netinet6/ip6protosw.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet6/mld6_var.h> +#include <freebsd/netinet6/nd6.h> + +#ifdef IPSEC +#include 
<freebsd/netipsec/ipsec.h> +#include <freebsd/netipsec/key.h> +#endif + +extern struct domain inet6domain; + +VNET_DEFINE(struct icmp6stat, icmp6stat); + +VNET_DECLARE(struct inpcbinfo, ripcbinfo); +VNET_DECLARE(struct inpcbhead, ripcb); +VNET_DECLARE(int, icmp6errppslim); +static VNET_DEFINE(int, icmp6errpps_count) = 0; +static VNET_DEFINE(struct timeval, icmp6errppslim_last); +VNET_DECLARE(int, icmp6_nodeinfo); + +#define V_ripcbinfo VNET(ripcbinfo) +#define V_ripcb VNET(ripcb) +#define V_icmp6errppslim VNET(icmp6errppslim) +#define V_icmp6errpps_count VNET(icmp6errpps_count) +#define V_icmp6errppslim_last VNET(icmp6errppslim_last) +#define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) + +static void icmp6_errcount(struct icmp6errstat *, int, int); +static int icmp6_rip6_input(struct mbuf **, int); +static int icmp6_ratelimit(const struct in6_addr *, const int, const int); +static const char *icmp6_redirect_diag __P((struct in6_addr *, + struct in6_addr *, struct in6_addr *)); +static struct mbuf *ni6_input(struct mbuf *, int); +static struct mbuf *ni6_nametodns(const char *, int, int); +static int ni6_dnsmatch(const char *, int, const char *, int); +static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *, + struct ifnet **, struct in6_addr *)); +static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, + struct ifnet *, int)); +static int icmp6_notify_error(struct mbuf **, int, int, int); + +/* + * Kernel module interface for updating icmp6stat. The argument is an index + * into icmp6stat treated as an array of u_quad_t. While this encodes the + * general layout of icmp6stat into the caller, it doesn't encode its + * location, so that future changes to add, for example, per-CPU stats + * support won't cause binary compatibility problems for kernel modules. 
+ */ +void +kmod_icmp6stat_inc(int statnum) +{ + + (*((u_quad_t *)&V_icmp6stat + statnum))++; +} + +static void +icmp6_errcount(struct icmp6errstat *stat, int type, int code) +{ + switch (type) { + case ICMP6_DST_UNREACH: + switch (code) { + case ICMP6_DST_UNREACH_NOROUTE: + stat->icp6errs_dst_unreach_noroute++; + return; + case ICMP6_DST_UNREACH_ADMIN: + stat->icp6errs_dst_unreach_admin++; + return; + case ICMP6_DST_UNREACH_BEYONDSCOPE: + stat->icp6errs_dst_unreach_beyondscope++; + return; + case ICMP6_DST_UNREACH_ADDR: + stat->icp6errs_dst_unreach_addr++; + return; + case ICMP6_DST_UNREACH_NOPORT: + stat->icp6errs_dst_unreach_noport++; + return; + } + break; + case ICMP6_PACKET_TOO_BIG: + stat->icp6errs_packet_too_big++; + return; + case ICMP6_TIME_EXCEEDED: + switch (code) { + case ICMP6_TIME_EXCEED_TRANSIT: + stat->icp6errs_time_exceed_transit++; + return; + case ICMP6_TIME_EXCEED_REASSEMBLY: + stat->icp6errs_time_exceed_reassembly++; + return; + } + break; + case ICMP6_PARAM_PROB: + switch (code) { + case ICMP6_PARAMPROB_HEADER: + stat->icp6errs_paramprob_header++; + return; + case ICMP6_PARAMPROB_NEXTHEADER: + stat->icp6errs_paramprob_nextheader++; + return; + case ICMP6_PARAMPROB_OPTION: + stat->icp6errs_paramprob_option++; + return; + } + break; + case ND_REDIRECT: + stat->icp6errs_redirect++; + return; + } + stat->icp6errs_unknown++; +} + +/* + * A wrapper function for icmp6_error() necessary when the erroneous packet + * may not contain enough scope zone information. 
+ */ +void +icmp6_error2(struct mbuf *m, int type, int code, int param, + struct ifnet *ifp) +{ + struct ip6_hdr *ip6; + + if (ifp == NULL) + return; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); +#else + if (m->m_len < sizeof(struct ip6_hdr)) { + m = m_pullup(m, sizeof(struct ip6_hdr)); + if (m == NULL) + return; + } +#endif + + ip6 = mtod(m, struct ip6_hdr *); + + if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) + return; + if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) + return; + + icmp6_error(m, type, code, param); +} + +/* + * Generate an error packet of type error in response to bad IP6 packet. + */ +void +icmp6_error(struct mbuf *m, int type, int code, int param) +{ + struct ip6_hdr *oip6, *nip6; + struct icmp6_hdr *icmp6; + u_int preplen; + int off; + int nxt; + + ICMP6STAT_INC(icp6s_error); + + /* count per-type-code statistics */ + icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, type, code); + +#ifdef M_DECRYPTED /*not openbsd*/ + if (m->m_flags & M_DECRYPTED) { + ICMP6STAT_INC(icp6s_canterror); + goto freeit; + } +#endif + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); +#else + if (m->m_len < sizeof(struct ip6_hdr)) { + m = m_pullup(m, sizeof(struct ip6_hdr)); + if (m == NULL) + return; + } +#endif + oip6 = mtod(m, struct ip6_hdr *); + + /* + * If the destination address of the erroneous packet is a multicast + * address, or the packet was sent using link-layer multicast, + * we should basically suppress sending an error (RFC 2463, Section + * 2.4). + * We have two exceptions (the item e.2 in that section): + * - the Packet Too Big message can be sent for path MTU discovery. + * - the Parameter Problem Message that can be allowed an icmp6 error + * in the option type field. This check has been done in + * ip6_unknown_opt(), so we can just check the type and code. 
+ */ + if ((m->m_flags & (M_BCAST|M_MCAST) || + IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && + (type != ICMP6_PACKET_TOO_BIG && + (type != ICMP6_PARAM_PROB || + code != ICMP6_PARAMPROB_OPTION))) + goto freeit; + + /* + * RFC 2463, 2.4 (e.5): source address check. + * XXX: the case of anycast source? + */ + if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || + IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) + goto freeit; + + /* + * If we are about to send ICMPv6 against ICMPv6 error/redirect, + * don't do it. + */ + nxt = -1; + off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); + if (off >= 0 && nxt == IPPROTO_ICMPV6) { + struct icmp6_hdr *icp; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); + icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, + sizeof(*icp)); + if (icp == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return; + } +#endif + if (icp->icmp6_type < ICMP6_ECHO_REQUEST || + icp->icmp6_type == ND_REDIRECT) { + /* + * ICMPv6 error + * Special case: for redirect (which is + * informational) we must not send icmp6 error. + */ + ICMP6STAT_INC(icp6s_canterror); + goto freeit; + } else { + /* ICMPv6 informational - send the error */ + } + } else { + /* non-ICMPv6 - send the error */ + } + + oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ + + /* Finally, do rate limitation check. */ + if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { + ICMP6STAT_INC(icp6s_toofreq); + goto freeit; + } + + /* + * OK, ICMP6 can be generated. 
+ */ + + if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) + m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); + + preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); + M_PREPEND(m, preplen, M_DONTWAIT); + if (m && m->m_len < preplen) + m = m_pullup(m, preplen); + if (m == NULL) { + nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); + return; + } + + nip6 = mtod(m, struct ip6_hdr *); + nip6->ip6_src = oip6->ip6_src; + nip6->ip6_dst = oip6->ip6_dst; + + in6_clearscope(&oip6->ip6_src); + in6_clearscope(&oip6->ip6_dst); + + icmp6 = (struct icmp6_hdr *)(nip6 + 1); + icmp6->icmp6_type = type; + icmp6->icmp6_code = code; + icmp6->icmp6_pptr = htonl((u_int32_t)param); + + /* + * icmp6_reflect() is designed to be in the input path. + * icmp6_error() can be called from both input and output path, + * and if we are in output path rcvif could contain bogus value. + * clear m->m_pkthdr.rcvif for safety, we should have enough scope + * information in ip header (nip6). + */ + m->m_pkthdr.rcvif = NULL; + + ICMP6STAT_INC(icp6s_outhist[type]); + icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ + + return; + + freeit: + /* + * If we can't tell whether or not we can generate ICMP6, free it. + */ + m_freem(m); +} + +/* + * Process a received ICMP6 message. + */ +int +icmp6_input(struct mbuf **mp, int *offp, int proto) +{ + struct mbuf *m = *mp, *n; + struct ifnet *ifp; + struct ip6_hdr *ip6, *nip6; + struct icmp6_hdr *icmp6, *nicmp6; + int off = *offp; + int icmp6len = m->m_pkthdr.len - *offp; + int code, sum, noff; + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + + ifp = m->m_pkthdr.rcvif; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); + /* m might change if M_LOOP. 
So, call mtod after this */ +#endif + + /* + * Locate icmp6 structure in mbuf, and check + * that not corrupted and of at least minimum length + */ + + ip6 = mtod(m, struct ip6_hdr *); + if (icmp6len < sizeof(struct icmp6_hdr)) { + ICMP6STAT_INC(icp6s_tooshort); + goto freeit; + } + + /* + * Check multicast group membership. + * Note: SSM filters are not applied for ICMPv6 traffic. + */ + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + struct in6_multi *inm; + + inm = in6m_lookup(ifp, &ip6->ip6_dst); + if (inm == NULL) { + IP6STAT_INC(ip6s_notmember); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + goto freeit; + } + } + + /* + * calculate the checksum + */ +#ifndef PULLDOWN_TEST + icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); + if (icmp6 == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return IPPROTO_DONE; + } +#endif + code = icmp6->icmp6_code; + + if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { + nd6log((LOG_ERR, + "ICMP6 checksum error(%d|%x) %s\n", + icmp6->icmp6_type, sum, + ip6_sprintf(ip6bufs, &ip6->ip6_src))); + ICMP6STAT_INC(icp6s_checksum); + goto freeit; + } + + if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { + /* + * Deliver very specific ICMP6 type only. + * This is important to deliver TOOBIG. Otherwise PMTUD + * will not work. 
+ */ + switch (icmp6->icmp6_type) { + case ICMP6_DST_UNREACH: + case ICMP6_PACKET_TOO_BIG: + case ICMP6_TIME_EXCEEDED: + break; + default: + goto freeit; + } + } + + ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); + icmp6_ifstat_inc(ifp, ifs6_in_msg); + if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) + icmp6_ifstat_inc(ifp, ifs6_in_error); + + switch (icmp6->icmp6_type) { + case ICMP6_DST_UNREACH: + icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); + switch (code) { + case ICMP6_DST_UNREACH_NOROUTE: + code = PRC_UNREACH_NET; + break; + case ICMP6_DST_UNREACH_ADMIN: + icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); + code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ + break; + case ICMP6_DST_UNREACH_ADDR: + code = PRC_HOSTDEAD; + break; + case ICMP6_DST_UNREACH_BEYONDSCOPE: + /* I mean "source address was incorrect." */ + code = PRC_PARAMPROB; + break; + case ICMP6_DST_UNREACH_NOPORT: + code = PRC_UNREACH_PORT; + break; + default: + goto badcode; + } + goto deliver; + break; + + case ICMP6_PACKET_TOO_BIG: + icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig); + + /* validation is made in icmp6_mtudisc_update */ + + code = PRC_MSGSIZE; + + /* + * Updating the path MTU will be done after examining + * intermediate extension headers. 
+ */ + goto deliver; + break; + + case ICMP6_TIME_EXCEEDED: + icmp6_ifstat_inc(ifp, ifs6_in_timeexceed); + switch (code) { + case ICMP6_TIME_EXCEED_TRANSIT: + code = PRC_TIMXCEED_INTRANS; + break; + case ICMP6_TIME_EXCEED_REASSEMBLY: + code = PRC_TIMXCEED_REASS; + break; + default: + goto badcode; + } + goto deliver; + break; + + case ICMP6_PARAM_PROB: + icmp6_ifstat_inc(ifp, ifs6_in_paramprob); + switch (code) { + case ICMP6_PARAMPROB_NEXTHEADER: + code = PRC_UNREACH_PROTOCOL; + break; + case ICMP6_PARAMPROB_HEADER: + case ICMP6_PARAMPROB_OPTION: + code = PRC_PARAMPROB; + break; + default: + goto badcode; + } + goto deliver; + break; + + case ICMP6_ECHO_REQUEST: + icmp6_ifstat_inc(ifp, ifs6_in_echo); + if (code != 0) + goto badcode; + if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { + /* Give up remote */ + break; + } + if ((n->m_flags & M_EXT) != 0 + || n->m_len < off + sizeof(struct icmp6_hdr)) { + struct mbuf *n0 = n; + const int maxlen = sizeof(*nip6) + sizeof(*nicmp6); + int n0len; + + MGETHDR(n, M_DONTWAIT, n0->m_type); + n0len = n0->m_pkthdr.len; /* save for use below */ + if (n) + M_MOVE_PKTHDR(n, n0); + if (n && maxlen >= MHLEN) { + MCLGET(n, M_DONTWAIT); + if ((n->m_flags & M_EXT) == 0) { + m_free(n); + n = NULL; + } + } + if (n == NULL) { + /* Give up remote */ + m_freem(n0); + break; + } + /* + * Copy IPv6 and ICMPv6 only. + */ + nip6 = mtod(n, struct ip6_hdr *); + bcopy(ip6, nip6, sizeof(struct ip6_hdr)); + nicmp6 = (struct icmp6_hdr *)(nip6 + 1); + bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); + noff = sizeof(struct ip6_hdr); + /* new mbuf contains only ipv6+icmpv6 headers */ + n->m_len = noff + sizeof(struct icmp6_hdr); + /* + * Adjust mbuf. ip6_plen will be adjusted in + * ip6_output(). 
+ */ + m_adj(n0, off + sizeof(struct icmp6_hdr)); + /* recalculate complete packet size */ + n->m_pkthdr.len = n0len + (noff - off); + n->m_next = n0; + } else { + nip6 = mtod(n, struct ip6_hdr *); + IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off, + sizeof(*nicmp6)); + noff = off; + } + nicmp6->icmp6_type = ICMP6_ECHO_REPLY; + nicmp6->icmp6_code = 0; + if (n) { + ICMP6STAT_INC(icp6s_reflect); + ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]); + icmp6_reflect(n, noff); + } + break; + + case ICMP6_ECHO_REPLY: + icmp6_ifstat_inc(ifp, ifs6_in_echoreply); + if (code != 0) + goto badcode; + break; + + case MLD_LISTENER_QUERY: + case MLD_LISTENER_REPORT: + case MLD_LISTENER_DONE: + case MLDV2_LISTENER_REPORT: + /* + * Drop MLD traffic which is not link-local, has a hop limit + * of greater than 1 hop, or which does not have the + * IPv6 HBH Router Alert option. + * As IPv6 HBH options are stripped in ip6_input() we must + * check an mbuf header flag. + * XXX Should we also sanity check that these messages + * were directed to a link-local multicast prefix? + */ + if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0) + goto freeit; + if (mld_input(m, off, icmp6len) != 0) + return (IPPROTO_DONE); + /* m stays. */ + break; + + case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ + { + enum { WRU, FQDN } mode; + + if (!V_icmp6_nodeinfo) + break; + + if (icmp6len == sizeof(struct icmp6_hdr) + 4) + mode = WRU; + else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) + mode = FQDN; + else + goto badlen; + + if (mode == FQDN) { +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), + IPPROTO_DONE); +#endif + n = m_copy(m, 0, M_COPYALL); + if (n) + n = ni6_input(n, off); + /* XXX meaningless if n == NULL */ + noff = sizeof(struct ip6_hdr); + } else { + struct prison *pr; + u_char *p; + int maxlen, maxhlen, hlen; + + /* + * XXX: this combination of flags is pointless, + * but should we keep this for compatibility? 
+ */ + if ((V_icmp6_nodeinfo & 5) != 5) + break; + + if (code != 0) + goto badcode; + maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4; + if (maxlen >= MCLBYTES) { + /* Give up remote */ + break; + } + MGETHDR(n, M_DONTWAIT, m->m_type); + if (n && maxlen > MHLEN) { + MCLGET(n, M_DONTWAIT); + if ((n->m_flags & M_EXT) == 0) { + m_free(n); + n = NULL; + } + } + if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) { + /* + * Previous code did a blind M_COPY_PKTHDR + * and said "just for rcvif". If true, then + * we could tolerate the dup failing (due to + * the deep copy of the tag chain). For now + * be conservative and just fail. + */ + m_free(n); + n = NULL; + } + if (n == NULL) { + /* Give up remote */ + break; + } + n->m_pkthdr.rcvif = NULL; + n->m_len = 0; + maxhlen = M_TRAILINGSPACE(n) - maxlen; + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + hlen = strlen(pr->pr_hostname); + if (maxhlen > hlen) + maxhlen = hlen; + /* + * Copy IPv6 and ICMPv6 only. + */ + nip6 = mtod(n, struct ip6_hdr *); + bcopy(ip6, nip6, sizeof(struct ip6_hdr)); + nicmp6 = (struct icmp6_hdr *)(nip6 + 1); + bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); + p = (u_char *)(nicmp6 + 1); + bzero(p, 4); + /* meaningless TTL */ + bcopy(pr->pr_hostname, p + 4, maxhlen); + mtx_unlock(&pr->pr_mtx); + noff = sizeof(struct ip6_hdr); + n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + + sizeof(struct icmp6_hdr) + 4 + maxhlen; + nicmp6->icmp6_type = ICMP6_WRUREPLY; + nicmp6->icmp6_code = 0; + } + if (n) { + ICMP6STAT_INC(icp6s_reflect); + ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]); + icmp6_reflect(n, noff); + } + break; + } + + case ICMP6_WRUREPLY: + if (code != 0) + goto badcode; + break; + + case ND_ROUTER_SOLICIT: + icmp6_ifstat_inc(ifp, ifs6_in_routersolicit); + if (code != 0) + goto badcode; + if (icmp6len < sizeof(struct nd_router_solicit)) + goto badlen; + if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + /* give up local */ + nd6_rs_input(m, off, icmp6len); + m = NULL; + goto 
freeit; + } + nd6_rs_input(n, off, icmp6len); + /* m stays. */ + break; + + case ND_ROUTER_ADVERT: + icmp6_ifstat_inc(ifp, ifs6_in_routeradvert); + if (code != 0) + goto badcode; + if (icmp6len < sizeof(struct nd_router_advert)) + goto badlen; + if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + /* give up local */ + nd6_ra_input(m, off, icmp6len); + m = NULL; + goto freeit; + } + nd6_ra_input(n, off, icmp6len); + /* m stays. */ + break; + + case ND_NEIGHBOR_SOLICIT: + icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit); + if (code != 0) + goto badcode; + if (icmp6len < sizeof(struct nd_neighbor_solicit)) + goto badlen; + if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + /* give up local */ + nd6_ns_input(m, off, icmp6len); + m = NULL; + goto freeit; + } + nd6_ns_input(n, off, icmp6len); + /* m stays. */ + break; + + case ND_NEIGHBOR_ADVERT: + icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert); + if (code != 0) + goto badcode; + if (icmp6len < sizeof(struct nd_neighbor_advert)) + goto badlen; + if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + /* give up local */ + nd6_na_input(m, off, icmp6len); + m = NULL; + goto freeit; + } + nd6_na_input(n, off, icmp6len); + /* m stays. */ + break; + + case ND_REDIRECT: + icmp6_ifstat_inc(ifp, ifs6_in_redirect); + if (code != 0) + goto badcode; + if (icmp6len < sizeof(struct nd_redirect)) + goto badlen; + if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + /* give up local */ + icmp6_redirect_input(m, off); + m = NULL; + goto freeit; + } + icmp6_redirect_input(n, off); + /* m stays. 
*/ + break; + + case ICMP6_ROUTER_RENUMBERING: + if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && + code != ICMP6_ROUTER_RENUMBERING_RESULT) + goto badcode; + if (icmp6len < sizeof(struct icmp6_router_renum)) + goto badlen; + break; + + default: + nd6log((LOG_DEBUG, + "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", + icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), + ifp ? ifp->if_index : 0)); + if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { + /* ICMPv6 error: MUST deliver it by spec... */ + code = PRC_NCMDS; + /* deliver */ + } else { + /* ICMPv6 informational: MUST not deliver */ + break; + } + deliver: + if (icmp6_notify_error(&m, off, icmp6len, code) != 0) { + /* In this case, m should've been freed. */ + return (IPPROTO_DONE); + } + break; + + badcode: + ICMP6STAT_INC(icp6s_badcode); + break; + + badlen: + ICMP6STAT_INC(icp6s_badlen); + break; + } + + /* deliver the packet to appropriate sockets */ + icmp6_rip6_input(&m, *offp); + + return IPPROTO_DONE; + + freeit: + m_freem(m); + return IPPROTO_DONE; +} + +static int +icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) +{ + struct mbuf *m = *mp; + struct icmp6_hdr *icmp6; + struct ip6_hdr *eip6; + u_int32_t notifymtu; + struct sockaddr_in6 icmp6src, icmp6dst; + + if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { + ICMP6STAT_INC(icp6s_tooshort); + goto freeit; + } +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); + icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, + sizeof(*icmp6) + sizeof(struct ip6_hdr)); + if (icmp6 == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + eip6 = (struct ip6_hdr *)(icmp6 + 1); + + /* Detect the upper level protocol */ + { + void (*ctlfunc)(int, struct sockaddr *, void *); + u_int8_t nxt = eip6->ip6_nxt; + int eoff = off + sizeof(struct icmp6_hdr) + + 
sizeof(struct ip6_hdr); + struct ip6ctlparam ip6cp; + struct in6_addr *finaldst = NULL; + int icmp6type = icmp6->icmp6_type; + struct ip6_frag *fh; + struct ip6_rthdr *rth; + struct ip6_rthdr0 *rth0; + int rthlen; + + while (1) { /* XXX: should avoid infinite loop explicitly? */ + struct ip6_ext *eh; + + switch (nxt) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + case IPPROTO_AH: +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, + eoff + sizeof(struct ip6_ext), -1); + eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); +#else + IP6_EXTHDR_GET(eh, struct ip6_ext *, m, + eoff, sizeof(*eh)); + if (eh == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + + if (nxt == IPPROTO_AH) + eoff += (eh->ip6e_len + 2) << 2; + else + eoff += (eh->ip6e_len + 1) << 3; + nxt = eh->ip6e_nxt; + break; + case IPPROTO_ROUTING: + /* + * When the erroneous packet contains a + * routing header, we should examine the + * header to determine the final destination. + * Otherwise, we can't properly update + * information that depends on the final + * destination (e.g. path MTU). + */ +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); + rth = (struct ip6_rthdr *) + (mtod(m, caddr_t) + eoff); +#else + IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, + eoff, sizeof(*rth)); + if (rth == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + rthlen = (rth->ip6r_len + 1) << 3; + /* + * XXX: currently there is no + * officially defined type other + * than type-0. + * Note that if the segment left field + * is 0, all intermediate hops must + * have been passed. 
+ */ + if (rth->ip6r_segleft && + rth->ip6r_type == IPV6_RTHDR_TYPE_0) { + int hops; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); + rth0 = (struct ip6_rthdr0 *) + (mtod(m, caddr_t) + eoff); +#else + IP6_EXTHDR_GET(rth0, + struct ip6_rthdr0 *, m, + eoff, rthlen); + if (rth0 == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + /* just ignore a bogus header */ + if ((rth0->ip6r0_len % 2) == 0 && + (hops = rth0->ip6r0_len/2)) + finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); + } + eoff += rthlen; + nxt = rth->ip6r_nxt; + break; + case IPPROTO_FRAGMENT: +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, eoff + + sizeof(struct ip6_frag), -1); + fh = (struct ip6_frag *)(mtod(m, caddr_t) + + eoff); +#else + IP6_EXTHDR_GET(fh, struct ip6_frag *, m, + eoff, sizeof(*fh)); + if (fh == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + /* + * Data after a fragment header is meaningless + * unless it is the first fragment, but + * we'll go to the notify label for path MTU + * discovery. + */ + if (fh->ip6f_offlg & IP6F_OFF_MASK) + goto notify; + + eoff += sizeof(struct ip6_frag); + nxt = fh->ip6f_nxt; + break; + default: + /* + * This case includes ESP and the No Next + * Header. In such cases going to the notify + * label does not have any meaning + * (i.e. ctlfunc will be NULL), but we go + * anyway since we might have to update + * path MTU information. + */ + goto notify; + } + } + notify: +#ifndef PULLDOWN_TEST + icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, + sizeof(*icmp6) + sizeof(struct ip6_hdr)); + if (icmp6 == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + + /* + * retrieve parameters from the inner IPv6 header, and convert + * them into sockaddr structures. 
+ * XXX: there is no guarantee that the source or destination + * addresses of the inner packet are in the same scope as + * the addresses of the icmp packet. But there is no other + * way to determine the zone. + */ + eip6 = (struct ip6_hdr *)(icmp6 + 1); + + bzero(&icmp6dst, sizeof(icmp6dst)); + icmp6dst.sin6_len = sizeof(struct sockaddr_in6); + icmp6dst.sin6_family = AF_INET6; + if (finaldst == NULL) + icmp6dst.sin6_addr = eip6->ip6_dst; + else + icmp6dst.sin6_addr = *finaldst; + if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) + goto freeit; + bzero(&icmp6src, sizeof(icmp6src)); + icmp6src.sin6_len = sizeof(struct sockaddr_in6); + icmp6src.sin6_family = AF_INET6; + icmp6src.sin6_addr = eip6->ip6_src; + if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) + goto freeit; + icmp6src.sin6_flowinfo = + (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); + + if (finaldst == NULL) + finaldst = &eip6->ip6_dst; + ip6cp.ip6c_m = m; + ip6cp.ip6c_icmp6 = icmp6; + ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); + ip6cp.ip6c_off = eoff; + ip6cp.ip6c_finaldst = finaldst; + ip6cp.ip6c_src = &icmp6src; + ip6cp.ip6c_nxt = nxt; + + if (icmp6type == ICMP6_PACKET_TOO_BIG) { + notifymtu = ntohl(icmp6->icmp6_mtu); + ip6cp.ip6c_cmdarg = (void *)¬ifymtu; + icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ + } + + ctlfunc = (void (*)(int, struct sockaddr *, void *)) + (inet6sw[ip6_protox[nxt]].pr_ctlinput); + if (ctlfunc) { + (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, + &ip6cp); + } + } + *mp = m; + return (0); + + freeit: + m_freem(m); + return (-1); +} + +void +icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) +{ + struct in6_addr *dst = ip6cp->ip6c_finaldst; + struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; + struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ + u_int mtu = ntohl(icmp6->icmp6_mtu); + struct in_conninfo inc; + +#if 0 + /* + * RFC2460 section 5, last paragraph. 
+ * even though minimum link MTU for IPv6 is IPV6_MMTU, + * we may see ICMPv6 too big with mtu < IPV6_MMTU + * due to packet translator in the middle. + * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for + * special handling. + */ + if (mtu < IPV6_MMTU) + return; +#endif + + /* + * we reject ICMPv6 too big with abnormally small value. + * XXX what is the good definition of "abnormally small"? + */ + if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) + return; + + if (!validated) + return; + + /* + * In case the suggested mtu is less than IPV6_MMTU, we + * only need to remember that it was for above mentioned + * "alwaysfrag" case. + * Try to be as close to the spec as possible. + */ + if (mtu < IPV6_MMTU) + mtu = IPV6_MMTU - 8; + + bzero(&inc, sizeof(inc)); + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = *dst; + if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) + return; + + if (mtu < tcp_maxmtu6(&inc, NULL)) { + tcp_hc_updatemtu(&inc, mtu); + ICMP6STAT_INC(icp6s_pmtuchg); + } +} + +/* + * Process a Node Information Query packet, based on + * draft-ietf-ipngwg-icmp-name-lookups-07. 
+ * + * Spec incompatibilities: + * - IPv6 Subject address handling + * - IPv4 Subject address handling support missing + * - Proxy reply (answer even if it's not for me) + * - joins NI group address at in6_ifattach() time only, does not cope + * with hostname changes by sethostname(3) + */ +static struct mbuf * +ni6_input(struct mbuf *m, int off) +{ + struct icmp6_nodeinfo *ni6, *nni6; + struct mbuf *n = NULL; + struct prison *pr; + u_int16_t qtype; + int subjlen; + int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); + struct ni_reply_fqdn *fqdn; + int addrs; /* for NI_QTYPE_NODEADDR */ + struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ + struct in6_addr in6_subj; /* subject address */ + struct ip6_hdr *ip6; + int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ + char *subj = NULL; + struct in6_ifaddr *ia6 = NULL; + + ip6 = mtod(m, struct ip6_hdr *); +#ifndef PULLDOWN_TEST + ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); + if (ni6 == NULL) { + /* m is already reclaimed */ + return (NULL); + } +#endif + + /* + * Validate IPv6 source address. + * The default configuration MUST be to refuse answering queries from + * global-scope addresses according to RFC4602. + * Notes: + * - it's not very clear what "refuse" means; this implementation + * simply drops it. + * - it's not very easy to identify global-scope (unicast) addresses + * since there are many prefixes for them. It should be safer + * and in practice sufficient to check "all" but loopback and + * link-local (note that site-local unicast was deprecated and + * ULA is defined as global scope-wise) + */ + if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && + !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && + !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) + goto bad; + + /* + * Validate IPv6 destination address. 
+ * + * The Responder must discard the Query without further processing + * unless it is one of the Responder's unicast or anycast addresses, or + * a link-local scope multicast address which the Responder has joined. + * [RFC4602, Section 5.] + */ + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) + goto bad; + /* else it's a link-local multicast, fine */ + } else { /* unicast or anycast */ + if ((ia6 = ip6_getdstifaddr(m)) == NULL) + goto bad; /* XXX impossible */ + + if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && + !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { + ifa_free(&ia6->ia_ifa); + nd6log((LOG_DEBUG, "ni6_input: ignore node info to " + "a temporary address in %s:%d", + __FILE__, __LINE__)); + goto bad; + } + ifa_free(&ia6->ia_ifa); + } + + /* validate query Subject field. */ + qtype = ntohs(ni6->ni_qtype); + subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); + switch (qtype) { + case NI_QTYPE_NOOP: + case NI_QTYPE_SUPTYPES: + /* 07 draft */ + if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) + break; + /* FALLTHROUGH */ + case NI_QTYPE_FQDN: + case NI_QTYPE_NODEADDR: + case NI_QTYPE_IPV4ADDR: + switch (ni6->ni_code) { + case ICMP6_NI_SUBJ_IPV6: +#if ICMP6_NI_SUBJ_IPV6 != 0 + case 0: +#endif + /* + * backward compatibility - try to accept 03 draft + * format, where no Subject is present. + */ + if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && + subjlen == 0) { + oldfqdn++; + break; + } +#if ICMP6_NI_SUBJ_IPV6 != 0 + if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) + goto bad; +#endif + + if (subjlen != sizeof(struct in6_addr)) + goto bad; + + /* + * Validate Subject address. + * + * Not sure what exactly "address belongs to the node" + * means in the spec, is it just unicast, or what? 
+ * + * At this moment we consider Subject address as + * "belong to the node" if the Subject address equals + * to the IPv6 destination address; validation for + * IPv6 destination address should have done enough + * check for us. + * + * We do not do proxy at this moment. + */ + /* m_pulldown instead of copy? */ + m_copydata(m, off + sizeof(struct icmp6_nodeinfo), + subjlen, (caddr_t)&in6_subj); + if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) + goto bad; + + subj = (char *)&in6_subj; + if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) + break; + + /* + * XXX if we are to allow other cases, we should really + * be careful about scope here. + * basically, we should disallow queries toward IPv6 + * destination X with subject Y, + * if scope(X) > scope(Y). + * if we allow scope(X) > scope(Y), it will result in + * information leakage across scope boundary. + */ + goto bad; + + case ICMP6_NI_SUBJ_FQDN: + /* + * Validate Subject name with gethostname(3). + * + * The behavior may need some debate, since: + * - we are not sure if the node has FQDN as + * hostname (returned by gethostname(3)). + * - the code does wildcard match for truncated names. + * however, we are not sure if we want to perform + * wildcard match, if gethostname(3) side has + * truncated hostname. + */ + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + n = ni6_nametodns(pr->pr_hostname, + strlen(pr->pr_hostname), 0); + mtx_unlock(&pr->pr_mtx); + if (!n || n->m_next || n->m_len == 0) + goto bad; + IP6_EXTHDR_GET(subj, char *, m, + off + sizeof(struct icmp6_nodeinfo), subjlen); + if (subj == NULL) + goto bad; + if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), + n->m_len)) { + goto bad; + } + m_freem(n); + n = NULL; + break; + + case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ + default: + goto bad; + } + break; + } + + /* refuse based on configuration. XXX ICMP6_NI_REFUSED? 
*/ + switch (qtype) { + case NI_QTYPE_FQDN: + if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) + goto bad; + break; + case NI_QTYPE_NODEADDR: + case NI_QTYPE_IPV4ADDR: + if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) + goto bad; + break; + } + + /* guess reply length */ + switch (qtype) { + case NI_QTYPE_NOOP: + break; /* no reply data */ + case NI_QTYPE_SUPTYPES: + replylen += sizeof(u_int32_t); + break; + case NI_QTYPE_FQDN: + /* XXX will append an mbuf */ + replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); + break; + case NI_QTYPE_NODEADDR: + addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); + if ((replylen += addrs * (sizeof(struct in6_addr) + + sizeof(u_int32_t))) > MCLBYTES) + replylen = MCLBYTES; /* XXX: will truncate pkt later */ + break; + case NI_QTYPE_IPV4ADDR: + /* unsupported - should respond with unknown Qtype? */ + break; + default: + /* + * XXX: We must return a reply with the ICMP6 code + * `unknown Qtype' in this case. However we regard the case + * as an FQDN query for backward compatibility. + * Older versions set a random value to this field, + * so it rarely varies in the defined qtypes. + * But the mechanism is not reliable... + * maybe we should obsolete older versions. + */ + qtype = NI_QTYPE_FQDN; + /* XXX will append an mbuf */ + replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); + oldfqdn++; + break; + } + + /* allocate an mbuf to reply. */ + MGETHDR(n, M_DONTWAIT, m->m_type); + if (n == NULL) { + m_freem(m); + return (NULL); + } + M_MOVE_PKTHDR(n, m); /* just for recvif */ + if (replylen > MHLEN) { + if (replylen > MCLBYTES) { + /* + * XXX: should we try to allocate more? But MCLBYTES + * is probably much larger than IPV6_MMTU... 
+ */ + goto bad; + } + MCLGET(n, M_DONTWAIT); + if ((n->m_flags & M_EXT) == 0) { + goto bad; + } + } + n->m_pkthdr.len = n->m_len = replylen; + + /* copy mbuf header and IPv6 + Node Information base headers */ + bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); + nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); + bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); + + /* qtype dependent procedure */ + switch (qtype) { + case NI_QTYPE_NOOP: + nni6->ni_code = ICMP6_NI_SUCCESS; + nni6->ni_flags = 0; + break; + case NI_QTYPE_SUPTYPES: + { + u_int32_t v; + nni6->ni_code = ICMP6_NI_SUCCESS; + nni6->ni_flags = htons(0x0000); /* raw bitmap */ + /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ + v = (u_int32_t)htonl(0x0000000f); + bcopy(&v, nni6 + 1, sizeof(u_int32_t)); + break; + } + case NI_QTYPE_FQDN: + nni6->ni_code = ICMP6_NI_SUCCESS; + fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); + nni6->ni_flags = 0; /* XXX: meaningless TTL */ + fqdn->ni_fqdn_ttl = 0; /* ditto. */ + /* + * XXX do we really have FQDN in hostname? + */ + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + n->m_next = ni6_nametodns(pr->pr_hostname, + strlen(pr->pr_hostname), oldfqdn); + mtx_unlock(&pr->pr_mtx); + if (n->m_next == NULL) + goto bad; + /* XXX we assume that n->m_next is not a chain */ + if (n->m_next->m_next != NULL) + goto bad; + n->m_pkthdr.len += n->m_next->m_len; + break; + case NI_QTYPE_NODEADDR: + { + int lenlim, copied; + + nni6->ni_code = ICMP6_NI_SUCCESS; + n->m_pkthdr.len = n->m_len = + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); + lenlim = M_TRAILINGSPACE(n); + copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); + /* XXX: reset mbuf length */ + n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + + sizeof(struct icmp6_nodeinfo) + copied; + break; + } + default: + break; /* XXX impossible! 
*/ + } + + nni6->ni_type = ICMP6_NI_REPLY; + m_freem(m); + return (n); + + bad: + m_freem(m); + if (n) + m_freem(n); + return (NULL); +} + +/* + * make a mbuf with DNS-encoded string. no compression support. + * + * XXX names with less than 2 dots (like "foo" or "foo.section") will be + * treated as truncated name (two \0 at the end). this is a wild guess. + * + * old - return pascal string if non-zero + */ +static struct mbuf * +ni6_nametodns(const char *name, int namelen, int old) +{ + struct mbuf *m; + char *cp, *ep; + const char *p, *q; + int i, len, nterm; + + if (old) + len = namelen + 1; + else + len = MCLBYTES; + + /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */ + MGET(m, M_DONTWAIT, MT_DATA); + if (m && len > MLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) + goto fail; + } + if (!m) + goto fail; + m->m_next = NULL; + + if (old) { + m->m_len = len; + *mtod(m, char *) = namelen; + bcopy(name, mtod(m, char *) + 1, namelen); + return m; + } else { + m->m_len = 0; + cp = mtod(m, char *); + ep = mtod(m, char *) + M_TRAILINGSPACE(m); + + /* if not certain about my name, return empty buffer */ + if (namelen == 0) + return m; + + /* + * guess if it looks like shortened hostname, or FQDN. + * shortened hostname needs two trailing "\0". + */ + i = 0; + for (p = name; p < name + namelen; p++) { + if (*p && *p == '.') + i++; + } + if (i < 2) + nterm = 2; + else + nterm = 1; + + p = name; + while (cp < ep && p < name + namelen) { + i = 0; + for (q = p; q < name + namelen && *q && *q != '.'; q++) + i++; + /* result does not fit into mbuf */ + if (cp + i + 1 >= ep) + goto fail; + /* + * DNS label length restriction, RFC1035 page 8. + * "i == 0" case is included here to avoid returning + * 0-length label on "foo..bar". 
+ */ + if (i <= 0 || i >= 64) + goto fail; + *cp++ = i; + bcopy(p, cp, i); + cp += i; + p = q; + if (p < name + namelen && *p == '.') + p++; + } + /* termination */ + if (cp + nterm >= ep) + goto fail; + while (nterm-- > 0) + *cp++ = '\0'; + m->m_len = cp - mtod(m, char *); + return m; + } + + panic("should not reach here"); + /* NOTREACHED */ + + fail: + if (m) + m_freem(m); + return NULL; +} + +/* + * check if two DNS-encoded string matches. takes care of truncated + * form (with \0\0 at the end). no compression support. + * XXX upper/lowercase match (see RFC2065) + */ +static int +ni6_dnsmatch(const char *a, int alen, const char *b, int blen) +{ + const char *a0, *b0; + int l; + + /* simplest case - need validation? */ + if (alen == blen && bcmp(a, b, alen) == 0) + return 1; + + a0 = a; + b0 = b; + + /* termination is mandatory */ + if (alen < 2 || blen < 2) + return 0; + if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') + return 0; + alen--; + blen--; + + while (a - a0 < alen && b - b0 < blen) { + if (a - a0 + 1 > alen || b - b0 + 1 > blen) + return 0; + + if ((signed char)a[0] < 0 || (signed char)b[0] < 0) + return 0; + /* we don't support compression yet */ + if (a[0] >= 64 || b[0] >= 64) + return 0; + + /* truncated case */ + if (a[0] == 0 && a - a0 == alen - 1) + return 1; + if (b[0] == 0 && b - b0 == blen - 1) + return 1; + if (a[0] == 0 || b[0] == 0) + return 0; + + if (a[0] != b[0]) + return 0; + l = a[0]; + if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) + return 0; + if (bcmp(a + 1, b + 1, l) != 0) + return 0; + + a += 1 + l; + b += 1 + l; + } + + if (a - a0 == alen && b - b0 == blen) + return 1; + else + return 0; +} + +/* + * calculate the number of addresses to be returned in the node info reply. 
+ */ +static int +ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, + struct in6_addr *subj) +{ + struct ifnet *ifp; + struct in6_ifaddr *ifa6; + struct ifaddr *ifa; + int addrs = 0, addrsofif, iffound = 0; + int niflags = ni6->ni_flags; + + if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { + switch (ni6->ni_code) { + case ICMP6_NI_SUBJ_IPV6: + if (subj == NULL) /* must be impossible... */ + return (0); + break; + default: + /* + * XXX: we only support IPv6 subject address for + * this Qtype. + */ + return (0); + } + } + + IFNET_RLOCK_NOSLEEP(); + for (ifp = TAILQ_FIRST(&V_ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { + addrsofif = 0; + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ifa6 = (struct in6_ifaddr *)ifa; + + if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && + IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) + iffound = 1; + + /* + * IPv4-mapped addresses can only be returned by a + * Node Information proxy, since they represent + * addresses of IPv4-only nodes, which perforce do + * not implement this protocol. + * [icmp-name-lookups-07, Section 5.4] + * So we don't support NI_NODEADDR_FLAG_COMPAT in + * this function at this moment. + */ + + /* What do we have to do about ::1? */ + switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { + case IPV6_ADDR_SCOPE_LINKLOCAL: + if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) + continue; + break; + case IPV6_ADDR_SCOPE_SITELOCAL: + if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) + continue; + break; + case IPV6_ADDR_SCOPE_GLOBAL: + if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) + continue; + break; + default: + continue; + } + + /* + * check if anycast is okay. + * XXX: just experimental. not in the spec. 
+ */ + if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && + (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) + continue; /* we need only unicast addresses */ + if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && + (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { + continue; + } + addrsofif++; /* count the address */ + } + IF_ADDR_UNLOCK(ifp); + if (iffound) { + *ifpp = ifp; + IFNET_RUNLOCK_NOSLEEP(); + return (addrsofif); + } + + addrs += addrsofif; + } + IFNET_RUNLOCK_NOSLEEP(); + + return (addrs); +} + +static int +ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, + struct ifnet *ifp0, int resid) +{ + struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet); + struct in6_ifaddr *ifa6; + struct ifaddr *ifa; + struct ifnet *ifp_dep = NULL; + int copied = 0, allow_deprecated = 0; + u_char *cp = (u_char *)(nni6 + 1); + int niflags = ni6->ni_flags; + u_int32_t ltime; + + if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) + return (0); /* needless to copy */ + + IFNET_RLOCK_NOSLEEP(); + again: + + for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) { + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ifa6 = (struct in6_ifaddr *)ifa; + + if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && + allow_deprecated == 0) { + /* + * prefererred address should be put before + * deprecated addresses. + */ + + /* record the interface for later search */ + if (ifp_dep == NULL) + ifp_dep = ifp; + + continue; + } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && + allow_deprecated != 0) + continue; /* we now collect deprecated addrs */ + + /* What do we have to do about ::1? 
*/ + switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { + case IPV6_ADDR_SCOPE_LINKLOCAL: + if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) + continue; + break; + case IPV6_ADDR_SCOPE_SITELOCAL: + if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) + continue; + break; + case IPV6_ADDR_SCOPE_GLOBAL: + if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) + continue; + break; + default: + continue; + } + + /* + * check if anycast is okay. + * XXX: just experimental. not in the spec. + */ + if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && + (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) + continue; + if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && + (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { + continue; + } + + /* now we can copy the address */ + if (resid < sizeof(struct in6_addr) + + sizeof(u_int32_t)) { + IF_ADDR_UNLOCK(ifp); + /* + * We give up much more copy. + * Set the truncate flag and return. + */ + nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; + IFNET_RUNLOCK_NOSLEEP(); + return (copied); + } + + /* + * Set the TTL of the address. + * The TTL value should be one of the following + * according to the specification: + * + * 1. The remaining lifetime of a DHCP lease on the + * address, or + * 2. The remaining Valid Lifetime of a prefix from + * which the address was derived through Stateless + * Autoconfiguration. + * + * Note that we currently do not support stateful + * address configuration by DHCPv6, so the former + * case can't happen. 
+ */ + if (ifa6->ia6_lifetime.ia6t_expire == 0) + ltime = ND6_INFINITE_LIFETIME; + else { + if (ifa6->ia6_lifetime.ia6t_expire > + time_second) + ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second); + else + ltime = 0; + } + + bcopy(<ime, cp, sizeof(u_int32_t)); + cp += sizeof(u_int32_t); + + /* copy the address itself */ + bcopy(&ifa6->ia_addr.sin6_addr, cp, + sizeof(struct in6_addr)); + in6_clearscope((struct in6_addr *)cp); /* XXX */ + cp += sizeof(struct in6_addr); + + resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); + copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); + } + IF_ADDR_UNLOCK(ifp); + if (ifp0) /* we need search only on the specified IF */ + break; + } + + if (allow_deprecated == 0 && ifp_dep != NULL) { + ifp = ifp_dep; + allow_deprecated = 1; + + goto again; + } + + IFNET_RUNLOCK_NOSLEEP(); + + return (copied); +} + +/* + * XXX almost dup'ed code with rip6_input. + */ +static int +icmp6_rip6_input(struct mbuf **mp, int off) +{ + struct mbuf *m = *mp; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct inpcb *in6p; + struct inpcb *last = NULL; + struct sockaddr_in6 fromsa; + struct icmp6_hdr *icmp6; + struct mbuf *opts = NULL; + +#ifndef PULLDOWN_TEST + /* this is assumed to be safe. */ + icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); + if (icmp6 == NULL) { + /* m is already reclaimed */ + return (IPPROTO_DONE); + } +#endif + + /* + * XXX: the address may have embedded scope zone ID, which should be + * hidden from applications. 
+ */ + bzero(&fromsa, sizeof(fromsa)); + fromsa.sin6_family = AF_INET6; + fromsa.sin6_len = sizeof(struct sockaddr_in6); + fromsa.sin6_addr = ip6->ip6_src; + if (sa6_recoverscope(&fromsa)) { + m_freem(m); + return (IPPROTO_DONE); + } + + INP_INFO_RLOCK(&V_ripcbinfo); + LIST_FOREACH(in6p, &V_ripcb, inp_list) { + if ((in6p->inp_vflag & INP_IPV6) == 0) + continue; + if (in6p->inp_ip_p != IPPROTO_ICMPV6) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && + !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && + !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) + continue; + INP_RLOCK(in6p); + if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, + in6p->in6p_icmp6filt)) { + INP_RUNLOCK(in6p); + continue; + } + if (last != NULL) { + struct mbuf *n = NULL; + + /* + * Recent network drivers tend to allocate a single + * mbuf cluster, rather than to make a couple of + * mbufs without clusters. Also, since the IPv6 code + * path tries to avoid m_pullup(), it is highly + * probable that we still have an mbuf cluster here + * even though the necessary length can be stored in an + * mbuf's internal buffer. + * Meanwhile, the default size of the receive socket + * buffer for raw sockets is not so large. This means + * the possibility of packet loss is relatively higher + * than before. To avoid this scenario, we copy the + * received data to a separate mbuf that does not use + * a cluster, if possible. + * XXX: it is better to copy the data after stripping + * intermediate headers. 
+ */ + if ((m->m_flags & M_EXT) && m->m_next == NULL && + m->m_len <= MHLEN) { + MGET(n, M_DONTWAIT, m->m_type); + if (n != NULL) { + if (m_dup_pkthdr(n, m, M_NOWAIT)) { + bcopy(m->m_data, n->m_data, + m->m_len); + n->m_len = m->m_len; + } else { + m_free(n); + n = NULL; + } + } + } + if (n != NULL || + (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { + if (last->inp_flags & INP_CONTROLOPTS) + ip6_savecontrol(last, n, &opts); + /* strip intermediate headers */ + m_adj(n, off); + SOCKBUF_LOCK(&last->inp_socket->so_rcv); + if (sbappendaddr_locked( + &last->inp_socket->so_rcv, + (struct sockaddr *)&fromsa, n, opts) + == 0) { + /* should notify about lost packet */ + m_freem(n); + if (opts) { + m_freem(opts); + } + SOCKBUF_UNLOCK( + &last->inp_socket->so_rcv); + } else + sorwakeup_locked(last->inp_socket); + opts = NULL; + } + INP_RUNLOCK(last); + } + last = in6p; + } + INP_INFO_RUNLOCK(&V_ripcbinfo); + if (last != NULL) { + if (last->inp_flags & INP_CONTROLOPTS) + ip6_savecontrol(last, m, &opts); + /* strip intermediate headers */ + m_adj(m, off); + + /* avoid using mbuf clusters if possible (see above) */ + if ((m->m_flags & M_EXT) && m->m_next == NULL && + m->m_len <= MHLEN) { + struct mbuf *n; + + MGET(n, M_DONTWAIT, m->m_type); + if (n != NULL) { + if (m_dup_pkthdr(n, m, M_NOWAIT)) { + bcopy(m->m_data, n->m_data, m->m_len); + n->m_len = m->m_len; + + m_freem(m); + m = n; + } else { + m_freem(n); + n = NULL; + } + } + } + SOCKBUF_LOCK(&last->inp_socket->so_rcv); + if (sbappendaddr_locked(&last->inp_socket->so_rcv, + (struct sockaddr *)&fromsa, m, opts) == 0) { + m_freem(m); + if (opts) + m_freem(opts); + SOCKBUF_UNLOCK(&last->inp_socket->so_rcv); + } else + sorwakeup_locked(last->inp_socket); + INP_RUNLOCK(last); + } else { + m_freem(m); + IP6STAT_DEC(ip6s_delivered); + } + return IPPROTO_DONE; +} + +/* + * Reflect the ip6 packet back to the source. + * OFF points to the icmp6 header, counted from the top of the mbuf. 
+ */ +void +icmp6_reflect(struct mbuf *m, size_t off) +{ + struct ip6_hdr *ip6; + struct icmp6_hdr *icmp6; + struct in6_ifaddr *ia = NULL; + int plen; + int type, code; + struct ifnet *outif = NULL; + struct in6_addr origdst, src, *srcp = NULL; + + /* too short to reflect */ + if (off < sizeof(struct ip6_hdr)) { + nd6log((LOG_DEBUG, + "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", + (u_long)off, (u_long)sizeof(struct ip6_hdr), + __FILE__, __LINE__)); + goto bad; + } + + /* + * If there are extra headers between IPv6 and ICMPv6, strip + * off that header first. + */ +#ifdef DIAGNOSTIC + if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) + panic("assumption failed in icmp6_reflect"); +#endif + if (off > sizeof(struct ip6_hdr)) { + size_t l; + struct ip6_hdr nip6; + + l = off - sizeof(struct ip6_hdr); + m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); + m_adj(m, l); + l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); + if (m->m_len < l) { + if ((m = m_pullup(m, l)) == NULL) + return; + } + bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); + } else /* off == sizeof(struct ip6_hdr) */ { + size_t l; + l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); + if (m->m_len < l) { + if ((m = m_pullup(m, l)) == NULL) + return; + } + } + plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_nxt = IPPROTO_ICMPV6; + icmp6 = (struct icmp6_hdr *)(ip6 + 1); + type = icmp6->icmp6_type; /* keep type for statistics */ + code = icmp6->icmp6_code; /* ditto. */ + + origdst = ip6->ip6_dst; + /* + * ip6_input() drops a packet if its src is multicast. + * So, the src is never multicast. + */ + ip6->ip6_dst = ip6->ip6_src; + + /* + * If the incoming packet was addressed directly to us (i.e. unicast), + * use dst as the src for the reply. + * The IN6_IFF_NOTREADY case should be VERY rare, but is possible + * (for example) when we encounter an error while forwarding procedure + * destined to a duplicated address of ours. 
+ * Note that ip6_getdstifaddr() may fail if we are in an error handling + * procedure of an outgoing packet of our own, in which case we need + * to search in the ifaddr list. + */ + if (!IN6_IS_ADDR_MULTICAST(&origdst)) { + if ((ia = ip6_getdstifaddr(m))) { + if (!(ia->ia6_flags & + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) + srcp = &ia->ia_addr.sin6_addr; + } else { + struct sockaddr_in6 d; + + bzero(&d, sizeof(d)); + d.sin6_family = AF_INET6; + d.sin6_len = sizeof(d); + d.sin6_addr = origdst; + ia = (struct in6_ifaddr *) + ifa_ifwithaddr((struct sockaddr *)&d); + if (ia && + !(ia->ia6_flags & + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { + srcp = &ia->ia_addr.sin6_addr; + } + } + } + + if ((srcp != NULL) && + (in6_addrscope(srcp) != in6_addrscope(&ip6->ip6_src))) + srcp = NULL; + + if (srcp == NULL) { + int e; + struct sockaddr_in6 sin6; + struct route_in6 ro; + + /* + * This case matches to multicasts, our anycast, or unicasts + * that we do not own. Select a source address based on the + * source address of the erroneous packet. 
+ */ + bzero(&sin6, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(sin6); + sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */ + + bzero(&ro, sizeof(ro)); + e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &src); + if (ro.ro_rt) + RTFREE(ro.ro_rt); /* XXX: we could use this */ + if (e) { + char ip6buf[INET6_ADDRSTRLEN]; + nd6log((LOG_DEBUG, + "icmp6_reflect: source can't be determined: " + "dst=%s, error=%d\n", + ip6_sprintf(ip6buf, &sin6.sin6_addr), e)); + goto bad; + } + srcp = &src; + } + + ip6->ip6_src = *srcp; + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_nxt = IPPROTO_ICMPV6; + if (outif) + ip6->ip6_hlim = ND_IFINFO(outif)->chlim; + else if (m->m_pkthdr.rcvif) { + /* XXX: This may not be the outgoing interface */ + ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; + } else + ip6->ip6_hlim = V_ip6_defhlim; + + icmp6->icmp6_cksum = 0; + icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, + sizeof(struct ip6_hdr), plen); + + /* + * XXX option handling + */ + + m->m_flags &= ~(M_BCAST|M_MCAST); + + ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); + if (outif) + icmp6_ifoutstat_inc(outif, type, code); + + if (ia != NULL) + ifa_free(&ia->ia_ifa); + return; + + bad: + if (ia != NULL) + ifa_free(&ia->ia_ifa); + m_freem(m); + return; +} + +void +icmp6_fasttimo(void) +{ + + mld_fasttimo(); +} + +void +icmp6_slowtimo(void) +{ + + mld_slowtimo(); +} + +static const char * +icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6, + struct in6_addr *tgt6) +{ + static char buf[1024]; + char ip6bufs[INET6_ADDRSTRLEN]; + char ip6bufd[INET6_ADDRSTRLEN]; + char ip6buft[INET6_ADDRSTRLEN]; + snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", + ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6), + ip6_sprintf(ip6buft, tgt6)); + return buf; +} + +void +icmp6_redirect_input(struct mbuf *m, int off) +{ + struct ifnet *ifp; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr 
*); + struct nd_redirect *nd_rd; + int icmp6len = ntohs(ip6->ip6_plen); + char *lladdr = NULL; + int lladdrlen = 0; + u_char *redirhdr = NULL; + int redirhdrlen = 0; + struct rtentry *rt = NULL; + int is_router; + int is_onlink; + struct in6_addr src6 = ip6->ip6_src; + struct in6_addr redtgt6; + struct in6_addr reddst6; + union nd_opts ndopts; + char ip6buf[INET6_ADDRSTRLEN]; + + if (!m) + return; + + ifp = m->m_pkthdr.rcvif; + + if (!ifp) + return; + + /* XXX if we are router, we don't update route by icmp6 redirect */ + if (V_ip6_forwarding) + goto freeit; + if (!V_icmp6_rediraccept) + goto freeit; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, icmp6len,); + nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); + if (nd_rd == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return; + } +#endif + redtgt6 = nd_rd->nd_rd_target; + reddst6 = nd_rd->nd_rd_dst; + + if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || + in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { + goto freeit; + } + + /* validation */ + if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { + nd6log((LOG_ERR, + "ICMP6 redirect sent from %s rejected; " + "must be from linklocal\n", + ip6_sprintf(ip6buf, &src6))); + goto bad; + } + if (ip6->ip6_hlim != 255) { + nd6log((LOG_ERR, + "ICMP6 redirect sent from %s rejected; " + "hlim=%d (must be 255)\n", + ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); + goto bad; + } + { + /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ + struct sockaddr_in6 sin6; + struct in6_addr *gw6; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6)); + rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + if (rt) { + if (rt->rt_gateway == NULL || + rt->rt_gateway->sa_family != AF_INET6) { + nd6log((LOG_ERR, + "ICMP6 redirect rejected; no route " + "with inet6 gateway found for redirect dst: %s\n", + 
icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + RTFREE_LOCKED(rt); + goto bad; + } + + gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr); + if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) { + nd6log((LOG_ERR, + "ICMP6 redirect rejected; " + "not equal to gw-for-src=%s (must be same): " + "%s\n", + ip6_sprintf(ip6buf, gw6), + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + RTFREE_LOCKED(rt); + goto bad; + } + } else { + nd6log((LOG_ERR, + "ICMP6 redirect rejected; " + "no route found for redirect dst: %s\n", + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + RTFREE_LOCKED(rt); + rt = NULL; + } + if (IN6_IS_ADDR_MULTICAST(&reddst6)) { + nd6log((LOG_ERR, + "ICMP6 redirect rejected; " + "redirect dst must be unicast: %s\n", + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + + is_router = is_onlink = 0; + if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) + is_router = 1; /* router case */ + if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) + is_onlink = 1; /* on-link destination case */ + if (!is_router && !is_onlink) { + nd6log((LOG_ERR, + "ICMP6 redirect rejected; " + "neither router case nor onlink case: %s\n", + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + /* validation passed */ + + icmp6len -= sizeof(*nd_rd); + nd6_option_init(nd_rd + 1, icmp6len, &ndopts); + if (nd6_options(&ndopts) < 0) { + nd6log((LOG_INFO, "icmp6_redirect_input: " + "invalid ND option, rejected: %s\n", + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + /* nd6_options have incremented stats */ + goto freeit; + } + + if (ndopts.nd_opts_tgt_lladdr) { + lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); + lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; + } + + if (ndopts.nd_opts_rh) { + redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len; + redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */ + } + + if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { + nd6log((LOG_INFO, + "icmp6_redirect_input: lladdrlen 
mismatch for %s " + "(if %d, icmp6 packet %d): %s\n", + ip6_sprintf(ip6buf, &redtgt6), + ifp->if_addrlen, lladdrlen - 2, + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + + /* RFC 2461 8.3 */ + nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, + is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); + + if (!is_onlink) { /* better router case. perform rtredirect. */ + /* perform rtredirect */ + struct sockaddr_in6 sdst; + struct sockaddr_in6 sgw; + struct sockaddr_in6 ssrc; + + bzero(&sdst, sizeof(sdst)); + bzero(&sgw, sizeof(sgw)); + bzero(&ssrc, sizeof(ssrc)); + sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6; + sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len = + sizeof(struct sockaddr_in6); + bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); + bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); + bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); + rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw, + (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST, + (struct sockaddr *)&ssrc); + } + /* finally update cached route in each socket via pfctlinput */ + { + struct sockaddr_in6 sdst; + + bzero(&sdst, sizeof(sdst)); + sdst.sin6_family = AF_INET6; + sdst.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); + pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); +#ifdef IPSEC + key_sa_routechange((struct sockaddr *)&sdst); +#endif /* IPSEC */ + } + + freeit: + m_freem(m); + return; + + bad: + ICMP6STAT_INC(icp6s_badredirect); + m_freem(m); +} + +void +icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) +{ + struct ifnet *ifp; /* my outgoing interface */ + struct in6_addr *ifp_ll6; + struct in6_addr *router_ll6; + struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ + struct mbuf *m = NULL; /* newly allocated one */ + struct ip6_hdr *ip6; /* m as struct ip6_hdr */ + struct nd_redirect *nd_rd; + struct llentry *ln = NULL; + size_t maxlen; + 
u_char *p; + struct ifnet *outif = NULL; + struct sockaddr_in6 src_sa; + + icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0); + + /* if we are not router, we don't send icmp6 redirect */ + if (!V_ip6_forwarding) + goto fail; + + /* sanity check */ + if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) + goto fail; + + /* + * Address check: + * the source address must identify a neighbor, and + * the destination address must not be a multicast address + * [RFC 2461, sec 8.2] + */ + sip6 = mtod(m0, struct ip6_hdr *); + bzero(&src_sa, sizeof(src_sa)); + src_sa.sin6_family = AF_INET6; + src_sa.sin6_len = sizeof(src_sa); + src_sa.sin6_addr = sip6->ip6_src; + if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) + goto fail; + if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) + goto fail; /* what should we do here? */ + + /* rate limit */ + if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) + goto fail; + + /* + * Since we are going to append up to 1280 bytes (= IPV6_MMTU), + * we almost always ask for an mbuf cluster for simplicity. + * (MHLEN < IPV6_MMTU is almost always true) + */ +#if IPV6_MMTU >= MCLBYTES +# error assumption failed about IPV6_MMTU and MCLBYTES +#endif + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m && IPV6_MMTU >= MHLEN) + MCLGET(m, M_DONTWAIT); + if (!m) + goto fail; + m->m_pkthdr.rcvif = NULL; + m->m_len = 0; + maxlen = M_TRAILINGSPACE(m); + maxlen = min(IPV6_MMTU, maxlen); + /* just for safety */ + if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { + goto fail; + } + + { + /* get ip6 linklocal address for ifp(my outgoing interface). */ + struct in6_ifaddr *ia; + if ((ia = in6ifa_ifpforlinklocal(ifp, + IN6_IFF_NOTREADY| + IN6_IFF_ANYCAST)) == NULL) + goto fail; + ifp_ll6 = &ia->ia_addr.sin6_addr; + /* XXXRW: reference released prematurely. */ + ifa_free(&ia->ia_ifa); + } + + /* get ip6 linklocal address for the router. 
*/ + if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { + struct sockaddr_in6 *sin6; + sin6 = (struct sockaddr_in6 *)rt->rt_gateway; + router_ll6 = &sin6->sin6_addr; + if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) + router_ll6 = (struct in6_addr *)NULL; + } else + router_ll6 = (struct in6_addr *)NULL; + + /* ip6 */ + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + /* ip6->ip6_plen will be set later */ + ip6->ip6_nxt = IPPROTO_ICMPV6; + ip6->ip6_hlim = 255; + /* ip6->ip6_src must be linklocal addr for my outgoing if. */ + bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); + bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); + + /* ND Redirect */ + nd_rd = (struct nd_redirect *)(ip6 + 1); + nd_rd->nd_rd_type = ND_REDIRECT; + nd_rd->nd_rd_code = 0; + nd_rd->nd_rd_reserved = 0; + if (rt->rt_flags & RTF_GATEWAY) { + /* + * nd_rd->nd_rd_target must be a link-local address in + * better router cases. + */ + if (!router_ll6) + goto fail; + bcopy(router_ll6, &nd_rd->nd_rd_target, + sizeof(nd_rd->nd_rd_target)); + bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, + sizeof(nd_rd->nd_rd_dst)); + } else { + /* make sure redtgt == reddst */ + bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, + sizeof(nd_rd->nd_rd_target)); + bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, + sizeof(nd_rd->nd_rd_dst)); + } + + p = (u_char *)(nd_rd + 1); + + if (!router_ll6) + goto nolladdropt; + + { + /* target lladdr option */ + int len; + struct nd_opt_hdr *nd_opt; + char *lladdr; + + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(router_ll6, 0, ifp); + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) + goto nolladdropt; + + len = sizeof(*nd_opt) + ifp->if_addrlen; + len = (len + 7) & ~7; /* round by 8 */ + /* safety check */ + if (len + (p - (u_char *)ip6) > maxlen) + goto nolladdropt; + + if (ln->la_flags & LLE_VALID) { + nd_opt = (struct nd_opt_hdr *)p; + nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; + nd_opt->nd_opt_len = len >> 3; + 
lladdr = (char *)(nd_opt + 1); + bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen); + p += len; + } + } +nolladdropt: + if (ln != NULL) + LLE_RUNLOCK(ln); + + m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; + + /* just to be safe */ +#ifdef M_DECRYPTED /*not openbsd*/ + if (m0->m_flags & M_DECRYPTED) + goto noredhdropt; +#endif + if (p - (u_char *)ip6 > maxlen) + goto noredhdropt; + + { + /* redirected header option */ + int len; + struct nd_opt_rd_hdr *nd_opt_rh; + + /* + * compute the maximum size for icmp6 redirect header option. + * XXX room for auth header? + */ + len = maxlen - (p - (u_char *)ip6); + len &= ~7; + + /* This is just for simplicity. */ + if (m0->m_pkthdr.len != m0->m_len) { + if (m0->m_next) { + m_freem(m0->m_next); + m0->m_next = NULL; + } + m0->m_pkthdr.len = m0->m_len; + } + + /* + * Redirected header option spec (RFC2461 4.6.3) talks nothing + * about padding/truncate rule for the original IP packet. + * From the discussion on IPv6imp in Feb 1999, + * the consensus was: + * - "attach as much as possible" is the goal + * - pad if not aligned (original size can be guessed by + * original ip6 header) + * Following code adds the padding if it is simple enough, + * and truncates if not. 
+ */ + if (m0->m_next || m0->m_pkthdr.len != m0->m_len) + panic("assumption failed in %s:%d", __FILE__, + __LINE__); + + if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { + /* not enough room, truncate */ + m0->m_pkthdr.len = m0->m_len = len - + sizeof(*nd_opt_rh); + } else { + /* enough room, pad or truncate */ + size_t extra; + + extra = m0->m_pkthdr.len % 8; + if (extra) { + /* pad if easy enough, truncate if not */ + if (8 - extra <= M_TRAILINGSPACE(m0)) { + /* pad */ + m0->m_len += (8 - extra); + m0->m_pkthdr.len += (8 - extra); + } else { + /* truncate */ + m0->m_pkthdr.len -= extra; + m0->m_len -= extra; + } + } + len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); + m0->m_pkthdr.len = m0->m_len = len - + sizeof(*nd_opt_rh); + } + + nd_opt_rh = (struct nd_opt_rd_hdr *)p; + bzero(nd_opt_rh, sizeof(*nd_opt_rh)); + nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; + nd_opt_rh->nd_opt_rh_len = len >> 3; + p += sizeof(*nd_opt_rh); + m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; + + /* connect m0 to m */ + m_tag_delete_chain(m0, NULL); + m0->m_flags &= ~M_PKTHDR; + m->m_next = m0; + m->m_pkthdr.len = m->m_len + m0->m_len; + m0 = NULL; + } +noredhdropt:; + if (m0) { + m_freem(m0); + m0 = NULL; + } + + /* XXX: clear embedded link IDs in the inner header */ + in6_clearscope(&sip6->ip6_src); + in6_clearscope(&sip6->ip6_dst); + in6_clearscope(&nd_rd->nd_rd_target); + in6_clearscope(&nd_rd->nd_rd_dst); + + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + + nd_rd->nd_rd_cksum = 0; + nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, + sizeof(*ip6), ntohs(ip6->ip6_plen)); + + /* send the packet to outside... */ + ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); + if (outif) { + icmp6_ifstat_inc(outif, ifs6_out_msg); + icmp6_ifstat_inc(outif, ifs6_out_redirect); + } + ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]); + + return; + +fail: + if (m) + m_freem(m); + if (m0) + m_freem(m0); +} + +/* + * ICMPv6 socket option processing. 
+ */ +int +icmp6_ctloutput(struct socket *so, struct sockopt *sopt) +{ + int error = 0; + int optlen; + struct inpcb *inp = sotoinpcb(so); + int level, op, optname; + + if (sopt) { + level = sopt->sopt_level; + op = sopt->sopt_dir; + optname = sopt->sopt_name; + optlen = sopt->sopt_valsize; + } else + level = op = optname = optlen = 0; + + if (level != IPPROTO_ICMPV6) { + return EINVAL; + } + + switch (op) { + case PRCO_SETOPT: + switch (optname) { + case ICMP6_FILTER: + { + struct icmp6_filter ic6f; + + if (optlen != sizeof(ic6f)) { + error = EMSGSIZE; + break; + } + error = sooptcopyin(sopt, &ic6f, optlen, optlen); + if (error == 0) { + INP_WLOCK(inp); + *inp->in6p_icmp6filt = ic6f; + INP_WUNLOCK(inp); + } + break; + } + + default: + error = ENOPROTOOPT; + break; + } + break; + + case PRCO_GETOPT: + switch (optname) { + case ICMP6_FILTER: + { + struct icmp6_filter ic6f; + + INP_RLOCK(inp); + ic6f = *inp->in6p_icmp6filt; + INP_RUNLOCK(inp); + error = sooptcopyout(sopt, &ic6f, sizeof(ic6f)); + break; + } + + default: + error = ENOPROTOOPT; + break; + } + break; + } + + return (error); +} + +/* + * Perform rate limit check. + * Returns 0 if it is okay to send the icmp6 packet. + * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate + * limitation. + * + * XXX per-destination/type check necessary? 
+ * + * dst - not used at this moment + * type - not used at this moment + * code - not used at this moment + */ +static int +icmp6_ratelimit(const struct in6_addr *dst, const int type, + const int code) +{ + int ret; + + ret = 0; /* okay to send */ + + /* PPS limit */ + if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count, + V_icmp6errppslim)) { + /* The packet is subject to rate limit */ + ret++; + } + + return ret; +} diff --git a/freebsd/sys/netinet6/icmp6.h b/freebsd/sys/netinet6/icmp6.h new file mode 100644 index 00000000..a6414efc --- /dev/null +++ b/freebsd/sys/netinet6/icmp6.h @@ -0,0 +1,4 @@ +/* $FreeBSD$ */ +/* $KAME: icmp6.h,v 1.17 2000/06/11 17:23:40 jinmei Exp $ */ + +#error "netinet6/icmp6.h is obsolete. use netinet/icmp6.h" diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c new file mode 100644 index 00000000..826213af --- /dev/null +++ b/freebsd/sys/netinet6/in6.c @@ -0,0 +1,2671 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.c 8.2 (Berkeley) 11/15/93 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_compat.h> +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/jail.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/priv.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/syslog.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_var.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/route.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/net/if_llatbl.h> +#include <freebsd/netinet/if_ether.h> +#include <freebsd/netinet/in_systm.h> +#include <freebsd/netinet/ip.h> +#include <freebsd/netinet/in_pcb.h> + +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet6/mld6_var.h> +#include <freebsd/netinet6/ip6_mroute.h> +#include <freebsd/netinet6/in6_ifattach.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet6/in6_pcb.h> + +/* + * Definitions of some costant IP6 addresses. 
+ */ +const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_nodelocal_allnodes = + IN6ADDR_NODELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes = + IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allrouters = + IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allv2routers = + IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT; + +const struct in6_addr in6mask0 = IN6MASK0; +const struct in6_addr in6mask32 = IN6MASK32; +const struct in6_addr in6mask64 = IN6MASK64; +const struct in6_addr in6mask96 = IN6MASK96; +const struct in6_addr in6mask128 = IN6MASK128; + +const struct sockaddr_in6 sa6_any = + { sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 }; + +static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t, + struct ifnet *, struct thread *)); +static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *, + struct sockaddr_in6 *, int)); +static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); + +int (*faithprefix_p)(struct in6_addr *); + + + +int +in6_mask2len(struct in6_addr *mask, u_char *lim0) +{ + int x = 0, y; + u_char *lim = lim0, *p; + + /* ignore the scope_id part */ + if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask)) + lim = (u_char *)mask + sizeof(*mask); + for (p = (u_char *)mask; p < lim; x++, p++) { + if (*p != 0xff) + break; + } + y = 0; + if (p < lim) { + for (y = 0; y < 8; y++) { + if ((*p & (0x80 >> y)) == 0) + break; + } + } + + /* + * when the limit pointer is given, do a stricter check on the + * remaining bits. 
+ */ + if (p < lim) { + if (y != 0 && (*p & (0x00ff >> y)) != 0) + return (-1); + for (p = p + 1; p < lim; p++) + if (*p != 0) + return (-1); + } + + return x * 8 + y; +} + +#define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) +#define ia62ifa(ia6) (&((ia6)->ia_ifa)) + +#ifdef COMPAT_FREEBSD32 +struct in6_ndifreq32 { + char ifname[IFNAMSIZ]; + uint32_t ifindex; +}; +#define SIOCGDEFIFACE32_IN6 _IOWR('i', 86, struct in6_ndifreq32) +#endif + +int +in6_control(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct thread *td) +{ + struct in6_ifreq *ifr = (struct in6_ifreq *)data; + struct in6_ifaddr *ia = NULL; + struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; + struct sockaddr_in6 *sa6; + int error; + + switch (cmd) { + case SIOCGETSGCNT_IN6: + case SIOCGETMIFCNT_IN6: + return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP); + } + + switch(cmd) { + case SIOCAADDRCTL_POLICY: + case SIOCDADDRCTL_POLICY: + if (td != NULL) { + error = priv_check(td, PRIV_NETINET_ADDRCTRL6); + if (error) + return (error); + } + return (in6_src_ioctl(cmd, data)); + } + + if (ifp == NULL) + return (EOPNOTSUPP); + + switch (cmd) { + case SIOCSNDFLUSH_IN6: + case SIOCSPFXFLUSH_IN6: + case SIOCSRTRFLUSH_IN6: + case SIOCSDEFIFACE_IN6: + case SIOCSIFINFO_FLAGS: + case SIOCSIFINFO_IN6: + if (td != NULL) { + error = priv_check(td, PRIV_NETINET_ND6); + if (error) + return (error); + } + /* FALLTHROUGH */ + case OSIOCGIFINFO_IN6: + case SIOCGIFINFO_IN6: + case SIOCGDRLST_IN6: + case SIOCGPRLST_IN6: + case SIOCGNBRINFO_IN6: + case SIOCGDEFIFACE_IN6: + return (nd6_ioctl(cmd, data, ifp)); + +#ifdef COMPAT_FREEBSD32 + case SIOCGDEFIFACE32_IN6: + { + struct in6_ndifreq ndif; + struct in6_ndifreq32 *ndif32; + + error = nd6_ioctl(SIOCGDEFIFACE_IN6, (caddr_t)&ndif, + ifp); + if (error) + return (error); + ndif32 = (struct in6_ndifreq32 *)data; + ndif32->ifindex = ndif.ifindex; + return (0); + } +#endif + } + + switch (cmd) { + case SIOCSIFPREFIX_IN6: + case SIOCDIFPREFIX_IN6: + 
case SIOCAIFPREFIX_IN6: + case SIOCCIFPREFIX_IN6: + case SIOCSGIFPREFIX_IN6: + case SIOCGIFPREFIX_IN6: + log(LOG_NOTICE, + "prefix ioctls are now invalidated. " + "please use ifconfig.\n"); + return (EOPNOTSUPP); + } + + switch (cmd) { + case SIOCSSCOPE6: + if (td != NULL) { + error = priv_check(td, PRIV_NETINET_SCOPE6); + if (error) + return (error); + } + return (scope6_set(ifp, + (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id)); + case SIOCGSCOPE6: + return (scope6_get(ifp, + (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id)); + case SIOCGSCOPE6DEF: + return (scope6_get_default((struct scope6_id *) + ifr->ifr_ifru.ifru_scope_id)); + } + + switch (cmd) { + case SIOCALIFADDR: + if (td != NULL) { + error = priv_check(td, PRIV_NET_ADDIFADDR); + if (error) + return (error); + } + return in6_lifaddr_ioctl(so, cmd, data, ifp, td); + + case SIOCDLIFADDR: + if (td != NULL) { + error = priv_check(td, PRIV_NET_DELIFADDR); + if (error) + return (error); + } + /* FALLTHROUGH */ + case SIOCGLIFADDR: + return in6_lifaddr_ioctl(so, cmd, data, ifp, td); + } + + /* + * Find address for this interface, if it exists. + * + * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation + * only, and used the first interface address as the target of other + * operations (without checking ifra_addr). This was because netinet + * code/API assumed at most 1 interface address per interface. + * Since IPv6 allows a node to assign multiple addresses + * on a single interface, we almost always look and check the + * presence of ifra_addr, and reject invalid ones here. + * It also decreases duplicated code among SIOC*_IN6 operations. 
+ */ + switch (cmd) { + case SIOCAIFADDR_IN6: + case SIOCSIFPHYADDR_IN6: + sa6 = &ifra->ifra_addr; + break; + case SIOCSIFADDR_IN6: + case SIOCGIFADDR_IN6: + case SIOCSIFDSTADDR_IN6: + case SIOCSIFNETMASK_IN6: + case SIOCGIFDSTADDR_IN6: + case SIOCGIFNETMASK_IN6: + case SIOCDIFADDR_IN6: + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + case SIOCGIFAFLAG_IN6: + case SIOCSNDFLUSH_IN6: + case SIOCSPFXFLUSH_IN6: + case SIOCSRTRFLUSH_IN6: + case SIOCGIFALIFETIME_IN6: + case SIOCSIFALIFETIME_IN6: + case SIOCGIFSTAT_IN6: + case SIOCGIFSTAT_ICMP6: + sa6 = &ifr->ifr_addr; + break; + default: + sa6 = NULL; + break; + } + if (sa6 && sa6->sin6_family == AF_INET6) { + if (sa6->sin6_scope_id != 0) + error = sa6_embedscope(sa6, 0); + else + error = in6_setscope(&sa6->sin6_addr, ifp, NULL); + if (error != 0) + return (error); + if (td != NULL && (error = prison_check_ip6(td->td_ucred, + &sa6->sin6_addr)) != 0) + return (error); + ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr); + } else + ia = NULL; + + switch (cmd) { + case SIOCSIFADDR_IN6: + case SIOCSIFDSTADDR_IN6: + case SIOCSIFNETMASK_IN6: + /* + * Since IPv6 allows a node to assign multiple addresses + * on a single interface, SIOCSIFxxx ioctls are deprecated. + */ + /* we decided to obsolete this command (20000704) */ + error = EINVAL; + goto out; + + case SIOCDIFADDR_IN6: + /* + * for IPv4, we look for existing in_ifaddr here to allow + * "ifconfig if0 delete" to remove the first IPv4 address on + * the interface. For IPv6, as the spec allows multiple + * interface address from the day one, we consider "remove the + * first one" semantics to be not preferable. + */ + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto out; + } + /* FALLTHROUGH */ + case SIOCAIFADDR_IN6: + /* + * We always require users to specify a valid IPv6 address for + * the corresponding operation. 
+ */ + if (ifra->ifra_addr.sin6_family != AF_INET6 || + ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) { + error = EAFNOSUPPORT; + goto out; + } + + if (td != NULL) { + error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ? + PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR); + if (error) + goto out; + } + break; + + case SIOCGIFADDR_IN6: + /* This interface is basically deprecated. use SIOCGIFCONF. */ + /* FALLTHROUGH */ + case SIOCGIFAFLAG_IN6: + case SIOCGIFNETMASK_IN6: + case SIOCGIFDSTADDR_IN6: + case SIOCGIFALIFETIME_IN6: + /* must think again about its semantics */ + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto out; + } + break; + + case SIOCSIFALIFETIME_IN6: + { + struct in6_addrlifetime *lt; + + if (td != NULL) { + error = priv_check(td, PRIV_NETINET_ALIFETIME6); + if (error) + goto out; + } + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto out; + } + /* sanity for overflow - beware unsigned */ + lt = &ifr->ifr_ifru.ifru_lifetime; + if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && + lt->ia6t_vltime + time_second < time_second) { + error = EINVAL; + goto out; + } + if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && + lt->ia6t_pltime + time_second < time_second) { + error = EINVAL; + goto out; + } + break; + } + } + + switch (cmd) { + case SIOCGIFADDR_IN6: + ifr->ifr_addr = ia->ia_addr; + if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0) + goto out; + break; + + case SIOCGIFDSTADDR_IN6: + if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { + error = EINVAL; + goto out; + } + /* + * XXX: should we check if ifa_dstaddr is NULL and return + * an error? 
+ */ + ifr->ifr_dstaddr = ia->ia_dstaddr; + if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0) + goto out; + break; + + case SIOCGIFNETMASK_IN6: + ifr->ifr_addr = ia->ia_prefixmask; + break; + + case SIOCGIFAFLAG_IN6: + ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags; + break; + + case SIOCGIFSTAT_IN6: + if (ifp == NULL) { + error = EINVAL; + goto out; + } + bzero(&ifr->ifr_ifru.ifru_stat, + sizeof(ifr->ifr_ifru.ifru_stat)); + ifr->ifr_ifru.ifru_stat = + *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat; + break; + + case SIOCGIFSTAT_ICMP6: + if (ifp == NULL) { + error = EINVAL; + goto out; + } + bzero(&ifr->ifr_ifru.ifru_icmp6stat, + sizeof(ifr->ifr_ifru.ifru_icmp6stat)); + ifr->ifr_ifru.ifru_icmp6stat = + *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat; + break; + + case SIOCGIFALIFETIME_IN6: + ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime; + if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { + time_t maxexpire; + struct in6_addrlifetime *retlt = + &ifr->ifr_ifru.ifru_lifetime; + + /* + * XXX: adjust expiration time assuming time_t is + * signed. + */ + maxexpire = (-1) & + ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); + if (ia->ia6_lifetime.ia6t_vltime < + maxexpire - ia->ia6_updatetime) { + retlt->ia6t_expire = ia->ia6_updatetime + + ia->ia6_lifetime.ia6t_vltime; + } else + retlt->ia6t_expire = maxexpire; + } + if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { + time_t maxexpire; + struct in6_addrlifetime *retlt = + &ifr->ifr_ifru.ifru_lifetime; + + /* + * XXX: adjust expiration time assuming time_t is + * signed. 
+ */ + maxexpire = (-1) & + ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); + if (ia->ia6_lifetime.ia6t_pltime < + maxexpire - ia->ia6_updatetime) { + retlt->ia6t_preferred = ia->ia6_updatetime + + ia->ia6_lifetime.ia6t_pltime; + } else + retlt->ia6t_preferred = maxexpire; + } + break; + + case SIOCSIFALIFETIME_IN6: + ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; + /* for sanity */ + if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { + ia->ia6_lifetime.ia6t_expire = + time_second + ia->ia6_lifetime.ia6t_vltime; + } else + ia->ia6_lifetime.ia6t_expire = 0; + if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { + ia->ia6_lifetime.ia6t_preferred = + time_second + ia->ia6_lifetime.ia6t_pltime; + } else + ia->ia6_lifetime.ia6t_preferred = 0; + break; + + case SIOCAIFADDR_IN6: + { + int i; + struct nd_prefixctl pr0; + struct nd_prefix *pr; + + /* + * first, make or update the interface address structure, + * and link it to the list. + */ + if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0) + goto out; + if (ia != NULL) + ifa_free(&ia->ia_ifa); + if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) + == NULL) { + /* + * this can happen when the user specify the 0 valid + * lifetime. + */ + break; + } + + /* + * then, make the prefix on-link on the interface. + * XXX: we'd rather create the prefix before the address, but + * we need at least one address to install the corresponding + * interface route, so we configure the address first. + */ + + /* + * convert mask to prefix length (prefixmask has already + * been validated in in6_update_ifa(). + */ + bzero(&pr0, sizeof(pr0)); + pr0.ndpr_ifp = ifp; + pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, + NULL); + if (pr0.ndpr_plen == 128) { + break; /* we don't need to install a host route. */ + } + pr0.ndpr_prefix = ifra->ifra_addr; + /* apply the mask for safety. 
*/ + for (i = 0; i < 4; i++) { + pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= + ifra->ifra_prefixmask.sin6_addr.s6_addr32[i]; + } + /* + * XXX: since we don't have an API to set prefix (not address) + * lifetimes, we just use the same lifetimes as addresses. + * The (temporarily) installed lifetimes can be overridden by + * later advertised RAs (when accept_rtadv is non 0), which is + * an intended behavior. + */ + pr0.ndpr_raf_onlink = 1; /* should be configurable? */ + pr0.ndpr_raf_auto = + ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0); + pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime; + pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime; + + /* add the prefix if not yet. */ + if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { + /* + * nd6_prelist_add will install the corresponding + * interface route. + */ + if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) + goto out; + if (pr == NULL) { + log(LOG_ERR, "nd6_prelist_add succeeded but " + "no prefix\n"); + error = EINVAL; + goto out; + } + } + + /* relate the address to the prefix */ + if (ia->ia6_ndpr == NULL) { + ia->ia6_ndpr = pr; + pr->ndpr_refcnt++; + + /* + * If this is the first autoconf address from the + * prefix, create a temporary address as well + * (when required). + */ + if ((ia->ia6_flags & IN6_IFF_AUTOCONF) && + V_ip6_use_tempaddr && pr->ndpr_refcnt == 1) { + int e; + if ((e = in6_tmpifadd(ia, 1, 0)) != 0) { + log(LOG_NOTICE, "in6_control: failed " + "to create a temporary address, " + "errno=%d\n", e); + } + } + } + + /* + * this might affect the status of autoconfigured addresses, + * that is, this address might make other addresses detached. + */ + pfxlist_onlink_check(); + if (error == 0 && ia) + EVENTHANDLER_INVOKE(ifaddr_event, ifp); + break; + } + + case SIOCDIFADDR_IN6: + { + struct nd_prefix *pr; + + /* + * If the address being deleted is the only one that owns + * the corresponding prefix, expire the prefix as well. 
+ * XXX: theoretically, we don't have to worry about such + * relationship, since we separate the address management + * and the prefix management. We do this, however, to provide + * as much backward compatibility as possible in terms of + * the ioctl operation. + * Note that in6_purgeaddr() will decrement ndpr_refcnt. + */ + pr = ia->ia6_ndpr; + in6_purgeaddr(&ia->ia_ifa); + if (pr && pr->ndpr_refcnt == 0) + prelist_remove(pr); + EVENTHANDLER_INVOKE(ifaddr_event, ifp); + break; + } + + default: + if (ifp == NULL || ifp->if_ioctl == 0) { + error = EOPNOTSUPP; + goto out; + } + error = (*ifp->if_ioctl)(ifp, cmd, data); + goto out; + } + + error = 0; +out: + if (ia != NULL) + ifa_free(&ia->ia_ifa); + return (error); +} + +/* + * Update parameters of an IPv6 interface address. + * If necessary, a new entry is created and linked into address chains. + * This function is separated from in6_control(). + * XXX: should this be performed under splnet()? + */ +int +in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, + struct in6_ifaddr *ia, int flags) +{ + int error = 0, hostIsNew = 0, plen = -1; + struct sockaddr_in6 dst6; + struct in6_addrlifetime *lt; + struct in6_multi_mship *imm; + struct in6_multi *in6m_sol; + struct rtentry *rt; + int delay; + char ip6buf[INET6_ADDRSTRLEN]; + + /* Validate parameters */ + if (ifp == NULL || ifra == NULL) /* this maybe redundant */ + return (EINVAL); + + /* + * The destination address for a p2p link must have a family + * of AF_UNSPEC or AF_INET6. + */ + if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && + ifra->ifra_dstaddr.sin6_family != AF_INET6 && + ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) + return (EAFNOSUPPORT); + /* + * validate ifra_prefixmask. don't check sin6_family, netmask + * does not carry fields other than sin6_len. 
+ */ + if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6)) + return (EINVAL); + /* + * Because the IPv6 address architecture is classless, we require + * users to specify a (non 0) prefix length (mask) for a new address. + * We also require the prefix (when specified) mask is valid, and thus + * reject a non-consecutive mask. + */ + if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0) + return (EINVAL); + if (ifra->ifra_prefixmask.sin6_len != 0) { + plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, + (u_char *)&ifra->ifra_prefixmask + + ifra->ifra_prefixmask.sin6_len); + if (plen <= 0) + return (EINVAL); + } else { + /* + * In this case, ia must not be NULL. We just use its prefix + * length. + */ + plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); + } + /* + * If the destination address on a p2p interface is specified, + * and the address is a scoped one, validate/set the scope + * zone identifier. + */ + dst6 = ifra->ifra_dstaddr; + if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 && + (dst6.sin6_family == AF_INET6)) { + struct in6_addr in6_tmp; + u_int32_t zoneid; + + in6_tmp = dst6.sin6_addr; + if (in6_setscope(&in6_tmp, ifp, &zoneid)) + return (EINVAL); /* XXX: should be impossible */ + + if (dst6.sin6_scope_id != 0) { + if (dst6.sin6_scope_id != zoneid) + return (EINVAL); + } else /* user omit to specify the ID. */ + dst6.sin6_scope_id = zoneid; + + /* convert into the internal form */ + if (sa6_embedscope(&dst6, 0)) + return (EINVAL); /* XXX: should be impossible */ + } + /* + * The destination address can be specified only for a p2p or a + * loopback interface. If specified, the corresponding prefix length + * must be 128. 
+ */ + if (ifra->ifra_dstaddr.sin6_family == AF_INET6) { + if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) { + /* XXX: noisy message */ + nd6log((LOG_INFO, "in6_update_ifa: a destination can " + "be specified for a p2p or a loopback IF only\n")); + return (EINVAL); + } + if (plen != 128) { + nd6log((LOG_INFO, "in6_update_ifa: prefixlen should " + "be 128 when dstaddr is specified\n")); + return (EINVAL); + } + } + /* lifetime consistency check */ + lt = &ifra->ifra_lifetime; + if (lt->ia6t_pltime > lt->ia6t_vltime) + return (EINVAL); + if (lt->ia6t_vltime == 0) { + /* + * the following log might be noisy, but this is a typical + * configuration mistake or a tool's bug. + */ + nd6log((LOG_INFO, + "in6_update_ifa: valid lifetime is 0 for %s\n", + ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr))); + + if (ia == NULL) + return (0); /* there's nothing to do */ + } + + /* + * If this is a new address, allocate a new ifaddr and link it + * into chains. + */ + if (ia == NULL) { + hostIsNew = 1; + /* + * When in6_update_ifa() is called in a process of a received + * RA, it is called under an interrupt context. So, we should + * call malloc with M_NOWAIT. + */ + ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR, + M_NOWAIT); + if (ia == NULL) + return (ENOBUFS); + bzero((caddr_t)ia, sizeof(*ia)); + ifa_init(&ia->ia_ifa); + LIST_INIT(&ia->ia6_memberships); + /* Initialize the address and masks, and put time stamp */ + ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; + ia->ia_addr.sin6_family = AF_INET6; + ia->ia_addr.sin6_len = sizeof(ia->ia_addr); + ia->ia6_createtime = time_second; + if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { + /* + * XXX: some functions expect that ifa_dstaddr is not + * NULL for p2p interfaces. 
+ */ + ia->ia_ifa.ifa_dstaddr = + (struct sockaddr *)&ia->ia_dstaddr; + } else { + ia->ia_ifa.ifa_dstaddr = NULL; + } + ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; + ia->ia_ifp = ifp; + ifa_ref(&ia->ia_ifa); /* if_addrhead */ + IF_ADDR_LOCK(ifp); + TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); + IF_ADDR_UNLOCK(ifp); + + ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */ + IN6_IFADDR_WLOCK(); + TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link); + IN6_IFADDR_WUNLOCK(); + } + + /* update timestamp */ + ia->ia6_updatetime = time_second; + + /* set prefix mask */ + if (ifra->ifra_prefixmask.sin6_len) { + /* + * We prohibit changing the prefix length of an existing + * address, because + * + such an operation should be rare in IPv6, and + * + the operation would confuse prefix management. + */ + if (ia->ia_prefixmask.sin6_len && + in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) { + nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an" + " existing (%s) address should not be changed\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); + error = EINVAL; + goto unlink; + } + ia->ia_prefixmask = ifra->ifra_prefixmask; + } + + /* + * If a new destination address is specified, scrub the old one and + * install the new destination. Note that the interface must be + * p2p or loopback (see the check above.) + */ + if (dst6.sin6_family == AF_INET6 && + !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) { + int e; + + if ((ia->ia_flags & IFA_ROUTE) != 0 && + (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) { + nd6log((LOG_ERR, "in6_update_ifa: failed to remove " + "a route to the old destination: %s\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); + /* proceed anyway... */ + } else + ia->ia_flags &= ~IFA_ROUTE; + ia->ia_dstaddr = dst6; + } + + /* + * Set lifetimes. 
We do not refer to ia6t_expire and ia6t_preferred + * to see if the address is deprecated or invalidated, but initialize + * these members for applications. + */ + ia->ia6_lifetime = ifra->ifra_lifetime; + if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { + ia->ia6_lifetime.ia6t_expire = + time_second + ia->ia6_lifetime.ia6t_vltime; + } else + ia->ia6_lifetime.ia6t_expire = 0; + if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { + ia->ia6_lifetime.ia6t_preferred = + time_second + ia->ia6_lifetime.ia6t_pltime; + } else + ia->ia6_lifetime.ia6t_preferred = 0; + + /* reset the interface and routing table appropriately. */ + if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0) + goto unlink; + + /* + * configure address flags. + */ + ia->ia6_flags = ifra->ifra_flags; + /* + * backward compatibility - if IN6_IFF_DEPRECATED is set from the + * userland, make it deprecated. + */ + if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) { + ia->ia6_lifetime.ia6t_pltime = 0; + ia->ia6_lifetime.ia6t_preferred = time_second; + } + /* + * Make the address tentative before joining multicast addresses, + * so that corresponding MLD responses would not have a tentative + * source address. + */ + ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */ + if (hostIsNew && in6if_do_dad(ifp)) + ia->ia6_flags |= IN6_IFF_TENTATIVE; + + /* + * We are done if we have simply modified an existing address. + */ + if (!hostIsNew) + return (error); + + /* + * Beyond this point, we should call in6_purgeaddr upon an error, + * not just go to unlink. 
+ */ + + /* Join necessary multicast groups */ + in6m_sol = NULL; + if ((ifp->if_flags & IFF_MULTICAST) != 0) { + struct sockaddr_in6 mltaddr, mltmask; + struct in6_addr llsol; + + /* join solicited multicast addr for new host id */ + bzero(&llsol, sizeof(struct in6_addr)); + llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; + llsol.s6_addr32[1] = 0; + llsol.s6_addr32[2] = htonl(1); + llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; + llsol.s6_addr8[12] = 0xff; + if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { + /* XXX: should not happen */ + log(LOG_ERR, "in6_update_ifa: " + "in6_setscope failed\n"); + goto cleanup; + } + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * We need a random delay for DAD on the address + * being configured. It also means delaying + * transmission of the corresponding MLD report to + * avoid report collision. + * [draft-ietf-ipv6-rfc2462bis-02.txt] + */ + delay = arc4random() % + (MAX_RTR_SOLICITATION_DELAY * hz); + } + imm = in6_joingroup(ifp, &llsol, &error, delay); + if (imm == NULL) { + nd6log((LOG_WARNING, + "in6_update_ifa: addmulti failed for " + "%s on %s (errno=%d)\n", + ip6_sprintf(ip6buf, &llsol), if_name(ifp), + error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, + imm, i6mm_chain); + in6m_sol = imm->i6mm_maddr; + + bzero(&mltmask, sizeof(mltmask)); + mltmask.sin6_len = sizeof(struct sockaddr_in6); + mltmask.sin6_family = AF_INET6; + mltmask.sin6_addr = in6mask32; +#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ + + /* + * join link-local all-nodes address + */ + bzero(&mltaddr, sizeof(mltaddr)); + mltaddr.sin6_len = sizeof(struct sockaddr_in6); + mltaddr.sin6_family = AF_INET6; + mltaddr.sin6_addr = in6addr_linklocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != + 0) + goto cleanup; /* XXX: should not fail */ + + /* + * XXX: do we really need this automatic routes? + * We should probably reconsider this stuff. 
Most applications + * actually do not need the routes, since they usually specify + * the outgoing interface. + */ + rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + if (rt) { + /* XXX: only works in !SCOPEDROUTING case. */ + if (memcmp(&mltaddr.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, + MLTMASK_LEN)) { + RTFREE_LOCKED(rt); + rt = NULL; + } + } + if (!rt) { + error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0); + if (error) + goto cleanup; + } else { + RTFREE_LOCKED(rt); + } + + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + if (!imm) { + nd6log((LOG_WARNING, + "in6_update_ifa: addmulti failed for " + "%s on %s (errno=%d)\n", + ip6_sprintf(ip6buf, &mltaddr.sin6_addr), + if_name(ifp), error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + + /* + * join node information group address + */ + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * The spec doesn't say anything about delay for this + * group, but the same logic should apply. + */ + delay = arc4random() % + (MAX_RTR_SOLICITATION_DELAY * hz); + } + if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, + delay); /* XXX jinmei */ + if (!imm) { + nd6log((LOG_WARNING, "in6_update_ifa: " + "addmulti failed for %s on %s " + "(errno=%d)\n", + ip6_sprintf(ip6buf, &mltaddr.sin6_addr), + if_name(ifp), error)); + /* XXX not very fatal, go on... */ + } else { + LIST_INSERT_HEAD(&ia->ia6_memberships, + imm, i6mm_chain); + } + } + + /* + * join interface-local all-nodes address. + * (ff01::1%ifN, and ff01::%ifN/32) + */ + mltaddr.sin6_addr = in6addr_nodelocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) + != 0) + goto cleanup; /* XXX: should not fail */ + /* XXX: again, do we really need the route? 
*/ + rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + if (rt) { + if (memcmp(&mltaddr.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, + MLTMASK_LEN)) { + RTFREE_LOCKED(rt); + rt = NULL; + } + } + if (!rt) { + error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0); + if (error) + goto cleanup; + } else + RTFREE_LOCKED(rt); + + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + if (!imm) { + nd6log((LOG_WARNING, "in6_update_ifa: " + "addmulti failed for %s on %s " + "(errno=%d)\n", + ip6_sprintf(ip6buf, &mltaddr.sin6_addr), + if_name(ifp), error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); +#undef MLTMASK_LEN + } + + /* + * Perform DAD, if needed. + * XXX It may be of use, if we can administratively + * disable DAD. + */ + if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && + (ia->ia6_flags & IN6_IFF_TENTATIVE)) + { + int mindelay, maxdelay; + + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * We need to impose a delay before sending an NS + * for DAD. Check if we also needed a delay for the + * corresponding MLD message. If we did, the delay + * should be larger than the MLD delay (this could be + * relaxed a bit, but this simple logic is at least + * safe). + * XXX: Break data hiding guidelines and look at + * state for the solicited multicast group. 
+ */ + mindelay = 0; + if (in6m_sol != NULL && + in6m_sol->in6m_state == MLD_REPORTING_MEMBER) { + mindelay = in6m_sol->in6m_timer; + } + maxdelay = MAX_RTR_SOLICITATION_DELAY * hz; + if (maxdelay - mindelay == 0) + delay = 0; + else { + delay = + (arc4random() % (maxdelay - mindelay)) + + mindelay; + } + } + nd6_dad_start((struct ifaddr *)ia, delay); + } + + KASSERT(hostIsNew, ("in6_update_ifa: !hostIsNew")); + ifa_free(&ia->ia_ifa); + return (error); + + unlink: + /* + * XXX: if a change of an existing address failed, keep the entry + * anyway. + */ + if (hostIsNew) { + in6_unlink_ifa(ia, ifp); + ifa_free(&ia->ia_ifa); + } + return (error); + + cleanup: + KASSERT(hostIsNew, ("in6_update_ifa: cleanup: !hostIsNew")); + ifa_free(&ia->ia_ifa); + in6_purgeaddr(&ia->ia_ifa); + return error; +} + +void +in6_purgeaddr(struct ifaddr *ifa) +{ + struct ifnet *ifp = ifa->ifa_ifp; + struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; + struct in6_multi_mship *imm; + struct sockaddr_in6 mltaddr, mltmask; + struct rtentry rt0; + struct sockaddr_dl gateway; + struct sockaddr_in6 mask, addr; + int plen, error; + struct rtentry *rt; + struct ifaddr *ifa0, *nifa; + + /* + * find another IPv6 address as the gateway for the + * link-local and node-local all-nodes multicast + * address routes + */ + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH_SAFE(ifa0, &ifp->if_addrhead, ifa_link, nifa) { + if ((ifa0->ifa_addr->sa_family != AF_INET6) || + memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr, + &ia->ia_addr.sin6_addr, + sizeof(struct in6_addr)) == 0) + continue; + else + break; + } + if (ifa0 != NULL) + ifa_ref(ifa0); + IF_ADDR_UNLOCK(ifp); + + /* + * Remove the loopback route to the interface address. + * The check for the current setting of "nd6_useloopback" + * is not needed. 
+ */ + if (ia->ia_flags & IFA_RTSELF) { + error = ifa_del_loopback_route((struct ifaddr *)ia, + (struct sockaddr *)&ia->ia_addr); + if (error == 0) + ia->ia_flags &= ~IFA_RTSELF; + } + + /* stop DAD processing */ + nd6_dad_stop(ifa); + + IF_AFDATA_LOCK(ifp); + lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR), + (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + + /* + * initialize for rtmsg generation + */ + bzero(&gateway, sizeof(gateway)); + gateway.sdl_len = sizeof(gateway); + gateway.sdl_family = AF_LINK; + gateway.sdl_nlen = 0; + gateway.sdl_alen = ifp->if_addrlen; + /* */ + bzero(&rt0, sizeof(rt0)); + rt0.rt_gateway = (struct sockaddr *)&gateway; + memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); + memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); + rt_mask(&rt0) = (struct sockaddr *)&mask; + rt_key(&rt0) = (struct sockaddr *)&addr; + rt0.rt_flags = RTF_HOST | RTF_STATIC; + rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0); + + /* + * leave from multicast groups we have joined for the interface + */ + while ((imm = ia->ia6_memberships.lh_first) != NULL) { + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + + /* + * remove the link-local all-nodes address + */ + bzero(&mltmask, sizeof(mltmask)); + mltmask.sin6_len = sizeof(struct sockaddr_in6); + mltmask.sin6_family = AF_INET6; + mltmask.sin6_addr = in6mask32; + + bzero(&mltaddr, sizeof(mltaddr)); + mltaddr.sin6_len = sizeof(struct sockaddr_in6); + mltaddr.sin6_family = AF_INET6; + mltaddr.sin6_addr = in6addr_linklocal_allnodes; + + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != + 0) + goto cleanup; + + rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + if (rt != NULL && rt->rt_gateway != NULL && + (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, + &ia->ia_addr.sin6_addr, + sizeof(ia->ia_addr.sin6_addr)) == 0)) { + /* + * if no more IPv6 address exists on this interface + * then remove the multicast address route + */ + if (ifa0 == NULL) { + memcpy(&mltaddr.sin6_addr, 
&satosin6(rt_key(rt))->sin6_addr, + sizeof(mltaddr.sin6_addr)); + RTFREE_LOCKED(rt); + error = rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0); + if (error) + log(LOG_INFO, "in6_purgeaddr: link-local all-nodes" + "multicast address deletion error\n"); + } else { + /* + * replace the gateway of the route + */ + struct sockaddr_in6 sa; + + bzero(&sa, sizeof(sa)); + sa.sin6_len = sizeof(struct sockaddr_in6); + sa.sin6_family = AF_INET6; + memcpy(&sa.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, + sizeof(sa.sin6_addr)); + in6_setscope(&sa.sin6_addr, ifa0->ifa_ifp, NULL); + memcpy(rt->rt_gateway, &sa, sizeof(sa)); + RTFREE_LOCKED(rt); + } + } else { + if (rt != NULL) + RTFREE_LOCKED(rt); + } + + /* + * remove the node-local all-nodes address + */ + mltaddr.sin6_addr = in6addr_nodelocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != + 0) + goto cleanup; + + rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + if (rt != NULL && rt->rt_gateway != NULL && + (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, + &ia->ia_addr.sin6_addr, + sizeof(ia->ia_addr.sin6_addr)) == 0)) { + /* + * if no more IPv6 address exists on this interface + * then remove the multicast address route + */ + if (ifa0 == NULL) { + memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, + sizeof(mltaddr.sin6_addr)); + + RTFREE_LOCKED(rt); + error = rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0); + + if (error) + log(LOG_INFO, "in6_purgeaddr: node-local all-nodes" + "multicast address deletion error\n"); + } else { + /* + * replace the gateway of the route + */ + struct sockaddr_in6 sa; + + bzero(&sa, sizeof(sa)); + sa.sin6_len = sizeof(struct sockaddr_in6); + sa.sin6_family = AF_INET6; + memcpy(&sa.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, + sizeof(sa.sin6_addr)); + 
in6_setscope(&sa.sin6_addr, ifa0->ifa_ifp, NULL); + memcpy(rt->rt_gateway, &sa, sizeof(sa)); + RTFREE_LOCKED(rt); + } + } else { + if (rt != NULL) + RTFREE_LOCKED(rt); + } + +cleanup: + + plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ + if ((ia->ia_flags & IFA_ROUTE) && plen == 128) { + int error; + struct sockaddr *dstaddr; + + /* + * use the interface address if configuring an + * interface address with a /128 prefix len + */ + if (ia->ia_dstaddr.sin6_family == AF_INET6) + dstaddr = (struct sockaddr *)&ia->ia_dstaddr; + else + dstaddr = (struct sockaddr *)&ia->ia_addr; + + error = rtrequest(RTM_DELETE, + (struct sockaddr *)dstaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&ia->ia_prefixmask, + ia->ia_flags | RTF_HOST, NULL); + if (error != 0) + return; + ia->ia_flags &= ~IFA_ROUTE; + } + if (ifa0 != NULL) + ifa_free(ifa0); + + in6_unlink_ifa(ia, ifp); +} + +static void +in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) +{ + int s = splnet(); + + IF_ADDR_LOCK(ifp); + TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); + IF_ADDR_UNLOCK(ifp); + ifa_free(&ia->ia_ifa); /* if_addrhead */ + + /* + * Defer the release of what might be the last reference to the + * in6_ifaddr so that it can't be freed before the remainder of the + * cleanup. + */ + IN6_IFADDR_WLOCK(); + TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link); + IN6_IFADDR_WUNLOCK(); + + /* + * Release the reference to the base prefix. There should be a + * positive reference. + */ + if (ia->ia6_ndpr == NULL) { + nd6log((LOG_NOTICE, + "in6_unlink_ifa: autoconf'ed address " + "%p has no prefix\n", ia)); + } else { + ia->ia6_ndpr->ndpr_refcnt--; + ia->ia6_ndpr = NULL; + } + + /* + * Also, if the address being removed is autoconf'ed, call + * pfxlist_onlink_check() since the release might affect the status of + * other (detached) addresses. 
+ */ + if ((ia->ia6_flags & IN6_IFF_AUTOCONF)) { + pfxlist_onlink_check(); + } + ifa_free(&ia->ia_ifa); /* in6_ifaddrhead */ + splx(s); +} + +void +in6_purgeif(struct ifnet *ifp) +{ + struct ifaddr *ifa, *nifa; + + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + in6_purgeaddr(ifa); + } + + in6_ifdetach(ifp); +} + +/* + * SIOC[GAD]LIFADDR. + * SIOCGLIFADDR: get first address. (?) + * SIOCGLIFADDR with IFLR_PREFIX: + * get first address that matches the specified prefix. + * SIOCALIFADDR: add the specified address. + * SIOCALIFADDR with IFLR_PREFIX: + * add the specified prefix, filling hostid part from + * the first link-local address. prefixlen must be <= 64. + * SIOCDLIFADDR: delete the specified address. + * SIOCDLIFADDR with IFLR_PREFIX: + * delete the first address that matches the specified prefix. + * return values: + * EINVAL on invalid parameters + * EADDRNOTAVAIL on prefix match failed/specified address not found + * other values may be returned from in6_ioctl() + * + * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64. + * this is to accomodate address naming scheme other than RFC2374, + * in the future. + * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374 + * address encoding scheme. 
(see figure on page 8) + */ +static int +in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct thread *td) +{ + struct if_laddrreq *iflr = (struct if_laddrreq *)data; + struct ifaddr *ifa; + struct sockaddr *sa; + + /* sanity checks */ + if (!data || !ifp) { + panic("invalid argument to in6_lifaddr_ioctl"); + /* NOTREACHED */ + } + + switch (cmd) { + case SIOCGLIFADDR: + /* address must be specified on GET with IFLR_PREFIX */ + if ((iflr->flags & IFLR_PREFIX) == 0) + break; + /* FALLTHROUGH */ + case SIOCALIFADDR: + case SIOCDLIFADDR: + /* address must be specified on ADD and DELETE */ + sa = (struct sockaddr *)&iflr->addr; + if (sa->sa_family != AF_INET6) + return EINVAL; + if (sa->sa_len != sizeof(struct sockaddr_in6)) + return EINVAL; + /* XXX need improvement */ + sa = (struct sockaddr *)&iflr->dstaddr; + if (sa->sa_family && sa->sa_family != AF_INET6) + return EINVAL; + if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6)) + return EINVAL; + break; + default: /* shouldn't happen */ +#if 0 + panic("invalid cmd to in6_lifaddr_ioctl"); + /* NOTREACHED */ +#else + return EOPNOTSUPP; +#endif + } + if (sizeof(struct in6_addr) * 8 < iflr->prefixlen) + return EINVAL; + + switch (cmd) { + case SIOCALIFADDR: + { + struct in6_aliasreq ifra; + struct in6_addr *hostid = NULL; + int prefixlen; + + ifa = NULL; + if ((iflr->flags & IFLR_PREFIX) != 0) { + struct sockaddr_in6 *sin6; + + /* + * hostid is to fill in the hostid part of the + * address. hostid points to the first link-local + * address attached to the interface. + */ + ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); + if (!ifa) + return EADDRNOTAVAIL; + hostid = IFA_IN6(ifa); + + /* prefixlen must be <= 64. */ + if (64 < iflr->prefixlen) + return EINVAL; + prefixlen = iflr->prefixlen; + + /* hostid part must be zero. 
*/ + sin6 = (struct sockaddr_in6 *)&iflr->addr; + if (sin6->sin6_addr.s6_addr32[2] != 0 || + sin6->sin6_addr.s6_addr32[3] != 0) { + return EINVAL; + } + } else + prefixlen = iflr->prefixlen; + + /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */ + bzero(&ifra, sizeof(ifra)); + bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); + + bcopy(&iflr->addr, &ifra.ifra_addr, + ((struct sockaddr *)&iflr->addr)->sa_len); + if (hostid) { + /* fill in hostid part */ + ifra.ifra_addr.sin6_addr.s6_addr32[2] = + hostid->s6_addr32[2]; + ifra.ifra_addr.sin6_addr.s6_addr32[3] = + hostid->s6_addr32[3]; + } + + if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */ + bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, + ((struct sockaddr *)&iflr->dstaddr)->sa_len); + if (hostid) { + ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] = + hostid->s6_addr32[2]; + ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] = + hostid->s6_addr32[3]; + } + } + if (ifa != NULL) + ifa_free(ifa); + + ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); + in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen); + + ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX; + return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td); + } + case SIOCGLIFADDR: + case SIOCDLIFADDR: + { + struct in6_ifaddr *ia; + struct in6_addr mask, candidate, match; + struct sockaddr_in6 *sin6; + int cmp; + + bzero(&mask, sizeof(mask)); + if (iflr->flags & IFLR_PREFIX) { + /* lookup a prefix rather than address. 
*/ + in6_prefixlen2mask(&mask, iflr->prefixlen); + + sin6 = (struct sockaddr_in6 *)&iflr->addr; + bcopy(&sin6->sin6_addr, &match, sizeof(match)); + match.s6_addr32[0] &= mask.s6_addr32[0]; + match.s6_addr32[1] &= mask.s6_addr32[1]; + match.s6_addr32[2] &= mask.s6_addr32[2]; + match.s6_addr32[3] &= mask.s6_addr32[3]; + + /* if you set extra bits, that's wrong */ + if (bcmp(&match, &sin6->sin6_addr, sizeof(match))) + return EINVAL; + + cmp = 1; + } else { + if (cmd == SIOCGLIFADDR) { + /* on getting an address, take the 1st match */ + cmp = 0; /* XXX */ + } else { + /* on deleting an address, do exact match */ + in6_prefixlen2mask(&mask, 128); + sin6 = (struct sockaddr_in6 *)&iflr->addr; + bcopy(&sin6->sin6_addr, &match, sizeof(match)); + + cmp = 1; + } + } + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (!cmp) + break; + + /* + * XXX: this is adhoc, but is necessary to allow + * a user to specify fe80::/64 (not /10) for a + * link-local address. 
+ */ + bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate)); + in6_clearscope(&candidate); + candidate.s6_addr32[0] &= mask.s6_addr32[0]; + candidate.s6_addr32[1] &= mask.s6_addr32[1]; + candidate.s6_addr32[2] &= mask.s6_addr32[2]; + candidate.s6_addr32[3] &= mask.s6_addr32[3]; + if (IN6_ARE_ADDR_EQUAL(&candidate, &match)) + break; + } + IF_ADDR_UNLOCK(ifp); + if (!ifa) + return EADDRNOTAVAIL; + ia = ifa2ia6(ifa); + + if (cmd == SIOCGLIFADDR) { + int error; + + /* fill in the if_laddrreq structure */ + bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len); + error = sa6_recoverscope( + (struct sockaddr_in6 *)&iflr->addr); + if (error != 0) + return (error); + + if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { + bcopy(&ia->ia_dstaddr, &iflr->dstaddr, + ia->ia_dstaddr.sin6_len); + error = sa6_recoverscope( + (struct sockaddr_in6 *)&iflr->dstaddr); + if (error != 0) + return (error); + } else + bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); + + iflr->prefixlen = + in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); + + iflr->flags = ia->ia6_flags; /* XXX */ + + return 0; + } else { + struct in6_aliasreq ifra; + + /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ + bzero(&ifra, sizeof(ifra)); + bcopy(iflr->iflr_name, ifra.ifra_name, + sizeof(ifra.ifra_name)); + + bcopy(&ia->ia_addr, &ifra.ifra_addr, + ia->ia_addr.sin6_len); + if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { + bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, + ia->ia_dstaddr.sin6_len); + } else { + bzero(&ifra.ifra_dstaddr, + sizeof(ifra.ifra_dstaddr)); + } + bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr, + ia->ia_prefixmask.sin6_len); + + ifra.ifra_flags = ia->ia6_flags; + return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra, + ifp, td); + } + } + } + + return EOPNOTSUPP; /* just for safety */ +} + +/* + * Initialize an interface's intetnet6 address + * and routing table entry. 
+ */ +static int +in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, + struct sockaddr_in6 *sin6, int newhost) +{ + int error = 0, plen, ifacount = 0; + int s = splimp(); + struct ifaddr *ifa; + + /* + * Give the interface a chance to initialize + * if this is its first address, + * and to validate the address if necessary. + */ + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ifacount++; + } + IF_ADDR_UNLOCK(ifp); + + ia->ia_addr = *sin6; + + if (ifacount <= 1 && ifp->if_ioctl) { + error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); + if (error) { + splx(s); + return (error); + } + } + splx(s); + + ia->ia_ifa.ifa_metric = ifp->if_metric; + + /* we could do in(6)_socktrim here, but just omit it at this moment. */ + + /* + * Special case: + * If a new destination address is specified for a point-to-point + * interface, install a route to the destination as an interface + * direct route. + * XXX: the logic below rejects assigning multiple addresses on a p2p + * interface that share the same destination. + */ + plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ + if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && + ia->ia_dstaddr.sin6_family == AF_INET6) { + int rtflags = RTF_UP | RTF_HOST; + + error = rtrequest(RTM_ADD, + (struct sockaddr *)&ia->ia_dstaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&ia->ia_prefixmask, + ia->ia_flags | rtflags, NULL); + if (error != 0) + return (error); + ia->ia_flags |= IFA_ROUTE; + } + + /* + * add a loopback route to self + */ + if (!(ia->ia_flags & IFA_ROUTE) + && (V_nd6_useloopback + || (ifp->if_flags & IFF_LOOPBACK))) { + error = ifa_add_loopback_route((struct ifaddr *)ia, + (struct sockaddr *)&ia->ia_addr); + if (error == 0) + ia->ia_flags |= IFA_RTSELF; + } + + /* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). 
*/ + if (newhost) { + struct llentry *ln; + struct rtentry rt; + struct sockaddr_dl gateway; + struct sockaddr_in6 mask, addr; + + IF_AFDATA_LOCK(ifp); + ia->ia_ifa.ifa_rtrequest = NULL; + + /* XXX QL + * we need to report rt_newaddrmsg + */ + ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | LLE_EXCLUSIVE), + (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + if (ln != NULL) { + ln->la_expire = 0; /* for IPv6 this means permanent */ + ln->ln_state = ND6_LLINFO_REACHABLE; + /* + * initialize for rtmsg generation + */ + bzero(&gateway, sizeof(gateway)); + gateway.sdl_len = sizeof(gateway); + gateway.sdl_family = AF_LINK; + gateway.sdl_nlen = 0; + gateway.sdl_alen = 6; + memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned, sizeof(ln->ll_addr)); + /* */ + LLE_WUNLOCK(ln); + } + + bzero(&rt, sizeof(rt)); + rt.rt_gateway = (struct sockaddr *)&gateway; + memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); + memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); + rt_mask(&rt) = (struct sockaddr *)&mask; + rt_key(&rt) = (struct sockaddr *)&addr; + rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC; + rt_newaddrmsg(RTM_ADD, &ia->ia_ifa, 0, &rt); + } + + return (error); +} + +/* + * Find an IPv6 interface link-local address specific to an interface. + * ifaddr is returned referenced. + */ +struct in6_ifaddr * +in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) +{ + struct ifaddr *ifa; + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) { + if ((((struct in6_ifaddr *)ifa)->ia6_flags & + ignoreflags) != 0) + continue; + ifa_ref(ifa); + break; + } + } + IF_ADDR_UNLOCK(ifp); + + return ((struct in6_ifaddr *)ifa); +} + + +/* + * find the internet address corresponding to a given interface and address. + * ifaddr is returned referenced. 
+ */ +struct in6_ifaddr * +in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr) +{ + struct ifaddr *ifa; + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) { + ifa_ref(ifa); + break; + } + } + IF_ADDR_UNLOCK(ifp); + + return ((struct in6_ifaddr *)ifa); +} + +/* + * Convert IP6 address to printable (loggable) representation. Caller + * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long. + */ +static char digits[] = "0123456789abcdef"; +char * +ip6_sprintf(char *ip6buf, const struct in6_addr *addr) +{ + int i; + char *cp; + const u_int16_t *a = (const u_int16_t *)addr; + const u_int8_t *d; + int dcolon = 0, zero = 0; + + cp = ip6buf; + + for (i = 0; i < 8; i++) { + if (dcolon == 1) { + if (*a == 0) { + if (i == 7) + *cp++ = ':'; + a++; + continue; + } else + dcolon = 2; + } + if (*a == 0) { + if (dcolon == 0 && *(a + 1) == 0) { + if (i == 0) + *cp++ = ':'; + *cp++ = ':'; + dcolon = 1; + } else { + *cp++ = '0'; + *cp++ = ':'; + } + a++; + continue; + } + d = (const u_char *)a; + /* Try to eliminate leading zeros in printout like in :0001. 
*/ + zero = 1; + *cp = digits[*d >> 4]; + if (*cp != '0') { + zero = 0; + cp++; + } + *cp = digits[*d++ & 0xf]; + if (zero == 0 || (*cp != '0')) { + zero = 0; + cp++; + } + *cp = digits[*d >> 4]; + if (zero == 0 || (*cp != '0')) { + zero = 0; + cp++; + } + *cp++ = digits[*d & 0xf]; + *cp++ = ':'; + a++; + } + *--cp = '\0'; + return (ip6buf); +} + +int +in6_localaddr(struct in6_addr *in6) +{ + struct in6_ifaddr *ia; + + if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6)) + return 1; + + IN6_IFADDR_RLOCK(); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr, + &ia->ia_prefixmask.sin6_addr)) { + IN6_IFADDR_RUNLOCK(); + return 1; + } + } + IN6_IFADDR_RUNLOCK(); + + return (0); +} + +int +in6_is_addr_deprecated(struct sockaddr_in6 *sa6) +{ + struct in6_ifaddr *ia; + + IN6_IFADDR_RLOCK(); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, + &sa6->sin6_addr) && + (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) { + IN6_IFADDR_RUNLOCK(); + return (1); /* true */ + } + + /* XXX: do we still have to go thru the rest of the list? */ + } + IN6_IFADDR_RUNLOCK(); + + return (0); /* false */ +} + +/* + * return length of part which dst and src are equal + * hard coding... 
+ */ +int +in6_matchlen(struct in6_addr *src, struct in6_addr *dst) +{ + int match = 0; + u_char *s = (u_char *)src, *d = (u_char *)dst; + u_char *lim = s + 16, r; + + while (s < lim) + if ((r = (*d++ ^ *s++)) != 0) { + while (r < 128) { + match++; + r <<= 1; + } + break; + } else + match += 8; + return match; +} + +/* XXX: to be scope conscious */ +int +in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len) +{ + int bytelen, bitlen; + + /* sanity check */ + if (0 > len || len > 128) { + log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n", + len); + return (0); + } + + bytelen = len / 8; + bitlen = len % 8; + + if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen)) + return (0); + if (bitlen != 0 && + p1->s6_addr[bytelen] >> (8 - bitlen) != + p2->s6_addr[bytelen] >> (8 - bitlen)) + return (0); + + return (1); +} + +void +in6_prefixlen2mask(struct in6_addr *maskp, int len) +{ + u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; + int bytelen, bitlen, i; + + /* sanity check */ + if (0 > len || len > 128) { + log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n", + len); + return; + } + + bzero(maskp, sizeof(*maskp)); + bytelen = len / 8; + bitlen = len % 8; + for (i = 0; i < bytelen; i++) + maskp->s6_addr[i] = 0xff; + if (bitlen) + maskp->s6_addr[bytelen] = maskarray[bitlen - 1]; +} + +/* + * return the best address out of the same scope. if no address was + * found, return the first valid address from designated IF. + */ +struct in6_ifaddr * +in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) +{ + int dst_scope = in6_addrscope(dst), blen = -1, tlen; + struct ifaddr *ifa; + struct in6_ifaddr *besta = 0; + struct in6_ifaddr *dep[2]; /* last-resort: deprecated */ + + dep[0] = dep[1] = NULL; + + /* + * We first look for addresses in the same scope. + * If there is one, return it. + * If two or more, return one which matches the dst longest. + * If none, return one of global addresses assigned other ifs. 
+ */ + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) + continue; /* XXX: is there any case to allow anycast? */ + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) + continue; /* don't use this interface */ + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) + continue; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { + if (V_ip6_use_deprecated) + dep[0] = (struct in6_ifaddr *)ifa; + continue; + } + + if (dst_scope == in6_addrscope(IFA_IN6(ifa))) { + /* + * call in6_matchlen() as few as possible + */ + if (besta) { + if (blen == -1) + blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst); + tlen = in6_matchlen(IFA_IN6(ifa), dst); + if (tlen > blen) { + blen = tlen; + besta = (struct in6_ifaddr *)ifa; + } + } else + besta = (struct in6_ifaddr *)ifa; + } + } + if (besta) { + ifa_ref(&besta->ia_ifa); + IF_ADDR_UNLOCK(ifp); + return (besta); + } + IF_ADDR_UNLOCK(ifp); + + IN6_IFADDR_RLOCK(); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) + continue; /* XXX: is there any case to allow anycast? */ + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) + continue; /* don't use this interface */ + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) + continue; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { + if (V_ip6_use_deprecated) + dep[1] = (struct in6_ifaddr *)ifa; + continue; + } + + if (ifa != NULL) + ifa_ref(ifa); + IN6_IFADDR_RUNLOCK(); + return (struct in6_ifaddr *)ifa; + } + IN6_IFADDR_RUNLOCK(); + + /* use the last-resort values, that are, deprecated addresses */ + if (dep[0]) + return dep[0]; + if (dep[1]) + return dep[1]; + + return NULL; +} + +/* + * perform DAD when interface becomes IFF_UP. 
+ */ +void +in6_if_up(struct ifnet *ifp) +{ + struct ifaddr *ifa; + struct in6_ifaddr *ia; + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ia = (struct in6_ifaddr *)ifa; + if (ia->ia6_flags & IN6_IFF_TENTATIVE) { + /* + * The TENTATIVE flag was likely set by hand + * beforehand, implicitly indicating the need for DAD. + * We may be able to skip the random delay in this + * case, but we impose delays just in case. + */ + nd6_dad_start(ifa, + arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz)); + } + } + IF_ADDR_UNLOCK(ifp); + + /* + * special cases, like 6to4, are handled in in6_ifattach + */ + in6_ifattach(ifp, NULL); +} + +int +in6if_do_dad(struct ifnet *ifp) +{ + if ((ifp->if_flags & IFF_LOOPBACK) != 0) + return (0); + + switch (ifp->if_type) { +#ifdef IFT_DUMMY + case IFT_DUMMY: +#endif + case IFT_FAITH: + /* + * These interfaces do not have the IFF_LOOPBACK flag, + * but loop packets back. We do not have to do DAD on such + * interfaces. We should even omit it, because loop-backed + * NS would confuse the DAD procedure. + */ + return (0); + default: + /* + * Our DAD routine requires the interface up and running. + * However, some interfaces can be up before the RUNNING + * status. Additionaly, users may try to assign addresses + * before the interface becomes up (or running). + * We simply skip DAD in such a case as a work around. + * XXX: we should rather mark "tentative" on such addresses, + * and do DAD after the interface becomes ready. + */ + if (!((ifp->if_flags & IFF_UP) && + (ifp->if_drv_flags & IFF_DRV_RUNNING))) + return (0); + + return (1); + } +} + +/* + * Calculate max IPv6 MTU through all the interfaces and store it + * to in6_maxmtu. 
+ */ +void +in6_setmaxmtu(void) +{ + unsigned long maxmtu = 0; + struct ifnet *ifp; + + IFNET_RLOCK_NOSLEEP(); + for (ifp = TAILQ_FIRST(&V_ifnet); ifp; + ifp = TAILQ_NEXT(ifp, if_list)) { + /* this function can be called during ifnet initialization */ + if (!ifp->if_afdata[AF_INET6]) + continue; + if ((ifp->if_flags & IFF_LOOPBACK) == 0 && + IN6_LINKMTU(ifp) > maxmtu) + maxmtu = IN6_LINKMTU(ifp); + } + IFNET_RUNLOCK_NOSLEEP(); + if (maxmtu) /* update only when maxmtu is positive */ + V_in6_maxmtu = maxmtu; +} + +/* + * Provide the length of interface identifiers to be used for the link attached + * to the given interface. The length should be defined in "IPv6 over + * xxx-link" document. Note that address architecture might also define + * the length for a particular set of address prefixes, regardless of the + * link type. As clarified in rfc2462bis, those two definitions should be + * consistent, and those really are as of August 2004. + */ +int +in6_if2idlen(struct ifnet *ifp) +{ + switch (ifp->if_type) { + case IFT_ETHER: /* RFC2464 */ +#ifdef IFT_PROPVIRTUAL + case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */ +#endif +#ifdef IFT_L2VLAN + case IFT_L2VLAN: /* ditto */ +#endif +#ifdef IFT_IEEE80211 + case IFT_IEEE80211: /* ditto */ +#endif +#ifdef IFT_MIP + case IFT_MIP: /* ditto */ +#endif + return (64); + case IFT_FDDI: /* RFC2467 */ + return (64); + case IFT_ISO88025: /* RFC2470 (IPv6 over Token Ring) */ + return (64); + case IFT_PPP: /* RFC2472 */ + return (64); + case IFT_ARCNET: /* RFC2497 */ + return (64); + case IFT_FRELAY: /* RFC2590 */ + return (64); + case IFT_IEEE1394: /* RFC3146 */ + return (64); + case IFT_GIF: + return (64); /* draft-ietf-v6ops-mech-v2-07 */ + case IFT_LOOP: + return (64); /* XXX: is this really correct? */ + default: + /* + * Unknown link type: + * It might be controversial to use the today's common constant + * of 64 for these cases unconditionally. For full compliance, + * we should return an error in this case. 
On the other hand, + * if we simply miss the standard for the link type or a new + * standard is defined for a new link type, the IFID length + * is very likely to be the common constant. As a compromise, + * we always use the constant, but make an explicit notice + * indicating the "unknown" case. + */ + printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type); + return (64); + } +} + +#include <freebsd/sys/sysctl.h> + +struct in6_llentry { + struct llentry base; + struct sockaddr_in6 l3_addr6; +}; + +static struct llentry * +in6_lltable_new(const struct sockaddr *l3addr, u_int flags) +{ + struct in6_llentry *lle; + + lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, + M_DONTWAIT | M_ZERO); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr; + lle->base.lle_refcnt = 1; + LLE_LOCK_INIT(&lle->base); + callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock, + CALLOUT_RETURNUNLOCKED); + + return &lle->base; +} + +/* + * Deletes an address from the address table. + * This function is called by the timer functions + * such as arptimer() and nd6_llinfo_timer(), and + * the caller does the locking. 
+ */ +static void +in6_lltable_free(struct lltable *llt, struct llentry *lle) +{ + LLE_WUNLOCK(lle); + LLE_LOCK_DESTROY(lle); + free(lle, M_LLTABLE); +} + +static void +in6_lltable_prefix_free(struct lltable *llt, + const struct sockaddr *prefix, + const struct sockaddr *mask) +{ + const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix; + const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask; + struct llentry *lle, *next; + register int i; + + for (i=0; i < LLTBL_HASHTBL_SIZE; i++) { + LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { + if (IN6_ARE_MASKED_ADDR_EQUAL( + &((struct sockaddr_in6 *)L3_ADDR(lle))->sin6_addr, + &pfx->sin6_addr, + &msk->sin6_addr)) { + int canceled; + + canceled = callout_drain(&lle->la_timer); + LLE_WLOCK(lle); + if (canceled) + LLE_REMREF(lle); + llentry_free(lle); + } + } + } +} + +static int +in6_lltable_rtcheck(struct ifnet *ifp, + u_int flags, + const struct sockaddr *l3addr) +{ + struct rtentry *rt; + char ip6buf[INET6_ADDRSTRLEN]; + + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + /* XXX rtalloc1 should take a const param */ + rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0); + if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { + struct ifaddr *ifa; + /* + * Create an ND6 cache for an IPv6 neighbor + * that is not covered by our own prefix. 
+ */ + /* XXX ifaof_ifpforaddr should take a const param */ + ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp); + if (ifa != NULL) { + ifa_free(ifa); + if (rt != NULL) + RTFREE_LOCKED(rt); + return 0; + } + log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", + ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr)); + if (rt != NULL) + RTFREE_LOCKED(rt); + return EINVAL; + } + RTFREE_LOCKED(rt); + return 0; +} + +static struct llentry * +in6_lltable_lookup(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + struct llentries *lleh; + u_int hashkey; + + IF_AFDATA_LOCK_ASSERT(ifp); + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + hashkey = sin6->sin6_addr.s6_addr32[3]; + lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; + LIST_FOREACH(lle, lleh, lle_next) { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle); + if (lle->la_flags & LLE_DELETED) + continue; + if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr, + sizeof(struct in6_addr)) == 0) + break; + } + + if (lle == NULL) { + if (!(flags & LLE_CREATE)) + return (NULL); + /* + * A route that covers the given address must have + * been installed 1st because we are doing a resolution, + * verify this. 
+ */ + if (!(flags & LLE_IFADDR) && + in6_lltable_rtcheck(ifp, flags, l3addr) != 0) + return NULL; + + lle = in6_lltable_new(l3addr, flags); + if (lle == NULL) { + log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); + return NULL; + } + lle->la_flags = flags & ~LLE_CREATE; + if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { + bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); + lle->la_flags |= (LLE_VALID | LLE_STATIC); + } + + lle->lle_tbl = llt; + lle->lle_head = lleh; + LIST_INSERT_HEAD(lleh, lle, lle_next); + } else if (flags & LLE_DELETE) { + if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { + LLE_WLOCK(lle); + lle->la_flags = LLE_DELETED; + LLE_WUNLOCK(lle); +#ifdef DIAGNOSTIC + log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); +#endif + } + lle = (void *)-1; + } + if (LLE_IS_VALID(lle)) { + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + } + return (lle); +} + +static int +in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) +{ + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + /* XXX stack use */ + struct { + struct rt_msghdr rtm; + struct sockaddr_in6 sin6; + /* + * ndp.c assumes that sdl is word aligned + */ +#ifdef __LP64__ + uint32_t pad; +#endif + struct sockaddr_dl sdl; + } ndpc; + int i, error; + + if (ifp->if_flags & IFF_LOOPBACK) + return 0; + + LLTABLE_LOCK_ASSERT(); + + error = 0; + for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { + LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { + struct sockaddr_dl *sdl; + + /* skip deleted or invalid entries */ + if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID) + continue; + /* Skip if jailed and not a valid IP of the prison. 
*/ + if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0) + continue; + /* + * produce a msg made of: + * struct rt_msghdr; + * struct sockaddr_in6 (IPv6) + * struct sockaddr_dl; + */ + bzero(&ndpc, sizeof(ndpc)); + ndpc.rtm.rtm_msglen = sizeof(ndpc); + ndpc.rtm.rtm_version = RTM_VERSION; + ndpc.rtm.rtm_type = RTM_GET; + ndpc.rtm.rtm_flags = RTF_UP; + ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; + ndpc.sin6.sin6_family = AF_INET6; + ndpc.sin6.sin6_len = sizeof(ndpc.sin6); + bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle)); + + /* publish */ + if (lle->la_flags & LLE_PUB) + ndpc.rtm.rtm_flags |= RTF_ANNOUNCE; + + sdl = &ndpc.sdl; + sdl->sdl_family = AF_LINK; + sdl->sdl_len = sizeof(*sdl); + sdl->sdl_alen = ifp->if_addrlen; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); + ndpc.rtm.rtm_rmx.rmx_expire = + lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; + ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); + if (lle->la_flags & LLE_STATIC) + ndpc.rtm.rtm_flags |= RTF_STATIC; + ndpc.rtm.rtm_index = ifp->if_index; + error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); + if (error) + break; + } + } + return error; +} + +void * +in6_domifattach(struct ifnet *ifp) +{ + struct in6_ifextra *ext; + + ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK); + bzero(ext, sizeof(*ext)); + + ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat), + M_IFADDR, M_WAITOK); + bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat)); + + ext->icmp6_ifstat = + (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat), + M_IFADDR, M_WAITOK); + bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat)); + + ext->nd_ifinfo = nd6_ifattach(ifp); + ext->scope6_id = scope6_ifattach(ifp); + ext->lltable = lltable_init(ifp, AF_INET6); + if (ext->lltable != NULL) { + ext->lltable->llt_new = in6_lltable_new; + ext->lltable->llt_free = in6_lltable_free; + ext->lltable->llt_prefix_free = in6_lltable_prefix_free; + 
ext->lltable->llt_rtcheck = in6_lltable_rtcheck; + ext->lltable->llt_lookup = in6_lltable_lookup; + ext->lltable->llt_dump = in6_lltable_dump; + } + + ext->mld_ifinfo = mld_domifattach(ifp); + + return ext; +} + +void +in6_domifdetach(struct ifnet *ifp, void *aux) +{ + struct in6_ifextra *ext = (struct in6_ifextra *)aux; + + mld_domifdetach(ifp); + scope6_ifdetach(ext->scope6_id); + nd6_ifdetach(ext->nd_ifinfo); + lltable_free(ext->lltable); + free(ext->in6_ifstat, M_IFADDR); + free(ext->icmp6_ifstat, M_IFADDR); + free(ext, M_IFADDR); +} + +/* + * Convert sockaddr_in6 to sockaddr_in. Original sockaddr_in6 must be + * v4 mapped addr or v4 compat addr + */ +void +in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) +{ + + bzero(sin, sizeof(*sin)); + sin->sin_len = sizeof(struct sockaddr_in); + sin->sin_family = AF_INET; + sin->sin_port = sin6->sin6_port; + sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3]; +} + +/* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */ +void +in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) +{ + bzero(sin6, sizeof(*sin6)); + sin6->sin6_len = sizeof(struct sockaddr_in6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = sin->sin_port; + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; + sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr; +} + +/* Convert sockaddr_in6 into sockaddr_in. */ +void +in6_sin6_2_sin_in_sock(struct sockaddr *nam) +{ + struct sockaddr_in *sin_p; + struct sockaddr_in6 sin6; + + /* + * Save original sockaddr_in6 addr and convert it + * to sockaddr_in. + */ + sin6 = *(struct sockaddr_in6 *)nam; + sin_p = (struct sockaddr_in *)nam; + in6_sin6_2_sin(sin_p, &sin6); +} + +/* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. 
*/ +void +in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) +{ + struct sockaddr_in *sin_p; + struct sockaddr_in6 *sin6_p; + + sin6_p = malloc(sizeof *sin6_p, M_SONAME, + M_WAITOK); + sin_p = (struct sockaddr_in *)*nam; + in6_sin_2_v4mapsin6(sin_p, sin6_p); + free(*nam, M_SONAME); + *nam = (struct sockaddr *)sin6_p; +} diff --git a/freebsd/sys/netinet6/in6.h b/freebsd/sys/netinet6/in6.h new file mode 100644 index 00000000..8d241116 --- /dev/null +++ b/freebsd/sys/netinet6/in6.h @@ -0,0 +1,708 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD$ + */ + +#ifndef __KAME_NETINET_IN_HH_INCLUDED_ +#error "do not include netinet6/in6.h directly, include netinet/in.h. see RFC2553" +#endif + +#ifndef _NETINET6_IN6_HH_ +#define _NETINET6_IN6_HH_ + +/* + * Identification of the network protocol stack + * for *BSD-current/release: http://www.kame.net/dev/cvsweb.cgi/kame/COVERAGE + * has the table of implementation/integration differences. + */ +#define __KAME__ +#define __KAME_VERSION "FreeBSD" + +/* + * IPv6 port allocation rules should mirror the IPv4 rules and are controlled + * by the net.inet.ip.portrange sysctl tree. The following defines exist + * for compatibility with userland applications that need them. + */ +#if __BSD_VISIBLE +#define IPV6PORT_RESERVED 1024 +#define IPV6PORT_ANONMIN 49152 +#define IPV6PORT_ANONMAX 65535 +#define IPV6PORT_RESERVEDMIN 600 +#define IPV6PORT_RESERVEDMAX (IPV6PORT_RESERVED-1) +#endif + +/* + * IPv6 address + */ +struct in6_addr { + union { + uint8_t __u6_addr8[16]; + uint16_t __u6_addr16[8]; + uint32_t __u6_addr32[4]; + } __u6_addr; /* 128-bit IP6 address */ +}; + +#define s6_addr __u6_addr.__u6_addr8 +#ifdef _KERNEL /* XXX nonstandard */ +#define s6_addr8 __u6_addr.__u6_addr8 +#define s6_addr16 __u6_addr.__u6_addr16 +#define s6_addr32 __u6_addr.__u6_addr32 +#endif + +#define INET6_ADDRSTRLEN 46 + +/* + * XXX missing POSIX.1-2001 macro IPPROTO_IPV6. 
+ */ + +/* + * Socket address for IPv6 + */ +#if __BSD_VISIBLE +#define SIN6_LEN +#endif + +struct sockaddr_in6 { + uint8_t sin6_len; /* length of this struct */ + sa_family_t sin6_family; /* AF_INET6 */ + in_port_t sin6_port; /* Transport layer port # */ + uint32_t sin6_flowinfo; /* IP6 flow information */ + struct in6_addr sin6_addr; /* IP6 address */ + uint32_t sin6_scope_id; /* scope zone index */ +}; + +/* + * Local definition for masks + */ +#ifdef _KERNEL /* XXX nonstandard */ +#define IN6MASK0 {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}} +#define IN6MASK32 {{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} +#define IN6MASK64 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} +#define IN6MASK96 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }}} +#define IN6MASK128 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}} +#endif + +#ifdef _KERNEL +extern const struct sockaddr_in6 sa6_any; + +extern const struct in6_addr in6mask0; +extern const struct in6_addr in6mask32; +extern const struct in6_addr in6mask64; +extern const struct in6_addr in6mask96; +extern const struct in6_addr in6mask128; +#endif /* _KERNEL */ + +/* + * Macros started with IPV6_ADDR is KAME local + */ +#ifdef _KERNEL /* XXX nonstandard */ +#if _BYTE_ORDER == _BIG_ENDIAN +#define IPV6_ADDR_INT32_ONE 1 +#define IPV6_ADDR_INT32_TWO 2 +#define IPV6_ADDR_INT32_MNL 0xff010000 +#define IPV6_ADDR_INT32_MLL 0xff020000 +#define IPV6_ADDR_INT32_SMP 0x0000ffff +#define IPV6_ADDR_INT16_ULL 0xfe80 +#define IPV6_ADDR_INT16_USL 0xfec0 +#define IPV6_ADDR_INT16_MLL 0xff02 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define IPV6_ADDR_INT32_ONE 0x01000000 +#define IPV6_ADDR_INT32_TWO 0x02000000 +#define IPV6_ADDR_INT32_MNL 0x000001ff +#define IPV6_ADDR_INT32_MLL 0x000002ff +#define 
IPV6_ADDR_INT32_SMP 0xffff0000 +#define IPV6_ADDR_INT16_ULL 0x80fe +#define IPV6_ADDR_INT16_USL 0xc0fe +#define IPV6_ADDR_INT16_MLL 0x02ff +#endif +#endif + +/* + * Definition of some useful macros to handle IP6 addresses + */ +#if __BSD_VISIBLE +#define IN6ADDR_ANY_INIT \ + {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} +#define IN6ADDR_LOOPBACK_INIT \ + {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} +#define IN6ADDR_NODELOCAL_ALLNODES_INIT \ + {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} +#define IN6ADDR_INTFACELOCAL_ALLNODES_INIT \ + {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} +#define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ + {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} +#define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ + {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}} +#define IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \ + {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16 }}} +#endif + +extern const struct in6_addr in6addr_any; +extern const struct in6_addr in6addr_loopback; +#if __BSD_VISIBLE +extern const struct in6_addr in6addr_nodelocal_allnodes; +extern const struct in6_addr in6addr_linklocal_allnodes; +extern const struct in6_addr in6addr_linklocal_allrouters; +extern const struct in6_addr in6addr_linklocal_allv2routers; +#endif + +/* + * Equality + * NOTE: Some of kernel programming environment (for example, openbsd/sparc) + * does not supply memcmp(). For userland memcmp() is preferred as it is + * in ANSI standard. 
+ */ +#ifdef _KERNEL +#define IN6_ARE_ADDR_EQUAL(a, b) \ + (bcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) +#else +#if __BSD_VISIBLE +#define IN6_ARE_ADDR_EQUAL(a, b) \ + (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) +#endif +#endif + +/* + * Unspecified + */ +#define IN6_IS_ADDR_UNSPECIFIED(a) \ + ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) == 0)) + +/* + * Loopback + */ +#define IN6_IS_ADDR_LOOPBACK(a) \ + ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) == ntohl(1))) + +/* + * IPv4 compatible + */ +#define IN6_IS_ADDR_V4COMPAT(a) \ + ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) != 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) != ntohl(1))) + +/* + * Mapped + */ +#define IN6_IS_ADDR_V4MAPPED(a) \ + ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == ntohl(0x0000ffff))) + +/* + * KAME Scope Values + */ + +#ifdef _KERNEL /* XXX nonstandard */ +#define IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#define IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 +#define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 +#define IPV6_ADDR_SCOPE_SITELOCAL 0x05 +#define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ +#define IPV6_ADDR_SCOPE_GLOBAL 0x0e +#else +#define 
__IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#define __IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 +#define __IPV6_ADDR_SCOPE_LINKLOCAL 0x02 +#define __IPV6_ADDR_SCOPE_SITELOCAL 0x05 +#define __IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ +#define __IPV6_ADDR_SCOPE_GLOBAL 0x0e +#endif + +/* + * Unicast Scope + * Note that we must check topmost 10 bits only, not 16 bits (see RFC2373). + */ +#define IN6_IS_ADDR_LINKLOCAL(a) \ + (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80)) +#define IN6_IS_ADDR_SITELOCAL(a) \ + (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0)) + +/* + * Multicast + */ +#define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff) + +#ifdef _KERNEL /* XXX nonstandard */ +#define IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) +#else +#define __IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) +#endif + +/* + * Multicast Scope + */ +#ifdef _KERNEL /* refers nonstandard items */ +#define IN6_IS_ADDR_MC_NODELOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_NODELOCAL)) +#define IN6_IS_ADDR_MC_INTFACELOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_INTFACELOCAL)) +#define IN6_IS_ADDR_MC_LINKLOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL)) +#define IN6_IS_ADDR_MC_SITELOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL)) +#define IN6_IS_ADDR_MC_ORGLOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_ORGLOCAL)) +#define IN6_IS_ADDR_MC_GLOBAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_GLOBAL)) +#else +#define IN6_IS_ADDR_MC_NODELOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_NODELOCAL)) +#define IN6_IS_ADDR_MC_LINKLOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL)) +#define IN6_IS_ADDR_MC_SITELOCAL(a) 
\ + (IN6_IS_ADDR_MULTICAST(a) && \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL)) +#define IN6_IS_ADDR_MC_ORGLOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL)) +#define IN6_IS_ADDR_MC_GLOBAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_GLOBAL)) +#endif + +#ifdef _KERNEL /* nonstandard */ +/* + * KAME Scope + */ +#define IN6_IS_SCOPE_LINKLOCAL(a) \ + ((IN6_IS_ADDR_LINKLOCAL(a)) || \ + (IN6_IS_ADDR_MC_LINKLOCAL(a))) +#define IN6_IS_SCOPE_EMBED(a) \ + ((IN6_IS_ADDR_LINKLOCAL(a)) || \ + (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \ + (IN6_IS_ADDR_MC_INTFACELOCAL(a))) + +#define IFA6_IS_DEPRECATED(a) \ + ((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \ + (u_int32_t)((time_second - (a)->ia6_updatetime)) > \ + (a)->ia6_lifetime.ia6t_pltime) +#define IFA6_IS_INVALID(a) \ + ((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \ + (u_int32_t)((time_second - (a)->ia6_updatetime)) > \ + (a)->ia6_lifetime.ia6t_vltime) +#endif /* _KERNEL */ + +/* + * IP6 route structure + */ +#if __BSD_VISIBLE +struct route_in6 { + struct rtentry *ro_rt; + struct llentry *ro_lle; + struct sockaddr_in6 ro_dst; +}; +#endif + +/* + * Options for use with [gs]etsockopt at the IPV6 level. + * First word of comment is data type; bool is stored in int. 
+ */ +/* no hdrincl */ +#if 0 /* the followings are relic in IPv4 and hence are disabled */ +#define IPV6_OPTIONS 1 /* buf/ip6_opts; set/get IP6 options */ +#define IPV6_RECVOPTS 5 /* bool; receive all IP6 opts w/dgram */ +#define IPV6_RECVRETOPTS 6 /* bool; receive IP6 opts for response */ +#define IPV6_RECVDSTADDR 7 /* bool; receive IP6 dst addr w/dgram */ +#define IPV6_RETOPTS 8 /* ip6_opts; set/get IP6 options */ +#endif +#define IPV6_SOCKOPT_RESERVED1 3 /* reserved for future use */ +#define IPV6_UNICAST_HOPS 4 /* int; IP6 hops */ +#define IPV6_MULTICAST_IF 9 /* u_int; set/get IP6 multicast i/f */ +#define IPV6_MULTICAST_HOPS 10 /* int; set/get IP6 multicast hops */ +#define IPV6_MULTICAST_LOOP 11 /* u_int; set/get IP6 multicast loopback */ +#define IPV6_JOIN_GROUP 12 /* ip6_mreq; join a group membership */ +#define IPV6_LEAVE_GROUP 13 /* ip6_mreq; leave a group membership */ +#define IPV6_PORTRANGE 14 /* int; range to choose for unspec port */ +#define ICMP6_FILTER 18 /* icmp6_filter; icmp6 filter */ +/* RFC2292 options */ +#ifdef _KERNEL +#define IPV6_2292PKTINFO 19 /* bool; send/recv if, src/dst addr */ +#define IPV6_2292HOPLIMIT 20 /* bool; hop limit */ +#define IPV6_2292NEXTHOP 21 /* bool; next hop addr */ +#define IPV6_2292HOPOPTS 22 /* bool; hop-by-hop option */ +#define IPV6_2292DSTOPTS 23 /* bool; destination option */ +#define IPV6_2292RTHDR 24 /* bool; routing header */ +#define IPV6_2292PKTOPTIONS 25 /* buf/cmsghdr; set/get IPv6 options */ +#endif + +#define IPV6_CHECKSUM 26 /* int; checksum offset for raw socket */ +#define IPV6_V6ONLY 27 /* bool; make AF_INET6 sockets v6 only */ +#ifndef _KERNEL +#define IPV6_BINDV6ONLY IPV6_V6ONLY +#endif + +#if 1 /* IPSEC */ +#define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ +#endif /* IPSEC */ + +#define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */ + +#if 1 /* IPV6FIREWALL */ +#define IPV6_FW_ADD 30 /* add a firewall rule to chain */ +#define IPV6_FW_DEL 31 /* delete a firewall rule 
from chain */ +#define IPV6_FW_FLUSH 32 /* flush firewall rule chain */ +#define IPV6_FW_ZERO 33 /* clear single/all firewall counter(s) */ +#define IPV6_FW_GET 34 /* get entire firewall rule chain */ +#endif + +/* new socket options introduced in RFC3542 */ +#define IPV6_RTHDRDSTOPTS 35 /* ip6_dest; send dst option before rthdr */ + +#define IPV6_RECVPKTINFO 36 /* bool; recv if, dst addr */ +#define IPV6_RECVHOPLIMIT 37 /* bool; recv hop limit */ +#define IPV6_RECVRTHDR 38 /* bool; recv routing header */ +#define IPV6_RECVHOPOPTS 39 /* bool; recv hop-by-hop option */ +#define IPV6_RECVDSTOPTS 40 /* bool; recv dst option after rthdr */ +#ifdef _KERNEL +#define IPV6_RECVRTHDRDSTOPTS 41 /* bool; recv dst option before rthdr */ +#endif + +#define IPV6_USE_MIN_MTU 42 /* bool; send packets at the minimum MTU */ +#define IPV6_RECVPATHMTU 43 /* bool; notify an according MTU */ + +#define IPV6_PATHMTU 44 /* mtuinfo; get the current path MTU (sopt), + 4 bytes int; MTU notification (cmsg) */ +#if 0 /*obsoleted during 2292bis -> 3542*/ +#define IPV6_REACHCONF 45 /* no data; ND reachability confirm + (cmsg only/not part of RFC3542) */ +#endif + +/* more new socket options introduced in RFC3542 */ +#define IPV6_PKTINFO 46 /* in6_pktinfo; send if, src addr */ +#define IPV6_HOPLIMIT 47 /* int; send hop limit */ +#define IPV6_NEXTHOP 48 /* sockaddr; next hop addr */ +#define IPV6_HOPOPTS 49 /* ip6_hbh; send hop-by-hop option */ +#define IPV6_DSTOPTS 50 /* ip6_dest; send dst option before rthdr */ +#define IPV6_RTHDR 51 /* ip6_rthdr; send routing header */ +#if 0 +#define IPV6_PKTOPTIONS 52 /* buf/cmsghdr; set/get IPv6 options */ + /* obsoleted by RFC3542 */ +#endif + +#define IPV6_RECVTCLASS 57 /* bool; recv traffic class values */ + +#define IPV6_AUTOFLOWLABEL 59 /* bool; attach flowlabel automagically */ + +#define IPV6_TCLASS 61 /* int; send traffic class value */ +#define IPV6_DONTFRAG 62 /* bool; disable IPv6 fragmentation */ + +#define IPV6_PREFER_TEMPADDR 63 /* int; prefer 
temporary addresses as + * the source address. + */ + +#define IPV6_BINDANY 64 /* bool: allow bind to any address */ + +/* + * The following option is private; do not use it from user applications. + * It is deliberately defined to the same value as IP_MSFILTER. + */ +#define IPV6_MSFILTER 74 /* struct __msfilterreq; + * set/get multicast source filter list. + */ + +/* to define items, should talk with KAME guys first, for *BSD compatibility */ + +#define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. XXX old spec */ +#define IPV6_RTHDR_STRICT 1 /* this hop must be a neighbor. XXX old spec */ +#define IPV6_RTHDR_TYPE_0 0 /* IPv6 routing header type 0 */ + +/* + * Defaults and limits for options + */ +#define IPV6_DEFAULT_MULTICAST_HOPS 1 /* normally limit m'casts to 1 hop */ +#define IPV6_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ + +/* + * The im6o_membership vector for each socket is now dynamically allocated at + * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized + * according to a power-of-two increment. + */ +#define IPV6_MIN_MEMBERSHIPS 31 +#define IPV6_MAX_MEMBERSHIPS 4095 + +/* + * Default resource limits for IPv6 multicast source filtering. + * These may be modified by sysctl. + */ +#define IPV6_MAX_GROUP_SRC_FILTER 512 /* sources per group */ +#define IPV6_MAX_SOCK_SRC_FILTER 128 /* sources per socket/group */ + +/* + * Argument structure for IPV6_JOIN_GROUP and IPV6_LEAVE_GROUP. + */ +struct ipv6_mreq { + struct in6_addr ipv6mr_multiaddr; + unsigned int ipv6mr_interface; +}; + +/* + * IPV6_PKTINFO: Packet information(RFC2292 sec 5) + */ +struct in6_pktinfo { + struct in6_addr ipi6_addr; /* src/dst IPv6 address */ + unsigned int ipi6_ifindex; /* send/recv interface index */ +}; + +/* + * Control structure for IPV6_RECVPATHMTU socket option. + */ +struct ip6_mtuinfo { + struct sockaddr_in6 ip6m_addr; /* or sockaddr_storage? 
*/ + uint32_t ip6m_mtu; +}; + +/* + * Argument for IPV6_PORTRANGE: + * - which range to search when port is unspecified at bind() or connect() + */ +#define IPV6_PORTRANGE_DEFAULT 0 /* default range */ +#define IPV6_PORTRANGE_HIGH 1 /* "high" - request firewall bypass */ +#define IPV6_PORTRANGE_LOW 2 /* "low" - vouchsafe security */ + +#if __BSD_VISIBLE +/* + * Definitions for inet6 sysctl operations. + * + * Third level is protocol number. + * Fourth level is desired variable within that protocol. + */ +#define IPV6PROTO_MAXID (IPPROTO_PIM + 1) /* don't list to IPV6PROTO_MAX */ + +/* + * Names for IP sysctl objects + */ +#define IPV6CTL_FORWARDING 1 /* act as router */ +#define IPV6CTL_SENDREDIRECTS 2 /* may send redirects when forwarding*/ +#define IPV6CTL_DEFHLIM 3 /* default Hop-Limit */ +#ifdef notyet +#define IPV6CTL_DEFMTU 4 /* default MTU */ +#endif +#define IPV6CTL_FORWSRCRT 5 /* forward source-routed dgrams */ +#define IPV6CTL_STATS 6 /* stats */ +#define IPV6CTL_MRTSTATS 7 /* multicast forwarding stats */ +#define IPV6CTL_MRTPROTO 8 /* multicast routing protocol */ +#define IPV6CTL_MAXFRAGPACKETS 9 /* max packets reassembly queue */ +#define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */ +#define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimum logging interval */ +#define IPV6CTL_ACCEPT_RTADV 12 +#define IPV6CTL_KEEPFAITH 13 +#define IPV6CTL_LOG_INTERVAL 14 +#define IPV6CTL_HDRNESTLIMIT 15 +#define IPV6CTL_DAD_COUNT 16 +#define IPV6CTL_AUTO_FLOWLABEL 17 +#define IPV6CTL_DEFMCASTHLIM 18 +#define IPV6CTL_GIF_HLIM 19 /* default HLIM for gif encap packet */ +#define IPV6CTL_KAME_VERSION 20 +#define IPV6CTL_USE_DEPRECATED 21 /* use deprecated addr (RFC2462 5.5.4) */ +#define IPV6CTL_RR_PRUNE 22 /* walk timer for router renumbering */ +#if 0 /* obsolete */ +#define IPV6CTL_MAPPED_ADDR 23 +#endif +#define IPV6CTL_V6ONLY 24 +#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */ +#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */ 
+#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */ + +#define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */ +#define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ +#define IPV6CTL_TEMPVLTIME 34 /* valid lifetime for tmpaddrs */ +#define IPV6CTL_AUTO_LINKLOCAL 35 /* automatic link-local addr assign */ +#define IPV6CTL_RIP6STATS 36 /* raw_ip6 stats */ +#define IPV6CTL_PREFER_TEMPADDR 37 /* prefer temporary addr as src */ +#define IPV6CTL_ADDRCTLPOLICY 38 /* get/set address selection policy */ +#define IPV6CTL_USE_DEFAULTZONE 39 /* use default scope zone */ + +#define IPV6CTL_MAXFRAGS 41 /* max fragments */ +#if 0 +#define IPV6CTL_IFQ 42 /* ip6intrq node */ +#define IPV6CTL_ISATAPRTR 43 /* isatap router */ +#endif +#define IPV6CTL_MCAST_PMTU 44 /* enable pMTU discovery for multicast? */ + +/* New entries should be added here from current IPV6CTL_MAXID value. */ +/* to define items, should talk with KAME guys first, for *BSD compatibility */ +#define IPV6CTL_STEALTH 45 + +#define ICMPV6CTL_ND6_ONLINKNSRFC4861 47 +#define IPV6CTL_MAXID 48 +#endif /* __BSD_VISIBLE */ + +/* + * Redefinition of mbuf flags + */ +#define M_AUTHIPHDR M_PROTO2 +#define M_DECRYPTED M_PROTO3 +#define M_LOOP M_PROTO4 +#define M_AUTHIPDGM M_PROTO5 +#define M_RTALERT_MLD M_PROTO6 + +#ifdef _KERNEL +struct cmsghdr; + +int in6_cksum __P((struct mbuf *, u_int8_t, u_int32_t, u_int32_t)); +int in6_localaddr __P((struct in6_addr *)); +int in6_addrscope __P((struct in6_addr *)); +struct in6_ifaddr *in6_ifawithifp __P((struct ifnet *, struct in6_addr *)); +extern void in6_if_up __P((struct ifnet *)); +struct sockaddr; +extern u_char ip6_protox[]; + +void in6_sin6_2_sin __P((struct sockaddr_in *sin, + struct sockaddr_in6 *sin6)); +void in6_sin_2_v4mapsin6 __P((struct sockaddr_in *sin, + struct sockaddr_in6 *sin6)); +void in6_sin6_2_sin_in_sock __P((struct sockaddr *nam)); +void in6_sin_2_v4mapsin6_in_sock __P((struct sockaddr **nam)); +extern void 
addrsel_policy_init __P((void)); + +#define satosin6(sa) ((struct sockaddr_in6 *)(sa)) +#define sin6tosa(sin6) ((struct sockaddr *)(sin6)) +#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) + +extern int (*faithprefix_p)(struct in6_addr *); +#endif /* _KERNEL */ + +#ifndef _SIZE_T_DECLARED +typedef __size_t size_t; +#define _SIZE_T_DECLARED +#endif + +#ifndef _SOCKLEN_T_DECLARED +typedef __socklen_t socklen_t; +#define _SOCKLEN_T_DECLARED +#endif + +#if __BSD_VISIBLE + +__BEGIN_DECLS +struct cmsghdr; + +extern int inet6_option_space __P((int)); +extern int inet6_option_init __P((void *, struct cmsghdr **, int)); +extern int inet6_option_append __P((struct cmsghdr *, const uint8_t *, + int, int)); +extern uint8_t *inet6_option_alloc __P((struct cmsghdr *, int, int, int)); +extern int inet6_option_next __P((const struct cmsghdr *, uint8_t **)); +extern int inet6_option_find __P((const struct cmsghdr *, uint8_t **, int)); + +extern size_t inet6_rthdr_space __P((int, int)); +extern struct cmsghdr *inet6_rthdr_init __P((void *, int)); +extern int inet6_rthdr_add __P((struct cmsghdr *, const struct in6_addr *, + unsigned int)); +extern int inet6_rthdr_lasthop __P((struct cmsghdr *, unsigned int)); +#if 0 /* not implemented yet */ +extern int inet6_rthdr_reverse __P((const struct cmsghdr *, struct cmsghdr *)); +#endif +extern int inet6_rthdr_segments __P((const struct cmsghdr *)); +extern struct in6_addr *inet6_rthdr_getaddr __P((struct cmsghdr *, int)); +extern int inet6_rthdr_getflags __P((const struct cmsghdr *, int)); + +extern int inet6_opt_init __P((void *, socklen_t)); +extern int inet6_opt_append __P((void *, socklen_t, int, uint8_t, socklen_t, + uint8_t, void **)); +extern int inet6_opt_finish __P((void *, socklen_t, int)); +extern int inet6_opt_set_val __P((void *, int, void *, socklen_t)); + +extern int inet6_opt_next __P((void *, socklen_t, int, uint8_t *, socklen_t *, + void **)); +extern int inet6_opt_find __P((void *, socklen_t, int, uint8_t, socklen_t *, 
+ void **)); +extern int inet6_opt_get_val __P((void *, int, void *, socklen_t)); +extern socklen_t inet6_rth_space __P((int, int)); +extern void *inet6_rth_init __P((void *, socklen_t, int, int)); +extern int inet6_rth_add __P((void *, const struct in6_addr *)); +extern int inet6_rth_reverse __P((const void *, void *)); +extern int inet6_rth_segments __P((const void *)); +extern struct in6_addr *inet6_rth_getaddr __P((const void *, int)); +__END_DECLS + +#endif /* __BSD_VISIBLE */ + +#endif /* !_NETINET6_IN6_HH_ */ diff --git a/freebsd/sys/netinet6/in6_cksum.c b/freebsd/sys/netinet6/in6_cksum.c new file mode 100644 index 00000000..1bd215b9 --- /dev/null +++ b/freebsd/sys/netinet6/in6_cksum.c @@ -0,0 +1,303 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_cksum.c,v 1.10 2000/12/03 00:53:59 itojun Exp $ + */ + +/*- + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/sys/param.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/systm.h> +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/scope6_var.h> + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + */ + +#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} + +/* + * m MUST contain a continuous IP6 header. + * off is an offset where TCP/UDP/ICMP6 header starts. + * len is a total length of a transport segment. + * (e.g. 
TCP header + TCP payload) + */ +int +in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) +{ + u_int16_t *w; + int sum = 0; + int mlen = 0; + int byte_swapped = 0; + struct ip6_hdr *ip6; + struct in6_addr in6; + union { + u_int16_t phs[4]; + struct { + u_int32_t ph_len; + u_int8_t ph_zero[3]; + u_int8_t ph_nxt; + } __packed ph; + } uph; + union { + u_int8_t c[2]; + u_int16_t s; + } s_util; + union { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + /* sanity check */ + if (m->m_pkthdr.len < off + len) { + panic("in6_cksum: mbuf len (%d) < off+len (%d+%d)", + m->m_pkthdr.len, off, len); + } + + bzero(&uph, sizeof(uph)); + + /* + * First create IP6 pseudo header and calculate a summary. + */ + ip6 = mtod(m, struct ip6_hdr *); + uph.ph.ph_len = htonl(len); + uph.ph.ph_nxt = nxt; + + /* + * IPv6 source address. + * XXX: we'd like to avoid copying the address, but we can't due to + * the possibly embedded scope zone ID. + */ + in6 = ip6->ip6_src; + in6_clearscope(&in6); + w = (u_int16_t *)&in6; + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + + /* IPv6 destination address */ + in6 = ip6->ip6_dst; + in6_clearscope(&in6); + w = (u_int16_t *)&in6; + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + + /* Payload length and upper layer identifier */ + sum += uph.phs[0]; sum += uph.phs[1]; + sum += uph.phs[2]; sum += uph.phs[3]; + + /* + * Secondly calculate a summary of the first mbuf excluding offset. + */ + while (off > 0) { + if (m->m_len <= off) + off -= m->m_len; + else + break; + m = m->m_next; + } + w = (u_int16_t *)(mtod(m, u_char *) + off); + mlen = m->m_len - off; + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. 
+ */ + if ((1 & (long) w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(u_char *)w; + w = (u_int16_t *)((char *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. + */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + goto next; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + if (byte_swapped) { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + s_util.c[1] = *(char *)w; + sum += s_util.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(char *)w; + next: + m = m->m_next; + + /* + * Lastly calculate a summary of the rest of mbufs. + */ + + for (;m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + w = mtod(m, u_int16_t *); + if (mlen == -1) { + /* + * The first byte of this mbuf is the continuation + * of a word spanning between this mbuf and the + * last mbuf. + * + * s_util.c[0] is already saved when scanning previous + * mbuf. + */ + s_util.c[1] = *(char *)w; + sum += s_util.s; + w = (u_int16_t *)((char *)w + 1); + mlen = m->m_len - 1; + len--; + } else + mlen = m->m_len; + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. + */ + if ((1 & (long) w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(u_char *)w; + w = (u_int16_t *)((char *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. 
+ */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + continue; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + if (byte_swapped) { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + s_util.c[1] = *(char *)w; + sum += s_util.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(char *)w; + } + if (len) + panic("in6_cksum: out of data"); + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +} diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c new file mode 100644 index 00000000..482181db --- /dev/null +++ b/freebsd/sys/netinet6/in6_gif.c @@ -0,0 +1,466 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_gif.c,v 1.49 2001/05/14 14:02:17 itojun Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/queue.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/sysctl.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/malloc.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/route.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_systm.h> +#ifdef INET +#include <freebsd/netinet/ip.h> +#endif +#include <freebsd/netinet/ip_encap.h> +#ifdef INET6 +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/in6_gif.h> +#include <freebsd/netinet6/in6_var.h> +#endif +#include 
<freebsd/netinet6/ip6protosw.h> +#include <freebsd/netinet/ip_ecn.h> +#ifdef INET6 +#include <freebsd/netinet6/ip6_ecn.h> +#endif + +#include <freebsd/net/if_gif.h> + +VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM; +#define V_ip6_gif_hlim VNET(ip6_gif_hlim) + +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW, + &VNET_NAME(ip6_gif_hlim), 0, ""); + +static int gif_validate6(const struct ip6_hdr *, struct gif_softc *, + struct ifnet *); + +extern struct domain inet6domain; +struct ip6protosw in6_gif_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = 0, /* IPPROTO_IPV[46] */ + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = in6_gif_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs +}; + +int +in6_gif_output(struct ifnet *ifp, + int family, /* family of the packet to be encapsulate */ + struct mbuf *m) +{ + struct gif_softc *sc = ifp->if_softc; + struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst; + struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)sc->gif_psrc; + struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)sc->gif_pdst; + struct ip6_hdr *ip6; + struct etherip_header eiphdr; + int error, len, proto; + u_int8_t itos, otos; + + GIF_LOCK_ASSERT(sc); + + if (sin6_src == NULL || sin6_dst == NULL || + sin6_src->sin6_family != AF_INET6 || + sin6_dst->sin6_family != AF_INET6) { + m_freem(m); + return EAFNOSUPPORT; + } + + switch (family) { +#ifdef INET + case AF_INET: + { + struct ip *ip; + + proto = IPPROTO_IPV4; + if (m->m_len < sizeof(*ip)) { + m = m_pullup(m, sizeof(*ip)); + if (!m) + return ENOBUFS; + } + ip = mtod(m, struct ip *); + itos = ip->ip_tos; + break; + } +#endif +#ifdef INET6 + case AF_INET6: + { + struct ip6_hdr *ip6; + proto = IPPROTO_IPV6; + if (m->m_len < sizeof(*ip6)) { + m = m_pullup(m, sizeof(*ip6)); + if (!m) + return ENOBUFS; + } + ip6 = mtod(m, struct ip6_hdr *); + itos = (ntohl(ip6->ip6_flow) >> 
20) & 0xff; + break; + } +#endif + case AF_LINK: + proto = IPPROTO_ETHERIP; + + /* + * GIF_SEND_REVETHIP (disabled by default) intentionally + * sends an EtherIP packet with revered version field in + * the header. This is a knob for backward compatibility + * with FreeBSD 7.2R or prior. + */ + if ((sc->gif_options & GIF_SEND_REVETHIP)) { + eiphdr.eip_ver = 0; + eiphdr.eip_resvl = ETHERIP_VERSION; + eiphdr.eip_resvh = 0; + } else { + eiphdr.eip_ver = ETHERIP_VERSION; + eiphdr.eip_resvl = 0; + eiphdr.eip_resvh = 0; + } + /* prepend Ethernet-in-IP header */ + M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT); + if (m && m->m_len < sizeof(struct etherip_header)) + m = m_pullup(m, sizeof(struct etherip_header)); + if (m == NULL) + return ENOBUFS; + bcopy(&eiphdr, mtod(m, struct etherip_header *), + sizeof(struct etherip_header)); + break; + + default: +#ifdef DEBUG + printf("in6_gif_output: warning: unknown family %d passed\n", + family); +#endif + m_freem(m); + return EAFNOSUPPORT; + } + + /* prepend new IP header */ + len = sizeof(struct ip6_hdr); +#ifndef __NO_STRICT_ALIGNMENT + if (family == AF_LINK) + len += ETHERIP_ALIGN; +#endif + M_PREPEND(m, len, M_DONTWAIT); + if (m != NULL && m->m_len < len) + m = m_pullup(m, len); + if (m == NULL) { + printf("ENOBUFS in in6_gif_output %d\n", __LINE__); + return ENOBUFS; + } +#ifndef __NO_STRICT_ALIGNMENT + if (family == AF_LINK) { + len = mtod(m, vm_offset_t) & 3; + KASSERT(len == 0 || len == ETHERIP_ALIGN, + ("in6_gif_output: unexpected misalignment")); + m->m_data += len; + m->m_len -= ETHERIP_ALIGN; + } +#endif + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_plen = htons((u_short)m->m_pkthdr.len); + ip6->ip6_nxt = proto; + ip6->ip6_hlim = V_ip6_gif_hlim; + ip6->ip6_src = sin6_src->sin6_addr; + /* bidirectional configured tunnel mode */ + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) + ip6->ip6_dst = sin6_dst->sin6_addr; 
+ else { + m_freem(m); + return ENETUNREACH; + } + ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE, + &otos, &itos); + ip6->ip6_flow &= ~htonl(0xff << 20); + ip6->ip6_flow |= htonl((u_int32_t)otos << 20); + + if (dst->sin6_family != sin6_dst->sin6_family || + !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) { + /* cache route doesn't match */ + bzero(dst, sizeof(*dst)); + dst->sin6_family = sin6_dst->sin6_family; + dst->sin6_len = sizeof(struct sockaddr_in6); + dst->sin6_addr = sin6_dst->sin6_addr; + if (sc->gif_ro6.ro_rt) { + RTFREE(sc->gif_ro6.ro_rt); + sc->gif_ro6.ro_rt = NULL; + } +#if 0 + GIF2IFP(sc)->if_mtu = GIF_MTU; +#endif + } + + if (sc->gif_ro6.ro_rt == NULL) { + rtalloc((struct route *)&sc->gif_ro6); + if (sc->gif_ro6.ro_rt == NULL) { + m_freem(m); + return ENETUNREACH; + } + + /* if it constitutes infinite encapsulation, punt. */ + if (sc->gif_ro.ro_rt->rt_ifp == ifp) { + m_freem(m); + return ENETUNREACH; /*XXX*/ + } +#if 0 + ifp->if_mtu = sc->gif_ro6.ro_rt->rt_ifp->if_mtu + - sizeof(struct ip6_hdr); +#endif + } + +#ifdef IPV6_MINMTU + /* + * force fragmentation to minimum MTU, to avoid path MTU discovery. + * it is too painful to ask for resend of inner packet, to achieve + * path MTU discovery for encapsulated packets. 
+ */ + error = ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL); +#else + error = ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL); +#endif + + if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) && + sc->gif_ro6.ro_rt != NULL) { + RTFREE(sc->gif_ro6.ro_rt); + sc->gif_ro6.ro_rt = NULL; + } + + return (error); +} + +int +in6_gif_input(struct mbuf **mp, int *offp, int proto) +{ + struct mbuf *m = *mp; + struct ifnet *gifp = NULL; + struct gif_softc *sc; + struct ip6_hdr *ip6; + int af = 0; + u_int32_t otos; + + ip6 = mtod(m, struct ip6_hdr *); + + sc = (struct gif_softc *)encap_getarg(m); + if (sc == NULL) { + m_freem(m); + V_ip6stat.ip6s_nogif++; + return IPPROTO_DONE; + } + + gifp = GIF2IFP(sc); + if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) { + m_freem(m); + V_ip6stat.ip6s_nogif++; + return IPPROTO_DONE; + } + + otos = ip6->ip6_flow; + m_adj(m, *offp); + + switch (proto) { +#ifdef INET + case IPPROTO_IPV4: + { + struct ip *ip; + u_int8_t otos8; + af = AF_INET; + otos8 = (ntohl(otos) >> 20) & 0xff; + if (m->m_len < sizeof(*ip)) { + m = m_pullup(m, sizeof(*ip)); + if (!m) + return IPPROTO_DONE; + } + ip = mtod(m, struct ip *); + if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ? + ECN_ALLOWED : ECN_NOCARE, + &otos8, &ip->ip_tos) == 0) { + m_freem(m); + return IPPROTO_DONE; + } + break; + } +#endif /* INET */ +#ifdef INET6 + case IPPROTO_IPV6: + { + struct ip6_hdr *ip6; + af = AF_INET6; + if (m->m_len < sizeof(*ip6)) { + m = m_pullup(m, sizeof(*ip6)); + if (!m) + return IPPROTO_DONE; + } + ip6 = mtod(m, struct ip6_hdr *); + if (ip6_ecn_egress((gifp->if_flags & IFF_LINK1) ? + ECN_ALLOWED : ECN_NOCARE, + &otos, &ip6->ip6_flow) == 0) { + m_freem(m); + return IPPROTO_DONE; + } + break; + } +#endif + case IPPROTO_ETHERIP: + af = AF_LINK; + break; + + default: + V_ip6stat.ip6s_nogif++; + m_freem(m); + return IPPROTO_DONE; + } + + gif_input(m, af, gifp); + return IPPROTO_DONE; +} + +/* + * validate outer address. 
+ */ +static int +gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc, + struct ifnet *ifp) +{ + struct sockaddr_in6 *src, *dst; + + src = (struct sockaddr_in6 *)sc->gif_psrc; + dst = (struct sockaddr_in6 *)sc->gif_pdst; + + /* + * Check for address match. Note that the check is for an incoming + * packet. We should compare the *source* address in our configuration + * and the *destination* address of the packet, and vice versa. + */ + if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) || + !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src)) + return 0; + + /* martian filters on outer source - done in ip6_input */ + + /* ingress filters on outer source */ + if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) { + struct sockaddr_in6 sin6; + struct rtentry *rt; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = ip6->ip6_src; + sin6.sin6_scope_id = 0; /* XXX */ + + rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + if (!rt || rt->rt_ifp != ifp) { +#if 0 + char ip6buf[INET6_ADDRSTRLEN]; + log(LOG_WARNING, "%s: packet from %s dropped " + "due to ingress filter\n", if_name(GIF2IFP(sc)), + ip6_sprintf(ip6buf, &sin6.sin6_addr)); +#endif + if (rt) + RTFREE_LOCKED(rt); + return 0; + } + RTFREE_LOCKED(rt); + } + + return 128 * 2; +} + +/* + * we know that we are in IFF_UP, outer address available, and outer family + * matched the physical addr family. see gif_encapcheck(). + * sanity check for arg should have been done in the caller. + */ +int +gif_encapcheck6(const struct mbuf *m, int off, int proto, void *arg) +{ + struct ip6_hdr ip6; + struct gif_softc *sc; + struct ifnet *ifp; + + /* sanity check done in caller */ + sc = (struct gif_softc *)arg; + + /* LINTED const cast */ + m_copydata(m, 0, sizeof(ip6), (caddr_t)&ip6); + ifp = ((m->m_flags & M_PKTHDR) != 0) ? 
m->m_pkthdr.rcvif : NULL; + + return gif_validate6(&ip6, sc, ifp); +} + +int +in6_gif_attach(struct gif_softc *sc) +{ + sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck, + (void *)&in6_gif_protosw, sc); + if (sc->encap_cookie6 == NULL) + return EEXIST; + return 0; +} + +int +in6_gif_detach(struct gif_softc *sc) +{ + int error; + + error = encap_detach(sc->encap_cookie6); + if (error == 0) + sc->encap_cookie6 = NULL; + return error; +} diff --git a/freebsd/sys/netinet6/in6_gif.h b/freebsd/sys/netinet6/in6_gif.h new file mode 100644 index 00000000..ed566112 --- /dev/null +++ b/freebsd/sys/netinet6/in6_gif.h @@ -0,0 +1,45 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_gif.h,v 1.5 2000/04/14 08:36:03 itojun Exp $ + * $FreeBSD$ + */ + +#ifndef _NETINET6_IN6_GIF_HH_ +#define _NETINET6_IN6_GIF_HH_ + +#define GIF_HLIM 30 + +struct gif_softc; +int in6_gif_input __P((struct mbuf **, int *, int)); +int in6_gif_output __P((struct ifnet *, int, struct mbuf *)); +int gif_encapcheck6 __P((const struct mbuf *, int, int, void *)); +int in6_gif_attach __P((struct gif_softc *)); +int in6_gif_detach __P((struct gif_softc *)); + +#endif /* _NETINET6_IN6_GIF_HH_ */ diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c new file mode 100644 index 00000000..7942d5af --- /dev/null +++ b/freebsd/sys/netinet6/in6_ifattach.c @@ -0,0 +1,971 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/jail.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/md5.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/route.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/if_ether.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/ip_var.h> +#include <freebsd/netinet/udp.h> +#include <freebsd/netinet/udp_var.h> + +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/in6_var.h> +#include 
<freebsd/netinet6/in6_pcb.h> +#include <freebsd/netinet6/in6_ifattach.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet6/mld6_var.h> +#include <freebsd/netinet6/scope6_var.h> + +VNET_DEFINE(unsigned long, in6_maxmtu) = 0; + +#ifdef IP6_AUTO_LINKLOCAL +VNET_DEFINE(int, ip6_auto_linklocal) = IP6_AUTO_LINKLOCAL; +#else +VNET_DEFINE(int, ip6_auto_linklocal) = 1; /* enabled by default */ +#endif + +VNET_DEFINE(struct callout, in6_tmpaddrtimer_ch); +#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) + +VNET_DECLARE(struct inpcbinfo, ripcbinfo); +#define V_ripcbinfo VNET(ripcbinfo) + +static int get_rand_ifid(struct ifnet *, struct in6_addr *); +static int generate_tmp_ifid(u_int8_t *, const u_int8_t *, u_int8_t *); +static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *); +static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *); +static int in6_ifattach_loopback(struct ifnet *); +static void in6_purgemaddrs(struct ifnet *); + +#define EUI64_GBIT 0x01 +#define EUI64_UBIT 0x02 +#define EUI64_TO_IFID(in6) do {(in6)->s6_addr[8] ^= EUI64_UBIT; } while (0) +#define EUI64_GROUP(in6) ((in6)->s6_addr[8] & EUI64_GBIT) +#define EUI64_INDIVIDUAL(in6) (!EUI64_GROUP(in6)) +#define EUI64_LOCAL(in6) ((in6)->s6_addr[8] & EUI64_UBIT) +#define EUI64_UNIVERSAL(in6) (!EUI64_LOCAL(in6)) + +#define IFID_LOCAL(in6) (!EUI64_LOCAL(in6)) +#define IFID_UNIVERSAL(in6) (!EUI64_UNIVERSAL(in6)) + +/* + * Generate a last-resort interface identifier, when the machine has no + * IEEE802/EUI64 address sources. + * The goal here is to get an interface identifier that is + * (1) random enough and (2) does not change across reboot. + * We currently use MD5(hostname) for it. 
+ * + * in6 - upper 64bits are preserved + */ +static int +get_rand_ifid(struct ifnet *ifp, struct in6_addr *in6) +{ + MD5_CTX ctxt; + struct prison *pr; + u_int8_t digest[16]; + int hostnamelen; + + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + hostnamelen = strlen(pr->pr_hostname); +#if 0 + /* we need at least several letters as seed for ifid */ + if (hostnamelen < 3) { + mtx_unlock(&pr->pr_mtx); + return -1; + } +#endif + + /* generate 8 bytes of pseudo-random value. */ + bzero(&ctxt, sizeof(ctxt)); + MD5Init(&ctxt); + MD5Update(&ctxt, pr->pr_hostname, hostnamelen); + mtx_unlock(&pr->pr_mtx); + MD5Final(digest, &ctxt); + + /* assumes sizeof(digest) > sizeof(ifid) */ + bcopy(digest, &in6->s6_addr[8], 8); + + /* make sure to set "u" bit to local, and "g" bit to individual. */ + in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */ + in6->s6_addr[8] |= EUI64_UBIT; /* u bit to "local" */ + + /* convert EUI64 into IPv6 interface identifier */ + EUI64_TO_IFID(in6); + + return 0; +} + +static int +generate_tmp_ifid(u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret) +{ + MD5_CTX ctxt; + u_int8_t seed[16], digest[16], nullbuf[8]; + u_int32_t val32; + + /* If there's no history, start with a random seed. */ + bzero(nullbuf, sizeof(nullbuf)); + if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) { + int i; + + for (i = 0; i < 2; i++) { + val32 = arc4random(); + bcopy(&val32, seed + sizeof(val32) * i, sizeof(val32)); + } + } else + bcopy(seed0, seed, 8); + + /* copy the right-most 64-bits of the given address */ + /* XXX assumption on the size of IFID */ + bcopy(seed1, &seed[8], 8); + + if (0) { /* for debugging purposes only */ + int i; + + printf("generate_tmp_ifid: new randomized ID from: "); + for (i = 0; i < 16; i++) + printf("%02x", seed[i]); + printf(" "); + } + + /* generate 16 bytes of pseudo-random value. 
*/ + bzero(&ctxt, sizeof(ctxt)); + MD5Init(&ctxt); + MD5Update(&ctxt, seed, sizeof(seed)); + MD5Final(digest, &ctxt); + + /* + * RFC 3041 3.2.1. (3) + * Take the left-most 64-bits of the MD5 digest and set bit 6 (the + * left-most bit is numbered 0) to zero. + */ + bcopy(digest, ret, 8); + ret[0] &= ~EUI64_UBIT; + + /* + * XXX: we'd like to ensure that the generated value is not zero + * for simplicity. If the caclculated digest happens to be zero, + * use a random non-zero value as the last resort. + */ + if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) { + nd6log((LOG_INFO, + "generate_tmp_ifid: computed MD5 value is zero.\n")); + + val32 = arc4random(); + val32 = 1 + (val32 % (0xffffffff - 1)); + } + + /* + * RFC 3041 3.2.1. (4) + * Take the rightmost 64-bits of the MD5 digest and save them in + * stable storage as the history value to be used in the next + * iteration of the algorithm. + */ + bcopy(&digest[8], seed0, 8); + + if (0) { /* for debugging purposes only */ + int i; + + printf("to: "); + for (i = 0; i < 16; i++) + printf("%02x", digest[i]); + printf("\n"); + } + + return 0; +} + +/* + * Get interface identifier for the specified interface. 
+ * XXX assumes single sockaddr_dl (AF_LINK address) per an interface + * + * in6 - upper 64bits are preserved + */ +int +in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6) +{ + struct ifaddr *ifa; + struct sockaddr_dl *sdl; + u_int8_t *addr; + size_t addrlen; + static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + static u_int8_t allone[8] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + sdl = (struct sockaddr_dl *)ifa->ifa_addr; + if (sdl == NULL) + continue; + if (sdl->sdl_alen == 0) + continue; + + goto found; + } + IF_ADDR_UNLOCK(ifp); + + return -1; + +found: + IF_ADDR_LOCK_ASSERT(ifp); + addr = LLADDR(sdl); + addrlen = sdl->sdl_alen; + + /* get EUI64 */ + switch (ifp->if_type) { + case IFT_ETHER: + case IFT_FDDI: + case IFT_ISO88025: + case IFT_ATM: + case IFT_IEEE1394: +#ifdef IFT_IEEE80211 + case IFT_IEEE80211: +#endif + /* IEEE802/EUI64 cases - what others? */ + /* IEEE1394 uses 16byte length address starting with EUI64 */ + if (addrlen > 8) + addrlen = 8; + + /* look at IEEE802/EUI64 only */ + if (addrlen != 8 && addrlen != 6) { + IF_ADDR_UNLOCK(ifp); + return -1; + } + + /* + * check for invalid MAC address - on bsdi, we see it a lot + * since wildboar configures all-zero MAC on pccard before + * card insertion. 
+ */ + if (bcmp(addr, allzero, addrlen) == 0) { + IF_ADDR_UNLOCK(ifp); + return -1; + } + if (bcmp(addr, allone, addrlen) == 0) { + IF_ADDR_UNLOCK(ifp); + return -1; + } + + /* make EUI64 address */ + if (addrlen == 8) + bcopy(addr, &in6->s6_addr[8], 8); + else if (addrlen == 6) { + in6->s6_addr[8] = addr[0]; + in6->s6_addr[9] = addr[1]; + in6->s6_addr[10] = addr[2]; + in6->s6_addr[11] = 0xff; + in6->s6_addr[12] = 0xfe; + in6->s6_addr[13] = addr[3]; + in6->s6_addr[14] = addr[4]; + in6->s6_addr[15] = addr[5]; + } + break; + + case IFT_ARCNET: + if (addrlen != 1) { + IF_ADDR_UNLOCK(ifp); + return -1; + } + if (!addr[0]) { + IF_ADDR_UNLOCK(ifp); + return -1; + } + + bzero(&in6->s6_addr[8], 8); + in6->s6_addr[15] = addr[0]; + + /* + * due to insufficient bitwidth, we mark it local. + */ + in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */ + in6->s6_addr[8] |= EUI64_UBIT; /* u bit to "local" */ + break; + + case IFT_GIF: +#ifdef IFT_STF + case IFT_STF: +#endif + /* + * RFC2893 says: "SHOULD use IPv4 address as ifid source". + * however, IPv4 address is not very suitable as unique + * identifier source (can be renumbered). + * we don't do this. + */ + IF_ADDR_UNLOCK(ifp); + return -1; + + default: + IF_ADDR_UNLOCK(ifp); + return -1; + } + + /* sanity check: g bit must not indicate "group" */ + if (EUI64_GROUP(in6)) { + IF_ADDR_UNLOCK(ifp); + return -1; + } + + /* convert EUI64 into IPv6 interface identifier */ + EUI64_TO_IFID(in6); + + /* + * sanity check: ifid must not be all zero, avoid conflict with + * subnet router anycast + */ + if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 && + bcmp(&in6->s6_addr[9], allzero, 7) == 0) { + IF_ADDR_UNLOCK(ifp); + return -1; + } + + IF_ADDR_UNLOCK(ifp); + return 0; +} + +/* + * Get interface identifier for the specified interface. If it is not + * available on ifp0, borrow interface identifier from other information + * sources. 
+ * + * altifp - secondary EUI64 source + */ +static int +get_ifid(struct ifnet *ifp0, struct ifnet *altifp, + struct in6_addr *in6) +{ + struct ifnet *ifp; + + /* first, try to get it from the interface itself */ + if (in6_get_hw_ifid(ifp0, in6) == 0) { + nd6log((LOG_DEBUG, "%s: got interface identifier from itself\n", + if_name(ifp0))); + goto success; + } + + /* try secondary EUI64 source. this basically is for ATM PVC */ + if (altifp && in6_get_hw_ifid(altifp, in6) == 0) { + nd6log((LOG_DEBUG, "%s: got interface identifier from %s\n", + if_name(ifp0), if_name(altifp))); + goto success; + } + + /* next, try to get it from some other hardware interface */ + IFNET_RLOCK_NOSLEEP(); + for (ifp = V_ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) { + if (ifp == ifp0) + continue; + if (in6_get_hw_ifid(ifp, in6) != 0) + continue; + + /* + * to borrow ifid from other interface, ifid needs to be + * globally unique + */ + if (IFID_UNIVERSAL(in6)) { + nd6log((LOG_DEBUG, + "%s: borrow interface identifier from %s\n", + if_name(ifp0), if_name(ifp))); + IFNET_RUNLOCK_NOSLEEP(); + goto success; + } + } + IFNET_RUNLOCK_NOSLEEP(); + + /* last resort: get from random number source */ + if (get_rand_ifid(ifp, in6) == 0) { + nd6log((LOG_DEBUG, + "%s: interface identifier generated by random number\n", + if_name(ifp0))); + goto success; + } + + printf("%s: failed to get interface identifier\n", if_name(ifp0)); + return -1; + +success: + nd6log((LOG_INFO, "%s: ifid: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", + if_name(ifp0), in6->s6_addr[8], in6->s6_addr[9], in6->s6_addr[10], + in6->s6_addr[11], in6->s6_addr[12], in6->s6_addr[13], + in6->s6_addr[14], in6->s6_addr[15])); + return 0; +} + +/* + * altifp - secondary EUI64 source + */ +static int +in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp) +{ + struct in6_ifaddr *ia; + struct in6_aliasreq ifra; + struct nd_prefixctl pr0; + int i, error; + + /* + * configure link-local address. 
+ */ + bzero(&ifra, sizeof(ifra)); + + /* + * in6_update_ifa() does not use ifra_name, but we accurately set it + * for safety. + */ + strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); + + ifra.ifra_addr.sin6_family = AF_INET6; + ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_addr.sin6_addr.s6_addr32[0] = htonl(0xfe800000); + ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0; + if ((ifp->if_flags & IFF_LOOPBACK) != 0) { + ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0; + ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1); + } else { + if (get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr) != 0) { + nd6log((LOG_ERR, + "%s: no ifid available\n", if_name(ifp))); + return (-1); + } + } + if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL)) + return (-1); + + ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_prefixmask.sin6_family = AF_INET6; + ifra.ifra_prefixmask.sin6_addr = in6mask64; + /* link-local addresses should NEVER expire. */ + ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; + ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; + + /* + * Now call in6_update_ifa() to do a bunch of procedures to configure + * a link-local address. We can set the 3rd argument to NULL, because + * we know there's no other link-local address on the interface + * and therefore we are adding one (instead of updating one). + */ + if ((error = in6_update_ifa(ifp, &ifra, NULL, + IN6_IFAUPDATE_DADDELAY)) != 0) { + /* + * XXX: When the interface does not support IPv6, this call + * would fail in the SIOCSIFADDR ioctl. I believe the + * notification is rather confusing in this case, so just + * suppress it. 
(jinmei@kame.net 20010130) + */ + if (error != EAFNOSUPPORT) + nd6log((LOG_NOTICE, "in6_ifattach_linklocal: failed to " + "configure a link-local address on %s " + "(errno=%d)\n", + if_name(ifp), error)); + return (-1); + } + + ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */ +#ifdef DIAGNOSTIC + if (!ia) { + panic("ia == NULL in in6_ifattach_linklocal"); + /* NOTREACHED */ + } +#endif + ifa_free(&ia->ia_ifa); + + /* + * Make the link-local prefix (fe80::%link/64) as on-link. + * Since we'd like to manage prefixes separately from addresses, + * we make an ND6 prefix structure for the link-local prefix, + * and add it to the prefix list as a never-expire prefix. + * XXX: this change might affect some existing code base... + */ + bzero(&pr0, sizeof(pr0)); + pr0.ndpr_ifp = ifp; + /* this should be 64 at this moment. */ + pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL); + pr0.ndpr_prefix = ifra.ifra_addr; + /* apply the mask for safety. (nd6_prelist_add will apply it again) */ + for (i = 0; i < 4; i++) { + pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= + in6mask64.s6_addr32[i]; + } + /* + * Initialize parameters. The link-local prefix must always be + * on-link, and its lifetimes never expire. + */ + pr0.ndpr_raf_onlink = 1; + pr0.ndpr_raf_auto = 1; /* probably meaningless */ + pr0.ndpr_vltime = ND6_INFINITE_LIFETIME; + pr0.ndpr_pltime = ND6_INFINITE_LIFETIME; + /* + * Since there is no other link-local addresses, nd6_prefix_lookup() + * probably returns NULL. However, we cannot always expect the result. + * For example, if we first remove the (only) existing link-local + * address, and then reconfigure another one, the prefix is still + * valid with referring to the old link-local address. 
+ */ + if (nd6_prefix_lookup(&pr0) == NULL) { + if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0) + return (error); + } + + return 0; +} + +/* + * ifp - must be IFT_LOOP + */ +static int +in6_ifattach_loopback(struct ifnet *ifp) +{ + struct in6_aliasreq ifra; + int error; + + bzero(&ifra, sizeof(ifra)); + + /* + * in6_update_ifa() does not use ifra_name, but we accurately set it + * for safety. + */ + strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); + + ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_prefixmask.sin6_family = AF_INET6; + ifra.ifra_prefixmask.sin6_addr = in6mask128; + + /* + * Always initialize ia_dstaddr (= broadcast address) to loopback + * address. Follows IPv4 practice - see in_ifinit(). + */ + ifra.ifra_dstaddr.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_dstaddr.sin6_family = AF_INET6; + ifra.ifra_dstaddr.sin6_addr = in6addr_loopback; + + ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_addr.sin6_family = AF_INET6; + ifra.ifra_addr.sin6_addr = in6addr_loopback; + + /* the loopback address should NEVER expire. */ + ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; + ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; + + /* we don't need to perform DAD on loopback interfaces. */ + ifra.ifra_flags |= IN6_IFF_NODAD; + + /* skip registration to the prefix list. XXX should be temporary. */ + ifra.ifra_flags |= IN6_IFF_NOPFX; + + /* + * We are sure that this is a newly assigned address, so we can set + * NULL to the 3rd arg. + */ + if ((error = in6_update_ifa(ifp, &ifra, NULL, 0)) != 0) { + nd6log((LOG_ERR, "in6_ifattach_loopback: failed to configure " + "the loopback address on %s (errno=%d)\n", + if_name(ifp), error)); + return (-1); + } + + return 0; +} + +/* + * compute NI group address, based on the current hostname setting. + * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later). + * + * when ifp == NULL, the caller is responsible for filling scopeid. 
+ */ +int +in6_nigroup(struct ifnet *ifp, const char *name, int namelen, + struct in6_addr *in6) +{ + struct prison *pr; + const char *p; + u_char *q; + MD5_CTX ctxt; + u_int8_t digest[16]; + char l; + char n[64]; /* a single label must not exceed 63 chars */ + + /* + * If no name is given and namelen is -1, + * we try to do the hostname lookup ourselves. + */ + if (!name && namelen == -1) { + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + name = pr->pr_hostname; + namelen = strlen(name); + } else + pr = NULL; + if (!name || !namelen) { + if (pr != NULL) + mtx_unlock(&pr->pr_mtx); + return -1; + } + + p = name; + while (p && *p && *p != '.' && p - name < namelen) + p++; + if (p == name || p - name > sizeof(n) - 1) { + if (pr != NULL) + mtx_unlock(&pr->pr_mtx); + return -1; /* label too long */ + } + l = p - name; + strncpy(n, name, l); + if (pr != NULL) + mtx_unlock(&pr->pr_mtx); + n[(int)l] = '\0'; + for (q = n; *q; q++) { + if ('A' <= *q && *q <= 'Z') + *q = *q - 'A' + 'a'; + } + + /* generate 8 bytes of pseudo-random value. */ + bzero(&ctxt, sizeof(ctxt)); + MD5Init(&ctxt); + MD5Update(&ctxt, &l, sizeof(l)); + MD5Update(&ctxt, n, l); + MD5Final(digest, &ctxt); + + bzero(in6, sizeof(*in6)); + in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL; + in6->s6_addr8[11] = 2; + bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3])); + if (in6_setscope(in6, ifp, NULL)) + return (-1); /* XXX: should not fail */ + + return 0; +} + +/* + * XXX multiple loopback interface needs more care. for instance, + * nodelocal address needs to be configured onto only one of them. 
+ * XXX multiple link-local address case + * + * altifp - secondary EUI64 source + */ +void +in6_ifattach(struct ifnet *ifp, struct ifnet *altifp) +{ + struct in6_ifaddr *ia; + struct in6_addr in6; + + /* some of the interfaces are inherently not IPv6 capable */ + switch (ifp->if_type) { + case IFT_PFLOG: + case IFT_PFSYNC: + case IFT_CARP: + return; + } + + /* + * quirks based on interface type + */ + switch (ifp->if_type) { +#ifdef IFT_STF + case IFT_STF: + /* + * 6to4 interface is a very special kind of beast. + * no multicast, no linklocal. RFC2529 specifies how to make + * linklocals for 6to4 interface, but there's no use and + * it is rather harmful to have one. + */ + goto statinit; +#endif + default: + break; + } + + /* + * usually, we require multicast capability to the interface + */ + if ((ifp->if_flags & IFF_MULTICAST) == 0) { + nd6log((LOG_INFO, "in6_ifattach: " + "%s is not multicast capable, IPv6 not enabled\n", + if_name(ifp))); + return; + } + + /* + * assign loopback address for loopback interface. + * XXX multiple loopback interface case. + */ + if ((ifp->if_flags & IFF_LOOPBACK) != 0) { + struct ifaddr *ifa; + + in6 = in6addr_loopback; + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &in6); + if (ifa == NULL) { + if (in6_ifattach_loopback(ifp) != 0) + return; + } else + ifa_free(ifa); + } + + /* + * assign a link-local address, if there's none. + */ + if (V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) { + ia = in6ifa_ifpforlinklocal(ifp, 0); + if (ia == NULL) { + if (in6_ifattach_linklocal(ifp, altifp) == 0) { + /* linklocal address assigned */ + } else { + /* failed to assign linklocal address. bark? */ + } + } else + ifa_free(&ia->ia_ifa); + } + +#ifdef IFT_STF /* XXX */ +statinit: +#endif + + /* update dynamically. */ + if (V_in6_maxmtu < ifp->if_mtu) + V_in6_maxmtu = ifp->if_mtu; +} + +/* + * NOTE: in6_ifdetach() does not support loopback if at this moment. 
+ * We don't need this function in bsdi, because interfaces are never removed + * from the ifnet list in bsdi. + */ +void +in6_ifdetach(struct ifnet *ifp) +{ + struct in6_ifaddr *ia; + struct ifaddr *ifa, *next; + struct radix_node_head *rnh; + struct rtentry *rt; + short rtflags; + struct sockaddr_in6 sin6; + struct in6_multi_mship *imm; + + /* remove neighbor management table */ + nd6_purge(ifp); + + /* nuke any of IPv6 addresses we have */ + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + in6_purgeaddr(ifa); + } + + /* undo everything done by in6_ifattach(), just in case */ + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { + if (ifa->ifa_addr->sa_family != AF_INET6 + || !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->sin6_addr)) { + continue; + } + + ia = (struct in6_ifaddr *)ifa; + + /* + * leave from multicast groups we have joined for the interface + */ + while ((imm = ia->ia6_memberships.lh_first) != NULL) { + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + + /* remove from the routing table */ + if ((ia->ia_flags & IFA_ROUTE) && + (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) { + rtflags = rt->rt_flags; + RTFREE_LOCKED(rt); + rtrequest(RTM_DELETE, (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&ia->ia_prefixmask, + rtflags, (struct rtentry **)0); + } + + /* remove from the linked list */ + IF_ADDR_LOCK(ifp); + TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); + IF_ADDR_UNLOCK(ifp); + ifa_free(ifa); /* if_addrhead */ + + IN6_IFADDR_WLOCK(); + TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link); + IN6_IFADDR_WUNLOCK(); + ifa_free(ifa); + } + + in6_pcbpurgeif0(&V_udbinfo, ifp); + in6_pcbpurgeif0(&V_ripcbinfo, ifp); + /* leave from all multicast groups joined */ + in6_purgemaddrs(ifp); + + /* + * remove neighbor management table. we call it twice just to make + * sure we nuke everything. maybe we need just one call. 
+ * XXX: since the first call did not release addresses, some prefixes + * might remain. We should call nd6_purge() again to release the + * prefixes after removing all addresses above. + * (Or can we just delay calling nd6_purge until at this point?) + */ + nd6_purge(ifp); + + /* remove route to link-local allnodes multicast (ff02::1) */ + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = in6addr_linklocal_allnodes; + if (in6_setscope(&sin6.sin6_addr, ifp, NULL)) + /* XXX: should not fail */ + return; + /* XXX grab lock first to avoid LOR */ + rnh = rt_tables_get_rnh(0, AF_INET6); + if (rnh != NULL) { + RADIX_NODE_HEAD_LOCK(rnh); + rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED); + if (rt) { + if (rt->rt_ifp == ifp) + rtexpunge(rt); + RTFREE_LOCKED(rt); + } + RADIX_NODE_HEAD_UNLOCK(rnh); + } +} + +int +in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf, + const u_int8_t *baseid, int generate) +{ + u_int8_t nullbuf[8]; + struct nd_ifinfo *ndi = ND_IFINFO(ifp); + + bzero(nullbuf, sizeof(nullbuf)); + if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) == 0) { + /* we've never created a random ID. Create a new one. 
*/ + generate = 1; + } + + if (generate) { + bcopy(baseid, ndi->randomseed1, sizeof(ndi->randomseed1)); + + /* generate_tmp_ifid will update seedn and buf */ + (void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1, + ndi->randomid); + } + bcopy(ndi->randomid, retbuf, 8); + + return (0); +} + +void +in6_tmpaddrtimer(void *arg) +{ + CURVNET_SET((struct vnet *) arg); + struct nd_ifinfo *ndi; + u_int8_t nullbuf[8]; + struct ifnet *ifp; + + callout_reset(&V_in6_tmpaddrtimer_ch, + (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - + V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet); + + bzero(nullbuf, sizeof(nullbuf)); + for (ifp = TAILQ_FIRST(&V_ifnet); ifp; + ifp = TAILQ_NEXT(ifp, if_list)) { + ndi = ND_IFINFO(ifp); + if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) { + /* + * We've been generating a random ID on this interface. + * Create a new one. + */ + (void)generate_tmp_ifid(ndi->randomseed0, + ndi->randomseed1, ndi->randomid); + } + } + + CURVNET_RESTORE(); +} + +static void +in6_purgemaddrs(struct ifnet *ifp) +{ + LIST_HEAD(,in6_multi) purgeinms; + struct in6_multi *inm, *tinm; + struct ifmultiaddr *ifma; + + LIST_INIT(&purgeinms); + IN6_MULTI_LOCK(); + + /* + * Extract list of in6_multi associated with the detaching ifp + * which the PF_INET6 layer is about to release. + * We need to do this as IF_ADDR_LOCK() may be re-acquired + * by code further down. 
+ */ + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET6 || + ifma->ifma_protospec == NULL) + continue; + inm = (struct in6_multi *)ifma->ifma_protospec; + LIST_INSERT_HEAD(&purgeinms, inm, in6m_entry); + } + IF_ADDR_UNLOCK(ifp); + + LIST_FOREACH_SAFE(inm, &purgeinms, in6m_entry, tinm) { + LIST_REMOVE(inm, in6m_entry); + in6m_release_locked(inm); + } + mld_ifdetach(ifp); + + IN6_MULTI_UNLOCK(); +} diff --git a/freebsd/sys/netinet6/in6_ifattach.h b/freebsd/sys/netinet6/in6_ifattach.h new file mode 100644 index 00000000..d7db4e47 --- /dev/null +++ b/freebsd/sys/netinet6/in6_ifattach.h @@ -0,0 +1,45 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_ifattach.h,v 1.14 2001/02/08 12:48:39 jinmei Exp $ + * $FreeBSD$ + */ + +#ifndef _NETINET6_IN6_IFATTACH_HH_ +#define _NETINET6_IN6_IFATTACH_HH_ + +#ifdef _KERNEL +void in6_ifattach __P((struct ifnet *, struct ifnet *)); +void in6_ifdetach __P((struct ifnet *)); +int in6_get_tmpifid __P((struct ifnet *, u_int8_t *, const u_int8_t *, int)); +void in6_tmpaddrtimer __P((void *)); +int in6_get_hw_ifid __P((struct ifnet *, struct in6_addr *)); +int in6_nigroup __P((struct ifnet *, const char *, int, struct in6_addr *)); +#endif /* _KERNEL */ + +#endif /* _NETINET6_IN6_IFATTACH_HH_ */ diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c new file mode 100644 index 00000000..a75a4ed3 --- /dev/null +++ b/freebsd/sys/netinet6/in6_mcast.c @@ -0,0 +1,2840 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/* + * Copyright (c) 2009 Bruce Simpson. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * IPv6 multicast socket, group, and socket option processing module. + * Normative references: RFC 2292, RFC 3492, RFC 3542, RFC 3678, RFC 3810. 
+ */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/sysctl.h> +#include <freebsd/sys/priv.h> +#include <freebsd/sys/ktr.h> +#include <freebsd/sys/tree.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/route.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/tcp_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet6/mld6_var.h> +#include <freebsd/netinet6/scope6_var.h> + +#ifndef KTR_MLD +#define KTR_MLD KTR_INET6 +#endif + +#ifndef __SOCKUNION_DECLARED +union sockunion { + struct sockaddr_storage ss; + struct sockaddr sa; + struct sockaddr_dl sdl; + struct sockaddr_in6 sin6; +}; +typedef union sockunion sockunion_t; +#define __SOCKUNION_DECLARED +#endif /* __SOCKUNION_DECLARED */ + +static MALLOC_DEFINE(M_IN6MFILTER, "in6_mfilter", + "IPv6 multicast PCB-layer source filter"); +static MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "IPv6 multicast group"); +static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "IPv6 multicast options"); +static MALLOC_DEFINE(M_IP6MSOURCE, "ip6_msource", + "IPv6 multicast MLD-layer source filter"); + +RB_GENERATE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp); + +/* + * Locking: + * - Lock order is: Giant, INP_WLOCK, IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. 
+ * - The IF_ADDR_LOCK is implicitly taken by in6m_lookup() earlier, however + * it can be taken by code in net/if.c also. + * - ip6_moptions and in6_mfilter are covered by the INP_WLOCK. + * + * struct in6_multi is covered by IN6_MULTI_LOCK. There isn't strictly + * any need for in6_multi itself to be virtualized -- it is bound to an ifp + * anyway no matter what happens. + */ +struct mtx in6_multi_mtx; +MTX_SYSINIT(in6_multi_mtx, &in6_multi_mtx, "in6_multi_mtx", MTX_DEF); + +static void im6f_commit(struct in6_mfilter *); +static int im6f_get_source(struct in6_mfilter *imf, + const struct sockaddr_in6 *psin, + struct in6_msource **); +static struct in6_msource * + im6f_graft(struct in6_mfilter *, const uint8_t, + const struct sockaddr_in6 *); +static void im6f_leave(struct in6_mfilter *); +static int im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *); +static void im6f_purge(struct in6_mfilter *); +static void im6f_rollback(struct in6_mfilter *); +static void im6f_reap(struct in6_mfilter *); +static int im6o_grow(struct ip6_moptions *); +static size_t im6o_match_group(const struct ip6_moptions *, + const struct ifnet *, const struct sockaddr *); +static struct in6_msource * + im6o_match_source(const struct ip6_moptions *, const size_t, + const struct sockaddr *); +static void im6s_merge(struct ip6_msource *ims, + const struct in6_msource *lims, const int rollback); +static int in6_mc_get(struct ifnet *, const struct in6_addr *, + struct in6_multi **); +static int in6m_get_source(struct in6_multi *inm, + const struct in6_addr *addr, const int noalloc, + struct ip6_msource **pims); +static int in6m_is_ifp_detached(const struct in6_multi *); +static int in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *); +static void in6m_purge(struct in6_multi *); +static void in6m_reap(struct in6_multi *); +static struct ip6_moptions * + in6p_findmoptions(struct inpcb *); +static int in6p_get_source_filters(struct inpcb *, struct sockopt *); +static int 
in6p_join_group(struct inpcb *, struct sockopt *); +static int in6p_leave_group(struct inpcb *, struct sockopt *); +static struct ifnet * + in6p_lookup_mcast_ifp(const struct inpcb *, + const struct sockaddr_in6 *); +static int in6p_block_unblock_source(struct inpcb *, struct sockopt *); +static int in6p_set_multicast_if(struct inpcb *, struct sockopt *); +static int in6p_set_source_filters(struct inpcb *, struct sockopt *); +static int sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS); + +SYSCTL_DECL(_net_inet6_ip6); /* XXX Not in any common header. */ + +SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv6 multicast"); + +static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER; +SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc, + CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxgrpsrc, 0, + "Max source filters per group"); +TUNABLE_ULONG("net.inet6.ip6.mcast.maxgrpsrc", &in6_mcast_maxgrpsrc); + +static u_long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER; +SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc, + CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxsocksrc, 0, + "Max source filters per socket"); +TUNABLE_ULONG("net.inet6.ip6.mcast.maxsocksrc", &in6_mcast_maxsocksrc); + +/* TODO Virtualize this switch. */ +int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP; +SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN, + &in6_mcast_loop, 0, "Loopback multicast datagrams by default"); +TUNABLE_INT("net.inet6.ip6.mcast.loop", &in6_mcast_loop); + +SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters, + CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters, + "Per-interface stack-wide source filters"); + +/* + * Inline function which wraps assertions for a valid ifp. + * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp + * is detached. 
+ */ +static int __inline +in6m_is_ifp_detached(const struct in6_multi *inm) +{ + struct ifnet *ifp; + + KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__)); + ifp = inm->in6m_ifma->ifma_ifp; + if (ifp != NULL) { + /* + * Sanity check that network-layer notion of ifp is the + * same as that of link-layer. + */ + KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__)); + } + + return (ifp == NULL); +} + +/* + * Initialize an in6_mfilter structure to a known state at t0, t1 + * with an empty source filter list. + */ +static __inline void +im6f_init(struct in6_mfilter *imf, const int st0, const int st1) +{ + memset(imf, 0, sizeof(struct in6_mfilter)); + RB_INIT(&imf->im6f_sources); + imf->im6f_st[0] = st0; + imf->im6f_st[1] = st1; +} + +/* + * Resize the ip6_moptions vector to the next power-of-two minus 1. + * May be called with locks held; do not sleep. + */ +static int +im6o_grow(struct ip6_moptions *imo) +{ + struct in6_multi **nmships; + struct in6_multi **omships; + struct in6_mfilter *nmfilters; + struct in6_mfilter *omfilters; + size_t idx; + size_t newmax; + size_t oldmax; + + nmships = NULL; + nmfilters = NULL; + omships = imo->im6o_membership; + omfilters = imo->im6o_mfilters; + oldmax = imo->im6o_max_memberships; + newmax = ((oldmax + 1) * 2) - 1; + + if (newmax <= IPV6_MAX_MEMBERSHIPS) { + nmships = (struct in6_multi **)realloc(omships, + sizeof(struct in6_multi *) * newmax, M_IP6MOPTS, M_NOWAIT); + nmfilters = (struct in6_mfilter *)realloc(omfilters, + sizeof(struct in6_mfilter) * newmax, M_IN6MFILTER, + M_NOWAIT); + if (nmships != NULL && nmfilters != NULL) { + /* Initialize newly allocated source filter heads. 
*/ + for (idx = oldmax; idx < newmax; idx++) { + im6f_init(&nmfilters[idx], MCAST_UNDEFINED, + MCAST_EXCLUDE); + } + imo->im6o_max_memberships = newmax; + imo->im6o_membership = nmships; + imo->im6o_mfilters = nmfilters; + } + } + + if (nmships == NULL || nmfilters == NULL) { + if (nmships != NULL) + free(nmships, M_IP6MOPTS); + if (nmfilters != NULL) + free(nmfilters, M_IN6MFILTER); + return (ETOOMANYREFS); + } + + return (0); +} + +/* + * Find an IPv6 multicast group entry for this ip6_moptions instance + * which matches the specified group, and optionally an interface. + * Return its index into the array, or -1 if not found. + */ +static size_t +im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp, + const struct sockaddr *group) +{ + const struct sockaddr_in6 *gsin6; + struct in6_multi **pinm; + int idx; + int nmships; + + gsin6 = (const struct sockaddr_in6 *)group; + + /* The im6o_membership array may be lazy allocated. */ + if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0) + return (-1); + + nmships = imo->im6o_num_memberships; + pinm = &imo->im6o_membership[0]; + for (idx = 0; idx < nmships; idx++, pinm++) { + if (*pinm == NULL) + continue; + if ((ifp == NULL || ((*pinm)->in6m_ifp == ifp)) && + IN6_ARE_ADDR_EQUAL(&(*pinm)->in6m_addr, + &gsin6->sin6_addr)) { + break; + } + } + if (idx >= nmships) + idx = -1; + + return (idx); +} + +/* + * Find an IPv6 multicast source entry for this imo which matches + * the given group index for this socket, and source address. + * + * XXX TODO: The scope ID, if present in src, is stripped before + * any comparison. We SHOULD enforce scope/zone checks where the source + * filter entry has a link scope. + * + * NOTE: This does not check if the entry is in-mode, merely if + * it exists, which may not be the desired behaviour. 
+ */ +static struct in6_msource * +im6o_match_source(const struct ip6_moptions *imo, const size_t gidx, + const struct sockaddr *src) +{ + struct ip6_msource find; + struct in6_mfilter *imf; + struct ip6_msource *ims; + const sockunion_t *psa; + + KASSERT(src->sa_family == AF_INET6, ("%s: !AF_INET6", __func__)); + KASSERT(gidx != -1 && gidx < imo->im6o_num_memberships, + ("%s: invalid index %d\n", __func__, (int)gidx)); + + /* The im6o_mfilters array may be lazy allocated. */ + if (imo->im6o_mfilters == NULL) + return (NULL); + imf = &imo->im6o_mfilters[gidx]; + + psa = (const sockunion_t *)src; + find.im6s_addr = psa->sin6.sin6_addr; + in6_clearscope(&find.im6s_addr); /* XXX */ + ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); + + return ((struct in6_msource *)ims); +} + +/* + * Perform filtering for multicast datagrams on a socket by group and source. + * + * Returns 0 if a datagram should be allowed through, or various error codes + * if the socket was not a member of the group, or the source was muted, etc. + */ +int +im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp, + const struct sockaddr *group, const struct sockaddr *src) +{ + size_t gidx; + struct in6_msource *ims; + int mode; + + KASSERT(ifp != NULL, ("%s: null ifp", __func__)); + + gidx = im6o_match_group(imo, ifp, group); + if (gidx == -1) + return (MCAST_NOTGMEMBER); + + /* + * Check if the source was included in an (S,G) join. + * Allow reception on exclusive memberships by default, + * reject reception on inclusive memberships by default. + * Exclude source only if an in-mode exclude filter exists. + * Include source only if an in-mode include filter exists. + * NOTE: We are comparing group state here at MLD t1 (now) + * with socket-layer t0 (since last downcall). 
+ */ + mode = imo->im6o_mfilters[gidx].im6f_st[1]; + ims = im6o_match_source(imo, gidx, src); + + if ((ims == NULL && mode == MCAST_INCLUDE) || + (ims != NULL && ims->im6sl_st[0] != mode)) + return (MCAST_NOTSMEMBER); + + return (MCAST_PASS); +} + +/* + * Find and return a reference to an in6_multi record for (ifp, group), + * and bump its reference count. + * If one does not exist, try to allocate it, and update link-layer multicast + * filters on ifp to listen for group. + * Assumes the IN6_MULTI lock is held across the call. + * Return 0 if successful, otherwise return an appropriate error code. + */ +static int +in6_mc_get(struct ifnet *ifp, const struct in6_addr *group, + struct in6_multi **pinm) +{ + struct sockaddr_in6 gsin6; + struct ifmultiaddr *ifma; + struct in6_multi *inm; + int error; + + error = 0; + + /* + * XXX: Accesses to ifma_protospec must be covered by IF_ADDR_LOCK; + * if_addmulti() takes this mutex itself, so we must drop and + * re-acquire around the call. + */ + IN6_MULTI_LOCK_ASSERT(); + IF_ADDR_LOCK(ifp); + + inm = in6m_lookup_locked(ifp, group); + if (inm != NULL) { + /* + * If we already joined this group, just bump the + * refcount and return it. + */ + KASSERT(inm->in6m_refcount >= 1, + ("%s: bad refcount %d", __func__, inm->in6m_refcount)); + ++inm->in6m_refcount; + *pinm = inm; + goto out_locked; + } + + memset(&gsin6, 0, sizeof(gsin6)); + gsin6.sin6_family = AF_INET6; + gsin6.sin6_len = sizeof(struct sockaddr_in6); + gsin6.sin6_addr = *group; + + /* + * Check if a link-layer group is already associated + * with this network-layer group on the given ifnet. + */ + IF_ADDR_UNLOCK(ifp); + error = if_addmulti(ifp, (struct sockaddr *)&gsin6, &ifma); + if (error != 0) + return (error); + IF_ADDR_LOCK(ifp); + + /* + * If something other than netinet6 is occupying the link-layer + * group, print a meaningful error message and back out of + * the allocation. 
+ * Otherwise, bump the refcount on the existing network-layer + * group association and return it. + */ + if (ifma->ifma_protospec != NULL) { + inm = (struct in6_multi *)ifma->ifma_protospec; +#ifdef INVARIANTS + KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", + __func__)); + KASSERT(ifma->ifma_addr->sa_family == AF_INET6, + ("%s: ifma not AF_INET6", __func__)); + KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); + if (inm->in6m_ifma != ifma || inm->in6m_ifp != ifp || + !IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, group)) + panic("%s: ifma %p is inconsistent with %p (%p)", + __func__, ifma, inm, group); +#endif + ++inm->in6m_refcount; + *pinm = inm; + goto out_locked; + } + + IF_ADDR_LOCK_ASSERT(ifp); + + /* + * A new in6_multi record is needed; allocate and initialize it. + * We DO NOT perform an MLD join as the in6_ layer may need to + * push an initial source list down to MLD to support SSM. + * + * The initial source filter state is INCLUDE, {} as per the RFC. + * Pending state-changes per group are subject to a bounds check. + */ + inm = malloc(sizeof(*inm), M_IP6MADDR, M_NOWAIT | M_ZERO); + if (inm == NULL) { + if_delmulti_ifma(ifma); + error = ENOMEM; + goto out_locked; + } + inm->in6m_addr = *group; + inm->in6m_ifp = ifp; + inm->in6m_mli = MLD_IFINFO(ifp); + inm->in6m_ifma = ifma; + inm->in6m_refcount = 1; + inm->in6m_state = MLD_NOT_MEMBER; + IFQ_SET_MAXLEN(&inm->in6m_scq, MLD_MAX_STATE_CHANGES); + + inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED; + inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; + RB_INIT(&inm->in6m_srcs); + + ifma->ifma_protospec = inm; + *pinm = inm; + +out_locked: + IF_ADDR_UNLOCK(ifp); + return (error); +} + +/* + * Drop a reference to an in6_multi record. + * + * If the refcount drops to 0, free the in6_multi record and + * delete the underlying link-layer membership. 
+ */ +void +in6m_release_locked(struct in6_multi *inm) +{ + struct ifmultiaddr *ifma; + + IN6_MULTI_LOCK_ASSERT(); + + CTR2(KTR_MLD, "%s: refcount is %d", __func__, inm->in6m_refcount); + + if (--inm->in6m_refcount > 0) { + CTR2(KTR_MLD, "%s: refcount is now %d", __func__, + inm->in6m_refcount); + return; + } + + CTR2(KTR_MLD, "%s: freeing inm %p", __func__, inm); + + ifma = inm->in6m_ifma; + + /* XXX this access is not covered by IF_ADDR_LOCK */ + CTR2(KTR_MLD, "%s: purging ifma %p", __func__, ifma); + KASSERT(ifma->ifma_protospec == inm, + ("%s: ifma_protospec != inm", __func__)); + ifma->ifma_protospec = NULL; + + in6m_purge(inm); + + free(inm, M_IP6MADDR); + + if_delmulti_ifma(ifma); +} + +/* + * Clear recorded source entries for a group. + * Used by the MLD code. Caller must hold the IN6_MULTI lock. + * FIXME: Should reap. + */ +void +in6m_clear_recorded(struct in6_multi *inm) +{ + struct ip6_msource *ims; + + IN6_MULTI_LOCK_ASSERT(); + + RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) { + if (ims->im6s_stp) { + ims->im6s_stp = 0; + --inm->in6m_st[1].iss_rec; + } + } + KASSERT(inm->in6m_st[1].iss_rec == 0, + ("%s: iss_rec %d not 0", __func__, inm->in6m_st[1].iss_rec)); +} + +/* + * Record a source as pending for a Source-Group MLDv2 query. + * This lives here as it modifies the shared tree. + * + * inm is the group descriptor. + * naddr is the address of the source to record in network-byte order. + * + * If the net.inet6.mld.sgalloc sysctl is non-zero, we will + * lazy-allocate a source node in response to an SG query. + * Otherwise, no allocation is performed. This saves some memory + * with the trade-off that the source will not be reported to the + * router if joined in the window between the query response and + * the group actually being joined on the local host. + * + * VIMAGE: XXX: Currently the mld_sgalloc feature has been removed. + * This turns off the allocation of a recorded source entry if + * the group has not been joined. 
+ * + * Return 0 if the source didn't exist or was already marked as recorded. + * Return 1 if the source was marked as recorded by this function. + * Return <0 if any error occured (negated errno code). + */ +int +in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr) +{ + struct ip6_msource find; + struct ip6_msource *ims, *nims; + + IN6_MULTI_LOCK_ASSERT(); + + find.im6s_addr = *addr; + ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find); + if (ims && ims->im6s_stp) + return (0); + if (ims == NULL) { + if (inm->in6m_nsrc == in6_mcast_maxgrpsrc) + return (-ENOSPC); + nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE, + M_NOWAIT | M_ZERO); + if (nims == NULL) + return (-ENOMEM); + nims->im6s_addr = find.im6s_addr; + RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims); + ++inm->in6m_nsrc; + ims = nims; + } + + /* + * Mark the source as recorded and update the recorded + * source count. + */ + ++ims->im6s_stp; + ++inm->in6m_st[1].iss_rec; + + return (1); +} + +/* + * Return a pointer to an in6_msource owned by an in6_mfilter, + * given its source address. + * Lazy-allocate if needed. If this is a new entry its filter state is + * undefined at t0. + * + * imf is the filter set being modified. + * addr is the source address. + * + * SMPng: May be called with locks held; malloc must not block. 
+ */ +static int +im6f_get_source(struct in6_mfilter *imf, const struct sockaddr_in6 *psin, + struct in6_msource **plims) +{ + struct ip6_msource find; + struct ip6_msource *ims, *nims; + struct in6_msource *lims; + int error; + + error = 0; + ims = NULL; + lims = NULL; + + find.im6s_addr = psin->sin6_addr; + ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); + lims = (struct in6_msource *)ims; + if (lims == NULL) { + if (imf->im6f_nsrc == in6_mcast_maxsocksrc) + return (ENOSPC); + nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER, + M_NOWAIT | M_ZERO); + if (nims == NULL) + return (ENOMEM); + lims = (struct in6_msource *)nims; + lims->im6s_addr = find.im6s_addr; + lims->im6sl_st[0] = MCAST_UNDEFINED; + RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims); + ++imf->im6f_nsrc; + } + + *plims = lims; + + return (error); +} + +/* + * Graft a source entry into an existing socket-layer filter set, + * maintaining any required invariants and checking allocations. + * + * The source is marked as being in the new filter mode at t1. + * + * Return the pointer to the new node, otherwise return NULL. + */ +static struct in6_msource * +im6f_graft(struct in6_mfilter *imf, const uint8_t st1, + const struct sockaddr_in6 *psin) +{ + struct ip6_msource *nims; + struct in6_msource *lims; + + nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER, + M_NOWAIT | M_ZERO); + if (nims == NULL) + return (NULL); + lims = (struct in6_msource *)nims; + lims->im6s_addr = psin->sin6_addr; + lims->im6sl_st[0] = MCAST_UNDEFINED; + lims->im6sl_st[1] = st1; + RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims); + ++imf->im6f_nsrc; + + return (lims); +} + +/* + * Prune a source entry from an existing socket-layer filter set, + * maintaining any required invariants and checking allocations. + * + * The source is marked as being left at t1, it is not freed. + * + * Return 0 if no error occurred, otherwise return an errno value. 
+ */ +static int +im6f_prune(struct in6_mfilter *imf, const struct sockaddr_in6 *psin) +{ + struct ip6_msource find; + struct ip6_msource *ims; + struct in6_msource *lims; + + find.im6s_addr = psin->sin6_addr; + ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); + if (ims == NULL) + return (ENOENT); + lims = (struct in6_msource *)ims; + lims->im6sl_st[1] = MCAST_UNDEFINED; + return (0); +} + +/* + * Revert socket-layer filter set deltas at t1 to t0 state. + */ +static void +im6f_rollback(struct in6_mfilter *imf) +{ + struct ip6_msource *ims, *tims; + struct in6_msource *lims; + + RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { + lims = (struct in6_msource *)ims; + if (lims->im6sl_st[0] == lims->im6sl_st[1]) { + /* no change at t1 */ + continue; + } else if (lims->im6sl_st[0] != MCAST_UNDEFINED) { + /* revert change to existing source at t1 */ + lims->im6sl_st[1] = lims->im6sl_st[0]; + } else { + /* revert source added t1 */ + CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); + RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); + free(ims, M_IN6MFILTER); + imf->im6f_nsrc--; + } + } + imf->im6f_st[1] = imf->im6f_st[0]; +} + +/* + * Mark socket-layer filter set as INCLUDE {} at t1. + */ +static void +im6f_leave(struct in6_mfilter *imf) +{ + struct ip6_msource *ims; + struct in6_msource *lims; + + RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { + lims = (struct in6_msource *)ims; + lims->im6sl_st[1] = MCAST_UNDEFINED; + } + imf->im6f_st[1] = MCAST_INCLUDE; +} + +/* + * Mark socket-layer filter set deltas as committed. + */ +static void +im6f_commit(struct in6_mfilter *imf) +{ + struct ip6_msource *ims; + struct in6_msource *lims; + + RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { + lims = (struct in6_msource *)ims; + lims->im6sl_st[0] = lims->im6sl_st[1]; + } + imf->im6f_st[0] = imf->im6f_st[1]; +} + +/* + * Reap unreferenced sources from socket-layer filter set. 
+ */ +static void +im6f_reap(struct in6_mfilter *imf) +{ + struct ip6_msource *ims, *tims; + struct in6_msource *lims; + + RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { + lims = (struct in6_msource *)ims; + if ((lims->im6sl_st[0] == MCAST_UNDEFINED) && + (lims->im6sl_st[1] == MCAST_UNDEFINED)) { + CTR2(KTR_MLD, "%s: free lims %p", __func__, ims); + RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); + free(ims, M_IN6MFILTER); + imf->im6f_nsrc--; + } + } +} + +/* + * Purge socket-layer filter set. + */ +static void +im6f_purge(struct in6_mfilter *imf) +{ + struct ip6_msource *ims, *tims; + + RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { + CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); + RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); + free(ims, M_IN6MFILTER); + imf->im6f_nsrc--; + } + imf->im6f_st[0] = imf->im6f_st[1] = MCAST_UNDEFINED; + KASSERT(RB_EMPTY(&imf->im6f_sources), + ("%s: im6f_sources not empty", __func__)); +} + +/* + * Look up a source filter entry for a multicast group. + * + * inm is the group descriptor to work with. + * addr is the IPv6 address to look up. + * noalloc may be non-zero to suppress allocation of sources. + * *pims will be set to the address of the retrieved or allocated source. + * + * SMPng: NOTE: may be called with locks held. + * Return 0 if successful, otherwise return a non-zero error code. 
+ */ +static int +in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr, + const int noalloc, struct ip6_msource **pims) +{ + struct ip6_msource find; + struct ip6_msource *ims, *nims; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + find.im6s_addr = *addr; + ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find); + if (ims == NULL && !noalloc) { + if (inm->in6m_nsrc == in6_mcast_maxgrpsrc) + return (ENOSPC); + nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE, + M_NOWAIT | M_ZERO); + if (nims == NULL) + return (ENOMEM); + nims->im6s_addr = *addr; + RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims); + ++inm->in6m_nsrc; + ims = nims; + CTR3(KTR_MLD, "%s: allocated %s as %p", __func__, + ip6_sprintf(ip6tbuf, addr), ims); + } + + *pims = ims; + return (0); +} + +/* + * Merge socket-layer source into MLD-layer source. + * If rollback is non-zero, perform the inverse of the merge. + */ +static void +im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims, + const int rollback) +{ + int n = rollback ? -1 : 1; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; + + ip6_sprintf(ip6tbuf, &lims->im6s_addr); +#endif + + if (lims->im6sl_st[0] == MCAST_EXCLUDE) { + CTR3(KTR_MLD, "%s: t1 ex -= %d on %s", __func__, n, ip6tbuf); + ims->im6s_st[1].ex -= n; + } else if (lims->im6sl_st[0] == MCAST_INCLUDE) { + CTR3(KTR_MLD, "%s: t1 in -= %d on %s", __func__, n, ip6tbuf); + ims->im6s_st[1].in -= n; + } + + if (lims->im6sl_st[1] == MCAST_EXCLUDE) { + CTR3(KTR_MLD, "%s: t1 ex += %d on %s", __func__, n, ip6tbuf); + ims->im6s_st[1].ex += n; + } else if (lims->im6sl_st[1] == MCAST_INCLUDE) { + CTR3(KTR_MLD, "%s: t1 in += %d on %s", __func__, n, ip6tbuf); + ims->im6s_st[1].in += n; + } +} + +/* + * Atomically update the global in6_multi state, when a membership's + * filter list is being updated in any way. + * + * imf is the per-inpcb-membership group filter pointer. + * A fake imf may be passed for in-kernel consumers. 
+ * + * XXX This is a candidate for a set-symmetric-difference style loop + * which would eliminate the repeated lookup from root of ims nodes, + * as they share the same key space. + * + * If any error occurred this function will back out of refcounts + * and return a non-zero value. + */ +static int +in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) +{ + struct ip6_msource *ims, *nims; + struct in6_msource *lims; + int schanged, error; + int nsrc0, nsrc1; + + schanged = 0; + error = 0; + nsrc1 = nsrc0 = 0; + + /* + * Update the source filters first, as this may fail. + * Maintain count of in-mode filters at t0, t1. These are + * used to work out if we transition into ASM mode or not. + * Maintain a count of source filters whose state was + * actually modified by this operation. + */ + RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { + lims = (struct in6_msource *)ims; + if (lims->im6sl_st[0] == imf->im6f_st[0]) nsrc0++; + if (lims->im6sl_st[1] == imf->im6f_st[1]) nsrc1++; + if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue; + error = in6m_get_source(inm, &lims->im6s_addr, 0, &nims); + ++schanged; + if (error) + break; + im6s_merge(nims, lims, 0); + } + if (error) { + struct ip6_msource *bims; + + RB_FOREACH_REVERSE_FROM(ims, ip6_msource_tree, nims) { + lims = (struct in6_msource *)ims; + if (lims->im6sl_st[0] == lims->im6sl_st[1]) + continue; + (void)in6m_get_source(inm, &lims->im6s_addr, 1, &bims); + if (bims == NULL) + continue; + im6s_merge(bims, lims, 1); + } + goto out_reap; + } + + CTR3(KTR_MLD, "%s: imf filters in-mode: %d at t0, %d at t1", + __func__, nsrc0, nsrc1); + + /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ + if (imf->im6f_st[0] == imf->im6f_st[1] && + imf->im6f_st[1] == MCAST_INCLUDE) { + if (nsrc1 == 0) { + CTR1(KTR_MLD, "%s: --in on inm at t1", __func__); + --inm->in6m_st[1].iss_in; + } + } + + /* Handle filter mode transition on socket. 
*/ + if (imf->im6f_st[0] != imf->im6f_st[1]) { + CTR3(KTR_MLD, "%s: imf transition %d to %d", + __func__, imf->im6f_st[0], imf->im6f_st[1]); + + if (imf->im6f_st[0] == MCAST_EXCLUDE) { + CTR1(KTR_MLD, "%s: --ex on inm at t1", __func__); + --inm->in6m_st[1].iss_ex; + } else if (imf->im6f_st[0] == MCAST_INCLUDE) { + CTR1(KTR_MLD, "%s: --in on inm at t1", __func__); + --inm->in6m_st[1].iss_in; + } + + if (imf->im6f_st[1] == MCAST_EXCLUDE) { + CTR1(KTR_MLD, "%s: ex++ on inm at t1", __func__); + inm->in6m_st[1].iss_ex++; + } else if (imf->im6f_st[1] == MCAST_INCLUDE && nsrc1 > 0) { + CTR1(KTR_MLD, "%s: in++ on inm at t1", __func__); + inm->in6m_st[1].iss_in++; + } + } + + /* + * Track inm filter state in terms of listener counts. + * If there are any exclusive listeners, stack-wide + * membership is exclusive. + * Otherwise, if only inclusive listeners, stack-wide is inclusive. + * If no listeners remain, state is undefined at t1, + * and the MLD lifecycle for this group should finish. + */ + if (inm->in6m_st[1].iss_ex > 0) { + CTR1(KTR_MLD, "%s: transition to EX", __func__); + inm->in6m_st[1].iss_fmode = MCAST_EXCLUDE; + } else if (inm->in6m_st[1].iss_in > 0) { + CTR1(KTR_MLD, "%s: transition to IN", __func__); + inm->in6m_st[1].iss_fmode = MCAST_INCLUDE; + } else { + CTR1(KTR_MLD, "%s: transition to UNDEF", __func__); + inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; + } + + /* Decrement ASM listener count on transition out of ASM mode. */ + if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { + if ((imf->im6f_st[1] != MCAST_EXCLUDE) || + (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) + CTR1(KTR_MLD, "%s: --asm on inm at t1", __func__); + --inm->in6m_st[1].iss_asm; + } + + /* Increment ASM listener count on transition to ASM mode. 
*/ + if (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { + CTR1(KTR_MLD, "%s: asm++ on inm at t1", __func__); + inm->in6m_st[1].iss_asm++; + } + + CTR3(KTR_MLD, "%s: merged imf %p to inm %p", __func__, imf, inm); + in6m_print(inm); + +out_reap: + if (schanged > 0) { + CTR1(KTR_MLD, "%s: sources changed; reaping", __func__); + in6m_reap(inm); + } + return (error); +} + +/* + * Mark an in6_multi's filter set deltas as committed. + * Called by MLD after a state change has been enqueued. + */ +void +in6m_commit(struct in6_multi *inm) +{ + struct ip6_msource *ims; + + CTR2(KTR_MLD, "%s: commit inm %p", __func__, inm); + CTR1(KTR_MLD, "%s: pre commit:", __func__); + in6m_print(inm); + + RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) { + ims->im6s_st[0] = ims->im6s_st[1]; + } + inm->in6m_st[0] = inm->in6m_st[1]; +} + +/* + * Reap unreferenced nodes from an in6_multi's filter set. + */ +static void +in6m_reap(struct in6_multi *inm) +{ + struct ip6_msource *ims, *tims; + + RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) { + if (ims->im6s_st[0].ex > 0 || ims->im6s_st[0].in > 0 || + ims->im6s_st[1].ex > 0 || ims->im6s_st[1].in > 0 || + ims->im6s_stp != 0) + continue; + CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); + RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims); + free(ims, M_IP6MSOURCE); + inm->in6m_nsrc--; + } +} + +/* + * Purge all source nodes from an in6_multi's filter set. + */ +static void +in6m_purge(struct in6_multi *inm) +{ + struct ip6_msource *ims, *tims; + + RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) { + CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); + RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims); + free(ims, M_IP6MSOURCE); + inm->in6m_nsrc--; + } +} + +/* + * Join a multicast address w/o sources. + * KAME compatibility entry point. + * + * SMPng: Assume no mc locks held by caller. 
+ */ +struct in6_multi_mship * +in6_joingroup(struct ifnet *ifp, struct in6_addr *mcaddr, + int *errorp, int delay) +{ + struct in6_multi_mship *imm; + int error; + + imm = malloc(sizeof(*imm), M_IP6MADDR, M_NOWAIT); + if (imm == NULL) { + *errorp = ENOBUFS; + return (NULL); + } + + delay = (delay * PR_FASTHZ) / hz; + + error = in6_mc_join(ifp, mcaddr, NULL, &imm->i6mm_maddr, delay); + if (error) { + *errorp = error; + free(imm, M_IP6MADDR); + return (NULL); + } + + return (imm); +} + +/* + * Leave a multicast address w/o sources. + * KAME compatibility entry point. + * + * SMPng: Assume no mc locks held by caller. + */ +int +in6_leavegroup(struct in6_multi_mship *imm) +{ + + if (imm->i6mm_maddr != NULL) + in6_mc_leave(imm->i6mm_maddr, NULL); + free(imm, M_IP6MADDR); + return 0; +} + +/* + * Join a multicast group; unlocked entry point. + * + * SMPng: XXX: in6_mc_join() is called from in6_control() when upper + * locks are not held. Fortunately, ifp is unlikely to have been detached + * at this point, so we assume it's OK to recurse. + */ +int +in6_mc_join(struct ifnet *ifp, const struct in6_addr *mcaddr, + /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm, + const int delay) +{ + int error; + + IN6_MULTI_LOCK(); + error = in6_mc_join_locked(ifp, mcaddr, imf, pinm, delay); + IN6_MULTI_UNLOCK(); + + return (error); +} + +/* + * Join a multicast group; real entry point. + * + * Only preserves atomicity at inm level. + * NOTE: imf argument cannot be const due to sys/tree.h limitations. + * + * If the MLD downcall fails, the group is not joined, and an error + * code is returned. 
+ */ +int +in6_mc_join_locked(struct ifnet *ifp, const struct in6_addr *mcaddr, + /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm, + const int delay) +{ + struct in6_mfilter timf; + struct in6_multi *inm; + int error; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + +#ifdef INVARIANTS + /* + * Sanity: Check scope zone ID was set for ifp, if and + * only if group is scoped to an interface. + */ + KASSERT(IN6_IS_ADDR_MULTICAST(mcaddr), + ("%s: not a multicast address", __func__)); + if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) || + IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) { + KASSERT(mcaddr->s6_addr16[1] != 0, + ("%s: scope zone ID not set", __func__)); + } +#endif + + IN6_MULTI_LOCK_ASSERT(); + + CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__, + ip6_sprintf(ip6tbuf, mcaddr), ifp, ifp->if_xname); + + error = 0; + inm = NULL; + + /* + * If no imf was specified (i.e. kernel consumer), + * fake one up and assume it is an ASM join. + */ + if (imf == NULL) { + im6f_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); + imf = &timf; + } + + error = in6_mc_get(ifp, mcaddr, &inm); + if (error) { + CTR1(KTR_MLD, "%s: in6_mc_get() failure", __func__); + return (error); + } + + CTR1(KTR_MLD, "%s: merge inm state", __func__); + error = in6m_merge(inm, imf); + if (error) { + CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); + goto out_in6m_release; + } + + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, delay); + if (error) { + CTR1(KTR_MLD, "%s: failed to update source", __func__); + goto out_in6m_release; + } + +out_in6m_release: + if (error) { + CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm); + in6m_release_locked(inm); + } else { + *pinm = inm; + } + + return (error); +} + +/* + * Leave a multicast group; unlocked entry point. 
+ */ +int +in6_mc_leave(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) +{ + struct ifnet *ifp; + int error; + + ifp = inm->in6m_ifp; + + IN6_MULTI_LOCK(); + error = in6_mc_leave_locked(inm, imf); + IN6_MULTI_UNLOCK(); + + return (error); +} + +/* + * Leave a multicast group; real entry point. + * All source filters will be expunged. + * + * Only preserves atomicity at inm level. + * + * Holding the write lock for the INP which contains imf + * is highly advisable. We can't assert for it as imf does not + * contain a back-pointer to the owning inp. + * + * Note: This is not the same as in6m_release(*) as this function also + * makes a state change downcall into MLD. + */ +int +in6_mc_leave_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) +{ + struct in6_mfilter timf; + int error; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + error = 0; + + IN6_MULTI_LOCK_ASSERT(); + + CTR5(KTR_MLD, "%s: leave inm %p, %s/%s, imf %p", __func__, + inm, ip6_sprintf(ip6tbuf, &inm->in6m_addr), + (in6m_is_ifp_detached(inm) ? "null" : inm->in6m_ifp->if_xname), + imf); + + /* + * If no imf was specified (i.e. kernel consumer), + * fake one up and assume it is an ASM join. + */ + if (imf == NULL) { + im6f_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); + imf = &timf; + } + + /* + * Begin state merge transaction at MLD layer. + * + * As this particular invocation should not cause any memory + * to be allocated, and there is no opportunity to roll back + * the transaction, it MUST NOT fail. 
+ */ + CTR1(KTR_MLD, "%s: merge inm state", __func__); + error = in6m_merge(inm, imf); + KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); + + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); + if (error) + CTR1(KTR_MLD, "%s: failed mld downcall", __func__); + + CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm); + in6m_release_locked(inm); + + return (error); +} + +/* + * Block or unblock an ASM multicast source on an inpcb. + * This implements the delta-based API described in RFC 3678. + * + * The delta-based API applies only to exclusive-mode memberships. + * An MLD downcall will be performed. + * + * SMPng: NOTE: Must take Giant as a join may create a new ifma. + * + * Return 0 if successful, otherwise return an appropriate error code. + */ +static int +in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) +{ + struct group_source_req gsr; + sockunion_t *gsa, *ssa; + struct ifnet *ifp; + struct in6_mfilter *imf; + struct ip6_moptions *imo; + struct in6_msource *ims; + struct in6_multi *inm; + size_t idx; + uint16_t fmode; + int error, doblock; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + ifp = NULL; + error = 0; + doblock = 0; + + memset(&gsr, 0, sizeof(struct group_source_req)); + gsa = (sockunion_t *)&gsr.gsr_group; + ssa = (sockunion_t *)&gsr.gsr_source; + + switch (sopt->sopt_name) { + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_source_req), + sizeof(struct group_source_req)); + if (error) + return (error); + + if (gsa->sin6.sin6_family != AF_INET6 || + gsa->sin6.sin6_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + + if (ssa->sin6.sin6_family != AF_INET6 || + ssa->sin6.sin6_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + + if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) + return (EADDRNOTAVAIL); + + ifp = ifnet_byindex(gsr.gsr_interface); + + if (sopt->sopt_name == 
MCAST_BLOCK_SOURCE) + doblock = 1; + break; + + default: + CTR2(KTR_MLD, "%s: unknown sopt_name %d", + __func__, sopt->sopt_name); + return (EOPNOTSUPP); + break; + } + + if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) + return (EINVAL); + + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + + /* + * Check if we are actually a member of this group. + */ + imo = in6p_findmoptions(inp); + idx = im6o_match_group(imo, ifp, &gsa->sa); + if (idx == -1 || imo->im6o_mfilters == NULL) { + error = EADDRNOTAVAIL; + goto out_in6p_locked; + } + + KASSERT(imo->im6o_mfilters != NULL, + ("%s: im6o_mfilters not allocated", __func__)); + imf = &imo->im6o_mfilters[idx]; + inm = imo->im6o_membership[idx]; + + /* + * Attempting to use the delta-based API on an + * non exclusive-mode membership is an error. + */ + fmode = imf->im6f_st[0]; + if (fmode != MCAST_EXCLUDE) { + error = EINVAL; + goto out_in6p_locked; + } + + /* + * Deal with error cases up-front: + * Asked to block, but already blocked; or + * Asked to unblock, but nothing to unblock. + * If adding a new block entry, allocate it. + */ + ims = im6o_match_source(imo, idx, &ssa->sa); + if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { + CTR3(KTR_MLD, "%s: source %s %spresent", __func__, + ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr), + doblock ? "" : "not "); + error = EADDRNOTAVAIL; + goto out_in6p_locked; + } + + INP_WLOCK_ASSERT(inp); + + /* + * Begin state merge transaction at socket layer. + */ + if (doblock) { + CTR2(KTR_MLD, "%s: %s source", __func__, "block"); + ims = im6f_graft(imf, fmode, &ssa->sin6); + if (ims == NULL) + error = ENOMEM; + } else { + CTR2(KTR_MLD, "%s: %s source", __func__, "allow"); + error = im6f_prune(imf, &ssa->sin6); + } + + if (error) { + CTR1(KTR_MLD, "%s: merge imf state failed", __func__); + goto out_im6f_rollback; + } + + /* + * Begin state merge transaction at MLD layer. 
+ */ + IN6_MULTI_LOCK(); + + CTR1(KTR_MLD, "%s: merge inm state", __func__); + error = in6m_merge(inm, imf); + if (error) { + CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); + goto out_im6f_rollback; + } + + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); + if (error) + CTR1(KTR_MLD, "%s: failed mld downcall", __func__); + + IN6_MULTI_UNLOCK(); + +out_im6f_rollback: + if (error) + im6f_rollback(imf); + else + im6f_commit(imf); + + im6f_reap(imf); + +out_in6p_locked: + INP_WUNLOCK(inp); + return (error); +} + +/* + * Given an inpcb, return its multicast options structure pointer. Accepts + * an unlocked inpcb pointer, but will return it locked. May sleep. + * + * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. + * SMPng: NOTE: Returns with the INP write lock held. + */ +static struct ip6_moptions * +in6p_findmoptions(struct inpcb *inp) +{ + struct ip6_moptions *imo; + struct in6_multi **immp; + struct in6_mfilter *imfp; + size_t idx; + + INP_WLOCK(inp); + if (inp->in6p_moptions != NULL) + return (inp->in6p_moptions); + + INP_WUNLOCK(inp); + + imo = malloc(sizeof(*imo), M_IP6MOPTS, M_WAITOK); + immp = malloc(sizeof(*immp) * IPV6_MIN_MEMBERSHIPS, M_IP6MOPTS, + M_WAITOK | M_ZERO); + imfp = malloc(sizeof(struct in6_mfilter) * IPV6_MIN_MEMBERSHIPS, + M_IN6MFILTER, M_WAITOK); + + imo->im6o_multicast_ifp = NULL; + imo->im6o_multicast_hlim = V_ip6_defmcasthlim; + imo->im6o_multicast_loop = in6_mcast_loop; + imo->im6o_num_memberships = 0; + imo->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; + imo->im6o_membership = immp; + + /* Initialize per-group source filters. 
*/ + for (idx = 0; idx < IPV6_MIN_MEMBERSHIPS; idx++) + im6f_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); + imo->im6o_mfilters = imfp; + + INP_WLOCK(inp); + if (inp->in6p_moptions != NULL) { + free(imfp, M_IN6MFILTER); + free(immp, M_IP6MOPTS); + free(imo, M_IP6MOPTS); + return (inp->in6p_moptions); + } + inp->in6p_moptions = imo; + return (imo); +} + +/* + * Discard the IPv6 multicast options (and source filters). + * + * SMPng: NOTE: assumes INP write lock is held. + */ +void +ip6_freemoptions(struct ip6_moptions *imo) +{ + struct in6_mfilter *imf; + size_t idx, nmships; + + KASSERT(imo != NULL, ("%s: ip6_moptions is NULL", __func__)); + + nmships = imo->im6o_num_memberships; + for (idx = 0; idx < nmships; ++idx) { + imf = imo->im6o_mfilters ? &imo->im6o_mfilters[idx] : NULL; + if (imf) + im6f_leave(imf); + /* XXX this will thrash the lock(s) */ + (void)in6_mc_leave(imo->im6o_membership[idx], imf); + if (imf) + im6f_purge(imf); + } + + if (imo->im6o_mfilters) + free(imo->im6o_mfilters, M_IN6MFILTER); + free(imo->im6o_membership, M_IP6MOPTS); + free(imo, M_IP6MOPTS); +} + +/* + * Atomically get source filters on a socket for an IPv6 multicast group. + * Called with INP lock held; returns with lock released. 
+ */ +static int +in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) +{ + struct __msfilterreq msfr; + sockunion_t *gsa; + struct ifnet *ifp; + struct ip6_moptions *imo; + struct in6_mfilter *imf; + struct ip6_msource *ims; + struct in6_msource *lims; + struct sockaddr_in6 *psin; + struct sockaddr_storage *ptss; + struct sockaddr_storage *tss; + int error; + size_t idx, nsrcs, ncsrcs; + + INP_WLOCK_ASSERT(inp); + + imo = inp->in6p_moptions; + KASSERT(imo != NULL, ("%s: null ip6_moptions", __func__)); + + INP_WUNLOCK(inp); + + error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), + sizeof(struct __msfilterreq)); + if (error) + return (error); + + if (msfr.msfr_group.ss_family != AF_INET6 || + msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + + gsa = (sockunion_t *)&msfr.msfr_group; + if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) + return (EINVAL); + + if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) + return (EADDRNOTAVAIL); + ifp = ifnet_byindex(msfr.msfr_ifindex); + if (ifp == NULL) + return (EADDRNOTAVAIL); + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + + INP_WLOCK(inp); + + /* + * Lookup group on the socket. + */ + idx = im6o_match_group(imo, ifp, &gsa->sa); + if (idx == -1 || imo->im6o_mfilters == NULL) { + INP_WUNLOCK(inp); + return (EADDRNOTAVAIL); + } + imf = &imo->im6o_mfilters[idx]; + + /* + * Ignore memberships which are in limbo. + */ + if (imf->im6f_st[1] == MCAST_UNDEFINED) { + INP_WUNLOCK(inp); + return (EAGAIN); + } + msfr.msfr_fmode = imf->im6f_st[1]; + + /* + * If the user specified a buffer, copy out the source filter + * entries to userland gracefully. + * We only copy out the number of entries which userland + * has asked for, but we always tell userland how big the + * buffer really needs to be. 
+ */ + tss = NULL; + if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { + tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, + M_TEMP, M_NOWAIT | M_ZERO); + if (tss == NULL) { + INP_WUNLOCK(inp); + return (ENOBUFS); + } + } + + /* + * Count number of sources in-mode at t0. + * If buffer space exists and remains, copy out source entries. + */ + nsrcs = msfr.msfr_nsrcs; + ncsrcs = 0; + ptss = tss; + RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { + lims = (struct in6_msource *)ims; + if (lims->im6sl_st[0] == MCAST_UNDEFINED || + lims->im6sl_st[0] != imf->im6f_st[0]) + continue; + ++ncsrcs; + if (tss != NULL && nsrcs > 0) { + psin = (struct sockaddr_in6 *)ptss; + psin->sin6_family = AF_INET6; + psin->sin6_len = sizeof(struct sockaddr_in6); + psin->sin6_addr = lims->im6s_addr; + psin->sin6_port = 0; + --nsrcs; + ++ptss; + } + } + + INP_WUNLOCK(inp); + + if (tss != NULL) { + error = copyout(tss, msfr.msfr_srcs, + sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); + free(tss, M_TEMP); + if (error) + return (error); + } + + msfr.msfr_nsrcs = ncsrcs; + error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); + + return (error); +} + +/* + * Return the IP multicast options in response to user getsockopt(). + */ +int +ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt) +{ + struct ip6_moptions *im6o; + int error; + u_int optval; + + INP_WLOCK(inp); + im6o = inp->in6p_moptions; + /* + * If socket is neither of type SOCK_RAW or SOCK_DGRAM, + * or is a divert socket, reject it. 
+ */ + if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || + (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { + INP_WUNLOCK(inp); + return (EOPNOTSUPP); + } + + error = 0; + switch (sopt->sopt_name) { + case IPV6_MULTICAST_IF: + if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) { + optval = 0; + } else { + optval = im6o->im6o_multicast_ifp->if_index; + } + INP_WUNLOCK(inp); + error = sooptcopyout(sopt, &optval, sizeof(u_int)); + break; + + case IPV6_MULTICAST_HOPS: + if (im6o == NULL) + optval = V_ip6_defmcasthlim; + else + optval = im6o->im6o_multicast_loop; + INP_WUNLOCK(inp); + error = sooptcopyout(sopt, &optval, sizeof(u_int)); + break; + + case IPV6_MULTICAST_LOOP: + if (im6o == NULL) + optval = in6_mcast_loop; /* XXX VIMAGE */ + else + optval = im6o->im6o_multicast_loop; + INP_WUNLOCK(inp); + error = sooptcopyout(sopt, &optval, sizeof(u_int)); + break; + + case IPV6_MSFILTER: + if (im6o == NULL) { + error = EADDRNOTAVAIL; + INP_WUNLOCK(inp); + } else { + error = in6p_get_source_filters(inp, sopt); + } + break; + + default: + INP_WUNLOCK(inp); + error = ENOPROTOOPT; + break; + } + + INP_UNLOCK_ASSERT(inp); + + return (error); +} + +/* + * Look up the ifnet to use for a multicast group membership, + * given the address of an IPv6 group. + * + * This routine exists to support legacy IPv6 multicast applications. + * + * If inp is non-NULL, use this socket's current FIB number for any + * required FIB lookup. Look up the group address in the unicast FIB, + * and use its ifp; usually, this points to the default next-hop. + * If the FIB lookup fails, return NULL. + * + * FUTURE: Support multiple forwarding tables for IPv6. + * + * Returns NULL if no ifp could be found. 
+ */ +static struct ifnet * +in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused, + const struct sockaddr_in6 *gsin6) +{ + struct route_in6 ro6; + struct ifnet *ifp; + + KASSERT(in6p->inp_vflag & INP_IPV6, + ("%s: not INP_IPV6 inpcb", __func__)); + KASSERT(gsin6->sin6_family == AF_INET6, + ("%s: not AF_INET6 group", __func__)); + KASSERT(IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr), + ("%s: not multicast", __func__)); + + ifp = NULL; + memset(&ro6, 0, sizeof(struct route_in6)); + memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6)); +#ifdef notyet + rtalloc_ign_fib(&ro6, 0, inp ? inp->inp_inc.inc_fibnum : 0); +#else + rtalloc_ign((struct route *)&ro6, 0); +#endif + if (ro6.ro_rt != NULL) { + ifp = ro6.ro_rt->rt_ifp; + KASSERT(ifp != NULL, ("%s: null ifp", __func__)); + RTFREE(ro6.ro_rt); + } + + return (ifp); +} + +/* + * Join an IPv6 multicast group, possibly with a source. + * + * FIXME: The KAME use of the unspecified address (::) + * to join *all* multicast groups is currently unsupported. + */ +static int +in6p_join_group(struct inpcb *inp, struct sockopt *sopt) +{ + struct group_source_req gsr; + sockunion_t *gsa, *ssa; + struct ifnet *ifp; + struct in6_mfilter *imf; + struct ip6_moptions *imo; + struct in6_multi *inm; + struct in6_msource *lims; + size_t idx; + int error, is_new; + + ifp = NULL; + imf = NULL; + lims = NULL; + error = 0; + is_new = 0; + + memset(&gsr, 0, sizeof(struct group_source_req)); + gsa = (sockunion_t *)&gsr.gsr_group; + gsa->ss.ss_family = AF_UNSPEC; + ssa = (sockunion_t *)&gsr.gsr_source; + ssa->ss.ss_family = AF_UNSPEC; + + /* + * Chew everything into struct group_source_req. + * Overwrite the port field if present, as the sockaddr + * being copied in may be matched with a binary comparison. + * Ignore passed-in scope ID. 
+ */ + switch (sopt->sopt_name) { + case IPV6_JOIN_GROUP: { + struct ipv6_mreq mreq; + + error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq), + sizeof(struct ipv6_mreq)); + if (error) + return (error); + + gsa->sin6.sin6_family = AF_INET6; + gsa->sin6.sin6_len = sizeof(struct sockaddr_in6); + gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr; + + if (mreq.ipv6mr_interface == 0) { + ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6); + } else { + if (mreq.ipv6mr_interface < 0 || + V_if_index < mreq.ipv6mr_interface) + return (EADDRNOTAVAIL); + ifp = ifnet_byindex(mreq.ipv6mr_interface); + } + CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p", + __func__, mreq.ipv6mr_interface, ifp); + } break; + + case MCAST_JOIN_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + if (sopt->sopt_name == MCAST_JOIN_GROUP) { + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_req), + sizeof(struct group_req)); + } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_source_req), + sizeof(struct group_source_req)); + } + if (error) + return (error); + + if (gsa->sin6.sin6_family != AF_INET6 || + gsa->sin6.sin6_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + + if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { + if (ssa->sin6.sin6_family != AF_INET6 || + ssa->sin6.sin6_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr)) + return (EINVAL); + /* + * TODO: Validate embedded scope ID in source + * list entry against passed-in ifp, if and only + * if source list filter entry is iface or node local. 
+ */ + in6_clearscope(&ssa->sin6.sin6_addr); + ssa->sin6.sin6_port = 0; + ssa->sin6.sin6_scope_id = 0; + } + + if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) + return (EADDRNOTAVAIL); + ifp = ifnet_byindex(gsr.gsr_interface); + break; + + default: + CTR2(KTR_MLD, "%s: unknown sopt_name %d", + __func__, sopt->sopt_name); + return (EOPNOTSUPP); + break; + } + + if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) + return (EINVAL); + + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) + return (EADDRNOTAVAIL); + + gsa->sin6.sin6_port = 0; + gsa->sin6.sin6_scope_id = 0; + + /* + * Always set the scope zone ID on memberships created from userland. + * Use the passed-in ifp to do this. + * XXX The in6_setscope() return value is meaningless. + * XXX SCOPE6_LOCK() is taken by in6_setscope(). + */ + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + + imo = in6p_findmoptions(inp); + idx = im6o_match_group(imo, ifp, &gsa->sa); + if (idx == -1) { + is_new = 1; + } else { + inm = imo->im6o_membership[idx]; + imf = &imo->im6o_mfilters[idx]; + if (ssa->ss.ss_family != AF_UNSPEC) { + /* + * MCAST_JOIN_SOURCE_GROUP on an exclusive membership + * is an error. On an existing inclusive membership, + * it just adds the source to the filter list. + */ + if (imf->im6f_st[1] != MCAST_INCLUDE) { + error = EINVAL; + goto out_in6p_locked; + } + /* + * Throw out duplicates. + * + * XXX FIXME: This makes a naive assumption that + * even if entries exist for *ssa in this imf, + * they will be rejected as dupes, even if they + * are not valid in the current mode (in-mode). + * + * in6_msource is transactioned just as for anything + * else in SSM -- but note naive use of in6m_graft() + * below for allocating new filter entries. + * + * This is only an issue if someone mixes the + * full-state SSM API with the delta-based API, + * which is discouraged in the relevant RFCs. 
+ */ + lims = im6o_match_source(imo, idx, &ssa->sa); + if (lims != NULL /*&& + lims->im6sl_st[1] == MCAST_INCLUDE*/) { + error = EADDRNOTAVAIL; + goto out_in6p_locked; + } + } else { + /* + * MCAST_JOIN_GROUP alone, on any existing membership, + * is rejected, to stop the same inpcb tying up + * multiple refs to the in_multi. + * On an existing inclusive membership, this is also + * an error; if you want to change filter mode, + * you must use the userland API setsourcefilter(). + * XXX We don't reject this for imf in UNDEFINED + * state at t1, because allocation of a filter + * is atomic with allocation of a membership. + */ + error = EINVAL; + goto out_in6p_locked; + } + } + + /* + * Begin state merge transaction at socket layer. + */ + INP_WLOCK_ASSERT(inp); + + if (is_new) { + if (imo->im6o_num_memberships == imo->im6o_max_memberships) { + error = im6o_grow(imo); + if (error) + goto out_in6p_locked; + } + /* + * Allocate the new slot upfront so we can deal with + * grafting the new source filter in same code path + * as for join-source on existing membership. + */ + idx = imo->im6o_num_memberships; + imo->im6o_membership[idx] = NULL; + imo->im6o_num_memberships++; + KASSERT(imo->im6o_mfilters != NULL, + ("%s: im6f_mfilters vector was not allocated", __func__)); + imf = &imo->im6o_mfilters[idx]; + KASSERT(RB_EMPTY(&imf->im6f_sources), + ("%s: im6f_sources not empty", __func__)); + } + + /* + * Graft new source into filter list for this inpcb's + * membership of the group. The in6_multi may not have + * been allocated yet if this is a new membership, however, + * the in_mfilter slot will be allocated and must be initialized. + * + * Note: Grafting of exclusive mode filters doesn't happen + * in this path. + * XXX: Should check for non-NULL lims (node exists but may + * not be in-mode) for interop with full-state API. 
+ */ + if (ssa->ss.ss_family != AF_UNSPEC) { + /* Membership starts in IN mode */ + if (is_new) { + CTR1(KTR_MLD, "%s: new join w/source", __func__); + im6f_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); + } else { + CTR2(KTR_MLD, "%s: %s source", __func__, "allow"); + } + lims = im6f_graft(imf, MCAST_INCLUDE, &ssa->sin6); + if (lims == NULL) { + CTR1(KTR_MLD, "%s: merge imf state failed", + __func__); + error = ENOMEM; + goto out_im6o_free; + } + } else { + /* No address specified; Membership starts in EX mode */ + if (is_new) { + CTR1(KTR_MLD, "%s: new join w/o source", __func__); + im6f_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); + } + } + + /* + * Begin state merge transaction at MLD layer. + */ + IN6_MULTI_LOCK(); + + if (is_new) { + error = in6_mc_join_locked(ifp, &gsa->sin6.sin6_addr, imf, + &inm, 0); + if (error) + goto out_im6o_free; + imo->im6o_membership[idx] = inm; + } else { + CTR1(KTR_MLD, "%s: merge inm state", __func__); + error = in6m_merge(inm, imf); + if (error) { + CTR1(KTR_MLD, "%s: failed to merge inm state", + __func__); + goto out_im6f_rollback; + } + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); + if (error) { + CTR1(KTR_MLD, "%s: failed mld downcall", + __func__); + goto out_im6f_rollback; + } + } + + IN6_MULTI_UNLOCK(); + +out_im6f_rollback: + INP_WLOCK_ASSERT(inp); + if (error) { + im6f_rollback(imf); + if (is_new) + im6f_purge(imf); + else + im6f_reap(imf); + } else { + im6f_commit(imf); + } + +out_im6o_free: + if (error && is_new) { + imo->im6o_membership[idx] = NULL; + --imo->im6o_num_memberships; + } + +out_in6p_locked: + INP_WUNLOCK(inp); + return (error); +} + +/* + * Leave an IPv6 multicast group on an inpcb, possibly with a source. 
+ */ +static int +in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) +{ + struct ipv6_mreq mreq; + struct group_source_req gsr; + sockunion_t *gsa, *ssa; + struct ifnet *ifp; + struct in6_mfilter *imf; + struct ip6_moptions *imo; + struct in6_msource *ims; + struct in6_multi *inm; + uint32_t ifindex; + size_t idx; + int error, is_final; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + ifp = NULL; + ifindex = 0; + error = 0; + is_final = 1; + + memset(&gsr, 0, sizeof(struct group_source_req)); + gsa = (sockunion_t *)&gsr.gsr_group; + gsa->ss.ss_family = AF_UNSPEC; + ssa = (sockunion_t *)&gsr.gsr_source; + ssa->ss.ss_family = AF_UNSPEC; + + /* + * Chew everything passed in up into a struct group_source_req + * as that is easier to process. + * Note: Any embedded scope ID in the multicast group passed + * in by userland is ignored, the interface index is the recommended + * mechanism to specify an interface; see below. + */ + switch (sopt->sopt_name) { + case IPV6_LEAVE_GROUP: + error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq), + sizeof(struct ipv6_mreq)); + if (error) + return (error); + gsa->sin6.sin6_family = AF_INET6; + gsa->sin6.sin6_len = sizeof(struct sockaddr_in6); + gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr; + gsa->sin6.sin6_port = 0; + gsa->sin6.sin6_scope_id = 0; + ifindex = mreq.ipv6mr_interface; + break; + + case MCAST_LEAVE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + if (sopt->sopt_name == MCAST_LEAVE_GROUP) { + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_req), + sizeof(struct group_req)); + } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_source_req), + sizeof(struct group_source_req)); + } + if (error) + return (error); + + if (gsa->sin6.sin6_family != AF_INET6 || + gsa->sin6.sin6_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { + if (ssa->sin6.sin6_family != AF_INET6 || + ssa->sin6.sin6_len != 
sizeof(struct sockaddr_in6)) + return (EINVAL); + if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr)) + return (EINVAL); + /* + * TODO: Validate embedded scope ID in source + * list entry against passed-in ifp, if and only + * if source list filter entry is iface or node local. + */ + in6_clearscope(&ssa->sin6.sin6_addr); + } + gsa->sin6.sin6_port = 0; + gsa->sin6.sin6_scope_id = 0; + ifindex = gsr.gsr_interface; + break; + + default: + CTR2(KTR_MLD, "%s: unknown sopt_name %d", + __func__, sopt->sopt_name); + return (EOPNOTSUPP); + break; + } + + if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) + return (EINVAL); + + /* + * Validate interface index if provided. If no interface index + * was provided separately, attempt to look the membership up + * from the default scope as a last resort to disambiguate + * the membership we are being asked to leave. + * XXX SCOPE6 lock potentially taken here. + */ + if (ifindex != 0) { + if (ifindex < 0 || V_if_index < ifindex) + return (EADDRNOTAVAIL); + ifp = ifnet_byindex(ifindex); + if (ifp == NULL) + return (EADDRNOTAVAIL); + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + } else { + error = sa6_embedscope(&gsa->sin6, V_ip6_use_defzone); + if (error) + return (EADDRNOTAVAIL); + /* + * Some badly behaved applications don't pass an ifindex + * or a scope ID, which is an API violation. In this case, + * perform a lookup as per a v6 join. + * + * XXX For now, stomp on zone ID for the corner case. + * This is not the 'KAME way', but we need to see the ifp + * directly until such time as this implementation is + * refactored, assuming the scope IDs are the way to go. 
+ */ + ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]); + if (ifindex == 0) { + CTR2(KTR_MLD, "%s: warning: no ifindex, looking up " + "ifp for group %s.", __func__, + ip6_sprintf(ip6tbuf, &gsa->sin6.sin6_addr)); + ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6); + } else { + ifp = ifnet_byindex(ifindex); + } + if (ifp == NULL) + return (EADDRNOTAVAIL); + } + + CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp); + KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__)); + + /* + * Find the membership in the membership array. + */ + imo = in6p_findmoptions(inp); + idx = im6o_match_group(imo, ifp, &gsa->sa); + if (idx == -1) { + error = EADDRNOTAVAIL; + goto out_in6p_locked; + } + inm = imo->im6o_membership[idx]; + imf = &imo->im6o_mfilters[idx]; + + if (ssa->ss.ss_family != AF_UNSPEC) + is_final = 0; + + /* + * Begin state merge transaction at socket layer. + */ + INP_WLOCK_ASSERT(inp); + + /* + * If we were instructed only to leave a given source, do so. + * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. + */ + if (is_final) { + im6f_leave(imf); + } else { + if (imf->im6f_st[0] == MCAST_EXCLUDE) { + error = EADDRNOTAVAIL; + goto out_in6p_locked; + } + ims = im6o_match_source(imo, idx, &ssa->sa); + if (ims == NULL) { + CTR3(KTR_MLD, "%s: source %p %spresent", __func__, + ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr), + "not "); + error = EADDRNOTAVAIL; + goto out_in6p_locked; + } + CTR2(KTR_MLD, "%s: %s source", __func__, "block"); + error = im6f_prune(imf, &ssa->sin6); + if (error) { + CTR1(KTR_MLD, "%s: merge imf state failed", + __func__); + goto out_in6p_locked; + } + } + + /* + * Begin state merge transaction at MLD layer. + */ + IN6_MULTI_LOCK(); + + if (is_final) { + /* + * Give up the multicast address record to which + * the membership points. 
+ */ + (void)in6_mc_leave_locked(inm, imf); + } else { + CTR1(KTR_MLD, "%s: merge inm state", __func__); + error = in6m_merge(inm, imf); + if (error) { + CTR1(KTR_MLD, "%s: failed to merge inm state", + __func__); + goto out_im6f_rollback; + } + + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); + if (error) { + CTR1(KTR_MLD, "%s: failed mld downcall", + __func__); + } + } + + IN6_MULTI_UNLOCK(); + +out_im6f_rollback: + if (error) + im6f_rollback(imf); + else + im6f_commit(imf); + + im6f_reap(imf); + + if (is_final) { + /* Remove the gap in the membership array. */ + for (++idx; idx < imo->im6o_num_memberships; ++idx) { + imo->im6o_membership[idx-1] = imo->im6o_membership[idx]; + imo->im6o_mfilters[idx-1] = imo->im6o_mfilters[idx]; + } + imo->im6o_num_memberships--; + } + +out_in6p_locked: + INP_WUNLOCK(inp); + return (error); +} + +/* + * Select the interface for transmitting IPv6 multicast datagrams. + * + * Either an instance of struct in6_addr or an instance of struct ipv6_mreqn + * may be passed to this socket option. An address of in6addr_any or an + * interface index of 0 is used to remove a previous selection. + * When no interface is selected, one is chosen for every send. + */ +static int +in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) +{ + struct ifnet *ifp; + struct ip6_moptions *imo; + u_int ifindex; + int error; + + if (sopt->sopt_valsize != sizeof(u_int)) + return (EINVAL); + + error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int)); + if (error) + return (error); + if (ifindex < 0 || V_if_index < ifindex) + return (EINVAL); + + ifp = ifnet_byindex(ifindex); + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) + return (EADDRNOTAVAIL); + + imo = in6p_findmoptions(inp); + imo->im6o_multicast_ifp = ifp; + INP_WUNLOCK(inp); + + return (0); +} + +/* + * Atomically set source filters on a socket for an IPv6 multicast group. 
+ * + * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. + */ +static int +in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt) +{ + struct __msfilterreq msfr; + sockunion_t *gsa; + struct ifnet *ifp; + struct in6_mfilter *imf; + struct ip6_moptions *imo; + struct in6_multi *inm; + size_t idx; + int error; + + error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), + sizeof(struct __msfilterreq)); + if (error) + return (error); + + if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc) + return (ENOBUFS); + + if (msfr.msfr_fmode != MCAST_EXCLUDE && + msfr.msfr_fmode != MCAST_INCLUDE) + return (EINVAL); + + if (msfr.msfr_group.ss_family != AF_INET6 || + msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + + gsa = (sockunion_t *)&msfr.msfr_group; + if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) + return (EINVAL); + + gsa->sin6.sin6_port = 0; /* ignore port */ + + if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) + return (EADDRNOTAVAIL); + ifp = ifnet_byindex(msfr.msfr_ifindex); + if (ifp == NULL) + return (EADDRNOTAVAIL); + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + + /* + * Take the INP write lock. + * Check if this socket is a member of this group. + */ + imo = in6p_findmoptions(inp); + idx = im6o_match_group(imo, ifp, &gsa->sa); + if (idx == -1 || imo->im6o_mfilters == NULL) { + error = EADDRNOTAVAIL; + goto out_in6p_locked; + } + inm = imo->im6o_membership[idx]; + imf = &imo->im6o_mfilters[idx]; + + /* + * Begin state merge transaction at socket layer. + */ + INP_WLOCK_ASSERT(inp); + + imf->im6f_st[1] = msfr.msfr_fmode; + + /* + * Apply any new source filters, if present. + * Make a copy of the user-space source vector so + * that we may copy them with a single copyin. This + * allows us to deal with page faults up-front. 
+ */ + if (msfr.msfr_nsrcs > 0) { + struct in6_msource *lims; + struct sockaddr_in6 *psin; + struct sockaddr_storage *kss, *pkss; + int i; + + INP_WUNLOCK(inp); + + CTR2(KTR_MLD, "%s: loading %lu source list entries", + __func__, (unsigned long)msfr.msfr_nsrcs); + kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, + M_TEMP, M_WAITOK); + error = copyin(msfr.msfr_srcs, kss, + sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); + if (error) { + free(kss, M_TEMP); + return (error); + } + + INP_WLOCK(inp); + + /* + * Mark all source filters as UNDEFINED at t1. + * Restore new group filter mode, as im6f_leave() + * will set it to INCLUDE. + */ + im6f_leave(imf); + imf->im6f_st[1] = msfr.msfr_fmode; + + /* + * Update socket layer filters at t1, lazy-allocating + * new entries. This saves a bunch of memory at the + * cost of one RB_FIND() per source entry; duplicate + * entries in the msfr_nsrcs vector are ignored. + * If we encounter an error, rollback transaction. + * + * XXX This too could be replaced with a set-symmetric + * difference like loop to avoid walking from root + * every time, as the key space is common. + */ + for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { + psin = (struct sockaddr_in6 *)pkss; + if (psin->sin6_family != AF_INET6) { + error = EAFNOSUPPORT; + break; + } + if (psin->sin6_len != sizeof(struct sockaddr_in6)) { + error = EINVAL; + break; + } + if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) { + error = EINVAL; + break; + } + /* + * TODO: Validate embedded scope ID in source + * list entry against passed-in ifp, if and only + * if source list filter entry is iface or node local. + */ + in6_clearscope(&psin->sin6_addr); + error = im6f_get_source(imf, psin, &lims); + if (error) + break; + lims->im6sl_st[1] = imf->im6f_st[1]; + } + free(kss, M_TEMP); + } + + if (error) + goto out_im6f_rollback; + + INP_WLOCK_ASSERT(inp); + IN6_MULTI_LOCK(); + + /* + * Begin state merge transaction at MLD layer. 
+ */ + CTR1(KTR_MLD, "%s: merge inm state", __func__); + error = in6m_merge(inm, imf); + if (error) { + CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); + goto out_im6f_rollback; + } + + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); + if (error) + CTR1(KTR_MLD, "%s: failed mld downcall", __func__); + + IN6_MULTI_UNLOCK(); + +out_im6f_rollback: + if (error) + im6f_rollback(imf); + else + im6f_commit(imf); + + im6f_reap(imf); + +out_in6p_locked: + INP_WUNLOCK(inp); + return (error); +} + +/* + * Set the IP multicast options in response to user setsockopt(). + * + * Many of the socket options handled in this function duplicate the + * functionality of socket options in the regular unicast API. However, + * it is not possible to merge the duplicate code, because the idempotence + * of the IPv6 multicast part of the BSD Sockets API must be preserved; + * the effects of these options must be treated as separate and distinct. + * + * SMPng: XXX: Unlocked read of inp_socket believed OK. + */ +int +ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt) +{ + struct ip6_moptions *im6o; + int error; + + error = 0; + + /* + * If socket is neither of type SOCK_RAW or SOCK_DGRAM, + * or is a divert socket, reject it. 
+ */ + if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || + (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) + return (EOPNOTSUPP); + + switch (sopt->sopt_name) { + case IPV6_MULTICAST_IF: + error = in6p_set_multicast_if(inp, sopt); + break; + + case IPV6_MULTICAST_HOPS: { + int hlim; + + if (sopt->sopt_valsize != sizeof(int)) { + error = EINVAL; + break; + } + error = sooptcopyin(sopt, &hlim, sizeof(hlim), sizeof(int)); + if (error) + break; + if (hlim < -1 || hlim > 255) { + error = EINVAL; + break; + } else if (hlim == -1) { + hlim = V_ip6_defmcasthlim; + } + im6o = in6p_findmoptions(inp); + im6o->im6o_multicast_hlim = hlim; + INP_WUNLOCK(inp); + break; + } + + case IPV6_MULTICAST_LOOP: { + u_int loop; + + /* + * Set the loopback flag for outgoing multicast packets. + * Must be zero or one. + */ + if (sopt->sopt_valsize != sizeof(u_int)) { + error = EINVAL; + break; + } + error = sooptcopyin(sopt, &loop, sizeof(u_int), sizeof(u_int)); + if (error) + break; + if (loop > 1) { + error = EINVAL; + break; + } + im6o = in6p_findmoptions(inp); + im6o->im6o_multicast_loop = loop; + INP_WUNLOCK(inp); + break; + } + + case IPV6_JOIN_GROUP: + case MCAST_JOIN_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + error = in6p_join_group(inp, sopt); + break; + + case IPV6_LEAVE_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + error = in6p_leave_group(inp, sopt); + break; + + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + error = in6p_block_unblock_source(inp, sopt); + break; + + case IPV6_MSFILTER: + error = in6p_set_source_filters(inp, sopt); + break; + + default: + error = EOPNOTSUPP; + break; + } + + INP_UNLOCK_ASSERT(inp); + + return (error); +} + +/* + * Expose MLD's multicast filter mode and source list(s) to userland, + * keyed by (ifindex, group). + * The filter mode is written out as a uint32_t, followed by + * 0..n of struct in6_addr. + * For use by ifmcstat(8). 
+ * SMPng: NOTE: unlocked read of ifindex space. + */ +static int +sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) +{ + struct in6_addr mcaddr; + struct in6_addr src; + struct ifnet *ifp; + struct ifmultiaddr *ifma; + struct in6_multi *inm; + struct ip6_msource *ims; + int *name; + int retval; + u_int namelen; + uint32_t fmode, ifindex; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + name = (int *)arg1; + namelen = arg2; + + if (req->newptr != NULL) + return (EPERM); + + /* int: ifindex + 4 * 32 bits of IPv6 address */ + if (namelen != 5) + return (EINVAL); + + ifindex = name[0]; + if (ifindex <= 0 || ifindex > V_if_index) { + CTR2(KTR_MLD, "%s: ifindex %u out of range", + __func__, ifindex); + return (ENOENT); + } + + memcpy(&mcaddr, &name[1], sizeof(struct in6_addr)); + if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) { + CTR2(KTR_MLD, "%s: group %s is not multicast", + __func__, ip6_sprintf(ip6tbuf, &mcaddr)); + return (EINVAL); + } + + ifp = ifnet_byindex(ifindex); + if (ifp == NULL) { + CTR2(KTR_MLD, "%s: no ifp for ifindex %u", + __func__, ifindex); + return (ENOENT); + } + /* + * Internal MLD lookups require that scope/zone ID is set. + */ + (void)in6_setscope(&mcaddr, ifp, NULL); + + retval = sysctl_wire_old_buffer(req, + sizeof(uint32_t) + (in6_mcast_maxgrpsrc * sizeof(struct in6_addr))); + if (retval) + return (retval); + + IN6_MULTI_LOCK(); + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET6 || + ifma->ifma_protospec == NULL) + continue; + inm = (struct in6_multi *)ifma->ifma_protospec; + if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr)) + continue; + fmode = inm->in6m_st[1].iss_fmode; + retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); + if (retval != 0) + break; + RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) { + CTR2(KTR_MLD, "%s: visit node %p", __func__, ims); + /* + * Only copy-out sources which are in-mode. 
+ */ + if (fmode != im6s_get_mode(inm, ims, 1)) { + CTR1(KTR_MLD, "%s: skip non-in-mode", + __func__); + continue; + } + src = ims->im6s_addr; + retval = SYSCTL_OUT(req, &src, + sizeof(struct in6_addr)); + if (retval != 0) + break; + } + } + IF_ADDR_UNLOCK(ifp); + + IN6_MULTI_UNLOCK(); + + return (retval); +} + +#ifdef KTR + +static const char *in6m_modestrs[] = { "un", "in", "ex" }; + +static const char * +in6m_mode_str(const int mode) +{ + + if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) + return (in6m_modestrs[mode]); + return ("??"); +} + +static const char *in6m_statestrs[] = { + "not-member", + "silent", + "idle", + "lazy", + "sleeping", + "awakening", + "query-pending", + "sg-query-pending", + "leaving" +}; + +static const char * +in6m_state_str(const int state) +{ + + if (state >= MLD_NOT_MEMBER && state <= MLD_LEAVING_MEMBER) + return (in6m_statestrs[state]); + return ("??"); +} + +/* + * Dump an in6_multi structure to the console. + */ +void +in6m_print(const struct in6_multi *inm) +{ + int t; + char ip6tbuf[INET6_ADDRSTRLEN]; + + if ((ktr_mask & KTR_MLD) == 0) + return; + + printf("%s: --- begin in6m %p ---\n", __func__, inm); + printf("addr %s ifp %p(%s) ifma %p\n", + ip6_sprintf(ip6tbuf, &inm->in6m_addr), + inm->in6m_ifp, + inm->in6m_ifp->if_xname, + inm->in6m_ifma); + printf("timer %u state %s refcount %u scq.len %u\n", + inm->in6m_timer, + in6m_state_str(inm->in6m_state), + inm->in6m_refcount, + inm->in6m_scq.ifq_len); + printf("mli %p nsrc %lu sctimer %u scrv %u\n", + inm->in6m_mli, + inm->in6m_nsrc, + inm->in6m_sctimer, + inm->in6m_scrv); + for (t = 0; t < 2; t++) { + printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, + in6m_mode_str(inm->in6m_st[t].iss_fmode), + inm->in6m_st[t].iss_asm, + inm->in6m_st[t].iss_ex, + inm->in6m_st[t].iss_in, + inm->in6m_st[t].iss_rec); + } + printf("%s: --- end in6m %p ---\n", __func__, inm); +} + +#else /* !KTR */ + +void +in6m_print(const struct in6_multi *inm) +{ + +} + +#endif /* KTR */ diff --git 
a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c new file mode 100644 index 00000000..5faccefd --- /dev/null +++ b/freebsd/sys/netinet6/in6_pcb.c @@ -0,0 +1,936 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/domain.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/priv.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/jail.h> + +#include <freebsd/vm/uma.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/route.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/in_systm.h> +#include <freebsd/netinet/tcp_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet/ip_var.h> + +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet6/in6_pcb.h> +#include <freebsd/netinet6/scope6_var.h> + +#include <freebsd/security/mac/mac_framework.h> + +struct in6_addr zeroin6_addr; + +int +in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, + struct ucred *cred) +{ + struct socket *so = inp->inp_socket; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; + u_short lport = 0; + int error, wild = 0, reuseport = (so->so_options & SO_REUSEPORT); + + INP_INFO_WLOCK_ASSERT(pcbinfo); + INP_WLOCK_ASSERT(inp); + + if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! 
*/ + return (EADDRNOTAVAIL); + if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) + return (EINVAL); + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) + wild = INPLOOKUP_WILDCARD; + if (nam == NULL) { + if ((error = prison_local_ip6(cred, &inp->in6p_laddr, + ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) + return (error); + } else { + sin6 = (struct sockaddr_in6 *)nam; + if (nam->sa_len != sizeof(*sin6)) + return (EINVAL); + /* + * family check. + */ + if (nam->sa_family != AF_INET6) + return (EAFNOSUPPORT); + + if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) + return(error); + + if ((error = prison_local_ip6(cred, &sin6->sin6_addr, + ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) + return (error); + + lport = sin6->sin6_port; + if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { + /* + * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; + * allow complete duplication of binding if + * SO_REUSEPORT is set, or if SO_REUSEADDR is set + * and a multicast address is bound on both + * new and duplicated sockets. + */ + if (so->so_options & SO_REUSEADDR) + reuseport = SO_REUSEADDR|SO_REUSEPORT; + } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + struct ifaddr *ifa; + + sin6->sin6_port = 0; /* yech... */ + if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == + NULL && + (inp->inp_flags & INP_BINDANY) == 0) { + return (EADDRNOTAVAIL); + } + + /* + * XXX: bind to an anycast address might accidentally + * cause sending a packet with anycast source address. + * We should allow to bind to a deprecated address, since + * the application dares to use it. 
+ */ + if (ifa != NULL && + ((struct in6_ifaddr *)ifa)->ia6_flags & + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { + ifa_free(ifa); + return (EADDRNOTAVAIL); + } + if (ifa != NULL) + ifa_free(ifa); + } + if (lport) { + struct inpcb *t; + + /* GROSS */ + if (ntohs(lport) <= V_ipport_reservedhigh && + ntohs(lport) >= V_ipport_reservedlow && + priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, + 0)) + return (EACCES); + if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && + priv_check_cred(inp->inp_cred, + PRIV_NETINET_REUSEPORT, 0) != 0) { + t = in6_pcblookup_local(pcbinfo, + &sin6->sin6_addr, lport, + INPLOOKUP_WILDCARD, cred); + if (t && + ((t->inp_flags & INP_TIMEWAIT) == 0) && + (so->so_type != SOCK_STREAM || + IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && + (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || + (t->inp_socket->so_options & SO_REUSEPORT) + == 0) && (inp->inp_cred->cr_uid != + t->inp_cred->cr_uid)) + return (EADDRINUSE); + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && + IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + struct sockaddr_in sin; + + in6_sin6_2_sin(&sin, sin6); + t = in_pcblookup_local(pcbinfo, + sin.sin_addr, lport, + INPLOOKUP_WILDCARD, cred); + if (t && + ((t->inp_flags & + INP_TIMEWAIT) == 0) && + (so->so_type != SOCK_STREAM || + ntohl(t->inp_faddr.s_addr) == + INADDR_ANY) && + (inp->inp_cred->cr_uid != + t->inp_cred->cr_uid)) + return (EADDRINUSE); + } + } + t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, + lport, wild, cred); + if (t && (reuseport & ((t->inp_flags & INP_TIMEWAIT) ? 
+ intotw(t)->tw_so_options : + t->inp_socket->so_options)) == 0) + return (EADDRINUSE); + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && + IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + struct sockaddr_in sin; + + in6_sin6_2_sin(&sin, sin6); + t = in_pcblookup_local(pcbinfo, sin.sin_addr, + lport, wild, cred); + if (t && t->inp_flags & INP_TIMEWAIT) { + if ((reuseport & + intotw(t)->tw_so_options) == 0 && + (ntohl(t->inp_laddr.s_addr) != + INADDR_ANY || ((inp->inp_vflag & + INP_IPV6PROTO) == + (t->inp_vflag & INP_IPV6PROTO)))) + return (EADDRINUSE); + } + else if (t && + (reuseport & t->inp_socket->so_options) + == 0 && (ntohl(t->inp_laddr.s_addr) != + INADDR_ANY || INP_SOCKAF(so) == + INP_SOCKAF(t->inp_socket))) + return (EADDRINUSE); + } + } + inp->in6p_laddr = sin6->sin6_addr; + } + if (lport == 0) { + if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) + return (error); + } else { + inp->inp_lport = lport; + if (in_pcbinshash(inp) != 0) { + inp->in6p_laddr = in6addr_any; + inp->inp_lport = 0; + return (EAGAIN); + } + } + return (0); +} + +/* + * Transform old in6_pcbconnect() into an inner subroutine for new + * in6_pcbconnect(): Do some validity-checking on the remote + * address (in mbuf 'nam') and then determine local host address + * (i.e., which interface) to use to access that remote host. + * + * This preserves definition of in6_pcbconnect(), while supporting a + * slightly different version for T/TCP. (This is more than + * a bit of a kludge, but cleaning up the internal interfaces would + * have forced minor changes in every protocol). 
+ */ +int +in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, + struct in6_addr *plocal_addr6) +{ + register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + int error = 0; + struct ifnet *ifp = NULL; + int scope_ambiguous = 0; + struct in6_addr in6a; + + INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); + INP_WLOCK_ASSERT(inp); + + if (nam->sa_len != sizeof (*sin6)) + return (EINVAL); + if (sin6->sin6_family != AF_INET6) + return (EAFNOSUPPORT); + if (sin6->sin6_port == 0) + return (EADDRNOTAVAIL); + + if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) + scope_ambiguous = 1; + if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) + return(error); + + if (!TAILQ_EMPTY(&V_in6_ifaddrhead)) { + /* + * If the destination address is UNSPECIFIED addr, + * use the loopback addr, i.e. ::1. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) + sin6->sin6_addr = in6addr_loopback; + } + if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) + return (error); + + error = in6_selectsrc(sin6, inp->in6p_outputopts, + inp, NULL, inp->inp_cred, &ifp, &in6a); + if (error) + return (error); + + if (ifp && scope_ambiguous && + (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { + return(error); + } + + /* + * Do not update this earlier, in case we return with an error. + * + * XXX: this in6_selectsrc result might replace the bound local + * address with the address specified by setsockopt(IPV6_PKTINFO). + * Is it the intended behavior? + */ + *plocal_addr6 = in6a; + + /* + * Don't do pcblookup call here; return interface in + * plocal_addr6 + * and exit to caller, that will do the lookup. + */ + + return (0); +} + +/* + * Outer subroutine: + * Connect from a socket to a specified address. + * Both address and port must be specified in argument sin. + * If we don't have a local address for this socket yet, + * then pick one. 
+ */ +int +in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam, + struct ucred *cred) +{ + register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + struct in6_addr addr6; + int error; + + INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); + INP_WLOCK_ASSERT(inp); + + /* + * Call inner routine, to assign local interface address. + * in6_pcbladdr() may automatically fill in sin6_scope_id. + */ + if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) + return (error); + + if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, + sin6->sin6_port, + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) + ? &addr6 : &inp->in6p_laddr, + inp->inp_lport, 0, NULL) != NULL) { + return (EADDRINUSE); + } + if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (inp->inp_lport == 0) { + error = in6_pcbbind(inp, (struct sockaddr *)0, cred); + if (error) + return (error); + } + inp->in6p_laddr = addr6; + } + inp->in6p_faddr = sin6->sin6_addr; + inp->inp_fport = sin6->sin6_port; + /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ + inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; + if (inp->inp_flags & IN6P_AUTOFLOWLABEL) + inp->inp_flow |= + (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); + + in_pcbrehash(inp); + + return (0); +} + +void +in6_pcbdisconnect(struct inpcb *inp) +{ + + INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); + INP_WLOCK_ASSERT(inp); + + bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); + inp->inp_fport = 0; + /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ + inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; + in_pcbrehash(inp); +} + +struct sockaddr * +in6_sockaddr(in_port_t port, struct in6_addr *addr_p) +{ + struct sockaddr_in6 *sin6; + + sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK); + bzero(sin6, sizeof *sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_port = port; + sin6->sin6_addr = *addr_p; + (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ + + return (struct sockaddr *)sin6; +} + +struct sockaddr * 
+in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p) +{ + struct sockaddr_in sin; + struct sockaddr_in6 *sin6_p; + + bzero(&sin, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_port = port; + sin.sin_addr = *addr_p; + + sin6_p = malloc(sizeof *sin6_p, M_SONAME, + M_WAITOK); + in6_sin_2_v4mapsin6(&sin, sin6_p); + + return (struct sockaddr *)sin6_p; +} + +int +in6_getsockaddr(struct socket *so, struct sockaddr **nam) +{ + register struct inpcb *inp; + struct in6_addr addr; + in_port_t port; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL")); + + INP_RLOCK(inp); + port = inp->inp_lport; + addr = inp->in6p_laddr; + INP_RUNLOCK(inp); + + *nam = in6_sockaddr(port, &addr); + return 0; +} + +int +in6_getpeeraddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + struct in6_addr addr; + in_port_t port; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL")); + + INP_RLOCK(inp); + port = inp->inp_fport; + addr = inp->in6p_faddr; + INP_RUNLOCK(inp); + + *nam = in6_sockaddr(port, &addr); + return 0; +} + +int +in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL")); + + if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { + error = in_getsockaddr(so, nam); + if (error == 0) + in6_sin_2_v4mapsin6_in_sock(nam); + } else { + /* scope issues will be handled in in6_getsockaddr(). 
*/ + error = in6_getsockaddr(so, nam); + } + + return error; +} + +int +in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL")); + + if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { + error = in_getpeeraddr(so, nam); + if (error == 0) + in6_sin_2_v4mapsin6_in_sock(nam); + } else + /* scope issues will be handled in in6_getpeeraddr(). */ + error = in6_getpeeraddr(so, nam); + + return error; +} + +/* + * Pass some notification to all connections of a protocol + * associated with address dst. The local address and/or port numbers + * may be specified to limit the search. The "usual action" will be + * taken, depending on the ctlinput cmd. The caller must filter any + * cmds that are uninteresting (e.g., no error in the map). + * Call the protocol specific routine (if any) to report + * any errors for each matching socket. + */ +void +in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, + u_int fport_arg, const struct sockaddr *src, u_int lport_arg, + int cmd, void *cmdarg, + struct inpcb *(*notify)(struct inpcb *, int)) +{ + struct inpcb *inp, *inp_temp; + struct sockaddr_in6 sa6_src, *sa6_dst; + u_short fport = fport_arg, lport = lport_arg; + u_int32_t flowinfo; + int errno; + + if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) + return; + + sa6_dst = (struct sockaddr_in6 *)dst; + if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) + return; + + /* + * note that src can be NULL when we get notify by local fragmentation. + */ + sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; + flowinfo = sa6_src.sin6_flowinfo; + + /* + * Redirects go to all references to the destination, + * and use in6_rtchange to invalidate the route cache. + * Dead host indications: also use in6_rtchange to invalidate + * the cache, and deliver the error to all the sockets. 
+ * Otherwise, if we have knowledge of the local port and address, + * deliver only to that socket. + */ + if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { + fport = 0; + lport = 0; + bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); + + if (cmd != PRC_HOSTDEAD) + notify = in6_rtchange; + } + errno = inet6ctlerrmap[cmd]; + INP_INFO_WLOCK(pcbinfo); + LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { + INP_WLOCK(inp); + if ((inp->inp_vflag & INP_IPV6) == 0) { + INP_WUNLOCK(inp); + continue; + } + + /* + * If the error designates a new path MTU for a destination + * and the application (associated with this socket) wanted to + * know the value, notify. Note that we notify for all + * disconnected sockets if the corresponding application + * wanted. This is because some UDP applications keep sending + * sockets disconnected. + * XXX: should we avoid to notify the value to TCP sockets? + */ + if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && + (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { + ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, + (u_int32_t *)cmdarg); + } + + /* + * Detect if we should notify the error. If no source and + * destination ports are specified, but non-zero flowinfo and + * local address match, notify the error. This is the case + * when the error is delivered with an encrypted buffer + * by ESP. Otherwise, just compare addresses and ports + * as usual. 
+ */ + if (lport == 0 && fport == 0 && flowinfo && + inp->inp_socket != NULL && + flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && + IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) + goto do_notify; + else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, + &sa6_dst->sin6_addr) || + inp->inp_socket == 0 || + (lport && inp->inp_lport != lport) || + (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && + !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, + &sa6_src.sin6_addr)) || + (fport && inp->inp_fport != fport)) { + INP_WUNLOCK(inp); + continue; + } + + do_notify: + if (notify) { + if ((*notify)(inp, errno)) + INP_WUNLOCK(inp); + } else + INP_WUNLOCK(inp); + } + INP_INFO_WUNLOCK(pcbinfo); +} + +/* + * Lookup a PCB based on the local address and port. + */ +struct inpcb * +in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, + u_short lport, int wild_okay, struct ucred *cred) +{ + register struct inpcb *inp; + int matchwild = 3, wildcard; + + INP_INFO_WLOCK_ASSERT(pcbinfo); + + if (!wild_okay) { + struct inpcbhead *head; + /* + * Look for an unconnected (wildcard foreign addr) PCB that + * matches the local address and port we're looking for. + */ + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, + 0, pcbinfo->ipi_hashmask)]; + LIST_FOREACH(inp, head, inp_hash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && + IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && + inp->inp_lport == lport) { + /* Found. */ + if (cred == NULL || + prison_equal_ip6(cred->cr_prison, + inp->inp_cred->cr_prison)) + return (inp); + } + } + /* + * Not found. + */ + return (NULL); + } else { + struct inpcbporthead *porthash; + struct inpcbport *phd; + struct inpcb *match = NULL; + /* + * Best fit PCB lookup. + * + * First see if this local port is in use by looking on the + * port hash list. 
+ */ + porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, + pcbinfo->ipi_porthashmask)]; + LIST_FOREACH(phd, porthash, phd_hash) { + if (phd->phd_port == lport) + break; + } + if (phd != NULL) { + /* + * Port is in use by one or more PCBs. Look for best + * fit. + */ + LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { + wildcard = 0; + if (cred != NULL && + !prison_equal_ip6(cred->cr_prison, + inp->inp_cred->cr_prison)) + continue; + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + wildcard++; + if (!IN6_IS_ADDR_UNSPECIFIED( + &inp->in6p_laddr)) { + if (IN6_IS_ADDR_UNSPECIFIED(laddr)) + wildcard++; + else if (!IN6_ARE_ADDR_EQUAL( + &inp->in6p_laddr, laddr)) + continue; + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) + wildcard++; + } + if (wildcard < matchwild) { + match = inp; + matchwild = wildcard; + if (matchwild == 0) + break; + } + } + } + return (match); + } +} + +void +in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) +{ + struct inpcb *in6p; + struct ip6_moptions *im6o; + int i, gap; + + INP_INFO_RLOCK(pcbinfo); + LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { + INP_WLOCK(in6p); + im6o = in6p->in6p_moptions; + if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { + /* + * Unselect the outgoing ifp for multicast if it + * is being detached. + */ + if (im6o->im6o_multicast_ifp == ifp) + im6o->im6o_multicast_ifp = NULL; + /* + * Drop multicast group membership if we joined + * through the interface being detached. 
+ */ + gap = 0; + for (i = 0; i < im6o->im6o_num_memberships; i++) { + if (im6o->im6o_membership[i]->in6m_ifp == + ifp) { + in6_mc_leave(im6o->im6o_membership[i], + NULL); + gap++; + } else if (gap != 0) { + im6o->im6o_membership[i - gap] = + im6o->im6o_membership[i]; + } + } + im6o->im6o_num_memberships -= gap; + } + INP_WUNLOCK(in6p); + } + INP_INFO_RUNLOCK(pcbinfo); +} + +/* + * Check for alternatives when higher level complains + * about service problems. For now, invalidate cached + * routing information. If the route was created dynamically + * (by a redirect), time to try a default gateway again. + */ +void +in6_losing(struct inpcb *in6p) +{ + + /* + * We don't store route pointers in the routing table anymore + */ + return; +} + +/* + * After a routing change, flush old routing + * and allocate a (hopefully) better one. + */ +struct inpcb * +in6_rtchange(struct inpcb *inp, int errno) +{ + /* + * We don't store route pointers in the routing table anymore + */ + return inp; +} + +/* + * Lookup PCB in hash list. + */ +struct inpcb * +in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, + u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int wildcard, + struct ifnet *ifp) +{ + struct inpcbhead *head; + struct inpcb *inp, *tmpinp; + u_short fport = fport_arg, lport = lport_arg; + int faith; + + INP_INFO_LOCK_ASSERT(pcbinfo); + + if (faithprefix_p != NULL) + faith = (*faithprefix_p)(laddr); + else + faith = 0; + + /* + * First look for an exact match. 
+ */ + tmpinp = NULL; + head = &pcbinfo->ipi_hashbase[ + INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, + pcbinfo->ipi_hashmask)]; + LIST_FOREACH(inp, head, inp_hash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && + IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && + inp->inp_fport == fport && + inp->inp_lport == lport) { + /* + * XXX We should be able to directly return + * the inp here, without any checks. + * Well unless both bound with SO_REUSEPORT? + */ + if (prison_flag(inp->inp_cred, PR_IP6)) + return (inp); + if (tmpinp == NULL) + tmpinp = inp; + } + } + if (tmpinp != NULL) + return (tmpinp); + + /* + * Then look for a wildcard match, if requested. + */ + if (wildcard == INPLOOKUP_WILDCARD) { + struct inpcb *local_wild = NULL, *local_exact = NULL; + struct inpcb *jail_wild = NULL; + int injail; + + /* + * Order of socket selection - we always prefer jails. + * 1. jailed, non-wild. + * 2. jailed, wild. + * 3. non-jailed, non-wild. + * 4. non-jailed, wild. 
+ */ + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, + 0, pcbinfo->ipi_hashmask)]; + LIST_FOREACH(inp, head, inp_hash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + inp->inp_lport != lport) { + continue; + } + + /* XXX inp locking */ + if (faith && (inp->inp_flags & INP_FAITH) == 0) + continue; + + injail = prison_flag(inp->inp_cred, PR_IP6); + if (injail) { + if (prison_check_ip6(inp->inp_cred, + laddr) != 0) + continue; + } else { + if (local_exact != NULL) + continue; + } + + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { + if (injail) + return (inp); + else + local_exact = inp; + } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (injail) + jail_wild = inp; + else + local_wild = inp; + } + } /* LIST_FOREACH */ + + if (jail_wild != NULL) + return (jail_wild); + if (local_exact != NULL) + return (local_exact); + if (local_wild != NULL) + return (local_wild); + } /* if (wildcard == INPLOOKUP_WILDCARD) */ + + /* + * Not found. + */ + return (NULL); +} + +void +init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) +{ + struct ip6_hdr *ip; + + ip = mtod(m, struct ip6_hdr *); + bzero(sin6, sizeof(*sin6)); + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_addr = ip->ip6_src; + + (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */ + + return; +} diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h new file mode 100644 index 00000000..abc4a318 --- /dev/null +++ b/freebsd/sys/netinet6/in6_pcb.h @@ -0,0 +1,109 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_pcb.h,v 1.13 2001/02/06 09:16:53 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NETINET6_IN6_PCB_HH_ +#define _NETINET6_IN6_PCB_HH_ + +#ifdef _KERNEL +#define satosin6(sa) ((struct sockaddr_in6 *)(sa)) +#define sin6tosa(sin6) ((struct sockaddr *)(sin6)) +#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) + +void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *)); +void in6_losing __P((struct inpcb *)); +int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *)); +int in6_pcbconnect __P((struct inpcb *, struct sockaddr *, struct ucred *)); +void in6_pcbdisconnect __P((struct inpcb *)); +int in6_pcbladdr(struct inpcb *, struct sockaddr *, struct in6_addr *); +struct inpcb * + in6_pcblookup_local __P((struct inpcbinfo *, + struct in6_addr *, u_short, int, + struct ucred *)); +struct inpcb * + in6_pcblookup_hash __P((struct inpcbinfo *, + struct in6_addr *, u_int, struct in6_addr *, + u_int, int, struct ifnet *)); +void in6_pcbnotify __P((struct inpcbinfo *, struct sockaddr *, + u_int, const 
struct sockaddr *, u_int, int, void *, + struct inpcb *(*)(struct inpcb *, int))); +#ifndef __rtems__ +struct inpcb * + in6_rtchange __P((struct inpcb *, int)); +#else +struct inpcb * + in6_rtchange(struct inpcb *inp, int errno); +#endif +struct sockaddr * + in6_sockaddr __P((in_port_t port, struct in6_addr *addr_p)); +struct sockaddr * + in6_v4mapsin6_sockaddr __P((in_port_t port, struct in_addr *addr_p)); +int in6_getpeeraddr __P((struct socket *so, struct sockaddr **nam)); +int in6_getsockaddr __P((struct socket *so, struct sockaddr **nam)); +int in6_mapped_sockaddr __P((struct socket *so, struct sockaddr **nam)); +int in6_mapped_peeraddr __P((struct socket *so, struct sockaddr **nam)); +int in6_selecthlim __P((struct in6pcb *, struct ifnet *)); +int in6_pcbsetport __P((struct in6_addr *, struct inpcb *, struct ucred *)); +void init_sin6 __P((struct sockaddr_in6 *sin6, struct mbuf *m)); +#endif /* _KERNEL */ + +#endif /* !_NETINET6_IN6_PCB_HH_ */ diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c new file mode 100644 index 00000000..0cd0858d --- /dev/null +++ b/freebsd/sys/netinet6/in6_proto.c @@ -0,0 +1,597 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> +#include <freebsd/local/opt_ipstealth.h> +#include <freebsd/local/opt_sctp.h> +#include <freebsd/local/opt_mpath.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/jail.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/domain.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/sysctl.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/radix.h> +#include <freebsd/net/route.h> +#ifdef RADIX_MPATH +#include <freebsd/net/radix_mpath.h> +#endif + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_systm.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/ip_encap.h> +#include <freebsd/netinet/ip.h> +#include <freebsd/netinet/ip_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet/icmp6.h> + +#include 
<freebsd/netinet/tcp.h> +#include <freebsd/netinet/tcp_timer.h> +#include <freebsd/netinet/tcp_var.h> +#include <freebsd/netinet/udp.h> +#include <freebsd/netinet/udp_var.h> +#include <freebsd/netinet6/tcp6_var.h> +#include <freebsd/netinet6/raw_ip6.h> +#include <freebsd/netinet6/udp6_var.h> +#include <freebsd/netinet6/pim6_var.h> +#include <freebsd/netinet6/nd6.h> + +#ifdef SCTP +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/sctp_pcb.h> +#include <freebsd/netinet/sctp.h> +#include <freebsd/netinet/sctp_var.h> +#include <freebsd/netinet6/sctp6_var.h> +#endif /* SCTP */ + +#ifdef IPSEC +#include <freebsd/netipsec/ipsec.h> +#include <freebsd/netipsec/ipsec6.h> +#endif /* IPSEC */ + +#include <freebsd/netinet6/ip6protosw.h> + +/* + * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. + */ + +extern struct domain inet6domain; +static struct pr_usrreqs nousrreqs; + +#define PR_LISTEN 0 +#define PR_ABRTACPTDIS 0 + +/* Spacer for loadable protocols. */ +#define IP6PROTOSPACER \ +{ \ + .pr_domain = &inet6domain, \ + .pr_protocol = PROTO_SPACER, \ + .pr_usrreqs = &nousrreqs \ +} + +struct ip6protosw inet6sw[] = { +{ + .pr_type = 0, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_IPV6, + .pr_init = ip6_init, +#ifdef VIMAGE + .pr_destroy = ip6_destroy, +#endif + .pr_slowtimo = frag6_slowtimo, + .pr_drain = frag6_drain, + .pr_usrreqs = &nousrreqs, +}, +{ + .pr_type = SOCK_DGRAM, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_UDP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = udp6_input, + .pr_ctlinput = udp6_ctlinput, + .pr_ctloutput = ip6_ctloutput, + .pr_usrreqs = &udp6_usrreqs, +}, +{ + .pr_type = SOCK_STREAM, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_TCP, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN, + .pr_input = tcp6_input, + .pr_ctlinput = tcp6_ctlinput, + .pr_ctloutput = tcp_ctloutput, +#ifndef INET /* don't call initialization and timeout routines twice */ + .pr_init = tcp_init, + .pr_slowtimo = tcp_slowtimo, +#endif + 
.pr_drain = tcp_drain, + .pr_usrreqs = &tcp6_usrreqs, +}, +#ifdef SCTP +{ + .pr_type = SOCK_DGRAM, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_SCTP, + .pr_flags = PR_WANTRCVD, + .pr_input = sctp6_input, + .pr_ctlinput = sctp6_ctlinput, + .pr_ctloutput = sctp_ctloutput, + .pr_drain = sctp_drain, + .pr_usrreqs = &sctp6_usrreqs +}, +{ + .pr_type = SOCK_SEQPACKET, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_SCTP, + .pr_flags = PR_WANTRCVD, + .pr_input = sctp6_input, + .pr_ctlinput = sctp6_ctlinput, + .pr_ctloutput = sctp_ctloutput, + .pr_drain = sctp_drain, + .pr_usrreqs = &sctp6_usrreqs +}, + +{ + .pr_type = SOCK_STREAM, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_SCTP, + .pr_flags = PR_WANTRCVD, + .pr_input = sctp6_input, + .pr_ctlinput = sctp6_ctlinput, + .pr_ctloutput = sctp_ctloutput, + .pr_drain = sctp_drain, + .pr_usrreqs = &sctp6_usrreqs +}, +#endif /* SCTP */ +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_RAW, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = rip6_input, + .pr_output = rip6_output, + .pr_ctlinput = rip6_ctlinput, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_ICMPV6, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = icmp6_input, + .pr_output = rip6_output, + .pr_ctlinput = rip6_ctlinput, + .pr_ctloutput = rip6_ctloutput, + .pr_fasttimo = icmp6_fasttimo, + .pr_slowtimo = icmp6_slowtimo, + .pr_usrreqs = &rip6_usrreqs +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_DSTOPTS, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = dest6_input, + .pr_usrreqs = &nousrreqs +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_ROUTING, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = route6_input, + .pr_usrreqs = &nousrreqs +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_FRAGMENT, + .pr_flags = 
PR_ATOMIC|PR_ADDR, + .pr_input = frag6_input, + .pr_usrreqs = &nousrreqs +}, +#ifdef IPSEC +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_AH, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ipsec6_common_input, + .pr_usrreqs = &nousrreqs, +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_ESP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ipsec6_common_input, + .pr_ctlinput = esp6_ctlinput, + .pr_usrreqs = &nousrreqs, +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_IPCOMP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ipsec6_common_input, + .pr_usrreqs = &nousrreqs, +}, +#endif /* IPSEC */ +#ifdef INET +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_IPV4, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap6_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_init = encap_init, + .pr_usrreqs = &rip6_usrreqs +}, +#endif /* INET */ +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_IPV6, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap6_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_init = encap_init, + .pr_usrreqs = &rip6_usrreqs +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_PIM, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap6_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs +}, +/* Spacer n-times for loadable protocols. 
*/ +IP6PROTOSPACER, +IP6PROTOSPACER, +IP6PROTOSPACER, +IP6PROTOSPACER, +IP6PROTOSPACER, +IP6PROTOSPACER, +IP6PROTOSPACER, +IP6PROTOSPACER, +/* raw wildcard */ +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = rip6_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs +}, +}; + +extern int in6_inithead(void **, int); +#ifdef VIMAGE +extern int in6_detachhead(void **, int); +#endif + +struct domain inet6domain = { + .dom_family = AF_INET6, + .dom_name = "internet6", + .dom_protosw = (struct protosw *)inet6sw, + .dom_protoswNPROTOSW = (struct protosw *) + &inet6sw[sizeof(inet6sw)/sizeof(inet6sw[0])], +#ifdef RADIX_MPATH + .dom_rtattach = rn6_mpath_inithead, +#else + .dom_rtattach = in6_inithead, +#endif +#ifdef VIMAGE + .dom_rtdetach = in6_detachhead, +#endif + .dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3, + .dom_maxrtkey = sizeof(struct sockaddr_in6), + .dom_ifattach = in6_domifattach, + .dom_ifdetach = in6_domifdetach +}; + +VNET_DOMAIN_SET(inet6); + +/* + * Internet configuration info + */ +#ifndef IPV6FORWARDING +#ifdef GATEWAY6 +#define IPV6FORWARDING 1 /* forward IP6 packets not for us */ +#else +#define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */ +#endif /* GATEWAY6 */ +#endif /* !IPV6FORWARDING */ + +#ifndef IPV6_SENDREDIRECTS +#define IPV6_SENDREDIRECTS 1 +#endif + +VNET_DEFINE(int, ip6_forwarding) = IPV6FORWARDING; /* act as router? */ +VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS; +VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM; +VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS; +VNET_DEFINE(int, ip6_accept_rtadv) = 0; +VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */ +VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */ +VNET_DEFINE(int, ip6_log_interval) = 5; +VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we + * process? 
*/ +VNET_DEFINE(int, ip6_dad_count) = 1; /* DupAddrDetectionTransmits */ +VNET_DEFINE(int, ip6_auto_flowlabel) = 1; +VNET_DEFINE(int, ip6_use_deprecated) = 1;/* allow deprecated addr + * (RFC2462 5.5.4) */ +VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix + * walk list every 5 sec. */ +VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? */ +VNET_DEFINE(int, ip6_v6only) = 1; + +VNET_DEFINE(int, ip6_keepfaith) = 0; +VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L; +#ifdef IPSTEALTH +VNET_DEFINE(int, ip6stealth) = 0; +#endif +VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS + * (RFC 4861) */ + +/* icmp6 */ +/* + * BSDI4 defines these variables in in_proto.c... + * XXX: what if we don't define INET? Should we define pmtu6_expire + * or so? (jinmei@kame.net 19990310) + */ +VNET_DEFINE(int, pmtu_expire) = 60*10; +VNET_DEFINE(int, pmtu_probe) = 60*2; + +/* raw IP6 parameters */ +/* + * Nominal space allocated to a raw ip socket. + */ +#define RIPV6SNDQ 8192 +#define RIPV6RCVQ 8192 + +VNET_DEFINE(u_long, rip6_sendspace) = RIPV6SNDQ; +VNET_DEFINE(u_long, rip6_recvspace) = RIPV6RCVQ; + +/* ICMPV6 parameters */ +VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */ +VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */ +VNET_DEFINE(int, icmp6errppslim) = 100; /* 100pps */ +/* control how to respond to NI queries */ +VNET_DEFINE(int, icmp6_nodeinfo) = + (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK); + +/* UDP on IP6 parameters */ +VNET_DEFINE(int, udp6_sendspace) = 9216;/* really max datagram size */ +VNET_DEFINE(int, udp6_recvspace) = 40 * (1024 + sizeof(struct sockaddr_in6)); + /* 40 1K datagrams */ + +/* + * sysctl related items. 
+ */ +SYSCTL_NODE(_net, PF_INET6, inet6, CTLFLAG_RW, 0, + "Internet6 Family"); + +/* net.inet6 */ +SYSCTL_NODE(_net_inet6, IPPROTO_IPV6, ip6, CTLFLAG_RW, 0, "IP6"); +SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, CTLFLAG_RW, 0, "ICMP6"); +SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, CTLFLAG_RW, 0, "UDP6"); +SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW, 0, "TCP6"); +#ifdef SCTP +SYSCTL_NODE(_net_inet6, IPPROTO_SCTP, sctp6, CTLFLAG_RW, 0, "SCTP6"); +#endif +#ifdef IPSEC +SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6"); +#endif /* IPSEC */ + +/* net.inet6.ip6 */ +static int +sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS) +{ + int error = 0; + int old; + + VNET_SYSCTL_ARG(req, arg1); + + error = SYSCTL_OUT(req, arg1, sizeof(int)); + if (error || !req->newptr) + return (error); + old = V_ip6_temp_preferred_lifetime; + error = SYSCTL_IN(req, arg1, sizeof(int)); + if (V_ip6_temp_preferred_lifetime < + V_ip6_desync_factor + V_ip6_temp_regen_advance) { + V_ip6_temp_preferred_lifetime = old; + return (EINVAL); + } + return (error); +} + +static int +sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS) +{ + int error = 0; + int old; + + VNET_SYSCTL_ARG(req, arg1); + + error = SYSCTL_OUT(req, arg1, sizeof(int)); + if (error || !req->newptr) + return (error); + old = V_ip6_temp_valid_lifetime; + error = SYSCTL_IN(req, arg1, sizeof(int)); + if (V_ip6_temp_valid_lifetime < V_ip6_temp_preferred_lifetime) { + V_ip6_temp_preferred_lifetime = old; + return (EINVAL); + } + return (error); +} + +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_RW, + &VNET_NAME(ip6_forwarding), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_RW, + &VNET_NAME(ip6_sendredirects), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW, + &VNET_NAME(ip6_defhlim), 0, ""); +SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RD, + &VNET_NAME(ip6stat), ip6stat, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, 
IPV6CTL_MAXFRAGPACKETS, maxfragpackets, + CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, + CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, CTLFLAG_RW, + &VNET_NAME(ip6_keepfaith), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, + CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit, + CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, CTLFLAG_RW, + &VNET_NAME(ip6_dad_count), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel, + CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim, + CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, ""); +SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version, + CTLFLAG_RD, __KAME_VERSION, 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated, + CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune, CTLFLAG_RW, + &VNET_NAME(ip6_rr_prune), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr, + CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0, ""); +SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime, + CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ip6_temp_preferred_lifetime), 0, + sysctl_ip6_temppltime, "I", ""); +SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime, + CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ip6_temp_valid_lifetime), 0, + sysctl_ip6_tempvltime, "I", ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only, CTLFLAG_RW, + &VNET_NAME(ip6_v6only), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal, + CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0, ""); +SYSCTL_VNET_STRUCT(_net_inet6_ip6, 
IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD, + &VNET_NAME(rip6stat), rip6stat, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr, + CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone, + CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, CTLFLAG_RW, + &VNET_NAME(ip6_maxfrags), 0, ""); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_RW, + &VNET_NAME(ip6_mcast_pmtu), 0, ""); +#ifdef IPSTEALTH +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW, + &VNET_NAME(ip6stealth), 0, ""); +#endif + +/* net.inet6.icmp6 */ +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept, + CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout, + CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, ""); +SYSCTL_VNET_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RD, + &VNET_NAME(icmp6stat), icmp6stat, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW, + &VNET_NAME(nd6_prune), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_RW, + &VNET_NAME(nd6_delay), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries, + CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries, + CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback, + CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW, + &VNET_NAME(icmp6_nodeinfo), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit, + CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint, + 
CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_RW, + &VNET_NAME(nd6_debug), 0, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, + nd6_onlink_ns_rfc4861, CTLFLAG_RW, &VNET_NAME(nd6_onlink_ns_rfc4861), + 0, "Accept 'on-link' nd6 NS in compliance with RFC 4861."); diff --git a/freebsd/sys/netinet6/in6_rmx.c b/freebsd/sys/netinet6/in6_rmx.c new file mode 100644 index 00000000..de867c36 --- /dev/null +++ b/freebsd/sys/netinet6/in6_rmx.c @@ -0,0 +1,449 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $ + */ + +/*- + * Copyright 1994, 1995 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * This code does two things necessary for the enhanced TCP metrics to + * function in a useful manner: + * 1) It marks all non-host routes as `cloning', thus ensuring that + * every actual reference to such a route actually gets turned + * into a reference to a host route to the specific destination + * requested. + * 2) When such routes lose all their references, it arranges for them + * to be deleted in some random collection of circumstances, so that + * a large quantity of stale routing data is not kept in kernel memory + * indefinitely. See in6_rtqtimo() below for the exact mechanism. 
+ */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/sysctl.h> +#include <freebsd/sys/queue.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/rwlock.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/callout.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/route.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/ip_var.h> +#include <freebsd/netinet/in_var.h> + +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> + +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet6/nd6.h> + +#include <freebsd/netinet/tcp.h> +#include <freebsd/netinet/tcp_seq.h> +#include <freebsd/netinet/tcp_timer.h> +#include <freebsd/netinet/tcp_var.h> + +extern int in6_inithead(void **head, int off); +#ifdef VIMAGE +extern int in6_detachhead(void **head, int off); +#endif + +#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ + +/* + * Do what we need to do when inserting a route. + */ +static struct radix_node * +in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, + struct radix_node *treenodes) +{ + struct rtentry *rt = (struct rtentry *)treenodes; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); + struct radix_node *ret; + + RADIX_NODE_HEAD_WLOCK_ASSERT(head); + if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + rt->rt_flags |= RTF_MULTICAST; + + /* + * A little bit of help for both IPv6 output and input: + * For local addresses, we make sure that RTF_LOCAL is set, + * with the thought that this might one day be used to speed up + * ip_input(). + * + * We also mark routes to multicast addresses as such, because + * it's easy to do and might be useful (but this is much more + * dubious since it's so easy to inspect the address). (This + * is done above.) 
+ * + * XXX + * should elaborate the code. + */ + if (rt->rt_flags & RTF_HOST) { + if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr) + ->sin6_addr, + &sin6->sin6_addr)) { + rt->rt_flags |= RTF_LOCAL; + } + } + + if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp) + rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp); + + ret = rn_addroute(v_arg, n_arg, head, treenodes); + if (ret == NULL) { + struct rtentry *rt2; + /* + * We are trying to add a net route, but can't. + * The following case should be allowed, so we'll make a + * special check for this: + * Two IPv6 addresses with the same prefix are assigned + * to a single interface. + * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1) + * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2) + * In this case, (*1) and (*2) want to add the same + * net route entry, 3ffe:0501:: -> if0. + * This case should not raise an error. + */ + rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED); + if (rt2) { + if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0) + && rt2->rt_gateway + && rt2->rt_gateway->sa_family == AF_LINK + && rt2->rt_ifp == rt->rt_ifp) { + ret = rt2->rt_nodes; + } + RTFREE_LOCKED(rt2); + } + } + return (ret); +} + +/* + * This code is the inverse of in6_clsroute: on first reference, if we + * were managing the route, stop doing so and set the expiration timer + * back off again.
+ */ +static struct radix_node * +in6_matroute(void *v_arg, struct radix_node_head *head) +{ + struct radix_node *rn = rn_match(v_arg, head); + struct rtentry *rt = (struct rtentry *)rn; + + if (rt) { + RT_LOCK(rt); + if (rt->rt_flags & RTPRF_OURS) { + rt->rt_flags &= ~RTPRF_OURS; + rt->rt_rmx.rmx_expire = 0; + } + RT_UNLOCK(rt); + } + return rn; +} + +SYSCTL_DECL(_net_inet6_ip6); + +static VNET_DEFINE(int, rtq_reallyold6) = 60*60; + /* one hour is ``really old'' */ +#define V_rtq_reallyold6 VNET(rtq_reallyold6) +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, CTLFLAG_RW, + &VNET_NAME(rtq_reallyold6) , 0, ""); + +static VNET_DEFINE(int, rtq_minreallyold6) = 10; + /* never automatically crank down to less */ +#define V_rtq_minreallyold6 VNET(rtq_minreallyold6) +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, + &VNET_NAME(rtq_minreallyold6) , 0, ""); + +static VNET_DEFINE(int, rtq_toomany6) = 128; + /* 128 cached routes is ``too many'' */ +#define V_rtq_toomany6 VNET(rtq_toomany6) +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, + &VNET_NAME(rtq_toomany6) , 0, ""); + +struct rtqk_arg { + struct radix_node_head *rnh; + int mode; + int updating; + int draining; + int killed; + int found; + time_t nextstop; +}; + +/* + * Get rid of old routes. When draining, this deletes everything, even when + * the timeout is not expired yet. When updating, this makes sure that + * nothing has a timeout longer than the current value of rtq_reallyold6. 
+ */ +static int +in6_rtqkill(struct radix_node *rn, void *rock) +{ + struct rtqk_arg *ap = rock; + struct rtentry *rt = (struct rtentry *)rn; + int err; + + RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh); + + if (rt->rt_flags & RTPRF_OURS) { + ap->found++; + + if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) { + if (rt->rt_refcnt > 0) + panic("rtqkill route really not free"); + + err = rtrequest(RTM_DELETE, + (struct sockaddr *)rt_key(rt), + rt->rt_gateway, rt_mask(rt), + rt->rt_flags|RTF_RNH_LOCKED, 0); + if (err) { + log(LOG_WARNING, "in6_rtqkill: error %d", err); + } else { + ap->killed++; + } + } else { + if (ap->updating + && (rt->rt_rmx.rmx_expire - time_uptime + > V_rtq_reallyold6)) { + rt->rt_rmx.rmx_expire = time_uptime + + V_rtq_reallyold6; + } + ap->nextstop = lmin(ap->nextstop, + rt->rt_rmx.rmx_expire); + } + } + + return 0; +} + +#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ +static VNET_DEFINE(int, rtq_timeout6) = RTQ_TIMEOUT; +static VNET_DEFINE(struct callout, rtq_timer6); + +#define V_rtq_timeout6 VNET(rtq_timeout6) +#define V_rtq_timer6 VNET(rtq_timer6) + +static void +in6_rtqtimo(void *rock) +{ + CURVNET_SET_QUIET((struct vnet *) rock); + struct radix_node_head *rnh; + struct rtqk_arg arg; + struct timeval atv; + static time_t last_adjusted_timeout = 0; + + rnh = rt_tables_get_rnh(0, AF_INET6); + if (rnh == NULL) { + CURVNET_RESTORE(); + return; + } + arg.found = arg.killed = 0; + arg.rnh = rnh; + arg.nextstop = time_uptime + V_rtq_timeout6; + arg.draining = arg.updating = 0; + RADIX_NODE_HEAD_LOCK(rnh); + rnh->rnh_walktree(rnh, in6_rtqkill, &arg); + RADIX_NODE_HEAD_UNLOCK(rnh); + + /* + * Attempt to be somewhat dynamic about this: + * If there are ``too many'' routes sitting around taking up space, + * then crank down the timeout, and see if we can't make some more + * go away. However, we make sure that we will never adjust more + * than once in rtq_timeout6 seconds, to keep from cranking down too + * hard. 
+ */ + if ((arg.found - arg.killed > V_rtq_toomany6) + && (time_uptime - last_adjusted_timeout >= V_rtq_timeout6) + && V_rtq_reallyold6 > V_rtq_minreallyold6) { + V_rtq_reallyold6 = 2*V_rtq_reallyold6 / 3; + if (V_rtq_reallyold6 < V_rtq_minreallyold6) { + V_rtq_reallyold6 = V_rtq_minreallyold6; + } + + last_adjusted_timeout = time_uptime; +#ifdef DIAGNOSTIC + log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold6 to %d", + V_rtq_reallyold6); +#endif + arg.found = arg.killed = 0; + arg.updating = 1; + RADIX_NODE_HEAD_LOCK(rnh); + rnh->rnh_walktree(rnh, in6_rtqkill, &arg); + RADIX_NODE_HEAD_UNLOCK(rnh); + } + + atv.tv_usec = 0; + atv.tv_sec = arg.nextstop - time_uptime; + callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock); + CURVNET_RESTORE(); +} + +/* + * Age old PMTUs. + */ +struct mtuex_arg { + struct radix_node_head *rnh; + time_t nextstop; +}; +static VNET_DEFINE(struct callout, rtq_mtutimer); +#define V_rtq_mtutimer VNET(rtq_mtutimer) + +static int +in6_mtuexpire(struct radix_node *rn, void *rock) +{ + struct rtentry *rt = (struct rtentry *)rn; + struct mtuex_arg *ap = rock; + + /* sanity */ + if (!rt) + panic("rt == NULL in in6_mtuexpire"); + + if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) { + if (rt->rt_rmx.rmx_expire <= time_uptime) { + rt->rt_flags |= RTF_PROBEMTU; + } else { + ap->nextstop = lmin(ap->nextstop, + rt->rt_rmx.rmx_expire); + } + } + + return 0; +} + +#define MTUTIMO_DEFAULT (60*1) + +static void +in6_mtutimo(void *rock) +{ + CURVNET_SET_QUIET((struct vnet *) rock); + struct radix_node_head *rnh; + struct mtuex_arg arg; + struct timeval atv; + + rnh = rt_tables_get_rnh(0, AF_INET6); + if (rnh == NULL) { + CURVNET_RESTORE(); + return; + } + arg.rnh = rnh; + arg.nextstop = time_uptime + MTUTIMO_DEFAULT; + RADIX_NODE_HEAD_LOCK(rnh); + rnh->rnh_walktree(rnh, in6_mtuexpire, &arg); + RADIX_NODE_HEAD_UNLOCK(rnh); + + atv.tv_usec = 0; + atv.tv_sec = arg.nextstop - time_uptime; + if (atv.tv_sec < 0) { + printf("invalid mtu 
expiration time on routing table\n"); + arg.nextstop = time_uptime + 30; /* last resort */ + atv.tv_sec = 30; + } + callout_reset(&V_rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock); + CURVNET_RESTORE(); +} + +/* + * Initialize our routing tree. + * XXX MRT When off == 0, we are being called from vfs_export.c + * so just set up their table and leave. (we know what the correct + * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd + * see also comments in in_inithead() vfs_export.c and domain.h + */ +int +in6_inithead(void **head, int off) +{ + struct radix_node_head *rnh; + + if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3)) + return 0; /* See above */ + + if (off == 0) /* See above */ + return 1; /* only do the rest for the real thing */ + + rnh = *head; + KASSERT(rnh == rt_tables_get_rnh(0, AF_INET6), ("rnh?")); + rnh->rnh_addaddr = in6_addroute; + rnh->rnh_matchaddr = in6_matroute; + callout_init(&V_rtq_timer6, CALLOUT_MPSAFE); + callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE); + in6_rtqtimo(curvnet); /* kick off timeout first time */ + in6_mtutimo(curvnet); /* kick off timeout first time */ + return 1; +} + +#ifdef VIMAGE +int +in6_detachhead(void **head, int off) +{ + + callout_drain(&V_rtq_timer6); + callout_drain(&V_rtq_mtutimer); + return (1); +} +#endif diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c new file mode 100644 index 00000000..a3914c3f --- /dev/null +++ b/freebsd/sys/netinet6/in6_src.c @@ -0,0 +1,1204 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_mpath.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/priv.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/sysctl.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/jail.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/sx.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/route.h> +#include <freebsd/net/if_llatbl.h> +#ifdef RADIX_MPATH +#include <freebsd/net/radix_mpath.h> +#endif + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include 
<freebsd/netinet/in_systm.h> +#include <freebsd/netinet/ip.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/ip_var.h> +#include <freebsd/netinet/udp.h> +#include <freebsd/netinet/udp_var.h> + +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/in6_pcb.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet6/nd6.h> + +static struct mtx addrsel_lock; +#define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) +#define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) +#define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) +#define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) + +static struct sx addrsel_sxlock; +#define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock") +#define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock) +#define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock) +#define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock) +#define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) + +#define ADDR_LABEL_NOTAPP (-1) +static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy); +#define V_defaultaddrpolicy VNET(defaultaddrpolicy) + +VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; + +static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, + struct ip6_moptions *, struct route_in6 *, struct ifnet **, + struct rtentry **, int)); +static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, + struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); + +static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); + +static void init_policy_queue(void); +static int add_addrsel_policyent(struct in6_addrpolicy *); +static int delete_addrsel_policyent(struct in6_addrpolicy *); +static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), + void *)); +static int dump_addrsel_policyent(struct in6_addrpolicy *, void *); +static struct in6_addrpolicy 
*match_addrsel_policy(struct sockaddr_in6 *); + +/* + * Return an IPv6 address, which is the most appropriate for a given + * destination and user specified options. + * If necessary, this function lookups the routing table and returns + * an entry to the caller for later use. + */ +#define REPLACE(r) do {\ + if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \ + sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ + V_ip6stat.ip6s_sources_rule[(r)]++; \ + /* { \ + char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ + printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ + } */ \ + goto replace; \ +} while(0) +#define NEXT(r) do {\ + if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \ + sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ + V_ip6stat.ip6s_sources_rule[(r)]++; \ + /* { \ + char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ + printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? 
ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ + } */ \ + goto next; /* XXX: we can't use 'continue' here */ \ +} while(0) +#define BREAK(r) do { \ + if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \ + sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ + V_ip6stat.ip6s_sources_rule[(r)]++; \ + goto out; /* XXX: we can't use 'break' here */ \ +} while(0) + +int +in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct inpcb *inp, struct route_in6 *ro, struct ucred *cred, + struct ifnet **ifpp, struct in6_addr *srcp) +{ + struct in6_addr dst, tmp; + struct ifnet *ifp = NULL; + struct in6_ifaddr *ia = NULL, *ia_best = NULL; + struct in6_pktinfo *pi = NULL; + int dst_scope = -1, best_scope = -1, best_matchlen = -1; + struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; + u_int32_t odstzone; + int prefer_tempaddr; + int error; + struct ip6_moptions *mopts; + + KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__)); + + dst = dstsock->sin6_addr; /* make a copy for local operation */ + if (ifpp) + *ifpp = NULL; + + if (inp != NULL) { + INP_LOCK_ASSERT(inp); + mopts = inp->in6p_moptions; + } else { + mopts = NULL; + } + + /* + * If the source address is explicitly specified by the caller, + * check if the requested source address is indeed a unicast address + * assigned to the node, and can be used as the packet's source + * address. If everything is okay, use the address as source. + */ + if (opts && (pi = opts->ip6po_pktinfo) && + !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { + struct sockaddr_in6 srcsock; + struct in6_ifaddr *ia6; + + /* get the outgoing interface */ + if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) + return (error); + + /* + * determine the appropriate zone id of the source based on + * the zone of the destination and the outgoing interface. 
+ * If the specified address is ambiguous wrt the scope zone, + * the interface must be specified; otherwise, ifa_ifwithaddr() + * will fail matching the address. + */ + bzero(&srcsock, sizeof(srcsock)); + srcsock.sin6_family = AF_INET6; + srcsock.sin6_len = sizeof(srcsock); + srcsock.sin6_addr = pi->ipi6_addr; + if (ifp) { + error = in6_setscope(&srcsock.sin6_addr, ifp, NULL); + if (error) + return (error); + } + if (cred != NULL && (error = prison_local_ip6(cred, + &srcsock.sin6_addr, (inp != NULL && + (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) + return (error); + + ia6 = (struct in6_ifaddr *)ifa_ifwithaddr( + (struct sockaddr *)&srcsock); + if (ia6 == NULL || + (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { + if (ia6 != NULL) + ifa_free(&ia6->ia_ifa); + return (EADDRNOTAVAIL); + } + pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ + if (ifpp) + *ifpp = ifp; + bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp)); + ifa_free(&ia6->ia_ifa); + return (0); + } + + /* + * Otherwise, if the socket has already bound the source, just use it. + */ + if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (cred != NULL && + (error = prison_local_ip6(cred, &inp->in6p_laddr, + ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) + return (error); + bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp)); + return (0); + } + + /* + * Bypass source address selection and use the primary jail IP + * if requested. + */ + if (cred != NULL && !prison_saddrsel_ip6(cred, srcp)) + return (0); + + /* + * If the address is not specified, choose the best one based on + * the outgoing interface and the destination address. 
+ */ + /* get the outgoing interface */ + if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) + return (error); + +#ifdef DIAGNOSTIC + if (ifp == NULL) /* this should not happen */ + panic("in6_selectsrc: NULL ifp"); +#endif + error = in6_setscope(&dst, ifp, &odstzone); + if (error) + return (error); + + IN6_IFADDR_RLOCK(); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + int new_scope = -1, new_matchlen = -1; + struct in6_addrpolicy *new_policy = NULL; + u_int32_t srczone, osrczone, dstzone; + struct in6_addr src; + struct ifnet *ifp1 = ia->ia_ifp; + + /* + * We'll never take an address that breaks the scope zone + * of the destination. We also skip an address if its zone + * does not contain the outgoing interface. + * XXX: we should probably use sin6_scope_id here. + */ + if (in6_setscope(&dst, ifp1, &dstzone) || + odstzone != dstzone) { + continue; + } + src = ia->ia_addr.sin6_addr; + if (in6_setscope(&src, ifp, &osrczone) || + in6_setscope(&src, ifp1, &srczone) || + osrczone != srczone) { + continue; + } + + /* avoid unusable addresses */ + if ((ia->ia6_flags & + (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { + continue; + } + if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) + continue; + + /* If jailed only take addresses of the jail into account. 
*/ + if (cred != NULL && + prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0) + continue; + + /* Rule 1: Prefer same address */ + if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { + ia_best = ia; + BREAK(1); /* there should be no better candidate */ + } + + if (ia_best == NULL) + REPLACE(0); + + /* Rule 2: Prefer appropriate scope */ + if (dst_scope < 0) + dst_scope = in6_addrscope(&dst); + new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); + if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { + if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) + REPLACE(2); + NEXT(2); + } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { + if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) + NEXT(2); + REPLACE(2); + } + + /* + * Rule 3: Avoid deprecated addresses. Note that the case of + * !ip6_use_deprecated is already rejected above. + */ + if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) + NEXT(3); + if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) + REPLACE(3); + + /* Rule 4: Prefer home addresses */ + /* + * XXX: This is a TODO. We should probably merge the MIP6 + * case above. + */ + + /* Rule 5: Prefer outgoing interface */ + if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) + NEXT(5); + if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) + REPLACE(5); + + /* + * Rule 6: Prefer matching label + * Note that best_policy should be non-NULL here. + */ + if (dst_policy == NULL) + dst_policy = lookup_addrsel_policy(dstsock); + if (dst_policy->label != ADDR_LABEL_NOTAPP) { + new_policy = lookup_addrsel_policy(&ia->ia_addr); + if (dst_policy->label == best_policy->label && + dst_policy->label != new_policy->label) + NEXT(6); + if (dst_policy->label != best_policy->label && + dst_policy->label == new_policy->label) + REPLACE(6); + } + + /* + * Rule 7: Prefer public addresses. + * We allow users to reverse the logic by configuring + * a sysctl variable, so that privacy conscious users can + * always prefer temporary addresses. 
+ */ + if (opts == NULL || + opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { + prefer_tempaddr = V_ip6_prefer_tempaddr; + } else if (opts->ip6po_prefer_tempaddr == + IP6PO_TEMPADDR_NOTPREFER) { + prefer_tempaddr = 0; + } else + prefer_tempaddr = 1; + if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && + (ia->ia6_flags & IN6_IFF_TEMPORARY)) { + if (prefer_tempaddr) + REPLACE(7); + else + NEXT(7); + } + if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && + !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { + if (prefer_tempaddr) + NEXT(7); + else + REPLACE(7); + } + + /* + * Rule 8: prefer addresses on alive interfaces. + * This is a KAME specific rule. + */ + if ((ia_best->ia_ifp->if_flags & IFF_UP) && + !(ia->ia_ifp->if_flags & IFF_UP)) + NEXT(8); + if (!(ia_best->ia_ifp->if_flags & IFF_UP) && + (ia->ia_ifp->if_flags & IFF_UP)) + REPLACE(8); + + /* + * Rule 14: Use longest matching prefix. + * Note: in the address selection draft, this rule is + * documented as "Rule 8". However, since it is also + * documented that this rule can be overridden, we assign + * a large number so that it is easy to assign smaller numbers + * to more preferred rules. + */ + new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); + if (best_matchlen < new_matchlen) + REPLACE(14); + if (new_matchlen < best_matchlen) + NEXT(14); + + /* Rule 15 is reserved. */ + + /* + * Last resort: just keep the current candidate. + * Or, do we need more rules? + */ + continue; + + replace: + ia_best = ia; + best_scope = (new_scope >= 0 ? new_scope : + in6_addrscope(&ia_best->ia_addr.sin6_addr)); + best_policy = (new_policy ? new_policy : + lookup_addrsel_policy(&ia_best->ia_addr)); + best_matchlen = (new_matchlen >= 0 ? 
new_matchlen : + in6_matchlen(&ia_best->ia_addr.sin6_addr, + &dst)); + + next: + continue; + + out: + break; + } + + if ((ia = ia_best) == NULL) { + IN6_IFADDR_RUNLOCK(); + return (EADDRNOTAVAIL); + } + + /* + * At this point at least one of the addresses belonged to the jail + * but it could still be, that we want to further restrict it, e.g. + * theoratically IN6_IS_ADDR_LOOPBACK. + * It must not be IN6_IS_ADDR_UNSPECIFIED anymore. + * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should + * let all others previously selected pass. + * Use tmp to not change ::1 on lo0 to the primary jail address. + */ + tmp = ia->ia_addr.sin6_addr; + if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL && + (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) { + IN6_IFADDR_RUNLOCK(); + return (EADDRNOTAVAIL); + } + + if (ifpp) + *ifpp = ifp; + + bcopy(&tmp, srcp, sizeof(*srcp)); + IN6_IFADDR_RUNLOCK(); + return (0); +} + +/* + * clone - meaningful only for bsdi and freebsd + */ +static int +selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct ip6_moptions *mopts, struct route_in6 *ro, + struct ifnet **retifp, struct rtentry **retrt, int norouteok) +{ + int error = 0; + struct ifnet *ifp = NULL; + struct rtentry *rt = NULL; + struct sockaddr_in6 *sin6_next; + struct in6_pktinfo *pi = NULL; + struct in6_addr *dst = &dstsock->sin6_addr; +#if 0 + char ip6buf[INET6_ADDRSTRLEN]; + + if (dstsock->sin6_addr.s6_addr32[0] == 0 && + dstsock->sin6_addr.s6_addr32[1] == 0 && + !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { + printf("in6_selectroute: strange destination %s\n", + ip6_sprintf(ip6buf, &dstsock->sin6_addr)); + } else { + printf("in6_selectroute: destination = %s%%%d\n", + ip6_sprintf(ip6buf, &dstsock->sin6_addr), + dstsock->sin6_scope_id); /* for debug */ + } +#endif + + /* If the caller specify the outgoing interface explicitly, use it. 
*/ + if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { + /* XXX boundary check is assumed to be already done. */ + ifp = ifnet_byindex(pi->ipi6_ifindex); + if (ifp != NULL && + (norouteok || retrt == NULL || + IN6_IS_ADDR_MULTICAST(dst))) { + /* + * we do not have to check or get the route for + * multicast. + */ + goto done; + } else + goto getroute; + } + + /* + * If the destination address is a multicast address and the outgoing + * interface for the address is specified by the caller, use it. + */ + if (IN6_IS_ADDR_MULTICAST(dst) && + mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { + goto done; /* we do not need a route for multicast. */ + } + + getroute: + /* + * If the next hop address for the packet is specified by the caller, + * use it as the gateway. + */ + if (opts && opts->ip6po_nexthop) { + struct route_in6 *ron; + struct llentry *la; + + sin6_next = satosin6(opts->ip6po_nexthop); + + /* at this moment, we only support AF_INET6 next hops */ + if (sin6_next->sin6_family != AF_INET6) { + error = EAFNOSUPPORT; /* or should we proceed? */ + goto done; + } + + /* + * If the next hop is an IPv6 address, then the node identified + * by that address must be a neighbor of the sending host. + */ + ron = &opts->ip6po_nextroute; + /* + * XXX what do we do here? + * PLZ to be fixing + */ + + + if (ron->ro_rt == NULL) { + rtalloc((struct route *)ron); /* multi path case? 
*/ + if (ron->ro_rt == NULL) { + if (ron->ro_rt) { + RTFREE(ron->ro_rt); + ron->ro_rt = NULL; + } + error = EHOSTUNREACH; + goto done; + } + } + + rt = ron->ro_rt; + ifp = rt->rt_ifp; + IF_AFDATA_LOCK(ifp); + la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6_next->sin6_addr); + IF_AFDATA_UNLOCK(ifp); + if (la != NULL) + LLE_RUNLOCK(la); + else { + error = EHOSTUNREACH; + goto done; + } +#if 0 + if ((ron->ro_rt && + (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != + (RTF_UP | RTF_LLINFO)) || + !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, + &sin6_next->sin6_addr)) { + if (ron->ro_rt) { + RTFREE(ron->ro_rt); + ron->ro_rt = NULL; + } + *satosin6(&ron->ro_dst) = *sin6_next; + } + if (ron->ro_rt == NULL) { + rtalloc((struct route *)ron); /* multi path case? */ + if (ron->ro_rt == NULL || + !(ron->ro_rt->rt_flags & RTF_LLINFO)) { + if (ron->ro_rt) { + RTFREE(ron->ro_rt); + ron->ro_rt = NULL; + } + error = EHOSTUNREACH; + goto done; + } + } +#endif + + /* + * When cloning is required, try to allocate a route to the + * destination so that the caller can store path MTU + * information. + */ + goto done; + } + + /* + * Use a cached route if it exists and is valid, else try to allocate + * a new one. Note that we should check the address family of the + * cached destination, in case of sharing the cache with IPv4. 
+ */ + if (ro) { + if (ro->ro_rt && + (!(ro->ro_rt->rt_flags & RTF_UP) || + ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || + !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, + dst))) { + RTFREE(ro->ro_rt); + ro->ro_rt = (struct rtentry *)NULL; + } + if (ro->ro_rt == (struct rtentry *)NULL) { + struct sockaddr_in6 *sa6; + + /* No route yet, so try to acquire one */ + bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); + sa6 = (struct sockaddr_in6 *)&ro->ro_dst; + *sa6 = *dstsock; + sa6->sin6_scope_id = 0; + +#ifdef RADIX_MPATH + rtalloc_mpath((struct route *)ro, + ntohl(sa6->sin6_addr.s6_addr32[3])); +#else + ro->ro_rt = rtalloc1(&((struct route *)ro) + ->ro_dst, 0, 0UL); + if (ro->ro_rt) + RT_UNLOCK(ro->ro_rt); +#endif + } + + /* + * do not care about the result if we have the nexthop + * explicitly specified. + */ + if (opts && opts->ip6po_nexthop) + goto done; + + if (ro->ro_rt) { + ifp = ro->ro_rt->rt_ifp; + + if (ifp == NULL) { /* can this really happen? */ + RTFREE(ro->ro_rt); + ro->ro_rt = NULL; + } + } + if (ro->ro_rt == NULL) + error = EHOSTUNREACH; + rt = ro->ro_rt; + + /* + * Check if the outgoing interface conflicts with + * the interface specified by ipi6_ifindex (if specified). + * Note that loopback interface is always okay. + * (this may happen when we are sending a packet to one of + * our own addresses.) + */ + if (ifp && opts && opts->ip6po_pktinfo && + opts->ip6po_pktinfo->ipi6_ifindex) { + if (!(ifp->if_flags & IFF_LOOPBACK) && + ifp->if_index != + opts->ip6po_pktinfo->ipi6_ifindex) { + error = EHOSTUNREACH; + goto done; + } + } + } + + done: + if (ifp == NULL && rt == NULL) { + /* + * This can happen if the caller did not pass a cached route + * nor any other hints. We treat this case an error. + */ + error = EHOSTUNREACH; + } + if (error == EHOSTUNREACH) + V_ip6stat.ip6s_noroute++; + + if (retifp != NULL) { + *retifp = ifp; + + /* + * Adjust the "outgoing" interface. 
If we're going to loop + * the packet back to ourselves, the ifp would be the loopback + * interface. However, we'd rather know the interface associated + * to the destination address (which should probably be one of + * our own addresses.) + */ + if (rt) { + if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && + (rt->rt_gateway->sa_family == AF_LINK)) + *retifp = + ifnet_byindex(((struct sockaddr_dl *) + rt->rt_gateway)->sdl_index); + } + } + + if (retrt != NULL) + *retrt = rt; /* rt may be NULL */ + + return (error); +} + +static int +in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp) +{ + int error; + struct route_in6 sro; + struct rtentry *rt = NULL; + + if (ro == NULL) { + bzero(&sro, sizeof(sro)); + ro = &sro; + } + + if ((error = selectroute(dstsock, opts, mopts, ro, retifp, + &rt, 1)) != 0) { + if (ro == &sro && rt && rt == sro.ro_rt) + RTFREE(rt); + return (error); + } + + /* + * do not use a rejected or black hole route. + * XXX: this check should be done in the L2 output routine. + * However, if we skipped this check here, we'd see the following + * scenario: + * - install a rejected route for a scoped address prefix + * (like fe80::/10) + * - send a packet to a destination that matches the scoped prefix, + * with ambiguity about the scope zone. + * - pick the outgoing interface from the route, and disambiguate the + * scope zone with the interface. + * - ip6_output() would try to get another route with the "new" + * destination, which may be valid. + * - we'd see no error on output. + * Although this may not be very harmful, it should still be confusing. + * We thus reject the case here. + */ + if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { + int flags = (rt->rt_flags & RTF_HOST ? 
EHOSTUNREACH : ENETUNREACH); + + if (ro == &sro && rt && rt == sro.ro_rt) + RTFREE(rt); + return (flags); + } + + if (ro == &sro && rt && rt == sro.ro_rt) + RTFREE(rt); + return (0); +} + +/* + * clone - meaningful only for bsdi and freebsd + */ +int +in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct ip6_moptions *mopts, struct route_in6 *ro, + struct ifnet **retifp, struct rtentry **retrt) +{ + + return (selectroute(dstsock, opts, mopts, ro, retifp, + retrt, 0)); +} + +/* + * Default hop limit selection. The precedence is as follows: + * 1. Hoplimit value specified via ioctl. + * 2. (If the outgoing interface is detected) the current + * hop limit of the interface specified by router advertisement. + * 3. The system default hoplimit. + */ +int +in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) +{ + + if (in6p && in6p->in6p_hops >= 0) + return (in6p->in6p_hops); + else if (ifp) + return (ND_IFINFO(ifp)->chlim); + else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { + struct route_in6 ro6; + struct ifnet *lifp; + + bzero(&ro6, sizeof(ro6)); + ro6.ro_dst.sin6_family = AF_INET6; + ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); + ro6.ro_dst.sin6_addr = in6p->in6p_faddr; + rtalloc((struct route *)&ro6); + if (ro6.ro_rt) { + lifp = ro6.ro_rt->rt_ifp; + RTFREE(ro6.ro_rt); + if (lifp) + return (ND_IFINFO(lifp)->chlim); + } else + return (V_ip6_defhlim); + } + return (V_ip6_defhlim); +} + +/* + * XXX: this is borrowed from in6_pcbbind(). If possible, we should + * share this function by all *bsd*... 
+ */ +int +in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) +{ + struct socket *so = inp->inp_socket; + u_int16_t lport = 0, first, last, *lastport; + int count, error, wild = 0, dorandom; + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; + + INP_INFO_WLOCK_ASSERT(pcbinfo); + INP_WLOCK_ASSERT(inp); + + error = prison_local_ip6(cred, laddr, + ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)); + if (error) + return(error); + + /* XXX: this is redundant when called from in6_pcbbind */ + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) + wild = INPLOOKUP_WILDCARD; + + inp->inp_flags |= INP_ANONPORT; + + if (inp->inp_flags & INP_HIGHPORT) { + first = V_ipport_hifirstauto; /* sysctl */ + last = V_ipport_hilastauto; + lastport = &pcbinfo->ipi_lasthi; + } else if (inp->inp_flags & INP_LOWPORT) { + error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); + if (error) + return error; + first = V_ipport_lowfirstauto; /* 1023 */ + last = V_ipport_lowlastauto; /* 600 */ + lastport = &pcbinfo->ipi_lastlow; + } else { + first = V_ipport_firstauto; /* sysctl */ + last = V_ipport_lastauto; + lastport = &pcbinfo->ipi_lastport; + } + + /* + * For UDP, use random port allocation as long as the user + * allows it. For TCP (and as of yet unknown) connections, + * use random port allocation only if the user allows it AND + * ipport_tick() allows it. + */ + if (V_ipport_randomized && + (!V_ipport_stoprandom || pcbinfo == &V_udbinfo)) + dorandom = 1; + else + dorandom = 0; + /* + * It makes no sense to do random port allocation if + * we have the only port available. + */ + if (first == last) + dorandom = 0; + /* Make sure to not include UDP packets in the count. */ + if (pcbinfo != &V_udbinfo) + V_ipport_tcpallocs++; + + /* + * Instead of having two loops further down counting up or down + * make sure that first is always <= last and go with only one + * code path implementing all logic. 
+ */ + if (first > last) { + u_int16_t aux; + + aux = first; + first = last; + last = aux; + } + + if (dorandom) + *lastport = first + (arc4random() % (last - first)); + + count = last - first; + + do { + if (count-- < 0) { /* completely used? */ + /* Undo an address bind that may have occurred. */ + inp->in6p_laddr = in6addr_any; + return (EADDRNOTAVAIL); + } + ++*lastport; + if (*lastport < first || *lastport > last) + *lastport = first; + lport = htons(*lastport); + } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, + lport, wild, cred)); + + inp->inp_lport = lport; + if (in_pcbinshash(inp) != 0) { + inp->in6p_laddr = in6addr_any; + inp->inp_lport = 0; + return (EAGAIN); + } + + return (0); +} + +void +addrsel_policy_init(void) +{ + + init_policy_queue(); + + /* initialize the "last resort" policy */ + bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy)); + V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; + + if (!IS_DEFAULT_VNET(curvnet)) + return; + + ADDRSEL_LOCK_INIT(); + ADDRSEL_SXLOCK_INIT(); +} + +static struct in6_addrpolicy * +lookup_addrsel_policy(struct sockaddr_in6 *key) +{ + struct in6_addrpolicy *match = NULL; + + ADDRSEL_LOCK(); + match = match_addrsel_policy(key); + + if (match == NULL) + match = &V_defaultaddrpolicy; + else + match->use++; + ADDRSEL_UNLOCK(); + + return (match); +} + +/* + * Subroutines to manage the address selection policy table via sysctl. 
+ */ +struct walkarg { + struct sysctl_req *w_req; +}; + +static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, + CTLFLAG_RD, in6_src_sysctl, ""); + +static int +in6_src_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct walkarg w; + + if (req->newptr) + return EPERM; + + bzero(&w, sizeof(w)); + w.w_req = req; + + return (walk_addrsel_policy(dump_addrsel_policyent, &w)); +} + +int +in6_src_ioctl(u_long cmd, caddr_t data) +{ + int i; + struct in6_addrpolicy ent0; + + if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) + return (EOPNOTSUPP); /* check for safety */ + + ent0 = *(struct in6_addrpolicy *)data; + + if (ent0.label == ADDR_LABEL_NOTAPP) + return (EINVAL); + /* check that the prefix mask is contiguous. */ + if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) + return (EINVAL); + /* clear any trailing garbage bits in the prefix address. */ + for (i = 0; i < 4; i++) { + ent0.addr.sin6_addr.s6_addr32[i] &= + ent0.addrmask.sin6_addr.s6_addr32[i]; + } + ent0.use = 0; + + switch (cmd) { + case SIOCAADDRCTL_POLICY: + return (add_addrsel_policyent(&ent0)); + case SIOCDADDRCTL_POLICY: + return (delete_addrsel_policyent(&ent0)); + } + + return (0); /* XXX: not reached; keeps compilers from warning */ +} + +/* + * What follows is an implementation of the policy table using a + * simple tail queue. + * XXX such details should be hidden. + * XXX an implementation using a binary tree should be more efficient. 
+ */ +struct addrsel_policyent { + TAILQ_ENTRY(addrsel_policyent) ape_entry; + struct in6_addrpolicy ape_policy; +}; + +TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); + +static VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab); +#define V_addrsel_policytab VNET(addrsel_policytab) + +static void +init_policy_queue(void) +{ + + TAILQ_INIT(&V_addrsel_policytab); +} + +static int +add_addrsel_policyent(struct in6_addrpolicy *newpolicy) +{ + struct addrsel_policyent *new, *pol; + + new = malloc(sizeof(*new), M_IFADDR, + M_WAITOK); + ADDRSEL_XLOCK(); + ADDRSEL_LOCK(); + + /* duplication check */ + TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { + if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, + &pol->ape_policy.addr.sin6_addr) && + IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, + &pol->ape_policy.addrmask.sin6_addr)) { + ADDRSEL_UNLOCK(); + ADDRSEL_XUNLOCK(); + free(new, M_IFADDR); + return (EEXIST); /* or override it? */ + } + } + + bzero(new, sizeof(*new)); + + /* XXX: should validate entry */ + new->ape_policy = *newpolicy; + + TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry); + ADDRSEL_UNLOCK(); + ADDRSEL_XUNLOCK(); + + return (0); +} + +/* + * Remove the policy table entry whose prefix address and prefix mask both + * equal those of key, and release its storage. + * Returns 0 on success, or ESRCH if the table holds no such entry. + */ +static int +delete_addrsel_policyent(struct in6_addrpolicy *key) +{ + struct addrsel_policyent *pol; + + ADDRSEL_XLOCK(); + ADDRSEL_LOCK(); + + /* search for the entry in the table */ + TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { + if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, + &pol->ape_policy.addr.sin6_addr) && + IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, + &pol->ape_policy.addrmask.sin6_addr)) { + break; + } + } + if (pol == NULL) { + ADDRSEL_UNLOCK(); + ADDRSEL_XUNLOCK(); + return (ESRCH); + } + + TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry); + ADDRSEL_UNLOCK(); + ADDRSEL_XUNLOCK(); + + /* + * The entry was allocated by add_addrsel_policyent() and is no longer + * linked into the table, so free it here; otherwise every deleted + * policy leaks. + * NOTE(review): lookup_addrsel_policy() returns pointers into table + * entries after dropping ADDRSEL_LOCK -- confirm no caller can still + * be using this entry. + */ + free(pol, M_IFADDR); + + return (0); +} + +static int +walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), + void *w) +{ + struct addrsel_policyent *pol; + int error = 0; + + ADDRSEL_SLOCK(); + 
TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { + if ((error = (*callback)(&pol->ape_policy, w)) != 0) { + ADDRSEL_SUNLOCK(); + return (error); + } + } + ADDRSEL_SUNLOCK(); + return (error); +} + +static int +dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg) +{ + int error = 0; + struct walkarg *w = arg; + + error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); + + return (error); +} + +static struct in6_addrpolicy * +match_addrsel_policy(struct sockaddr_in6 *key) +{ + struct addrsel_policyent *pent; + struct in6_addrpolicy *bestpol = NULL, *pol; + int matchlen, bestmatchlen = -1; + u_char *mp, *ep, *k, *p, m; + + TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) { + matchlen = 0; + + pol = &pent->ape_policy; + mp = (u_char *)&pol->addrmask.sin6_addr; + ep = mp + 16; /* XXX: scope field? */ + k = (u_char *)&key->sin6_addr; + p = (u_char *)&pol->addr.sin6_addr; + for (; mp < ep && *mp; mp++, k++, p++) { + m = *mp; + if ((*k & m) != *p) + goto next; /* not match */ + if (m == 0xff) /* short cut for a typical case */ + matchlen += 8; + else { + while (m >= 0x80) { + matchlen++; + m <<= 1; + } + } + } + + /* matched. check if this is better than the current best. */ + if (bestpol == NULL || + matchlen > bestmatchlen) { + bestpol = pol; + bestmatchlen = matchlen; + } + + next: + continue; + } + + return (bestpol); +} diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h new file mode 100644 index 00000000..f77a3271 --- /dev/null +++ b/freebsd/sys/netinet6/in6_var.h @@ -0,0 +1,786 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_var.h,v 1.56 2001/03/29 05:34:31 itojun Exp $ + */ + +/*- + * Copyright (c) 1985, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_var.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NETINET6_IN6_VAR_HH_ +#define _NETINET6_IN6_VAR_HH_ + +#include <freebsd/sys/tree.h> + +#ifdef _KERNEL +#include <freebsd/sys/libkern.h> +#endif + +/* + * Interface address, Internet version. One of these structures + * is allocated for each interface with an Internet address. + * The ifaddr structure contains the protocol-independent part + * of the structure and is assumed to be first. + */ + +/* + * pltime/vltime are just for future reference (required to implements 2 + * hour rule for hosts). they should never be modified by nd6_timeout or + * anywhere else. 
+ * userland -> kernel: accept pltime/vltime + * kernel -> userland: throw up everything + * in kernel: modify preferred/expire only + */ +struct in6_addrlifetime { + time_t ia6t_expire; /* valid lifetime expiration time */ + time_t ia6t_preferred; /* preferred lifetime expiration time */ + u_int32_t ia6t_vltime; /* valid lifetime */ + u_int32_t ia6t_pltime; /* prefix lifetime */ +}; + +struct nd_ifinfo; +struct scope6_id; +struct lltable; +struct mld_ifinfo; + +struct in6_ifextra { + struct in6_ifstat *in6_ifstat; + struct icmp6_ifstat *icmp6_ifstat; + struct nd_ifinfo *nd_ifinfo; + struct scope6_id *scope6_id; + struct lltable *lltable; + struct mld_ifinfo *mld_ifinfo; +}; + +#define LLTABLE6(ifp) (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable) + +struct in6_ifaddr { + struct ifaddr ia_ifa; /* protocol-independent info */ +#define ia_ifp ia_ifa.ifa_ifp +#define ia_flags ia_ifa.ifa_flags + struct sockaddr_in6 ia_addr; /* interface address */ + struct sockaddr_in6 ia_net; /* network number of interface */ + struct sockaddr_in6 ia_dstaddr; /* space for destination addr */ + struct sockaddr_in6 ia_prefixmask; /* prefix mask */ + u_int32_t ia_plen; /* prefix length */ + TAILQ_ENTRY(in6_ifaddr) ia_link; /* list of IPv6 addresses */ + int ia6_flags; + + struct in6_addrlifetime ia6_lifetime; + time_t ia6_createtime; /* the creation time of this address, which is + * currently used for temporary addresses only. + */ + time_t ia6_updatetime; + + /* back pointer to the ND prefix (for autoconfigured addresses only) */ + struct nd_prefix *ia6_ndpr; + + /* multicast addresses joined from the kernel */ + LIST_HEAD(, in6_multi_mship) ia6_memberships; +}; + +/* List of in6_ifaddr's. 
*/ +TAILQ_HEAD(in6_ifaddrhead, in6_ifaddr); + +/* control structure to manage address selection policy */ +struct in6_addrpolicy { + struct sockaddr_in6 addr; /* prefix address */ + struct sockaddr_in6 addrmask; /* prefix mask */ + int preced; /* precedence */ + int label; /* matching label */ + u_quad_t use; /* statistics */ +}; + +/* + * IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12). + */ +struct in6_ifstat { + u_quad_t ifs6_in_receive; /* total # of input datagrams */ + u_quad_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */ + u_quad_t ifs6_in_toobig; /* # of datagrams that exceeded MTU */ + u_quad_t ifs6_in_noroute; /* # of datagrams with no route */ + u_quad_t ifs6_in_addrerr; /* # of datagrams with invalid dst */ + u_quad_t ifs6_in_protounknown; /* # of datagrams with unknown proto */ + /* NOTE: increment on final dst if */ + u_quad_t ifs6_in_truncated; /* # of truncated datagrams */ + u_quad_t ifs6_in_discard; /* # of discarded datagrams */ + /* NOTE: fragment timeout is not here */ + u_quad_t ifs6_in_deliver; /* # of datagrams delivered to ULP */ + /* NOTE: increment on final dst if */ + u_quad_t ifs6_out_forward; /* # of datagrams forwarded */ + /* NOTE: increment on outgoing if */ + u_quad_t ifs6_out_request; /* # of outgoing datagrams from ULP */ + /* NOTE: does not include forwards */ + u_quad_t ifs6_out_discard; /* # of discarded datagrams */ + u_quad_t ifs6_out_fragok; /* # of datagrams fragmented */ + u_quad_t ifs6_out_fragfail; /* # of datagrams failed on fragment */ + u_quad_t ifs6_out_fragcreat; /* # of fragment datagrams */ + /* NOTE: this is # after fragment */ + u_quad_t ifs6_reass_reqd; /* # of incoming fragmented packets */ + /* NOTE: increment on final dst if */ + u_quad_t ifs6_reass_ok; /* # of reassembled packets */ + /* NOTE: this is # after reass */ + /* NOTE: increment on final dst if */ + u_quad_t ifs6_reass_fail; /* # of reass failures */ + /* NOTE: may not be packet count */ + /* NOTE: increment on final 
dst if */ + u_quad_t ifs6_in_mcast; /* # of inbound multicast datagrams */ + u_quad_t ifs6_out_mcast; /* # of outbound multicast datagrams */ +}; + +/* + * ICMPv6 interface statistics, as defined in RFC2466 Ipv6IfIcmpEntry. + * XXX: I'm not sure if this file is the right place for this structure... + */ +struct icmp6_ifstat { + /* + * Input statistics + */ + /* ipv6IfIcmpInMsgs, total # of input messages */ + u_quad_t ifs6_in_msg; + /* ipv6IfIcmpInErrors, # of input error messages */ + u_quad_t ifs6_in_error; + /* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */ + u_quad_t ifs6_in_dstunreach; + /* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */ + u_quad_t ifs6_in_adminprohib; + /* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */ + u_quad_t ifs6_in_timeexceed; + /* ipv6IfIcmpInParmProblems, # of input parameter problem errors */ + u_quad_t ifs6_in_paramprob; + /* ipv6IfIcmpInPktTooBigs, # of input packet too big errors */ + u_quad_t ifs6_in_pkttoobig; + /* ipv6IfIcmpInEchos, # of input echo requests */ + u_quad_t ifs6_in_echo; + /* ipv6IfIcmpInEchoReplies, # of input echo replies */ + u_quad_t ifs6_in_echoreply; + /* ipv6IfIcmpInRouterSolicits, # of input router solicitations */ + u_quad_t ifs6_in_routersolicit; + /* ipv6IfIcmpInRouterAdvertisements, # of input router advertisements */ + u_quad_t ifs6_in_routeradvert; + /* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */ + u_quad_t ifs6_in_neighborsolicit; + /* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */ + u_quad_t ifs6_in_neighboradvert; + /* ipv6IfIcmpInRedirects, # of input redirects */ + u_quad_t ifs6_in_redirect; + /* ipv6IfIcmpInGroupMembQueries, # of input MLD queries */ + u_quad_t ifs6_in_mldquery; + /* ipv6IfIcmpInGroupMembResponses, # of input MLD reports */ + u_quad_t ifs6_in_mldreport; + /* ipv6IfIcmpInGroupMembReductions, # of input MLD done */ + u_quad_t ifs6_in_mlddone; + + /* + * Output statistics. 
We should solve unresolved routing problem... + */ + /* ipv6IfIcmpOutMsgs, total # of output messages */ + u_quad_t ifs6_out_msg; + /* ipv6IfIcmpOutErrors, # of output error messages */ + u_quad_t ifs6_out_error; + /* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */ + u_quad_t ifs6_out_dstunreach; + /* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */ + u_quad_t ifs6_out_adminprohib; + /* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */ + u_quad_t ifs6_out_timeexceed; + /* ipv6IfIcmpOutParmProblems, # of output parameter problem errors */ + u_quad_t ifs6_out_paramprob; + /* ipv6IfIcmpOutPktTooBigs, # of output packet too big errors */ + u_quad_t ifs6_out_pkttoobig; + /* ipv6IfIcmpOutEchos, # of output echo requests */ + u_quad_t ifs6_out_echo; + /* ipv6IfIcmpOutEchoReplies, # of output echo replies */ + u_quad_t ifs6_out_echoreply; + /* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */ + u_quad_t ifs6_out_routersolicit; + /* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */ + u_quad_t ifs6_out_routeradvert; + /* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */ + u_quad_t ifs6_out_neighborsolicit; + /* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */ + u_quad_t ifs6_out_neighboradvert; + /* ipv6IfIcmpOutRedirects, # of output redirects */ + u_quad_t ifs6_out_redirect; + /* ipv6IfIcmpOutGroupMembQueries, # of output MLD queries */ + u_quad_t ifs6_out_mldquery; + /* ipv6IfIcmpOutGroupMembResponses, # of output MLD reports */ + u_quad_t ifs6_out_mldreport; + /* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */ + u_quad_t ifs6_out_mlddone; +}; + +struct in6_ifreq { + char ifr_name[IFNAMSIZ]; + union { + struct sockaddr_in6 ifru_addr; + struct sockaddr_in6 ifru_dstaddr; + int ifru_flags; + int ifru_flags6; + int ifru_metric; + caddr_t ifru_data; + struct in6_addrlifetime ifru_lifetime; + struct in6_ifstat ifru_stat; + struct 
icmp6_ifstat ifru_icmp6stat; + u_int32_t ifru_scope_id[16]; + } ifr_ifru; +}; + +struct in6_aliasreq { + char ifra_name[IFNAMSIZ]; + struct sockaddr_in6 ifra_addr; + struct sockaddr_in6 ifra_dstaddr; + struct sockaddr_in6 ifra_prefixmask; + int ifra_flags; + struct in6_addrlifetime ifra_lifetime; +}; + +/* prefix type macro */ +#define IN6_PREFIX_ND 1 +#define IN6_PREFIX_RR 2 + +/* + * prefix related flags passed between kernel(NDP related part) and + * user land command(ifconfig) and daemon(rtadvd). + */ +struct in6_prflags { + struct prf_ra { + u_char onlink : 1; + u_char autonomous : 1; + u_char reserved : 6; + } prf_ra; + u_char prf_reserved1; + u_short prf_reserved2; + /* want to put this on 4byte offset */ + struct prf_rr { + u_char decrvalid : 1; + u_char decrprefd : 1; + u_char reserved : 6; + } prf_rr; + u_char prf_reserved3; + u_short prf_reserved4; +}; + +struct in6_prefixreq { + char ipr_name[IFNAMSIZ]; + u_char ipr_origin; + u_char ipr_plen; + u_int32_t ipr_vltime; + u_int32_t ipr_pltime; + struct in6_prflags ipr_flags; + struct sockaddr_in6 ipr_prefix; +}; + +#define PR_ORIG_RA 0 +#define PR_ORIG_RR 1 +#define PR_ORIG_STATIC 2 +#define PR_ORIG_KERNEL 3 + +#define ipr_raf_onlink ipr_flags.prf_ra.onlink +#define ipr_raf_auto ipr_flags.prf_ra.autonomous + +#define ipr_statef_onlink ipr_flags.prf_state.onlink + +#define ipr_rrf_decrvalid ipr_flags.prf_rr.decrvalid +#define ipr_rrf_decrprefd ipr_flags.prf_rr.decrprefd + +struct in6_rrenumreq { + char irr_name[IFNAMSIZ]; + u_char irr_origin; + u_char irr_m_len; /* match len for matchprefix */ + u_char irr_m_minlen; /* minlen for matching prefix */ + u_char irr_m_maxlen; /* maxlen for matching prefix */ + u_char irr_u_uselen; /* uselen for adding prefix */ + u_char irr_u_keeplen; /* keeplen from matching prefix */ + struct irr_raflagmask { + u_char onlink : 1; + u_char autonomous : 1; + u_char reserved : 6; + } irr_raflagmask; + u_int32_t irr_vltime; + u_int32_t irr_pltime; + struct in6_prflags irr_flags; + 
struct sockaddr_in6 irr_matchprefix; + struct sockaddr_in6 irr_useprefix; +}; + +#define irr_raf_mask_onlink irr_raflagmask.onlink +#define irr_raf_mask_auto irr_raflagmask.autonomous +#define irr_raf_mask_reserved irr_raflagmask.reserved + +#define irr_raf_onlink irr_flags.prf_ra.onlink +#define irr_raf_auto irr_flags.prf_ra.autonomous + +#define irr_statef_onlink irr_flags.prf_state.onlink + +#define irr_rrf irr_flags.prf_rr +#define irr_rrf_decrvalid irr_flags.prf_rr.decrvalid +#define irr_rrf_decrprefd irr_flags.prf_rr.decrprefd + +/* + * Given a pointer to an in6_ifaddr (ifaddr), + * return a pointer to the addr as a sockaddr_in6 + */ +#define IA6_IN6(ia) (&((ia)->ia_addr.sin6_addr)) +#define IA6_DSTIN6(ia) (&((ia)->ia_dstaddr.sin6_addr)) +#define IA6_MASKIN6(ia) (&((ia)->ia_prefixmask.sin6_addr)) +#define IA6_SIN6(ia) (&((ia)->ia_addr)) +#define IA6_DSTSIN6(ia) (&((ia)->ia_dstaddr)) +#define IFA_IN6(x) (&((struct sockaddr_in6 *)((x)->ifa_addr))->sin6_addr) +#define IFA_DSTIN6(x) (&((struct sockaddr_in6 *)((x)->ifa_dstaddr))->sin6_addr) + +#define IFPR_IN6(x) (&((struct sockaddr_in6 *)((x)->ifpr_prefix))->sin6_addr) + +#ifdef _KERNEL +#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ + (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \ + (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \ + (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \ + (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 ) +#endif + +#define SIOCSIFADDR_IN6 _IOW('i', 12, struct in6_ifreq) +#define SIOCGIFADDR_IN6 _IOWR('i', 33, struct in6_ifreq) + +#ifdef _KERNEL +/* + * SIOCSxxx ioctls should be unused (see comments in in6.c), but + * we do not shift numbers for binary compatibility. 
+ */ +#define SIOCSIFDSTADDR_IN6 _IOW('i', 14, struct in6_ifreq) +#define SIOCSIFNETMASK_IN6 _IOW('i', 22, struct in6_ifreq) +#endif + +#define SIOCGIFDSTADDR_IN6 _IOWR('i', 34, struct in6_ifreq) +#define SIOCGIFNETMASK_IN6 _IOWR('i', 37, struct in6_ifreq) + +#define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq) +#define SIOCAIFADDR_IN6 _IOW('i', 26, struct in6_aliasreq) + +#define SIOCSIFPHYADDR_IN6 _IOW('i', 70, struct in6_aliasreq) +#define SIOCGIFPSRCADDR_IN6 _IOWR('i', 71, struct in6_ifreq) +#define SIOCGIFPDSTADDR_IN6 _IOWR('i', 72, struct in6_ifreq) + +#define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq) + +#define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist) +#ifdef _KERNEL +/* XXX: SIOCGPRLST_IN6 is exposed in KAME but in6_oprlist is not. */ +#define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_oprlist) +#endif +#ifdef _KERNEL +#define OSIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq) +#endif +#define SIOCGIFINFO_IN6 _IOWR('i', 108, struct in6_ndireq) +#define SIOCSIFINFO_IN6 _IOWR('i', 109, struct in6_ndireq) +#define SIOCSNDFLUSH_IN6 _IOWR('i', 77, struct in6_ifreq) +#define SIOCGNBRINFO_IN6 _IOWR('i', 78, struct in6_nbrinfo) +#define SIOCSPFXFLUSH_IN6 _IOWR('i', 79, struct in6_ifreq) +#define SIOCSRTRFLUSH_IN6 _IOWR('i', 80, struct in6_ifreq) + +#define SIOCGIFALIFETIME_IN6 _IOWR('i', 81, struct in6_ifreq) +#define SIOCSIFALIFETIME_IN6 _IOWR('i', 82, struct in6_ifreq) +#define SIOCGIFSTAT_IN6 _IOWR('i', 83, struct in6_ifreq) +#define SIOCGIFSTAT_ICMP6 _IOWR('i', 84, struct in6_ifreq) + +#define SIOCSDEFIFACE_IN6 _IOWR('i', 85, struct in6_ndifreq) +#define SIOCGDEFIFACE_IN6 _IOWR('i', 86, struct in6_ndifreq) + +#define SIOCSIFINFO_FLAGS _IOWR('i', 87, struct in6_ndireq) /* XXX */ + +#define SIOCSSCOPE6 _IOW('i', 88, struct in6_ifreq) +#define SIOCGSCOPE6 _IOWR('i', 89, struct in6_ifreq) +#define SIOCGSCOPE6DEF _IOWR('i', 90, struct in6_ifreq) + +#define SIOCSIFPREFIX_IN6 _IOW('i', 100, struct in6_prefixreq) /* set */ +#define SIOCGIFPREFIX_IN6 
_IOWR('i', 101, struct in6_prefixreq) /* get */ +#define SIOCDIFPREFIX_IN6 _IOW('i', 102, struct in6_prefixreq) /* del */ +#define SIOCAIFPREFIX_IN6 _IOW('i', 103, struct in6_rrenumreq) /* add */ +#define SIOCCIFPREFIX_IN6 _IOW('i', 104, \ + struct in6_rrenumreq) /* change */ +#define SIOCSGIFPREFIX_IN6 _IOW('i', 105, \ + struct in6_rrenumreq) /* set global */ + +#define SIOCGETSGCNT_IN6 _IOWR('u', 106, \ + struct sioc_sg_req6) /* get s,g pkt cnt */ +#define SIOCGETMIFCNT_IN6 _IOWR('u', 107, \ + struct sioc_mif_req6) /* get pkt cnt per if */ + +#define SIOCAADDRCTL_POLICY _IOW('u', 108, struct in6_addrpolicy) +#define SIOCDADDRCTL_POLICY _IOW('u', 109, struct in6_addrpolicy) + +#define IN6_IFF_ANYCAST 0x01 /* anycast address */ +#define IN6_IFF_TENTATIVE 0x02 /* tentative address */ +#define IN6_IFF_DUPLICATED 0x04 /* DAD detected duplicate */ +#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */ +#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */ +#define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address + * (used only at first SIOC* call) + */ +#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */ +#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */ +#define IN6_IFF_NOPFX 0x8000 /* skip kernel prefix management. + * XXX: this should be temporary. 
+ */ + +/* do not input/output */ +#define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED) + +#ifdef _KERNEL +#define IN6_ARE_SCOPE_CMP(a,b) ((a)-(b)) +#define IN6_ARE_SCOPE_EQUAL(a,b) ((a)==(b)) +#endif + +#ifdef _KERNEL +VNET_DECLARE(struct in6_ifaddrhead, in6_ifaddrhead); +#define V_in6_ifaddrhead VNET(in6_ifaddrhead) + +extern struct rwlock in6_ifaddr_lock; +#define IN6_IFADDR_LOCK_ASSERT( ) rw_assert(&in6_ifaddr_lock, RA_LOCKED) +#define IN6_IFADDR_RLOCK() rw_rlock(&in6_ifaddr_lock) +#define IN6_IFADDR_RLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_RLOCKED) +#define IN6_IFADDR_RUNLOCK() rw_runlock(&in6_ifaddr_lock) +#define IN6_IFADDR_WLOCK() rw_wlock(&in6_ifaddr_lock) +#define IN6_IFADDR_WLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_WLOCKED) +#define IN6_IFADDR_WUNLOCK() rw_wunlock(&in6_ifaddr_lock) + +VNET_DECLARE(struct icmp6stat, icmp6stat); +#define V_icmp6stat VNET(icmp6stat) +#define in6_ifstat_inc(ifp, tag) \ +do { \ + if (ifp) \ + ((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->in6_ifstat->tag++; \ +} while (/*CONSTCOND*/ 0) + +extern struct in6_addr zeroin6_addr; +extern u_char inet6ctlerrmap[]; +VNET_DECLARE(unsigned long, in6_maxmtu); +#define V_in6_maxmtu VNET(in6_maxmtu) +#endif /* _KERNEL */ + +/* + * IPv6 multicast MLD-layer source entry. + */ +struct ip6_msource { + RB_ENTRY(ip6_msource) im6s_link; /* RB tree links */ + struct in6_addr im6s_addr; + struct im6s_st { + uint16_t ex; /* # of exclusive members */ + uint16_t in; /* # of inclusive members */ + } im6s_st[2]; /* state at t0, t1 */ + uint8_t im6s_stp; /* pending query */ +}; +RB_HEAD(ip6_msource_tree, ip6_msource); + +/* + * IPv6 multicast PCB-layer source entry. + * + * NOTE: overlapping use of struct ip6_msource fields at start. 
+ */ +struct in6_msource { + RB_ENTRY(ip6_msource) im6s_link; /* Common field */ + struct in6_addr im6s_addr; /* Common field */ + uint8_t im6sl_st[2]; /* state before/at commit */ +}; + +#ifdef _KERNEL +/* + * IPv6 source tree comparison function. + * + * An ordered predicate is necessary; bcmp() is not documented to return + * an indication of order, memcmp() is, and is an ISO C99 requirement. + */ +static __inline int +ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b) +{ + + return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr))); +} +RB_PROTOTYPE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp); +#endif /* _KERNEL */ + +/* + * IPv6 multicast PCB-layer group filter descriptor. + */ +struct in6_mfilter { + struct ip6_msource_tree im6f_sources; /* source list for (S,G) */ + u_long im6f_nsrc; /* # of source entries */ + uint8_t im6f_st[2]; /* state before/at commit */ +}; + +/* + * Legacy KAME IPv6 multicast membership descriptor. + */ +struct in6_multi_mship { + struct in6_multi *i6mm_maddr; + LIST_ENTRY(in6_multi_mship) i6mm_chain; +}; + +/* + * IPv6 group descriptor. + * + * For every entry on an ifnet's if_multiaddrs list which represents + * an IP multicast group, there is one of these structures. + * + * If any source filters are present, then a node will exist in the RB-tree + * to permit fast lookup by source whenever an operation takes place. + * This permits pre-order traversal when we issue reports. + * Source filter trees are kept separately from the socket layer to + * greatly simplify locking. + * + * When MLDv2 is active, in6m_timer is the response to group query timer. + * The state-change timer in6m_sctimer is separate; whenever state changes + * for the group the state change record is generated and transmitted, + * and kept if retransmissions are necessary. + * + * FUTURE: in6m_link is now only used when groups are being purged + * on a detaching ifnet. 
It could be demoted to a SLIST_ENTRY, but + * because it is at the very start of the struct, we can't do this + * w/o breaking the ABI for ifmcstat. + */ +struct in6_multi { + LIST_ENTRY(in6_multi) in6m_entry; /* list glue */ + struct in6_addr in6m_addr; /* IPv6 multicast address */ + struct ifnet *in6m_ifp; /* back pointer to ifnet */ + struct ifmultiaddr *in6m_ifma; /* back pointer to ifmultiaddr */ + u_int in6m_refcount; /* reference count */ + u_int in6m_state; /* state of the membership */ + u_int in6m_timer; /* MLD6 listener report timer */ + + /* New fields for MLDv2 follow. */ + struct mld_ifinfo *in6m_mli; /* MLD info */ + SLIST_ENTRY(in6_multi) in6m_nrele; /* to-be-released by MLD */ + struct ip6_msource_tree in6m_srcs; /* tree of sources */ + u_long in6m_nsrc; /* # of tree entries */ + + struct ifqueue in6m_scq; /* queue of pending + * state-change packets */ + struct timeval in6m_lastgsrtv; /* last G-S-R query */ + uint16_t in6m_sctimer; /* state-change timer */ + uint16_t in6m_scrv; /* state-change rexmit count */ + + /* + * SSM state counters which track state at T0 (the time the last + * state-change report's RV timer went to zero) and T1 + * (time of pending report, i.e. now). + * Used for computing MLDv2 state-change reports. Several refcounts + * are maintained here to optimize for common use-cases. + */ + struct in6m_st { + uint16_t iss_fmode; /* MLD filter mode */ + uint16_t iss_asm; /* # of ASM listeners */ + uint16_t iss_ex; /* # of exclusive members */ + uint16_t iss_in; /* # of inclusive members */ + uint16_t iss_rec; /* # of recorded sources */ + } in6m_st[2]; /* state at t0, t1 */ +}; + +/* + * Helper function to derive the filter mode on a source entry + * from its internal counters. Predicates are: + * A source is only excluded if all listeners exclude it. + * A source is only included if no listeners exclude it, + * and at least one listener includes it. + * May be used by ifmcstat(8). 
+ */ +static __inline uint8_t +im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims, + uint8_t t) +{ + + t = !!t; + if (inm->in6m_st[t].iss_ex > 0 && + inm->in6m_st[t].iss_ex == ims->im6s_st[t].ex) + return (MCAST_EXCLUDE); + else if (ims->im6s_st[t].in > 0 && ims->im6s_st[t].ex == 0) + return (MCAST_INCLUDE); + return (MCAST_UNDEFINED); +} + +#ifdef _KERNEL + +/* + * Lock macros for IPv6 layer multicast address lists. IPv6 lock goes + * before link layer multicast locks in the lock order. In most cases, + * consumers of IN_*_MULTI() macros should acquire the locks before + * calling them; users of the in_{add,del}multi() functions should not. + */ +extern struct mtx in6_multi_mtx; +#define IN6_MULTI_LOCK() mtx_lock(&in6_multi_mtx) +#define IN6_MULTI_UNLOCK() mtx_unlock(&in6_multi_mtx) +#define IN6_MULTI_LOCK_ASSERT() mtx_assert(&in6_multi_mtx, MA_OWNED) +#define IN6_MULTI_UNLOCK_ASSERT() mtx_assert(&in6_multi_mtx, MA_NOTOWNED) + +/* + * Look up an in6_multi record for an IPv6 multicast address + * on the interface ifp. + * If no record is found, return NULL. + * + * SMPng: The IN6_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held. + */ +static __inline struct in6_multi * +in6m_lookup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr) +{ + struct ifmultiaddr *ifma; + struct in6_multi *inm; + + IN6_MULTI_LOCK_ASSERT(); + IF_ADDR_LOCK_ASSERT(ifp); + + inm = NULL; + TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { + if (ifma->ifma_addr->sa_family == AF_INET6) { + inm = (struct in6_multi *)ifma->ifma_protospec; + if (IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, mcaddr)) + break; + inm = NULL; + } + } + return (inm); +} + +/* + * Wrapper for in6m_lookup_locked(). + * + * SMPng: Assumes that neither the IN6_MULTI_LOCK() nor the IF_ADDR_LOCK() is held. 
+ */ +static __inline struct in6_multi * +in6m_lookup(struct ifnet *ifp, const struct in6_addr *mcaddr) +{ + struct in6_multi *inm; + + IN6_MULTI_LOCK(); + IF_ADDR_LOCK(ifp); + inm = in6m_lookup_locked(ifp, mcaddr); + IF_ADDR_UNLOCK(ifp); + IN6_MULTI_UNLOCK(); + + return (inm); +} + +/* Acquire an in6_multi record. */ +static __inline void +in6m_acquire_locked(struct in6_multi *inm) +{ + + IN6_MULTI_LOCK_ASSERT(); + ++inm->in6m_refcount; +} + +struct ip6_moptions; +struct sockopt; + +/* Multicast KPIs. */ +int im6o_mc_filter(const struct ip6_moptions *, const struct ifnet *, + const struct sockaddr *, const struct sockaddr *); +int in6_mc_join(struct ifnet *, const struct in6_addr *, + struct in6_mfilter *, struct in6_multi **, int); +int in6_mc_join_locked(struct ifnet *, const struct in6_addr *, + struct in6_mfilter *, struct in6_multi **, int); +int in6_mc_leave(struct in6_multi *, struct in6_mfilter *); +int in6_mc_leave_locked(struct in6_multi *, struct in6_mfilter *); +void in6m_clear_recorded(struct in6_multi *); +void in6m_commit(struct in6_multi *); +void in6m_print(const struct in6_multi *); +int in6m_record_source(struct in6_multi *, const struct in6_addr *); +void in6m_release_locked(struct in6_multi *); +void ip6_freemoptions(struct ip6_moptions *); +int ip6_getmoptions(struct inpcb *, struct sockopt *); +int ip6_setmoptions(struct inpcb *, struct sockopt *); + +/* Legacy KAME multicast KPIs. 
*/ +struct in6_multi_mship * + in6_joingroup(struct ifnet *, struct in6_addr *, int *, int); +int in6_leavegroup(struct in6_multi_mship *); + +/* flags to in6_update_ifa */ +#define IN6_IFAUPDATE_DADDELAY 0x1 /* first time to configure an address */ + +int in6_mask2len __P((struct in6_addr *, u_char *)); +int in6_control __P((struct socket *, u_long, caddr_t, struct ifnet *, + struct thread *)); +int in6_update_ifa __P((struct ifnet *, struct in6_aliasreq *, + struct in6_ifaddr *, int)); +void in6_purgeaddr __P((struct ifaddr *)); +int in6if_do_dad __P((struct ifnet *)); +void in6_purgeif __P((struct ifnet *)); +void in6_savemkludge __P((struct in6_ifaddr *)); +void *in6_domifattach __P((struct ifnet *)); +void in6_domifdetach __P((struct ifnet *, void *)); +void in6_setmaxmtu __P((void)); +int in6_if2idlen __P((struct ifnet *)); +struct in6_ifaddr *in6ifa_ifpforlinklocal __P((struct ifnet *, int)); +struct in6_ifaddr *in6ifa_ifpwithaddr __P((struct ifnet *, struct in6_addr *)); +char *ip6_sprintf __P((char *, const struct in6_addr *)); +int in6_addr2zoneid __P((struct ifnet *, struct in6_addr *, u_int32_t *)); +int in6_matchlen __P((struct in6_addr *, struct in6_addr *)); +int in6_are_prefix_equal __P((struct in6_addr *, struct in6_addr *, int)); +void in6_prefixlen2mask __P((struct in6_addr *, int)); +int in6_prefix_ioctl __P((struct socket *, u_long, caddr_t, + struct ifnet *)); +int in6_prefix_add_ifid __P((int, struct in6_ifaddr *)); +void in6_prefix_remove_ifid __P((int, struct in6_ifaddr *)); +void in6_purgeprefix __P((struct ifnet *)); +void in6_ifremloop(struct ifaddr *); +void in6_ifaddloop(struct ifaddr *); + +int in6_is_addr_deprecated __P((struct sockaddr_in6 *)); +struct inpcb; +int in6_src_ioctl __P((u_long, caddr_t)); +#endif /* _KERNEL */ + +#endif /* _NETINET6_IN6_VAR_HH_ */ diff --git a/freebsd/sys/netinet6/ip6.h b/freebsd/sys/netinet6/ip6.h new file mode 100644 index 00000000..9eec13fb --- /dev/null +++ b/freebsd/sys/netinet6/ip6.h @@ -0,0 +1,4 
@@ +/* $FreeBSD$ */ +/* $KAME: ip6.h,v 1.7 2000/03/25 07:23:36 sumikawa Exp $ */ + +#error "netinet6/ip6.h is obsolete. use netinet/ip6.h" diff --git a/freebsd/sys/netinet6/ip6_ecn.h b/freebsd/sys/netinet6/ip6_ecn.h new file mode 100644 index 00000000..27d3f34d --- /dev/null +++ b/freebsd/sys/netinet6/ip6_ecn.h @@ -0,0 +1,41 @@ +/*- + * Copyright (C) 1999 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: ip_ecn.h,v 1.5 2000/03/27 04:58:38 sumikawa Exp $ + * $FreeBSD$ + */ + +/* + * ECN consideration on tunnel ingress/egress operation. + * http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt + */ + +#ifdef _KERNEL +extern void ip6_ecn_ingress(int, u_int32_t *, const u_int32_t *); +extern int ip6_ecn_egress(int, const u_int32_t *, u_int32_t *); +#endif diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c new file mode 100644 index 00000000..f1d19737 --- /dev/null +++ b/freebsd/sys/netinet6/ip6_forward.c @@ -0,0 +1,626 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> +#include <freebsd/local/opt_ipstealth.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/domain.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/syslog.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/route.h> +#include <freebsd/net/pfil.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/in_systm.h> +#include <freebsd/netinet/ip.h> +#include <freebsd/netinet/ip_var.h> +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet6/nd6.h> + +#include <freebsd/netinet/in_pcb.h> + +#ifdef IPSEC +#include <freebsd/netipsec/ipsec.h> +#include <freebsd/netipsec/ipsec6.h> +#include <freebsd/netipsec/key.h> +#endif /* IPSEC */ + +#include <freebsd/netinet6/ip6protosw.h> + +/* + * Forward a packet. If some error occurs return the sender + * an icmp packet. 
Note we can't always generate a meaningful + * icmp message because icmp doesn't have a large enough repertoire + * of codes and types. + * + * If not forwarding, just drop the packet. This could be confusing + * if ipforwarding was zero but some routing protocol was advancing + * us as a gateway to somewhere. However, we must let the routing + * protocol deal with that. + * + */ +void +ip6_forward(struct mbuf *m, int srcrt) +{ + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct sockaddr_in6 *dst = NULL; + struct rtentry *rt = NULL; + struct route_in6 rin6; + int error, type = 0, code = 0; + struct mbuf *mcopy = NULL; + struct ifnet *origifp; /* maybe unnecessary */ + u_int32_t inzone, outzone; + struct in6_addr src_in6, dst_in6; +#ifdef IPSEC + struct secpolicy *sp = NULL; + int ipsecrt = 0; +#endif + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + +#ifdef IPSEC + /* + * Check AH/ESP integrity. + */ + /* + * Don't increment ip6s_cantforward because this is the check + * before forwarding packet actually. + */ + if (ipsec6_in_reject(m, NULL)) { + V_ipsec6stat.in_polvio++; + m_freem(m); + return; + } +#endif /* IPSEC */ + + /* + * Do not forward packets to multicast destination (should be handled + * by ip6_mforward(). + * Do not forward packets with unspecified source. It was discussed + * in July 2000, on the ipngwg mailing list. 
+ */ + if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 || + IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || + IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { + V_ip6stat.ip6s_cantforward++; + /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ + if (V_ip6_log_time + V_ip6_log_interval < time_second) { + V_ip6_log_time = time_second; + log(LOG_DEBUG, + "cannot forward " + "from %s to %s nxt %d received on %s\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), + ip6->ip6_nxt, + if_name(m->m_pkthdr.rcvif)); + } + m_freem(m); + return; + } + +#ifdef IPSTEALTH + if (!V_ip6stealth) { +#endif + if (ip6->ip6_hlim <= IPV6_HLIMDEC) { + /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ + icmp6_error(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_TRANSIT, 0); + return; + } + ip6->ip6_hlim -= IPV6_HLIMDEC; + +#ifdef IPSTEALTH + } +#endif + + /* + * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU - + * size of IPv6 + ICMPv6 headers) bytes of the packet in case + * we need to generate an ICMP6 message to the src. + * Thanks to M_EXT, in most cases copy will not occur. + * + * It is important to save it before IPsec processing as IPsec + * processing may modify the mbuf. + */ + mcopy = m_copy(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN)); + +#ifdef IPSEC + /* get a security policy for this packet */ + sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, + IP_FORWARDING, &error); + if (sp == NULL) { + V_ipsec6stat.out_inval++; + V_ip6stat.ip6s_cantforward++; + if (mcopy) { +#if 0 + /* XXX: what icmp ? */ +#else + m_freem(mcopy); +#endif + } + m_freem(m); + return; + } + + error = 0; + + /* check policy */ + switch (sp->policy) { + case IPSEC_POLICY_DISCARD: + /* + * This packet is just discarded. + */ + V_ipsec6stat.out_polvio++; + V_ip6stat.ip6s_cantforward++; + KEY_FREESP(&sp); + if (mcopy) { +#if 0 + /* XXX: what icmp ? */ +#else + m_freem(mcopy); +#endif + } + m_freem(m); + return; + + case IPSEC_POLICY_BYPASS: + case IPSEC_POLICY_NONE: + /* no need to do IPsec. 
*/ + KEY_FREESP(&sp); + goto skip_ipsec; + + case IPSEC_POLICY_IPSEC: + if (sp->req == NULL) { + /* XXX should be panic ? */ + printf("ip6_forward: No IPsec request specified.\n"); + V_ip6stat.ip6s_cantforward++; + KEY_FREESP(&sp); + if (mcopy) { +#if 0 + /* XXX: what icmp ? */ +#else + m_freem(mcopy); +#endif + } + m_freem(m); + return; + } + /* do IPsec */ + break; + + case IPSEC_POLICY_ENTRUST: + default: + /* should be panic ?? */ + printf("ip6_forward: Invalid policy found. %d\n", sp->policy); + KEY_FREESP(&sp); + goto skip_ipsec; + } + + { + struct ipsecrequest *isr = NULL; + struct ipsec_output_state state; + + /* + * when the kernel forwards a packet, it is not proper to apply + * IPsec transport mode to the packet is not proper. this check + * avoid from this. + * at present, if there is even a transport mode SA request in the + * security policy, the kernel does not apply IPsec to the packet. + * this check is not enough because the following case is valid. + * ipsec esp/tunnel/xxx-xxx/require esp/transport//require; + */ + for (isr = sp->req; isr; isr = isr->next) { + if (isr->saidx.mode == IPSEC_MODE_ANY) + goto doipsectunnel; + if (isr->saidx.mode == IPSEC_MODE_TUNNEL) + goto doipsectunnel; + } + + /* + * if there's no need for tunnel mode IPsec, skip. + */ + if (!isr) + goto skip_ipsec; + + doipsectunnel: + /* + * All the extension headers will become inaccessible + * (since they can be encrypted). + * Don't panic, we need no more updates to extension headers + * on inner IPv6 packet (since they are now encapsulated). + * + * IPv6 [ESP|AH] IPv6 [extension headers] payload + */ + bzero(&state, sizeof(state)); + state.m = m; + state.ro = NULL; /* update at ipsec6_output_tunnel() */ + state.dst = NULL; /* update at ipsec6_output_tunnel() */ + + error = ipsec6_output_tunnel(&state, sp, 0); + + m = state.m; + KEY_FREESP(&sp); + + if (error) { + /* mbuf is already reclaimed in ipsec6_output_tunnel. 
*/ + switch (error) { + case EHOSTUNREACH: + case ENETUNREACH: + case EMSGSIZE: + case ENOBUFS: + case ENOMEM: + break; + default: + printf("ip6_output (ipsec): error code %d\n", error); + /* FALLTHROUGH */ + case ENOENT: + /* don't show these error codes to the user */ + break; + } + V_ip6stat.ip6s_cantforward++; + if (mcopy) { +#if 0 + /* XXX: what icmp ? */ +#else + m_freem(mcopy); +#endif + } + m_freem(m); + return; + } else { + /* + * In the FAST IPSec case we have already + * re-injected the packet and it has been freed + * by the ipsec_done() function. So, just clean + * up after ourselves. + */ + m = NULL; + goto freecopy; + } + + if ((m != NULL) && (ip6 != mtod(m, struct ip6_hdr *)) ){ + /* + * now tunnel mode headers are added. we are originating + * packet instead of forwarding the packet. + */ + ip6_output(m, NULL, NULL, IPV6_FORWARDING/*XXX*/, NULL, NULL, + NULL); + goto freecopy; + } + + /* adjust pointer */ + dst = (struct sockaddr_in6 *)state.dst; + rt = state.ro ? state.ro->ro_rt : NULL; + if (dst != NULL && rt != NULL) + ipsecrt = 1; + } + if (ipsecrt) + goto skip_routing; +skip_ipsec: +#endif + + bzero(&rin6, sizeof(struct route_in6)); + dst = (struct sockaddr_in6 *)&rin6.ro_dst; + dst->sin6_len = sizeof(struct sockaddr_in6); + dst->sin6_family = AF_INET6; + dst->sin6_addr = ip6->ip6_dst; + + rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0); + if (rin6.ro_rt != NULL) + RT_UNLOCK(rin6.ro_rt); + else { + V_ip6stat.ip6s_noroute++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); + if (mcopy) { + icmp6_error(mcopy, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_NOROUTE, 0); + } + goto bad; + } + rt = rin6.ro_rt; +#ifdef IPSEC +skip_routing: +#endif + + /* + * Source scope check: if a packet can't be delivered to its + * destination for the reason that the destination is beyond the scope + * of the source address, discard the packet and return an icmp6 + * destination unreachable error with Code 2 (beyond scope of source + * address). 
We use a local copy of ip6_src, since in6_setscope() + * will possibly modify its first argument. + * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1] + */ + src_in6 = ip6->ip6_src; + if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) { + /* XXX: this should not happen */ + V_ip6stat.ip6s_cantforward++; + V_ip6stat.ip6s_badscope++; + goto bad; + } + if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) { + V_ip6stat.ip6s_cantforward++; + V_ip6stat.ip6s_badscope++; + goto bad; + } + if (inzone != outzone +#ifdef IPSEC + && !ipsecrt +#endif + ) { + V_ip6stat.ip6s_cantforward++; + V_ip6stat.ip6s_badscope++; + in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); + + if (V_ip6_log_time + V_ip6_log_interval < time_second) { + V_ip6_log_time = time_second; + log(LOG_DEBUG, + "cannot forward " + "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), + ip6->ip6_nxt, + if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); + } + if (mcopy) + icmp6_error(mcopy, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_BEYONDSCOPE, 0); + goto bad; + } + + /* + * Destination scope check: if a packet is going to break the scope + * zone of packet's destination address, discard it. This case should + * usually be prevented by appropriately-configured routing table, but + * we need an explicit check because we may mistakenly forward the + * packet to a different zone by (e.g.) a default route. 
+ */ + dst_in6 = ip6->ip6_dst; + if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 || + in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 || + inzone != outzone) { + V_ip6stat.ip6s_cantforward++; + V_ip6stat.ip6s_badscope++; + goto bad; + } + + if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { + in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); + if (mcopy) { + u_long mtu; +#ifdef IPSEC + struct secpolicy *sp; + int ipsecerror; + size_t ipsechdrsiz; +#endif /* IPSEC */ + + mtu = IN6_LINKMTU(rt->rt_ifp); +#ifdef IPSEC + /* + * When we do IPsec tunnel ingress, we need to play + * with the link value (decrement IPsec header size + * from mtu value). The code is much simpler than v4 + * case, as we have the outgoing interface for + * encapsulated packet as "rt->rt_ifp". + */ + sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, + IP_FORWARDING, &ipsecerror); + if (sp) { + ipsechdrsiz = ipsec_hdrsiz(mcopy, + IPSEC_DIR_OUTBOUND, NULL); + if (ipsechdrsiz < mtu) + mtu -= ipsechdrsiz; + } + + /* + * if mtu becomes less than minimum MTU, + * tell minimum MTU (and I'll need to fragment it). + */ + if (mtu < IPV6_MMTU) + mtu = IPV6_MMTU; +#endif /* IPSEC */ + icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); + } + goto bad; + } + + if (rt->rt_flags & RTF_GATEWAY) + dst = (struct sockaddr_in6 *)rt->rt_gateway; + + /* + * If we are to forward the packet using the same interface + * as one we got the packet from, perhaps we should send a redirect + * to sender to shortcut a hop. + * Only send redirect if source is sending directly to us, + * and if packet was not source routed (or has any options). + * Also, don't send redirect if forwarding using a route + * modified by a redirect. 
+ */ + if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt && +#ifdef IPSEC + !ipsecrt && +#endif /* IPSEC */ + (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) { + if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) { + /* + * If the incoming interface is equal to the outgoing + * one, and the link attached to the interface is + * point-to-point, then it will be highly probable + * that a routing loop occurs. Thus, we immediately + * drop the packet and send an ICMPv6 error message. + * + * type/code is based on suggestion by Rich Draves. + * not sure if it is the best pick. + */ + icmp6_error(mcopy, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_ADDR, 0); + goto bad; + } + type = ND_REDIRECT; + } + + /* + * Fake scoped addresses. Note that even link-local source or + * destinaion can appear, if the originating node just sends the + * packet to us (without address resolution for the destination). + * Since both icmp6_error and icmp6_redirect_output fill the embedded + * link identifiers, we can do this stuff after making a copy for + * returning an error. + */ + if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { + /* + * See corresponding comments in ip6_output. + * XXX: but is it possible that ip6_forward() sends a packet + * to a loopback interface? I don't think so, and thus + * I bark here. (jinmei@kame.net) + * XXX: it is common to route invalid packets to loopback. + * also, the codepath will be visited on use of ::1 in + * rthdr. (itojun) + */ +#if 1 + if (0) +#else + if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0) +#endif + { + printf("ip6_forward: outgoing interface is loopback. " + "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), + ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), + if_name(rt->rt_ifp)); + } + + /* we can just use rcvif in forwarding. 
*/ + origifp = m->m_pkthdr.rcvif; + } + else + origifp = rt->rt_ifp; + /* + * clear embedded scope identifiers if necessary. + * in6_clearscope will touch the addresses only when necessary. + */ + in6_clearscope(&ip6->ip6_src); + in6_clearscope(&ip6->ip6_dst); + + /* Jump over all PFIL processing if hooks are not active. */ + if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + goto pass; + + /* Run through list of hooks for output packets. */ + error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL); + if (error != 0) + goto senderr; + if (m == NULL) + goto freecopy; + ip6 = mtod(m, struct ip6_hdr *); + +pass: + error = nd6_output(rt->rt_ifp, origifp, m, dst, rt); + if (error) { + in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); + V_ip6stat.ip6s_cantforward++; + } else { + V_ip6stat.ip6s_forward++; + in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward); + if (type) + V_ip6stat.ip6s_redirectsent++; + else { + if (mcopy) + goto freecopy; + } + } + +senderr: + if (mcopy == NULL) + goto out; + switch (error) { + case 0: + if (type == ND_REDIRECT) { + icmp6_redirect_output(mcopy, rt); + goto out; + } + goto freecopy; + + case EMSGSIZE: + /* xxx MTU is constant in PPP? */ + goto freecopy; + + case ENOBUFS: + /* Tell source to slow down like source quench in IP? */ + goto freecopy; + + case ENETUNREACH: /* shouldn't happen, checked above */ + case EHOSTUNREACH: + case ENETDOWN: + case EHOSTDOWN: + default: + type = ICMP6_DST_UNREACH; + code = ICMP6_DST_UNREACH_ADDR; + break; + } + icmp6_error(mcopy, type, code, 0); + goto out; + + freecopy: + m_freem(mcopy); + goto out; +bad: + m_freem(m); +out: + if (rt != NULL +#ifdef IPSEC + && !ipsecrt +#endif + ) + RTFREE(rt); +} diff --git a/freebsd/sys/netinet6/ip6_id.c b/freebsd/sys/netinet6/ip6_id.c new file mode 100644 index 00000000..090e66fa --- /dev/null +++ b/freebsd/sys/netinet6/ip6_id.c @@ -0,0 +1,269 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 2003 WIDE Project. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_id.c,v 1.13 2003/09/16 09:11:19 itojun Exp $ + */ + +/*- + * Copyright 1998 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Theo de Raadt <deraadt@openbsd.org> came up with the idea of using + * such a mathematical system to generate more random (yet non-repeating) + * ids to solve the resolver/named problem. But Niels designed the + * actual system based on the constraints. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Niels Provos. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $OpenBSD: ip_id.c,v 1.6 2002/03/15 18:19:52 millert Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * seed = random (bits - 1) bit + * n = prime, g0 = generator to n, + * j = random so that gcd(j,n-1) == 1 + * g = g0^j mod n will be a generator again. + * + * X[0] = random seed. 
+ * X[n] = a*X[n-1]+b mod m is a Linear Congruential Generator + * with a = 7^(even random) mod m, + * b = random with gcd(b,m) == 1 + * m = constant and a maximal period of m-1. + * + * The transaction id is determined by: + * id[n] = seed xor (g^X[n] mod n) + * + * Effectivly the id is restricted to the lower (bits - 1) bits, thus + * yielding two different cycles by toggling the msb on and off. + * This avoids reuse issues caused by reseeding. + */ + +#include <freebsd/sys/types.h> +#include <freebsd/sys/param.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/libkern.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/route.h> +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> + +#ifndef INT32_MAX +#define INT32_MAX 0x7fffffffU +#endif + +struct randomtab { + const int ru_bits; /* resulting bits */ + const long ru_out; /* Time after wich will be reseeded */ + const u_int32_t ru_max; /* Uniq cycle, avoid blackjack prediction */ + const u_int32_t ru_gen; /* Starting generator */ + const u_int32_t ru_n; /* ru_n: prime, ru_n - 1: product of pfacts[] */ + const u_int32_t ru_agen; /* determine ru_a as ru_agen^(2*rand) */ + const u_int32_t ru_m; /* ru_m = 2^x*3^y */ + const u_int32_t pfacts[4]; /* factors of ru_n */ + + u_int32_t ru_counter; + u_int32_t ru_msb; + + u_int32_t ru_x; + u_int32_t ru_seed, ru_seed2; + u_int32_t ru_a, ru_b; + u_int32_t ru_g; + long ru_reseed; +}; + +static struct randomtab randomtab_32 = { + 32, /* resulting bits */ + 180, /* Time after wich will be reseeded */ + 1000000000, /* Uniq cycle, avoid blackjack prediction */ + 2, /* Starting generator */ + 2147483629, /* RU_N-1 = 2^2*3^2*59652323 */ + 7, /* determine ru_a as RU_AGEN^(2*rand) */ + 1836660096, /* RU_M = 2^7*3^15 - don't change */ + { 2, 3, 59652323, 0 }, /* factors of ru_n */ +}; + +static struct randomtab randomtab_20 = { + 20, /* resulting bits */ + 180, /* Time after wich will be 
reseeded */ + 200000, /* Uniq cycle, avoid blackjack prediction */ + 2, /* Starting generator */ + 524269, /* RU_N-1 = 2^2*3^2*14563 */ + 7, /* determine ru_a as RU_AGEN^(2*rand) */ + 279936, /* RU_M = 2^7*3^7 - don't change */ + { 2, 3, 14563, 0 }, /* factors of ru_n */ +}; + +static u_int32_t pmod(u_int32_t, u_int32_t, u_int32_t); +static void initid(struct randomtab *); +static u_int32_t randomid(struct randomtab *); + +/* + * Do a fast modular exponation, returned value will be in the range + * of 0 - (mod-1) + */ +static u_int32_t +pmod(u_int32_t gen, u_int32_t expo, u_int32_t mod) +{ + u_int64_t s, t, u; + + s = 1; + t = gen; + u = expo; + + while (u) { + if (u & 1) + s = (s * t) % mod; + u >>= 1; + t = (t * t) % mod; + } + return (s); +} + +/* + * Initalizes the seed and chooses a suitable generator. Also toggles + * the msb flag. The msb flag is used to generate two distinct + * cycles of random numbers and thus avoiding reuse of ids. + * + * This function is called from id_randomid() when needed, an + * application does not have to worry about it. 
+ */ +static void +initid(struct randomtab *p) +{ + u_int32_t j, i; + int noprime = 1; + + p->ru_x = arc4random() % p->ru_m; + + /* (bits - 1) bits of random seed */ + p->ru_seed = arc4random() & (~0U >> (32 - p->ru_bits + 1)); + p->ru_seed2 = arc4random() & (~0U >> (32 - p->ru_bits + 1)); + + /* Determine the LCG we use */ + p->ru_b = (arc4random() & (~0U >> (32 - p->ru_bits))) | 1; + p->ru_a = pmod(p->ru_agen, + (arc4random() & (~0U >> (32 - p->ru_bits))) & (~1U), p->ru_m); + while (p->ru_b % 3 == 0) + p->ru_b += 2; + + j = arc4random() % p->ru_n; + + /* + * Do a fast gcd(j, RU_N - 1), so we can find a j with + * gcd(j, RU_N - 1) == 1, giving a new generator for + * RU_GEN^j mod RU_N + */ + while (noprime) { + for (i = 0; p->pfacts[i] > 0; i++) + if (j % p->pfacts[i] == 0) + break; + + if (p->pfacts[i] == 0) + noprime = 0; + else + j = (j + 1) % p->ru_n; + } + + p->ru_g = pmod(p->ru_gen, j, p->ru_n); + p->ru_counter = 0; + + p->ru_reseed = time_second + p->ru_out; + p->ru_msb = p->ru_msb ? 
0 : (1U << (p->ru_bits - 1)); +} + +static u_int32_t +randomid(struct randomtab *p) +{ + int i, n; + u_int32_t tmp; + + if (p->ru_counter >= p->ru_max || time_second > p->ru_reseed) + initid(p); + + tmp = arc4random(); + + /* Skip a random number of ids */ + n = tmp & 0x3; tmp = tmp >> 2; + if (p->ru_counter + n >= p->ru_max) + initid(p); + + for (i = 0; i <= n; i++) { + /* Linear Congruential Generator */ + p->ru_x = (u_int32_t)((u_int64_t)p->ru_a * p->ru_x + p->ru_b) % p->ru_m; + } + + p->ru_counter += i; + + return (p->ru_seed ^ pmod(p->ru_g, p->ru_seed2 ^ p->ru_x, p->ru_n)) | + p->ru_msb; +} + +u_int32_t +ip6_randomid(void) +{ + + return randomid(&randomtab_32); +} + +u_int32_t +ip6_randomflowlabel(void) +{ + + return randomid(&randomtab_20) & 0xfffff; +} diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c new file mode 100644 index 00000000..323bb2a8 --- /dev/null +++ b/freebsd/sys/netinet6/ip6_input.c @@ -0,0 +1,1759 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/domain.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/syslog.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/route.h> +#include <freebsd/net/netisr.h> +#include <freebsd/net/pfil.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_systm.h> +#include <freebsd/net/if_llatbl.h> +#ifdef INET +#include <freebsd/netinet/ip.h> +#include <freebsd/netinet/ip_icmp.h> +#endif /* INET */ +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet6/in6_ifattach.h> +#include <freebsd/netinet6/nd6.h> + +#ifdef IPSEC +#include <freebsd/netipsec/ipsec.h> +#include 
<freebsd/netinet6/ip6_ipsec.h> +#include <freebsd/netipsec/ipsec6.h> +#endif /* IPSEC */ + +#include <freebsd/netinet6/ip6protosw.h> + +extern struct domain inet6domain; + +u_char ip6_protox[IPPROTO_MAX]; +VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); + +static struct netisr_handler ip6_nh = { + .nh_name = "ip6", + .nh_handler = ip6_input, + .nh_proto = NETISR_IPV6, + .nh_policy = NETISR_POLICY_FLOW, +}; + +VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch); +#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) + +VNET_DEFINE(struct pfil_head, inet6_pfil_hook); + +VNET_DEFINE(struct ip6stat, ip6stat); + +struct rwlock in6_ifaddr_lock; +RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); + +static void ip6_init2(void *); +static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); +static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); +#ifdef PULLDOWN_TEST +static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); +#endif + +/* + * IP6 initialization: fill in IP6 protocol switch table. + * All protocols not implemented in kernel go to raw IP6 protocol handler. + */ +void +ip6_init(void) +{ + struct ip6protosw *pr; + int i; + + TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", + &V_ip6_auto_linklocal); + + TAILQ_INIT(&V_in6_ifaddrhead); + + /* Initialize packet filter hooks. */ + V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; + V_inet6_pfil_hook.ph_af = AF_INET6; + if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) + printf("%s: WARNING: unable to register pfil hook, " + "error %d\n", __func__, i); + + scope6_init(); + addrsel_policy_init(); + nd6_init(); + frag6_init(); + + V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; + + /* Skip global initialization stuff for non-default instances. 
*/ + if (!IS_DEFAULT_VNET(curvnet)) + return; + +#ifdef DIAGNOSTIC + if (sizeof(struct protosw) != sizeof(struct ip6protosw)) + panic("sizeof(protosw) != sizeof(ip6protosw)"); +#endif + pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + if (pr == NULL) + panic("ip6_init"); + + /* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */ + for (i = 0; i < IPPROTO_MAX; i++) + ip6_protox[i] = pr - inet6sw; + /* + * Cycle through IP protocols and put them into the appropriate place + * in ip6_protox[]. + */ + for (pr = (struct ip6protosw *)inet6domain.dom_protosw; + pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) + if (pr->pr_domain->dom_family == PF_INET6 && + pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { + /* Be careful to only index valid IP protocols. */ + if (pr->pr_protocol < IPPROTO_MAX) + ip6_protox[pr->pr_protocol] = pr - inet6sw; + } + + netisr_register(&ip6_nh); +} + +/* + * The protocol to be inserted into ip6_protox[] must be already registered + * in inet6sw[], either statically or through pf_proto_register(). + */ +int +ip6proto_register(short ip6proto) +{ + struct ip6protosw *pr; + + /* Sanity checks. */ + if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) + return (EPROTONOSUPPORT); + + /* + * The protocol slot must not be occupied by another protocol + * already. An index pointing to IPPROTO_RAW is unused. + */ + pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + if (pr == NULL) + return (EPFNOSUPPORT); + if (ip6_protox[ip6proto] != pr - inet6sw) /* IPPROTO_RAW */ + return (EEXIST); + + /* + * Find the protocol position in inet6sw[] and set the index. 
+ */ + for (pr = (struct ip6protosw *)inet6domain.dom_protosw; + pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) { + if (pr->pr_domain->dom_family == PF_INET6 && + pr->pr_protocol && pr->pr_protocol == ip6proto) { + ip6_protox[pr->pr_protocol] = pr - inet6sw; + return (0); + } + } + return (EPROTONOSUPPORT); +} + +int +ip6proto_unregister(short ip6proto) +{ + struct ip6protosw *pr; + + /* Sanity checks. */ + if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) + return (EPROTONOSUPPORT); + + /* Check if the protocol was indeed registered. */ + pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + if (pr == NULL) + return (EPFNOSUPPORT); + if (ip6_protox[ip6proto] == pr - inet6sw) /* IPPROTO_RAW */ + return (ENOENT); + + /* Reset the protocol slot to IPPROTO_RAW. */ + ip6_protox[ip6proto] = pr - inet6sw; + return (0); +} + +#ifdef VIMAGE +void +ip6_destroy() +{ + + nd6_destroy(); + callout_drain(&V_in6_tmpaddrtimer_ch); +} +#endif + +static int +ip6_init2_vnet(const void *unused __unused) +{ + + /* nd6_timer_init */ + callout_init(&V_nd6_timer_ch, 0); + callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet); + + /* timer for regeneranation of temporary addresses randomize ID */ + callout_init(&V_in6_tmpaddrtimer_ch, 0); + callout_reset(&V_in6_tmpaddrtimer_ch, + (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - + V_ip6_temp_regen_advance) * hz, + in6_tmpaddrtimer, curvnet); + + return (0); +} + +static void +ip6_init2(void *dummy) +{ + + ip6_init2_vnet(NULL); +} + +/* cheat */ +/* This must be after route_init(), which is now SI_ORDER_THIRD */ +SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL); + +void +ip6_input(struct mbuf *m) +{ + struct ip6_hdr *ip6; + int off = sizeof(struct ip6_hdr), nest; + u_int32_t plen; + u_int32_t rtalert = ~0; + int nxt, ours = 0; + struct ifnet *deliverifp = NULL, *ifp = NULL; + struct in6_addr odst; + struct route_in6 rin6; + int srcrt = 0; + struct llentry *lle = NULL; + 
struct sockaddr_in6 dst6, *dst; + + bzero(&rin6, sizeof(struct route_in6)); +#ifdef IPSEC + /* + * should the inner packet be considered authentic? + * see comment in ah4_input(). + * NB: m cannot be NULL when passed to the input routine + */ + + m->m_flags &= ~M_AUTHIPHDR; + m->m_flags &= ~M_AUTHIPDGM; + +#endif /* IPSEC */ + + /* + * make sure we don't have onion peering information into m_tag. + */ + ip6_delaux(m); + + /* + * mbuf statistics + */ + if (m->m_flags & M_EXT) { + if (m->m_next) + V_ip6stat.ip6s_mext2m++; + else + V_ip6stat.ip6s_mext1++; + } else { +#define M2MMAX (sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0])) + if (m->m_next) { + if (m->m_flags & M_LOOP) { + V_ip6stat.ip6s_m2m[V_loif->if_index]++; + } else if (m->m_pkthdr.rcvif->if_index < M2MMAX) + V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++; + else + V_ip6stat.ip6s_m2m[0]++; + } else + V_ip6stat.ip6s_m1++; +#undef M2MMAX + } + + /* drop the packet if IPv6 operation is disabled on the IF */ + if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) { + m_freem(m); + return; + } + + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); + V_ip6stat.ip6s_total++; + +#ifndef PULLDOWN_TEST + /* + * L2 bridge code and some other code can return mbuf chain + * that does not conform to KAME requirement. too bad. + * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? 
+ */ + if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { + struct mbuf *n; + + MGETHDR(n, M_DONTWAIT, MT_HEADER); + if (n) + M_MOVE_PKTHDR(n, m); + if (n && n->m_pkthdr.len > MHLEN) { + MCLGET(n, M_DONTWAIT); + if ((n->m_flags & M_EXT) == 0) { + m_freem(n); + n = NULL; + } + } + if (n == NULL) { + m_freem(m); + return; /* ENOBUFS */ + } + + m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t)); + n->m_len = n->m_pkthdr.len; + m_freem(m); + m = n; + } + IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */); +#endif + + if (m->m_len < sizeof(struct ip6_hdr)) { + struct ifnet *inifp; + inifp = m->m_pkthdr.rcvif; + if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { + V_ip6stat.ip6s_toosmall++; + in6_ifstat_inc(inifp, ifs6_in_hdrerr); + return; + } + } + + ip6 = mtod(m, struct ip6_hdr *); + + if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { + V_ip6stat.ip6s_badvers++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); + goto bad; + } + + V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++; + + /* + * Check against address spoofing/corruption. + */ + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) || + IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { + /* + * XXX: "badscope" is not very suitable for a multicast source. + */ + V_ip6stat.ip6s_badscope++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + goto bad; + } + if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) && + !(m->m_flags & M_LOOP)) { + /* + * In this case, the packet should come from the loopback + * interface. However, we cannot just check the if_flags, + * because ip6_mloopback() passes the "actual" interface + * as the outgoing/incoming interface. + */ + V_ip6stat.ip6s_badscope++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + goto bad; + } + +#ifdef ALTQ + if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) { + /* packet is dropped by traffic conditioner */ + return; + } +#endif + /* + * The following check is not documented in specs. 
A malicious + * party may be able to use IPv4 mapped addr to confuse tcp/udp stack + * and bypass security checks (act as if it was from 127.0.0.1 by using + * IPv6 src ::ffff:127.0.0.1). Be cautious. + * + * This check chokes if we are in an SIIT cloud. As none of BSDs + * support IPv4-less kernel compilation, we cannot support SIIT + * environment at all. So, it makes more sense for us to reject any + * malicious packets for non-SIIT environment, than try to do a + * partial support for SIIT environment. + */ + if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || + IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { + V_ip6stat.ip6s_badscope++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + goto bad; + } +#if 0 + /* + * Reject packets with IPv4 compatible addresses (auto tunnel). + * + * The code forbids auto tunnel relay case in RFC1933 (the check is + * stronger than RFC1933). We may want to re-enable it if mech-xx + * is revised to forbid relaying case. + */ + if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || + IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { + V_ip6stat.ip6s_badscope++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + goto bad; + } +#endif + + /* + * Run through list of hooks for input packets. + * + * NB: Beware of the destination address changing + * (e.g. by NAT rewriting). When this happens, + * tell ip6_forward to do the right thing. + */ + odst = ip6->ip6_dst; + + /* Jump over all PFIL processing if hooks are not active. */ + if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + goto passin; + + if (pfil_run_hooks(&V_inet6_pfil_hook, &m, + m->m_pkthdr.rcvif, PFIL_IN, NULL)) + return; + if (m == NULL) /* consumed by filter */ + return; + ip6 = mtod(m, struct ip6_hdr *); + srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); + +passin: + /* + * Disambiguate address scope zones (if there is ambiguity). + * We first make sure that the original source or destination address + * is not in our internal form for scoped addresses. 
Such addresses + * are not necessarily invalid spec-wise, but we cannot accept them due + * to the usage conflict. + * in6_setscope() then also checks and rejects the cases where src or + * dst are the loopback address and the receiving interface + * is not loopback. + */ + if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) { + V_ip6stat.ip6s_badscope++; /* XXX */ + goto bad; + } + if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) || + in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) { + V_ip6stat.ip6s_badscope++; + goto bad; + } + + /* + * Multicast check. Assume packet is for us to avoid + * prematurely taking locks. + */ + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + ours = 1; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); + deliverifp = m->m_pkthdr.rcvif; + goto hbhcheck; + } + + /* + * Unicast check + */ + + bzero(&dst6, sizeof(dst6)); + dst6.sin6_family = AF_INET6; + dst6.sin6_len = sizeof(struct sockaddr_in6); + dst6.sin6_addr = ip6->ip6_dst; + ifp = m->m_pkthdr.rcvif; + IF_AFDATA_LOCK(ifp); + lle = lla_lookup(LLTABLE6(ifp), 0, + (struct sockaddr *)&dst6); + IF_AFDATA_UNLOCK(ifp); + if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) { + struct ifaddr *ifa; + struct in6_ifaddr *ia6; + int bad; + + bad = 1; +#define sa_equal(a1, a2) \ + (bcmp((a1), (a2), ((a1))->sin6_len) == 0) + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != dst6.sin6_family) + continue; + if (sa_equal(&dst6, ifa->ifa_addr)) + break; + } + KASSERT(ifa != NULL, ("%s: ifa not found for lle %p", + __func__, lle)); +#undef sa_equal + + ia6 = (struct in6_ifaddr *)ifa; + if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { + /* Count the packet in the ip address stats */ + ia6->ia_ifa.if_ipackets++; + ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; + + /* + * record address information into m_tag. 
+ */ + (void)ip6_setdstifaddr(m, ia6); + + bad = 0; + } else { + char ip6bufs[INET6_ADDRSTRLEN]; + char ip6bufd[INET6_ADDRSTRLEN]; + /* address is not ready, so discard the packet. */ + nd6log((LOG_INFO, + "ip6_input: packet to an unready address %s->%s\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst))); + } + IF_ADDR_UNLOCK(ifp); + LLE_RUNLOCK(lle); + if (bad) + goto bad; + else { + ours = 1; + deliverifp = ifp; + goto hbhcheck; + } + } + if (lle != NULL) + LLE_RUNLOCK(lle); + + dst = &rin6.ro_dst; + dst->sin6_len = sizeof(struct sockaddr_in6); + dst->sin6_family = AF_INET6; + dst->sin6_addr = ip6->ip6_dst; + rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0); + if (rin6.ro_rt) + RT_UNLOCK(rin6.ro_rt); + +#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) + + /* + * Accept the packet if the forwarding interface to the destination + * according to the routing table is the loopback interface, + * unless the associated route has a gateway. + * Note that this approach causes to accept a packet if there is a + * route to the loopback interface for the destination of the packet. + * But we think it's even useful in some situations, e.g. when using + * a special daemon which wants to intercept the packet. + * + * XXX: some OSes automatically make a cloned route for the destination + * of an outgoing packet. If the outgoing interface of the packet + * is a loopback one, the kernel would consider the packet to be + * accepted, even if we have no such address assinged on the interface. + * We check the cloned flag of the route entry to reject such cases, + * assuming that route entries for our own addresses are not made by + * cloning (it should be true because in6_addloop explicitly installs + * the host route). However, we might have to do an explicit check + * while it would be less efficient. Or, should we rather install a + * reject route for such a case? 
+ */ + if (rin6.ro_rt && + (rin6.ro_rt->rt_flags & + (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && +#ifdef RTF_WASCLONED + !(rin6.ro_rt->rt_flags & RTF_WASCLONED) && +#endif +#ifdef RTF_CLONED + !(rin6.ro_rt->rt_flags & RTF_CLONED) && +#endif +#if 0 + /* + * The check below is redundant since the comparison of + * the destination and the key of the rtentry has + * already done through looking up the routing table. + */ + IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, + &rt6_key(rin6.ro_rt)->sin6_addr) +#endif + rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) { + int free_ia6 = 0; + struct in6_ifaddr *ia6; + + /* + * found the loopback route to the interface address + */ + if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) { + struct sockaddr_in6 dest6; + + bzero(&dest6, sizeof(dest6)); + dest6.sin6_family = AF_INET6; + dest6.sin6_len = sizeof(dest6); + dest6.sin6_addr = ip6->ip6_dst; + ia6 = (struct in6_ifaddr *) + ifa_ifwithaddr((struct sockaddr *)&dest6); + if (ia6 == NULL) + goto bad; + free_ia6 = 1; + } + else + ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa; + + /* + * record address information into m_tag. + */ + (void)ip6_setdstifaddr(m, ia6); + + /* + * packets to a tentative, duplicated, or somehow invalid + * address must not be accepted. + */ + if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { + /* this address is ready */ + ours = 1; + deliverifp = ia6->ia_ifp; /* correct? */ + /* Count the packet in the ip address stats */ + ia6->ia_ifa.if_ipackets++; + ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; + if (ia6 != NULL && free_ia6 != 0) + ifa_free(&ia6->ia_ifa); + goto hbhcheck; + } else { + char ip6bufs[INET6_ADDRSTRLEN]; + char ip6bufd[INET6_ADDRSTRLEN]; + /* address is not ready, so discard the packet. 
*/ + nd6log((LOG_INFO, + "ip6_input: packet to an unready address %s->%s\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst))); + + if (ia6 != NULL && free_ia6 != 0) + ifa_free(&ia6->ia_ifa); + goto bad; + } + } + + /* + * FAITH (Firewall Aided Internet Translator) + */ + if (V_ip6_keepfaith) { + if (rin6.ro_rt && rin6.ro_rt->rt_ifp && + rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) { + /* XXX do we need more sanity checks? */ + ours = 1; + deliverifp = rin6.ro_rt->rt_ifp; /* faith */ + goto hbhcheck; + } + } + + /* + * Now there is no reason to process the packet if it's not our own + * and we're not a router. + */ + if (!V_ip6_forwarding) { + V_ip6stat.ip6s_cantforward++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + goto bad; + } + + hbhcheck: + /* + * record address information into m_tag, if we don't have one yet. + * note that we are unable to record it, if the address is not listed + * as our interface address (e.g. multicast addresses, addresses + * within FAITH prefixes and such). + */ + if (deliverifp && !ip6_getdstifaddr(m)) { + struct in6_ifaddr *ia6; + + ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); + if (ia6) { + if (!ip6_setdstifaddr(m, ia6)) { + /* + * XXX maybe we should drop the packet here, + * as we could not provide enough information + * to the upper layers. + */ + } + ifa_free(&ia6->ia_ifa); + } + } + + /* + * Process Hop-by-Hop options header if it's contained. + * m may be modified in ip6_hopopts_input(). + * If a JumboPayload option is included, plen will also be modified. 
+ */ + plen = (u_int32_t)ntohs(ip6->ip6_plen); + if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { + struct ip6_hbh *hbh; + + if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) { +#if 0 /*touches NULL pointer*/ + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); +#endif + goto out; /* m have already been freed */ + } + + /* adjust pointer */ + ip6 = mtod(m, struct ip6_hdr *); + + /* + * if the payload length field is 0 and the next header field + * indicates Hop-by-Hop Options header, then a Jumbo Payload + * option MUST be included. + */ + if (ip6->ip6_plen == 0 && plen == 0) { + /* + * Note that if a valid jumbo payload option is + * contained, ip6_hopopts_input() must set a valid + * (non-zero) payload length to the variable plen. + */ + V_ip6stat.ip6s_badoptions++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); + goto out; + } +#ifndef PULLDOWN_TEST + /* ip6_hopopts_input() ensures that mbuf is contiguous */ + hbh = (struct ip6_hbh *)(ip6 + 1); +#else + IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), + sizeof(struct ip6_hbh)); + if (hbh == NULL) { + V_ip6stat.ip6s_tooshort++; + goto out; + } +#endif + nxt = hbh->ip6h_nxt; + + /* + * If we are acting as a router and the packet contains a + * router alert option, see if we know the option value. + * Currently, we only support the option value for MLD, in which + * case we should pass the packet to the multicast routing + * daemon. + */ + if (rtalert != ~0) { + switch (rtalert) { + case IP6OPT_RTALERT_MLD: + if (V_ip6_forwarding) + ours = 1; + break; + default: + /* + * RFC2711 requires unrecognized values must be + * silently ignored. + */ + break; + } + } + } else + nxt = ip6->ip6_nxt; + + /* + * Check that the amount of data in the buffers + * is as at least much as the IPv6 header would have us expect. + * Trim mbufs if longer than we expect. 
+ * Drop packet if shorter than we expect. + */ + if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { + V_ip6stat.ip6s_tooshort++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); + goto bad; + } + if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { + if (m->m_len == m->m_pkthdr.len) { + m->m_len = sizeof(struct ip6_hdr) + plen; + m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; + } else + m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); + } + + /* + * Forward if desirable. + */ + if (V_ip6_mrouter && + IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + /* + * If we are acting as a multicast router, all + * incoming multicast packets are passed to the + * kernel-level multicast forwarding function. + * The packet is returned (relatively) intact; if + * ip6_mforward() returns a non-zero value, the packet + * must be discarded, else it may be accepted below. + * + * XXX TODO: Check hlim and multicast scope here to avoid + * unnecessarily calling into ip6_mforward(). + */ + if (ip6_mforward && + ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) { + IP6STAT_INC(ip6s_cantforward); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + goto bad; + } + } else if (!ours) { + ip6_forward(m, srcrt); + goto out; + } + + ip6 = mtod(m, struct ip6_hdr *); + + /* + * Malicious party may be able to use IPv4 mapped addr to confuse + * tcp/udp stack and bypass security checks (act as if it was from + * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious. + * + * For SIIT end node behavior, you may want to disable the check. + * However, you will become vulnerable to attacks using IPv4 mapped + * source. 
+ */ + if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || + IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { + V_ip6stat.ip6s_badscope++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + goto bad; + } + + /* + * Tell launch routine the next header + */ + V_ip6stat.ip6s_delivered++; + in6_ifstat_inc(deliverifp, ifs6_in_deliver); + nest = 0; + + while (nxt != IPPROTO_DONE) { + if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { + V_ip6stat.ip6s_toomanyhdr++; + goto bad; + } + + /* + * protection against faulty packet - there should be + * more sanity checks in header chain processing. + */ + if (m->m_pkthdr.len < off) { + V_ip6stat.ip6s_tooshort++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); + goto bad; + } + +#ifdef IPSEC + /* + * enforce IPsec policy checking if we are seeing last header. + * note that we do not visit this with protocols with pcb layer + * code - like udp/tcp/raw ip. + */ + if (ip6_ipsec_input(m, nxt)) + goto bad; +#endif /* IPSEC */ + + /* + * Use mbuf flags to propagate Router Alert option to + * ICMPv6 layer, as hop-by-hop options have been stripped. + */ + if (nxt == IPPROTO_ICMPV6 && rtalert != ~0) + m->m_flags |= M_RTALERT_MLD; + + nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); + } + goto out; +bad: + m_freem(m); +out: + if (rin6.ro_rt) + RTFREE(rin6.ro_rt); +} + +/* + * set/grab in6_ifaddr correspond to IPv6 destination address. + * XXX backward compatibility wrapper + * + * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag, + * and then bump it when the tag is copied, and release it when the tag is + * freed. Unfortunately, m_tags don't support deep copies (yet), so instead + * we just bump the ia refcount when we receive it. This should be fixed. 
+ */ +static struct ip6aux * +ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6) +{ + struct ip6aux *ip6a; + + ip6a = ip6_addaux(m); + if (ip6a) + ip6a->ip6a_dstia6 = ia6; + return ip6a; /* NULL if failed to set */ +} + +struct in6_ifaddr * +ip6_getdstifaddr(struct mbuf *m) +{ + struct ip6aux *ip6a; + struct in6_ifaddr *ia; + + ip6a = ip6_findaux(m); + if (ip6a) { + ia = ip6a->ip6a_dstia6; + ifa_ref(&ia->ia_ifa); + return ia; + } else + return NULL; +} + +/* + * Hop-by-Hop options header processing. If a valid jumbo payload option is + * included, the real payload length will be stored in plenp. + * + * rtalertp - XXX: should be stored more smart way + */ +static int +ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp, + struct mbuf **mp, int *offp) +{ + struct mbuf *m = *mp; + int off = *offp, hbhlen; + struct ip6_hbh *hbh; + u_int8_t *opt; + + /* validation of the length of the header */ +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1); + hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); + hbhlen = (hbh->ip6h_len + 1) << 3; + + IP6_EXTHDR_CHECK(m, off, hbhlen, -1); + hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, + sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); + if (hbh == NULL) { + V_ip6stat.ip6s_tooshort++; + return -1; + } + hbhlen = (hbh->ip6h_len + 1) << 3; + IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), + hbhlen); + if (hbh == NULL) { + V_ip6stat.ip6s_tooshort++; + return -1; + } +#endif + off += hbhlen; + hbhlen -= sizeof(struct ip6_hbh); + opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh); + + if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), + hbhlen, rtalertp, plenp) < 0) + return (-1); + + *offp = off; + *mp = m; + return (0); +} + +/* + * Search header for all Hop-by-hop options and process each option. 
+ * This function is separate from ip6_hopopts_input() in order to + * handle a case where the sending node itself process its hop-by-hop + * options header. In such a case, the function is called from ip6_output(). + * + * The function assumes that hbh header is located right after the IPv6 header + * (RFC2460 p7), opthead is pointer into data content in m, and opthead to + * opthead + hbhlen is located in continuous memory region. + */ +int +ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen, + u_int32_t *rtalertp, u_int32_t *plenp) +{ + struct ip6_hdr *ip6; + int optlen = 0; + u_int8_t *opt = opthead; + u_int16_t rtalert_val; + u_int32_t jumboplen; + const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh); + + for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) { + switch (*opt) { + case IP6OPT_PAD1: + optlen = 1; + break; + case IP6OPT_PADN: + if (hbhlen < IP6OPT_MINLEN) { + V_ip6stat.ip6s_toosmall++; + goto bad; + } + optlen = *(opt + 1) + 2; + break; + case IP6OPT_ROUTER_ALERT: + /* XXX may need check for alignment */ + if (hbhlen < IP6OPT_RTALERT_LEN) { + V_ip6stat.ip6s_toosmall++; + goto bad; + } + if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) { + /* XXX stat */ + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + erroff + opt + 1 - opthead); + return (-1); + } + optlen = IP6OPT_RTALERT_LEN; + bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2); + *rtalertp = ntohs(rtalert_val); + break; + case IP6OPT_JUMBO: + /* XXX may need check for alignment */ + if (hbhlen < IP6OPT_JUMBO_LEN) { + V_ip6stat.ip6s_toosmall++; + goto bad; + } + if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) { + /* XXX stat */ + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + erroff + opt + 1 - opthead); + return (-1); + } + optlen = IP6OPT_JUMBO_LEN; + + /* + * IPv6 packets that have non 0 payload length + * must not contain a jumbo payload option. 
+ */ + ip6 = mtod(m, struct ip6_hdr *); + if (ip6->ip6_plen) { + V_ip6stat.ip6s_badoptions++; + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + erroff + opt - opthead); + return (-1); + } + + /* + * We may see jumbolen in unaligned location, so + * we'd need to perform bcopy(). + */ + bcopy(opt + 2, &jumboplen, sizeof(jumboplen)); + jumboplen = (u_int32_t)htonl(jumboplen); + +#if 1 + /* + * if there are multiple jumbo payload options, + * *plenp will be non-zero and the packet will be + * rejected. + * the behavior may need some debate in ipngwg - + * multiple options does not make sense, however, + * there's no explicit mention in specification. + */ + if (*plenp != 0) { + V_ip6stat.ip6s_badoptions++; + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + erroff + opt + 2 - opthead); + return (-1); + } +#endif + + /* + * jumbo payload length must be larger than 65535. + */ + if (jumboplen <= IPV6_MAXPACKET) { + V_ip6stat.ip6s_badoptions++; + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + erroff + opt + 2 - opthead); + return (-1); + } + *plenp = jumboplen; + + break; + default: /* unknown option */ + if (hbhlen < IP6OPT_MINLEN) { + V_ip6stat.ip6s_toosmall++; + goto bad; + } + optlen = ip6_unknown_opt(opt, m, + erroff + opt - opthead); + if (optlen == -1) + return (-1); + optlen += 2; + break; + } + } + + return (0); + + bad: + m_freem(m); + return (-1); +} + +/* + * Unknown option processing. + * The third argument `off' is the offset from the IPv6 header to the option, + * which is necessary if the IPv6 header the and option header and IPv6 header + * is not continuous in order to return an ICMPv6 error. 
+ */ +int +ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off) +{ + struct ip6_hdr *ip6; + + switch (IP6OPT_TYPE(*optp)) { + case IP6OPT_TYPE_SKIP: /* ignore the option */ + return ((int)*(optp + 1)); + case IP6OPT_TYPE_DISCARD: /* silently discard */ + m_freem(m); + return (-1); + case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ + V_ip6stat.ip6s_badoptions++; + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); + return (-1); + case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ + V_ip6stat.ip6s_badoptions++; + ip6 = mtod(m, struct ip6_hdr *); + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || + (m->m_flags & (M_BCAST|M_MCAST))) + m_freem(m); + else + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_OPTION, off); + return (-1); + } + + m_freem(m); /* XXX: NOTREACHED */ + return (-1); +} + +/* + * Create the "control" list for this pcb. + * These functions will not modify mbuf chain at all. + * + * With KAME mbuf chain restriction: + * The routine will be called from upper layer handlers like tcp6_input(). + * Thus the routine assumes that the caller (tcp6_input) have already + * called IP6_EXTHDR_CHECK() and all the extension headers are located in the + * very first mbuf on the mbuf chain. + * + * ip6_savecontrol_v4 will handle those options that are possible to be + * set on a v4-mapped socket. + * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those + * options and handle the v6-only ones itself. 
+ */ +struct mbuf ** +ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, + int *v4only) +{ + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + +#ifdef SO_TIMESTAMP + if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) { + struct timeval tv; + + microtime(&tv); + *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), + SCM_TIMESTAMP, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + } +#endif + + if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { + if (v4only != NULL) + *v4only = 1; + return (mp); + } + +#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y)) + /* RFC 2292 sec. 5 */ + if ((inp->inp_flags & IN6P_PKTINFO) != 0) { + struct in6_pktinfo pi6; + + bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); + in6_clearscope(&pi6.ipi6_addr); /* XXX */ + pi6.ipi6_ifindex = + (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; + + *mp = sbcreatecontrol((caddr_t) &pi6, + sizeof(struct in6_pktinfo), + IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + } + + if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) { + int hlim = ip6->ip6_hlim & 0xff; + + *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int), + IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), + IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + } + + if (v4only != NULL) + *v4only = 0; + return (mp); +} + +void +ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) +{ + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + int v4only = 0; + + mp = ip6_savecontrol_v4(in6p, m, mp, &v4only); + if (v4only) + return; + + if ((in6p->inp_flags & IN6P_TCLASS) != 0) { + u_int32_t flowinfo; + int tclass; + + flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK); + flowinfo >>= 20; + + tclass = flowinfo & 0xff; + *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass), + IPV6_TCLASS, IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + } + + /* + * IPV6_HOPOPTS socket option. 
Recall that we required super-user + * privilege for the option (see ip6_ctloutput), but it might be too + * strict, since there might be some hop-by-hop options which can be + * returned to a normal user. + * See also RFC 2292 section 6 (or RFC 3542 section 8). + */ + if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) { + /* + * Check if a hop-by-hop options header is contained in the + * received packet, and if so, store the options as ancillary + * data. Note that a hop-by-hop options header must be + * just after the IPv6 header, which is assured through the + * IPv6 input processing. + */ + if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { + struct ip6_hbh *hbh; + int hbhlen = 0; +#ifdef PULLDOWN_TEST + struct mbuf *ext; +#endif + + /* + * In both variants below hbhlen is the header size in + * bytes, computed as (ip6h_len + 1) * 8. + */ +#ifndef PULLDOWN_TEST + hbh = (struct ip6_hbh *)(ip6 + 1); + hbhlen = (hbh->ip6h_len + 1) << 3; +#else + ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr), + ip6->ip6_nxt); + if (ext == NULL) { + V_ip6stat.ip6s_tooshort++; + return; + } + hbh = mtod(ext, struct ip6_hbh *); + hbhlen = (hbh->ip6h_len + 1) << 3; + if (hbhlen != ext->m_len) { + m_freem(ext); + V_ip6stat.ip6s_tooshort++; + return; + } +#endif + + /* + * XXX: We copy the whole header even if a + * jumbo payload option is included, the option which + * is to be removed before returning according to + * RFC2292. + * Note: this constraint is removed in RFC3542 + */ + *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, + IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS), + IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; +#ifdef PULLDOWN_TEST + m_freem(ext); +#endif + } + } + + if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { + int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); + + /* + * Search for destination options headers or routing + * header(s) through the header chain, and store each + * header as ancillary data. + * Note that the order of the headers remains in + * the chain of ancillary data. + */ + while (1) { /* is explicit loop prevention necessary?
*/ + struct ip6_ext *ip6e = NULL; + int elen; +#ifdef PULLDOWN_TEST + struct mbuf *ext = NULL; +#endif + + /* + * if it is not an extension header, don't try to + * pull it from the chain. + */ + switch (nxt) { + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_HOPOPTS: + case IPPROTO_AH: /* is it possible? */ + break; + default: + goto loopend; + } + + /* + * elen is the header size in bytes: (ip6e_len + 2) * 4 for + * AH, (ip6e_len + 1) * 8 for the other extension headers. + */ +#ifndef PULLDOWN_TEST + if (off + sizeof(*ip6e) > m->m_len) + goto loopend; + ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); + if (nxt == IPPROTO_AH) + elen = (ip6e->ip6e_len + 2) << 2; + else + elen = (ip6e->ip6e_len + 1) << 3; + if (off + elen > m->m_len) + goto loopend; +#else + ext = ip6_pullexthdr(m, off, nxt); + if (ext == NULL) { + V_ip6stat.ip6s_tooshort++; + return; + } + ip6e = mtod(ext, struct ip6_ext *); + if (nxt == IPPROTO_AH) + elen = (ip6e->ip6e_len + 2) << 2; + else + elen = (ip6e->ip6e_len + 1) << 3; + if (elen != ext->m_len) { + m_freem(ext); + V_ip6stat.ip6s_tooshort++; + return; + } +#endif + + switch (nxt) { + case IPPROTO_DSTOPTS: + if (!(in6p->inp_flags & IN6P_DSTOPTS)) + break; + + *mp = sbcreatecontrol((caddr_t)ip6e, elen, + IS2292(in6p, + IPV6_2292DSTOPTS, IPV6_DSTOPTS), + IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + break; + case IPPROTO_ROUTING: + /* + * Skip routing headers unless IN6P_RTHDR was + * requested. The flag test must be parenthesized: + * ! binds tighter than &, so the unparenthesized + * form tested (!inp_flags) & IN6P_RTHDR instead. + */ + if (!(in6p->inp_flags & IN6P_RTHDR)) + break; + + *mp = sbcreatecontrol((caddr_t)ip6e, elen, + IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR), + IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + break; + case IPPROTO_HOPOPTS: + case IPPROTO_AH: /* is it possible? */ + break; + + default: + /* + * other cases have been filtered in the above. + * none will visit this case. here we supply + * the code just in case (nxt overwritten or + * other cases). + */ +#ifdef PULLDOWN_TEST + m_freem(ext); +#endif + goto loopend; + + } + + /* proceed with the next header.
*/ + off += elen; + nxt = ip6e->ip6e_nxt; + ip6e = NULL; +#ifdef PULLDOWN_TEST + m_freem(ext); + ext = NULL; +#endif + } + loopend: + ; + } +} +#undef IS2292 + +void +ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu) +{ + struct socket *so; + struct mbuf *m_mtu; + struct ip6_mtuinfo mtuctl; + + so = in6p->inp_socket; + + if (mtu == NULL) + return; + +#ifdef DIAGNOSTIC + if (so == NULL) /* I believe this is impossible */ + panic("ip6_notify_pmtu: socket is NULL"); +#endif + + bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */ + mtuctl.ip6m_mtu = *mtu; + mtuctl.ip6m_addr = *dst; + if (sa6_recoverscope(&mtuctl.ip6m_addr)) + return; + + if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl), + IPV6_PATHMTU, IPPROTO_IPV6)) == NULL) + return; + + if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu) + == 0) { + m_freem(m_mtu); + /* XXX: should count statistics */ + } else + sorwakeup(so); + + return; +} + +#ifdef PULLDOWN_TEST +/* + * pull single extension header from mbuf chain. returns single mbuf that + * contains the result, or NULL on error. + */ +static struct mbuf * +ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) +{ + struct ip6_ext ip6e; + size_t elen; + struct mbuf *n; + +#ifdef DIAGNOSTIC + switch (nxt) { + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_HOPOPTS: + case IPPROTO_AH: /* is it possible? 
*/ + break; + default: + printf("ip6_pullexthdr: invalid nxt=%d\n", nxt); + } +#endif + + m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); + if (nxt == IPPROTO_AH) + elen = (ip6e.ip6e_len + 2) << 2; + else + elen = (ip6e.ip6e_len + 1) << 3; + + MGET(n, M_DONTWAIT, MT_DATA); + if (n && elen >= MLEN) { + MCLGET(n, M_DONTWAIT); + if ((n->m_flags & M_EXT) == 0) { + m_free(n); + n = NULL; + } + } + if (!n) + return NULL; + + n->m_len = 0; + if (elen >= M_TRAILINGSPACE(n)) { + m_free(n); + return NULL; + } + + m_copydata(m, off, elen, mtod(n, caddr_t)); + n->m_len = elen; + return n; +} +#endif + +/* + * Get pointer to the previous header followed by the header + * currently processed. + * XXX: This function supposes that + * M includes all headers, + * the next header field and the header length field of each header + * are valid, and + * the sum of each header length equals to OFF. + * Because of these assumptions, this function must be called very + * carefully. Moreover, it will not be used in the near future when + * we develop `neater' mechanism to process extension headers. + */ +char * +ip6_get_prevhdr(struct mbuf *m, int off) +{ + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + + if (off == sizeof(struct ip6_hdr)) + return (&ip6->ip6_nxt); + else { + int len, nxt; + struct ip6_ext *ip6e = NULL; + + nxt = ip6->ip6_nxt; + len = sizeof(struct ip6_hdr); + while (len < off) { + ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len); + + switch (nxt) { + case IPPROTO_FRAGMENT: + len += sizeof(struct ip6_frag); + break; + case IPPROTO_AH: + len += (ip6e->ip6e_len + 2) << 2; + break; + default: + len += (ip6e->ip6e_len + 1) << 3; + break; + } + nxt = ip6e->ip6e_nxt; + } + if (ip6e) + return (&ip6e->ip6e_nxt); + else + return NULL; + } +} + +/* + * get next header offset. m will be retained. 
+ */ +int +ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp) +{ + struct ip6_hdr ip6; + struct ip6_ext ip6e; + struct ip6_frag fh; + + /* + * Returns the offset just past the header of type proto that starts + * at off, and stores that header's next-header value through nxtp + * when nxtp is non-NULL. Returns -1 when the packet is too short, + * when a fragment header has a non-zero offset, or when proto is a + * type this routine does not step over. Headers are read with + * m_copydata() into local copies. + */ + + /* just in case */ + if (m == NULL) + panic("ip6_nexthdr: m == NULL"); + if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off) + return -1; + + switch (proto) { + case IPPROTO_IPV6: + if (m->m_pkthdr.len < off + sizeof(ip6)) + return -1; + m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6); + if (nxtp) + *nxtp = ip6.ip6_nxt; + off += sizeof(ip6); + return off; + + case IPPROTO_FRAGMENT: + /* + * terminate parsing if it is not the first fragment, + * it does not make sense to parse through it. + */ + if (m->m_pkthdr.len < off + sizeof(fh)) + return -1; + m_copydata(m, off, sizeof(fh), (caddr_t)&fh); + /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */ + if (fh.ip6f_offlg & IP6F_OFF_MASK) + return -1; + if (nxtp) + *nxtp = fh.ip6f_nxt; + off += sizeof(struct ip6_frag); + return off; + + case IPPROTO_AH: + if (m->m_pkthdr.len < off + sizeof(ip6e)) + return -1; + m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); + if (nxtp) + *nxtp = ip6e.ip6e_nxt; + /* AH size in bytes: (ip6e_len + 2) * 4 */ + off += (ip6e.ip6e_len + 2) << 2; + return off; + + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + if (m->m_pkthdr.len < off + sizeof(ip6e)) + return -1; + m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); + if (nxtp) + *nxtp = ip6e.ip6e_nxt; + /* header size in bytes: (ip6e_len + 1) * 8 */ + off += (ip6e.ip6e_len + 1) << 3; + return off; + + case IPPROTO_NONE: + case IPPROTO_ESP: + case IPPROTO_IPCOMP: + /* give up */ + return -1; + + default: + return -1; + } + + /* not reached: every switch arm above returns */ + return -1; +} + +/* + * get offset for the last header in the chain. m will be kept untainted.
+ */ +int +ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) +{ + int newoff; + int nxt; + + if (!nxtp) { + nxt = -1; + nxtp = &nxt; + } + while (1) { + newoff = ip6_nexthdr(m, off, proto, nxtp); + if (newoff < 0) + return off; + else if (newoff < off) + return -1; /* invalid */ + else if (newoff == off) + return newoff; + + off = newoff; + proto = *nxtp; + } +} + +struct ip6aux * +ip6_addaux(struct mbuf *m) +{ + struct m_tag *mtag; + + mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); + if (!mtag) { + mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux), + M_NOWAIT); + if (mtag) { + m_tag_prepend(m, mtag); + bzero(mtag + 1, sizeof(struct ip6aux)); + } + } + return mtag ? (struct ip6aux *)(mtag + 1) : NULL; +} + +struct ip6aux * +ip6_findaux(struct mbuf *m) +{ + struct m_tag *mtag; + + mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); + return mtag ? (struct ip6aux *)(mtag + 1) : NULL; +} + +void +ip6_delaux(struct mbuf *m) +{ + struct m_tag *mtag; + + mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); + if (mtag) + m_tag_delete(m, mtag); +} + +/* + * System control for IP6 + */ + +u_char inet6ctlerrmap[PRC_NCMDS] = { + 0, 0, 0, 0, + 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, + EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, + EMSGSIZE, EHOSTUNREACH, 0, 0, + 0, 0, 0, 0, + ENOPROTOOPT +}; diff --git a/freebsd/sys/netinet6/ip6_ipsec.c b/freebsd/sys/netinet6/ip6_ipsec.c new file mode 100644 index 00000000..a50c22c5 --- /dev/null +++ b/freebsd/sys/netinet6/ip6_ipsec.c @@ -0,0 +1,386 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/mac.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/sysctl.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/route.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_systm.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/ip.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/ip_var.h> +#include <freebsd/netinet/ip_options.h> + +#include <freebsd/machine/in_cksum.h> + +#ifdef IPSEC +#include <freebsd/netipsec/ipsec.h> +#include <freebsd/netipsec/ipsec6.h> +#include <freebsd/netipsec/xform.h> +#include <freebsd/netipsec/key.h> +#ifdef IPSEC_DEBUG +#include <freebsd/netipsec/key_debug.h> +#else +#define KEYDEBUG(lev,arg) +#endif +#endif /*IPSEC*/ + +#include <freebsd/netinet6/ip6_ipsec.h> +#include <freebsd/netinet6/ip6_var.h> + +extern struct protosw inet6sw[]; + + +#ifdef INET6 +#ifdef IPSEC +#ifdef IPSEC_FILTERTUNNEL +static VNET_DEFINE(int, ip6_ipsec6_filtertunnel) = 1; +#else +static VNET_DEFINE(int, ip6_ipsec6_filtertunnel) = 0; +#endif +#define V_ip6_ipsec6_filtertunnel VNET(ip6_ipsec6_filtertunnel) + +SYSCTL_DECL(_net_inet6_ipsec6); +SYSCTL_VNET_INT(_net_inet6_ipsec6, OID_AUTO, + filtertunnel, CTLFLAG_RW, &VNET_NAME(ip6_ipsec6_filtertunnel), 0, + "If set filter packets from an IPsec tunnel."); +#endif /* IPSEC */ +#endif /* INET6 */ + +/* + * Check if we have to jump over firewall processing for this packet. + * Called from ip_input(). + * 1 = jump over firewall, 0 = packet goes through firewall. 
+ */ +int +ip6_ipsec_filtertunnel(struct mbuf *m) +{ +#if defined(IPSEC) + + /* + * Bypass packet filtering for packets from a tunnel. + */ + if (!V_ip6_ipsec6_filtertunnel && + m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) + return 1; +#endif + return 0; +} + +/* + * Check if this packet has an active SA and needs to be dropped instead + * of forwarded. + * Called from ip_input(). + * 1 = drop packet, 0 = forward packet. + */ +int +ip6_ipsec_fwd(struct mbuf *m) +{ +#ifdef IPSEC + struct m_tag *mtag; + struct tdb_ident *tdbi; + struct secpolicy *sp; + int s, error; + mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); + s = splnet(); + if (mtag != NULL) { + tdbi = (struct tdb_ident *)(mtag + 1); + sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); + } else { + sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, + IP_FORWARDING, &error); + } + if (sp == NULL) { /* NB: can happen if error */ + splx(s); + /*XXX error stat???*/ + DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ + return 1; + } + + /* + * Check security policy against packet attributes. + */ + error = ipsec_in_reject(sp, m); + KEY_FREESP(&sp); + splx(s); + if (error) { + V_ip6stat.ip6s_cantforward++; + return 1; + } +#endif /* IPSEC */ + return 0; +} + +/* + * Check if protocol type doesn't have a further header and do IPSEC + * decryption or reject right now. Protocols with further headers get + * their IPSEC treatment within the protocol specific processing. + * Called from ip_input(). + * 1 = drop packet, 0 = continue processing packet. + */ +int +ip6_ipsec_input(struct mbuf *m, int nxt) +{ +#ifdef IPSEC + struct m_tag *mtag; + struct tdb_ident *tdbi; + struct secpolicy *sp; + int s, error; + /* + * enforce IPsec policy checking if we are seeing last header. + * note that we do not visit this with protocols with pcb layer + * code - like udp/tcp/raw ip. 
+ */ + if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && + ipsec6_in_reject(m, NULL)) { + + /* + * Check if the packet has already had IPsec processing + * done. If so, then just pass it along. This tag gets + * set during AH, ESP, etc. input handling, before the + * packet is returned to the ip input queue for delivery. + */ + mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); + s = splnet(); + if (mtag != NULL) { + tdbi = (struct tdb_ident *)(mtag + 1); + sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); + } else { + sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, + IP_FORWARDING, &error); + } + if (sp != NULL) { + /* + * Check security policy against packet attributes. + */ + error = ipsec_in_reject(sp, m); + KEY_FREESP(&sp); + } else { + /* XXX error stat??? */ + error = EINVAL; + DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ + return 1; + } + splx(s); + if (error) + return 1; + } +#endif /* IPSEC */ + return 0; +} + +/* + * Called from ip6_output(). + * 1 = drop packet, 0 = continue processing packet, + * -1 = packet was reinjected and stop processing packet + */ + +int +ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error, + struct ifnet **ifp, struct secpolicy **sp) +{ +#ifdef IPSEC + struct tdb_ident *tdbi; + struct m_tag *mtag; + /* XXX int s; */ + if (sp == NULL) + return 1; + mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); + if (mtag != NULL) { + tdbi = (struct tdb_ident *)(mtag + 1); + *sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); + if (*sp == NULL) + *error = -EINVAL; /* force silent drop */ + m_tag_delete(*m, mtag); + } else { + *sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags, + error, inp); + } + + /* + * There are four return cases: + * sp != NULL apply IPsec policy + * sp == NULL, error == 0 no IPsec handling needed + * sp == NULL, error == -EINVAL discard packet w/o error + * sp == NULL, error != 0 discard packet, report error + */ + if (*sp != NULL) { + /* Loop detection, check 
if ipsec processing already done */ + KASSERT((*sp)->req != NULL, ("ip_output: no ipsec request")); + for (mtag = m_tag_first(*m); mtag != NULL; + mtag = m_tag_next(*m, mtag)) { + if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) + continue; + if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && + mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) + continue; + /* + * Check if policy has an SA associated with it. + * This can happen when an SP has yet to acquire + * an SA; e.g. on first reference. If it occurs, + * then we let ipsec4_process_packet do its thing. + */ + if ((*sp)->req->sav == NULL) + break; + tdbi = (struct tdb_ident *)(mtag + 1); + if (tdbi->spi == (*sp)->req->sav->spi && + tdbi->proto == (*sp)->req->sav->sah->saidx.proto && + bcmp(&tdbi->dst, &(*sp)->req->sav->sah->saidx.dst, + sizeof (union sockaddr_union)) == 0) { + /* + * No IPsec processing is needed, free + * reference to SP. + * + * NB: null pointer to avoid free at + * done: below. + */ + KEY_FREESP(sp), *sp = NULL; + /* XXX splx(s); */ + goto done; + } + } + + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. + */ + if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(*m); + (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } + + /* + * Preserve KAME behaviour: ENOENT can be returned + * when an SA acquire is in progress. Don't propagate + * this to user-level; it confuses applications. + * + * XXX this will go away when the SADB is redone. + */ + if (*error == ENOENT) + *error = 0; + goto do_ipsec; + } else { /* sp == NULL */ + if (*error != 0) { + /* + * Hack: -EINVAL is used to signal that a packet + * should be silently discarded. This is typically + * because we asked key management for an SA and + * it was delayed (e.g. kicked up to IKE). + */ + if (*error == -EINVAL) + *error = 0; + goto bad; + } else { + /* No IPsec processing for this packet. 
*/ + } + } +done: + return 0; +do_ipsec: + return -1; +bad: + return 1; +#endif /* IPSEC */ + return 0; +} + +#if 0 +/* + * Compute the MTU for a forwarded packet that gets IPSEC encapsulated. + * Called from ip_forward(). + * Returns MTU suggestion for ICMP needfrag reply. + */ +int +ip6_ipsec_mtu(struct mbuf *m) +{ + int mtu = 0; + /* + * If the packet is routed over IPsec tunnel, tell the + * originator the tunnel MTU. + * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz + * XXX quickhack!!! + */ +#ifdef IPSEC + struct secpolicy *sp = NULL; + int ipsecerror; + int ipsechdr; + struct route *ro; + sp = ipsec_getpolicybyaddr(m, + IPSEC_DIR_OUTBOUND, + IP_FORWARDING, + &ipsecerror); + if (sp != NULL) { + /* count IPsec header size */ + ipsechdr = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL); + + /* + * find the correct route for outer IPv4 + * header, compute tunnel MTU. + */ + if (sp->req != NULL && + sp->req->sav != NULL && + sp->req->sav->sah != NULL) { + ro = &sp->req->sav->sah->route_cache.sa_route; + if (ro->ro_rt && ro->ro_rt->rt_ifp) { + mtu = + ro->ro_rt->rt_rmx.rmx_mtu ? + ro->ro_rt->rt_rmx.rmx_mtu : + ro->ro_rt->rt_ifp->if_mtu; + mtu -= ipsechdr; + } + } + KEY_FREESP(&sp); + } +#endif /* IPSEC */ + /* XXX else case missing. */ + return mtu; +} +#endif diff --git a/freebsd/sys/netinet6/ip6_ipsec.h b/freebsd/sys/netinet6/ip6_ipsec.h new file mode 100644 index 00000000..e3049534 --- /dev/null +++ b/freebsd/sys/netinet6/ip6_ipsec.h @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETINET_IP6_IPSEC_HH_ +#define _NETINET_IP6_IPSEC_HH_ + +int ip6_ipsec_filtertunnel(struct mbuf *); +int ip6_ipsec_fwd(struct mbuf *); +int ip6_ipsec_input(struct mbuf *, int); +int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *, int *, + struct ifnet **, struct secpolicy **sp); +#if 0 +int ip6_ipsec_mtu(struct mbuf *); +#endif +#endif diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c new file mode 100644 index 00000000..661cd1c0 --- /dev/null +++ b/freebsd/sys/netinet6/ip6_mroute.c @@ -0,0 +1,2065 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1998 WIDE Project. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $ + */ + +/*- + * Copyright (c) 1989 Stephen Deering + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Stephen Deering of Stanford University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 + * BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp + */ + +/* + * IP multicast forwarding procedures + * + * Written by David Waitzman, BBN Labs, August 1988. + * Modified by Steve Deering, Stanford, February 1989. + * Modified by Mark J. 
Steiglitz, Stanford, May, 1991 + * Modified by Van Jacobson, LBL, January 1993 + * Modified by Ajit Thyagarajan, PARC, August 1993 + * Modified by Bill Fenner, PARC, April 1994 + * + * MROUTING Revision: 3.5.1.2 + PIM-SMv2 (pimd) Support + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/callout.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/module.h> +#include <freebsd/sys/domain.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/signalvar.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/sx.h> +#include <freebsd/sys/sysctl.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/time.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/raw_cb.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet/ip_encap.h> + +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet6/ip6_mroute.h> +#include <freebsd/netinet6/ip6protosw.h> +#include <freebsd/netinet6/pim6.h> +#include <freebsd/netinet6/pim6_var.h> + +static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry"); + +/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? 
*/ +#define M_HASCL(m) ((m)->m_flags & M_EXT) + +static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *); +static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); +static int register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); +static int set_pim6(int *); +static int socket_send(struct socket *, struct mbuf *, + struct sockaddr_in6 *); + +extern int in6_mcast_loop; +extern struct domain inet6domain; + +static const struct encaptab *pim6_encap_cookie; +static const struct ip6protosw in6_pim_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_PIM, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = pim6_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs +}; +static int pim6_encapcheck(const struct mbuf *, int, int, void *); + +static VNET_DEFINE(int, ip6_mrouter_ver) = 0; +#define V_ip6_mrouter_ver VNET(ip6_mrouter_ver) + +SYSCTL_DECL(_net_inet6); +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); + +static struct mrt6stat mrt6stat; +SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW, + &mrt6stat, mrt6stat, + "Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)"); + +#define NO_RTE_FOUND 0x1 +#define RTE_FOUND 0x2 + +static struct mtx mrouter6_mtx; +#define MROUTER6_LOCK() mtx_lock(&mrouter6_mtx) +#define MROUTER6_UNLOCK() mtx_unlock(&mrouter6_mtx) +#define MROUTER6_LOCK_ASSERT() do { \ + mtx_assert(&mrouter6_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) +#define MROUTER6_LOCK_INIT() \ + mtx_init(&mrouter6_mtx, "IPv6 multicast forwarding", NULL, MTX_DEF) +#define MROUTER6_LOCK_DESTROY() mtx_destroy(&mrouter6_mtx) + +static struct mf6c *mf6ctable[MF6CTBLSIZ]; +SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD, + &mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]", + "IPv6 Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], " + 
"netinet6/ip6_mroute.h)"); + +static struct mtx mfc6_mtx; +#define MFC6_LOCK() mtx_lock(&mfc6_mtx) +#define MFC6_UNLOCK() mtx_unlock(&mfc6_mtx) +#define MFC6_LOCK_ASSERT() do { \ + mtx_assert(&mfc6_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) +#define MFC6_LOCK_INIT() \ + mtx_init(&mfc6_mtx, "IPv6 multicast forwarding cache", NULL, MTX_DEF) +#define MFC6_LOCK_DESTROY() mtx_destroy(&mfc6_mtx) + +static u_char n6expire[MF6CTBLSIZ]; + +static struct mif6 mif6table[MAXMIFS]; +SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD, + &mif6table, sizeof(mif6table), "S,mif6[MAXMIFS]", + "IPv6 Multicast Interfaces (struct mif6[MAXMIFS], netinet6/ip6_mroute.h)"); + +static struct mtx mif6_mtx; +#define MIF6_LOCK() mtx_lock(&mif6_mtx) +#define MIF6_UNLOCK() mtx_unlock(&mif6_mtx) +#define MIF6_LOCK_ASSERT() mtx_assert(&mif6_mtx, MA_OWNED) +#define MIF6_LOCK_INIT() \ + mtx_init(&mif6_mtx, "IPv6 multicast interfaces", NULL, MTX_DEF) +#define MIF6_LOCK_DESTROY() mtx_destroy(&mif6_mtx) + +#ifdef MRT6DEBUG +static VNET_DEFINE(u_int, mrt6debug) = 0; /* debug level */ +#define V_mrt6debug VNET(mrt6debug) +#define DEBUG_MFC 0x02 +#define DEBUG_FORWARD 0x04 +#define DEBUG_EXPIRE 0x08 +#define DEBUG_XMIT 0x10 +#define DEBUG_REG 0x20 +#define DEBUG_PIM 0x40 +#endif + +static void expire_upcalls(void *); +#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ +#define UPCALL_EXPIRE 6 /* number of timeouts */ + +/* + * XXX TODO: maintain a count to if_allmulti() calls in struct ifnet. + */ + +/* + * 'Interfaces' associated with decapsulator (so we can tell + * packets that went through it from ones that get reflected + * by a broken gateway). Different from IPv4 register_if, + * these interfaces are linked into the system ifnet list, + * because per-interface IPv6 statistics are maintained in + * ifp->if_afdata. But it does not have any routes point + * to them. I.e., packets can't be sent this way. They + * only exist as a placeholder for multicast source + * verification. 
+ */ +static struct ifnet *multicast_register_if6; + +#define ENCAP_HOPS 64 + +/* + * Private variables. + */ +static mifi_t nummifs = 0; +static mifi_t reg_mif_num = (mifi_t)-1; + +static struct pim6stat pim6stat; +SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RD, + &pim6stat, pim6stat, + "PIM Statistics (struct pim6stat, netinet6/pim_var.h)"); + +static VNET_DEFINE(int, pim6); +#define V_pim6 VNET(pim6) + +/* + * Hash function for a source, group entry + */ +#define MF6CHASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \ + (a).s6_addr32[2] ^ (a).s6_addr32[3] ^ \ + (g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \ + (g).s6_addr32[2] ^ (g).s6_addr32[3]) + +/* + * Find a route for a given origin IPv6 address and Multicast group address. + */ +#define MF6CFIND(o, g, rt) do { \ + struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \ + rt = NULL; \ + mrt6stat.mrt6s_mfc_lookups++; \ + while (_rt) { \ + if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \ + IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \ + (_rt->mf6c_stall == NULL)) { \ + rt = _rt; \ + break; \ + } \ + _rt = _rt->mf6c_next; \ + } \ + if (rt == NULL) { \ + mrt6stat.mrt6s_mfc_misses++; \ + } \ +} while (/*CONSTCOND*/ 0) + +/* + * Macros to compute elapsed time efficiently + * Borrowed from Van Jacobson's scheduling code + * XXX: replace with timersub() ? + */ +#define TV_DELTA(a, b, delta) do { \ + int xxs; \ + \ + delta = (a).tv_usec - (b).tv_usec; \ + if ((xxs = (a).tv_sec - (b).tv_sec)) { \ + switch (xxs) { \ + case 2: \ + delta += 1000000; \ + /* FALLTHROUGH */ \ + case 1: \ + delta += 1000000; \ + break; \ + default: \ + delta += (1000000 * xxs); \ + } \ + } \ +} while (/*CONSTCOND*/ 0) + +/* XXX: replace with timercmp(a, b, <) ? 
*/ +#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ + (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) + +#ifdef UPCALL_TIMING +#define UPCALL_MAX 50 +static u_long upcall_data[UPCALL_MAX + 1]; +static void collate(); +#endif /* UPCALL_TIMING */ + +static int ip6_mrouter_init(struct socket *, int, int); +static int add_m6fc(struct mf6cctl *); +static int add_m6if(struct mif6ctl *); +static int del_m6fc(struct mf6cctl *); +static int del_m6if(mifi_t *); +static int del_m6if_locked(mifi_t *); +static int get_mif6_cnt(struct sioc_mif_req6 *); +static int get_sg_cnt(struct sioc_sg_req6 *); + +static struct callout expire_upcalls_ch; + +int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); +int X_ip6_mrouter_done(void); +int X_ip6_mrouter_set(struct socket *, struct sockopt *); +int X_ip6_mrouter_get(struct socket *, struct sockopt *); +int X_mrt6_ioctl(u_long, caddr_t); + +/* + * Handle MRT setsockopt commands to modify the multicast routing tables. + */ +int +X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt) +{ + int error = 0; + int optval; + struct mif6ctl mifc; + struct mf6cctl mfcc; + mifi_t mifi; + + if (so != V_ip6_mrouter && sopt->sopt_name != MRT6_INIT) + return (EACCES); + + switch (sopt->sopt_name) { + case MRT6_INIT: +#ifdef MRT6_OINIT + case MRT6_OINIT: +#endif + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + error = ip6_mrouter_init(so, optval, sopt->sopt_name); + break; + case MRT6_DONE: + error = X_ip6_mrouter_done(); + break; + case MRT6_ADD_MIF: + error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc)); + if (error) + break; + error = add_m6if(&mifc); + break; + case MRT6_ADD_MFC: + error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); + if (error) + break; + error = add_m6fc(&mfcc); + break; + case MRT6_DEL_MFC: + error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); + if (error) + break; + error = del_m6fc(&mfcc); + break; + case MRT6_DEL_MIF: + 
error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi)); + if (error) + break; + error = del_m6if(&mifi); + break; + case MRT6_PIM: + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + error = set_pim6(&optval); + break; + default: + error = EOPNOTSUPP; + break; + } + + return (error); +} + +/* + * Handle MRT getsockopt commands + */ +int +X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt) +{ + int error = 0; + + if (so != V_ip6_mrouter) + return (EACCES); + + switch (sopt->sopt_name) { + case MRT6_PIM: + error = sooptcopyout(sopt, &V_pim6, sizeof(V_pim6)); + break; + } + return (error); +} + +/* + * Handle ioctl commands to obtain information from the cache + */ +int +X_mrt6_ioctl(u_long cmd, caddr_t data) +{ + int ret; + + ret = EINVAL; + + switch (cmd) { + case SIOCGETSGCNT_IN6: + ret = get_sg_cnt((struct sioc_sg_req6 *)data); + break; + + case SIOCGETMIFCNT_IN6: + ret = get_mif6_cnt((struct sioc_mif_req6 *)data); + break; + + default: + break; + } + + return (ret); +} + +/* + * returns the packet, byte, rpf-failure count for the source group provided + */ +static int +get_sg_cnt(struct sioc_sg_req6 *req) +{ + struct mf6c *rt; + int ret; + + ret = 0; + + MFC6_LOCK(); + + MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt); + if (rt == NULL) { + ret = ESRCH; + } else { + req->pktcnt = rt->mf6c_pkt_cnt; + req->bytecnt = rt->mf6c_byte_cnt; + req->wrong_if = rt->mf6c_wrong_if; + } + + MFC6_UNLOCK(); + + return (ret); +} + +/* + * returns the input and output packet and byte counts on the mif provided + */ +static int +get_mif6_cnt(struct sioc_mif_req6 *req) +{ + mifi_t mifi; + int ret; + + ret = 0; + mifi = req->mifi; + + MIF6_LOCK(); + + if (mifi >= nummifs) { + ret = EINVAL; + } else { + req->icount = mif6table[mifi].m6_pkt_in; + req->ocount = mif6table[mifi].m6_pkt_out; + req->ibytes = mif6table[mifi].m6_bytes_in; + req->obytes = mif6table[mifi].m6_bytes_out; + } + + MIF6_UNLOCK(); + + return (ret); +} + 
+static int +set_pim6(int *i) +{ + if ((*i != 1) && (*i != 0)) + return (EINVAL); + + V_pim6 = *i; + + return (0); +} + +/* + * Enable multicast routing + */ +static int +ip6_mrouter_init(struct socket *so, int v, int cmd) +{ + +#ifdef MRT6DEBUG + if (V_mrt6debug) + log(LOG_DEBUG, + "ip6_mrouter_init: so_type = %d, pr_protocol = %d\n", + so->so_type, so->so_proto->pr_protocol); +#endif + + if (so->so_type != SOCK_RAW || + so->so_proto->pr_protocol != IPPROTO_ICMPV6) + return (EOPNOTSUPP); + + if (v != 1) + return (ENOPROTOOPT); + + MROUTER6_LOCK(); + + if (V_ip6_mrouter != NULL) { + MROUTER6_UNLOCK(); + return (EADDRINUSE); + } + + V_ip6_mrouter = so; + V_ip6_mrouter_ver = cmd; + + bzero((caddr_t)mf6ctable, sizeof(mf6ctable)); + bzero((caddr_t)n6expire, sizeof(n6expire)); + + V_pim6 = 0;/* used for stubbing out/in pim stuff */ + + callout_init(&expire_upcalls_ch, 0); + callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, + expire_upcalls, NULL); + + MROUTER6_UNLOCK(); + +#ifdef MRT6DEBUG + if (V_mrt6debug) + log(LOG_DEBUG, "ip6_mrouter_init\n"); +#endif + + return (0); +} + +/* + * Disable IPv6 multicast forwarding. + */ +int +X_ip6_mrouter_done(void) +{ + mifi_t mifi; + int i; + struct mf6c *rt; + struct rtdetq *rte; + + MROUTER6_LOCK(); + + if (V_ip6_mrouter == NULL) { + MROUTER6_UNLOCK(); + return (EINVAL); + } + + /* + * For each phyint in use, disable promiscuous reception of all IPv6 + * multicasts. + */ + for (mifi = 0; mifi < nummifs; mifi++) { + if (mif6table[mifi].m6_ifp && + !(mif6table[mifi].m6_flags & MIFF_REGISTER)) { + if_allmulti(mif6table[mifi].m6_ifp, 0); + } + } + bzero((caddr_t)mif6table, sizeof(mif6table)); + nummifs = 0; + + V_pim6 = 0; /* used to stub out/in pim specific code */ + + callout_stop(&expire_upcalls_ch); + + /* + * Free all multicast forwarding cache entries. 
+ */ + MFC6_LOCK(); + for (i = 0; i < MF6CTBLSIZ; i++) { + rt = mf6ctable[i]; + while (rt) { + struct mf6c *frt; + + for (rte = rt->mf6c_stall; rte != NULL; ) { + struct rtdetq *n = rte->next; + + m_free(rte->m); + free(rte, M_MRTABLE6); + rte = n; + } + frt = rt; + rt = rt->mf6c_next; + free(frt, M_MRTABLE6); + } + } + bzero((caddr_t)mf6ctable, sizeof(mf6ctable)); + MFC6_UNLOCK(); + + /* + * Reset register interface + */ + if (reg_mif_num != (mifi_t)-1 && multicast_register_if6 != NULL) { + if_detach(multicast_register_if6); + if_free(multicast_register_if6); + reg_mif_num = (mifi_t)-1; + multicast_register_if6 = NULL; + } + + V_ip6_mrouter = NULL; + V_ip6_mrouter_ver = 0; + + MROUTER6_UNLOCK(); + +#ifdef MRT6DEBUG + if (V_mrt6debug) + log(LOG_DEBUG, "ip6_mrouter_done\n"); +#endif + + return (0); +} + +static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 }; + +/* + * Add a mif to the mif table + */ +static int +add_m6if(struct mif6ctl *mifcp) +{ + struct mif6 *mifp; + struct ifnet *ifp; + int error; + + MIF6_LOCK(); + + if (mifcp->mif6c_mifi >= MAXMIFS) { + MIF6_UNLOCK(); + return (EINVAL); + } + mifp = mif6table + mifcp->mif6c_mifi; + if (mifp->m6_ifp != NULL) { + MIF6_UNLOCK(); + return (EADDRINUSE); /* XXX: is it appropriate? */ + } + if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index) { + MIF6_UNLOCK(); + return (ENXIO); + } + + ifp = ifnet_byindex(mifcp->mif6c_pifi); + + if (mifcp->mif6c_flags & MIFF_REGISTER) { + if (reg_mif_num == (mifi_t)-1) { + ifp = if_alloc(IFT_OTHER); + + if_initname(ifp, "register_mif", 0); + ifp->if_flags |= IFF_LOOPBACK; + if_attach(ifp); + multicast_register_if6 = ifp; + reg_mif_num = mifcp->mif6c_mifi; + /* + * it is impossible to guess the ifindex of the + * register interface. So mif6c_pifi is automatically + * calculated. 
+ */ + mifcp->mif6c_pifi = ifp->if_index; + } else { + ifp = multicast_register_if6; + } + } else { + /* Make sure the interface supports multicast */ + if ((ifp->if_flags & IFF_MULTICAST) == 0) { + MIF6_UNLOCK(); + return (EOPNOTSUPP); + } + + error = if_allmulti(ifp, 1); + if (error) { + MIF6_UNLOCK(); + return (error); + } + } + + mifp->m6_flags = mifcp->mif6c_flags; + mifp->m6_ifp = ifp; + + /* initialize per mif pkt counters */ + mifp->m6_pkt_in = 0; + mifp->m6_pkt_out = 0; + mifp->m6_bytes_in = 0; + mifp->m6_bytes_out = 0; + bzero(&mifp->m6_route, sizeof(mifp->m6_route)); + + /* Adjust nummifs up if the mifi is higher than nummifs */ + if (nummifs <= mifcp->mif6c_mifi) + nummifs = mifcp->mif6c_mifi + 1; + + MIF6_UNLOCK(); + +#ifdef MRT6DEBUG + if (V_mrt6debug) + log(LOG_DEBUG, + "add_mif #%d, phyint %s\n", + mifcp->mif6c_mifi, + ifp->if_xname); +#endif + + return (0); +} + +/* + * Delete a mif from the mif table + */ +static int +del_m6if_locked(mifi_t *mifip) +{ + struct mif6 *mifp = mif6table + *mifip; + mifi_t mifi; + struct ifnet *ifp; + + MIF6_LOCK_ASSERT(); + + if (*mifip >= nummifs) + return (EINVAL); + if (mifp->m6_ifp == NULL) + return (EINVAL); + + if (!(mifp->m6_flags & MIFF_REGISTER)) { + /* XXX: TODO: Maintain an ALLMULTI refcount in struct ifnet. 
*/ + ifp = mifp->m6_ifp; + if_allmulti(ifp, 0); + } else { + if (reg_mif_num != (mifi_t)-1 && + multicast_register_if6 != NULL) { + if_detach(multicast_register_if6); + if_free(multicast_register_if6); + reg_mif_num = (mifi_t)-1; + multicast_register_if6 = NULL; + } + } + + bzero((caddr_t)mifp, sizeof(*mifp)); + + /* Adjust nummifs down */ + for (mifi = nummifs; mifi > 0; mifi--) + if (mif6table[mifi - 1].m6_ifp) + break; + nummifs = mifi; + +#ifdef MRT6DEBUG + if (V_mrt6debug) + log(LOG_DEBUG, "del_m6if %d, nummifs %d\n", *mifip, nummifs); +#endif + + return (0); +} + +static int +del_m6if(mifi_t *mifip) +{ + int cc; + + MIF6_LOCK(); + cc = del_m6if_locked(mifip); + MIF6_UNLOCK(); + + return (cc); +} + +/* + * Add an mfc entry + */ +static int +add_m6fc(struct mf6cctl *mfccp) +{ + struct mf6c *rt; + u_long hash; + struct rtdetq *rte; + u_short nstl; + char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; + + MFC6_LOCK(); + + MF6CFIND(mfccp->mf6cc_origin.sin6_addr, + mfccp->mf6cc_mcastgrp.sin6_addr, rt); + + /* If an entry already exists, just update the fields */ + if (rt) { +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_MFC) { + log(LOG_DEBUG, + "add_m6fc no upcall h %d o %s g %s p %x\n", + ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr), + ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr), + mfccp->mf6cc_parent); + } +#endif + + rt->mf6c_parent = mfccp->mf6cc_parent; + rt->mf6c_ifset = mfccp->mf6cc_ifset; + + MFC6_UNLOCK(); + return (0); + } + + /* + * Find the entry for which the upcall was made and update + */ + hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr, + mfccp->mf6cc_mcastgrp.sin6_addr); + for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) { + if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, + &mfccp->mf6cc_origin.sin6_addr) && + IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, + &mfccp->mf6cc_mcastgrp.sin6_addr) && + (rt->mf6c_stall != NULL)) { + + if (nstl++) + log(LOG_ERR, + "add_m6fc: %s o %s g %s p %x dbx %p\n", + "multiple 
kernel entries", + ip6_sprintf(ip6bufo, + &mfccp->mf6cc_origin.sin6_addr), + ip6_sprintf(ip6bufg, + &mfccp->mf6cc_mcastgrp.sin6_addr), + mfccp->mf6cc_parent, rt->mf6c_stall); + +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_MFC) + log(LOG_DEBUG, + "add_m6fc o %s g %s p %x dbg %x\n", + ip6_sprintf(ip6bufo, + &mfccp->mf6cc_origin.sin6_addr), + ip6_sprintf(ip6bufg, + &mfccp->mf6cc_mcastgrp.sin6_addr), + mfccp->mf6cc_parent, rt->mf6c_stall); +#endif + + rt->mf6c_origin = mfccp->mf6cc_origin; + rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; + rt->mf6c_parent = mfccp->mf6cc_parent; + rt->mf6c_ifset = mfccp->mf6cc_ifset; + /* initialize pkt counters per src-grp */ + rt->mf6c_pkt_cnt = 0; + rt->mf6c_byte_cnt = 0; + rt->mf6c_wrong_if = 0; + + rt->mf6c_expire = 0; /* Don't clean this guy up */ + n6expire[hash]--; + + /* free packets Qed at the end of this entry */ + for (rte = rt->mf6c_stall; rte != NULL; ) { + struct rtdetq *n = rte->next; + ip6_mdq(rte->m, rte->ifp, rt); + m_freem(rte->m); +#ifdef UPCALL_TIMING + collate(&(rte->t)); +#endif /* UPCALL_TIMING */ + free(rte, M_MRTABLE6); + rte = n; + } + rt->mf6c_stall = NULL; + } + } + + /* + * It is possible that an entry is being inserted without an upcall + */ + if (nstl == 0) { +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_MFC) + log(LOG_DEBUG, + "add_mfc no upcall h %d o %s g %s p %x\n", + hash, + ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr), + ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr), + mfccp->mf6cc_parent); +#endif + + for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) { + + if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, + &mfccp->mf6cc_origin.sin6_addr)&& + IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, + &mfccp->mf6cc_mcastgrp.sin6_addr)) { + + rt->mf6c_origin = mfccp->mf6cc_origin; + rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; + rt->mf6c_parent = mfccp->mf6cc_parent; + rt->mf6c_ifset = mfccp->mf6cc_ifset; + /* initialize pkt counters per src-grp */ + rt->mf6c_pkt_cnt = 0; + rt->mf6c_byte_cnt = 0; 
+ rt->mf6c_wrong_if = 0; + + if (rt->mf6c_expire) + n6expire[hash]--; + rt->mf6c_expire = 0; + } + } + if (rt == NULL) { + /* no upcall, so make a new entry */ + rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, + M_NOWAIT); + if (rt == NULL) { + MFC6_UNLOCK(); + return (ENOBUFS); + } + + /* insert new entry at head of hash chain */ + rt->mf6c_origin = mfccp->mf6cc_origin; + rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; + rt->mf6c_parent = mfccp->mf6cc_parent; + rt->mf6c_ifset = mfccp->mf6cc_ifset; + /* initialize pkt counters per src-grp */ + rt->mf6c_pkt_cnt = 0; + rt->mf6c_byte_cnt = 0; + rt->mf6c_wrong_if = 0; + rt->mf6c_expire = 0; + rt->mf6c_stall = NULL; + + /* link into table */ + rt->mf6c_next = mf6ctable[hash]; + mf6ctable[hash] = rt; + } + } + + MFC6_UNLOCK(); + return (0); +} + +#ifdef UPCALL_TIMING +/* + * collect delay statistics on the upcalls + */ +static void +collate(struct timeval *t) +{ + u_long d; + struct timeval tp; + u_long delta; + + GET_TIME(tp); + + if (TV_LT(*t, tp)) + { + TV_DELTA(tp, *t, delta); + + d = delta >> 10; + if (d > UPCALL_MAX) + d = UPCALL_MAX; + + ++upcall_data[d]; + } +} +#endif /* UPCALL_TIMING */ + +/* + * Delete an mfc entry + */ +static int +del_m6fc(struct mf6cctl *mfccp) +{ + struct sockaddr_in6 origin; + struct sockaddr_in6 mcastgrp; + struct mf6c *rt; + struct mf6c **nptr; + u_long hash; + + origin = mfccp->mf6cc_origin; + mcastgrp = mfccp->mf6cc_mcastgrp; + hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr); + +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_MFC) { + char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; + log(LOG_DEBUG,"del_m6fc orig %s mcastgrp %s\n", + ip6_sprintf(ip6bufo, &origin.sin6_addr), + ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr)); + } +#endif + + MFC6_LOCK(); + + nptr = &mf6ctable[hash]; + while ((rt = *nptr) != NULL) { + if (IN6_ARE_ADDR_EQUAL(&origin.sin6_addr, + &rt->mf6c_origin.sin6_addr) && + IN6_ARE_ADDR_EQUAL(&mcastgrp.sin6_addr, + &rt->mf6c_mcastgrp.sin6_addr) && + 
rt->mf6c_stall == NULL) + break; + + nptr = &rt->mf6c_next; + } + if (rt == NULL) { + MFC6_UNLOCK(); + return (EADDRNOTAVAIL); + } + + *nptr = rt->mf6c_next; + free(rt, M_MRTABLE6); + + MFC6_UNLOCK(); + + return (0); +} + +static int +socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src) +{ + + if (s) { + if (sbappendaddr(&s->so_rcv, + (struct sockaddr *)src, + mm, (struct mbuf *)0) != 0) { + sorwakeup(s); + return (0); + } + } + m_freem(mm); + return (-1); +} + +/* + * IPv6 multicast forwarding function. This function assumes that the packet + * pointed to by "ip6" has arrived on (or is about to be sent to) the interface + * pointed to by "ifp", and the packet is to be relayed to other networks + * that have members of the packet's destination IPv6 multicast group. + * + * The packet is returned unscathed to the caller, unless it is + * erroneous, in which case a non-zero return value tells the caller to + * discard it. + * + * NOTE: this implementation assumes that m->m_pkthdr.rcvif is NULL iff + * this function is called in the originating context (i.e., not when + * forwarding a packet from other node). ip6_output(), which is currently the + * only function that calls this function is called in the originating context, + * explicitly ensures this condition. It is caller's responsibility to ensure + * that if this function is called from somewhere else in the originating + * context in the future. + */ +int +X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) +{ + struct mf6c *rt; + struct mif6 *mifp; + struct mbuf *mm; + mifi_t mifi; + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_FORWARD) + log(LOG_DEBUG, "ip6_mforward: src %s, dst %s, ifindex %d\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), + ifp->if_index); +#endif + + /* + * Don't forward a packet with Hop limit of zero or one, + * or a packet destined to a local-only group. 
+ */ + if (ip6->ip6_hlim <= 1 || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) || + IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) + return (0); + ip6->ip6_hlim--; + + /* + * Source address check: do not forward packets with unspecified + * source. It was discussed in July 2000, on ipngwg mailing list. + * This is rather more serious than unicast cases, because some + * MLD packets can be sent with the unspecified source address + * (although such packets must normally set 1 to the hop limit field). + */ + if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { + V_ip6stat.ip6s_cantforward++; + if (V_ip6_log_time + V_ip6_log_interval < time_second) { + V_ip6_log_time = time_second; + log(LOG_DEBUG, + "cannot forward " + "from %s to %s nxt %d received on %s\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), + ip6->ip6_nxt, + if_name(m->m_pkthdr.rcvif)); + } + return (0); + } + + MFC6_LOCK(); + + /* + * Determine forwarding mifs from the forwarding cache table + */ + MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt); + + /* Entry exists, so forward if necessary */ + if (rt) { + MFC6_UNLOCK(); + return (ip6_mdq(m, ifp, rt)); + } else { + /* + * If we don't have a route for packet's origin, + * Make a copy of the packet & + * send message to routing daemon + */ + + struct mbuf *mb0; + struct rtdetq *rte; + u_long hash; +/* int i, npkts;*/ +#ifdef UPCALL_TIMING + struct timeval tp; + + GET_TIME(tp); +#endif /* UPCALL_TIMING */ + + mrt6stat.mrt6s_no_route++; +#ifdef MRT6DEBUG + if (V_mrt6debug & (DEBUG_FORWARD | DEBUG_MFC)) + log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst)); +#endif + + /* + * Allocate mbufs early so that we don't do extra work if we + * are just going to fail anyway. 
+ */ + rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6, + M_NOWAIT); + if (rte == NULL) { + MFC6_UNLOCK(); + return (ENOBUFS); + } + mb0 = m_copy(m, 0, M_COPYALL); + /* + * Pullup packet header if needed before storing it, + * as other references may modify it in the meantime. + */ + if (mb0 && + (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr))) + mb0 = m_pullup(mb0, sizeof(struct ip6_hdr)); + if (mb0 == NULL) { + free(rte, M_MRTABLE6); + MFC6_UNLOCK(); + return (ENOBUFS); + } + + /* is there an upcall waiting for this packet? */ + hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst); + for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) { + if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, + &rt->mf6c_origin.sin6_addr) && + IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, + &rt->mf6c_mcastgrp.sin6_addr) && + (rt->mf6c_stall != NULL)) + break; + } + + if (rt == NULL) { + struct mrt6msg *im; +#ifdef MRT6_OINIT + struct omrt6msg *oim; +#endif + + /* no upcall, so make a new entry */ + rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, + M_NOWAIT); + if (rt == NULL) { + free(rte, M_MRTABLE6); + m_freem(mb0); + MFC6_UNLOCK(); + return (ENOBUFS); + } + /* + * Make a copy of the header to send to the user + * level process + */ + mm = m_copy(mb0, 0, sizeof(struct ip6_hdr)); + + if (mm == NULL) { + free(rte, M_MRTABLE6); + m_freem(mb0); + free(rt, M_MRTABLE6); + MFC6_UNLOCK(); + return (ENOBUFS); + } + + /* + * Send message to routing daemon + */ + sin6.sin6_addr = ip6->ip6_src; + + im = NULL; +#ifdef MRT6_OINIT + oim = NULL; +#endif + switch (V_ip6_mrouter_ver) { +#ifdef MRT6_OINIT + case MRT6_OINIT: + oim = mtod(mm, struct omrt6msg *); + oim->im6_msgtype = MRT6MSG_NOCACHE; + oim->im6_mbz = 0; + break; +#endif + case MRT6_INIT: + im = mtod(mm, struct mrt6msg *); + im->im6_msgtype = MRT6MSG_NOCACHE; + im->im6_mbz = 0; + break; + default: + free(rte, M_MRTABLE6); + m_freem(mb0); + free(rt, M_MRTABLE6); + MFC6_UNLOCK(); + return (EINVAL); + } + +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_FORWARD) 
+ log(LOG_DEBUG, + "getting the iif info in the kernel\n"); +#endif + + for (mifp = mif6table, mifi = 0; + mifi < nummifs && mifp->m6_ifp != ifp; + mifp++, mifi++) + ; + + switch (V_ip6_mrouter_ver) { +#ifdef MRT6_OINIT + case MRT6_OINIT: + oim->im6_mif = mifi; + break; +#endif + case MRT6_INIT: + im->im6_mif = mifi; + break; + } + + if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) { + log(LOG_WARNING, "ip6_mforward: ip6_mrouter " + "socket queue full\n"); + mrt6stat.mrt6s_upq_sockfull++; + free(rte, M_MRTABLE6); + m_freem(mb0); + free(rt, M_MRTABLE6); + MFC6_UNLOCK(); + return (ENOBUFS); + } + + mrt6stat.mrt6s_upcalls++; + + /* insert new entry at head of hash chain */ + bzero(rt, sizeof(*rt)); + rt->mf6c_origin.sin6_family = AF_INET6; + rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6); + rt->mf6c_origin.sin6_addr = ip6->ip6_src; + rt->mf6c_mcastgrp.sin6_family = AF_INET6; + rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6); + rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst; + rt->mf6c_expire = UPCALL_EXPIRE; + n6expire[hash]++; + rt->mf6c_parent = MF6C_INCOMPLETE_PARENT; + + /* link into table */ + rt->mf6c_next = mf6ctable[hash]; + mf6ctable[hash] = rt; + /* Add this entry to the end of the queue */ + rt->mf6c_stall = rte; + } else { + /* determine if q has overflowed */ + struct rtdetq **p; + int npkts = 0; + + for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next) + if (++npkts > MAX_UPQ6) { + mrt6stat.mrt6s_upq_ovflw++; + free(rte, M_MRTABLE6); + m_freem(mb0); + MFC6_UNLOCK(); + return (0); + } + + /* Add this entry to the end of the queue */ + *p = rte; + } + + rte->next = NULL; + rte->m = mb0; + rte->ifp = ifp; +#ifdef UPCALL_TIMING + rte->t = tp; +#endif /* UPCALL_TIMING */ + + MFC6_UNLOCK(); + + return (0); + } +} + +/* + * Clean up cache entries if upcalls are not serviced + * Call from the Slow Timeout mechanism, every half second. 
+ */ +static void +expire_upcalls(void *unused) +{ + struct rtdetq *rte; + struct mf6c *mfc, **nptr; + int i; + + MFC6_LOCK(); + for (i = 0; i < MF6CTBLSIZ; i++) { + if (n6expire[i] == 0) + continue; + nptr = &mf6ctable[i]; + while ((mfc = *nptr) != NULL) { + rte = mfc->mf6c_stall; + /* + * Skip real cache entries + * Make sure it wasn't marked to not expire (shouldn't happen) + * If it expires now + */ + if (rte != NULL && + mfc->mf6c_expire != 0 && + --mfc->mf6c_expire == 0) { +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_EXPIRE) { + char ip6bufo[INET6_ADDRSTRLEN]; + char ip6bufg[INET6_ADDRSTRLEN]; + log(LOG_DEBUG, "expire_upcalls: expiring (%s %s)\n", + ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr), + ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr)); + } +#endif + /* + * drop all the packets + * free the mbuf with the pkt, if, timing info + */ + do { + struct rtdetq *n = rte->next; + m_freem(rte->m); + free(rte, M_MRTABLE6); + rte = n; + } while (rte != NULL); + mrt6stat.mrt6s_cache_cleanups++; + n6expire[i]--; + + *nptr = mfc->mf6c_next; + free(mfc, M_MRTABLE6); + } else { + nptr = &mfc->mf6c_next; + } + } + } + MFC6_UNLOCK(); + callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, + expire_upcalls, NULL); +} + +/* + * Packet forwarding routine once entry in the cache is made + */ +static int +ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt) +{ + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + mifi_t mifi, iif; + struct mif6 *mifp; + int plen = m->m_pkthdr.len; + struct in6_addr src0, dst0; /* copies for local work */ + u_int32_t iszone, idzone, oszone, odzone; + int error = 0; + +/* + * Macro to send packet on mif. Since RSVP packets don't get counted on + * input, they shouldn't get counted on output, so statistics keeping is + * separate. 
+ */ + +#define MC6_SEND(ip6, mifp, m) do { \ + if ((mifp)->m6_flags & MIFF_REGISTER) \ + register_send((ip6), (mifp), (m)); \ + else \ + phyint_send((ip6), (mifp), (m)); \ +} while (/*CONSTCOND*/ 0) + + /* + * Don't forward if it didn't arrive from the parent mif + * for its origin. + */ + mifi = rt->mf6c_parent; + if ((mifi >= nummifs) || (mif6table[mifi].m6_ifp != ifp)) { + /* came in the wrong interface */ +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_FORWARD) + log(LOG_DEBUG, + "wrong if: ifid %d mifi %d mififid %x\n", + ifp->if_index, mifi, + mif6table[mifi].m6_ifp->if_index); +#endif + mrt6stat.mrt6s_wrong_if++; + rt->mf6c_wrong_if++; + /* + * If we are doing PIM processing, and we are forwarding + * packets on this interface, send a message to the + * routing daemon. + */ + /* have to make sure this is a valid mif */ + if (mifi < nummifs && mif6table[mifi].m6_ifp) + if (V_pim6 && (m->m_flags & M_LOOP) == 0) { + /* + * Check the M_LOOP flag to avoid an + * unnecessary PIM assert. + * XXX: M_LOOP is an ad-hoc hack... 
+ */ + static struct sockaddr_in6 sin6 = + { sizeof(sin6), AF_INET6 }; + + struct mbuf *mm; + struct mrt6msg *im; +#ifdef MRT6_OINIT + struct omrt6msg *oim; +#endif + + mm = m_copy(m, 0, sizeof(struct ip6_hdr)); + if (mm && + (M_HASCL(mm) || + mm->m_len < sizeof(struct ip6_hdr))) + mm = m_pullup(mm, sizeof(struct ip6_hdr)); + if (mm == NULL) + return (ENOBUFS); + +#ifdef MRT6_OINIT + oim = NULL; +#endif + im = NULL; + switch (V_ip6_mrouter_ver) { +#ifdef MRT6_OINIT + case MRT6_OINIT: + oim = mtod(mm, struct omrt6msg *); + oim->im6_msgtype = MRT6MSG_WRONGMIF; + oim->im6_mbz = 0; + break; +#endif + case MRT6_INIT: + im = mtod(mm, struct mrt6msg *); + im->im6_msgtype = MRT6MSG_WRONGMIF; + im->im6_mbz = 0; + break; + default: + m_freem(mm); + return (EINVAL); + } + + for (mifp = mif6table, iif = 0; + iif < nummifs && mifp && + mifp->m6_ifp != ifp; + mifp++, iif++) + ; + + switch (V_ip6_mrouter_ver) { +#ifdef MRT6_OINIT + case MRT6_OINIT: + oim->im6_mif = iif; + sin6.sin6_addr = oim->im6_src; + break; +#endif + case MRT6_INIT: + im->im6_mif = iif; + sin6.sin6_addr = im->im6_src; + break; + } + + mrt6stat.mrt6s_upcalls++; + + if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) { +#ifdef MRT6DEBUG + if (V_mrt6debug) + log(LOG_WARNING, "mdq, ip6_mrouter socket queue full\n"); +#endif + ++mrt6stat.mrt6s_upq_sockfull; + return (ENOBUFS); + } /* if socket Q full */ + } /* if PIM */ + return (0); + } /* if wrong iif */ + + /* If I sourced this packet, it counts as output, else it was input. */ + if (m->m_pkthdr.rcvif == NULL) { + /* XXX: is rcvif really NULL when output?? */ + mif6table[mifi].m6_pkt_out++; + mif6table[mifi].m6_bytes_out += plen; + } else { + mif6table[mifi].m6_pkt_in++; + mif6table[mifi].m6_bytes_in += plen; + } + rt->mf6c_pkt_cnt++; + rt->mf6c_byte_cnt += plen; + + /* + * For each mif, forward a copy of the packet if there are group + * members downstream on the interface. 
+ */ + src0 = ip6->ip6_src; + dst0 = ip6->ip6_dst; + if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 || + (error = in6_setscope(&dst0, ifp, &idzone)) != 0) { + V_ip6stat.ip6s_badscope++; + return (error); + } + for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) { + if (IF_ISSET(mifi, &rt->mf6c_ifset)) { + /* + * check if the outgoing packet is going to break + * a scope boundary. + * XXX For packets through PIM register tunnel + * interface, we believe a routing daemon. + */ + if (!(mif6table[rt->mf6c_parent].m6_flags & + MIFF_REGISTER) && + !(mif6table[mifi].m6_flags & MIFF_REGISTER)) { + if (in6_setscope(&src0, mif6table[mifi].m6_ifp, + &oszone) || + in6_setscope(&dst0, mif6table[mifi].m6_ifp, + &odzone) || + iszone != oszone || + idzone != odzone) { + V_ip6stat.ip6s_badscope++; + continue; + } + } + + mifp->m6_pkt_out++; + mifp->m6_bytes_out += plen; + MC6_SEND(ip6, mifp, m); + } + } + return (0); +} + +static void +phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) +{ + struct mbuf *mb_copy; + struct ifnet *ifp = mifp->m6_ifp; + int error = 0; + struct sockaddr_in6 *dst6; + u_long linkmtu; + + dst6 = &mifp->m6_route.ro_dst; + + /* + * Make a new reference to the packet; make sure that + * the IPv6 header is actually copied, not just referenced, + * so that ip6_output() only scribbles on the copy. + */ + mb_copy = m_copy(m, 0, M_COPYALL); + if (mb_copy && + (M_HASCL(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr))) + mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr)); + if (mb_copy == NULL) { + return; + } + /* set MCAST flag to the outgoing packet */ + mb_copy->m_flags |= M_MCAST; + + /* + * If we sourced the packet, call ip6_output since we may devide + * the packet into fragments when the packet is too big for the + * outgoing interface. + * Otherwise, we can simply send the packet to the interface + * sending queue. 
+ */ + if (m->m_pkthdr.rcvif == NULL) { + struct ip6_moptions im6o; + + im6o.im6o_multicast_ifp = ifp; + /* XXX: ip6_output will override ip6->ip6_hlim */ + im6o.im6o_multicast_hlim = ip6->ip6_hlim; + im6o.im6o_multicast_loop = 1; + error = ip6_output(mb_copy, NULL, &mifp->m6_route, + IPV6_FORWARDING, &im6o, NULL, NULL); + +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_XMIT) + log(LOG_DEBUG, "phyint_send on mif %d err %d\n", + mifp - mif6table, error); +#endif + return; + } + + /* + * If configured to loop back multicasts by default, + * loop back a copy now. + */ + if (in6_mcast_loop) { + dst6->sin6_len = sizeof(struct sockaddr_in6); + dst6->sin6_family = AF_INET6; + dst6->sin6_addr = ip6->ip6_dst; + ip6_mloopback(ifp, m, &mifp->m6_route.ro_dst); + } + + /* + * Put the packet into the sending queue of the outgoing interface + * if it would fit in the MTU of the interface. + */ + linkmtu = IN6_LINKMTU(ifp); + if (mb_copy->m_pkthdr.len <= linkmtu || linkmtu < IPV6_MMTU) { + dst6->sin6_len = sizeof(struct sockaddr_in6); + dst6->sin6_family = AF_INET6; + dst6->sin6_addr = ip6->ip6_dst; + /* + * We just call if_output instead of nd6_output here, since + * we need no ND for a multicast forwarded packet...right? 
+ */ + error = (*ifp->if_output)(ifp, mb_copy, + (struct sockaddr *)&mifp->m6_route.ro_dst, NULL); +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_XMIT) + log(LOG_DEBUG, "phyint_send on mif %d err %d\n", + mifp - mif6table, error); +#endif + } else { + /* + * pMTU discovery is intentionally disabled by default, since + * various router may notify pMTU in multicast, which can be + * a DDoS to a router + */ + if (V_ip6_mcast_pmtu) + icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu); + else { +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_XMIT) { + char ip6bufs[INET6_ADDRSTRLEN]; + char ip6bufd[INET6_ADDRSTRLEN]; + log(LOG_DEBUG, + "phyint_send: packet too big on %s o %s " + "g %s size %d(discarded)\n", + if_name(ifp), + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), + mb_copy->m_pkthdr.len); + } +#endif /* MRT6DEBUG */ + m_freem(mb_copy); /* simply discard the packet */ + } + } +} + +static int +register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m) +{ + struct mbuf *mm; + int i, len = m->m_pkthdr.len; + static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 }; + struct mrt6msg *im6; + +#ifdef MRT6DEBUG + if (V_mrt6debug) { + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + log(LOG_DEBUG, "** IPv6 register_send **\n src %s dst %s\n", + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst)); + } +#endif + ++pim6stat.pim6s_snd_registers; + + /* Make a copy of the packet to send to the user level process */ + MGETHDR(mm, M_DONTWAIT, MT_HEADER); + if (mm == NULL) + return (ENOBUFS); + mm->m_pkthdr.rcvif = NULL; + mm->m_data += max_linkhdr; + mm->m_len = sizeof(struct ip6_hdr); + + if ((mm->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { + m_freem(mm); + return (ENOBUFS); + } + i = MHLEN - M_LEADINGSPACE(mm); + if (i > len) + i = len; + mm = m_pullup(mm, i); + if (mm == NULL) + return (ENOBUFS); +/* TODO: check it! 
*/ + mm->m_pkthdr.len = len + sizeof(struct ip6_hdr); + + /* + * Send message to routing daemon + */ + sin6.sin6_addr = ip6->ip6_src; + + im6 = mtod(mm, struct mrt6msg *); + im6->im6_msgtype = MRT6MSG_WHOLEPKT; + im6->im6_mbz = 0; + + im6->im6_mif = mif - mif6table; + + /* iif info is not given for reg. encap.n */ + mrt6stat.mrt6s_upcalls++; + + if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) { +#ifdef MRT6DEBUG + if (V_mrt6debug) + log(LOG_WARNING, + "register_send: ip6_mrouter socket queue full\n"); +#endif + ++mrt6stat.mrt6s_upq_sockfull; + return (ENOBUFS); + } + return (0); +} + +/* + * pim6_encapcheck() is called by the encap6_input() path at runtime to + * determine if a packet is for PIM; allowing PIM to be dynamically loaded + * into the kernel. + */ +static int +pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +{ + +#ifdef DIAGNOSTIC + KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); +#endif + if (proto != IPPROTO_PIM) + return 0; /* not for us; reject the datagram. */ + + return 64; /* claim the datagram. */ +} + +/* + * PIM sparse mode hook + * Receives the pim control messages, and passes them up to the listening + * socket, using rip6_input. + * The only message processed is the REGISTER pim message; the pim header + * is stripped off, and the inner packet is passed to register_mforward. 
+ */ +int +pim6_input(struct mbuf **mp, int *offp, int proto) +{ + struct pim *pim; /* pointer to a pim struct */ + struct ip6_hdr *ip6; + int pimlen; + struct mbuf *m = *mp; + int minlen; + int off = *offp; + + ++pim6stat.pim6s_rcv_total; + + ip6 = mtod(m, struct ip6_hdr *); + pimlen = m->m_pkthdr.len - *offp; + + /* + * Validate lengths + */ + if (pimlen < PIM_MINLEN) { + ++pim6stat.pim6s_rcv_tooshort; +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_PIM) + log(LOG_DEBUG,"pim6_input: PIM packet too short\n"); +#endif + m_freem(m); + return (IPPROTO_DONE); + } + + /* + * if the packet is at least as big as a REGISTER, go ahead + * and grab the PIM REGISTER header size, to avoid another + * possible m_pullup() later. + * + * PIM_MINLEN == pimhdr + u_int32 == 8 + * PIM6_REG_MINLEN == pimhdr + reghdr + eip6hdr == 4 + 4 + 40 + */ + minlen = (pimlen >= PIM6_REG_MINLEN) ? PIM6_REG_MINLEN : PIM_MINLEN; + + /* + * Make sure that the IP6 and PIM headers in contiguous memory, and + * possibly the PIM REGISTER header + */ +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, minlen, IPPROTO_DONE); + /* adjust pointer */ + ip6 = mtod(m, struct ip6_hdr *); + + /* adjust mbuf to point to the PIM header */ + pim = (struct pim *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen); + if (pim == NULL) { + pim6stat.pim6s_rcv_tooshort++; + return (IPPROTO_DONE); + } +#endif + +#define PIM6_CHECKSUM +#ifdef PIM6_CHECKSUM + { + int cksumlen; + + /* + * Validate checksum. 
+ * If PIM REGISTER, exclude the data packet + */ + if (pim->pim_type == PIM_REGISTER) + cksumlen = PIM_MINLEN; + else + cksumlen = pimlen; + + if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) { + ++pim6stat.pim6s_rcv_badsum; +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_PIM) + log(LOG_DEBUG, + "pim6_input: invalid checksum\n"); +#endif + m_freem(m); + return (IPPROTO_DONE); + } + } +#endif /* PIM_CHECKSUM */ + + /* PIM version check */ + if (pim->pim_ver != PIM_VERSION) { + ++pim6stat.pim6s_rcv_badversion; +#ifdef MRT6DEBUG + log(LOG_ERR, + "pim6_input: incorrect version %d, expecting %d\n", + pim->pim_ver, PIM_VERSION); +#endif + m_freem(m); + return (IPPROTO_DONE); + } + + if (pim->pim_type == PIM_REGISTER) { + /* + * since this is a REGISTER, we'll make a copy of the register + * headers ip6+pim+u_int32_t+encap_ip6, to be passed up to the + * routing daemon. + */ + static struct sockaddr_in6 dst = { sizeof(dst), AF_INET6 }; + + struct mbuf *mcp; + struct ip6_hdr *eip6; + u_int32_t *reghdr; + int rc; +#ifdef MRT6DEBUG + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; +#endif + + ++pim6stat.pim6s_rcv_registers; + + if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) { +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_PIM) + log(LOG_DEBUG, + "pim6_input: register mif not set: %d\n", + reg_mif_num); +#endif + m_freem(m); + return (IPPROTO_DONE); + } + + reghdr = (u_int32_t *)(pim + 1); + + if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) + goto pim6_input_to_daemon; + + /* + * Validate length + */ + if (pimlen < PIM6_REG_MINLEN) { + ++pim6stat.pim6s_rcv_tooshort; + ++pim6stat.pim6s_rcv_badregisters; +#ifdef MRT6DEBUG + log(LOG_ERR, + "pim6_input: register packet size too " + "small %d from %s\n", + pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src)); +#endif + m_freem(m); + return (IPPROTO_DONE); + } + + eip6 = (struct ip6_hdr *) (reghdr + 1); +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_PIM) + log(LOG_DEBUG, + "pim6_input[register], eip6: %s -> %s, " + "eip6 plen 
%d\n", + ip6_sprintf(ip6bufs, &eip6->ip6_src), + ip6_sprintf(ip6bufd, &eip6->ip6_dst), + ntohs(eip6->ip6_plen)); +#endif + + /* verify the version number of the inner packet */ + if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { + ++pim6stat.pim6s_rcv_badregisters; +#ifdef MRT6DEBUG + log(LOG_DEBUG, "pim6_input: invalid IP version (%d) " + "of the inner packet\n", + (eip6->ip6_vfc & IPV6_VERSION)); +#endif + m_freem(m); + /* m is freed here, so report IPPROTO_DONE like every other drop path */ + return (IPPROTO_DONE); + } + + /* verify the inner packet is destined to a mcast group */ + if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) { + ++pim6stat.pim6s_rcv_badregisters; +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_PIM) + log(LOG_DEBUG, + "pim6_input: inner packet of register " + "is not multicast %s\n", + ip6_sprintf(ip6bufd, &eip6->ip6_dst)); +#endif + m_freem(m); + return (IPPROTO_DONE); + } + + /* + * make a copy of the whole header to pass to the daemon later. + */ + mcp = m_copy(m, 0, off + PIM6_REG_MINLEN); + if (mcp == NULL) { +#ifdef MRT6DEBUG + log(LOG_ERR, + "pim6_input: pim register: " + "could not copy register head\n"); +#endif + m_freem(m); + return (IPPROTO_DONE); + } + + /* + * forward the inner ip6 packet; point m_data at the inner ip6. + */ + m_adj(m, off + PIM_MINLEN); +#ifdef MRT6DEBUG + if (V_mrt6debug & DEBUG_PIM) { + log(LOG_DEBUG, + "pim6_input: forwarding decapsulated register: " + "src %s, dst %s, mif %d\n", + ip6_sprintf(ip6bufs, &eip6->ip6_src), + ip6_sprintf(ip6bufd, &eip6->ip6_dst), + reg_mif_num); + } +#endif + + rc = if_simloop(mif6table[reg_mif_num].m6_ifp, m, + dst.sin6_family, 0); + + /* prepare the register head to send to the mrouting daemon */ + m = mcp; + } + + /* + * Pass the PIM message up to the daemon; if it is a register message + * pass the 'head' only up to the daemon. This includes the + * encapsulator ip6 header, pim header, register header and the + * encapsulated ip6 header. 
+ */ + pim6_input_to_daemon: + rip6_input(&m, offp, proto); + return (IPPROTO_DONE); +} + +/* + * Module event handler for ip6_mroute. + * + * MOD_LOAD: initializes the MROUTER6, MFC6 and MIF6 locks, attaches + * pim6_encapcheck for AF_INET6/IPPROTO_PIM and then points the ip6_mforward, + * ip6_mrouter_done, ip6_mrouter_get, ip6_mrouter_set and mrt6_ioctl hooks at + * their X_* implementations; if the encap attach fails the locks are + * destroyed again and EINVAL is returned. + * MOD_UNLOAD: returns EINVAL while V_ip6_mrouter is non-NULL; otherwise + * detaches the encap cookie, calls X_ip6_mrouter_done(), clears the hooks + * and destroys the locks. + * Any other event type returns EOPNOTSUPP; a handled event returns 0. + */ +static int +ip6_mroute_modevent(module_t mod, int type, void *unused) +{ + + switch (type) { + case MOD_LOAD: + MROUTER6_LOCK_INIT(); + MFC6_LOCK_INIT(); + MIF6_LOCK_INIT(); + + pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM, + pim6_encapcheck, + (const struct protosw *)&in6_pim_protosw, NULL); + if (pim6_encap_cookie == NULL) { + printf("ip6_mroute: unable to attach pim6 encap\n"); + /* undo the lock setup in reverse order of initialization */ + MIF6_LOCK_DESTROY(); + MFC6_LOCK_DESTROY(); + MROUTER6_LOCK_DESTROY(); + return (EINVAL); + } + + ip6_mforward = X_ip6_mforward; + ip6_mrouter_done = X_ip6_mrouter_done; + ip6_mrouter_get = X_ip6_mrouter_get; + ip6_mrouter_set = X_ip6_mrouter_set; + mrt6_ioctl = X_mrt6_ioctl; + break; + + case MOD_UNLOAD: + /* refuse to unload while V_ip6_mrouter is still set */ + if (V_ip6_mrouter != NULL) + return EINVAL; + + if (pim6_encap_cookie) { + encap_detach(pim6_encap_cookie); + pim6_encap_cookie = NULL; + } + X_ip6_mrouter_done(); + ip6_mforward = NULL; + ip6_mrouter_done = NULL; + ip6_mrouter_get = NULL; + ip6_mrouter_set = NULL; + mrt6_ioctl = NULL; + + MIF6_LOCK_DESTROY(); + MFC6_LOCK_DESTROY(); + MROUTER6_LOCK_DESTROY(); + break; + + default: + return (EOPNOTSUPP); + } + + return (0); +} + +static moduledata_t ip6_mroutemod = { + "ip6_mroute", + ip6_mroute_modevent, + 0 +}; + +DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/freebsd/sys/netinet6/ip6_mroute.h b/freebsd/sys/netinet6/ip6_mroute.h new file mode 100644 index 00000000..198659fa --- /dev/null +++ b/freebsd/sys/netinet6/ip6_mroute.h @@ -0,0 +1,271 @@ +/*- + * Copyright (C) 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_mroute.h,v 1.19 2001/06/14 06:12:55 suz Exp $ + * $FreeBSD$ + */ + +/* BSDI ip_mroute.h,v 2.5 1996/10/11 16:01:48 pjd Exp */ + +/* + * Definitions for IP multicast forwarding. + * + * Written by David Waitzman, BBN Labs, August 1988. + * Modified by Steve Deering, Stanford, February 1989. + * Modified by Ajit Thyagarajan, PARC, August 1993. + * Modified by Ajit Thyagarajan, PARC, August 1994. + * Modified by Ahmed Helmy, USC, September 1996. + * + * MROUTING Revision: 1.2 + */ + +#ifndef _NETINET6_IP6_MROUTE_HH_ +#define _NETINET6_IP6_MROUTE_HH_ + +/* + * Multicast Routing set/getsockopt commands. 
+ */ +#ifdef _KERNEL +#define MRT6_OINIT 100 /* initialize forwarder (omrt6msg) */ +#endif +#define MRT6_DONE 101 /* shut down forwarder */ +#define MRT6_ADD_MIF 102 /* add multicast interface */ +#define MRT6_DEL_MIF 103 /* delete multicast interface */ +#define MRT6_ADD_MFC 104 /* insert forwarding cache entry */ +#define MRT6_DEL_MFC 105 /* delete forwarding cache entry */ +#define MRT6_PIM 107 /* enable pim code */ +#define MRT6_INIT 108 /* initialize forwarder (mrt6msg) */ + +#if BSD >= 199103 +#define GET_TIME(t) microtime(&t) +#elif defined(sun) +#define GET_TIME(t) uniqtime(&t) +#else +#define GET_TIME(t) ((t) = time) +#endif + +/* + * Types and macros for handling bitmaps with one bit per multicast interface. + */ +typedef u_short mifi_t; /* type of a mif index */ +#define MAXMIFS 64 + +#ifndef IF_SETSIZE +#define IF_SETSIZE 256 +#endif + +typedef u_int32_t if_mask; +#define NIFBITS (sizeof(if_mask) * NBBY) /* bits per mask */ + +#ifndef howmany +#define howmany(x, y) (((x) + ((y) - 1)) / (y)) +#endif + +typedef struct if_set { + if_mask ifs_bits[howmany(IF_SETSIZE, NIFBITS)]; +} if_set; + +/* + * Set, clear and test bit n of the if_set pointed to by p. + * The mask is built from an if_mask-typed 1 so that selecting bit 31 + * does not shift into the sign bit of a plain int. + */ +#define IF_SET(n, p) ((p)->ifs_bits[(n)/NIFBITS] |= ((if_mask)1 << ((n) % NIFBITS))) +#define IF_CLR(n, p) ((p)->ifs_bits[(n)/NIFBITS] &= ~((if_mask)1 << ((n) % NIFBITS))) +#define IF_ISSET(n, p) ((p)->ifs_bits[(n)/NIFBITS] & ((if_mask)1 << ((n) % NIFBITS))) +#define IF_COPY(f, t) bcopy(f, t, sizeof(*(f))) +#define IF_ZERO(p) bzero(p, sizeof(*(p))) + +/* + * Argument structure for MRT6_ADD_MIF. 
+ */ +struct mif6ctl { + mifi_t mif6c_mifi; /* the index of the mif to be added */ + u_char mif6c_flags; /* MIFF_ flags defined below */ + u_short mif6c_pifi; /* the index of the physical IF */ +}; + +#define MIFF_REGISTER 0x1 /* mif represents a register end-point */ + +/* + * Argument structure for MRT6_ADD_MFC and MRT6_DEL_MFC + */ +struct mf6cctl { + struct sockaddr_in6 mf6cc_origin; /* IPv6 origin of mcasts */ + struct sockaddr_in6 mf6cc_mcastgrp; /* multicast group associated */ + mifi_t mf6cc_parent; /* incoming ifindex */ + struct if_set mf6cc_ifset; /* set of forwarding ifs */ +}; + +/* + * The kernel's multicast routing statistics. + */ +struct mrt6stat { + u_quad_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */ + u_quad_t mrt6s_mfc_misses; /* # forw. cache hash table misses */ + u_quad_t mrt6s_upcalls; /* # calls to multicast routing daemon */ + u_quad_t mrt6s_no_route; /* no route for packet's origin */ + u_quad_t mrt6s_bad_tunnel; /* malformed tunnel options */ + u_quad_t mrt6s_cant_tunnel; /* no room for tunnel options */ + u_quad_t mrt6s_wrong_if; /* arrived on wrong interface */ + u_quad_t mrt6s_upq_ovflw; /* upcall Q overflow */ + u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */ + u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */ + u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */ + u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */ + u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */ +}; + +#ifdef MRT6_OINIT +/* + * Struct used to communicate from kernel to multicast router + * note the convenient similarity to an IPv6 header. + * XXX old version, superseded by mrt6msg. 
+ */ +struct omrt6msg { + u_long unused1; + u_char im6_msgtype; /* what type of message */ +#if 0 +#define MRT6MSG_NOCACHE 1 +#define MRT6MSG_WRONGMIF 2 +#define MRT6MSG_WHOLEPKT 3 /* used for user level encap*/ +#endif + u_char im6_mbz; /* must be zero */ + u_char im6_mif; /* mif rec'd on */ + u_char unused2; + struct in6_addr im6_src, im6_dst; +}; +#endif + +/* + * Structure used to communicate from kernel to multicast router. + * We'll overlay the structure onto an MLD header (not an IPv6 header + * like igmpmsg{} used for IPv4 implementation). This is because this + * structure will be passed via an IPv6 raw socket, on which an application + * will only receive the payload i.e. the data after the IPv6 header and all + * the extension headers. (see Section 3 of RFC3542) + */ +struct mrt6msg { +#define MRT6MSG_NOCACHE 1 +#define MRT6MSG_WRONGMIF 2 +#define MRT6MSG_WHOLEPKT 3 /* used for user level encap*/ + u_char im6_mbz; /* must be zero */ + u_char im6_msgtype; /* what type of message */ + u_int16_t im6_mif; /* mif rec'd on */ + u_int32_t im6_pad; /* padding for 64bit arch */ + struct in6_addr im6_src, im6_dst; +}; + +/* + * Argument structure used by multicast routing daemon to get src-grp + * packet counts + */ +struct sioc_sg_req6 { + struct sockaddr_in6 src; + struct sockaddr_in6 grp; + u_quad_t pktcnt; + u_quad_t bytecnt; + u_quad_t wrong_if; +}; + +/* + * Argument structure used by mrouted to get mif pkt counts + */ +struct sioc_mif_req6 { + mifi_t mifi; /* mif number */ + u_quad_t icount; /* Input packet count on mif */ + u_quad_t ocount; /* Output packet count on mif */ + u_quad_t ibytes; /* Input byte count on mif */ + u_quad_t obytes; /* Output byte count on mif */ +}; + +#if defined(_KERNEL) || defined(KERNEL) +/* + * The kernel's multicast-interface structure. 
+ */ +struct mif6 { + u_char m6_flags; /* MIFF_ flags defined above */ + u_int m6_rate_limit; /* max rate */ + struct in6_addr m6_lcl_addr; /* local interface address */ + struct ifnet *m6_ifp; /* pointer to interface */ + u_quad_t m6_pkt_in; /* # pkts in on interface */ + u_quad_t m6_pkt_out; /* # pkts out on interface */ + u_quad_t m6_bytes_in; /* # bytes in on interface */ + u_quad_t m6_bytes_out; /* # bytes out on interface */ + struct route_in6 m6_route; /* cached route */ +#ifdef notyet + u_int m6_rsvp_on; /* RSVP listening on this vif */ + struct socket *m6_rsvpd; /* RSVP daemon socket */ +#endif +}; + +/* + * The kernel's multicast forwarding cache entry structure + */ +struct mf6c { + struct sockaddr_in6 mf6c_origin; /* IPv6 origin of mcasts */ + struct sockaddr_in6 mf6c_mcastgrp; /* multicast group associated*/ + mifi_t mf6c_parent; /* incoming IF */ + struct if_set mf6c_ifset; /* set of outgoing IFs */ + + u_quad_t mf6c_pkt_cnt; /* pkt count for src-grp */ + u_quad_t mf6c_byte_cnt; /* byte count for src-grp */ + u_quad_t mf6c_wrong_if; /* wrong if for src-grp */ + int mf6c_expire; /* time to clean entry up */ + struct timeval mf6c_last_assert; /* last time I sent an assert*/ + struct rtdetq *mf6c_stall; /* pkts waiting for route */ + struct mf6c *mf6c_next; /* hash table linkage */ +}; + +#define MF6C_INCOMPLETE_PARENT ((mifi_t)-1) + +/* + * Argument structure used for pkt info. 
while upcall is made + */ +#ifndef _NETINET_IP_MROUTE_HH_ +struct rtdetq { /* XXX: rtdetq is also defined in ip_mroute.h */ + struct mbuf *m; /* A copy of the packet */ + struct ifnet *ifp; /* Interface pkt came in on */ +#ifdef UPCALL_TIMING + struct timeval t; /* Timestamp */ +#endif /* UPCALL_TIMING */ + struct rtdetq *next; +}; +#endif /* _NETINET_IP_MROUTE_HH_ */ + +#define MF6CTBLSIZ 256 +#if (MF6CTBLSIZ & (MF6CTBLSIZ - 1)) == 0 /* from sys:route.h */ +#define MF6CHASHMOD(h) ((h) & (MF6CTBLSIZ - 1)) +#else +#define MF6CHASHMOD(h) ((h) % MF6CTBLSIZ) +#endif + +#define MAX_UPQ6 4 /* max. no of pkts in upcall Q */ + +extern int (*ip6_mrouter_set)(struct socket *so, struct sockopt *sopt); +extern int (*ip6_mrouter_get)(struct socket *so, struct sockopt *sopt); +extern int (*ip6_mrouter_done)(void); +extern int (*mrt6_ioctl)(u_long, caddr_t); +#endif /* _KERNEL */ + +#endif /* !_NETINET6_IP6_MROUTE_HH_ */ diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c new file mode 100644 index 00000000..0d762cc0 --- /dev/null +++ b/freebsd/sys/netinet6/ip6_output.c @@ -0,0 +1,2928 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> +#include <freebsd/local/opt_sctp.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/priv.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/ucred.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/netisr.h> +#include <freebsd/net/route.h> +#include <freebsd/net/pfil.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/tcp_var.h> +#include <freebsd/netinet6/nd6.h> + +#ifdef IPSEC +#include <freebsd/netipsec/ipsec.h> +#include <freebsd/netipsec/ipsec6.h> +#include <freebsd/netipsec/key.h> +#include <freebsd/netinet6/ip6_ipsec.h> +#endif /* IPSEC */ +#ifdef SCTP +#include <freebsd/netinet/sctp.h> +#include <freebsd/netinet/sctp_crc32.h> +#endif + +#include 
<freebsd/netinet6/ip6protosw.h> +#include <freebsd/netinet6/scope6_var.h> + +extern int in6_mcast_loop; + +struct ip6_exthdrs { + struct mbuf *ip6e_ip6; + struct mbuf *ip6e_hbh; + struct mbuf *ip6e_dest1; + struct mbuf *ip6e_rthdr; + struct mbuf *ip6e_dest2; +}; + +static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **, + struct ucred *, int)); +static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *, + struct socket *, struct sockopt *)); +static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); +static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *, + struct ucred *, int, int, int)); + +static int ip6_copyexthdr(struct mbuf **, caddr_t, int); +static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, + struct ip6_frag **)); +static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); +static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); +static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *, + struct ifnet *, struct in6_addr *, u_long *, int *)); +static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); + + +/* + * Make an extension header from option data. hp is the source, and + * mp is the destination. + */ +#define MAKE_EXTHDR(hp, mp) \ + do { \ + if (hp) { \ + struct ip6_ext *eh = (struct ip6_ext *)(hp); \ + error = ip6_copyexthdr((mp), (caddr_t)(hp), \ + ((eh)->ip6e_len + 1) << 3); \ + if (error) \ + goto freehdrs; \ + } \ + } while (/*CONSTCOND*/ 0) + +/* + * Form a chain of extension headers. + * m is the extension header mbuf + * mp is the previous mbuf in the chain + * p is the next header + * i is the type of option. + */ +#define MAKE_CHAIN(m, mp, p, i)\ + do {\ + if (m) {\ + if (!hdrsplit) \ + panic("assumption failed: hdr not split"); \ + *mtod((m), u_char *) = *(p);\ + *(p) = (i);\ + p = mtod((m), u_char *);\ + (m)->m_next = (mp)->m_next;\ + (mp)->m_next = (m);\ + (mp) = (m);\ + }\ + } while (/*CONSTCOND*/ 0) + +/* + * IP6 output. 
The packet in mbuf chain m contains a skeletal IP6 + * header (with pri, len, nxt, hlim, src, dst). + * This function may modify ver and hlim only. + * The mbuf chain containing the packet will be freed. + * The mbuf opt, if present, will not be freed. + * + * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and + * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, + * which is rt_rmx.rmx_mtu. + * + * ifpp - XXX: just for statistics + */ +int +ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, + struct route_in6 *ro, int flags, struct ip6_moptions *im6o, + struct ifnet **ifpp, struct inpcb *inp) +{ + struct ip6_hdr *ip6, *mhip6; + struct ifnet *ifp, *origifp; + struct mbuf *m = m0; + struct mbuf *mprev = NULL; + int hlen, tlen, len, off; + struct route_in6 ip6route; + struct rtentry *rt = NULL; + struct sockaddr_in6 *dst, src_sa, dst_sa; + struct in6_addr odst; + int error = 0; + struct in6_ifaddr *ia = NULL; + u_long mtu; + int alwaysfrag, dontfrag; + u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; + struct ip6_exthdrs exthdrs; + struct in6_addr finaldst, src0, dst0; + u_int32_t zone; + struct route_in6 *ro_pmtu = NULL; + int hdrsplit = 0; + int needipsec = 0; +#ifdef SCTP + int sw_csum; +#endif +#ifdef IPSEC + struct ipsec_output_state state; + struct ip6_rthdr *rh = NULL; + int needipsectun = 0; + int segleft_org = 0; + struct secpolicy *sp = NULL; +#endif /* IPSEC */ + + ip6 = mtod(m, struct ip6_hdr *); + if (ip6 == NULL) { + printf ("ip6 is NULL"); + goto bad; + } + + finaldst = ip6->ip6_dst; + + bzero(&exthdrs, sizeof(exthdrs)); + + if (opt) { + /* Hop-by-Hop options header */ + MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); + /* Destination options header(1st part) */ + if (opt->ip6po_rthdr) { + /* + * Destination options header(1st part) + * This only makes sense with a routing header. + * See Section 9.2 of RFC 3542. + * Disabling this part just for MIP6 convenience is + * a bad idea. 
We need to think carefully about a + * way to make the advanced API coexist with MIP6 + * options, which might automatically be inserted in + * the kernel. + */ + MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); + } + /* Routing header */ + MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); + /* Destination options header(2nd part) */ + MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); + } + + /* + * IPSec checking which handles several cases. + * FAST IPSEC: We re-injected the packet. + */ +#ifdef IPSEC + switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp)) + { + case 1: /* Bad packet */ + goto freehdrs; + case -1: /* Do IPSec */ + needipsec = 1; + case 0: /* No IPSec */ + default: + break; + } +#endif /* IPSEC */ + + /* + * Calculate the total length of the extension header chain. + * Keep the length of the unfragmentable part for fragmentation. + */ + optlen = 0; + if (exthdrs.ip6e_hbh) + optlen += exthdrs.ip6e_hbh->m_len; + if (exthdrs.ip6e_dest1) + optlen += exthdrs.ip6e_dest1->m_len; + if (exthdrs.ip6e_rthdr) + optlen += exthdrs.ip6e_rthdr->m_len; + unfragpartlen = optlen + sizeof(struct ip6_hdr); + + /* NOTE: we don't add AH/ESP length here. do that later. */ + if (exthdrs.ip6e_dest2) + optlen += exthdrs.ip6e_dest2->m_len; + + /* + * If we need IPsec, or there is at least one extension header, + * separate IP6 header from the payload. + */ + if ((needipsec || optlen) && !hdrsplit) { + if ((error = ip6_splithdr(m, &exthdrs)) != 0) { + m = NULL; + goto freehdrs; + } + m = exthdrs.ip6e_ip6; + hdrsplit++; + } + + /* adjust pointer */ + ip6 = mtod(m, struct ip6_hdr *); + + /* adjust mbuf packet header length */ + m->m_pkthdr.len += optlen; + plen = m->m_pkthdr.len - sizeof(*ip6); + + /* If this is a jumbo payload, insert a jumbo payload option. 
*/ + if (plen > IPV6_MAXPACKET) { + if (!hdrsplit) { + if ((error = ip6_splithdr(m, &exthdrs)) != 0) { + m = NULL; + goto freehdrs; + } + m = exthdrs.ip6e_ip6; + hdrsplit++; + } + /* adjust pointer */ + ip6 = mtod(m, struct ip6_hdr *); + if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) + goto freehdrs; + ip6->ip6_plen = 0; + } else + ip6->ip6_plen = htons(plen); + + /* + * Concatenate headers and fill in next header fields. + * Here we have, on "m" + * IPv6 payload + * and we insert headers accordingly. Finally, we should be getting: + * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] + * + * during the header composing process, "m" points to IPv6 header. + * "mprev" points to an extension header prior to esp. + */ + u_char *nexthdrp = &ip6->ip6_nxt; + mprev = m; + + /* + * we treat dest2 specially. this makes IPsec processing + * much easier. the goal here is to make mprev point the + * mbuf prior to dest2. + * + * result: IPv6 dest2 payload + * m and mprev will point to IPv6 header. + */ + if (exthdrs.ip6e_dest2) { + if (!hdrsplit) + panic("assumption failed: hdr not split"); + exthdrs.ip6e_dest2->m_next = m->m_next; + m->m_next = exthdrs.ip6e_dest2; + *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; + ip6->ip6_nxt = IPPROTO_DSTOPTS; + } + + /* + * result: IPv6 hbh dest1 rthdr dest2 payload + * m will point to IPv6 header. mprev will point to the + * extension header prior to dest2 (rthdr in the above case). + */ + MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); + MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, + IPPROTO_DSTOPTS); + MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, + IPPROTO_ROUTING); + +#ifdef IPSEC + if (!needipsec) + goto skip_ipsec2; + + /* + * pointers after IPsec headers are not valid any more. + * other pointers need a great care too. 
+ * (IPsec routines should not mangle mbufs prior to AH/ESP) + */ + exthdrs.ip6e_dest2 = NULL; + + if (exthdrs.ip6e_rthdr) { + rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *); + segleft_org = rh->ip6r_segleft; + rh->ip6r_segleft = 0; + } + + bzero(&state, sizeof(state)); + state.m = m; + error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags, + &needipsectun); + m = state.m; + if (error == EJUSTRETURN) { + /* + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing. + */ + ; + } else if (error) { + /* mbuf is already reclaimed in ipsec6_output_trans. */ + m = NULL; + switch (error) { + case EHOSTUNREACH: + case ENETUNREACH: + case EMSGSIZE: + case ENOBUFS: + case ENOMEM: + break; + default: + printf("[%s:%d] (ipsec): error code %d\n", + __func__, __LINE__, error); + /* FALLTHROUGH */ + case ENOENT: + /* don't show these error codes to the user */ + error = 0; + break; + } + goto bad; + } else if (!needipsectun) { + /* + * In the FAST IPSec case we have already + * re-injected the packet and it has been freed + * by the ipsec_done() function. So, just clean + * up after ourselves. + */ + m = NULL; + goto done; + } + if (exthdrs.ip6e_rthdr) { + /* ah6_output doesn't modify mbuf chain */ + rh->ip6r_segleft = segleft_org; + } +skip_ipsec2:; +#endif /* IPSEC */ + + /* + * If there is a routing header, discard the packet. + */ + if (exthdrs.ip6e_rthdr) { + error = EINVAL; + goto bad; + } + + /* Source address validation */ + if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && + (flags & IPV6_UNSPECSRC) == 0) { + error = EOPNOTSUPP; + V_ip6stat.ip6s_badscope++; + goto bad; + } + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { + error = EOPNOTSUPP; + V_ip6stat.ip6s_badscope++; + goto bad; + } + + V_ip6stat.ip6s_localout++; + + /* + * Route packet. 
+ */ + if (ro == 0) { + ro = &ip6route; + bzero((caddr_t)ro, sizeof(*ro)); + } + ro_pmtu = ro; + if (opt && opt->ip6po_rthdr) + ro = &opt->ip6po_route; + dst = (struct sockaddr_in6 *)&ro->ro_dst; + +again: + /* + * if specified, try to fill in the traffic class field. + * do not override if a non-zero value is already set. + * we check the diffserv field and the ecn field separately. + */ + if (opt && opt->ip6po_tclass >= 0) { + int mask = 0; + + if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) + mask |= 0xfc; + if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) + mask |= 0x03; + if (mask != 0) + ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); + } + + /* fill in or override the hop limit field, if necessary. */ + if (opt && opt->ip6po_hlim != -1) + ip6->ip6_hlim = opt->ip6po_hlim & 0xff; + else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + if (im6o != NULL) + ip6->ip6_hlim = im6o->im6o_multicast_hlim; + else + ip6->ip6_hlim = V_ip6_defmcasthlim; + } + +#ifdef IPSEC + /* + * We may re-inject packets into the stack here. + */ + if (needipsec && needipsectun) { + struct ipsec_output_state state; + + /* + * All the extension headers will become inaccessible + * (since they can be encrypted). + * Don't panic, we need no more updates to extension headers + * on inner IPv6 packet (since they are now encapsulated). + * + * IPv6 [ESP|AH] IPv6 [extension headers] payload + */ + bzero(&exthdrs, sizeof(exthdrs)); + exthdrs.ip6e_ip6 = m; + + bzero(&state, sizeof(state)); + state.m = m; + state.ro = (struct route *)ro; + state.dst = (struct sockaddr *)dst; + + error = ipsec6_output_tunnel(&state, sp, flags); + + m = state.m; + ro = (struct route_in6 *)state.ro; + dst = (struct sockaddr_in6 *)state.dst; + if (error == EJUSTRETURN) { + /* + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing. + */ + ; + } else if (error) { + /* mbuf is already reclaimed in ipsec6_output_tunnel. 
*/ + m0 = m = NULL; + m = NULL; + switch (error) { + case EHOSTUNREACH: + case ENETUNREACH: + case EMSGSIZE: + case ENOBUFS: + case ENOMEM: + break; + default: + printf("[%s:%d] (ipsec): error code %d\n", + __func__, __LINE__, error); + /* FALLTHROUGH */ + case ENOENT: + /* don't show these error codes to the user */ + error = 0; + break; + } + goto bad; + } else { + /* + * In the FAST IPSec case we have already + * re-injected the packet and it has been freed + * by the ipsec_done() function. So, just clean + * up after ourselves. + */ + m = NULL; + goto done; + } + + exthdrs.ip6e_ip6 = m; + } +#endif /* IPSEC */ + + /* adjust pointer */ + ip6 = mtod(m, struct ip6_hdr *); + + bzero(&dst_sa, sizeof(dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_addr = ip6->ip6_dst; + if ((error = in6_selectroute(&dst_sa, opt, im6o, ro, + &ifp, &rt)) != 0) { + switch (error) { + case EHOSTUNREACH: + V_ip6stat.ip6s_noroute++; + break; + case EADDRNOTAVAIL: + default: + break; /* XXX statistics? */ + } + if (ifp != NULL) + in6_ifstat_inc(ifp, ifs6_out_discard); + goto bad; + } + if (rt == NULL) { + /* + * If in6_selectroute() does not return a route entry, + * dst may not have been updated. + */ + *dst = dst_sa; /* XXX */ + } + + /* + * then rt (for unicast) and ifp must be non-NULL valid values. + */ + if ((flags & IPV6_FORWARDING) == 0) { + /* XXX: the FORWARDING flag can be set for mrouting. */ + in6_ifstat_inc(ifp, ifs6_out_request); + } + if (rt != NULL) { + ia = (struct in6_ifaddr *)(rt->rt_ifa); + rt->rt_use++; + } + + + /* + * The outgoing interface must be in the zone of source and + * destination addresses. 
+ */ + origifp = ifp; + + src0 = ip6->ip6_src; + if (in6_setscope(&src0, origifp, &zone)) + goto badscope; + bzero(&src_sa, sizeof(src_sa)); + src_sa.sin6_family = AF_INET6; + src_sa.sin6_len = sizeof(src_sa); + src_sa.sin6_addr = ip6->ip6_src; + if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) + goto badscope; + + dst0 = ip6->ip6_dst; + if (in6_setscope(&dst0, origifp, &zone)) + goto badscope; + /* re-initialize to be sure */ + bzero(&dst_sa, sizeof(dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_addr = ip6->ip6_dst; + if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { + goto badscope; + } + + /* We should use ia_ifp to support the case of + * sending packets to an address of our own. + */ + if (ia != NULL && ia->ia_ifp) + ifp = ia->ia_ifp; + + /* scope check is done. */ + goto routefound; + + badscope: + V_ip6stat.ip6s_badscope++; + in6_ifstat_inc(origifp, ifs6_out_discard); + if (error == 0) + error = EHOSTUNREACH; /* XXX */ + goto bad; + + routefound: + if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + if (opt && opt->ip6po_nextroute.ro_rt) { + /* + * The nexthop is explicitly specified by the + * application. We assume the next hop is an IPv6 + * address. + */ + dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; + } + else if ((rt->rt_flags & RTF_GATEWAY)) + dst = (struct sockaddr_in6 *)rt->rt_gateway; + } + + if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ + } else { + m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; + in6_ifstat_inc(ifp, ifs6_out_mcast); + /* + * Confirm that the outgoing interface supports multicast. 
+ */ + if (!(ifp->if_flags & IFF_MULTICAST)) { + V_ip6stat.ip6s_noroute++; + in6_ifstat_inc(ifp, ifs6_out_discard); + error = ENETUNREACH; + goto bad; + } + if ((im6o == NULL && in6_mcast_loop) || + (im6o && im6o->im6o_multicast_loop)) { + /* + * Loop back multicast datagram if not expressly + * forbidden to do so, even if we have not joined + * the address; protocols will filter it later, + * thus deferring a hash lookup and lock acquisition + * at the expense of an m_copym(). + */ + ip6_mloopback(ifp, m, dst); + } else { + /* + * If we are acting as a multicast router, perform + * multicast forwarding as if the packet had just + * arrived on the interface to which we are about + * to send. The multicast forwarding function + * recursively calls this function, using the + * IPV6_FORWARDING flag to prevent infinite recursion. + * + * Multicasts that are looped back by ip6_mloopback(), + * above, will be forwarded by the ip6_input() routine, + * if necessary. + */ + if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { + /* + * XXX: ip6_mforward expects that rcvif is NULL + * when it is called from the originating path. + * However, it is not always the case, since + * some versions of MGETHDR() does not + * initialize the field. + */ + m->m_pkthdr.rcvif = NULL; + if (ip6_mforward(ip6, ifp, m) != 0) { + m_freem(m); + goto done; + } + } + } + /* + * Multicasts with a hoplimit of zero may be looped back, + * above, but must not be transmitted on a network. + * Also, multicasts addressed to the loopback interface + * are not sent -- the above call to ip6_mloopback() will + * loop back a copy if this host actually belongs to the + * destination group on the loopback interface. + */ + if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || + IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { + m_freem(m); + goto done; + } + } + + /* + * Fill the outgoing inteface to tell the upper layer + * to increment per-interface statistics. 
+ */ + if (ifpp) + *ifpp = ifp; + + /* Determine path MTU. */ + if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, + &alwaysfrag)) != 0) + goto bad; + + /* + * The caller of this function may specify to use the minimum MTU + * in some cases. + * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU + * setting. The logic is a bit complicated; by default, unicast + * packets will follow path MTU while multicast packets will be sent at + * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets + * including unicast ones will be sent at the minimum MTU. Multicast + * packets will always be sent at the minimum MTU unless + * IP6PO_MINMTU_DISABLE is explicitly specified. + * See RFC 3542 for more details. + */ + if (mtu > IPV6_MMTU) { + if ((flags & IPV6_MINMTU)) + mtu = IPV6_MMTU; + else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) + mtu = IPV6_MMTU; + else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && + (opt == NULL || + opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { + mtu = IPV6_MMTU; + } + } + + /* + * clear embedded scope identifiers if necessary. + * in6_clearscope will touch the addresses only when necessary. + */ + in6_clearscope(&ip6->ip6_src); + in6_clearscope(&ip6->ip6_dst); + + /* + * If the outgoing packet contains a hop-by-hop options header, + * it must be examined and processed even by the source node. + * (RFC 2460, section 4.) + */ + if (exthdrs.ip6e_hbh) { + struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); + u_int32_t dummy; /* XXX unused */ + u_int32_t plen = 0; /* XXX: ip6_process will check the value */ + +#ifdef DIAGNOSTIC + if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) + panic("ip6e_hbh is not continuous"); +#endif + /* + * XXX: if we have to send an ICMPv6 error to the sender, + * we need the M_LOOP flag since icmp6_error() expects + * the IPv6 and the hop-by-hop options header are + * continuous unless the flag is set. 
+ */ + m->m_flags |= M_LOOP; + m->m_pkthdr.rcvif = ifp; + if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), + ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), + &dummy, &plen) < 0) { + /* m was already freed at this point */ + error = EINVAL;/* better error? */ + goto done; + } + m->m_flags &= ~M_LOOP; /* XXX */ + m->m_pkthdr.rcvif = NULL; + } + + /* Jump over all PFIL processing if hooks are not active. */ + if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + goto passout; + + odst = ip6->ip6_dst; + /* Run through list of hooks for output packets. */ + error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); + if (error != 0 || m == NULL) + goto done; + ip6 = mtod(m, struct ip6_hdr *); + + /* See if destination IP address was changed by packet filter. */ + if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { + m->m_flags |= M_SKIP_FIREWALL; + /* If destination is now ourself drop to ip6_input(). */ + if (in6_localaddr(&ip6->ip6_dst)) { + if (m->m_pkthdr.rcvif == NULL) + m->m_pkthdr.rcvif = V_loif; + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + m->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xffff; + } + m->m_pkthdr.csum_flags |= + CSUM_IP_CHECKED | CSUM_IP_VALID; +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP) + m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; +#endif + error = netisr_queue(NETISR_IPV6, m); + goto done; + } else + goto again; /* Redo the routing table lookup. */ + } + + /* XXX: IPFIREWALL_FORWARD */ + +passout: + /* + * Send the packet to the outgoing interface. + * If necessary, do IPv6 fragmentation before sending. 
+ * + * the logic here is rather complex: + * 1: normal case (dontfrag == 0, alwaysfrag == 0) + * 1-a: send as is if tlen <= path mtu + * 1-b: fragment if tlen > path mtu + * + * 2: if user asks us not to fragment (dontfrag == 1) + * 2-a: send as is if tlen <= interface mtu + * 2-b: error if tlen > interface mtu + * + * 3: if we always need to attach fragment header (alwaysfrag == 1) + * always fragment + * + * 4: if dontfrag == 1 && alwaysfrag == 1 + * error, as we cannot handle this conflicting request + */ +#ifdef SCTP + sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; + if (sw_csum & CSUM_SCTP) { + sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); + sw_csum &= ~CSUM_SCTP; + } +#endif + tlen = m->m_pkthdr.len; + + if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) + dontfrag = 1; + else + dontfrag = 0; + if (dontfrag && alwaysfrag) { /* case 4 */ + /* conflicting request - can't transmit */ + error = EMSGSIZE; + goto bad; + } + if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */ + /* + * Even if the DONTFRAG option is specified, we cannot send the + * packet when the data length is larger than the MTU of the + * outgoing interface. + * Notify the error by sending IPV6_PATHMTU ancillary data as + * well as returning an error code (the latter is not described + * in the API spec.) + */ + u_int32_t mtu32; + struct ip6ctlparam ip6cp; + + mtu32 = (u_int32_t)mtu; + bzero(&ip6cp, sizeof(ip6cp)); + ip6cp.ip6c_cmdarg = (void *)&mtu32; + pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst, + (void *)&ip6cp); + + error = EMSGSIZE; + goto bad; + } + + /* + * transmit packet without fragmentation + */ + if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ + struct in6_ifaddr *ia6; + + ip6 = mtod(m, struct ip6_hdr *); + ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); + if (ia6) { + /* Record statistics for this interface address. 
*/ + ia6->ia_ifa.if_opackets++; + ia6->ia_ifa.if_obytes += m->m_pkthdr.len; + ifa_free(&ia6->ia_ifa); + } + error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + goto done; + } + + /* + * try to fragment the packet. case 1-b and 3 + */ + if (mtu < IPV6_MMTU) { + /* path MTU cannot be less than IPV6_MMTU */ + error = EMSGSIZE; + in6_ifstat_inc(ifp, ifs6_out_fragfail); + goto bad; + } else if (ip6->ip6_plen == 0) { + /* jumbo payload cannot be fragmented */ + error = EMSGSIZE; + in6_ifstat_inc(ifp, ifs6_out_fragfail); + goto bad; + } else { + struct mbuf **mnext, *m_frgpart; + struct ip6_frag *ip6f; + u_int32_t id = htonl(ip6_randomid()); + u_char nextproto; + + int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len; + + /* + * Too large for the destination or interface; + * fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ + hlen = unfragpartlen; + if (mtu > IPV6_MAXPACKET) + mtu = IPV6_MAXPACKET; + + len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; + if (len < 8) { + error = EMSGSIZE; + in6_ifstat_inc(ifp, ifs6_out_fragfail); + goto bad; + } + + /* + * Verify that we have any chance at all of being able to queue + * the packet or packet fragments + */ + if (qslots <= 0 || ((u_int)qslots * (mtu - hlen) + < tlen /* - hlen */)) { + error = ENOBUFS; + V_ip6stat.ip6s_odropped++; + goto bad; + } + + mnext = &m->m_nextpkt; + + /* + * Change the next header field of the last header in the + * unfragmentable part. 
+ */ + if (exthdrs.ip6e_rthdr) { + nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); + *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; + } else if (exthdrs.ip6e_dest1) { + nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); + *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; + } else if (exthdrs.ip6e_hbh) { + nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); + *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; + } else { + nextproto = ip6->ip6_nxt; + ip6->ip6_nxt = IPPROTO_FRAGMENT; + } + + /* + * Loop through length of segment after first fragment, + * make new header and copy data of each part and link onto + * chain. + */ + m0 = m; + for (off = hlen; off < tlen; off += len) { + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (!m) { + error = ENOBUFS; + V_ip6stat.ip6s_odropped++; + goto sendorfree; + } + m->m_pkthdr.rcvif = NULL; + m->m_flags = m0->m_flags & M_COPYFLAGS; + *mnext = m; + mnext = &m->m_nextpkt; + m->m_data += max_linkhdr; + mhip6 = mtod(m, struct ip6_hdr *); + *mhip6 = *ip6; + m->m_len = sizeof(*mhip6); + error = ip6_insertfraghdr(m0, m, hlen, &ip6f); + if (error) { + V_ip6stat.ip6s_odropped++; + goto sendorfree; + } + ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); + if (off + len >= tlen) + len = tlen - off; + else + ip6f->ip6f_offlg |= IP6F_MORE_FRAG; + mhip6->ip6_plen = htons((u_short)(len + hlen + + sizeof(*ip6f) - sizeof(struct ip6_hdr))); + if ((m_frgpart = m_copy(m0, off, len)) == 0) { + error = ENOBUFS; + V_ip6stat.ip6s_odropped++; + goto sendorfree; + } + m_cat(m, m_frgpart); + m->m_pkthdr.len = len + hlen + sizeof(*ip6f); + m->m_pkthdr.rcvif = NULL; + ip6f->ip6f_reserved = 0; + ip6f->ip6f_ident = id; + ip6f->ip6f_nxt = nextproto; + V_ip6stat.ip6s_ofragments++; + in6_ifstat_inc(ifp, ifs6_out_fragcreat); + } + + in6_ifstat_inc(ifp, ifs6_out_fragok); + } + + /* + * Remove leading garbages. 
+ */ +sendorfree: + m = m0->m_nextpkt; + m0->m_nextpkt = 0; + m_freem(m0); + for (m0 = m; m; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = 0; + if (error == 0) { + /* Record statistics for this interface address. */ + if (ia) { + ia->ia_ifa.if_opackets++; + ia->ia_ifa.if_obytes += m->m_pkthdr.len; + } + error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + } else + m_freem(m); + } + + if (error == 0) + V_ip6stat.ip6s_fragmented++; + +done: + if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */ + RTFREE(ro->ro_rt); + } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { + RTFREE(ro_pmtu->ro_rt); + } +#ifdef IPSEC + if (sp != NULL) + KEY_FREESP(&sp); +#endif + + return (error); + +freehdrs: + m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ + m_freem(exthdrs.ip6e_dest1); + m_freem(exthdrs.ip6e_rthdr); + m_freem(exthdrs.ip6e_dest2); + /* FALLTHROUGH */ +bad: + if (m) + m_freem(m); + goto done; +} + +static int +ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) +{ + struct mbuf *m; + + if (hlen > MCLBYTES) + return (ENOBUFS); /* XXX */ + + MGET(m, M_DONTWAIT, MT_DATA); + if (!m) + return (ENOBUFS); + + if (hlen > MLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + return (ENOBUFS); + } + } + m->m_len = hlen; + if (hdr) + bcopy(hdr, mtod(m, caddr_t), hlen); + + *mp = m; + return (0); +} + +/* + * Insert jumbo payload option. + */ +static int +ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) +{ + struct mbuf *mopt; + u_char *optbuf; + u_int32_t v; + +#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ + + /* + * If there is no hop-by-hop options header, allocate new one. + * If there is one but it doesn't have enough space to store the + * jumbo payload option, allocate a cluster to store the whole options. + * Otherwise, use it to store the options. 
+ */ + if (exthdrs->ip6e_hbh == 0) { + MGET(mopt, M_DONTWAIT, MT_DATA); + if (mopt == 0) + return (ENOBUFS); + mopt->m_len = JUMBOOPTLEN; + optbuf = mtod(mopt, u_char *); + optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ + exthdrs->ip6e_hbh = mopt; + } else { + struct ip6_hbh *hbh; + + mopt = exthdrs->ip6e_hbh; + if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { + /* + * XXX assumption: + * - exthdrs->ip6e_hbh is not referenced from places + * other than exthdrs. + * - exthdrs->ip6e_hbh is not an mbuf chain. + */ + int oldoptlen = mopt->m_len; + struct mbuf *n; + + /* + * XXX: give up if the whole (new) hbh header does + * not fit even in an mbuf cluster. + */ + if (oldoptlen + JUMBOOPTLEN > MCLBYTES) + return (ENOBUFS); + + /* + * As a consequence, we must always prepare a cluster + * at this point. + */ + MGET(n, M_DONTWAIT, MT_DATA); + if (n) { + MCLGET(n, M_DONTWAIT); + if ((n->m_flags & M_EXT) == 0) { + m_freem(n); + n = NULL; + } + } + if (!n) + return (ENOBUFS); + n->m_len = oldoptlen + JUMBOOPTLEN; + bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), + oldoptlen); + optbuf = mtod(n, caddr_t) + oldoptlen; + m_freem(mopt); + mopt = exthdrs->ip6e_hbh = n; + } else { + optbuf = mtod(mopt, u_char *) + mopt->m_len; + mopt->m_len += JUMBOOPTLEN; + } + optbuf[0] = IP6OPT_PADN; + optbuf[1] = 1; + + /* + * Adjust the header length according to the pad and + * the jumbo payload option. + */ + hbh = mtod(mopt, struct ip6_hbh *); + hbh->ip6h_len += (JUMBOOPTLEN >> 3); + } + + /* fill in the option. */ + optbuf[2] = IP6OPT_JUMBO; + optbuf[3] = 4; + v = (u_int32_t)htonl(plen + JUMBOOPTLEN); + bcopy(&v, &optbuf[4], sizeof(u_int32_t)); + + /* finally, adjust the packet header length */ + exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; + + return (0); +#undef JUMBOOPTLEN +} + +/* + * Insert fragment header and copy unfragmentable header portions. 
+ */ +static int +ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, + struct ip6_frag **frghdrp) +{ + struct mbuf *n, *mlast; + + if (hlen > sizeof(struct ip6_hdr)) { + n = m_copym(m0, sizeof(struct ip6_hdr), + hlen - sizeof(struct ip6_hdr), M_DONTWAIT); + if (n == 0) + return (ENOBUFS); + m->m_next = n; + } else + n = m; + + /* Search for the last mbuf of unfragmentable part. */ + for (mlast = n; mlast->m_next; mlast = mlast->m_next) + ; + + if ((mlast->m_flags & M_EXT) == 0 && + M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { + /* use the trailing space of the last mbuf for the fragment hdr */ + *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + + mlast->m_len); + mlast->m_len += sizeof(struct ip6_frag); + m->m_pkthdr.len += sizeof(struct ip6_frag); + } else { + /* allocate a new mbuf for the fragment header */ + struct mbuf *mfrg; + + MGET(mfrg, M_DONTWAIT, MT_DATA); + if (mfrg == 0) + return (ENOBUFS); + mfrg->m_len = sizeof(struct ip6_frag); + *frghdrp = mtod(mfrg, struct ip6_frag *); + mlast->m_next = mfrg; + } + + return (0); +} + +static int +ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, + struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, + int *alwaysfragp) +{ + u_int32_t mtu = 0; + int alwaysfrag = 0; + int error = 0; + + if (ro_pmtu != ro) { + /* The first hop and the final destination may differ. 
*/ + struct sockaddr_in6 *sa6_dst = + (struct sockaddr_in6 *)&ro_pmtu->ro_dst; + if (ro_pmtu->ro_rt && + ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || + !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { + RTFREE(ro_pmtu->ro_rt); + ro_pmtu->ro_rt = (struct rtentry *)NULL; + } + if (ro_pmtu->ro_rt == NULL) { + bzero(sa6_dst, sizeof(*sa6_dst)); + sa6_dst->sin6_family = AF_INET6; + sa6_dst->sin6_len = sizeof(struct sockaddr_in6); + sa6_dst->sin6_addr = *dst; + + rtalloc((struct route *)ro_pmtu); + } + } + if (ro_pmtu->ro_rt) { + u_int32_t ifmtu; + struct in_conninfo inc; + + bzero(&inc, sizeof(inc)); + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = *dst; + + if (ifp == NULL) + ifp = ro_pmtu->ro_rt->rt_ifp; + ifmtu = IN6_LINKMTU(ifp); + mtu = tcp_hc_getmtu(&inc); + if (mtu) + mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu); + else + mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; + if (mtu == 0) + mtu = ifmtu; + else if (mtu < IPV6_MMTU) { + /* + * RFC2460 section 5, last paragraph: + * if we record ICMPv6 too big message with + * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU + * or smaller, with framgent header attached. + * (fragment header is needed regardless from the + * packet size, for translators to identify packets) + */ + alwaysfrag = 1; + mtu = IPV6_MMTU; + } else if (mtu > ifmtu) { + /* + * The MTU on the route is larger than the MTU on + * the interface! This shouldn't happen, unless the + * MTU of the interface has been changed after the + * interface was brought up. Change the MTU in the + * route to match the interface MTU (as long as the + * field isn't locked). + */ + mtu = ifmtu; + ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; + } + } else if (ifp) { + mtu = IN6_LINKMTU(ifp); + } else + error = EHOSTUNREACH; /* XXX */ + + *mtup = mtu; + if (alwaysfragp) + *alwaysfragp = alwaysfrag; + return (error); +} + +/* + * IP6 socket option processing. 
+ */ +int +ip6_ctloutput(struct socket *so, struct sockopt *sopt) +{ + int optdatalen, uproto; + void *optdata; + struct inpcb *in6p = sotoinpcb(so); + int error, optval; + int level, op, optname; + int optlen; + struct thread *td; + + level = sopt->sopt_level; + op = sopt->sopt_dir; + optname = sopt->sopt_name; + optlen = sopt->sopt_valsize; + td = sopt->sopt_td; + error = 0; + optval = 0; + uproto = (int)so->so_proto->pr_protocol; + + if (level == IPPROTO_IPV6) { + switch (op) { + + case SOPT_SET: + switch (optname) { + case IPV6_2292PKTOPTIONS: +#ifdef IPV6_PKTOPTIONS + case IPV6_PKTOPTIONS: +#endif + { + struct mbuf *m; + + error = soopt_getm(sopt, &m); /* XXX */ + if (error != 0) + break; + error = soopt_mcopyin(sopt, m); /* XXX */ + if (error != 0) + break; + error = ip6_pcbopts(&in6p->in6p_outputopts, + m, so, sopt); + m_freem(m); /* XXX */ + break; + } + + /* + * Use of some Hop-by-Hop options or some + * Destination options, might require special + * privilege. That is, normal applications + * (without special privilege) might be forbidden + * from setting certain options in outgoing packets, + * and might never see certain options in received + * packets. [RFC 2292 Section 6] + * KAME specific note: + * KAME prevents non-privileged users from sending or + * receiving ANY hbh/dst options in order to avoid + * overhead of parsing options in the kernel. 
+ */ + case IPV6_RECVHOPOPTS: + case IPV6_RECVDSTOPTS: + case IPV6_RECVRTHDRDSTOPTS: + if (td != NULL) { + error = priv_check(td, + PRIV_NETINET_SETHDROPTS); + if (error) + break; + } + /* FALLTHROUGH */ + case IPV6_UNICAST_HOPS: + case IPV6_HOPLIMIT: + case IPV6_FAITH: + + case IPV6_RECVPKTINFO: + case IPV6_RECVHOPLIMIT: + case IPV6_RECVRTHDR: + case IPV6_RECVPATHMTU: + case IPV6_RECVTCLASS: + case IPV6_V6ONLY: + case IPV6_AUTOFLOWLABEL: + case IPV6_BINDANY: + if (optname == IPV6_BINDANY && td != NULL) { + error = priv_check(td, + PRIV_NETINET_BINDANY); + if (error) + break; + } + + if (optlen != sizeof(int)) { + error = EINVAL; + break; + } + error = sooptcopyin(sopt, &optval, + sizeof optval, sizeof optval); + if (error) + break; + switch (optname) { + + case IPV6_UNICAST_HOPS: + if (optval < -1 || optval >= 256) + error = EINVAL; + else { + /* -1 = kernel default */ + in6p->in6p_hops = optval; + if ((in6p->inp_vflag & + INP_IPV4) != 0) + in6p->inp_ip_ttl = optval; + } + break; +#define OPTSET(bit) \ +do { \ + if (optval) \ + in6p->inp_flags |= (bit); \ + else \ + in6p->inp_flags &= ~(bit); \ +} while (/*CONSTCOND*/ 0) +#define OPTSET2292(bit) \ +do { \ + in6p->inp_flags |= IN6P_RFC2292; \ + if (optval) \ + in6p->inp_flags |= (bit); \ + else \ + in6p->inp_flags &= ~(bit); \ +} while (/*CONSTCOND*/ 0) +#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) + + case IPV6_RECVPKTINFO: + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + OPTSET(IN6P_PKTINFO); + break; + + case IPV6_HOPLIMIT: + { + struct ip6_pktopts **optp; + + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + optp = &in6p->in6p_outputopts; + error = ip6_pcbopt(IPV6_HOPLIMIT, + (u_char *)&optval, sizeof(optval), + optp, (td != NULL) ? 
td->td_ucred : + NULL, uproto); + break; + } + + case IPV6_RECVHOPLIMIT: + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + OPTSET(IN6P_HOPLIMIT); + break; + + case IPV6_RECVHOPOPTS: + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + OPTSET(IN6P_HOPOPTS); + break; + + case IPV6_RECVDSTOPTS: + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + OPTSET(IN6P_DSTOPTS); + break; + + case IPV6_RECVRTHDRDSTOPTS: + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + OPTSET(IN6P_RTHDRDSTOPTS); + break; + + case IPV6_RECVRTHDR: + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + OPTSET(IN6P_RTHDR); + break; + + case IPV6_FAITH: + OPTSET(INP_FAITH); + break; + + case IPV6_RECVPATHMTU: + /* + * We ignore this option for TCP + * sockets. + * (RFC3542 leaves this case + * unspecified.) + */ + if (uproto != IPPROTO_TCP) + OPTSET(IN6P_MTU); + break; + + case IPV6_V6ONLY: + /* + * make setsockopt(IPV6_V6ONLY) + * available only prior to bind(2). + * see ipng mailing list, Jun 22 2001. 
+ */ + if (in6p->inp_lport || + !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { + error = EINVAL; + break; + } + OPTSET(IN6P_IPV6_V6ONLY); + if (optval) + in6p->inp_vflag &= ~INP_IPV4; + else + in6p->inp_vflag |= INP_IPV4; + break; + case IPV6_RECVTCLASS: + /* cannot mix with RFC2292 XXX */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + OPTSET(IN6P_TCLASS); + break; + case IPV6_AUTOFLOWLABEL: + OPTSET(IN6P_AUTOFLOWLABEL); + break; + + case IPV6_BINDANY: + OPTSET(INP_BINDANY); + break; + } + break; + + case IPV6_TCLASS: + case IPV6_DONTFRAG: + case IPV6_USE_MIN_MTU: + case IPV6_PREFER_TEMPADDR: + if (optlen != sizeof(optval)) { + error = EINVAL; + break; + } + error = sooptcopyin(sopt, &optval, + sizeof optval, sizeof optval); + if (error) + break; + { + struct ip6_pktopts **optp; + optp = &in6p->in6p_outputopts; + error = ip6_pcbopt(optname, + (u_char *)&optval, sizeof(optval), + optp, (td != NULL) ? td->td_ucred : + NULL, uproto); + break; + } + + case IPV6_2292PKTINFO: + case IPV6_2292HOPLIMIT: + case IPV6_2292HOPOPTS: + case IPV6_2292DSTOPTS: + case IPV6_2292RTHDR: + /* RFC 2292 */ + if (optlen != sizeof(int)) { + error = EINVAL; + break; + } + error = sooptcopyin(sopt, &optval, + sizeof optval, sizeof optval); + if (error) + break; + switch (optname) { + case IPV6_2292PKTINFO: + OPTSET2292(IN6P_PKTINFO); + break; + case IPV6_2292HOPLIMIT: + OPTSET2292(IN6P_HOPLIMIT); + break; + case IPV6_2292HOPOPTS: + /* + * Check super-user privilege. + * See comments for IPV6_RECVHOPOPTS. 
+ */ + if (td != NULL) { + error = priv_check(td, + PRIV_NETINET_SETHDROPTS); + if (error) + return (error); + } + OPTSET2292(IN6P_HOPOPTS); + break; + case IPV6_2292DSTOPTS: + if (td != NULL) { + error = priv_check(td, + PRIV_NETINET_SETHDROPTS); + if (error) + return (error); + } + OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ + break; + case IPV6_2292RTHDR: + OPTSET2292(IN6P_RTHDR); + break; + } + break; + case IPV6_PKTINFO: + case IPV6_HOPOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_NEXTHOP: + { + /* new advanced API (RFC3542) */ + u_char *optbuf; + u_char optbuf_storage[MCLBYTES]; + int optlen; + struct ip6_pktopts **optp; + + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + error = EINVAL; + break; + } + + /* + * We only ensure valsize is not too large + * here. Further validation will be done + * later. + */ + error = sooptcopyin(sopt, optbuf_storage, + sizeof(optbuf_storage), 0); + if (error) + break; + optlen = sopt->sopt_valsize; + optbuf = optbuf_storage; + optp = &in6p->in6p_outputopts; + error = ip6_pcbopt(optname, optbuf, optlen, + optp, (td != NULL) ? 
td->td_ucred : NULL, + uproto); + break; + } +#undef OPTSET + + case IPV6_MULTICAST_IF: + case IPV6_MULTICAST_HOPS: + case IPV6_MULTICAST_LOOP: + case IPV6_JOIN_GROUP: + case IPV6_LEAVE_GROUP: + case IPV6_MSFILTER: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + error = ip6_setmoptions(in6p, sopt); + break; + + case IPV6_PORTRANGE: + error = sooptcopyin(sopt, &optval, + sizeof optval, sizeof optval); + if (error) + break; + + switch (optval) { + case IPV6_PORTRANGE_DEFAULT: + in6p->inp_flags &= ~(INP_LOWPORT); + in6p->inp_flags &= ~(INP_HIGHPORT); + break; + + case IPV6_PORTRANGE_HIGH: + in6p->inp_flags &= ~(INP_LOWPORT); + in6p->inp_flags |= INP_HIGHPORT; + break; + + case IPV6_PORTRANGE_LOW: + in6p->inp_flags &= ~(INP_HIGHPORT); + in6p->inp_flags |= INP_LOWPORT; + break; + + default: + error = EINVAL; + break; + } + break; + +#ifdef IPSEC + case IPV6_IPSEC_POLICY: + { + caddr_t req; + struct mbuf *m; + + if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ + break; + if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ + break; + req = mtod(m, caddr_t); + error = ipsec_set_policy(in6p, optname, req, + m->m_len, (sopt->sopt_td != NULL) ? + sopt->sopt_td->td_ucred : NULL); + m_freem(m); + break; + } +#endif /* IPSEC */ + + default: + error = ENOPROTOOPT; + break; + } + break; + + case SOPT_GET: + switch (optname) { + + case IPV6_2292PKTOPTIONS: +#ifdef IPV6_PKTOPTIONS + case IPV6_PKTOPTIONS: +#endif + /* + * RFC3542 (effectively) deprecated the + * semantics of the 2292-style pktoptions. + * Since it was not reliable in nature (i.e., + * applications had to expect the lack of some + * information after all), it would make sense + * to simplify this part by always returning + * empty data. 
+ */ + sopt->sopt_valsize = 0; + break; + + case IPV6_RECVHOPOPTS: + case IPV6_RECVDSTOPTS: + case IPV6_RECVRTHDRDSTOPTS: + case IPV6_UNICAST_HOPS: + case IPV6_RECVPKTINFO: + case IPV6_RECVHOPLIMIT: + case IPV6_RECVRTHDR: + case IPV6_RECVPATHMTU: + + case IPV6_FAITH: + case IPV6_V6ONLY: + case IPV6_PORTRANGE: + case IPV6_RECVTCLASS: + case IPV6_AUTOFLOWLABEL: + case IPV6_BINDANY: + switch (optname) { + + case IPV6_RECVHOPOPTS: + optval = OPTBIT(IN6P_HOPOPTS); + break; + + case IPV6_RECVDSTOPTS: + optval = OPTBIT(IN6P_DSTOPTS); + break; + + case IPV6_RECVRTHDRDSTOPTS: + optval = OPTBIT(IN6P_RTHDRDSTOPTS); + break; + + case IPV6_UNICAST_HOPS: + optval = in6p->in6p_hops; + break; + + case IPV6_RECVPKTINFO: + optval = OPTBIT(IN6P_PKTINFO); + break; + + case IPV6_RECVHOPLIMIT: + optval = OPTBIT(IN6P_HOPLIMIT); + break; + + case IPV6_RECVRTHDR: + optval = OPTBIT(IN6P_RTHDR); + break; + + case IPV6_RECVPATHMTU: + optval = OPTBIT(IN6P_MTU); + break; + + case IPV6_FAITH: + optval = OPTBIT(INP_FAITH); + break; + + case IPV6_V6ONLY: + optval = OPTBIT(IN6P_IPV6_V6ONLY); + break; + + case IPV6_PORTRANGE: + { + int flags; + flags = in6p->inp_flags; + if (flags & INP_HIGHPORT) + optval = IPV6_PORTRANGE_HIGH; + else if (flags & INP_LOWPORT) + optval = IPV6_PORTRANGE_LOW; + else + optval = 0; + break; + } + case IPV6_RECVTCLASS: + optval = OPTBIT(IN6P_TCLASS); + break; + + case IPV6_AUTOFLOWLABEL: + optval = OPTBIT(IN6P_AUTOFLOWLABEL); + break; + + case IPV6_BINDANY: + optval = OPTBIT(INP_BINDANY); + break; + } + if (error) + break; + error = sooptcopyout(sopt, &optval, + sizeof optval); + break; + + case IPV6_PATHMTU: + { + u_long pmtu = 0; + struct ip6_mtuinfo mtuinfo; + struct route_in6 sro; + + bzero(&sro, sizeof(sro)); + + if (!(so->so_state & SS_ISCONNECTED)) + return (ENOTCONN); + /* + * XXX: we dot not consider the case of source + * routing, or optional information to specify + * the outgoing interface. 
+ */ + error = ip6_getpmtu(&sro, NULL, NULL, + &in6p->in6p_faddr, &pmtu, NULL); + if (sro.ro_rt) + RTFREE(sro.ro_rt); + if (error) + break; + if (pmtu > IPV6_MAXPACKET) + pmtu = IPV6_MAXPACKET; + + bzero(&mtuinfo, sizeof(mtuinfo)); + mtuinfo.ip6m_mtu = (u_int32_t)pmtu; + optdata = (void *)&mtuinfo; + optdatalen = sizeof(mtuinfo); + error = sooptcopyout(sopt, optdata, + optdatalen); + break; + } + + case IPV6_2292PKTINFO: + case IPV6_2292HOPLIMIT: + case IPV6_2292HOPOPTS: + case IPV6_2292RTHDR: + case IPV6_2292DSTOPTS: + switch (optname) { + case IPV6_2292PKTINFO: + optval = OPTBIT(IN6P_PKTINFO); + break; + case IPV6_2292HOPLIMIT: + optval = OPTBIT(IN6P_HOPLIMIT); + break; + case IPV6_2292HOPOPTS: + optval = OPTBIT(IN6P_HOPOPTS); + break; + case IPV6_2292RTHDR: + optval = OPTBIT(IN6P_RTHDR); + break; + case IPV6_2292DSTOPTS: + optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); + break; + } + error = sooptcopyout(sopt, &optval, + sizeof optval); + break; + case IPV6_PKTINFO: + case IPV6_HOPOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_NEXTHOP: + case IPV6_TCLASS: + case IPV6_DONTFRAG: + case IPV6_USE_MIN_MTU: + case IPV6_PREFER_TEMPADDR: + error = ip6_getpcbopt(in6p->in6p_outputopts, + optname, sopt); + break; + + case IPV6_MULTICAST_IF: + case IPV6_MULTICAST_HOPS: + case IPV6_MULTICAST_LOOP: + case IPV6_MSFILTER: + error = ip6_getmoptions(in6p, sopt); + break; + +#ifdef IPSEC + case IPV6_IPSEC_POLICY: + { + caddr_t req = NULL; + size_t len = 0; + struct mbuf *m = NULL; + struct mbuf **mp = &m; + size_t ovalsize = sopt->sopt_valsize; + caddr_t oval = (caddr_t)sopt->sopt_val; + + error = soopt_getm(sopt, &m); /* XXX */ + if (error != 0) + break; + error = soopt_mcopyin(sopt, m); /* XXX */ + if (error != 0) + break; + sopt->sopt_valsize = ovalsize; + sopt->sopt_val = oval; + if (m) { + req = mtod(m, caddr_t); + len = m->m_len; + } + error = ipsec_get_policy(in6p, req, len, mp); + if (error == 0) + error = soopt_mcopyout(sopt, m); /* 
XXX */ + if (error == 0 && m) + m_freem(m); + break; + } +#endif /* IPSEC */ + + default: + error = ENOPROTOOPT; + break; + } + break; + } + } else { /* level != IPPROTO_IPV6 */ + error = EINVAL; + } + return (error); +} + +int +ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) +{ + int error = 0, optval, optlen; + const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); + struct inpcb *in6p = sotoinpcb(so); + int level, op, optname; + + level = sopt->sopt_level; + op = sopt->sopt_dir; + optname = sopt->sopt_name; + optlen = sopt->sopt_valsize; + + if (level != IPPROTO_IPV6) { + return (EINVAL); + } + + switch (optname) { + case IPV6_CHECKSUM: + /* + * For ICMPv6 sockets, no modification allowed for checksum + * offset, permit "no change" values to help existing apps. + * + * RFC3542 says: "An attempt to set IPV6_CHECKSUM + * for an ICMPv6 socket will fail." + * The current behavior does not meet RFC3542. + */ + switch (op) { + case SOPT_SET: + if (optlen != sizeof(int)) { + error = EINVAL; + break; + } + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + if ((optval % 2) != 0) { + /* the API assumes even offset values */ + error = EINVAL; + } else if (so->so_proto->pr_protocol == + IPPROTO_ICMPV6) { + if (optval != icmp6off) + error = EINVAL; + } else + in6p->in6p_cksum = optval; + break; + + case SOPT_GET: + if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) + optval = icmp6off; + else + optval = in6p->in6p_cksum; + + error = sooptcopyout(sopt, &optval, sizeof(optval)); + break; + + default: + error = EINVAL; + break; + } + break; + + default: + error = ENOPROTOOPT; + break; + } + + return (error); +} + +/* + * Set up IP6 options in pcb for insertion in output packets or + * specifying behavior of outgoing packets. 
+ */ +static int +ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, + struct socket *so, struct sockopt *sopt) +{ + struct ip6_pktopts *opt = *pktopt; + int error = 0; + struct thread *td = sopt->sopt_td; + + /* turn off any old options. */ + if (opt) { +#ifdef DIAGNOSTIC + if (opt->ip6po_pktinfo || opt->ip6po_nexthop || + opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || + opt->ip6po_rhinfo.ip6po_rhi_rthdr) + printf("ip6_pcbopts: all specified options are cleared.\n"); +#endif + ip6_clearpktopts(opt, -1); + } else + opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); + *pktopt = NULL; + + if (!m || m->m_len == 0) { + /* + * Only turning off any previous options, regardless of + * whether the opt is just created or given. + */ + free(opt, M_IP6OPT); + return (0); + } + + /* set options specified by user. */ + if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? + td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { + ip6_clearpktopts(opt, -1); /* XXX: discard all options */ + free(opt, M_IP6OPT); + return (error); + } + *pktopt = opt; + return (0); +} + +/* + * initialize ip6_pktopts. beware that there are non-zero default values in + * the struct. 
+ */ +void +ip6_initpktopts(struct ip6_pktopts *opt) +{ + + bzero(opt, sizeof(*opt)); + opt->ip6po_hlim = -1; /* -1 means default hop limit */ + opt->ip6po_tclass = -1; /* -1 means default traffic class */ + opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; + opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; +} + +static int +ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, + struct ucred *cred, int uproto) +{ + struct ip6_pktopts *opt; + + if (*pktopt == NULL) { + *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, + M_WAITOK); + ip6_initpktopts(*pktopt); + } + opt = *pktopt; + + return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); +} + +static int +ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) +{ + void *optdata = NULL; + int optdatalen = 0; + struct ip6_ext *ip6e; + int error = 0; + struct in6_pktinfo null_pktinfo; + int deftclass = 0, on; + int defminmtu = IP6PO_MINMTU_MCASTONLY; + int defpreftemp = IP6PO_TEMPADDR_SYSTEM; + + switch (optname) { + case IPV6_PKTINFO: + if (pktopt && pktopt->ip6po_pktinfo) + optdata = (void *)pktopt->ip6po_pktinfo; + else { + /* XXX: we don't have to do this every time... 
*/ + bzero(&null_pktinfo, sizeof(null_pktinfo)); + optdata = (void *)&null_pktinfo; + } + optdatalen = sizeof(struct in6_pktinfo); + break; + case IPV6_TCLASS: + if (pktopt && pktopt->ip6po_tclass >= 0) + optdata = (void *)&pktopt->ip6po_tclass; + else + optdata = (void *)&deftclass; + optdatalen = sizeof(int); + break; + case IPV6_HOPOPTS: + if (pktopt && pktopt->ip6po_hbh) { + optdata = (void *)pktopt->ip6po_hbh; + ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; + optdatalen = (ip6e->ip6e_len + 1) << 3; + } + break; + case IPV6_RTHDR: + if (pktopt && pktopt->ip6po_rthdr) { + optdata = (void *)pktopt->ip6po_rthdr; + ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; + optdatalen = (ip6e->ip6e_len + 1) << 3; + } + break; + case IPV6_RTHDRDSTOPTS: + if (pktopt && pktopt->ip6po_dest1) { + optdata = (void *)pktopt->ip6po_dest1; + ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; + optdatalen = (ip6e->ip6e_len + 1) << 3; + } + break; + case IPV6_DSTOPTS: + if (pktopt && pktopt->ip6po_dest2) { + optdata = (void *)pktopt->ip6po_dest2; + ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; + optdatalen = (ip6e->ip6e_len + 1) << 3; + } + break; + case IPV6_NEXTHOP: + if (pktopt && pktopt->ip6po_nexthop) { + optdata = (void *)pktopt->ip6po_nexthop; + optdatalen = pktopt->ip6po_nexthop->sa_len; + } + break; + case IPV6_USE_MIN_MTU: + if (pktopt) + optdata = (void *)&pktopt->ip6po_minmtu; + else + optdata = (void *)&defminmtu; + optdatalen = sizeof(int); + break; + case IPV6_DONTFRAG: + if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) + on = 1; + else + on = 0; + optdata = (void *)&on; + optdatalen = sizeof(on); + break; + case IPV6_PREFER_TEMPADDR: + if (pktopt) + optdata = (void *)&pktopt->ip6po_prefer_tempaddr; + else + optdata = (void *)&defpreftemp; + optdatalen = sizeof(int); + break; + default: /* should not happen */ +#ifdef DIAGNOSTIC + panic("ip6_getpcbopt: unexpected option\n"); +#endif + return (ENOPROTOOPT); + } + + error = sooptcopyout(sopt, optdata, optdatalen); + + return 
(error); +} + +void +ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) +{ + if (pktopt == NULL) + return; + + if (optname == -1 || optname == IPV6_PKTINFO) { + if (pktopt->ip6po_pktinfo) + free(pktopt->ip6po_pktinfo, M_IP6OPT); + pktopt->ip6po_pktinfo = NULL; + } + if (optname == -1 || optname == IPV6_HOPLIMIT) + pktopt->ip6po_hlim = -1; + if (optname == -1 || optname == IPV6_TCLASS) + pktopt->ip6po_tclass = -1; + if (optname == -1 || optname == IPV6_NEXTHOP) { + if (pktopt->ip6po_nextroute.ro_rt) { + RTFREE(pktopt->ip6po_nextroute.ro_rt); + pktopt->ip6po_nextroute.ro_rt = NULL; + } + if (pktopt->ip6po_nexthop) + free(pktopt->ip6po_nexthop, M_IP6OPT); + pktopt->ip6po_nexthop = NULL; + } + if (optname == -1 || optname == IPV6_HOPOPTS) { + if (pktopt->ip6po_hbh) + free(pktopt->ip6po_hbh, M_IP6OPT); + pktopt->ip6po_hbh = NULL; + } + if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { + if (pktopt->ip6po_dest1) + free(pktopt->ip6po_dest1, M_IP6OPT); + pktopt->ip6po_dest1 = NULL; + } + if (optname == -1 || optname == IPV6_RTHDR) { + if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) + free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); + pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; + if (pktopt->ip6po_route.ro_rt) { + RTFREE(pktopt->ip6po_route.ro_rt); + pktopt->ip6po_route.ro_rt = NULL; + } + } + if (optname == -1 || optname == IPV6_DSTOPTS) { + if (pktopt->ip6po_dest2) + free(pktopt->ip6po_dest2, M_IP6OPT); + pktopt->ip6po_dest2 = NULL; + } +} + +#define PKTOPT_EXTHDRCPY(type) \ +do {\ + if (src->type) {\ + int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ + dst->type = malloc(hlen, M_IP6OPT, canwait);\ + if (dst->type == NULL && canwait == M_NOWAIT)\ + goto bad;\ + bcopy(src->type, dst->type, hlen);\ + }\ +} while (/*CONSTCOND*/ 0) + +static int +copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait) +{ + if (dst == NULL || src == NULL) { + printf("ip6_clearpktopts: invalid argument\n"); + return (EINVAL); + } + + dst->ip6po_hlim = 
src->ip6po_hlim; + dst->ip6po_tclass = src->ip6po_tclass; + dst->ip6po_flags = src->ip6po_flags; + if (src->ip6po_pktinfo) { + dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), + M_IP6OPT, canwait); + if (dst->ip6po_pktinfo == NULL) + goto bad; + *dst->ip6po_pktinfo = *src->ip6po_pktinfo; + } + if (src->ip6po_nexthop) { + dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, + M_IP6OPT, canwait); + if (dst->ip6po_nexthop == NULL) + goto bad; + bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, + src->ip6po_nexthop->sa_len); + } + PKTOPT_EXTHDRCPY(ip6po_hbh); + PKTOPT_EXTHDRCPY(ip6po_dest1); + PKTOPT_EXTHDRCPY(ip6po_dest2); + PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ + return (0); + + bad: + ip6_clearpktopts(dst, -1); + return (ENOBUFS); +} +#undef PKTOPT_EXTHDRCPY + +struct ip6_pktopts * +ip6_copypktopts(struct ip6_pktopts *src, int canwait) +{ + int error; + struct ip6_pktopts *dst; + + dst = malloc(sizeof(*dst), M_IP6OPT, canwait); + if (dst == NULL) + return (NULL); + ip6_initpktopts(dst); + + if ((error = copypktopts(dst, src, canwait)) != 0) { + free(dst, M_IP6OPT); + return (NULL); + } + + return (dst); +} + +void +ip6_freepcbopts(struct ip6_pktopts *pktopt) +{ + if (pktopt == NULL) + return; + + ip6_clearpktopts(pktopt, -1); + + free(pktopt, M_IP6OPT); +} + +/* + * Set IPv6 outgoing packet options based on advanced API. + */ +int +ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, + struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) +{ + struct cmsghdr *cm = 0; + + if (control == NULL || opt == NULL) + return (EINVAL); + + ip6_initpktopts(opt); + if (stickyopt) { + int error; + + /* + * If stickyopt is provided, make a local copy of the options + * for this particular packet, then override them by ancillary + * objects. + * XXX: copypktopts() does not copy the cached route to a next + * hop (if any). 
This is not very good in terms of efficiency, + * but we can allow this since this option should be rarely + * used. + */ + if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) + return (error); + } + + /* + * XXX: Currently, we assume all the optional information is stored + * in a single mbuf. + */ + if (control->m_next) + return (EINVAL); + + for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), + control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { + int error; + + if (control->m_len < CMSG_LEN(0)) + return (EINVAL); + + cm = mtod(control, struct cmsghdr *); + if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) + return (EINVAL); + if (cm->cmsg_level != IPPROTO_IPV6) + continue; + + error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), + cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); + if (error) + return (error); + } + + return (0); +} + +/* + * Set a particular packet option, as a sticky option or an ancillary data + * item. "len" can be 0 only when it's a sticky option. + * We have 4 cases of combination of "sticky" and "cmsg": + * "sticky=0, cmsg=0": impossible + * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data + * "sticky=1, cmsg=0": RFC3542 socket option + * "sticky=1, cmsg=1": RFC2292 socket option + */ +static int +ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, + struct ucred *cred, int sticky, int cmsg, int uproto) +{ + int minmtupolicy, preftemp; + int error; + + if (!sticky && !cmsg) { +#ifdef DIAGNOSTIC + printf("ip6_setpktopt: impossible case\n"); +#endif + return (EINVAL); + } + + /* + * IPV6_2292xxx is for backward compatibility to RFC2292, and should + * not be specified in the context of RFC3542. Conversely, + * RFC3542 types should not be specified in the context of RFC2292. 
+ */ + if (!cmsg) { + switch (optname) { + case IPV6_2292PKTINFO: + case IPV6_2292HOPLIMIT: + case IPV6_2292NEXTHOP: + case IPV6_2292HOPOPTS: + case IPV6_2292DSTOPTS: + case IPV6_2292RTHDR: + case IPV6_2292PKTOPTIONS: + return (ENOPROTOOPT); + } + } + if (sticky && cmsg) { + switch (optname) { + case IPV6_PKTINFO: + case IPV6_HOPLIMIT: + case IPV6_NEXTHOP: + case IPV6_HOPOPTS: + case IPV6_DSTOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_USE_MIN_MTU: + case IPV6_DONTFRAG: + case IPV6_TCLASS: + case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ + return (ENOPROTOOPT); + } + } + + switch (optname) { + case IPV6_2292PKTINFO: + case IPV6_PKTINFO: + { + struct ifnet *ifp = NULL; + struct in6_pktinfo *pktinfo; + + if (len != sizeof(struct in6_pktinfo)) + return (EINVAL); + + pktinfo = (struct in6_pktinfo *)buf; + + /* + * An application can clear any sticky IPV6_PKTINFO option by + * doing a "regular" setsockopt with ipi6_addr being + * in6addr_any and ipi6_ifindex being zero. + * [RFC 3542, Section 6] + */ + if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && + pktinfo->ipi6_ifindex == 0 && + IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { + ip6_clearpktopts(opt, optname); + break; + } + + if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && + sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { + return (EINVAL); + } + + /* validate the interface index if specified. */ + if (pktinfo->ipi6_ifindex > V_if_index || + pktinfo->ipi6_ifindex < 0) { + return (ENXIO); + } + if (pktinfo->ipi6_ifindex) { + ifp = ifnet_byindex(pktinfo->ipi6_ifindex); + if (ifp == NULL) + return (ENXIO); + } + + /* + * We store the address anyway, and let in6_selectsrc() + * validate the specified address. This is because ipi6_addr + * may not have enough information about its scope zone, and + * we may need additional information (such as outgoing + * interface or the scope zone of a destination address) to + * disambiguate the scope. 
+ * XXX: the delay of the validation may confuse the + * application when it is used as a sticky option. + */ + if (opt->ip6po_pktinfo == NULL) { + opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), + M_IP6OPT, M_NOWAIT); + if (opt->ip6po_pktinfo == NULL) + return (ENOBUFS); + } + bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); + break; + } + + case IPV6_2292HOPLIMIT: + case IPV6_HOPLIMIT: + { + int *hlimp; + + /* + * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT + * to simplify the ordering among hoplimit options. + */ + if (optname == IPV6_HOPLIMIT && sticky) + return (ENOPROTOOPT); + + if (len != sizeof(int)) + return (EINVAL); + hlimp = (int *)buf; + if (*hlimp < -1 || *hlimp > 255) + return (EINVAL); + + opt->ip6po_hlim = *hlimp; + break; + } + + case IPV6_TCLASS: + { + int tclass; + + if (len != sizeof(int)) + return (EINVAL); + tclass = *(int *)buf; + if (tclass < -1 || tclass > 255) + return (EINVAL); + + opt->ip6po_tclass = tclass; + break; + } + + case IPV6_2292NEXTHOP: + case IPV6_NEXTHOP: + if (cred != NULL) { + error = priv_check_cred(cred, + PRIV_NETINET_SETHDROPTS, 0); + if (error) + return (error); + } + + if (len == 0) { /* just remove the option */ + ip6_clearpktopts(opt, IPV6_NEXTHOP); + break; + } + + /* check if cmsg_len is large enough for sa_len */ + if (len < sizeof(struct sockaddr) || len < *buf) + return (EINVAL); + + switch (((struct sockaddr *)buf)->sa_family) { + case AF_INET6: + { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; + int error; + + if (sa6->sin6_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + + if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { + return (EINVAL); + } + if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) + != 0) { + return (error); + } + break; + } + case AF_LINK: /* should eventually be supported */ + default: + return (EAFNOSUPPORT); + } + + /* turn off the previous option, then set the new option. 
*/ + ip6_clearpktopts(opt, IPV6_NEXTHOP); + opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); + if (opt->ip6po_nexthop == NULL) + return (ENOBUFS); + bcopy(buf, opt->ip6po_nexthop, *buf); + break; + + case IPV6_2292HOPOPTS: + case IPV6_HOPOPTS: + { + struct ip6_hbh *hbh; + int hbhlen; + + /* + * XXX: We don't allow a non-privileged user to set ANY HbH + * options, since per-option restriction has too much + * overhead. + */ + if (cred != NULL) { + error = priv_check_cred(cred, + PRIV_NETINET_SETHDROPTS, 0); + if (error) + return (error); + } + + if (len == 0) { + ip6_clearpktopts(opt, IPV6_HOPOPTS); + break; /* just remove the option */ + } + + /* message length validation */ + if (len < sizeof(struct ip6_hbh)) + return (EINVAL); + hbh = (struct ip6_hbh *)buf; + hbhlen = (hbh->ip6h_len + 1) << 3; + if (len != hbhlen) + return (EINVAL); + + /* turn off the previous option, then set the new option. */ + ip6_clearpktopts(opt, IPV6_HOPOPTS); + opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); + if (opt->ip6po_hbh == NULL) + return (ENOBUFS); + bcopy(hbh, opt->ip6po_hbh, hbhlen); + + break; + } + + case IPV6_2292DSTOPTS: + case IPV6_DSTOPTS: + case IPV6_RTHDRDSTOPTS: + { + struct ip6_dest *dest, **newdest = NULL; + int destlen; + + if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ + error = priv_check_cred(cred, + PRIV_NETINET_SETHDROPTS, 0); + if (error) + return (error); + } + + if (len == 0) { + ip6_clearpktopts(opt, optname); + break; /* just remove the option */ + } + + /* message length validation */ + if (len < sizeof(struct ip6_dest)) + return (EINVAL); + dest = (struct ip6_dest *)buf; + destlen = (dest->ip6d_len + 1) << 3; + if (len != destlen) + return (EINVAL); + + /* + * Determine the position that the destination options header + * should be inserted; before or after the routing header. + */ + switch (optname) { + case IPV6_2292DSTOPTS: + /* + * The old advacned API is ambiguous on this point. 
+ * Our approach is to determine the position based + * according to the existence of a routing header. + * Note, however, that this depends on the order of the + * extension headers in the ancillary data; the 1st + * part of the destination options header must appear + * before the routing header in the ancillary data, + * too. + * RFC3542 solved the ambiguity by introducing + * separate ancillary data or option types. + */ + if (opt->ip6po_rthdr == NULL) + newdest = &opt->ip6po_dest1; + else + newdest = &opt->ip6po_dest2; + break; + case IPV6_RTHDRDSTOPTS: + newdest = &opt->ip6po_dest1; + break; + case IPV6_DSTOPTS: + newdest = &opt->ip6po_dest2; + break; + } + + /* turn off the previous option, then set the new option. */ + ip6_clearpktopts(opt, optname); + *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); + if (*newdest == NULL) + return (ENOBUFS); + bcopy(dest, *newdest, destlen); + + break; + } + + case IPV6_2292RTHDR: + case IPV6_RTHDR: + { + struct ip6_rthdr *rth; + int rthlen; + + if (len == 0) { + ip6_clearpktopts(opt, IPV6_RTHDR); + break; /* just remove the option */ + } + + /* message length validation */ + if (len < sizeof(struct ip6_rthdr)) + return (EINVAL); + rth = (struct ip6_rthdr *)buf; + rthlen = (rth->ip6r_len + 1) << 3; + if (len != rthlen) + return (EINVAL); + + switch (rth->ip6r_type) { + case IPV6_RTHDR_TYPE_0: + if (rth->ip6r_len == 0) /* must contain one addr */ + return (EINVAL); + if (rth->ip6r_len % 2) /* length must be even */ + return (EINVAL); + if (rth->ip6r_len / 2 != rth->ip6r_segleft) + return (EINVAL); + break; + default: + return (EINVAL); /* not supported */ + } + + /* turn off the previous option */ + ip6_clearpktopts(opt, IPV6_RTHDR); + opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); + if (opt->ip6po_rthdr == NULL) + return (ENOBUFS); + bcopy(rth, opt->ip6po_rthdr, rthlen); + + break; + } + + case IPV6_USE_MIN_MTU: + if (len != sizeof(int)) + return (EINVAL); + minmtupolicy = *(int *)buf; + if (minmtupolicy != 
IP6PO_MINMTU_MCASTONLY && + minmtupolicy != IP6PO_MINMTU_DISABLE && + minmtupolicy != IP6PO_MINMTU_ALL) { + return (EINVAL); + } + opt->ip6po_minmtu = minmtupolicy; + break; + + case IPV6_DONTFRAG: + if (len != sizeof(int)) + return (EINVAL); + + if (uproto == IPPROTO_TCP || *(int *)buf == 0) { + /* + * we ignore this option for TCP sockets. + * (RFC3542 leaves this case unspecified.) + */ + opt->ip6po_flags &= ~IP6PO_DONTFRAG; + } else + opt->ip6po_flags |= IP6PO_DONTFRAG; + break; + + case IPV6_PREFER_TEMPADDR: + if (len != sizeof(int)) + return (EINVAL); + preftemp = *(int *)buf; + if (preftemp != IP6PO_TEMPADDR_SYSTEM && + preftemp != IP6PO_TEMPADDR_NOTPREFER && + preftemp != IP6PO_TEMPADDR_PREFER) { + return (EINVAL); + } + opt->ip6po_prefer_tempaddr = preftemp; + break; + + default: + return (ENOPROTOOPT); + } /* end of switch */ + + return (0); +} + +/* + * Routine called from ip6_output() to loop back a copy of an IP6 multicast + * packet to the input queue of a specified interface. Note that this + * calls the output routine of the loopback "driver", but with an interface + * pointer that might NOT be &loif -- easier than replicating that code here. + */ +void +ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) +{ + struct mbuf *copym; + struct ip6_hdr *ip6; + + copym = m_copy(m, 0, M_COPYALL); + if (copym == NULL) + return; + + /* + * Make sure to deep-copy IPv6 header portion in case the data + * is in an mbuf cluster, so that we can safely override the IPv6 + * header portion later. + */ + if ((copym->m_flags & M_EXT) != 0 || + copym->m_len < sizeof(struct ip6_hdr)) { + copym = m_pullup(copym, sizeof(struct ip6_hdr)); + if (copym == NULL) + return; + } + +#ifdef DIAGNOSTIC + if (copym->m_len < sizeof(*ip6)) { + m_freem(copym); + return; + } +#endif + + ip6 = mtod(copym, struct ip6_hdr *); + /* + * clear embedded scope identifiers if necessary. + * in6_clearscope will touch the addresses only when necessary. 
+ */ + in6_clearscope(&ip6->ip6_src); + in6_clearscope(&ip6->ip6_dst); + + (void)if_simloop(ifp, copym, dst->sin6_family, 0); +} + +/* + * Chop IPv6 header off from the payload. + */ +static int +ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) +{ + struct mbuf *mh; + struct ip6_hdr *ip6; + + ip6 = mtod(m, struct ip6_hdr *); + if (m->m_len > sizeof(*ip6)) { + MGETHDR(mh, M_DONTWAIT, MT_HEADER); + if (mh == 0) { + m_freem(m); + return ENOBUFS; + } + M_MOVE_PKTHDR(mh, m); + MH_ALIGN(mh, sizeof(*ip6)); + m->m_len -= sizeof(*ip6); + m->m_data += sizeof(*ip6); + mh->m_next = m; + m = mh; + m->m_len = sizeof(*ip6); + bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); + } + exthdrs->ip6e_ip6 = m; + return 0; +} + +/* + * Compute IPv6 extension header length. + */ +int +ip6_optlen(struct inpcb *in6p) +{ + int len; + + if (!in6p->in6p_outputopts) + return 0; + + len = 0; +#define elen(x) \ + (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) + + len += elen(in6p->in6p_outputopts->ip6po_hbh); + if (in6p->in6p_outputopts->ip6po_rthdr) + /* dest1 is valid with rthdr only */ + len += elen(in6p->in6p_outputopts->ip6po_dest1); + len += elen(in6p->in6p_outputopts->ip6po_rthdr); + len += elen(in6p->in6p_outputopts->ip6po_dest2); + return len; +#undef elen +} diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h new file mode 100644 index 00000000..50d8adcb --- /dev/null +++ b/freebsd/sys/netinet6/ip6_var.h @@ -0,0 +1,444 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_var.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NETINET6_IP6_VAR_HH_ +#define _NETINET6_IP6_VAR_HH_ + +/* + * IP6 reassembly queue structure. Each fragment + * being reassembled is attached to one of these structures. 
+ */ +struct ip6q { + struct ip6asfrag *ip6q_down; + struct ip6asfrag *ip6q_up; + u_int32_t ip6q_ident; + u_int8_t ip6q_nxt; + u_int8_t ip6q_ecn; + u_int8_t ip6q_ttl; + struct in6_addr ip6q_src, ip6q_dst; + struct ip6q *ip6q_next; + struct ip6q *ip6q_prev; + int ip6q_unfrglen; /* len of unfragmentable part */ +#ifdef notyet + u_char *ip6q_nxtp; +#endif + int ip6q_nfrag; /* # of fragments */ + struct label *ip6q_label; +}; + +struct ip6asfrag { + struct ip6asfrag *ip6af_down; + struct ip6asfrag *ip6af_up; + struct mbuf *ip6af_m; + int ip6af_offset; /* offset in ip6af_m to next header */ + int ip6af_frglen; /* fragmentable part length */ + int ip6af_off; /* fragment offset */ + u_int16_t ip6af_mff; /* more fragment bit in frag off */ +}; + +#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) + +/* + * Structure attached to inpcb.in6p_moptions and + * passed to ip6_output when IPv6 multicast options are in use. + * This structure is lazy-allocated. + */ +struct ip6_moptions { + struct ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */ + u_char im6o_multicast_hlim; /* hoplimit for outgoing multicasts */ + u_char im6o_multicast_loop; /* 1 >= hear sends if a member */ + u_short im6o_num_memberships; /* no. 
memberships this socket */ + u_short im6o_max_memberships; /* max memberships this socket */ + struct in6_multi **im6o_membership; /* group memberships */ + struct in6_mfilter *im6o_mfilters; /* source filters */ +}; + +/* + * Control options for outgoing packets + */ + +/* Routing header related info */ +struct ip6po_rhinfo { + struct ip6_rthdr *ip6po_rhi_rthdr; /* Routing header */ + struct route_in6 ip6po_rhi_route; /* Route to the 1st hop */ +}; +#define ip6po_rthdr ip6po_rhinfo.ip6po_rhi_rthdr +#define ip6po_route ip6po_rhinfo.ip6po_rhi_route + +/* Nexthop related info */ +struct ip6po_nhinfo { + struct sockaddr *ip6po_nhi_nexthop; + struct route_in6 ip6po_nhi_route; /* Route to the nexthop */ +}; +#define ip6po_nexthop ip6po_nhinfo.ip6po_nhi_nexthop +#define ip6po_nextroute ip6po_nhinfo.ip6po_nhi_route + +struct ip6_pktopts { + struct mbuf *ip6po_m; /* Pointer to mbuf storing the data */ + int ip6po_hlim; /* Hoplimit for outgoing packets */ + + /* Outgoing IF/address information */ + struct in6_pktinfo *ip6po_pktinfo; + + /* Next-hop address information */ + struct ip6po_nhinfo ip6po_nhinfo; + + struct ip6_hbh *ip6po_hbh; /* Hop-by-Hop options header */ + + /* Destination options header (before a routing header) */ + struct ip6_dest *ip6po_dest1; + + /* Routing header related info. 
*/ + struct ip6po_rhinfo ip6po_rhinfo; + + /* Destination options header (after a routing header) */ + struct ip6_dest *ip6po_dest2; + + int ip6po_tclass; /* traffic class */ + + int ip6po_minmtu; /* fragment vs PMTU discovery policy */ +#define IP6PO_MINMTU_MCASTONLY -1 /* default; send at min MTU for multicast*/ +#define IP6PO_MINMTU_DISABLE 0 /* always perform pmtu disc */ +#define IP6PO_MINMTU_ALL 1 /* always send at min MTU */ + + int ip6po_prefer_tempaddr; /* whether temporary addresses are + preferred as source address */ +#define IP6PO_TEMPADDR_SYSTEM -1 /* follow the system default */ +#define IP6PO_TEMPADDR_NOTPREFER 0 /* not prefer temporary address */ +#define IP6PO_TEMPADDR_PREFER 1 /* prefer temporary address */ + + int ip6po_flags; +#if 0 /* parameters in this block is obsolete. do not reuse the values. */ +#define IP6PO_REACHCONF 0x01 /* upper-layer reachability confirmation. */ +#define IP6PO_MINMTU 0x02 /* use minimum MTU (IPV6_USE_MIN_MTU) */ +#endif +#define IP6PO_DONTFRAG 0x04 /* disable fragmentation (IPV6_DONTFRAG) */ +#define IP6PO_USECOA 0x08 /* use care of address */ +}; + +/* + * Control options for incoming packets + */ + +struct ip6stat { + u_quad_t ip6s_total; /* total packets received */ + u_quad_t ip6s_tooshort; /* packet too short */ + u_quad_t ip6s_toosmall; /* not enough data */ + u_quad_t ip6s_fragments; /* fragments received */ + u_quad_t ip6s_fragdropped; /* frags dropped(dups, out of space) */ + u_quad_t ip6s_fragtimeout; /* fragments timed out */ + u_quad_t ip6s_fragoverflow; /* fragments that exceeded limit */ + u_quad_t ip6s_forward; /* packets forwarded */ + u_quad_t ip6s_cantforward; /* packets rcvd for unreachable dest */ + u_quad_t ip6s_redirectsent; /* packets forwarded on same net */ + u_quad_t ip6s_delivered; /* datagrams delivered to upper level*/ + u_quad_t ip6s_localout; /* total ip packets generated here */ + u_quad_t ip6s_odropped; /* lost packets due to nobufs, etc. 
*/ + u_quad_t ip6s_reassembled; /* total packets reassembled ok */ + u_quad_t ip6s_fragmented; /* datagrams successfully fragmented */ + u_quad_t ip6s_ofragments; /* output fragments created */ + u_quad_t ip6s_cantfrag; /* don't fragment flag was set, etc. */ + u_quad_t ip6s_badoptions; /* error in option processing */ + u_quad_t ip6s_noroute; /* packets discarded due to no route */ + u_quad_t ip6s_badvers; /* ip6 version != 6 */ + u_quad_t ip6s_rawout; /* total raw ip packets generated */ + u_quad_t ip6s_badscope; /* scope error */ + u_quad_t ip6s_notmember; /* don't join this multicast group */ + u_quad_t ip6s_nxthist[256]; /* next header history */ + u_quad_t ip6s_m1; /* one mbuf */ + u_quad_t ip6s_m2m[32]; /* two or more mbuf */ + u_quad_t ip6s_mext1; /* one ext mbuf */ + u_quad_t ip6s_mext2m; /* two or more ext mbuf */ + u_quad_t ip6s_exthdrtoolong; /* ext hdr are not continuous */ + u_quad_t ip6s_nogif; /* no match gif found */ + u_quad_t ip6s_toomanyhdr; /* discarded due to too many headers */ + + /* + * statistics for improvement of the source address selection + * algorithm: + * XXX: hardcoded 16 = # of ip6 multicast scope types + 1 + */ + /* number of times that address selection fails */ + u_quad_t ip6s_sources_none; + /* number of times that an address on the outgoing I/F is chosen */ + u_quad_t ip6s_sources_sameif[16]; + /* number of times that an address on a non-outgoing I/F is chosen */ + u_quad_t ip6s_sources_otherif[16]; + /* + * number of times that an address that has the same scope + * from the destination is chosen. + */ + u_quad_t ip6s_sources_samescope[16]; + /* + * number of times that an address that has a different scope + * from the destination is chosen. + */ + u_quad_t ip6s_sources_otherscope[16]; + /* number of times that a deprecated address is chosen */ + u_quad_t ip6s_sources_deprecated[16]; + + /* number of times that each rule of source selection is applied. 
*/ + u_quad_t ip6s_sources_rule[16]; +}; + +#ifdef _KERNEL +#define IP6STAT_ADD(name, val) V_ip6stat.name += (val) +#define IP6STAT_SUB(name, val) V_ip6stat.name -= (val) +#define IP6STAT_INC(name) IP6STAT_ADD(name, 1) +#define IP6STAT_DEC(name) IP6STAT_SUB(name, 1) +#endif + +#ifdef _KERNEL +/* + * IPv6 onion peeling state. + * it will be initialized when we come into ip6_input(). + * XXX do not make it a kitchen sink! + */ +struct ip6aux { + u_int32_t ip6a_flags; +#define IP6A_SWAP 0x01 /* swapped home/care-of on packet */ +#define IP6A_HASEEN 0x02 /* HA was present */ +#define IP6A_BRUID 0x04 /* BR Unique Identifier was present */ +#define IP6A_RTALERTSEEN 0x08 /* rtalert present */ + + /* ip6.ip6_src */ + struct in6_addr ip6a_careof; /* care-of address of the peer */ + struct in6_addr ip6a_home; /* home address of the peer */ + u_int16_t ip6a_bruid; /* BR unique identifier */ + + /* ip6.ip6_dst */ + struct in6_ifaddr *ip6a_dstia6; /* my ifaddr that matches ip6_dst */ + + /* rtalert */ + u_int16_t ip6a_rtalert; /* rtalert option value */ + + /* + * decapsulation history will be here. + * with IPsec it may not be accurate. + */ +}; +#endif + +#ifdef _KERNEL +/* flags passed to ip6_output as last parameter */ +#define IPV6_UNSPECSRC 0x01 /* allow :: as the source address */ +#define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */ +#define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */ + +#ifdef __NO_STRICT_ALIGNMENT +#define IP6_HDR_ALIGNED_P(ip) 1 +#else +#define IP6_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0) +#endif + +VNET_DECLARE(struct ip6stat, ip6stat); /* statistics */ +VNET_DECLARE(int, ip6_defhlim); /* default hop limit */ +VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */ +VNET_DECLARE(int, ip6_forwarding); /* act as router? */ +VNET_DECLARE(int, ip6_use_deprecated); /* allow deprecated addr as source */ +VNET_DECLARE(int, ip6_rr_prune); /* router renumbering prefix + * walk list every 5 sec. 
*/ +VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */ +VNET_DECLARE(int, ip6_v6only); +#define V_ip6stat VNET(ip6stat) +#define V_ip6_defhlim VNET(ip6_defhlim) +#define V_ip6_defmcasthlim VNET(ip6_defmcasthlim) +#define V_ip6_forwarding VNET(ip6_forwarding) +#define V_ip6_use_deprecated VNET(ip6_use_deprecated) +#define V_ip6_rr_prune VNET(ip6_rr_prune) +#define V_ip6_mcast_pmtu VNET(ip6_mcast_pmtu) +#define V_ip6_v6only VNET(ip6_v6only) + +VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */ +VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */ +VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly + * queue */ +VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly + * queue */ +VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */ +VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */ +VNET_DECLARE(int, ip6_log_interval); +VNET_DECLARE(time_t, ip6_log_time); +VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension + * headers */ +VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ +#define V_ip6_mrouter VNET(ip6_mrouter) +#define V_ip6_sendredirects VNET(ip6_sendredirects) +#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) +#define V_ip6_maxfrags VNET(ip6_maxfrags) +#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv) +#define V_ip6_keepfaith VNET(ip6_keepfaith) +#define V_ip6_log_interval VNET(ip6_log_interval) +#define V_ip6_log_time VNET(ip6_log_time) +#define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit) +#define V_ip6_dad_count VNET(ip6_dad_count) + +VNET_DECLARE(int, ip6_auto_flowlabel); +VNET_DECLARE(int, ip6_auto_linklocal); +#define V_ip6_auto_flowlabel VNET(ip6_auto_flowlabel) +#define V_ip6_auto_linklocal VNET(ip6_auto_linklocal) + +VNET_DECLARE(int, ip6_use_tempaddr); /* Whether to use temporary addresses */ +VNET_DECLARE(int, ip6_prefer_tempaddr); /* Whether to prefer temporary + * 
addresses in the source address + * selection */ +#define V_ip6_use_tempaddr VNET(ip6_use_tempaddr) +#define V_ip6_prefer_tempaddr VNET(ip6_prefer_tempaddr) + +VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope + * zone when unspecified */ +#define V_ip6_use_defzone VNET(ip6_use_defzone) + +VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */ +#define V_inet6_pfil_hook VNET(inet6_pfil_hook) +#ifdef IPSTEALTH +VNET_DECLARE(int, ip6stealth); +#define V_ip6stealth VNET(ip6stealth) +#endif + +extern struct pr_usrreqs rip6_usrreqs; +struct sockopt; + +struct inpcb; + +int icmp6_ctloutput __P((struct socket *, struct sockopt *sopt)); + +struct in6_ifaddr; +void ip6_init __P((void)); +#ifdef VIMAGE +void ip6_destroy __P((void)); +#endif +int ip6proto_register(short); +int ip6proto_unregister(short); + +void ip6_input __P((struct mbuf *)); +struct in6_ifaddr *ip6_getdstifaddr __P((struct mbuf *)); +void ip6_freepcbopts __P((struct ip6_pktopts *)); + +int ip6_unknown_opt __P((u_int8_t *, struct mbuf *, int)); +char * ip6_get_prevhdr __P((struct mbuf *, int)); +int ip6_nexthdr __P((struct mbuf *, int, int, int *)); +int ip6_lasthdr __P((struct mbuf *, int, int, int *)); + +struct ip6aux *ip6_addaux __P((struct mbuf *)); +struct ip6aux *ip6_findaux __P((struct mbuf *)); +void ip6_delaux __P((struct mbuf *)); + +extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, + struct mbuf *); + +int ip6_process_hopopts __P((struct mbuf *, u_int8_t *, int, u_int32_t *, + u_int32_t *)); +struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *, + struct mbuf **, int *); +void ip6_savecontrol __P((struct inpcb *, struct mbuf *, struct mbuf **)); +void ip6_notify_pmtu __P((struct inpcb *, struct sockaddr_in6 *, + u_int32_t *)); +int ip6_sysctl __P((int *, u_int, void *, size_t *, void *, size_t)); + +void ip6_forward __P((struct mbuf *, int)); + +void ip6_mloopback __P((struct ifnet *, struct mbuf *, struct sockaddr_in6 *)); +int 
ip6_output __P((struct mbuf *, struct ip6_pktopts *, + struct route_in6 *, + int, + struct ip6_moptions *, struct ifnet **, + struct inpcb *)); +int ip6_ctloutput __P((struct socket *, struct sockopt *)); +int ip6_raw_ctloutput __P((struct socket *, struct sockopt *)); +void ip6_initpktopts __P((struct ip6_pktopts *)); +int ip6_setpktopts __P((struct mbuf *, struct ip6_pktopts *, + struct ip6_pktopts *, struct ucred *, int)); +void ip6_clearpktopts __P((struct ip6_pktopts *, int)); +struct ip6_pktopts *ip6_copypktopts __P((struct ip6_pktopts *, int)); +int ip6_optlen __P((struct inpcb *)); + +int route6_input __P((struct mbuf **, int *, int)); + +void frag6_init __P((void)); +int frag6_input __P((struct mbuf **, int *, int)); +void frag6_slowtimo __P((void)); +void frag6_drain __P((void)); + +void rip6_init __P((void)); +int rip6_input __P((struct mbuf **, int *, int)); +void rip6_ctlinput __P((int, struct sockaddr *, void *)); +int rip6_ctloutput __P((struct socket *, struct sockopt *)); +int rip6_output __P((struct mbuf *, ...)); +int rip6_usrreq __P((struct socket *, + int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *)); + +int dest6_input __P((struct mbuf **, int *, int)); +int none_input __P((struct mbuf **, int *, int)); + +int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, + struct inpcb *inp, struct route_in6 *, struct ucred *cred, + struct ifnet **, struct in6_addr *); +int in6_selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, + struct ip6_moptions *, struct route_in6 *, struct ifnet **, + struct rtentry **)); +u_int32_t ip6_randomid __P((void)); +u_int32_t ip6_randomflowlabel __P((void)); +#endif /* _KERNEL */ + +#endif /* !_NETINET6_IP6_VAR_HH_ */ diff --git a/freebsd/sys/netinet6/ip6protosw.h b/freebsd/sys/netinet6/ip6protosw.h new file mode 100644 index 00000000..ebb38c8d --- /dev/null +++ b/freebsd/sys/netinet6/ip6protosw.h @@ -0,0 +1,148 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6protosw.h,v 1.25 2001/09/26 06:13:03 keiichi Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)protosw.h 8.1 (Berkeley) 6/2/93 + * BSDI protosw.h,v 2.3 1996/10/11 16:02:40 pjd Exp + * $FreeBSD$ + */ + +#ifndef _NETINET6_IP6PROTOSW_HH_ +#define _NETINET6_IP6PROTOSW_HH_ + +/* + * Protocol switch table for IPv6. + * All other definitions should refer to sys/protosw.h + */ + +struct mbuf; +struct sockaddr; +struct socket; +struct domain; +struct thread; +struct ip6_hdr; +struct icmp6_hdr; +struct in6_addr; +struct pr_usrreqs; + +/* + * argument type for the last arg of pr_ctlinput(). + * should be consulted only with AF_INET6 family. + * + * IPv6 ICMP IPv6 [exthdrs] finalhdr payload + * ^ ^ ^ ^ + * | | ip6c_ip6 ip6c_off + * | ip6c_icmp6 + * ip6c_m + * + * ip6c_finaldst usually points to ip6c_ip6->ip6_dst. 
If the original + * (internal) packet carries a routing header, it may point to the final + * destination address in the routing header. + * + * ip6c_src: ip6c_ip6->ip6_src + scope info + flowlabel in ip6c_ip6 + * (beware of flowlabel, if you try to compare it against others) + * ip6c_dst: ip6c_finaldst + scope info + */ +struct ip6ctlparam { + struct mbuf *ip6c_m; /* start of mbuf chain */ + struct icmp6_hdr *ip6c_icmp6; /* icmp6 header of target packet */ + struct ip6_hdr *ip6c_ip6; /* ip6 header of target packet */ + int ip6c_off; /* offset of the target proto header */ + struct sockaddr_in6 *ip6c_src; /* srcaddr w/ additional info */ + struct sockaddr_in6 *ip6c_dst; /* (final) dstaddr w/ additional info */ + struct in6_addr *ip6c_finaldst; /* final destination address */ + void *ip6c_cmdarg; /* control command dependent data */ + u_int8_t ip6c_nxt; /* final next header field */ +}; + +struct ip6protosw { + short pr_type; /* socket type used for */ + struct domain *pr_domain; /* domain protocol a member of */ + short pr_protocol; /* protocol number */ + short pr_flags; /* see below */ + +/* protocol-protocol hooks */ + int (*pr_input) /* input to protocol (from below) */ + __P((struct mbuf **, int *, int)); + int (*pr_output) /* output to protocol (from above) */ + __P((struct mbuf *, ...)); + void (*pr_ctlinput) /* control input (from below) */ + __P((int, struct sockaddr *, void *)); + int (*pr_ctloutput) /* control output (from above) */ + __P((struct socket *, struct sockopt *)); + +/* utility hooks */ + void (*pr_init) /* initialization hook */ + __P((void)); + void (*pr_destroy) /* cleanup hook */ + __P((void)); + + void (*pr_fasttimo) /* fast timeout (200ms) */ + __P((void)); + void (*pr_slowtimo) /* slow timeout (500ms) */ + __P((void)); + void (*pr_drain) /* flush any excess space possible */ + __P((void)); + struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ +}; + +#ifdef _KERNEL +extern struct ip6protosw inet6sw[]; /* array of ip6protosw entries, defined elsewhere */ +#endif + +#endif /* 
!_NETINET6_IP6PROTOSW_HH_ */ diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c new file mode 100644 index 00000000..a08f0f16 --- /dev/null +++ b/freebsd/sys/netinet6/mld6.c @@ -0,0 +1,3311 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (c) 2009 Bruce Simpson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $ + */ + +/*- + * Copyright (c) 1988 Stephen Deering. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Stephen Deering of Stanford University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)igmp.c 8.1 (Berkeley) 7/19/93 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/sysctl.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/callout.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/module.h> +#include <freebsd/sys/ktr.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/route.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet6/mld6.h> +#include <freebsd/netinet6/mld6_var.h> + +#include <freebsd/security/mac/mac_framework.h> + +#ifndef KTR_MLD +#define KTR_MLD KTR_INET6 +#endif + +static struct mld_ifinfo * + mli_alloc_locked(struct ifnet *); +static void mli_delete_locked(const struct ifnet *); +static void mld_dispatch_packet(struct mbuf *); +static void mld_dispatch_queue(struct ifqueue *, int); +static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *); +static void mld_fasttimo_vnet(void); +static int mld_handle_state_change(struct in6_multi *, + struct mld_ifinfo *); +static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *, + const int); +#ifdef KTR +static char * mld_rec_type_to_str(const int); +#endif +static void mld_set_version(struct mld_ifinfo *, const int); +static void mld_slowtimo_vnet(void); +static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *, + /*const*/ struct mld_hdr *); +static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *, + /*const*/ struct mld_hdr *); +static void mld_v1_process_group_timer(struct 
in6_multi *, const int); +static void mld_v1_process_querier_timers(struct mld_ifinfo *); +static int mld_v1_transmit_report(struct in6_multi *, const int); +static void mld_v1_update_group(struct in6_multi *, const int); +static void mld_v2_cancel_link_timers(struct mld_ifinfo *); +static void mld_v2_dispatch_general_query(struct mld_ifinfo *); +static struct mbuf * + mld_v2_encap_report(struct ifnet *, struct mbuf *); +static int mld_v2_enqueue_filter_change(struct ifqueue *, + struct in6_multi *); +static int mld_v2_enqueue_group_record(struct ifqueue *, + struct in6_multi *, const int, const int, const int, + const int); +static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *, + struct mbuf *, const int, const int); +static int mld_v2_merge_state_changes(struct in6_multi *, + struct ifqueue *); +static void mld_v2_process_group_timers(struct mld_ifinfo *, + struct ifqueue *, struct ifqueue *, + struct in6_multi *, const int); +static int mld_v2_process_group_query(struct in6_multi *, + struct mld_ifinfo *mli, int, struct mbuf *, const int); +static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS); +static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS); + +/* + * Normative references: RFC 2710, RFC 3590, RFC 3810. + * + * Locking: + * * The MLD subsystem lock ends up being system-wide for the moment, + * but could be per-VIMAGE later on. + * * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. + * Any may be taken independently; if any are held at the same + * time, the above lock order must be followed. + * * IN6_MULTI_LOCK covers in_multi. + * * MLD_LOCK covers per-link state and any global variables in this file. + * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of + * per-link state iterators. + * + * XXX LOR PREVENTION + * A special case for IPv6 is the in6_setscope() routine. ip6_output() + * will not accept an ifp; it wants an embedded scope ID, unlike + * ip_output(), which happily takes the ifp given to it. 
The embedded + * scope ID is only used by MLD to select the outgoing interface. + * + * During interface attach and detach, MLD will take MLD_LOCK *after* + * the IF_AFDATA_LOCK. + * As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call + * it with MLD_LOCK held without triggering an LOR. A netisr with indirect + * dispatch could work around this, but we'd rather not do that, as it + * can introduce other races. + * + * As such, we exploit the fact that the scope ID is just the interface + * index, and embed it in the IPv6 destination address accordingly. + * This is potentially NOT VALID for MLDv1 reports, as they + * are always sent to the multicast group itself; as MLDv2 + * reports are always sent to ff02::16, this is not an issue + * when MLDv2 is in use. + * + * This does not however eliminate the LOR when ip6_output() itself + * calls in6_setscope() internally whilst MLD_LOCK is held. This will + * trigger a LOR warning in WITNESS when the ifnet is detached. + * + * The right answer is probably to make IF_AFDATA_LOCK an rwlock, given + * how it's used across the network stack. Here we're simply exploiting + * the fact that MLD runs at a similar layer in the stack to scope6.c. + * + * VIMAGE: + * * Each in6_multi corresponds to an ifp, and each ifp corresponds + * to a vnet in ifp->if_vnet. + */ +static struct mtx mld_mtx; +MALLOC_DEFINE(M_MLD, "mld", "mld state"); + +#define MLD_EMBEDSCOPE(pin6, zoneid) \ + if (IN6_IS_SCOPE_LINKLOCAL(pin6) || \ + IN6_IS_ADDR_MC_INTFACELOCAL(pin6)) \ + (pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF) \ + +/* + * VIMAGE-wide globals. 
+ */ +static VNET_DEFINE(struct timeval, mld_gsrdelay) = {10, 0}; /* initial value: 10 s, 0 us */ +static VNET_DEFINE(LIST_HEAD(, mld_ifinfo), mli_head); /* list head of struct mld_ifinfo */ +static VNET_DEFINE(int, interface_timers_running6); +static VNET_DEFINE(int, state_change_timers_running6); +static VNET_DEFINE(int, current_state_timers_running6); + +/* + * Accessors: each V_* name expands to VNET() applied to the matching + * variable defined above. + */ +#define V_mld_gsrdelay VNET(mld_gsrdelay) +#define V_mli_head VNET(mli_head) +#define V_interface_timers_running6 VNET(interface_timers_running6) +#define V_state_change_timers_running6 VNET(state_change_timers_running6) +#define V_current_state_timers_running6 VNET(current_state_timers_running6) + +SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. */ + +SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW, 0, + "IPv6 Multicast Listener Discovery"); + +/* + * Virtualized sysctls. + */ +SYSCTL_VNET_PROC(_net_inet6_mld, OID_AUTO, gsrdelay, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I", + "Rate limit for MLDv2 Group-and-Source queries in seconds"); + +/* + * Non-virtualized sysctls. + */ +SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE, + sysctl_mld_ifinfo, "Per-interface MLDv2 state"); + +/* + * Plain (non-vnet) knobs, both initialized to 1. Each is registered with + * SYSCTL_INT (CTLFLAG_RW) and with TUNABLE_INT under net.inet6.mld. + */ +static int mld_v1enable = 1; +SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW, + &mld_v1enable, 0, "Enable fallback to MLDv1"); +TUNABLE_INT("net.inet6.mld.v1enable", &mld_v1enable); + +static int mld_use_allow = 1; +SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW, + &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves"); +TUNABLE_INT("net.inet6.mld.use_allow", &mld_use_allow); + +/* + * Packed Router Alert option structure declaration. + */ +struct mld_raopt { + struct ip6_hbh hbh; + struct ip6_opt pad; + struct ip6_opt_router ra; +} __packed; + +/* + * Router Alert hop-by-hop option header. 
+ */ +static struct mld_raopt mld_ra = { + .hbh = { 0, 0 }, + .pad = { .ip6o_type = IP6OPT_PADN, 0 }, + .ra = { + .ip6or_type = IP6OPT_ROUTER_ALERT, + .ip6or_len = IP6OPT_RTALERT_LEN - 2, + .ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF), + .ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF) + } +}; +static struct ip6_pktopts mld_po; + +static __inline void +mld_save_context(struct mbuf *m, struct ifnet *ifp) +{ + +#ifdef VIMAGE + m->m_pkthdr.header = ifp->if_vnet; +#endif /* VIMAGE */ + m->m_pkthdr.flowid = ifp->if_index; +} + +static __inline void +mld_scrub_context(struct mbuf *m) +{ + + m->m_pkthdr.header = NULL; + m->m_pkthdr.flowid = 0; +} + +/* + * Restore context from a queued output chain. + * Return saved ifindex. + * + * VIMAGE: The assertion is there to make sure that we + * actually called CURVNET_SET() with what's in the mbuf chain. + */ +static __inline uint32_t +mld_restore_context(struct mbuf *m) +{ + +#if defined(VIMAGE) && defined(INVARIANTS) + KASSERT(curvnet == m->m_pkthdr.header, + ("%s: called when curvnet was not restored", __func__)); +#endif + return (m->m_pkthdr.flowid); +} + +/* + * Retrieve or set threshold between group-source queries in seconds. + * + * VIMAGE: Assume curvnet set by caller. + * SMPng: NOTE: Serialized by MLD lock. + */ +static int +sysctl_mld_gsr(SYSCTL_HANDLER_ARGS) +{ + int error; + int i; + + error = sysctl_wire_old_buffer(req, sizeof(int)); + if (error) + return (error); + + MLD_LOCK(); + + i = V_mld_gsrdelay.tv_sec; + + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || !req->newptr) + goto out_locked; + + if (i < -1 || i >= 60) { + error = EINVAL; + goto out_locked; + } + + CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d", + V_mld_gsrdelay.tv_sec, i); + V_mld_gsrdelay.tv_sec = i; + +out_locked: + MLD_UNLOCK(); + return (error); +} + +/* + * Expose struct mld_ifinfo to userland, keyed by ifindex. + * For use by ifmcstat(8). + * + * SMPng: NOTE: Does an unlocked ifindex space read. 
+ * VIMAGE: Assume curvnet set by caller. The node handler itself + * is not directly virtualized. + */ +static int +sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS) +{ + int *name; + int error; + u_int namelen; + struct ifnet *ifp; + struct mld_ifinfo *mli; + + name = (int *)arg1; + namelen = arg2; + + if (req->newptr != NULL) + return (EPERM); + + if (namelen != 1) + return (EINVAL); + + error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo)); + if (error) + return (error); + + IN6_MULTI_LOCK(); + MLD_LOCK(); + + if (name[0] <= 0 || name[0] > V_if_index) { + error = ENOENT; + goto out_locked; + } + + error = ENOENT; + + ifp = ifnet_byindex(name[0]); + if (ifp == NULL) + goto out_locked; + + LIST_FOREACH(mli, &V_mli_head, mli_link) { + if (ifp == mli->mli_ifp) { + error = SYSCTL_OUT(req, mli, + sizeof(struct mld_ifinfo)); + break; + } + } + +out_locked: + MLD_UNLOCK(); + IN6_MULTI_UNLOCK(); + return (error); +} + +/* + * Dispatch an entire queue of pending packet chains. + * VIMAGE: Assumes the vnet pointer has been set. + */ +static void +mld_dispatch_queue(struct ifqueue *ifq, int limit) +{ + struct mbuf *m; + + for (;;) { + _IF_DEQUEUE(ifq, m); + if (m == NULL) + break; + CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, ifq, m); + mld_dispatch_packet(m); + if (--limit == 0) + break; + } +} + +/* + * Filter outgoing MLD report state by group. + * + * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1) + * and node-local addresses. However, kernel and socket consumers + * always embed the KAME scope ID in the address provided, so strip it + * when performing comparison. + * Note: This is not the same as the *multicast* scope. + * + * Return zero if the given group is one for which MLD reports + * should be suppressed, or non-zero if reports should be issued. 
+ */ +static __inline int +mld_is_addr_reported(const struct in6_addr *addr) +{ + + KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__)); + + if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL) + return (0); + + if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) { + struct in6_addr tmp = *addr; + in6_clearscope(&tmp); + if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes)) + return (0); + } + + return (1); +} + +/* + * Attach MLD when PF_INET6 is attached to an interface. + * + * SMPng: Normally called with IF_AFDATA_LOCK held. + */ +struct mld_ifinfo * +mld_domifattach(struct ifnet *ifp) +{ + struct mld_ifinfo *mli; + + CTR3(KTR_MLD, "%s: called for ifp %p(%s)", + __func__, ifp, ifp->if_xname); + + MLD_LOCK(); + + mli = mli_alloc_locked(ifp); + if (!(ifp->if_flags & IFF_MULTICAST)) + mli->mli_flags |= MLIF_SILENT; + if (mld_use_allow) + mli->mli_flags |= MLIF_USEALLOW; + + MLD_UNLOCK(); + + return (mli); +} + +/* + * VIMAGE: assume curvnet set by caller. + */ +static struct mld_ifinfo * +mli_alloc_locked(/*const*/ struct ifnet *ifp) +{ + struct mld_ifinfo *mli; + + MLD_LOCK_ASSERT(); + + mli = malloc(sizeof(struct mld_ifinfo), M_MLD, M_NOWAIT|M_ZERO); + if (mli == NULL) + goto out; + + mli->mli_ifp = ifp; + mli->mli_version = MLD_VERSION_2; + mli->mli_flags = 0; + mli->mli_rv = MLD_RV_INIT; + mli->mli_qi = MLD_QI_INIT; + mli->mli_qri = MLD_QRI_INIT; + mli->mli_uri = MLD_URI_INIT; + + SLIST_INIT(&mli->mli_relinmhead); + + /* + * Responses to general queries are subject to bounds. + */ + IFQ_SET_MAXLEN(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS); + + LIST_INSERT_HEAD(&V_mli_head, mli, mli_link); + + CTR2(KTR_MLD, "allocate mld_ifinfo for ifp %p(%s)", + ifp, ifp->if_xname); + +out: + return (mli); +} + +/* + * Hook for ifdetach. + * + * NOTE: Some finalization tasks need to run before the protocol domain + * is detached, but also before the link layer does its cleanup. 
+ * Run before link-layer cleanup; cleanup groups, but do not free MLD state. + * + * SMPng: Caller must hold IN6_MULTI_LOCK(). + * Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator. + * XXX This routine is also bitten by unlocked ifma_protospec access. + */ +void +mld_ifdetach(struct ifnet *ifp) +{ + struct mld_ifinfo *mli; + struct ifmultiaddr *ifma; + struct in6_multi *inm, *tinm; + + CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, + ifp->if_xname); + + IN6_MULTI_LOCK_ASSERT(); + MLD_LOCK(); + + mli = MLD_IFINFO(ifp); + if (mli->mli_version == MLD_VERSION_2) { + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET6 || + ifma->ifma_protospec == NULL) + continue; + inm = (struct in6_multi *)ifma->ifma_protospec; + if (inm->in6m_state == MLD_LEAVING_MEMBER) { + SLIST_INSERT_HEAD(&mli->mli_relinmhead, + inm, in6m_nrele); + } + in6m_clear_recorded(inm); + } + IF_ADDR_UNLOCK(ifp); + SLIST_FOREACH_SAFE(inm, &mli->mli_relinmhead, in6m_nrele, + tinm) { + SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele); + in6m_release_locked(inm); + } + } + + MLD_UNLOCK(); +} + +/* + * Hook for domifdetach. + * Runs after link-layer cleanup; free MLD state. + * + * SMPng: Normally called with IF_AFDATA_LOCK held. + */ +void +mld_domifdetach(struct ifnet *ifp) +{ + + CTR3(KTR_MLD, "%s: called for ifp %p(%s)", + __func__, ifp, ifp->if_xname); + + MLD_LOCK(); + mli_delete_locked(ifp); + MLD_UNLOCK(); +} + +static void +mli_delete_locked(const struct ifnet *ifp) +{ + struct mld_ifinfo *mli, *tmli; + + CTR3(KTR_MLD, "%s: freeing mld_ifinfo for ifp %p(%s)", + __func__, ifp, ifp->if_xname); + + MLD_LOCK_ASSERT(); + + LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) { + if (mli->mli_ifp == ifp) { + /* + * Free deferred General Query responses. 
+ */ + _IF_DRAIN(&mli->mli_gq); + + LIST_REMOVE(mli, mli_link); + + KASSERT(SLIST_EMPTY(&mli->mli_relinmhead), + ("%s: there are dangling in_multi references", + __func__)); + + free(mli, M_MLD); + return; + } + } +#ifdef INVARIANTS + panic("%s: mld_ifinfo not found for ifp %p\n", __func__, ifp); +#endif +} + +/* + * Process a received MLDv1 general or address-specific query. + * Assumes that the query header has been pulled up to sizeof(mld_hdr). + * + * NOTE: Can't be fully const correct as we temporarily embed scope ID in + * mld_addr. This is OK as we own the mbuf chain. + */ +static int +mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, + /*const*/ struct mld_hdr *mld) +{ + struct ifmultiaddr *ifma; + struct mld_ifinfo *mli; + struct in6_multi *inm; + int is_general_query; + uint16_t timer; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + is_general_query = 0; + + if (!mld_v1enable) { + CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &mld->mld_addr), + ifp, ifp->if_xname); + return (0); + } + + /* + * RFC3810 Section 6.2: MLD queries must originate from + * a router's link-local address. + */ + if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_src), + ifp, ifp->if_xname); + return (0); + } + + /* + * Do address field validation upfront before we accept + * the query. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { + /* + * MLDv1 General Query. + * If this was not sent to the all-nodes group, ignore it. + */ + struct in6_addr dst; + + dst = ip6->ip6_dst; + in6_clearscope(&dst); + if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) + return (EINVAL); + is_general_query = 1; + } else { + /* + * Embed scope ID of receiving interface in MLD query for + * lookup whilst we don't hold other locks. 
+ */ + in6_setscope(&mld->mld_addr, ifp, NULL); + } + + IN6_MULTI_LOCK(); + MLD_LOCK(); + IF_ADDR_LOCK(ifp); + + /* + * Switch to MLDv1 host compatibility mode. + */ + mli = MLD_IFINFO(ifp); + KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp)); + mld_set_version(mli, MLD_VERSION_1); + + timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE; + if (timer == 0) + timer = 1; + + if (is_general_query) { + /* + * For each reporting group joined on this + * interface, kick the report timer. + */ + CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)", + ifp, ifp->if_xname); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET6 || + ifma->ifma_protospec == NULL) + continue; + inm = (struct in6_multi *)ifma->ifma_protospec; + mld_v1_update_group(inm, timer); + } + } else { + /* + * MLDv1 Group-Specific Query. + * If this is a group-specific MLDv1 query, we need only + * look up the single group to process it. + */ + inm = in6m_lookup_locked(ifp, &mld->mld_addr); + if (inm != NULL) { + CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &mld->mld_addr), + ifp, ifp->if_xname); + mld_v1_update_group(inm, timer); + } + /* XXX Clear embedded scope ID as userland won't expect it. */ + in6_clearscope(&mld->mld_addr); + } + + IF_ADDR_UNLOCK(ifp); + MLD_UNLOCK(); + IN6_MULTI_UNLOCK(); + + return (0); +} + +/* + * Update the report timer on a group in response to an MLDv1 query. + * + * If we are becoming the reporting member for this group, start the timer. + * If we already are the reporting member for this group, and timer is + * below the threshold, reset it. + * + * We may be updating the group for the first time since we switched + * to MLDv2. If we are, then we must clear any recorded source lists, + * and transition to REPORTING state; the group timer is overloaded + * for group and group-source query responses. 
+ * + * Unlike MLDv2, the delay per group should be jittered + * to avoid bursts of MLDv1 reports. + */ +static void +mld_v1_update_group(struct in6_multi *inm, const int timer) +{ +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__, + ip6_sprintf(ip6tbuf, &inm->in6m_addr), + inm->in6m_ifp->if_xname, timer); + + IN6_MULTI_LOCK_ASSERT(); + + switch (inm->in6m_state) { + case MLD_NOT_MEMBER: + case MLD_SILENT_MEMBER: + break; + case MLD_REPORTING_MEMBER: + if (inm->in6m_timer != 0 && + inm->in6m_timer <= timer) { + CTR1(KTR_MLD, "%s: REPORTING and timer running, " + "skipping.", __func__); + break; + } + /* FALLTHROUGH */ + case MLD_SG_QUERY_PENDING_MEMBER: + case MLD_G_QUERY_PENDING_MEMBER: + case MLD_IDLE_MEMBER: + case MLD_LAZY_MEMBER: + case MLD_AWAKENING_MEMBER: + CTR1(KTR_MLD, "%s: ->REPORTING", __func__); + inm->in6m_state = MLD_REPORTING_MEMBER; + inm->in6m_timer = MLD_RANDOM_DELAY(timer); + V_current_state_timers_running6 = 1; + break; + case MLD_SLEEPING_MEMBER: + CTR1(KTR_MLD, "%s: ->AWAKENING", __func__); + inm->in6m_state = MLD_AWAKENING_MEMBER; + break; + case MLD_LEAVING_MEMBER: + break; + } +} + +/* + * Process a received MLDv2 general, group-specific or + * group-and-source-specific query. + * + * Assumes that the query header has been pulled up to sizeof(mldv2_query). + * + * Return 0 if successful, otherwise an appropriate error code is returned. + */ +static int +mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, + struct mbuf *m, const int off, const int icmp6len) +{ + struct mld_ifinfo *mli; + struct mldv2_query *mld; + struct in6_multi *inm; + uint32_t maxdelay, nsrc, qqi; + int is_general_query; + uint16_t timer; + uint8_t qrv; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + is_general_query = 0; + + /* + * RFC3810 Section 6.2: MLD queries must originate from + * a router's link-local address. 
+ */ + if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_src), + ifp, ifp->if_xname); + return (0); + } + + CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, ifp->if_xname); + + mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off); + + maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */ + if (maxdelay >= 32678) { + maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) << + (MLD_MRC_EXP(maxdelay) + 3); + } + timer = (maxdelay * PR_FASTHZ) / MLD_TIMER_SCALE; + if (timer == 0) + timer = 1; + + qrv = MLD_QRV(mld->mld_misc); + if (qrv < 2) { + CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__, + qrv, MLD_RV_INIT); + qrv = MLD_RV_INIT; + } + + qqi = mld->mld_qqi; + if (qqi >= 128) { + qqi = MLD_QQIC_MANT(mld->mld_qqi) << + (MLD_QQIC_EXP(mld->mld_qqi) + 3); + } + + nsrc = ntohs(mld->mld_numsrc); + if (nsrc > MLD_MAX_GS_SOURCES) + return (EMSGSIZE); + if (icmp6len < sizeof(struct mldv2_query) + + (nsrc * sizeof(struct in6_addr))) + return (EMSGSIZE); + + /* + * Do further input validation upfront to avoid resetting timers + * should we need to discard this query. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { + /* + * General Queries SHOULD be directed to ff02::1. + * A general query with a source list has undefined + * behaviour; discard it. + */ + struct in6_addr dst; + + dst = ip6->ip6_dst; + in6_clearscope(&dst); + if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) || + nsrc > 0) + return (EINVAL); + is_general_query = 1; + } else { + /* + * Embed scope ID of receiving interface in MLD query for + * lookup whilst we don't hold other locks (due to KAME + * locking lameness). We own this mbuf chain just now. 
+ */ + in6_setscope(&mld->mld_addr, ifp, NULL); + } + + IN6_MULTI_LOCK(); + MLD_LOCK(); + IF_ADDR_LOCK(ifp); + + mli = MLD_IFINFO(ifp); + KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp)); + + /* + * Discard the v2 query if we're in Compatibility Mode. + * The RFC is pretty clear that hosts need to stay in MLDv1 mode + * until the Old Version Querier Present timer expires. + */ + if (mli->mli_version != MLD_VERSION_2) + goto out_locked; + + mld_set_version(mli, MLD_VERSION_2); + mli->mli_rv = qrv; + mli->mli_qi = qqi; + mli->mli_qri = maxdelay; + + CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi, + maxdelay); + + if (is_general_query) { + /* + * MLDv2 General Query. + * + * Schedule a current-state report on this ifp for + * all groups, possibly containing source lists. + * + * If there is a pending General Query response + * scheduled earlier than the selected delay, do + * not schedule any other reports. + * Otherwise, reset the interface timer. + */ + CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)", + ifp, ifp->if_xname); + if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) { + mli->mli_v2_timer = MLD_RANDOM_DELAY(timer); + V_interface_timers_running6 = 1; + } + } else { + /* + * MLDv2 Group-specific or Group-and-source-specific Query. + * + * Group-source-specific queries are throttled on + * a per-group basis to defeat denial-of-service attempts. + * Queries for groups we are not a member of on this + * link are simply ignored. + */ + inm = in6m_lookup_locked(ifp, &mld->mld_addr); + if (inm == NULL) + goto out_locked; + if (nsrc > 0) { + if (!ratecheck(&inm->in6m_lastgsrtv, + &V_mld_gsrdelay)) { + CTR1(KTR_MLD, "%s: GS query throttled.", + __func__); + goto out_locked; + } + } + CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)", + ifp, ifp->if_xname); + /* + * If there is a pending General Query response + * scheduled sooner than the selected delay, no + * further report need be scheduled. 
+ * Otherwise, prepare to respond to the + * group-specific or group-and-source query. + */ + if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) + mld_v2_process_group_query(inm, mli, timer, m, off); + + /* XXX Clear embedded scope ID as userland won't expect it. */ + in6_clearscope(&mld->mld_addr); + } + +out_locked: + IF_ADDR_UNLOCK(ifp); + MLD_UNLOCK(); + IN6_MULTI_UNLOCK(); + + return (0); +} + +/* + * Process a recieved MLDv2 group-specific or group-and-source-specific + * query. + * Return <0 if any error occured. Currently this is ignored. + */ +static int +mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifinfo *mli, + int timer, struct mbuf *m0, const int off) +{ + struct mldv2_query *mld; + int retval; + uint16_t nsrc; + + IN6_MULTI_LOCK_ASSERT(); + MLD_LOCK_ASSERT(); + + retval = 0; + mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off); + + switch (inm->in6m_state) { + case MLD_NOT_MEMBER: + case MLD_SILENT_MEMBER: + case MLD_SLEEPING_MEMBER: + case MLD_LAZY_MEMBER: + case MLD_AWAKENING_MEMBER: + case MLD_IDLE_MEMBER: + case MLD_LEAVING_MEMBER: + return (retval); + break; + case MLD_REPORTING_MEMBER: + case MLD_G_QUERY_PENDING_MEMBER: + case MLD_SG_QUERY_PENDING_MEMBER: + break; + } + + nsrc = ntohs(mld->mld_numsrc); + + /* + * Deal with group-specific queries upfront. + * If any group query is already pending, purge any recorded + * source-list state if it exists, and schedule a query response + * for this group-specific query. + */ + if (nsrc == 0) { + if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER || + inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) { + in6m_clear_recorded(inm); + timer = min(inm->in6m_timer, timer); + } + inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER; + inm->in6m_timer = MLD_RANDOM_DELAY(timer); + V_current_state_timers_running6 = 1; + return (retval); + } + + /* + * Deal with the case where a group-and-source-specific query has + * been received but a group-specific query is already pending. 
+ */ + if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) { + timer = min(inm->in6m_timer, timer); + inm->in6m_timer = MLD_RANDOM_DELAY(timer); + V_current_state_timers_running6 = 1; + return (retval); + } + + /* + * Finally, deal with the case where a group-and-source-specific + * query has been received, where a response to a previous g-s-r + * query exists, or none exists. + * In this case, we need to parse the source-list which the Querier + * has provided us with and check if we have any source list filter + * entries at T1 for these sources. If we do not, there is no need + * schedule a report and the query may be dropped. + * If we do, we must record them and schedule a current-state + * report for those sources. + */ + if (inm->in6m_nsrc > 0) { + struct mbuf *m; + uint8_t *sp; + int i, nrecorded; + int soff; + + m = m0; + soff = off + sizeof(struct mldv2_query); + nrecorded = 0; + for (i = 0; i < nsrc; i++) { + sp = mtod(m, uint8_t *) + soff; + retval = in6m_record_source(inm, + (const struct in6_addr *)sp); + if (retval < 0) + break; + nrecorded += retval; + soff += sizeof(struct in6_addr); + if (soff >= m->m_len) { + soff = soff - m->m_len; + m = m->m_next; + if (m == NULL) + break; + } + } + if (nrecorded > 0) { + CTR1(KTR_MLD, + "%s: schedule response to SG query", __func__); + inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER; + inm->in6m_timer = MLD_RANDOM_DELAY(timer); + V_current_state_timers_running6 = 1; + } + } + + return (retval); +} + +/* + * Process a received MLDv1 host membership report. + * Assumes mld points to mld_hdr in pulled up mbuf chain. + * + * NOTE: Can't be fully const correct as we temporarily embed scope ID in + * mld_addr. This is OK as we own the mbuf chain. 
+ */ +static int +mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, + /*const*/ struct mld_hdr *mld) +{ + struct in6_addr src, dst; + struct in6_ifaddr *ia; + struct in6_multi *inm; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + if (!mld_v1enable) { + CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &mld->mld_addr), + ifp, ifp->if_xname); + return (0); + } + + if (ifp->if_flags & IFF_LOOPBACK) + return (0); + + /* + * MLDv1 reports must originate from a host's link-local address, + * or the unspecified address (when booting). + */ + src = ip6->ip6_src; + in6_clearscope(&src); + if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) { + CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_src), + ifp, ifp->if_xname); + return (EINVAL); + } + + /* + * RFC2710 Section 4: MLDv1 reports must pertain to a multicast + * group, and must be directed to the group itself. + */ + dst = ip6->ip6_dst; + in6_clearscope(&dst); + if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) || + !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) { + CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_dst), + ifp, ifp->if_xname); + return (EINVAL); + } + + /* + * Make sure we don't hear our own membership report, as fast + * leave requires knowing that we are the only member of a + * group. Assume we used the link-local address if available, + * otherwise look for ::. + * + * XXX Note that scope ID comparison is needed for the address + * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be + * performed for the on-wire address. 
+ */ + ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); + if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) || + (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) { + if (ia != NULL) + ifa_free(&ia->ia_ifa); + return (0); + } + if (ia != NULL) + ifa_free(&ia->ia_ifa); + + CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, ifp->if_xname); + + /* + * Embed scope ID of receiving interface in MLD query for lookup + * whilst we don't hold other locks (due to KAME locking lameness). + */ + if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) + in6_setscope(&mld->mld_addr, ifp, NULL); + + IN6_MULTI_LOCK(); + MLD_LOCK(); + IF_ADDR_LOCK(ifp); + + /* + * MLDv1 report suppression. + * If we are a member of this group, and our membership should be + * reported, and our group timer is pending or about to be reset, + * stop our group timer by transitioning to the 'lazy' state. + */ + inm = in6m_lookup_locked(ifp, &mld->mld_addr); + if (inm != NULL) { + struct mld_ifinfo *mli; + + mli = inm->in6m_mli; + KASSERT(mli != NULL, + ("%s: no mli for ifp %p", __func__, ifp)); + + /* + * If we are in MLDv2 host mode, do not allow the + * other host's MLDv1 report to suppress our reports. + */ + if (mli->mli_version == MLD_VERSION_2) + goto out_locked; + + inm->in6m_timer = 0; + + switch (inm->in6m_state) { + case MLD_NOT_MEMBER: + case MLD_SILENT_MEMBER: + case MLD_SLEEPING_MEMBER: + break; + case MLD_REPORTING_MEMBER: + case MLD_IDLE_MEMBER: + case MLD_AWAKENING_MEMBER: + CTR3(KTR_MLD, + "report suppressed for %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &mld->mld_addr), + ifp, ifp->if_xname); + case MLD_LAZY_MEMBER: + inm->in6m_state = MLD_LAZY_MEMBER; + break; + case MLD_G_QUERY_PENDING_MEMBER: + case MLD_SG_QUERY_PENDING_MEMBER: + case MLD_LEAVING_MEMBER: + break; + } + } + +out_locked: + MLD_UNLOCK(); + IF_ADDR_UNLOCK(ifp); + IN6_MULTI_UNLOCK(); + + /* XXX Clear embedded scope ID as userland won't expect it. 
*/ + in6_clearscope(&mld->mld_addr); + + return (0); +} + +/* + * MLD input path. + * + * Assume query messages which fit in a single ICMPv6 message header + * have been pulled up. + * Assume that userland will want to see the message, even if it + * otherwise fails kernel input validation; do not free it. + * Pullup may however free the mbuf chain m if it fails. + * + * Return IPPROTO_DONE if we freed m. Otherwise, return 0. + */ +int +mld_input(struct mbuf *m, int off, int icmp6len) +{ + struct ifnet *ifp; + struct ip6_hdr *ip6; + struct mld_hdr *mld; + int mldlen; + + CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off); + + ifp = m->m_pkthdr.rcvif; + + ip6 = mtod(m, struct ip6_hdr *); + + /* Pullup to appropriate size. */ + mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off); + if (mld->mld_type == MLD_LISTENER_QUERY && + icmp6len >= sizeof(struct mldv2_query)) { + mldlen = sizeof(struct mldv2_query); + } else { + mldlen = sizeof(struct mld_hdr); + } + IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen); + if (mld == NULL) { + ICMP6STAT_INC(icp6s_badlen); + return (IPPROTO_DONE); + } + + /* + * Userland needs to see all of this traffic for implementing + * the endpoint discovery portion of multicast routing. + */ + switch (mld->mld_type) { + case MLD_LISTENER_QUERY: + icmp6_ifstat_inc(ifp, ifs6_in_mldquery); + if (icmp6len == sizeof(struct mld_hdr)) { + if (mld_v1_input_query(ifp, ip6, mld) != 0) + return (0); + } else if (icmp6len >= sizeof(struct mldv2_query)) { + if (mld_v2_input_query(ifp, ip6, m, off, + icmp6len) != 0) + return (0); + } + break; + case MLD_LISTENER_REPORT: + icmp6_ifstat_inc(ifp, ifs6_in_mldreport); + if (mld_v1_input_report(ifp, ip6, mld) != 0) + return (0); + break; + case MLDV2_LISTENER_REPORT: + icmp6_ifstat_inc(ifp, ifs6_in_mldreport); + break; + case MLD_LISTENER_DONE: + icmp6_ifstat_inc(ifp, ifs6_in_mlddone); + break; + default: + break; + } + + return (0); +} + +/* + * Fast timeout handler (global). 
+ * VIMAGE: Timeout handlers are expected to service all vimages. + */ +void +mld_fasttimo(void) +{ + VNET_ITERATOR_DECL(vnet_iter); + + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + mld_fasttimo_vnet(); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); +} + +/* + * Fast timeout handler (per-vnet). + * + * VIMAGE: Assume caller has set up our curvnet. + */ +static void +mld_fasttimo_vnet(void) +{ + struct ifqueue scq; /* State-change packets */ + struct ifqueue qrq; /* Query response packets */ + struct ifnet *ifp; + struct mld_ifinfo *mli; + struct ifmultiaddr *ifma, *tifma; + struct in6_multi *inm; + int uri_fasthz; + + uri_fasthz = 0; + + /* + * Quick check to see if any work needs to be done, in order to + * minimize the overhead of fasttimo processing. + * SMPng: XXX Unlocked reads. + */ + if (!V_current_state_timers_running6 && + !V_interface_timers_running6 && + !V_state_change_timers_running6) + return; + + IN6_MULTI_LOCK(); + MLD_LOCK(); + + /* + * MLDv2 General Query response timer processing. + */ + if (V_interface_timers_running6) { + CTR1(KTR_MLD, "%s: interface timers running", __func__); + + V_interface_timers_running6 = 0; + LIST_FOREACH(mli, &V_mli_head, mli_link) { + if (mli->mli_v2_timer == 0) { + /* Do nothing. */ + } else if (--mli->mli_v2_timer == 0) { + mld_v2_dispatch_general_query(mli); + } else { + V_interface_timers_running6 = 1; + } + } + } + + if (!V_current_state_timers_running6 && + !V_state_change_timers_running6) + goto out_locked; + + V_current_state_timers_running6 = 0; + V_state_change_timers_running6 = 0; + + CTR1(KTR_MLD, "%s: state change timers running", __func__); + + /* + * MLD host report and state-change timer processing. + * Note: Processing a v2 group timer may remove a node. 
+ */ + LIST_FOREACH(mli, &V_mli_head, mli_link) { + ifp = mli->mli_ifp; + + if (mli->mli_version == MLD_VERSION_2) { + uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * + PR_FASTHZ); + + memset(&qrq, 0, sizeof(struct ifqueue)); + IFQ_SET_MAXLEN(&qrq, MLD_MAX_G_GS_PACKETS); + + memset(&scq, 0, sizeof(struct ifqueue)); + IFQ_SET_MAXLEN(&scq, MLD_MAX_STATE_CHANGE_PACKETS); + } + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, + tifma) { + if (ifma->ifma_addr->sa_family != AF_INET6 || + ifma->ifma_protospec == NULL) + continue; + inm = (struct in6_multi *)ifma->ifma_protospec; + switch (mli->mli_version) { + case MLD_VERSION_1: + /* + * XXX Drop IF_ADDR lock temporarily to + * avoid recursion caused by a potential + * call by in6ifa_ifpforlinklocal(). + * rwlock candidate? + */ + IF_ADDR_UNLOCK(ifp); + mld_v1_process_group_timer(inm, + mli->mli_version); + IF_ADDR_LOCK(ifp); + break; + case MLD_VERSION_2: + mld_v2_process_group_timers(mli, &qrq, + &scq, inm, uri_fasthz); + break; + } + } + IF_ADDR_UNLOCK(ifp); + + if (mli->mli_version == MLD_VERSION_2) { + struct in6_multi *tinm; + + mld_dispatch_queue(&qrq, 0); + mld_dispatch_queue(&scq, 0); + + /* + * Free the in_multi reference(s) for + * this lifecycle. + */ + SLIST_FOREACH_SAFE(inm, &mli->mli_relinmhead, + in6m_nrele, tinm) { + SLIST_REMOVE_HEAD(&mli->mli_relinmhead, + in6m_nrele); + in6m_release_locked(inm); + } + } + } + +out_locked: + MLD_UNLOCK(); + IN6_MULTI_UNLOCK(); +} + +/* + * Update host report group timer. + * Will update the global pending timer flags. 
+ */ +static void +mld_v1_process_group_timer(struct in6_multi *inm, const int version) +{ + int report_timer_expired; + + IN6_MULTI_LOCK_ASSERT(); + MLD_LOCK_ASSERT(); + + if (inm->in6m_timer == 0) { + report_timer_expired = 0; + } else if (--inm->in6m_timer == 0) { + report_timer_expired = 1; + } else { + V_current_state_timers_running6 = 1; + return; + } + + switch (inm->in6m_state) { + case MLD_NOT_MEMBER: + case MLD_SILENT_MEMBER: + case MLD_IDLE_MEMBER: + case MLD_LAZY_MEMBER: + case MLD_SLEEPING_MEMBER: + case MLD_AWAKENING_MEMBER: + break; + case MLD_REPORTING_MEMBER: + if (report_timer_expired) { + inm->in6m_state = MLD_IDLE_MEMBER; + (void)mld_v1_transmit_report(inm, + MLD_LISTENER_REPORT); + } + break; + case MLD_G_QUERY_PENDING_MEMBER: + case MLD_SG_QUERY_PENDING_MEMBER: + case MLD_LEAVING_MEMBER: + break; + } +} + +/* + * Update a group's timers for MLDv2. + * Will update the global pending timer flags. + * Note: Unlocked read from mli. + */ +static void +mld_v2_process_group_timers(struct mld_ifinfo *mli, + struct ifqueue *qrq, struct ifqueue *scq, + struct in6_multi *inm, const int uri_fasthz) +{ + int query_response_timer_expired; + int state_change_retransmit_timer_expired; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + IN6_MULTI_LOCK_ASSERT(); + MLD_LOCK_ASSERT(); + + query_response_timer_expired = 0; + state_change_retransmit_timer_expired = 0; + + /* + * During a transition from compatibility mode back to MLDv2, + * a group record in REPORTING state may still have its group + * timer active. This is a no-op in this function; it is easier + * to deal with it here than to complicate the slow-timeout path. 
+ */ + if (inm->in6m_timer == 0) { + query_response_timer_expired = 0; + } else if (--inm->in6m_timer == 0) { + query_response_timer_expired = 1; + } else { + V_current_state_timers_running6 = 1; + } + + if (inm->in6m_sctimer == 0) { + state_change_retransmit_timer_expired = 0; + } else if (--inm->in6m_sctimer == 0) { + state_change_retransmit_timer_expired = 1; + } else { + V_state_change_timers_running6 = 1; + } + + /* We are in fasttimo, so be quick about it. */ + if (!state_change_retransmit_timer_expired && + !query_response_timer_expired) + return; + + switch (inm->in6m_state) { + case MLD_NOT_MEMBER: + case MLD_SILENT_MEMBER: + case MLD_SLEEPING_MEMBER: + case MLD_LAZY_MEMBER: + case MLD_AWAKENING_MEMBER: + case MLD_IDLE_MEMBER: + break; + case MLD_G_QUERY_PENDING_MEMBER: + case MLD_SG_QUERY_PENDING_MEMBER: + /* + * Respond to a previously pending Group-Specific + * or Group-and-Source-Specific query by enqueueing + * the appropriate Current-State report for + * immediate transmission. + */ + if (query_response_timer_expired) { + int retval; + + retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1, + (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER), + 0); + CTR2(KTR_MLD, "%s: enqueue record = %d", + __func__, retval); + inm->in6m_state = MLD_REPORTING_MEMBER; + in6m_clear_recorded(inm); + } + /* FALLTHROUGH */ + case MLD_REPORTING_MEMBER: + case MLD_LEAVING_MEMBER: + if (state_change_retransmit_timer_expired) { + /* + * State-change retransmission timer fired. + * If there are any further pending retransmissions, + * set the global pending state-change flag, and + * reset the timer. + */ + if (--inm->in6m_scrv > 0) { + inm->in6m_sctimer = uri_fasthz; + V_state_change_timers_running6 = 1; + } + /* + * Retransmit the previously computed state-change + * report. If there are no further pending + * retransmissions, the mbuf queue will be consumed. + * Update T0 state to T1 as we have now sent + * a state-change. 
+ */ + (void)mld_v2_merge_state_changes(inm, scq); + + in6m_commit(inm); + CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, + ip6_sprintf(ip6tbuf, &inm->in6m_addr), + inm->in6m_ifp->if_xname); + + /* + * If we are leaving the group for good, make sure + * we release MLD's reference to it. + * This release must be deferred using a SLIST, + * as we are called from a loop which traverses + * the in_ifmultiaddr TAILQ. + */ + if (inm->in6m_state == MLD_LEAVING_MEMBER && + inm->in6m_scrv == 0) { + inm->in6m_state = MLD_NOT_MEMBER; + SLIST_INSERT_HEAD(&mli->mli_relinmhead, + inm, in6m_nrele); + } + } + break; + } +} + +/* + * Switch to a different version on the given interface, + * as per Section 9.12. + */ +static void +mld_set_version(struct mld_ifinfo *mli, const int version) +{ + int old_version_timer; + + MLD_LOCK_ASSERT(); + + CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__, + version, mli->mli_ifp, mli->mli_ifp->if_xname); + + if (version == MLD_VERSION_1) { + /* + * Compute the "Older Version Querier Present" timer as per + * Section 9.12. + */ + old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri; + old_version_timer *= PR_SLOWHZ; + mli->mli_v1_timer = old_version_timer; + } + + if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) { + mli->mli_version = MLD_VERSION_1; + mld_v2_cancel_link_timers(mli); + } +} + +/* + * Cancel pending MLDv2 timers for the given link and all groups + * joined on it; state-change, general-query, and group-query timers. + */ +static void +mld_v2_cancel_link_timers(struct mld_ifinfo *mli) +{ + struct ifmultiaddr *ifma; + struct ifnet *ifp; + struct in6_multi *inm; + + CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__, + mli->mli_ifp, mli->mli_ifp->if_xname); + + IN6_MULTI_LOCK_ASSERT(); + MLD_LOCK_ASSERT(); + + /* + * Fast-track this potentially expensive operation + * by checking all the global 'timer pending' flags. 
+ */ + if (!V_interface_timers_running6 && + !V_state_change_timers_running6 && + !V_current_state_timers_running6) + return; + + mli->mli_v2_timer = 0; + + ifp = mli->mli_ifp; + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET6) + continue; + inm = (struct in6_multi *)ifma->ifma_protospec; + switch (inm->in6m_state) { + case MLD_NOT_MEMBER: + case MLD_SILENT_MEMBER: + case MLD_IDLE_MEMBER: + case MLD_LAZY_MEMBER: + case MLD_SLEEPING_MEMBER: + case MLD_AWAKENING_MEMBER: + break; + case MLD_LEAVING_MEMBER: + /* + * If we are leaving the group and switching + * version, we need to release the final + * reference held for issuing the INCLUDE {}. + * + * SMPNG: Must drop and re-acquire IF_ADDR_LOCK + * around in6m_release_locked(), as it is not + * a recursive mutex. + */ + IF_ADDR_UNLOCK(ifp); + in6m_release_locked(inm); + IF_ADDR_LOCK(ifp); + /* FALLTHROUGH */ + case MLD_G_QUERY_PENDING_MEMBER: + case MLD_SG_QUERY_PENDING_MEMBER: + in6m_clear_recorded(inm); + /* FALLTHROUGH */ + case MLD_REPORTING_MEMBER: + inm->in6m_sctimer = 0; + inm->in6m_timer = 0; + inm->in6m_state = MLD_REPORTING_MEMBER; + /* + * Free any pending MLDv2 state-change records. + */ + _IF_DRAIN(&inm->in6m_scq); + break; + } + } + IF_ADDR_UNLOCK(ifp); +} + +/* + * Global slowtimo handler. + * VIMAGE: Timeout handlers are expected to service all vimages. + */ +void +mld_slowtimo(void) +{ + VNET_ITERATOR_DECL(vnet_iter); + + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + mld_slowtimo_vnet(); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); +} + +/* + * Per-vnet slowtimo handler. + */ +static void +mld_slowtimo_vnet(void) +{ + struct mld_ifinfo *mli; + + MLD_LOCK(); + + LIST_FOREACH(mli, &V_mli_head, mli_link) { + mld_v1_process_querier_timers(mli); + } + + MLD_UNLOCK(); +} + +/* + * Update the Older Version Querier Present timers for a link. + * See Section 9.12 of RFC 3810. 
+ */
+static void
+mld_v1_process_querier_timers(struct mld_ifinfo *mli)
+{
+
+	MLD_LOCK_ASSERT();
+
+	if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
+		/*
+		 * MLDv1 Querier Present timer expired; revert to MLDv2.
+		 * The timer is only decremented while the link is not
+		 * already running MLDv2.
+		 */
+		CTR5(KTR_MLD,
+		    "%s: transition from v%d -> v%d on %p(%s)",
+		    __func__, mli->mli_version, MLD_VERSION_2,
+		    mli->mli_ifp, mli->mli_ifp->if_xname);
+		mli->mli_version = MLD_VERSION_2;
+	}
+}
+
+/*
+ * Transmit an MLDv1 report immediately.
+ *
+ * Builds a two-mbuf chain: mh carries the IPv6 header and md carries the
+ * MLD header of the given type for the group in6m->in6m_addr.  The chain
+ * is handed to mld_dispatch_packet().
+ *
+ * Returns ENOMEM if either mbuf cannot be allocated, otherwise 0; the
+ * result of mld_dispatch_packet() is not examined.
+ */
+static int
+mld_v1_transmit_report(struct in6_multi *in6m, const int type)
+{
+	struct ifnet		*ifp;
+	struct in6_ifaddr	*ia;
+	struct ip6_hdr		*ip6;
+	struct mbuf		*mh, *md;
+	struct mld_hdr		*mld;
+
+	IN6_MULTI_LOCK_ASSERT();
+	MLD_LOCK_ASSERT();
+
+	ifp = in6m->in6m_ifp;
+	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
+	/* ia may be NULL if link-local address is tentative. */
+
+	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+	if (mh == NULL) {
+		if (ia != NULL)
+			ifa_free(&ia->ia_ifa);
+		return (ENOMEM);
+	}
+	MGET(md, M_DONTWAIT, MT_DATA);
+	if (md == NULL) {
+		m_free(mh);
+		if (ia != NULL)
+			ifa_free(&ia->ia_ifa);
+		return (ENOMEM);
+	}
+	mh->m_next = md;
+
+	/*
+	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
+	 * that ether_output() does not need to allocate another mbuf
+	 * for the header in the most common case.
+	 */
+	MH_ALIGN(mh, sizeof(struct ip6_hdr));
+	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
+	mh->m_len = sizeof(struct ip6_hdr);
+
+	ip6 = mtod(mh, struct ip6_hdr *);
+	ip6->ip6_flow = 0;
+	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+	ip6->ip6_vfc |= IPV6_VERSION;
+	ip6->ip6_nxt = IPPROTO_ICMPV6;
+	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;	/* unspecified source when no link-local address was found */
+	ip6->ip6_dst = in6m->in6m_addr;
+
+	md->m_len = sizeof(struct mld_hdr);
+	mld = mtod(md, struct mld_hdr *);
+	mld->mld_type = type;
+	mld->mld_code = 0;
+	mld->mld_cksum = 0;	/* zeroed before the checksum is computed below */
+	mld->mld_maxdelay = 0;
+	mld->mld_reserved = 0;
+	mld->mld_addr = in6m->in6m_addr;
+	in6_clearscope(&mld->mld_addr);
+	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
+	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
+
+	mld_save_context(mh, ifp);
+	mh->m_flags |= M_MLDV1;
+
+	mld_dispatch_packet(mh);
+
+	if (ia != NULL)
+		ifa_free(&ia->ia_ifa);
+	return (0);
+}
+
+/*
+ * Process a state change from the upper layer for the given IPv6 group.
+ *
+ * Each socket holds a reference on the in_multi in its own ip_moptions.
+ * The socket layer will have made the necessary updates to the group
+ * state; it is now up to MLD to issue a state change report if there
+ * has been any change between T0 (when the last state-change was issued)
+ * and T1 (now).
+ *
+ * We use the MLDv2 state machine at group level. The MLD module
+ * however makes the decision as to which MLD protocol version to speak.
+ * A state change *from* INCLUDE {} always means an initial join.
+ * A state change *to* INCLUDE {} always means a final leave.
+ *
+ * If delay is non-zero, and the state change is an initial multicast
+ * join, the state change report will be delayed by 'delay' ticks
+ * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise
+ * the initial MLDv2 state change report will be delayed by whichever
+ * is sooner, a pending state-change timer or delay itself.
+ *
+ * VIMAGE: curvnet should have been set by caller, as this routine
+ * is called from the socket option handlers.
+ */
+int
+mld_change_state(struct in6_multi *inm, const int delay)
+{
+	struct mld_ifinfo *mli;
+	struct ifnet *ifp;
+	int error;
+
+	IN6_MULTI_LOCK_ASSERT();
+
+	error = 0;
+
+	/*
+	 * Try to detect if the upper layer just asked us to change state
+	 * for an interface which has now gone away.
+	 *
+	 * With no interface there is no per-link MLD state to consult and
+	 * nothing to report on, so succeed without doing anything rather
+	 * than handing a NULL ifp to MLD_IFINFO() below.
+	 */
+	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
+	ifp = inm->in6m_ifma->ifma_ifp;
+	if (ifp == NULL)
+		return (0);
+
+	/*
+	 * Sanity check that netinet6's notion of ifp is the
+	 * same as net's.
+	 */
+	KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
+
+	MLD_LOCK();
+
+	mli = MLD_IFINFO(ifp);
+	KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
+
+	/*
+	 * If we detect a state transition to or from MCAST_UNDEFINED
+	 * for this group, then we are starting or finishing an MLD
+	 * life cycle for this group.
+	 *
+	 * A filter-mode change that involves MCAST_UNDEFINED on neither
+	 * side falls through to mld_handle_state_change(), as does a
+	 * pure filter set change.
+	 */
+	if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
+		CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__,
+		    inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode);
+		if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
+			CTR1(KTR_MLD, "%s: initial join", __func__);
+			error = mld_initial_join(inm, mli, delay);
+			goto out_locked;
+		} else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
+			CTR1(KTR_MLD, "%s: final leave", __func__);
+			mld_final_leave(inm, mli);
+			goto out_locked;
+		}
+	} else {
+		CTR1(KTR_MLD, "%s: filter set change", __func__);
+	}
+
+	error = mld_handle_state_change(inm, mli);
+
+out_locked:
+	MLD_UNLOCK();
+	return (error);
+}
+
+/*
+ * Perform the initial join for an MLD group.
+ *
+ * When joining a group:
+ *  If the group should have its MLD traffic suppressed, do nothing.
+ *  MLDv1 starts sending MLDv1 host membership reports.
+ *  MLDv2 will schedule an MLDv2 state-change report containing the
+ *  initial state of the membership.
+ *
+ * If the delay argument is non-zero, then we must delay sending the
+ * initial state change for delay ticks (in units of PR_FASTHZ).
+ */
+static int
+mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
+    const int delay)
+{
+	struct ifnet		*ifp;
+	struct ifqueue		*ifq;
+	int			 error, retval, syncstates;
+	int			 odelay;
+#ifdef KTR
+	char			 ip6tbuf[INET6_ADDRSTRLEN];
+#endif
+
+	CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)",
+	    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
+	    inm->in6m_ifp, inm->in6m_ifp->if_xname);
+
+	error = 0;
+	syncstates = 1;
+
+	ifp = inm->in6m_ifp;
+
+	IN6_MULTI_LOCK_ASSERT();
+	MLD_LOCK_ASSERT();
+
+	KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__));
+
+	/*
+	 * Groups joined on loopback or marked as 'not reported',
+	 * enter the MLD_SILENT_MEMBER state and
+	 * are never reported in any protocol exchanges.
+	 * All other groups enter the appropriate state machine
+	 * for the version in use on this link.
+	 * A link marked as MLIF_SILENT causes MLD to be completely
+	 * disabled for the link.
+	 */
+	if ((ifp->if_flags & IFF_LOOPBACK) ||
+	    (mli->mli_flags & MLIF_SILENT) ||
+	    !mld_is_addr_reported(&inm->in6m_addr)) {
+		CTR1(KTR_MLD,
+"%s: not kicking state machine for silent group", __func__);
+		inm->in6m_state = MLD_SILENT_MEMBER;
+		inm->in6m_timer = 0;
+	} else {
+		/*
+		 * Deal with overlapping in_multi lifecycle.
+		 * If this group was LEAVING, then make sure
+		 * we drop the reference we picked up to keep the
+		 * group around for the final INCLUDE {} enqueue.
+		 */
+		if (mli->mli_version == MLD_VERSION_2 &&
+		    inm->in6m_state == MLD_LEAVING_MEMBER)
+			in6m_release_locked(inm);
+
+		inm->in6m_state = MLD_REPORTING_MEMBER;
+
+		switch (mli->mli_version) {
+		case MLD_VERSION_1:
+			/*
+			 * If a delay was provided, only use it if
+			 * it is greater than the delay normally
+			 * used for an MLDv1 state change report,
+			 * and delay sending the initial MLDv1 report
+			 * by not transitioning to the IDLE state.
+			 *
+			 * With no delay the report is sent at once and
+			 * the group timer is only armed if that
+			 * transmission succeeded.
+			 */
+			odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ);
+			if (delay) {
+				inm->in6m_timer = max(delay, odelay);
+				V_current_state_timers_running6 = 1;
+			} else {
+				inm->in6m_state = MLD_IDLE_MEMBER;
+				error = mld_v1_transmit_report(inm,
+				     MLD_LISTENER_REPORT);
+				if (error == 0) {
+					inm->in6m_timer = odelay;
+					V_current_state_timers_running6 = 1;
+				}
+			}
+			break;
+
+		case MLD_VERSION_2:
+			/*
+			 * Defer update of T0 to T1, until the first copy
+			 * of the state change has been transmitted.
+			 */
+			syncstates = 0;
+
+			/*
+			 * Immediately enqueue a State-Change Report for
+			 * this interface, freeing any previous reports.
+			 * Don't kick the timers if there is nothing to do,
+			 * or if an error occurred.
+			 *
+			 * The enqueue routine returns a negative errno on
+			 * failure; it is negated into the positive value
+			 * this function returns.
+			 */
+			ifq = &inm->in6m_scq;
+			_IF_DRAIN(ifq);
+			retval = mld_v2_enqueue_group_record(ifq, inm, 1,
+			    0, 0, (mli->mli_flags & MLIF_USEALLOW));
+			CTR2(KTR_MLD, "%s: enqueue record = %d",
+			    __func__, retval);
+			if (retval <= 0) {
+				error = retval * -1;
+				break;
+			}
+
+			/*
+			 * Schedule transmission of pending state-change
+			 * report up to RV times for this link. The timer
+			 * will fire at the next mld_fasttimo (~200ms),
+			 * giving us an opportunity to merge the reports.
+			 *
+			 * If a delay was provided to this function, only
+			 * use this delay if sooner than the existing one.
+			 */
+			KASSERT(mli->mli_rv > 1,
+			    ("%s: invalid robustness %d", __func__,
+			    mli->mli_rv));
+			inm->in6m_scrv = mli->mli_rv;
+			if (delay) {
+				if (inm->in6m_sctimer > 1) {
+					inm->in6m_sctimer =
+					    min(inm->in6m_sctimer, delay);
+				} else
+					inm->in6m_sctimer = delay;
+			} else
+				inm->in6m_sctimer = 1;
+			V_state_change_timers_running6 = 1;
+
+			error = 0;
+			break;
+		}
+	}
+
+	/*
+	 * Only update the T0 state if state change is atomic,
+	 * i.e. we don't need to wait for a timer to fire before we
+	 * can consider the state change to have been communicated.
+	 * (Only the MLDv2 path above clears syncstates.)
+	 */
+	if (syncstates) {
+		in6m_commit(inm);
+		CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
+		    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
+		    inm->in6m_ifp->if_xname);
+	}
+
+	return (error);
+}
+
+/*
+ * Issue an intermediate state change during the life-cycle.
+ *
+ * On loopback or silent links, for groups that are not reported, and on
+ * links not running MLDv2, the new state is simply committed (T1 -> T0)
+ * and nothing is sent.  Otherwise the group's state-change queue is
+ * drained, a fresh state-change record is enqueued, and the state-change
+ * timer is armed to fire on the next tick.
+ *
+ * Returns 0 on success or when there was nothing to enqueue, or the
+ * positive errno corresponding to a negative result from
+ * mld_v2_enqueue_group_record().
+ */
+static int
+mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
+{
+	struct ifnet		*ifp;
+	int			 retval;
+#ifdef KTR
+	char			 ip6tbuf[INET6_ADDRSTRLEN];
+#endif
+
+	CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)",
+	    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
+	    inm->in6m_ifp, inm->in6m_ifp->if_xname);
+
+	ifp = inm->in6m_ifp;
+
+	IN6_MULTI_LOCK_ASSERT();
+	MLD_LOCK_ASSERT();
+
+	KASSERT(mli && mli->mli_ifp == ifp,
+	    ("%s: inconsistent ifp", __func__));
+
+	if ((ifp->if_flags & IFF_LOOPBACK) ||
+	    (mli->mli_flags & MLIF_SILENT) ||
+	    !mld_is_addr_reported(&inm->in6m_addr) ||
+	    (mli->mli_version != MLD_VERSION_2)) {
+		if (!mld_is_addr_reported(&inm->in6m_addr)) {
+			CTR1(KTR_MLD,
+"%s: not kicking state machine for silent group", __func__);
+		}
+		CTR1(KTR_MLD, "%s: nothing to do", __func__);
+		in6m_commit(inm);
+		CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
+		    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
+		    inm->in6m_ifp->if_xname);
+		return (0);
+	}
+
+	_IF_DRAIN(&inm->in6m_scq);
+
+	retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
+	    (mli->mli_flags & MLIF_USEALLOW));
+	CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval);
+	if (retval <= 0)
+		return (-retval);
+
+	/*
+	 * If record(s) were enqueued, start the state-change
+	 * report timer for this group.
+	 */
+	inm->in6m_scrv = mli->mli_rv;
+	inm->in6m_sctimer = 1;
+	V_state_change_timers_running6 = 1;
+
+	return (0);
+}
+
+/*
+ * Perform the final leave for a multicast address.
+ *
+ * When leaving a group:
+ *  MLDv1 sends a DONE message, if and only if we are the reporter.
+ *  MLDv2 enqueues a state-change report containing a transition
+ *  to INCLUDE {} for immediate transmission.
+ */
+static void
+mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
+{
+	int syncstates;
+#ifdef KTR
+	char ip6tbuf[INET6_ADDRSTRLEN];
+#endif
+
+	syncstates = 1;
+
+	CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)",
+	    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
+	    inm->in6m_ifp, inm->in6m_ifp->if_xname);
+
+	IN6_MULTI_LOCK_ASSERT();
+	MLD_LOCK_ASSERT();
+
+	switch (inm->in6m_state) {
+	case MLD_NOT_MEMBER:
+	case MLD_SILENT_MEMBER:
+	case MLD_LEAVING_MEMBER:
+		/* Already leaving or left; do nothing. */
+		CTR1(KTR_MLD,
+"%s: not kicking state machine for silent group", __func__);
+		break;
+	case MLD_REPORTING_MEMBER:
+	case MLD_IDLE_MEMBER:
+	case MLD_G_QUERY_PENDING_MEMBER:
+	case MLD_SG_QUERY_PENDING_MEMBER:
+		if (mli->mli_version == MLD_VERSION_1) {
+#ifdef INVARIANTS
+			if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
+			    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER)
+			panic("%s: MLDv2 state reached, not MLDv2 mode",
+			     __func__);
+#endif
+			mld_v1_transmit_report(inm, MLD_LISTENER_DONE);	/* result is not checked */
+			inm->in6m_state = MLD_NOT_MEMBER;
+		} else if (mli->mli_version == MLD_VERSION_2) {
+			/*
+			 * Stop group timer and all pending reports.
+			 * Immediately enqueue a state-change report
+			 * TO_IN {} to be sent on the next fast timeout,
+			 * giving us an opportunity to merge reports.
+			 *
+			 * A reference is taken on the group before the
+			 * record is enqueued, and the T1 -> T0 commit at
+			 * the bottom of this function is skipped for this
+			 * path (syncstates is cleared).
+			 */
+			_IF_DRAIN(&inm->in6m_scq);
+			inm->in6m_timer = 0;
+			inm->in6m_scrv = mli->mli_rv;
+			CTR4(KTR_MLD, "%s: Leaving %s/%s with %d "
+			    "pending retransmissions.", __func__,
+			    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
+			    inm->in6m_ifp->if_xname, inm->in6m_scrv);
+			if (inm->in6m_scrv == 0) {
+				inm->in6m_state = MLD_NOT_MEMBER;
+				inm->in6m_sctimer = 0;
+			} else {
+				int retval;
+
+				in6m_acquire_locked(inm);
+
+				retval = mld_v2_enqueue_group_record(
+				    &inm->in6m_scq, inm, 1, 0, 0,
+				    (mli->mli_flags & MLIF_USEALLOW));
+				KASSERT(retval != 0,
+				    ("%s: enqueue record = %d", __func__,
+				     retval));
+
+				inm->in6m_state = MLD_LEAVING_MEMBER;
+				inm->in6m_sctimer = 1;
+				V_state_change_timers_running6 = 1;
+				syncstates = 0;
+			}
+			break;
+		}
+		break;
+	case MLD_LAZY_MEMBER:
+	case MLD_SLEEPING_MEMBER:
+	case MLD_AWAKENING_MEMBER:
+		/* Our reports are suppressed; do nothing. */
+		break;
+	}
+
+	if (syncstates) {
+		in6m_commit(inm);
+		CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
+		    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
+		    inm->in6m_ifp->if_xname);
+		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;	/* T1 is reset after the commit */
+		CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s",
+		    __func__, &inm->in6m_addr, inm->in6m_ifp->if_xname);
+	}
+}
+
+/*
+ * Enqueue an MLDv2 group record to the given output queue.
+ *
+ * If is_state_change is zero, a current-state record is appended.
+ * If is_state_change is non-zero, a state-change report is appended.
+ *
+ * If is_group_query is non-zero, an mbuf packet chain is allocated.
+ * If is_group_query is zero, and if there is a packet with free space
+ * at the tail of the queue, it will be appended to providing there
+ * is enough free space.
+ * Otherwise a new mbuf packet chain is allocated.
+ *
+ * If is_source_query is non-zero, each source is checked to see if
+ * it was recorded for a Group-Source query, and will be omitted if
+ * it is not both in-mode and recorded.
+ * + * If use_block_allow is non-zero, state change reports for initial join + * and final leave, on an inclusive mode group with a source list, will be + * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively. + * + * The function will attempt to allocate leading space in the packet + * for the IPv6+ICMP headers to be prepended without fragmenting the chain. + * + * If successful the size of all data appended to the queue is returned, + * otherwise an error code less than zero is returned, or zero if + * no record(s) were appended. + */ +static int +mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, + const int is_state_change, const int is_group_query, + const int is_source_query, const int use_block_allow) +{ + struct mldv2_record mr; + struct mldv2_record *pmr; + struct ifnet *ifp; + struct ip6_msource *ims, *nims; + struct mbuf *m0, *m, *md; + int error, is_filter_list_change; + int minrec0len, m0srcs, msrcs, nbytes, off; + int record_has_sources; + int now; + int type; + uint8_t mode; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + IN6_MULTI_LOCK_ASSERT(); + + error = 0; + ifp = inm->in6m_ifp; + is_filter_list_change = 0; + m = NULL; + m0 = NULL; + m0srcs = 0; + msrcs = 0; + nbytes = 0; + nims = NULL; + record_has_sources = 1; + pmr = NULL; + type = MLD_DO_NOTHING; + mode = inm->in6m_st[1].iss_fmode; + + /* + * If we did not transition out of ASM mode during t0->t1, + * and there are no source nodes to process, we can skip + * the generation of source records. + */ + if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 && + inm->in6m_nsrc == 0) + record_has_sources = 0; + + if (is_state_change) { + /* + * Queue a state change record. + * If the mode did not change, and there are non-ASM + * listeners or source filters present, + * we potentially need to issue two records for the group. + * If there are ASM listeners, and there was no filter + * mode transition of any kind, do nothing. 
+ * + * If we are transitioning to MCAST_UNDEFINED, we need + * not send any sources. A transition to/from this state is + * considered inclusive with some special treatment. + * + * If we are rewriting initial joins/leaves to use + * ALLOW/BLOCK, and the group's membership is inclusive, + * we need to send sources in all cases. + */ + if (mode != inm->in6m_st[0].iss_fmode) { + if (mode == MCAST_EXCLUDE) { + CTR1(KTR_MLD, "%s: change to EXCLUDE", + __func__); + type = MLD_CHANGE_TO_EXCLUDE_MODE; + } else { + CTR1(KTR_MLD, "%s: change to INCLUDE", + __func__); + if (use_block_allow) { + /* + * XXX + * Here we're interested in state + * edges either direction between + * MCAST_UNDEFINED and MCAST_INCLUDE. + * Perhaps we should just check + * the group state, rather than + * the filter mode. + */ + if (mode == MCAST_UNDEFINED) { + type = MLD_BLOCK_OLD_SOURCES; + } else { + type = MLD_ALLOW_NEW_SOURCES; + } + } else { + type = MLD_CHANGE_TO_INCLUDE_MODE; + if (mode == MCAST_UNDEFINED) + record_has_sources = 0; + } + } + } else { + if (record_has_sources) { + is_filter_list_change = 1; + } else { + type = MLD_DO_NOTHING; + } + } + } else { + /* + * Queue a current state record. + */ + if (mode == MCAST_EXCLUDE) { + type = MLD_MODE_IS_EXCLUDE; + } else if (mode == MCAST_INCLUDE) { + type = MLD_MODE_IS_INCLUDE; + KASSERT(inm->in6m_st[1].iss_asm == 0, + ("%s: inm %p is INCLUDE but ASM count is %d", + __func__, inm, inm->in6m_st[1].iss_asm)); + } + } + + /* + * Generate the filter list changes using a separate function. + */ + if (is_filter_list_change) + return (mld_v2_enqueue_filter_change(ifq, inm)); + + if (type == MLD_DO_NOTHING) { + CTR3(KTR_MLD, "%s: nothing to do for %s/%s", + __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), + inm->in6m_ifp->if_xname); + return (0); + } + + /* + * If any sources are present, we must be able to fit at least + * one in the trailing space of the tail packet's mbuf, + * ideally more. 
+ */ + minrec0len = sizeof(struct mldv2_record); + if (record_has_sources) + minrec0len += sizeof(struct in6_addr); + + CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__, + mld_rec_type_to_str(type), + ip6_sprintf(ip6tbuf, &inm->in6m_addr), + inm->in6m_ifp->if_xname); + + /* + * Check if we have a packet in the tail of the queue for this + * group into which the first group record for this group will fit. + * Otherwise allocate a new packet. + * Always allocate leading space for IP6+RA+ICMPV6+REPORT. + * Note: Group records for G/GSR query responses MUST be sent + * in their own packet. + */ + m0 = ifq->ifq_tail; + if (!is_group_query && + m0 != NULL && + (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) && + (m0->m_pkthdr.len + minrec0len) < + (ifp->if_mtu - MLD_MTUSPACE)) { + m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - + sizeof(struct mldv2_record)) / + sizeof(struct in6_addr); + m = m0; + CTR1(KTR_MLD, "%s: use existing packet", __func__); + } else { + if (_IF_QFULL(ifq)) { + CTR1(KTR_MLD, "%s: outbound queue full", __func__); + return (-ENOMEM); + } + m = NULL; + m0srcs = (ifp->if_mtu - MLD_MTUSPACE - + sizeof(struct mldv2_record)) / sizeof(struct in6_addr); + if (!is_state_change && !is_group_query) + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + if (m == NULL) + m = m_gethdr(M_DONTWAIT, MT_DATA); + if (m == NULL) + return (-ENOMEM); + + mld_save_context(m, ifp); + + CTR1(KTR_MLD, "%s: allocated first packet", __func__); + } + + /* + * Append group record. + * If we have sources, we don't know how many yet. + */ + mr.mr_type = type; + mr.mr_datalen = 0; + mr.mr_numsrc = 0; + mr.mr_addr = inm->in6m_addr; + in6_clearscope(&mr.mr_addr); + if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { + if (m != m0) + m_freem(m); + CTR1(KTR_MLD, "%s: m_append() failed.", __func__); + return (-ENOMEM); + } + nbytes += sizeof(struct mldv2_record); + + /* + * Append as many sources as will fit in the first packet. 
+ * If we are appending to a new packet, the chain allocation + * may potentially use clusters; use m_getptr() in this case. + * If we are appending to an existing packet, we need to obtain + * a pointer to the group record after m_append(), in case a new + * mbuf was allocated. + * + * Only append sources which are in-mode at t1. If we are + * transitioning to MCAST_UNDEFINED state on the group, and + * use_block_allow is zero, do not include source entries. + * Otherwise, we need to include this source in the report. + * + * Only report recorded sources in our filter set when responding + * to a group-source query. + */ + if (record_has_sources) { + if (m == m0) { + md = m_last(m); + pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + + md->m_len - nbytes); + } else { + md = m_getptr(m, 0, &off); + pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + + off); + } + msrcs = 0; + RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, + nims) { + CTR2(KTR_MLD, "%s: visit node %s", __func__, + ip6_sprintf(ip6tbuf, &ims->im6s_addr)); + now = im6s_get_mode(inm, ims, 1); + CTR2(KTR_MLD, "%s: node is %d", __func__, now); + if ((now != mode) || + (now == mode && + (!use_block_allow && mode == MCAST_UNDEFINED))) { + CTR1(KTR_MLD, "%s: skip node", __func__); + continue; + } + if (is_source_query && ims->im6s_stp == 0) { + CTR1(KTR_MLD, "%s: skip unrecorded node", + __func__); + continue; + } + CTR1(KTR_MLD, "%s: append node", __func__); + if (!m_append(m, sizeof(struct in6_addr), + (void *)&ims->im6s_addr)) { + if (m != m0) + m_freem(m); + CTR1(KTR_MLD, "%s: m_append() failed.", + __func__); + return (-ENOMEM); + } + nbytes += sizeof(struct in6_addr); + ++msrcs; + if (msrcs == m0srcs) + break; + } + CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__, + msrcs); + pmr->mr_numsrc = htons(msrcs); + nbytes += (msrcs * sizeof(struct in6_addr)); + } + + if (is_source_query && msrcs == 0) { + CTR1(KTR_MLD, "%s: no recorded sources to report", __func__); + if (m != m0) + 
m_freem(m); + return (0); + } + + /* + * We are good to go with first packet. + */ + if (m != m0) { + CTR1(KTR_MLD, "%s: enqueueing first packet", __func__); + m->m_pkthdr.PH_vt.vt_nrecs = 1; + _IF_ENQUEUE(ifq, m); + } else + m->m_pkthdr.PH_vt.vt_nrecs++; + + /* + * No further work needed if no source list in packet(s). + */ + if (!record_has_sources) + return (nbytes); + + /* + * Whilst sources remain to be announced, we need to allocate + * a new packet and fill out as many sources as will fit. + * Always try for a cluster first. + */ + while (nims != NULL) { + if (_IF_QFULL(ifq)) { + CTR1(KTR_MLD, "%s: outbound queue full", __func__); + return (-ENOMEM); + } + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + if (m == NULL) + m = m_gethdr(M_DONTWAIT, MT_DATA); + if (m == NULL) + return (-ENOMEM); + mld_save_context(m, ifp); + md = m_getptr(m, 0, &off); + pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off); + CTR1(KTR_MLD, "%s: allocated next packet", __func__); + + if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { + if (m != m0) + m_freem(m); + CTR1(KTR_MLD, "%s: m_append() failed.", __func__); + return (-ENOMEM); + } + m->m_pkthdr.PH_vt.vt_nrecs = 1; + nbytes += sizeof(struct mldv2_record); + + m0srcs = (ifp->if_mtu - MLD_MTUSPACE - + sizeof(struct mldv2_record)) / sizeof(struct in6_addr); + + msrcs = 0; + RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { + CTR2(KTR_MLD, "%s: visit node %s", + __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr)); + now = im6s_get_mode(inm, ims, 1); + if ((now != mode) || + (now == mode && + (!use_block_allow && mode == MCAST_UNDEFINED))) { + CTR1(KTR_MLD, "%s: skip node", __func__); + continue; + } + if (is_source_query && ims->im6s_stp == 0) { + CTR1(KTR_MLD, "%s: skip unrecorded node", + __func__); + continue; + } + CTR1(KTR_MLD, "%s: append node", __func__); + if (!m_append(m, sizeof(struct in6_addr), + (void *)&ims->im6s_addr)) { + if (m != m0) + m_freem(m); + CTR1(KTR_MLD, "%s: m_append() failed.", + __func__); + 
return (-ENOMEM); + } + ++msrcs; + if (msrcs == m0srcs) + break; + } + pmr->mr_numsrc = htons(msrcs); + nbytes += (msrcs * sizeof(struct in6_addr)); + + CTR1(KTR_MLD, "%s: enqueueing next packet", __func__); + _IF_ENQUEUE(ifq, m); + } + + return (nbytes); +} + +/* + * Type used to mark record pass completion. + * We exploit the fact we can cast to this easily from the + * current filter modes on each ip_msource node. + */ +typedef enum { + REC_NONE = 0x00, /* MCAST_UNDEFINED */ + REC_ALLOW = 0x01, /* MCAST_INCLUDE */ + REC_BLOCK = 0x02, /* MCAST_EXCLUDE */ + REC_FULL = REC_ALLOW | REC_BLOCK +} rectype_t; + +/* + * Enqueue an MLDv2 filter list change to the given output queue. + * + * Source list filter state is held in an RB-tree. When the filter list + * for a group is changed without changing its mode, we need to compute + * the deltas between T0 and T1 for each source in the filter set, + * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records. + * + * As we may potentially queue two record types, and the entire R-B tree + * needs to be walked at once, we break this out into its own function + * so we can generate a tightly packed queue of packets. + * + * XXX This could be written to only use one tree walk, although that makes + * serializing into the mbuf chains a bit harder. For now we do two walks + * which makes things easier on us, and it may or may not be harder on + * the L2 cache. + * + * If successful the size of all data appended to the queue is returned, + * otherwise an error code less than zero is returned, or zero if + * no record(s) were appended. 
+ */
+static int
+mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
+{
+	static const int MINRECLEN =
+	    sizeof(struct mldv2_record) + sizeof(struct in6_addr);
+	struct ifnet		*ifp;
+	struct mldv2_record	 mr;
+	struct mldv2_record	*pmr;
+	struct ip6_msource	*ims, *nims;
+	struct mbuf		*m, *m0, *md;
+	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
+	int			 nallow, nblock;
+	uint8_t			 mode, now, then;
+	rectype_t		 crt, drt, nrt;
+#ifdef KTR
+	char			 ip6tbuf[INET6_ADDRSTRLEN];
+#endif
+
+	IN6_MULTI_LOCK_ASSERT();
+
+	if (inm->in6m_nsrc == 0 ||
+	    (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
+		return (0);
+
+	ifp = inm->in6m_ifp;			/* interface */
+	mode = inm->in6m_st[1].iss_fmode;	/* filter mode at t1 */
+	crt = REC_NONE;	/* current group record type */
+	drt = REC_NONE;	/* mask of completed group record types */
+	nrt = REC_NONE;	/* record type for current node */
+	m0srcs = 0;	/* # source which will fit in current mbuf chain */
+	npbytes = 0;	/* # of bytes appended this packet */
+	nbytes = 0;	/* # of bytes appended to group's state-change queue */
+	rsrcs = 0;	/* # sources encoded in current record */
+	schanged = 0;	/* # nodes encoded in overall filter change */
+	nallow = 0;	/* # of source entries in ALLOW_NEW */
+	nblock = 0;	/* # of source entries in BLOCK_OLD */
+	nims = NULL;	/* next tree node pointer */
+
+	/*
+	 * For each possible filter record mode.
+	 * The first kind of source we encounter tells us which
+	 * is the first kind of record we start appending.
+	 * If a node transitioned to UNDEFINED at t1, its mode is treated
+	 * as the inverse of the group's filter mode.
+	 *
+	 * The outer loop ends once both record types have been marked
+	 * done in drt; after each pass crt is flipped to the other type.
+	 */
+	while (drt != REC_FULL) {
+		do {
+			m0 = ifq->ifq_tail;
+			if (m0 != NULL &&
+			    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
+			     MLD_V2_REPORT_MAXRECS) &&
+			    (m0->m_pkthdr.len + MINRECLEN) <
+			     (ifp->if_mtu - MLD_MTUSPACE)) {
+				m = m0;
+				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
+					    sizeof(struct mldv2_record)) /
+					    sizeof(struct in6_addr);
+				CTR1(KTR_MLD,
+				    "%s: use previous packet", __func__);
+			} else {
+				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+				if (m == NULL)
+					m = m_gethdr(M_DONTWAIT, MT_DATA);
+				if (m == NULL) {
+					CTR1(KTR_MLD,
+					    "%s: m_get*() failed", __func__);
+					return (-ENOMEM);
+				}
+				m->m_pkthdr.PH_vt.vt_nrecs = 0;
+				mld_save_context(m, ifp);
+				m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
+				    sizeof(struct mldv2_record)) /
+				    sizeof(struct in6_addr);
+				npbytes = 0;
+				CTR1(KTR_MLD,
+				    "%s: allocated new packet", __func__);
+			}
+			/*
+			 * Append the MLD group record header to the
+			 * current packet's data area.
+			 * Recalculate pointer to free space for next
+			 * group record, in case m_append() allocated
+			 * a new mbuf or cluster.
+			 *
+			 * The record type and source count are filled in
+			 * through pmr once the tree walk below has
+			 * finished.
+			 */
+			memset(&mr, 0, sizeof(mr));
+			mr.mr_addr = inm->in6m_addr;
+			in6_clearscope(&mr.mr_addr);
+			if (!m_append(m, sizeof(mr), (void *)&mr)) {
+				if (m != m0)
+					m_freem(m);
+				CTR1(KTR_MLD,
+				    "%s: m_append() failed", __func__);
+				return (-ENOMEM);
+			}
+			npbytes += sizeof(struct mldv2_record);
+			if (m != m0) {
+				/* new packet; offset in chain */
+				md = m_getptr(m, npbytes -
+				    sizeof(struct mldv2_record), &off);
+				pmr = (struct mldv2_record *)(mtod(md,
+				    uint8_t *) + off);
+			} else {
+				/* current packet; offset from last append */
+				md = m_last(m);
+				pmr = (struct mldv2_record *)(mtod(md,
+				    uint8_t *) + md->m_len -
+				    sizeof(struct mldv2_record));
+			}
+			/*
+			 * Begin walking the tree for this record type
+			 * pass, or continue from where we left off
+			 * previously if we had to allocate a new packet.
+			 * Only report deltas in-mode at t1.
+			 * We need not report included sources as allowed
+			 * if we are in inclusive mode on the group,
+			 * however the converse is not true.
+			 */
+			rsrcs = 0;
+			if (nims == NULL) {
+				nims = RB_MIN(ip6_msource_tree,
+				    &inm->in6m_srcs);
+			}
+			RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
+				CTR2(KTR_MLD, "%s: visit node %s", __func__,
+				    ip6_sprintf(ip6tbuf, &ims->im6s_addr));
+				now = im6s_get_mode(inm, ims, 1);
+				then = im6s_get_mode(inm, ims, 0);
+				CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d",
+				    __func__, then, now);
+				if (now == then) {
+					CTR1(KTR_MLD,
+					    "%s: skip unchanged", __func__);
+					continue;
+				}
+				if (mode == MCAST_EXCLUDE &&
+				    now == MCAST_INCLUDE) {
+					CTR1(KTR_MLD,
+					    "%s: skip IN src on EX group",
+					    __func__);
+					continue;
+				}
+				nrt = (rectype_t)now;
+				if (nrt == REC_NONE)
+					nrt = (rectype_t)(~mode & REC_FULL);
+				if (schanged++ == 0) {
+					crt = nrt;
+				} else if (crt != nrt)
+					continue;
+				if (!m_append(m, sizeof(struct in6_addr),
+				    (void *)&ims->im6s_addr)) {
+					if (m != m0)
+						m_freem(m);
+					CTR1(KTR_MLD,
+					    "%s: m_append() failed", __func__);
+					return (-ENOMEM);
+				}
+				nallow += !!(crt == REC_ALLOW);
+				nblock += !!(crt == REC_BLOCK);
+				if (++rsrcs == m0srcs)
+					break;
+			}
+			/*
+			 * If we did not append any tree nodes on this
+			 * pass, back out of allocations.
+			 * A freshly allocated packet is freed; otherwise
+			 * the record header just appended to the existing
+			 * packet is trimmed off again.
+			 */
+			if (rsrcs == 0) {
+				npbytes -= sizeof(struct mldv2_record);
+				if (m != m0) {
+					CTR1(KTR_MLD,
+					    "%s: m_free(m)", __func__);
+					m_freem(m);
+				} else {
+					CTR1(KTR_MLD,
+					    "%s: m_adj(m, -mr)", __func__);
+					m_adj(m, -((int)sizeof(
+					    struct mldv2_record)));
+				}
+				continue;
+			}
+			npbytes += (rsrcs * sizeof(struct in6_addr));
+			if (crt == REC_ALLOW)
+				pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
+			else if (crt == REC_BLOCK)
+				pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
+			pmr->mr_numsrc = htons(rsrcs);
+			/*
+			 * Count the new group record, and enqueue this
+			 * packet if it wasn't already queued.
+			 */
+			m->m_pkthdr.PH_vt.vt_nrecs++;
+			if (m != m0)
+				_IF_ENQUEUE(ifq, m);
+			nbytes += npbytes;
+		} while (nims != NULL);
+		drt |= crt;
+		crt = (~crt & REC_FULL);
+	}
+
+	CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
+	    nallow, nblock);
+
+	return (nbytes);
+}
+
+static int
+mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
+{
+	struct ifqueue	*gq;
+	struct mbuf	*m;		/* pending state-change */
+	struct mbuf	*m0;		/* copy of pending state-change */
+	struct mbuf	*mt;		/* last state-change in packet */
+	int		 docopy, domerge;
+	u_int		 recslen;
+
+	docopy = 0;
+	domerge = 0;
+	recslen = 0;
+
+	IN6_MULTI_LOCK_ASSERT();
+	MLD_LOCK_ASSERT();
+
+	/*
+	 * Move (or copy) the state-change packets queued on the group
+	 * (inm->in6m_scq) onto the interface state-change queue ifscq,
+	 * concatenating onto ifscq's tail packet where it fits.
+	 * Returns 0, or ENOMEM if a copy could not be allocated.
+	 *
+	 * If there are further pending retransmissions, make a writable
+	 * copy of each queued state-change message before merging.
+	 */
+	if (inm->in6m_scrv > 0)
+		docopy = 1;
+
+	gq = &inm->in6m_scq;
+#ifdef KTR
+	if (gq->ifq_head == NULL) {
+		CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty",
+		    __func__, inm);
+	}
+#endif
+
+	m = gq->ifq_head;
+	while (m != NULL) {
+		/*
+		 * Only merge the report into the current packet if
+		 * there is sufficient space to do so; an MLDv2 report
+		 * packet may only contain 65,535 group records.
+		 * Always use a simple mbuf chain concatenation to do this,
+		 * as large state changes for single groups may have
+		 * allocated clusters.
+		 *
+		 * NOTE(review): in the !docopy path below the walk
+		 * continues from m0->m_nextpkt after _IF_DEQUEUE();
+		 * confirm whether the macro clears that link, which
+		 * would end the loop after one packet.
+		 * NOTE(review): the queue-full test below checks gq,
+		 * the source queue, not ifscq - confirm this is intended.
+		 */
+		domerge = 0;
+		mt = ifscq->ifq_tail;
+		if (mt != NULL) {
+			recslen = m_length(m, NULL);
+
+			if ((mt->m_pkthdr.PH_vt.vt_nrecs +
+			    m->m_pkthdr.PH_vt.vt_nrecs <=
+			    MLD_V2_REPORT_MAXRECS) &&
+			    (mt->m_pkthdr.len + recslen <=
+			    (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
+				domerge = 1;
+		}
+
+		if (!domerge && _IF_QFULL(gq)) {
+			CTR2(KTR_MLD,
+			    "%s: outbound queue full, skipping whole packet %p",
+			    __func__, m);
+			mt = m->m_nextpkt;
+			if (!docopy)
+				m_freem(m);
+			m = mt;
+			continue;
+		}
+
+		if (!docopy) {
+			CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m);
+			_IF_DEQUEUE(gq, m0);
+			m = m0->m_nextpkt;
+		} else {
+			CTR2(KTR_MLD, "%s: copying %p", __func__, m);
+			m0 = m_dup(m, M_NOWAIT);
+			if (m0 == NULL)
+				return (ENOMEM);
+			m0->m_nextpkt = NULL;
+			m = m->m_nextpkt;
+		}
+
+		if (!domerge) {
+			CTR3(KTR_MLD, "%s: queueing %p to ifscq %p)",
+			    __func__, m0, ifscq);
+			_IF_ENQUEUE(ifscq, m0);
+		} else {
+			struct mbuf *mtl;	/* last mbuf of packet mt */
+
+			CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p)",
+			    __func__, m0, mt);
+
+			mtl = m_last(mt);
+			m0->m_flags &= ~M_PKTHDR;
+			mt->m_pkthdr.len += recslen;
+			mt->m_pkthdr.PH_vt.vt_nrecs +=
+			    m0->m_pkthdr.PH_vt.vt_nrecs;
+
+			mtl->m_next = m0;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Respond to a pending MLDv2 General Query.
+ */
+static void
+mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
+{
+	struct ifmultiaddr *ifma, *ifma_next;
+	struct ifqueue *gq;
+	struct in6_multi *inm;
+	struct ifnet *ifp;
+	int ret;
+
+	IN6_MULTI_LOCK_ASSERT();
+	MLD_LOCK_ASSERT();
+
+	KASSERT(mli->mli_version == MLD_VERSION_2,
+	    ("%s: called when version %d", __func__, mli->mli_version));
+
+	ifp = mli->mli_ifp;
+	gq = &mli->mli_gq;	/* per-link queue of general query responses */
+
+	/*
+	 * Walk the link's multicast memberships under the address lock
+	 * and enqueue a group record for each IPv6 group whose state
+	 * calls for one.
+	 */
+	IF_ADDR_LOCK(ifp);
+	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, ifma_next) {
+		/* Only IPv6 memberships carrying MLD state are of interest. */
+		if (ifma->ifma_addr->sa_family != AF_INET6)
+			continue;
+		if (ifma->ifma_protospec == NULL)
+			continue;
+
+		inm = (struct in6_multi *)ifma->ifma_protospec;
+		KASSERT(ifp == inm->in6m_ifp,
+		    ("%s: inconsistent ifp", __func__));
+
+		switch (inm->in6m_state) {
+		case MLD_REPORTING_MEMBER:
+		case MLD_IDLE_MEMBER:
+		case MLD_LAZY_MEMBER:
+		case MLD_SLEEPING_MEMBER:
+		case MLD_AWAKENING_MEMBER:
+			inm->in6m_state = MLD_REPORTING_MEMBER;
+			ret = mld_v2_enqueue_group_record(gq, inm, 0, 0, 0, 0);
+			CTR2(KTR_MLD, "%s: enqueue record = %d",
+			    __func__, ret);
+			break;
+		default:
+			/*
+			 * MLD_NOT_MEMBER, MLD_SILENT_MEMBER, the two
+			 * *_QUERY_PENDING states and MLD_LEAVING_MEMBER:
+			 * nothing is enqueued for these groups.
+			 */
+			break;
+		}
+	}
+	IF_ADDR_UNLOCK(ifp);
+
+	mld_dispatch_queue(gq, MLD_MAX_RESPONSE_BURST);
+
+	/* Nothing left on the queue after this burst: no timer needed. */
+	if (gq->ifq_head == NULL)
+		return;
+
+	/*
+	 * Slew transmission of bursts over 500ms intervals.
+	 */
+	mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(MLD_RESPONSE_BURST_INTERVAL);
+	V_interface_timers_running6 = 1;
+}
+
+/*
+ * Transmit the next pending message in the output queue.
+ *
+ * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
+ * MRT: Nothing needs to be done, as MLD traffic is always local to
+ * a link and uses a link-scope multicast address.
+ */
+static void
+mld_dispatch_packet(struct mbuf *m)
+{
+	struct ip6_moptions im6o;
+	struct ifnet *ifp;
+	struct ifnet *oifp;
+	struct mbuf *m0;
+	struct mbuf *md;
+	struct ip6_hdr *ip6;
+	struct mld_hdr *mld;
+	int error;
+	int off;
+	int type;
+	uint32_t ifindex;
+
+	CTR2(KTR_MLD, "%s: transmit %p", __func__, m);
+
+	/*
+	 * Set VNET image pointer from enqueued mbuf chain
+	 * before doing anything else. Whilst we use interface
+	 * indexes to guard against interface detach, they are
+	 * unique to each VIMAGE and must be retrieved.
+	 */
+	ifindex = mld_restore_context(m);
+
+	/*
+	 * Check if the ifnet still exists. This limits the scope of
+	 * any race in the absence of a global ifp lock for low cost
+	 * (an array lookup).
+	 */
+	ifp = ifnet_byindex(ifindex);
+	if (ifp == NULL) {
+		CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.",
+		    __func__, m, ifindex);
+		m_freem(m);
+		IP6STAT_INC(ip6s_noroute);
+		goto out;
+	}
+
+	/* Multicast options handed to ip6_output(): hop limit 1, this ifp. */
+	im6o.im6o_multicast_hlim = 1;
+	im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL);
+	im6o.im6o_multicast_ifp = ifp;
+
+	if (m->m_flags & M_MLDV1) {
+		/* MLDv1 messages are queued fully formed. */
+		m0 = m;
+	} else {
+		m0 = mld_v2_encap_report(ifp, m);
+		if (m0 == NULL) {
+			/*
+			 * mld_v2_encap_report() has already released the
+			 * chain with m_freem(m) before returning NULL, so
+			 * it must not be freed again here (double free).
+			 * The pointer is only printed, never dereferenced.
+			 */
+			CTR2(KTR_MLD, "%s: dropped %p", __func__, m);
+			IP6STAT_INC(ip6s_odropped);
+			goto out;
+		}
+	}
+
+	mld_scrub_context(m0);
+	m->m_flags &= ~(M_PROTOFLAGS);
+	m0->m_pkthdr.rcvif = V_loif;
+
+	ip6 = mtod(m0, struct ip6_hdr *);
+#if 0
+	(void)in6_setscope(&ip6->ip6_dst, ifp, NULL);	/* XXX LOR */
+#else
+	/*
+	 * XXX XXX Break some KPI rules to prevent an LOR which would
+	 * occur if we called in6_setscope() at transmission.
+	 * See comments at top of file.
+	 */
+	MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
+#endif
+
+	/*
+	 * Retrieve the ICMPv6 type before handoff to ip6_output(),
+	 * so we can bump the stats.
+ */ + md = m_getptr(m0, sizeof(struct ip6_hdr), &off); + mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off); + type = mld->mld_type; + + error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o, + &oifp, NULL); + if (error) { + CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error); + goto out; + } + ICMP6STAT_INC(icp6s_outhist[type]); + if (oifp != NULL) { + icmp6_ifstat_inc(oifp, ifs6_out_msg); + switch (type) { + case MLD_LISTENER_REPORT: + case MLDV2_LISTENER_REPORT: + icmp6_ifstat_inc(oifp, ifs6_out_mldreport); + break; + case MLD_LISTENER_DONE: + icmp6_ifstat_inc(oifp, ifs6_out_mlddone); + break; + } + } +out: + return; +} + +/* + * Encapsulate an MLDv2 report. + * + * KAME IPv6 requires that hop-by-hop options be passed separately, + * and that the IPv6 header be prepended in a separate mbuf. + * + * Returns a pointer to the new mbuf chain head, or NULL if the + * allocation failed. + */ +static struct mbuf * +mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m) +{ + struct mbuf *mh; + struct mldv2_report *mld; + struct ip6_hdr *ip6; + struct in6_ifaddr *ia; + int mldreclen; + + KASSERT(ifp != NULL, ("%s: null ifp", __func__)); + KASSERT((m->m_flags & M_PKTHDR), + ("%s: mbuf chain %p is !M_PKTHDR", __func__, m)); + + /* + * RFC3590: OK to send as :: or tentative during DAD. 
+ */ + ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); + if (ia == NULL) + CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__); + + MGETHDR(mh, M_DONTWAIT, MT_HEADER); + if (mh == NULL) { + if (ia != NULL) + ifa_free(&ia->ia_ifa); + m_freem(m); + return (NULL); + } + MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report)); + + mldreclen = m_length(m, NULL); + CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen); + + mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report); + mh->m_pkthdr.len = sizeof(struct ip6_hdr) + + sizeof(struct mldv2_report) + mldreclen; + + ip6 = mtod(mh, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_nxt = IPPROTO_ICMPV6; + ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any; + if (ia != NULL) + ifa_free(&ia->ia_ifa); + ip6->ip6_dst = in6addr_linklocal_allv2routers; + /* scope ID will be set in netisr */ + + mld = (struct mldv2_report *)(ip6 + 1); + mld->mld_type = MLDV2_LISTENER_REPORT; + mld->mld_code = 0; + mld->mld_cksum = 0; + mld->mld_v2_reserved = 0; + mld->mld_v2_numrecs = htons(m->m_pkthdr.PH_vt.vt_nrecs); + m->m_pkthdr.PH_vt.vt_nrecs = 0; + + mh->m_next = m; + mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, + sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen); + return (mh); +} + +#ifdef KTR +static char * +mld_rec_type_to_str(const int type) +{ + + switch (type) { + case MLD_CHANGE_TO_EXCLUDE_MODE: + return "TO_EX"; + break; + case MLD_CHANGE_TO_INCLUDE_MODE: + return "TO_IN"; + break; + case MLD_MODE_IS_EXCLUDE: + return "MODE_EX"; + break; + case MLD_MODE_IS_INCLUDE: + return "MODE_IN"; + break; + case MLD_ALLOW_NEW_SOURCES: + return "ALLOW_NEW"; + break; + case MLD_BLOCK_OLD_SOURCES: + return "BLOCK_OLD"; + break; + default: + break; + } + return "unknown"; +} +#endif + +static void +mld_init(void *unused __unused) +{ + + CTR1(KTR_MLD, "%s: initializing", __func__); + MLD_LOCK_INIT(); + + 
ip6_initpktopts(&mld_po); + mld_po.ip6po_hlim = 1; + mld_po.ip6po_hbh = &mld_ra.hbh; + mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER; + mld_po.ip6po_flags = IP6PO_DONTFRAG; +} +SYSINIT(mld_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_init, NULL); + +static void +mld_uninit(void *unused __unused) +{ + + CTR1(KTR_MLD, "%s: tearing down", __func__); + MLD_LOCK_DESTROY(); +} +SYSUNINIT(mld_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_uninit, NULL); + +static void +vnet_mld_init(const void *unused __unused) +{ + + CTR1(KTR_MLD, "%s: initializing", __func__); + + LIST_INIT(&V_mli_head); +} +VNET_SYSINIT(vnet_mld_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_init, + NULL); + +static void +vnet_mld_uninit(const void *unused __unused) +{ + + CTR1(KTR_MLD, "%s: tearing down", __func__); + + KASSERT(LIST_EMPTY(&V_mli_head), + ("%s: mli list not empty; ifnets not detached?", __func__)); +} +VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_uninit, + NULL); + +static int +mld_modevent(module_t mod, int type, void *unused __unused) +{ + + switch (type) { + case MOD_LOAD: + case MOD_UNLOAD: + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t mld_mod = { + "mld", + mld_modevent, + 0 +}; +DECLARE_MODULE(mld, mld_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/freebsd/sys/netinet6/mld6.h b/freebsd/sys/netinet6/mld6.h new file mode 100644 index 00000000..1839b4f7 --- /dev/null +++ b/freebsd/sys/netinet6/mld6.h @@ -0,0 +1,112 @@ +/*- + * Copyright (c) 2009 Bruce Simpson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETINET6_MLD6_HH_ +#define _NETINET6_MLD6_HH_ + +/* + * Multicast Listener Discovery (MLD) definitions. + */ + +/* Minimum length of any MLD protocol message. */ +#define MLD_MINLEN sizeof(struct icmp6_hdr) + +/* + * MLD v2 query format. + * See <netinet/icmp6.h> for struct mld_hdr + * (MLDv1 query and host report format). 
+ */ +struct mldv2_query { + struct icmp6_hdr mld_icmp6_hdr; /* ICMPv6 header */ + struct in6_addr mld_addr; /* address being queried */ + uint8_t mld_misc; /* reserved/suppress/robustness */ + uint8_t mld_qqi; /* querier's query interval */ + uint16_t mld_numsrc; /* number of sources */ + /* followed by 1..numsrc source addresses */ +} __packed; +#define MLD_V2_QUERY_MINLEN sizeof(struct mldv2_query) +#define MLD_MRC_EXP(x) ((ntohs((x)) >> 12) & 0x0007) +#define MLD_MRC_MANT(x) (ntohs((x)) & 0x0fff) +#define MLD_QQIC_EXP(x) (((x) >> 4) & 0x07) +#define MLD_QQIC_MANT(x) ((x) & 0x0f) +#define MLD_QRESV(x) (((x) >> 4) & 0x0f) +#define MLD_SFLAG(x) (((x) >> 3) & 0x01) +#define MLD_QRV(x) ((x) & 0x07) + +/* + * MLDv2 host membership report header. + * mld_type: MLDV2_LISTENER_REPORT + */ +struct mldv2_report { + struct icmp6_hdr mld_icmp6_hdr; + /* followed by 1..numgrps records */ +} __packed; +/* overlaid on struct icmp6_hdr. */ +#define mld_numrecs mld_icmp6_hdr.icmp6_data16[1] + +struct mldv2_record { + uint8_t mr_type; /* record type */ + uint8_t mr_datalen; /* length of auxiliary data */ + uint16_t mr_numsrc; /* number of sources */ + struct in6_addr mr_addr; /* address being reported */ + /* followed by 1..numsrc source addresses */ +} __packed; +#define MLD_V2_REPORT_MAXRECS 65535 + +/* + * MLDv2 report modes. + */ +#define MLD_DO_NOTHING 0 /* don't send a record */ +#define MLD_MODE_IS_INCLUDE 1 /* MODE_IN */ +#define MLD_MODE_IS_EXCLUDE 2 /* MODE_EX */ +#define MLD_CHANGE_TO_INCLUDE_MODE 3 /* TO_IN */ +#define MLD_CHANGE_TO_EXCLUDE_MODE 4 /* TO_EX */ +#define MLD_ALLOW_NEW_SOURCES 5 /* ALLOW_NEW */ +#define MLD_BLOCK_OLD_SOURCES 6 /* BLOCK_OLD */ + +/* + * MLDv2 query types. + */ +#define MLD_V2_GENERAL_QUERY 1 +#define MLD_V2_GROUP_QUERY 2 +#define MLD_V2_GROUP_SOURCE_QUERY 3 + +/* + * Maximum report interval for MLDv1 host membership reports. 
+ */ +#define MLD_V1_MAX_RI 10 + +/* + * MLD_TIMER_SCALE denotes that the MLD code field specifies + * time in milliseconds. + */ +#define MLD_TIMER_SCALE 1000 + +#endif /* _NETINET6_MLD6_HH_ */ diff --git a/freebsd/sys/netinet6/mld6_var.h b/freebsd/sys/netinet6/mld6_var.h new file mode 100644 index 00000000..40d70e51 --- /dev/null +++ b/freebsd/sys/netinet6/mld6_var.h @@ -0,0 +1,164 @@ +/*- + * Copyright (c) 2009 Bruce Simpson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ +#ifndef _NETINET6_MLD6_VAR_HH_ +#define _NETINET6_MLD6_VAR_HH_ + +/* + * Multicast Listener Discovery (MLD) + * implementation-specific definitions. + */ + +#ifdef _KERNEL + +/* + * Per-link MLD state. + */ +struct mld_ifinfo { + LIST_ENTRY(mld_ifinfo) mli_link; + struct ifnet *mli_ifp; /* interface this instance belongs to */ + uint32_t mli_version; /* MLDv1 Host Compatibility Mode */ + uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */ + uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/ + uint32_t mli_flags; /* MLD per-interface flags */ + uint32_t mli_rv; /* MLDv2 Robustness Variable */ + uint32_t mli_qi; /* MLDv2 Query Interval (s) */ + uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */ + uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */ + SLIST_HEAD(,in6_multi) mli_relinmhead; /* released groups */ + struct ifqueue mli_gq; /* queue of general query responses */ +}; +#define MLIF_SILENT 0x00000001 /* Do not use MLD on this ifp */ +#define MLIF_USEALLOW 0x00000002 /* Use ALLOW/BLOCK for joins/leaves */ + +#define MLD_RANDOM_DELAY(X) (arc4random() % (X) + 1) +#define MLD_MAX_STATE_CHANGES 24 /* Max pending changes per group */ + +/* + * MLD per-group states. + */ +#define MLD_NOT_MEMBER 0 /* Can garbage collect group */ +#define MLD_SILENT_MEMBER 1 /* Do not perform MLD for group */ +#define MLD_REPORTING_MEMBER 2 /* MLDv1 we are reporter */ +#define MLD_IDLE_MEMBER 3 /* MLDv1 we reported last */ +#define MLD_LAZY_MEMBER 4 /* MLDv1 other member reporting */ +#define MLD_SLEEPING_MEMBER 5 /* MLDv1 start query response */ +#define MLD_AWAKENING_MEMBER 6 /* MLDv1 group timer will start */ +#define MLD_G_QUERY_PENDING_MEMBER 7 /* MLDv2 group query pending */ +#define MLD_SG_QUERY_PENDING_MEMBER 8 /* MLDv2 source query pending */ +#define MLD_LEAVING_MEMBER 9 /* MLDv2 dying gasp (pending last */ + /* retransmission of INCLUDE {}) */ + +/* + * MLD version tag. 
+ */ +#define MLD_VERSION_NONE 0 /* Invalid */ +#define MLD_VERSION_1 1 +#define MLD_VERSION_2 2 /* Default */ + +/* + * MLDv2 protocol control variables. + */ +#define MLD_RV_INIT 2 /* Robustness Variable */ +#define MLD_RV_MIN 1 +#define MLD_RV_MAX 7 + +#define MLD_QI_INIT 125 /* Query Interval (s) */ +#define MLD_QI_MIN 1 +#define MLD_QI_MAX 255 + +#define MLD_QRI_INIT 10 /* Query Response Interval (s) */ +#define MLD_QRI_MIN 1 +#define MLD_QRI_MAX 255 + +#define MLD_URI_INIT 3 /* Unsolicited Report Interval (s) */ +#define MLD_URI_MIN 0 +#define MLD_URI_MAX 10 + +#define MLD_MAX_GS_SOURCES 256 /* # of sources in rx GS query */ +#define MLD_MAX_G_GS_PACKETS 8 /* # of packets to answer G/GS */ +#define MLD_MAX_STATE_CHANGE_PACKETS 8 /* # of packets per state change */ +#define MLD_MAX_RESPONSE_PACKETS 16 /* # of packets for general query */ +#define MLD_MAX_RESPONSE_BURST 4 /* # of responses to send at once */ +#define MLD_RESPONSE_BURST_INTERVAL (PR_FASTHZ / 2) /* 500ms */ + +/* + * MLD-specific mbuf flags. + */ +#define M_MLDV1 M_PROTO1 /* Packet is MLDv1 */ +#define M_GROUPREC M_PROTO3 /* mbuf chain is a group record */ + +/* + * Leading space for MLDv2 reports inside MTU. + * + * NOTE: This differs from IGMPv3 significantly. KAME IPv6 requires + * that a fully formed mbuf chain *without* the Router Alert option + * is passed to ip6_output(), however we must account for it in the + * MTU if we need to split an MLDv2 report into several packets. + * + * We now put the MLDv2 report header in the initial mbuf containing + * the IPv6 header. + */ +#define MLD_MTUSPACE (sizeof(struct ip6_hdr) + sizeof(struct mld_raopt) + \ + sizeof(struct icmp6_hdr)) + +/* + * Subsystem lock macros. + * The MLD lock is only taken with MLD. Currently it is system-wide. + * VIMAGE: The lock could be pushed to per-VIMAGE granularity in future. 
+ */ +#define MLD_LOCK_INIT() mtx_init(&mld_mtx, "mld_mtx", NULL, MTX_DEF) +#define MLD_LOCK_DESTROY() mtx_destroy(&mld_mtx) +#define MLD_LOCK() mtx_lock(&mld_mtx) +#define MLD_LOCK_ASSERT() mtx_assert(&mld_mtx, MA_OWNED) +#define MLD_UNLOCK() mtx_unlock(&mld_mtx) +#define MLD_UNLOCK_ASSERT() mtx_assert(&mld_mtx, MA_NOTOWNED) + +/* + * Per-link MLD context. + */ +#define MLD_IFINFO(ifp) \ + (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->mld_ifinfo) + +int mld_change_state(struct in6_multi *, const int); +struct mld_ifinfo * + mld_domifattach(struct ifnet *); +void mld_domifdetach(struct ifnet *); +void mld_fasttimo(void); +void mld_ifdetach(struct ifnet *); +int mld_input(struct mbuf *, int, int); +void mld_slowtimo(void); + +#ifdef SYSCTL_DECL +SYSCTL_DECL(_net_inet6_mld); +#endif + +#endif /* _KERNEL */ + +#endif /* _NETINET6_MLD6_VAR_HH_ */ diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c new file mode 100644 index 00000000..bc7ed57e --- /dev/null +++ b/freebsd/sys/netinet6/nd6.c @@ -0,0 +1,2249 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/callout.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/rwlock.h> +#include <freebsd/sys/queue.h> +#include <freebsd/sys/sysctl.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_arc.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/iso88025.h> +#include <freebsd/net/fddi.h> +#include <freebsd/net/route.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/net/if_llatbl.h> +#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) +#include <freebsd/netinet/if_ether.h> +#include <freebsd/netinet6/in6_var.h> +#include 
<freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet/icmp6.h> + +#include <freebsd/sys/limits.h> + +#include <freebsd/security/mac/mac_framework.h> + +#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ +#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ + +#define SIN6(s) ((struct sockaddr_in6 *)s) + +/* timer values */ +VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ +VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ +VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ +VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ +VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for + * local traffic */ +VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage + * collection timer */ + +/* preventing too many loops in ND option parsing */ +static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ + +VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper + * layer hints */ +static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved + * ND entries */ +#define V_nd6_maxndopt VNET(nd6_maxndopt) +#define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) + +#ifdef ND6_DEBUG +VNET_DEFINE(int, nd6_debug) = 1; +#else +VNET_DEFINE(int, nd6_debug) = 0; +#endif + +/* for debugging? 
*/ +#if 0 +static int nd6_inuse, nd6_allocated; +#endif + +VNET_DEFINE(struct nd_drhead, nd_defrouter); +VNET_DEFINE(struct nd_prhead, nd_prefix); + +VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; +#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) + +static struct sockaddr_in6 all1_sa; + +static int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *, + struct ifnet *)); +static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); +static void nd6_slowtimo(void *); +static int regen_tmpaddr(struct in6_ifaddr *); +static struct llentry *nd6_free(struct llentry *, int); +static void nd6_llinfo_timer(void *); +static void clear_llinfo_pqueue(struct llentry *); + +static VNET_DEFINE(struct callout, nd6_slowtimo_ch); +#define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) + +VNET_DEFINE(struct callout, nd6_timer_ch); + +void +nd6_init(void) +{ + int i; + + LIST_INIT(&V_nd_prefix); + + all1_sa.sin6_family = AF_INET6; + all1_sa.sin6_len = sizeof(struct sockaddr_in6); + for (i = 0; i < sizeof(all1_sa.sin6_addr); i++) + all1_sa.sin6_addr.s6_addr[i] = 0xff; + + /* initialization of the default router list */ + TAILQ_INIT(&V_nd_defrouter); + + /* start timer */ + callout_init(&V_nd6_slowtimo_ch, 0); + callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, + nd6_slowtimo, curvnet); +} + +#ifdef VIMAGE +void +nd6_destroy() +{ + + callout_drain(&V_nd6_slowtimo_ch); + callout_drain(&V_nd6_timer_ch); +} +#endif + +struct nd_ifinfo * +nd6_ifattach(struct ifnet *ifp) +{ + struct nd_ifinfo *nd; + + nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK); + bzero(nd, sizeof(*nd)); + + nd->initialized = 1; + + nd->chlim = IPV6_DEFHLIM; + nd->basereachable = REACHABLE_TIME; + nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); + nd->retrans = RETRANS_TIMER; + /* + * Note that the default value of ip6_accept_rtadv is 0, which means + * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV + * here. 
+ */ + nd->flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV); + + /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */ + nd6_setmtu0(ifp, nd); + + return nd; +} + +void +nd6_ifdetach(struct nd_ifinfo *nd) +{ + + free(nd, M_IP6NDP); +} + +/* + * Reset ND level link MTU. This function is called when the physical MTU + * changes, which means we might have to adjust the ND level MTU. + */ +void +nd6_setmtu(struct ifnet *ifp) +{ + + nd6_setmtu0(ifp, ND_IFINFO(ifp)); +} + +/* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */ +void +nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi) +{ + u_int32_t omaxmtu; + + omaxmtu = ndi->maxmtu; + + switch (ifp->if_type) { + case IFT_ARCNET: + ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */ + break; + case IFT_FDDI: + ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */ + break; + case IFT_ISO88025: + ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu); + break; + default: + ndi->maxmtu = ifp->if_mtu; + break; + } + + /* + * Decreasing the interface MTU under IPV6 minimum MTU may cause + * undesirable situation. We thus notify the operator of the change + * explicitly. The check for omaxmtu is necessary to restrict the + * log to the case of changing the MTU, not initializing it. + */ + if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { + log(LOG_NOTICE, "nd6_setmtu0: " + "new link MTU on %s (%lu) is too small for IPv6\n", + if_name(ifp), (unsigned long)ndi->maxmtu); + } + + if (ndi->maxmtu > V_in6_maxmtu) + in6_setmaxmtu(); /* check all interfaces just in case */ + +} + +void +nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) +{ + + bzero(ndopts, sizeof(*ndopts)); + ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; + ndopts->nd_opts_last + = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); + + if (icmp6len == 0) { + ndopts->nd_opts_done = 1; + ndopts->nd_opts_search = NULL; + } +} + +/* + * Take one ND option. 
+ */ +struct nd_opt_hdr * +nd6_option(union nd_opts *ndopts) +{ + struct nd_opt_hdr *nd_opt; + int olen; + + if (ndopts == NULL) + panic("ndopts == NULL in nd6_option"); + if (ndopts->nd_opts_last == NULL) + panic("uninitialized ndopts in nd6_option"); + if (ndopts->nd_opts_search == NULL) + return NULL; + if (ndopts->nd_opts_done) + return NULL; + + nd_opt = ndopts->nd_opts_search; + + /* make sure nd_opt_len is inside the buffer */ + if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { + bzero(ndopts, sizeof(*ndopts)); + return NULL; + } + + olen = nd_opt->nd_opt_len << 3; + if (olen == 0) { + /* + * Message validation requires that all included + * options have a length that is greater than zero. + */ + bzero(ndopts, sizeof(*ndopts)); + return NULL; + } + + ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); + if (ndopts->nd_opts_search > ndopts->nd_opts_last) { + /* option overruns the end of buffer, invalid */ + bzero(ndopts, sizeof(*ndopts)); + return NULL; + } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { + /* reached the end of options chain */ + ndopts->nd_opts_done = 1; + ndopts->nd_opts_search = NULL; + } + return nd_opt; +} + +/* + * Parse multiple ND options. + * This function is much easier to use, for ND routines that do not need + * multiple options of the same type. + */ +int +nd6_options(union nd_opts *ndopts) +{ + struct nd_opt_hdr *nd_opt; + int i = 0; + + if (ndopts == NULL) + panic("ndopts == NULL in nd6_options"); + if (ndopts->nd_opts_last == NULL) + panic("uninitialized ndopts in nd6_options"); + if (ndopts->nd_opts_search == NULL) + return 0; + + while (1) { + nd_opt = nd6_option(ndopts); + if (nd_opt == NULL && ndopts->nd_opts_last == NULL) { + /* + * Message validation requires that all included + * options have a length that is greater than zero. 
+ */ + ICMP6STAT_INC(icp6s_nd_badopt); + bzero(ndopts, sizeof(*ndopts)); + return -1; + } + + if (nd_opt == NULL) + goto skip1; + + switch (nd_opt->nd_opt_type) { + case ND_OPT_SOURCE_LINKADDR: + case ND_OPT_TARGET_LINKADDR: + case ND_OPT_MTU: + case ND_OPT_REDIRECTED_HEADER: + if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { + nd6log((LOG_INFO, + "duplicated ND6 option found (type=%d)\n", + nd_opt->nd_opt_type)); + /* XXX bark? */ + } else { + ndopts->nd_opt_array[nd_opt->nd_opt_type] + = nd_opt; + } + break; + case ND_OPT_PREFIX_INFORMATION: + if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { + ndopts->nd_opt_array[nd_opt->nd_opt_type] + = nd_opt; + } + ndopts->nd_opts_pi_end = + (struct nd_opt_prefix_info *)nd_opt; + break; + default: + /* + * Unknown options must be silently ignored, + * to accomodate future extension to the protocol. + */ + nd6log((LOG_DEBUG, + "nd6_options: unsupported option %d - " + "option ignored\n", nd_opt->nd_opt_type)); + } + +skip1: + i++; + if (i > V_nd6_maxndopt) { + ICMP6STAT_INC(icp6s_nd_toomanyopt); + nd6log((LOG_INFO, "too many loop in nd opt\n")); + break; + } + + if (ndopts->nd_opts_done) + break; + } + + return 0; +} + +/* + * ND6 timer routine to handle ND6 entries + */ +void +nd6_llinfo_settimer_locked(struct llentry *ln, long tick) +{ + int canceled; + + LLE_WLOCK_ASSERT(ln); + + if (tick < 0) { + ln->la_expire = 0; + ln->ln_ntick = 0; + canceled = callout_stop(&ln->ln_timer_ch); + } else { + ln->la_expire = time_second + tick / hz; + LLE_ADDREF(ln); + if (tick > INT_MAX) { + ln->ln_ntick = tick - INT_MAX; + canceled = callout_reset(&ln->ln_timer_ch, INT_MAX, + nd6_llinfo_timer, ln); + } else { + ln->ln_ntick = 0; + canceled = callout_reset(&ln->ln_timer_ch, tick, + nd6_llinfo_timer, ln); + } + } + if (canceled) + LLE_REMREF(ln); +} + +void +nd6_llinfo_settimer(struct llentry *ln, long tick) +{ + + LLE_WLOCK(ln); + nd6_llinfo_settimer_locked(ln, tick); + LLE_WUNLOCK(ln); +} + +static void +nd6_llinfo_timer(void *arg) 
+{ + struct llentry *ln; + struct in6_addr *dst; + struct ifnet *ifp; + struct nd_ifinfo *ndi = NULL; + + KASSERT(arg != NULL, ("%s: arg NULL", __func__)); + ln = (struct llentry *)arg; + LLE_WLOCK_ASSERT(ln); + ifp = ln->lle_tbl->llt_ifp; + + CURVNET_SET(ifp->if_vnet); + + if (ln->ln_ntick > 0) { + if (ln->ln_ntick > INT_MAX) { + ln->ln_ntick -= INT_MAX; + nd6_llinfo_settimer_locked(ln, INT_MAX); + } else { + ln->ln_ntick = 0; + nd6_llinfo_settimer_locked(ln, ln->ln_ntick); + } + goto done; + } + + ndi = ND_IFINFO(ifp); + dst = &L3_ADDR_SIN6(ln)->sin6_addr; + if (ln->la_flags & LLE_STATIC) { + goto done; + } + + if (ln->la_flags & LLE_DELETED) { + (void)nd6_free(ln, 0); + ln = NULL; + goto done; + } + + switch (ln->ln_state) { + case ND6_LLINFO_INCOMPLETE: + if (ln->la_asked < V_nd6_mmaxtries) { + ln->la_asked++; + nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); + LLE_WUNLOCK(ln); + nd6_ns_output(ifp, NULL, dst, ln, 0); + LLE_WLOCK(ln); + } else { + struct mbuf *m = ln->la_hold; + if (m) { + struct mbuf *m0; + + /* + * assuming every packet in la_hold has the + * same IP header. Send error after unlock. 
+ */ + m0 = m->m_nextpkt; + m->m_nextpkt = NULL; + ln->la_hold = m0; + clear_llinfo_pqueue(ln); + } + (void)nd6_free(ln, 0); + ln = NULL; + if (m != NULL) + icmp6_error2(m, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_ADDR, 0, ifp); + } + break; + case ND6_LLINFO_REACHABLE: + if (!ND6_LLINFO_PERMANENT(ln)) { + ln->ln_state = ND6_LLINFO_STALE; + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + } + break; + + case ND6_LLINFO_STALE: + /* Garbage Collection(RFC 2461 5.3) */ + if (!ND6_LLINFO_PERMANENT(ln)) { + (void)nd6_free(ln, 1); + ln = NULL; + } + break; + + case ND6_LLINFO_DELAY: + if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { + /* We need NUD */ + ln->la_asked = 1; + ln->ln_state = ND6_LLINFO_PROBE; + nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); + LLE_WUNLOCK(ln); + nd6_ns_output(ifp, dst, dst, ln, 0); + LLE_WLOCK(ln); + } else { + ln->ln_state = ND6_LLINFO_STALE; /* XXX */ + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + } + break; + case ND6_LLINFO_PROBE: + if (ln->la_asked < V_nd6_umaxtries) { + ln->la_asked++; + nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); + LLE_WUNLOCK(ln); + nd6_ns_output(ifp, dst, dst, ln, 0); + LLE_WLOCK(ln); + } else { + (void)nd6_free(ln, 0); + ln = NULL; + } + break; + default: + panic("%s: paths in a dark night can be confusing: %d", + __func__, ln->ln_state); + } +done: + if (ln != NULL) + LLE_FREE_LOCKED(ln); + CURVNET_RESTORE(); +} + + +/* + * ND6 timer routine to expire default route list and prefix list + */ +void +nd6_timer(void *arg) +{ + CURVNET_SET((struct vnet *) arg); + int s; + struct nd_defrouter *dr; + struct nd_prefix *pr; + struct in6_ifaddr *ia6, *nia6; + struct in6_addrlifetime *lt6; + + callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, + nd6_timer, curvnet); + + /* expire default router list */ + s = splnet(); + dr = TAILQ_FIRST(&V_nd_defrouter); + while (dr) { + if (dr->expire && dr->expire < time_second) { + struct nd_defrouter *t; + t = TAILQ_NEXT(dr, 
dr_entry); + defrtrlist_del(dr); + dr = t; + } else { + dr = TAILQ_NEXT(dr, dr_entry); + } + } + + /* + * expire interface addresses. + * in the past the loop was inside prefix expiry processing. + * However, from a stricter speci-confrmance standpoint, we should + * rather separate address lifetimes and prefix lifetimes. + * + * XXXRW: in6_ifaddrhead locking. + */ + addrloop: + TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) { + /* check address lifetime */ + lt6 = &ia6->ia6_lifetime; + if (IFA6_IS_INVALID(ia6)) { + int regen = 0; + + /* + * If the expiring address is temporary, try + * regenerating a new one. This would be useful when + * we suspended a laptop PC, then turned it on after a + * period that could invalidate all temporary + * addresses. Although we may have to restart the + * loop (see below), it must be after purging the + * address. Otherwise, we'd see an infinite loop of + * regeneration. + */ + if (V_ip6_use_tempaddr && + (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { + if (regen_tmpaddr(ia6) == 0) + regen = 1; + } + + in6_purgeaddr(&ia6->ia_ifa); + + if (regen) + goto addrloop; /* XXX: see below */ + } else if (IFA6_IS_DEPRECATED(ia6)) { + int oldflags = ia6->ia6_flags; + + ia6->ia6_flags |= IN6_IFF_DEPRECATED; + + /* + * If a temporary address has just become deprecated, + * regenerate a new one if possible. + */ + if (V_ip6_use_tempaddr && + (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && + (oldflags & IN6_IFF_DEPRECATED) == 0) { + + if (regen_tmpaddr(ia6) == 0) { + /* + * A new temporary address is + * generated. + * XXX: this means the address chain + * has changed while we are still in + * the loop. Although the change + * would not cause disaster (because + * it's not a deletion, but an + * addition,) we'd rather restart the + * loop just for safety. Or does this + * significantly reduce performance?? + */ + goto addrloop; + } + } + } else { + /* + * A new RA might have made a deprecated address + * preferred. 
+ */ + ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; + } + } + + /* expire prefix list */ + pr = V_nd_prefix.lh_first; + while (pr) { + /* + * check prefix lifetime. + * since pltime is just for autoconf, pltime processing for + * prefix is not necessary. + */ + if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME && + time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) { + struct nd_prefix *t; + t = pr->ndpr_next; + + /* + * address expiration and prefix expiration are + * separate. NEVER perform in6_purgeaddr here. + */ + + prelist_remove(pr); + pr = t; + } else + pr = pr->ndpr_next; + } + splx(s); + CURVNET_RESTORE(); +} + +/* + * ia6 - deprecated/invalidated temporary address + */ +static int +regen_tmpaddr(struct in6_ifaddr *ia6) +{ + struct ifaddr *ifa; + struct ifnet *ifp; + struct in6_ifaddr *public_ifa6 = NULL; + + ifp = ia6->ia_ifa.ifa_ifp; + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + struct in6_ifaddr *it6; + + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + + it6 = (struct in6_ifaddr *)ifa; + + /* ignore no autoconf addresses. */ + if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) + continue; + + /* ignore autoconf addresses with different prefixes. */ + if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) + continue; + + /* + * Now we are looking at an autoconf address with the same + * prefix as ours. If the address is temporary and is still + * preferred, do not create another one. It would be rare, but + * could happen, for example, when we resume a laptop PC after + * a long period. + */ + if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && + !IFA6_IS_DEPRECATED(it6)) { + public_ifa6 = NULL; + break; + } + + /* + * This is a public autoconf address that has the same prefix + * as ours. If it is preferred, keep it. We can't break the + * loop here, because there may be a still-preferred temporary + * address with the prefix. 
+ */ + if (!IFA6_IS_DEPRECATED(it6)) + public_ifa6 = it6; + + if (public_ifa6 != NULL) + ifa_ref(&public_ifa6->ia_ifa); + } + IF_ADDR_UNLOCK(ifp); + + if (public_ifa6 != NULL) { + int e; + + if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) { + ifa_free(&public_ifa6->ia_ifa); + log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" + " tmp addr,errno=%d\n", e); + return (-1); + } + ifa_free(&public_ifa6->ia_ifa); + return (0); + } + + return (-1); +} + +/* + * Nuke neighbor cache/prefix/default router management table, right before + * ifp goes away. + */ +void +nd6_purge(struct ifnet *ifp) +{ + struct nd_defrouter *dr, *ndr; + struct nd_prefix *pr, *npr; + + /* + * Nuke default router list entries toward ifp. + * We defer removal of default router list entries that is installed + * in the routing table, in order to keep additional side effects as + * small as possible. + */ + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) { + ndr = TAILQ_NEXT(dr, dr_entry); + if (dr->installed) + continue; + + if (dr->ifp == ifp) + defrtrlist_del(dr); + } + + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) { + ndr = TAILQ_NEXT(dr, dr_entry); + if (!dr->installed) + continue; + + if (dr->ifp == ifp) + defrtrlist_del(dr); + } + + /* Nuke prefix list entries toward ifp */ + for (pr = V_nd_prefix.lh_first; pr; pr = npr) { + npr = pr->ndpr_next; + if (pr->ndpr_ifp == ifp) { + /* + * Because if_detach() does *not* release prefixes + * while purging addresses the reference count will + * still be above zero. We therefore reset it to + * make sure that the prefix really gets purged. + */ + pr->ndpr_refcnt = 0; + + /* + * Previously, pr->ndpr_addr is removed as well, + * but I strongly believe we don't have to do it. + * nd6_purge() is only called from in6_ifdetach(), + * which removes all the associated interface addresses + * by itself. 
+ * (jinmei@kame.net 20010129) + */ + prelist_remove(pr); + } + } + + /* cancel default outgoing interface setting */ + if (V_nd6_defifindex == ifp->if_index) + nd6_setdefaultiface(0); + + if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */ + /* refresh default router list + * + * + */ + defrouter_select(); + + } + + /* XXXXX + * We do not nuke the neighbor cache entries here any more + * because the neighbor cache is kept in if_afdata[AF_INET6]. + * nd6_purge() is invoked by in6_ifdetach() which is called + * from if_detach() where everything gets purged. So let + * in6_domifdetach() do the actual L2 table purging work. + */ +} + +/* + * the caller acquires and releases the lock on the lltbls + * Returns the llentry locked + */ +struct llentry * +nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) +{ + struct sockaddr_in6 sin6; + struct llentry *ln; + int llflags; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = *addr6; + + IF_AFDATA_LOCK_ASSERT(ifp); + + llflags = 0; + if (flags & ND6_CREATE) + llflags |= LLE_CREATE; + if (flags & ND6_EXCLUSIVE) + llflags |= LLE_EXCLUSIVE; + + ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6); + if ((ln != NULL) && (llflags & LLE_CREATE)) + ln->ln_state = ND6_LLINFO_NOSTATE; + + return (ln); +} + +/* + * Test whether a given IPv6 address is a neighbor or not, ignoring + * the actual neighbor cache. The neighbor cache is ignored in order + * to not reenter the routing code from within itself. + */ +static int +nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) +{ + struct nd_prefix *pr; + struct ifaddr *dstaddr; + + /* + * A link-local address is always a neighbor. + * XXX: a link does not necessarily specify a single interface. 
+ */ + if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { + struct sockaddr_in6 sin6_copy; + u_int32_t zone; + + /* + * We need sin6_copy since sa6_recoverscope() may modify the + * content (XXX). + */ + sin6_copy = *addr; + if (sa6_recoverscope(&sin6_copy)) + return (0); /* XXX: should be impossible */ + if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) + return (0); + if (sin6_copy.sin6_scope_id == zone) + return (1); + else + return (0); + } + + /* + * If the address matches one of our addresses, + * it should be a neighbor. + * If the address matches one of our on-link prefixes, it should be a + * neighbor. + */ + for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + if (pr->ndpr_ifp != ifp) + continue; + + if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { + struct rtentry *rt; + rt = rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0); + if (rt == NULL) + continue; + /* + * This is the case where multiple interfaces + * have the same prefix, but only one is installed + * into the routing table and that prefix entry + * is not the one being examined here. In the case + * where RADIX_MPATH is enabled, multiple route + * entries (of the same rt_key value) will be + * installed because the interface addresses all + * differ. + */ + if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) { + RTFREE_LOCKED(rt); + continue; + } + RTFREE_LOCKED(rt); + } + + if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, + &addr->sin6_addr, &pr->ndpr_mask)) + return (1); + } + + /* + * If the address is assigned on the node of the other side of + * a p2p interface, the address should be a neighbor. + */ + dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr); + if (dstaddr != NULL) { + if (dstaddr->ifa_ifp == ifp) { + ifa_free(dstaddr); + return (1); + } + ifa_free(dstaddr); + } + + /* + * If the default router list is empty, all addresses are regarded + * as on-link, and thus, as a neighbor. 
+ * XXX: we restrict the condition to hosts, because routers usually do + * not have the "default router list". + */ + if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL && + V_nd6_defifindex == ifp->if_index) { + return (1); + } + + return (0); +} + + +/* + * Detect if a given IPv6 address identifies a neighbor on a given link. + * XXX: should take care of the destination of a p2p link? + */ +int +nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) +{ + struct llentry *lle; + int rc = 0; + + IF_AFDATA_UNLOCK_ASSERT(ifp); + if (nd6_is_new_addr_neighbor(addr, ifp)) + return (1); + + /* + * Even if the address matches none of our addresses, it might be + * in the neighbor cache. + */ + IF_AFDATA_LOCK(ifp); + if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { + LLE_RUNLOCK(lle); + rc = 1; + } + IF_AFDATA_UNLOCK(ifp); + return (rc); +} + +/* + * Free an nd6 llinfo entry. + * Since the function would cause significant changes in the kernel, DO NOT + * make it global, unless you have a strong reason for the change, and are sure + * that the change is safe. + */ +static struct llentry * +nd6_free(struct llentry *ln, int gc) +{ + struct llentry *next; + struct nd_defrouter *dr; + struct ifnet *ifp; + + LLE_WLOCK_ASSERT(ln); + + /* + * we used to have pfctlinput(PRC_HOSTDEAD) here. + * even though it is not harmful, it was not really necessary. + */ + + /* cancel timer */ + nd6_llinfo_settimer_locked(ln, -1); + + ifp = ln->lle_tbl->llt_ifp; + + if (!V_ip6_forwarding) { + + dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); + + if (dr != NULL && dr->expire && + ln->ln_state == ND6_LLINFO_STALE && gc) { + /* + * If the reason for the deletion is just garbage + * collection, and the neighbor is an active default + * router, do not delete it. Instead, reset the GC + * timer using the router's lifetime. 
+ * Simply deleting the entry would affect default + * router selection, which is not necessarily a good + * thing, especially when we're using router preference + * values. + * XXX: the check for ln_state would be redundant, + * but we intentionally keep it just in case. + */ + if (dr->expire > time_second) + nd6_llinfo_settimer_locked(ln, + (dr->expire - time_second) * hz); + else + nd6_llinfo_settimer_locked(ln, + (long)V_nd6_gctimer * hz); + + next = LIST_NEXT(ln, lle_next); + LLE_REMREF(ln); + LLE_WUNLOCK(ln); + return (next); + } + + if (dr) { + /* + * Unreachablity of a router might affect the default + * router selection and on-link detection of advertised + * prefixes. + */ + + /* + * Temporarily fake the state to choose a new default + * router and to perform on-link determination of + * prefixes correctly. + * Below the state will be set correctly, + * or the entry itself will be deleted. + */ + ln->ln_state = ND6_LLINFO_INCOMPLETE; + } + + if (ln->ln_router || dr) { + + /* + * We need to unlock to avoid a LOR with rt6_flush() with the + * rnh and for the calls to pfxlist_onlink_check() and + * defrouter_select() in the block further down for calls + * into nd6_lookup(). We still hold a ref. + */ + LLE_WUNLOCK(ln); + + /* + * rt6_flush must be called whether or not the neighbor + * is in the Default Router List. + * See a corresponding comment in nd6_na_input(). + */ + rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); + } + + if (dr) { + /* + * Since defrouter_select() does not affect the + * on-link determination and MIP6 needs the check + * before the default router selection, we perform + * the check now. + */ + pfxlist_onlink_check(); + + /* + * Refresh default router list. + */ + defrouter_select(); + } + + if (ln->ln_router || dr) + LLE_WLOCK(ln); + } + + /* + * Before deleting the entry, remember the next entry as the + * return value. 
We need this because pfxlist_onlink_check() above + * might have freed other entries (particularly the old next entry) as + * a side effect (XXX). + */ + next = LIST_NEXT(ln, lle_next); + + /* + * Save to unlock. We still hold an extra reference and will not + * free(9) in llentry_free() if someone else holds one as well. + */ + LLE_WUNLOCK(ln); + IF_AFDATA_LOCK(ifp); + LLE_WLOCK(ln); + LLE_REMREF(ln); + llentry_free(ln); + IF_AFDATA_UNLOCK(ifp); + + return (next); +} + +/* + * Upper-layer reachability hint for Neighbor Unreachability Detection. + * + * XXX cost-effective methods? + */ +void +nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force) +{ + struct llentry *ln; + struct ifnet *ifp; + + if ((dst6 == NULL) || (rt == NULL)) + return; + + ifp = rt->rt_ifp; + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL); + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) + return; + + if (ln->ln_state < ND6_LLINFO_REACHABLE) + goto done; + + /* + * if we get upper-layer reachability confirmation many times, + * it is possible we have false information. 
+ */ + if (!force) { + ln->ln_byhint++; + if (ln->ln_byhint > V_nd6_maxnudhint) { + goto done; + } + } + + ln->ln_state = ND6_LLINFO_REACHABLE; + if (!ND6_LLINFO_PERMANENT(ln)) { + nd6_llinfo_settimer_locked(ln, + (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); + } +done: + LLE_WUNLOCK(ln); +} + + +int +nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) +{ + struct in6_drlist *drl = (struct in6_drlist *)data; + struct in6_oprlist *oprl = (struct in6_oprlist *)data; + struct in6_ndireq *ndi = (struct in6_ndireq *)data; + struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; + struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; + struct nd_defrouter *dr; + struct nd_prefix *pr; + int i = 0, error = 0; + int s; + + switch (cmd) { + case SIOCGDRLST_IN6: + /* + * obsolete API, use sysctl under net.inet6.icmp6 + */ + bzero(drl, sizeof(*drl)); + s = splnet(); + dr = TAILQ_FIRST(&V_nd_defrouter); + while (dr && i < DRLSTSIZ) { + drl->defrouter[i].rtaddr = dr->rtaddr; + in6_clearscope(&drl->defrouter[i].rtaddr); + + drl->defrouter[i].flags = dr->flags; + drl->defrouter[i].rtlifetime = dr->rtlifetime; + drl->defrouter[i].expire = dr->expire; + drl->defrouter[i].if_index = dr->ifp->if_index; + i++; + dr = TAILQ_NEXT(dr, dr_entry); + } + splx(s); + break; + case SIOCGPRLST_IN6: + /* + * obsolete API, use sysctl under net.inet6.icmp6 + * + * XXX the structure in6_prlist was changed in backward- + * incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6, + * in6_prlist is used for nd6_sysctl() - fill_prlist(). + */ + /* + * XXX meaning of fields, especialy "raflags", is very + * differnet between RA prefix list and RR/static prefix list. + * how about separating ioctls into two? 
+ */ + bzero(oprl, sizeof(*oprl)); + s = splnet(); + pr = V_nd_prefix.lh_first; + while (pr && i < PRLSTSIZ) { + struct nd_pfxrouter *pfr; + int j; + + oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr; + oprl->prefix[i].raflags = pr->ndpr_raf; + oprl->prefix[i].prefixlen = pr->ndpr_plen; + oprl->prefix[i].vltime = pr->ndpr_vltime; + oprl->prefix[i].pltime = pr->ndpr_pltime; + oprl->prefix[i].if_index = pr->ndpr_ifp->if_index; + if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) + oprl->prefix[i].expire = 0; + else { + time_t maxexpire; + + /* XXX: we assume time_t is signed. */ + maxexpire = (-1) & + ~((time_t)1 << + ((sizeof(maxexpire) * 8) - 1)); + if (pr->ndpr_vltime < + maxexpire - pr->ndpr_lastupdate) { + oprl->prefix[i].expire = + pr->ndpr_lastupdate + + pr->ndpr_vltime; + } else + oprl->prefix[i].expire = maxexpire; + } + + pfr = pr->ndpr_advrtrs.lh_first; + j = 0; + while (pfr) { + if (j < DRLSTSIZ) { +#define RTRADDR oprl->prefix[i].advrtr[j] + RTRADDR = pfr->router->rtaddr; + in6_clearscope(&RTRADDR); +#undef RTRADDR + } + j++; + pfr = pfr->pfr_next; + } + oprl->prefix[i].advrtrs = j; + oprl->prefix[i].origin = PR_ORIG_RA; + + i++; + pr = pr->ndpr_next; + } + splx(s); + + break; + case OSIOCGIFINFO_IN6: +#define ND ndi->ndi + /* XXX: old ndp(8) assumes a positive value for linkmtu. */ + bzero(&ND, sizeof(ND)); + ND.linkmtu = IN6_LINKMTU(ifp); + ND.maxmtu = ND_IFINFO(ifp)->maxmtu; + ND.basereachable = ND_IFINFO(ifp)->basereachable; + ND.reachable = ND_IFINFO(ifp)->reachable; + ND.retrans = ND_IFINFO(ifp)->retrans; + ND.flags = ND_IFINFO(ifp)->flags; + ND.recalctm = ND_IFINFO(ifp)->recalctm; + ND.chlim = ND_IFINFO(ifp)->chlim; + break; + case SIOCGIFINFO_IN6: + ND = *ND_IFINFO(ifp); + break; + case SIOCSIFINFO_IN6: + /* + * used to change host variables from userland. + * intented for a use on router to reflect RA configurations. 
+ */ + /* 0 means 'unspecified' */ + if (ND.linkmtu != 0) { + if (ND.linkmtu < IPV6_MMTU || + ND.linkmtu > IN6_LINKMTU(ifp)) { + error = EINVAL; + break; + } + ND_IFINFO(ifp)->linkmtu = ND.linkmtu; + } + + if (ND.basereachable != 0) { + int obasereachable = ND_IFINFO(ifp)->basereachable; + + ND_IFINFO(ifp)->basereachable = ND.basereachable; + if (ND.basereachable != obasereachable) + ND_IFINFO(ifp)->reachable = + ND_COMPUTE_RTIME(ND.basereachable); + } + if (ND.retrans != 0) + ND_IFINFO(ifp)->retrans = ND.retrans; + if (ND.chlim != 0) + ND_IFINFO(ifp)->chlim = ND.chlim; + /* FALLTHROUGH */ + case SIOCSIFINFO_FLAGS: + ND_IFINFO(ifp)->flags = ND.flags; + break; +#undef ND + case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ + /* sync kernel routing table with the default router list */ + defrouter_reset(); + defrouter_select(); + break; + case SIOCSPFXFLUSH_IN6: + { + /* flush all the prefix advertised by routers */ + struct nd_prefix *pr, *next; + + s = splnet(); + for (pr = V_nd_prefix.lh_first; pr; pr = next) { + struct in6_ifaddr *ia, *ia_next; + + next = pr->ndpr_next; + + if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) + continue; /* XXX */ + + /* do we really have to remove addresses as well? */ + /* XXXRW: in6_ifaddrhead locking. 
*/ + TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, + ia_next) { + if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) + continue; + + if (ia->ia6_ndpr == pr) + in6_purgeaddr(&ia->ia_ifa); + } + prelist_remove(pr); + } + splx(s); + break; + } + case SIOCSRTRFLUSH_IN6: + { + /* flush all the default routers */ + struct nd_defrouter *dr, *next; + + s = splnet(); + defrouter_reset(); + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = next) { + next = TAILQ_NEXT(dr, dr_entry); + defrtrlist_del(dr); + } + defrouter_select(); + splx(s); + break; + } + case SIOCGNBRINFO_IN6: + { + struct llentry *ln; + struct in6_addr nb_addr = nbi->addr; /* make local for safety */ + + if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) + return (error); + + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(&nb_addr, 0, ifp); + IF_AFDATA_UNLOCK(ifp); + + if (ln == NULL) { + error = EINVAL; + break; + } + nbi->state = ln->ln_state; + nbi->asked = ln->la_asked; + nbi->isrouter = ln->ln_router; + nbi->expire = ln->la_expire; + LLE_RUNLOCK(ln); + break; + } + case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ + ndif->ifindex = V_nd6_defifindex; + break; + case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ + return (nd6_setdefaultiface(ndif->ifindex)); + } + return (error); +} + +/* + * Create neighbor cache entry and cache link-layer address, + * on reception of inbound ND6 packets. (RS/RA/NS/redirect) + * + * type - ICMP6 type + * code - type dependent information + * + * XXXXX + * The caller of this function already acquired the ndp + * cache table lock because the cache entry is returned. 
+ */ +struct llentry * +nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, + int lladdrlen, int type, int code) +{ + struct llentry *ln = NULL; + int is_newentry; + int do_update; + int olladdr; + int llchange; + int flags; + int newstate = 0; + uint16_t router = 0; + struct sockaddr_in6 sin6; + struct mbuf *chain = NULL; + int static_route = 0; + + IF_AFDATA_UNLOCK_ASSERT(ifp); + + if (ifp == NULL) + panic("ifp == NULL in nd6_cache_lladdr"); + if (from == NULL) + panic("from == NULL in nd6_cache_lladdr"); + + /* nothing must be updated for unspecified address */ + if (IN6_IS_ADDR_UNSPECIFIED(from)) + return NULL; + + /* + * Validation about ifp->if_addrlen and lladdrlen must be done in + * the caller. + * + * XXX If the link does not have link-layer adderss, what should + * we do? (ifp->if_addrlen == 0) + * Spec says nothing in sections for RA, RS and NA. There's small + * description on it in NS section (RFC 2461 7.2.3). + */ + flags = lladdr ? ND6_EXCLUSIVE : 0; + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(from, flags, ifp); + + if (ln == NULL) { + flags |= ND6_EXCLUSIVE; + ln = nd6_lookup(from, flags | ND6_CREATE, ifp); + IF_AFDATA_UNLOCK(ifp); + is_newentry = 1; + } else { + IF_AFDATA_UNLOCK(ifp); + /* do nothing if static ndp is set */ + if (ln->la_flags & LLE_STATIC) { + static_route = 1; + goto done; + } + is_newentry = 0; + } + if (ln == NULL) + return (NULL); + + olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; + if (olladdr && lladdr) { + llchange = bcmp(lladdr, &ln->ll_addr, + ifp->if_addrlen); + } else + llchange = 0; + + /* + * newentry olladdr lladdr llchange (*=record) + * 0 n n -- (1) + * 0 y n -- (2) + * 0 n y -- (3) * STALE + * 0 y y n (4) * + * 0 y y y (5) * STALE + * 1 -- n -- (6) NOSTATE(= PASSIVE) + * 1 -- y -- (7) * STALE + */ + + if (lladdr) { /* (3-5) and (7) */ + /* + * Record source link-layer address + * XXX is it dependent to ifp->if_type? 
+ */ + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; + } + + if (!is_newentry) { + if ((!olladdr && lladdr != NULL) || /* (3) */ + (olladdr && lladdr != NULL && llchange)) { /* (5) */ + do_update = 1; + newstate = ND6_LLINFO_STALE; + } else /* (1-2,4) */ + do_update = 0; + } else { + do_update = 1; + if (lladdr == NULL) /* (6) */ + newstate = ND6_LLINFO_NOSTATE; + else /* (7) */ + newstate = ND6_LLINFO_STALE; + } + + if (do_update) { + /* + * Update the state of the neighbor cache. + */ + ln->ln_state = newstate; + + if (ln->ln_state == ND6_LLINFO_STALE) { + /* + * XXX: since nd6_output() below will cause + * state tansition to DELAY and reset the timer, + * we must set the timer now, although it is actually + * meaningless. + */ + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + + if (ln->la_hold) { + struct mbuf *m_hold, *m_hold_next; + + /* + * reset the la_hold in advance, to explicitly + * prevent a la_hold lookup in nd6_output() + * (wouldn't happen, though...) + */ + for (m_hold = ln->la_hold, ln->la_hold = NULL; + m_hold; m_hold = m_hold_next) { + m_hold_next = m_hold->m_nextpkt; + m_hold->m_nextpkt = NULL; + + /* + * we assume ifp is not a p2p here, so + * just set the 2nd argument as the + * 1st one. + */ + nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); + } + /* + * If we have mbufs in the chain we need to do + * deferred transmit. Copy the address from the + * llentry before dropping the lock down below. + */ + if (chain != NULL) + memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); + } + } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { + /* probe right away */ + nd6_llinfo_settimer_locked((void *)ln, 0); + } + } + + /* + * ICMP6 type dependent behavior. 
+ * + * NS: clear IsRouter if new entry + * RS: clear IsRouter + * RA: set IsRouter if there's lladdr + * redir: clear IsRouter if new entry + * + * RA case, (1): + * The spec says that we must set IsRouter in the following cases: + * - If lladdr exist, set IsRouter. This means (1-5). + * - If it is old entry (!newentry), set IsRouter. This means (7). + * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. + * A quetion arises for (1) case. (1) case has no lladdr in the + * neighbor cache, this is similar to (6). + * This case is rare but we figured that we MUST NOT set IsRouter. + * + * newentry olladdr lladdr llchange NS RS RA redir + * D R + * 0 n n -- (1) c ? s + * 0 y n -- (2) c s s + * 0 n y -- (3) c s s + * 0 y y n (4) c s s + * 0 y y y (5) c s s + * 1 -- n -- (6) c c c s + * 1 -- y -- (7) c c s c s + * + * (c=clear s=set) + */ + switch (type & 0xff) { + case ND_NEIGHBOR_SOLICIT: + /* + * New entry must have is_router flag cleared. + */ + if (is_newentry) /* (6-7) */ + ln->ln_router = 0; + break; + case ND_REDIRECT: + /* + * If the icmp is a redirect to a better router, always set the + * is_router flag. Otherwise, if the entry is newly created, + * clear the flag. [RFC 2461, sec 8.3] + */ + if (code == ND_REDIRECT_ROUTER) + ln->ln_router = 1; + else if (is_newentry) /* (6-7) */ + ln->ln_router = 0; + break; + case ND_ROUTER_SOLICIT: + /* + * is_router flag must always be cleared. + */ + ln->ln_router = 0; + break; + case ND_ROUTER_ADVERT: + /* + * Mark an entry with lladdr as a router. 
+ */ + if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */ + (is_newentry && lladdr)) { /* (7) */ + ln->ln_router = 1; + } + break; + } + + if (ln != NULL) { + static_route = (ln->la_flags & LLE_STATIC); + router = ln->ln_router; + + if (flags & ND6_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + if (static_route) + ln = NULL; + } + if (chain) + nd6_output_flush(ifp, ifp, chain, &sin6, NULL); + + /* + * When the link-layer address of a router changes, select the + * best router again. In particular, when the neighbor entry is newly + * created, it might affect the selection policy. + * Question: can we restrict the first condition to the "is_newentry" + * case? + * XXX: when we hear an RA from a new router with the link-layer + * address option, defrouter_select() is called twice, since + * defrtrlist_update called the function as well. However, I believe + * we can compromise the overhead, since it only happens the first + * time. + * XXX: although defrouter_select() should not have a bad effect + * for those are not autoconfigured hosts, we explicitly avoid such + * cases for safety. 
+ */ + if (do_update && router && !V_ip6_forwarding && V_ip6_accept_rtadv) { + /* + * guaranteed recursion + */ + defrouter_select(); + } + + return (ln); +done: + if (ln != NULL) { + if (flags & ND6_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + if (static_route) + ln = NULL; + } + return (ln); +} + +static void +nd6_slowtimo(void *arg) +{ + CURVNET_SET((struct vnet *) arg); + struct nd_ifinfo *nd6if; + struct ifnet *ifp; + + callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, + nd6_slowtimo, curvnet); + IFNET_RLOCK_NOSLEEP(); + for (ifp = TAILQ_FIRST(&V_ifnet); ifp; + ifp = TAILQ_NEXT(ifp, if_list)) { + nd6if = ND_IFINFO(ifp); + if (nd6if->basereachable && /* already initialized */ + (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { + /* + * Since reachable time rarely changes by router + * advertisements, we SHOULD insure that a new random + * value gets recomputed at least once every few hours. + * (RFC 2461, 6.3.4) + */ + nd6if->recalctm = V_nd6_recalc_reachtm_interval; + nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); + } + } + IFNET_RUNLOCK_NOSLEEP(); + CURVNET_RESTORE(); +} + +int +nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, + struct sockaddr_in6 *dst, struct rtentry *rt0) +{ + + return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL)); +} + + +/* + * Note that I'm not enforcing any global serialization + * lle state or asked changes here as the logic is too + * complicated to avoid having to always acquire an exclusive + * lock + * KMM + * + */ +#define senderr(e) { error = (e); goto bad;} + +int +nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, + struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle, + struct mbuf **chain) +{ + struct mbuf *m = m0; + struct llentry *ln = lle; + int error = 0; + int flags = 0; + +#ifdef INVARIANTS + if (lle != NULL) { + + LLE_WLOCK_ASSERT(lle); + + KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer 
passed")); + } +#endif + if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) + goto sendpkt; + + if (nd6_need_cache(ifp) == 0) + goto sendpkt; + + /* + * next hop determination. This routine is derived from ether_output. + */ + + /* + * Address resolution or Neighbor Unreachability Detection + * for the next hop. + * At this point, the destination of the packet must be a unicast + * or an anycast address(i.e. not a multicast). + */ + + flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0; + if (ln == NULL) { + retry: + IF_AFDATA_LOCK(ifp); + ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst); + IF_AFDATA_UNLOCK(ifp); + if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) { + /* + * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), + * the condition below is not very efficient. But we believe + * it is tolerable, because this should be a rare case. + */ + flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0); + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(&dst->sin6_addr, flags, ifp); + IF_AFDATA_UNLOCK(ifp); + } + } + if (ln == NULL) { + if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && + !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { + char ip6buf[INET6_ADDRSTRLEN]; + log(LOG_DEBUG, + "nd6_output: can't allocate llinfo for %s " + "(ln=%p)\n", + ip6_sprintf(ip6buf, &dst->sin6_addr), ln); + senderr(EIO); /* XXX: good error? */ + } + goto sendpkt; /* send anyway */ + } + + /* We don't have to do link-layer address resolution on a p2p link. */ + if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && + ln->ln_state < ND6_LLINFO_REACHABLE) { + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + goto retry; + } + ln->ln_state = ND6_LLINFO_STALE; + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + } + + /* + * The first time we send a packet to a neighbor whose entry is + * STALE, we have to change the state to DELAY and a sets a timer to + * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do + * neighbor unreachability detection on expiration. 
+ * (RFC 2461 7.3.3) + */ + if (ln->ln_state == ND6_LLINFO_STALE) { + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + LLE_RUNLOCK(ln); + goto retry; + } + ln->la_asked = 0; + ln->ln_state = ND6_LLINFO_DELAY; + nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz); + } + + /* + * If the neighbor cache entry has a state other than INCOMPLETE + * (i.e. its link-layer address is already resolved), just + * send the packet. + */ + if (ln->ln_state > ND6_LLINFO_INCOMPLETE) + goto sendpkt; + + /* + * There is a neighbor cache entry, but no ethernet address + * response yet. Append this latest packet to the end of the + * packet queue in the mbuf, unless the number of the packet + * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen, + * the oldest packet in the queue will be removed. + */ + if (ln->ln_state == ND6_LLINFO_NOSTATE) + ln->ln_state = ND6_LLINFO_INCOMPLETE; + + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + LLE_RUNLOCK(ln); + goto retry; + } + + LLE_WLOCK_ASSERT(ln); + + if (ln->la_hold) { + struct mbuf *m_hold; + int i; + + i = 0; + for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) { + i++; + if (m_hold->m_nextpkt == NULL) { + m_hold->m_nextpkt = m; + break; + } + } + while (i >= V_nd6_maxqueuelen) { + m_hold = ln->la_hold; + ln->la_hold = ln->la_hold->m_nextpkt; + m_freem(m_hold); + i--; + } + } else { + ln->la_hold = m; + } + + /* + * If there has been no NS for the neighbor after entering the + * INCOMPLETE state, send the first solicitation. + */ + if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) { + ln->la_asked++; + + nd6_llinfo_settimer_locked(ln, + (long)ND_IFINFO(ifp)->retrans * hz / 1000); + LLE_WUNLOCK(ln); + nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); + if (lle != NULL && ln == lle) + LLE_WLOCK(lle); + + } else if (lle == NULL || ln != lle) { + /* + * We did the lookup (no lle arg) so we + * need to do the unlock here. 
+ */ + LLE_WUNLOCK(ln); + } + + return (0); + + sendpkt: + /* discard the packet if IPv6 operation is disabled on the interface */ + if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { + error = ENETDOWN; /* better error? */ + goto bad; + } + /* + * ln is valid and the caller did not pass in + * an llentry + */ + if ((ln != NULL) && (lle == NULL)) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + } + +#ifdef MAC + mac_netinet6_nd6_send(ifp, m); +#endif + /* + * We were passed in a pointer to an lle with the lock held + * this means that we can't call if_output as we will + * recurse on the lle lock - so what we do is we create + * a list of mbufs to send and transmit them in the caller + * after the lock is dropped + */ + if (lle != NULL) { + if (*chain == NULL) + *chain = m; + else { + struct mbuf *m = *chain; + + /* + * append mbuf to end of deferred chain + */ + while (m->m_nextpkt != NULL) + m = m->m_nextpkt; + m->m_nextpkt = m; + } + return (error); + } + if ((ifp->if_flags & IFF_LOOPBACK) != 0) { + return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst, + NULL)); + } + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL); + return (error); + + bad: + /* + * ln is valid and the caller did not pass in + * an llentry + */ + if ((ln != NULL) && (lle == NULL)) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + } + if (m) + m_freem(m); + return (error); +} +#undef senderr + + +int +nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, + struct sockaddr_in6 *dst, struct route *ro) +{ + struct mbuf *m, *m_head; + struct ifnet *outifp; + int error = 0; + + m_head = chain; + if ((ifp->if_flags & IFF_LOOPBACK) != 0) + outifp = origifp; + else + outifp = ifp; + + while (m_head) { + m = m_head; + m_head = m_head->m_nextpkt; + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro); + } + + /* + * XXX + * note that intermediate errors are blindly ignored - but this is + * 
the same convention as used with nd6_output when called by + * nd6_cache_lladdr + */ + return (error); +} + + +int +nd6_need_cache(struct ifnet *ifp) +{ + /* + * XXX: we currently do not make neighbor cache on any interface + * other than ARCnet, Ethernet, FDDI and GIF. + * + * RFC2893 says: + * - unidirectional tunnels needs no ND + */ + switch (ifp->if_type) { + case IFT_ARCNET: + case IFT_ETHER: + case IFT_FDDI: + case IFT_IEEE1394: +#ifdef IFT_L2VLAN + case IFT_L2VLAN: +#endif +#ifdef IFT_IEEE80211 + case IFT_IEEE80211: +#endif +#ifdef IFT_CARP + case IFT_CARP: +#endif + case IFT_GIF: /* XXX need more cases? */ + case IFT_PPP: + case IFT_TUNNEL: + case IFT_BRIDGE: + case IFT_PROPVIRTUAL: + return (1); + default: + return (0); + } +} + +/* + * the callers of this function need to be re-worked to drop + * the lle lock, drop here for now + */ +int +nd6_storelladdr(struct ifnet *ifp, struct mbuf *m, + struct sockaddr *dst, u_char *desten, struct llentry **lle) +{ + struct llentry *ln; + + *lle = NULL; + IF_AFDATA_UNLOCK_ASSERT(ifp); + if (m->m_flags & M_MCAST) { + int i; + + switch (ifp->if_type) { + case IFT_ETHER: + case IFT_FDDI: +#ifdef IFT_L2VLAN + case IFT_L2VLAN: +#endif +#ifdef IFT_IEEE80211 + case IFT_IEEE80211: +#endif + case IFT_BRIDGE: + case IFT_ISO88025: + ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, + desten); + return (0); + case IFT_IEEE1394: + /* + * netbsd can use if_broadcastaddr, but we don't do so + * to reduce # of ifdef. 
+ */ + for (i = 0; i < ifp->if_addrlen; i++) + desten[i] = ~0; + return (0); + case IFT_ARCNET: + *desten = 0; + return (0); + default: + m_freem(m); + return (EAFNOSUPPORT); + } + } + + + /* + * the entry should have been created in nd6_store_lladdr + */ + IF_AFDATA_LOCK(ifp); + ln = lla_lookup(LLTABLE6(ifp), 0, dst); + IF_AFDATA_UNLOCK(ifp); + if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) { + if (ln != NULL) + LLE_RUNLOCK(ln); + /* this could happen, if we could not allocate memory */ + m_freem(m); + return (1); + } + + bcopy(&ln->ll_addr, desten, ifp->if_addrlen); + *lle = ln; + LLE_RUNLOCK(ln); + /* + * A *small* use after free race exists here + */ + return (0); +} + +static void +clear_llinfo_pqueue(struct llentry *ln) +{ + struct mbuf *m_hold, *m_hold_next; + + for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { + m_hold_next = m_hold->m_nextpkt; + m_hold->m_nextpkt = NULL; + m_freem(m_hold); + } + + ln->la_hold = NULL; + return; +} + +static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); +static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); +#ifdef SYSCTL_DECL +SYSCTL_DECL(_net_inet6_icmp6); +#endif +SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, + CTLFLAG_RD, nd6_sysctl_drlist, ""); +SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, + CTLFLAG_RD, nd6_sysctl_prlist, ""); +SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, + CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); + +static int +nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) +{ + int error; + char buf[1024] __aligned(4); + struct in6_defrouter *d, *de; + struct nd_defrouter *dr; + + if (req->newptr) + return EPERM; + error = 0; + + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; + dr = TAILQ_NEXT(dr, dr_entry)) { + d = (struct in6_defrouter *)buf; + de = (struct in6_defrouter *)(buf + sizeof(buf)); + + if (d + 1 <= de) { + bzero(d, sizeof(*d)); + d->rtaddr.sin6_family = AF_INET6; + d->rtaddr.sin6_len = sizeof(d->rtaddr); + d->rtaddr.sin6_addr = 
dr->rtaddr; + error = sa6_recoverscope(&d->rtaddr); + if (error != 0) + return (error); + d->flags = dr->flags; + d->rtlifetime = dr->rtlifetime; + d->expire = dr->expire; + d->if_index = dr->ifp->if_index; + } else + panic("buffer too short"); + + error = SYSCTL_OUT(req, buf, sizeof(*d)); + if (error) + break; + } + + return (error); +} + +static int +nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) +{ + int error; + char buf[1024] __aligned(4); + struct in6_prefix *p, *pe; + struct nd_prefix *pr; + char ip6buf[INET6_ADDRSTRLEN]; + + if (req->newptr) + return EPERM; + error = 0; + + for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + u_short advrtrs; + size_t advance; + struct sockaddr_in6 *sin6, *s6; + struct nd_pfxrouter *pfr; + + p = (struct in6_prefix *)buf; + pe = (struct in6_prefix *)(buf + sizeof(buf)); + + if (p + 1 <= pe) { + bzero(p, sizeof(*p)); + sin6 = (struct sockaddr_in6 *)(p + 1); + + p->prefix = pr->ndpr_prefix; + if (sa6_recoverscope(&p->prefix)) { + log(LOG_ERR, + "scope error in prefix list (%s)\n", + ip6_sprintf(ip6buf, &p->prefix.sin6_addr)); + /* XXX: press on... */ + } + p->raflags = pr->ndpr_raf; + p->prefixlen = pr->ndpr_plen; + p->vltime = pr->ndpr_vltime; + p->pltime = pr->ndpr_pltime; + p->if_index = pr->ndpr_ifp->if_index; + if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) + p->expire = 0; + else { + time_t maxexpire; + + /* XXX: we assume time_t is signed. 
*/ + maxexpire = (-1) & + ~((time_t)1 << + ((sizeof(maxexpire) * 8) - 1)); + if (pr->ndpr_vltime < + maxexpire - pr->ndpr_lastupdate) { + p->expire = pr->ndpr_lastupdate + + pr->ndpr_vltime; + } else + p->expire = maxexpire; + } + p->refcnt = pr->ndpr_refcnt; + p->flags = pr->ndpr_stateflags; + p->origin = PR_ORIG_RA; + advrtrs = 0; + for (pfr = pr->ndpr_advrtrs.lh_first; pfr; + pfr = pfr->pfr_next) { + if ((void *)&sin6[advrtrs + 1] > (void *)pe) { + advrtrs++; + continue; + } + s6 = &sin6[advrtrs]; + bzero(s6, sizeof(*s6)); + s6->sin6_family = AF_INET6; + s6->sin6_len = sizeof(*sin6); + s6->sin6_addr = pfr->router->rtaddr; + if (sa6_recoverscope(s6)) { + log(LOG_ERR, + "scope error in " + "prefix list (%s)\n", + ip6_sprintf(ip6buf, + &pfr->router->rtaddr)); + } + advrtrs++; + } + p->advrtrs = advrtrs; + } else + panic("buffer too short"); + + advance = sizeof(*p) + sizeof(*sin6) * advrtrs; + error = SYSCTL_OUT(req, buf, advance); + if (error) + break; + } + + return (error); +} diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h new file mode 100644 index 00000000..2da061df --- /dev/null +++ b/freebsd/sys/netinet6/nd6.h @@ -0,0 +1,455 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $ + * $FreeBSD$ + */ + +#ifndef _NETINET6_ND6_HH_ +#define _NETINET6_ND6_HH_ + +/* see net/route.h, or net/if_inarp.h */ +#ifndef RTF_ANNOUNCE +#define RTF_ANNOUNCE RTF_PROTO2 +#endif + +#include <freebsd/sys/queue.h> +#include <freebsd/sys/callout.h> + +struct llentry; + +#define ND6_LLINFO_NOSTATE -2 +/* + * We don't need the WAITDELETE state any more, but we keep the definition + * in a comment line instead of removing it. This is necessary to avoid + * unintentionally reusing the value for another purpose, which might + * affect backward compatibility with old applications. 
+ * (20000711 jinmei@kame.net) + */ +/* #define ND6_LLINFO_WAITDELETE -1 */ +#define ND6_LLINFO_INCOMPLETE 0 +#define ND6_LLINFO_REACHABLE 1 +#define ND6_LLINFO_STALE 2 +#define ND6_LLINFO_DELAY 3 +#define ND6_LLINFO_PROBE 4 + +#define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE) +#define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) + +struct nd_ifinfo { + u_int32_t linkmtu; /* LinkMTU */ + u_int32_t maxmtu; /* Upper bound of LinkMTU */ + u_int32_t basereachable; /* BaseReachableTime */ + u_int32_t reachable; /* Reachable Time */ + u_int32_t retrans; /* Retrans Timer */ + u_int32_t flags; /* Flags */ + int recalctm; /* BaseReacable re-calculation timer */ + u_int8_t chlim; /* CurHopLimit */ + u_int8_t initialized; /* Flag to see the entry is initialized */ + /* the following 3 members are for privacy extension for addrconf */ + u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */ + u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */ + u_int8_t randomid[8]; /* current random ID */ +}; + +#define ND6_IFF_PERFORMNUD 0x1 +#define ND6_IFF_ACCEPT_RTADV 0x2 +#define ND6_IFF_PREFER_SOURCE 0x4 /* XXX: not related to ND. */ +#define ND6_IFF_IFDISABLED 0x8 /* IPv6 operation is disabled due to + * DAD failure. (XXX: not ND-specific) + */ +#define ND6_IFF_DONT_SET_IFROUTE 0x10 + +#define ND6_CREATE LLE_CREATE +#define ND6_EXCLUSIVE LLE_EXCLUSIVE + +#ifdef _KERNEL +#define ND_IFINFO(ifp) \ + (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo) +#define IN6_LINKMTU(ifp) \ + ((ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) \ + ? ND_IFINFO(ifp)->linkmtu \ + : ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) \ + ? ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu)) +#endif + +struct in6_nbrinfo { + char ifname[IFNAMSIZ]; /* if name, e.g. 
"en0" */ + struct in6_addr addr; /* IPv6 address of the neighbor */ + long asked; /* number of queries already sent for this addr */ + int isrouter; /* if it acts as a router */ + int state; /* reachability state */ + int expire; /* lifetime for NDP state transition */ +}; + +#define DRLSTSIZ 10 +#define PRLSTSIZ 10 +struct in6_drlist { + char ifname[IFNAMSIZ]; + struct { + struct in6_addr rtaddr; + u_char flags; + u_short rtlifetime; + u_long expire; + u_short if_index; + } defrouter[DRLSTSIZ]; +}; + +struct in6_defrouter { + struct sockaddr_in6 rtaddr; + u_char flags; + u_short rtlifetime; + u_long expire; + u_short if_index; +}; + +#ifdef _KERNEL +struct in6_oprlist { + char ifname[IFNAMSIZ]; + struct { + struct in6_addr prefix; + struct prf_ra raflags; + u_char prefixlen; + u_char origin; + u_long vltime; + u_long pltime; + u_long expire; + u_short if_index; + u_short advrtrs; /* number of advertisement routers */ + struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */ + } prefix[PRLSTSIZ]; +}; +#endif + +struct in6_prlist { + char ifname[IFNAMSIZ]; + struct { + struct in6_addr prefix; + struct prf_ra raflags; + u_char prefixlen; + u_char origin; + u_int32_t vltime; + u_int32_t pltime; + time_t expire; + u_short if_index; + u_short advrtrs; /* number of advertisement routers */ + struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */ + } prefix[PRLSTSIZ]; +}; + +struct in6_prefix { + struct sockaddr_in6 prefix; + struct prf_ra raflags; + u_char prefixlen; + u_char origin; + u_int32_t vltime; + u_int32_t pltime; + time_t expire; + u_int32_t flags; + int refcnt; + u_short if_index; + u_short advrtrs; /* number of advertisement routers */ + /* struct sockaddr_in6 advrtr[] */ +}; + +#ifdef _KERNEL +struct in6_ondireq { + char ifname[IFNAMSIZ]; + struct { + u_int32_t linkmtu; /* LinkMTU */ + u_int32_t maxmtu; /* Upper bound of LinkMTU */ + u_int32_t basereachable; /* BaseReachableTime */ + u_int32_t reachable; /* Reachable Time */ + u_int32_t retrans; /* 
Retrans Timer */ + u_int32_t flags; /* Flags */ + int recalctm; /* BaseReacable re-calculation timer */ + u_int8_t chlim; /* CurHopLimit */ + u_int8_t receivedra; + } ndi; +}; +#endif + +struct in6_ndireq { + char ifname[IFNAMSIZ]; + struct nd_ifinfo ndi; +}; + +struct in6_ndifreq { + char ifname[IFNAMSIZ]; + u_long ifindex; +}; + +/* Prefix status */ +#define NDPRF_ONLINK 0x1 +#define NDPRF_DETACHED 0x2 + +/* protocol constants */ +#define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */ +#define RTR_SOLICITATION_INTERVAL 4 /* 4sec */ +#define MAX_RTR_SOLICITATIONS 3 + +#define ND6_INFINITE_LIFETIME 0xffffffff + +#ifdef _KERNEL +/* node constants */ +#define MAX_REACHABLE_TIME 3600000 /* msec */ +#define REACHABLE_TIME 30000 /* msec */ +#define RETRANS_TIMER 1000 /* msec */ +#define MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */ +#define MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */ +#define DEF_TEMP_VALID_LIFETIME 604800 /* 1 week */ +#define DEF_TEMP_PREFERRED_LIFETIME 86400 /* 1 day */ +#define TEMPADDR_REGEN_ADVANCE 5 /* sec */ +#define MAX_TEMP_DESYNC_FACTOR 600 /* 10 min */ +#define ND_COMPUTE_RTIME(x) \ + (((MIN_RANDOM_FACTOR * (x >> 10)) + (arc4random() & \ + ((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000) + +TAILQ_HEAD(nd_drhead, nd_defrouter); +struct nd_defrouter { + TAILQ_ENTRY(nd_defrouter) dr_entry; + struct in6_addr rtaddr; + u_char flags; /* flags on RA message */ + u_short rtlifetime; + u_long expire; + struct ifnet *ifp; + int installed; /* is installed into kernel routing table */ +}; + +struct nd_prefixctl { + struct ifnet *ndpr_ifp; + + /* prefix */ + struct sockaddr_in6 ndpr_prefix; + u_char ndpr_plen; + + u_int32_t ndpr_vltime; /* advertised valid lifetime */ + u_int32_t ndpr_pltime; /* advertised preferred lifetime */ + + struct prf_ra ndpr_flags; +}; + + +struct nd_prefix { + struct ifnet *ndpr_ifp; + LIST_ENTRY(nd_prefix) ndpr_entry; + struct sockaddr_in6 ndpr_prefix; /* prefix */ + struct in6_addr ndpr_mask; /* netmask derived from the prefix */ 
+ + u_int32_t ndpr_vltime; /* advertised valid lifetime */ + u_int32_t ndpr_pltime; /* advertised preferred lifetime */ + + time_t ndpr_expire; /* expiration time of the prefix */ + time_t ndpr_preferred; /* preferred time of the prefix */ + time_t ndpr_lastupdate; /* reception time of last advertisement */ + + struct prf_ra ndpr_flags; + u_int32_t ndpr_stateflags; /* actual state flags */ + /* list of routers that advertise the prefix: */ + LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs; + u_char ndpr_plen; + int ndpr_refcnt; /* reference couter from addresses */ +}; + +#define ndpr_next ndpr_entry.le_next + +#define ndpr_raf ndpr_flags +#define ndpr_raf_onlink ndpr_flags.onlink +#define ndpr_raf_auto ndpr_flags.autonomous +#define ndpr_raf_router ndpr_flags.router + +/* + * Message format for use in obtaining information about prefixes + * from inet6 sysctl function + */ +struct inet6_ndpr_msghdr { + u_short inpm_msglen; /* to skip over non-understood messages */ + u_char inpm_version; /* future binary compatibility */ + u_char inpm_type; /* message type */ + struct in6_addr inpm_prefix; + u_long prm_vltim; + u_long prm_pltime; + u_long prm_expire; + u_long prm_preferred; + struct in6_prflags prm_flags; + u_short prm_index; /* index for associated ifp */ + u_char prm_plen; /* length of prefix in bits */ +}; + +#define prm_raf_onlink prm_flags.prf_ra.onlink +#define prm_raf_auto prm_flags.prf_ra.autonomous + +#define prm_statef_onlink prm_flags.prf_state.onlink + +#define prm_rrf_decrvalid prm_flags.prf_rr.decrvalid +#define prm_rrf_decrprefd prm_flags.prf_rr.decrprefd + +struct nd_pfxrouter { + LIST_ENTRY(nd_pfxrouter) pfr_entry; +#define pfr_next pfr_entry.le_next + struct nd_defrouter *router; +}; + +LIST_HEAD(nd_prhead, nd_prefix); + +/* nd6.c */ +VNET_DECLARE(int, nd6_prune); +VNET_DECLARE(int, nd6_delay); +VNET_DECLARE(int, nd6_umaxtries); +VNET_DECLARE(int, nd6_mmaxtries); +VNET_DECLARE(int, nd6_useloopback); +VNET_DECLARE(int, nd6_maxnudhint); 
+VNET_DECLARE(int, nd6_gctimer); +VNET_DECLARE(struct nd_drhead, nd_defrouter); +VNET_DECLARE(struct nd_prhead, nd_prefix); +VNET_DECLARE(int, nd6_debug); +VNET_DECLARE(int, nd6_onlink_ns_rfc4861); +#define V_nd6_prune VNET(nd6_prune) +#define V_nd6_delay VNET(nd6_delay) +#define V_nd6_umaxtries VNET(nd6_umaxtries) +#define V_nd6_mmaxtries VNET(nd6_mmaxtries) +#define V_nd6_useloopback VNET(nd6_useloopback) +#define V_nd6_maxnudhint VNET(nd6_maxnudhint) +#define V_nd6_gctimer VNET(nd6_gctimer) +#define V_nd_defrouter VNET(nd_defrouter) +#define V_nd_prefix VNET(nd_prefix) +#define V_nd6_debug VNET(nd6_debug) +#define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861) + +#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) + +VNET_DECLARE(struct callout, nd6_timer_ch); +#define V_nd6_timer_ch VNET(nd6_timer_ch) + +/* nd6_rtr.c */ +VNET_DECLARE(int, nd6_defifindex); +VNET_DECLARE(int, ip6_desync_factor); /* seconds */ +VNET_DECLARE(u_int32_t, ip6_temp_preferred_lifetime); /* seconds */ +VNET_DECLARE(u_int32_t, ip6_temp_valid_lifetime); /* seconds */ +VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */ +#define V_nd6_defifindex VNET(nd6_defifindex) +#define V_ip6_desync_factor VNET(ip6_desync_factor) +#define V_ip6_temp_preferred_lifetime VNET(ip6_temp_preferred_lifetime) +#define V_ip6_temp_valid_lifetime VNET(ip6_temp_valid_lifetime) +#define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance) + +union nd_opts { + struct nd_opt_hdr *nd_opt_array[8]; /* max = target address list */ + struct { + struct nd_opt_hdr *zero; + struct nd_opt_hdr *src_lladdr; + struct nd_opt_hdr *tgt_lladdr; + struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */ + struct nd_opt_rd_hdr *rh; + struct nd_opt_mtu *mtu; + struct nd_opt_hdr *search; /* multiple opts */ + struct nd_opt_hdr *last; /* multiple opts */ + int done; + struct nd_opt_prefix_info *pi_end;/* multiple opts, end */ + } nd_opt_each; +}; +#define nd_opts_src_lladdr nd_opt_each.src_lladdr 
+#define nd_opts_tgt_lladdr nd_opt_each.tgt_lladdr +#define nd_opts_pi nd_opt_each.pi_beg +#define nd_opts_pi_end nd_opt_each.pi_end +#define nd_opts_rh nd_opt_each.rh +#define nd_opts_mtu nd_opt_each.mtu +#define nd_opts_search nd_opt_each.search +#define nd_opts_last nd_opt_each.last +#define nd_opts_done nd_opt_each.done + +/* XXX: need nd6_var.h?? */ +/* nd6.c */ +void nd6_init __P((void)); +#ifdef VIMAGE +void nd6_destroy __P((void)); +#endif +struct nd_ifinfo *nd6_ifattach __P((struct ifnet *)); +void nd6_ifdetach __P((struct nd_ifinfo *)); +int nd6_is_addr_neighbor __P((struct sockaddr_in6 *, struct ifnet *)); +void nd6_option_init __P((void *, int, union nd_opts *)); +struct nd_opt_hdr *nd6_option __P((union nd_opts *)); +int nd6_options __P((union nd_opts *)); +struct llentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *)); +void nd6_setmtu __P((struct ifnet *)); +void nd6_llinfo_settimer __P((struct llentry *, long)); +void nd6_llinfo_settimer_locked __P((struct llentry *, long)); +void nd6_timer __P((void *)); +void nd6_purge __P((struct ifnet *)); +void nd6_nud_hint __P((struct rtentry *, struct in6_addr *, int)); +int nd6_resolve __P((struct ifnet *, struct rtentry *, struct mbuf *, + struct sockaddr *, u_char *)); +int nd6_ioctl __P((u_long, caddr_t, struct ifnet *)); +struct llentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *, + char *, int, int, int)); +int nd6_output __P((struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *)); +int nd6_output_lle __P((struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *, struct llentry *, + struct mbuf **)); +int nd6_output_flush __P((struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct route *)); +int nd6_need_cache __P((struct ifnet *)); +int nd6_storelladdr __P((struct ifnet *, struct mbuf *, + struct sockaddr *, u_char *, struct llentry **)); + +/* nd6_nbr.c */ +void nd6_na_input __P((struct 
mbuf *, int, int)); +void nd6_na_output __P((struct ifnet *, const struct in6_addr *, + const struct in6_addr *, u_long, int, struct sockaddr *)); +void nd6_ns_input __P((struct mbuf *, int, int)); +void nd6_ns_output __P((struct ifnet *, const struct in6_addr *, + const struct in6_addr *, struct llentry *, int)); +caddr_t nd6_ifptomac __P((struct ifnet *)); +void nd6_dad_start __P((struct ifaddr *, int)); +void nd6_dad_stop __P((struct ifaddr *)); +void nd6_dad_duplicated __P((struct ifaddr *)); + +/* nd6_rtr.c */ +void nd6_rs_input __P((struct mbuf *, int, int)); +void nd6_ra_input __P((struct mbuf *, int, int)); +void prelist_del __P((struct nd_prefix *)); +void defrouter_addreq __P((struct nd_defrouter *)); +void defrouter_reset __P((void)); +void defrouter_select __P((void)); +void defrtrlist_del __P((struct nd_defrouter *)); +void prelist_remove __P((struct nd_prefix *)); +int nd6_prelist_add __P((struct nd_prefixctl *, struct nd_defrouter *, + struct nd_prefix **)); +int nd6_prefix_onlink __P((struct nd_prefix *)); +int nd6_prefix_offlink __P((struct nd_prefix *)); +void pfxlist_onlink_check __P((void)); +struct nd_defrouter *defrouter_lookup __P((struct in6_addr *, struct ifnet *)); +struct nd_prefix *nd6_prefix_lookup __P((struct nd_prefixctl *)); +void rt6_flush __P((struct in6_addr *, struct ifnet *)); +int nd6_setdefaultiface __P((int)); +int in6_tmpifadd __P((const struct in6_ifaddr *, int, int)); + +#endif /* _KERNEL */ + +#endif /* _NETINET6_ND6_HH_ */ diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c new file mode 100644 index 00000000..44dd3cf1 --- /dev/null +++ b/freebsd/sys/netinet6/nd6_nbr.c @@ -0,0 +1,1514 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> +#include <freebsd/local/opt_mpath.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/rwlock.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/queue.h> +#include <freebsd/sys/callout.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/if_var.h> +#include <freebsd/net/route.h> +#ifdef RADIX_MPATH +#include <freebsd/net/radix_mpath.h> +#endif + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/net/if_llatbl.h> +#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet6/in6_ifattach.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet/ip_carp.h> + +#define SDL(s) ((struct sockaddr_dl *)s) + +struct dadq; +static struct dadq *nd6_dad_find(struct ifaddr *); +static void nd6_dad_starttimer(struct dadq *, int); +static void nd6_dad_stoptimer(struct dadq *); +static void nd6_dad_timer(struct dadq *); +static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); +static void nd6_dad_ns_input(struct ifaddr *); +static void nd6_dad_na_input(struct ifaddr *); + +VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/ +VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* 
to transmit DAD packet */ +#define V_dad_ignore_ns VNET(dad_ignore_ns) +#define V_dad_maxtry VNET(dad_maxtry) + +/* + * Input a Neighbor Solicitation Message. + * + * Based on RFC 2461 + * Based on RFC 2462 (duplicate address detection) + */ +void +nd6_ns_input(struct mbuf *m, int off, int icmp6len) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct nd_neighbor_solicit *nd_ns; + struct in6_addr saddr6 = ip6->ip6_src; + struct in6_addr daddr6 = ip6->ip6_dst; + struct in6_addr taddr6; + struct in6_addr myaddr6; + char *lladdr = NULL; + struct ifaddr *ifa = NULL; + int lladdrlen = 0; + int anycast = 0, proxy = 0, tentative = 0; + int tlladdr; + union nd_opts ndopts; + struct sockaddr_dl *proxydl = NULL; + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, icmp6len,); + nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len); + if (nd_ns == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return; + } +#endif + ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */ + taddr6 = nd_ns->nd_ns_target; + if (in6_setscope(&taddr6, ifp, NULL) != 0) + goto bad; + + if (ip6->ip6_hlim != 255) { + nd6log((LOG_ERR, + "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n", + ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); + goto bad; + } + + if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { + /* dst has to be a solicited node multicast address. 
*/ + if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL && + /* don't check ifindex portion */ + daddr6.s6_addr32[1] == 0 && + daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE && + daddr6.s6_addr8[12] == 0xff) { + ; /* good */ + } else { + nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " + "(wrong ip6 dst)\n")); + goto bad; + } + } else if (!V_nd6_onlink_ns_rfc4861) { + struct sockaddr_in6 src_sa6; + + /* + * According to recent IETF discussions, it is not a good idea + * to accept a NS from an address which would not be deemed + * to be a neighbor otherwise. This point is expected to be + * clarified in future revisions of the specification. + */ + bzero(&src_sa6, sizeof(src_sa6)); + src_sa6.sin6_family = AF_INET6; + src_sa6.sin6_len = sizeof(src_sa6); + src_sa6.sin6_addr = saddr6; + if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) { + nd6log((LOG_INFO, "nd6_ns_input: " + "NS packet from non-neighbor\n")); + goto bad; + } + } + + if (IN6_IS_ADDR_MULTICAST(&taddr6)) { + nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n")); + goto bad; + } + + icmp6len -= sizeof(*nd_ns); + nd6_option_init(nd_ns + 1, icmp6len, &ndopts); + if (nd6_options(&ndopts) < 0) { + nd6log((LOG_INFO, + "nd6_ns_input: invalid ND option, ignored\n")); + /* nd6_options have incremented stats */ + goto freeit; + } + + if (ndopts.nd_opts_src_lladdr) { + lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); + lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; + } + + if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) { + nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " + "(link-layer address option)\n")); + goto bad; + } + + /* + * Attaching target link-layer address to the NA? + * (RFC 2461 7.2.4) + * + * NS IP dst is unicast/anycast MUST NOT add + * NS IP dst is solicited-node multicast MUST add + * + * In implementation, we add target link-layer address by default. + * We do not add one in MUST NOT cases. 
+ */ + if (!IN6_IS_ADDR_MULTICAST(&daddr6)) + tlladdr = 0; + else + tlladdr = 1; + + /* + * Target address (taddr6) must be either: + * (1) Valid unicast/anycast address for my receiving interface, + * (2) Unicast address for which I'm offering proxy service, or + * (3) "tentative" address on which DAD is being performed. + */ + /* (1) and (3) check. */ + if (ifp->if_carp) + ifa = (*carp_iamatch6_p)(ifp, &taddr6); + if (ifa == NULL) + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); + + /* (2) check. */ + if (ifa == NULL) { + struct rtentry *rt; + struct sockaddr_in6 tsin6; + int need_proxy; +#ifdef RADIX_MPATH + struct route_in6 ro; +#endif + + bzero(&tsin6, sizeof tsin6); + tsin6.sin6_len = sizeof(struct sockaddr_in6); + tsin6.sin6_family = AF_INET6; + tsin6.sin6_addr = taddr6; + +#ifdef RADIX_MPATH + bzero(&ro, sizeof(ro)); + ro.ro_dst = tsin6; + rtalloc_mpath((struct route *)&ro, RTF_ANNOUNCE); + rt = ro.ro_rt; +#else + rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0); +#endif + need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 && + rt->rt_gateway->sa_family == AF_LINK); + if (rt) + RTFREE_LOCKED(rt); + if (need_proxy) { + /* + * proxy NDP for single entry + */ + ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, + IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); + if (ifa) { + proxy = 1; + proxydl = SDL(rt->rt_gateway); + } + } + } + if (ifa == NULL) { + /* + * We've got an NS packet, and we don't have that adddress + * assigned for us. We MUST silently ignore it. + * See RFC2461 7.2.3. 
+ */ + goto freeit; + } + myaddr6 = *IFA_IN6(ifa); + anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST; + tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED) + goto freeit; + + if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { + nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s " + "(if %d, NS packet %d)\n", + ip6_sprintf(ip6bufs, &taddr6), + ifp->if_addrlen, lladdrlen - 2)); + goto bad; + } + + if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) { + nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n", + ip6_sprintf(ip6bufs, &saddr6))); + goto freeit; + } + + /* + * We have neighbor solicitation packet, with target address equals to + * one of my tentative address. + * + * src addr how to process? + * --- --- + * multicast of course, invalid (rejected in ip6_input) + * unicast somebody is doing address resolution -> ignore + * unspec dup address detection + * + * The processing is defined in RFC 2462. + */ + if (tentative) { + /* + * If source address is unspecified address, it is for + * duplicate address detection. + * + * If not, the packet is for addess resolution; + * silently ignore it. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) + nd6_dad_ns_input(ifa); + + goto freeit; + } + + /* + * If the source address is unspecified address, entries must not + * be created or updated. + * It looks that sender is performing DAD. Output NA toward + * all-node multicast address, to tell the sender that I'm using + * the address. + * S bit ("solicited") must be zero. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { + struct in6_addr in6_all; + + in6_all = in6addr_linklocal_allnodes; + if (in6_setscope(&in6_all, ifp, NULL) != 0) + goto bad; + nd6_na_output(ifp, &in6_all, &taddr6, + ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | + (V_ip6_forwarding ? 
ND_NA_FLAG_ROUTER : 0), + tlladdr, (struct sockaddr *)proxydl); + goto freeit; + } + + nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, + ND_NEIGHBOR_SOLICIT, 0); + + nd6_na_output(ifp, &saddr6, &taddr6, + ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | + (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED, + tlladdr, (struct sockaddr *)proxydl); + freeit: + if (ifa != NULL) + ifa_free(ifa); + m_freem(m); + return; + + bad: + nd6log((LOG_ERR, "nd6_ns_input: src=%s\n", + ip6_sprintf(ip6bufs, &saddr6))); + nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n", + ip6_sprintf(ip6bufs, &daddr6))); + nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n", + ip6_sprintf(ip6bufs, &taddr6))); + ICMP6STAT_INC(icp6s_badns); + if (ifa != NULL) + ifa_free(ifa); + m_freem(m); +} + +/* + * Output a Neighbor Solicitation Message. Caller specifies: + * - ICMP6 header source IP6 address + * - ND6 header target IP6 address + * - ND6 header source datalink address + * + * Based on RFC 2461 + * Based on RFC 2462 (duplicate address detection) + * + * ln - for source address determination + * dad - duplicate address detection + */ +void +nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, + const struct in6_addr *taddr6, struct llentry *ln, int dad) +{ + struct mbuf *m; + struct ip6_hdr *ip6; + struct nd_neighbor_solicit *nd_ns; + struct ip6_moptions im6o; + int icmp6len; + int maxlen; + caddr_t mac; + struct route_in6 ro; + + if (IN6_IS_ADDR_MULTICAST(taddr6)) + return; + + /* estimate the size of message */ + maxlen = sizeof(*ip6) + sizeof(*nd_ns); + maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; + if (max_linkhdr + maxlen >= MCLBYTES) { +#ifdef DIAGNOSTIC + printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES " + "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES); +#endif + return; + } + + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m && max_linkhdr + maxlen >= MHLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + m = NULL; 
+ } + } + if (m == NULL) + return; + m->m_pkthdr.rcvif = NULL; + + bzero(&ro, sizeof(ro)); + + if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) { + m->m_flags |= M_MCAST; + im6o.im6o_multicast_ifp = ifp; + im6o.im6o_multicast_hlim = 255; + im6o.im6o_multicast_loop = 0; + } + + icmp6len = sizeof(*nd_ns); + m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len; + m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */ + + /* fill neighbor solicitation packet */ + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + /* ip6->ip6_plen will be set later */ + ip6->ip6_nxt = IPPROTO_ICMPV6; + ip6->ip6_hlim = 255; + if (daddr6) + ip6->ip6_dst = *daddr6; + else { + ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; + ip6->ip6_dst.s6_addr16[1] = 0; + ip6->ip6_dst.s6_addr32[1] = 0; + ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE; + ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3]; + ip6->ip6_dst.s6_addr8[12] = 0xff; + if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) + goto bad; + } + if (!dad) { + struct ifaddr *ifa; + + /* + * RFC2461 7.2.2: + * "If the source address of the packet prompting the + * solicitation is the same as one of the addresses assigned + * to the outgoing interface, that address SHOULD be placed + * in the IP Source Address of the outgoing solicitation. + * Otherwise, any one of the addresses assigned to the + * interface should be used." + * + * We use the source address for the prompting packet + * (saddr6), if: + * - saddr6 is given from the caller (by giving "ln"), and + * - saddr6 belongs to the outgoing interface. + * Otherwise, we perform the source address selection as usual. + */ + struct in6_addr *hsrc; + + hsrc = NULL; + if (ln != NULL) { + LLE_RLOCK(ln); + if (ln->la_hold != NULL) { + struct ip6_hdr *hip6; /* hold ip6 */ + + /* + * assuming every packet in la_hold has the same IP + * header + */ + hip6 = mtod(ln->la_hold, struct ip6_hdr *); + /* XXX pullup? 
*/ + if (sizeof(*hip6) < ln->la_hold->m_len) { + ip6->ip6_src = hip6->ip6_src; + hsrc = &hip6->ip6_src; + } + } + LLE_RUNLOCK(ln); + } + if (hsrc && (ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, + hsrc)) != NULL) { + /* ip6_src set already. */ + ifa_free(ifa); + } else { + int error; + struct sockaddr_in6 dst_sa; + struct in6_addr src_in; + + bzero(&dst_sa, sizeof(dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_addr = ip6->ip6_dst; + + error = in6_selectsrc(&dst_sa, NULL, + NULL, &ro, NULL, NULL, &src_in); + if (error) { + char ip6buf[INET6_ADDRSTRLEN]; + nd6log((LOG_DEBUG, + "nd6_ns_output: source can't be " + "determined: dst=%s, error=%d\n", + ip6_sprintf(ip6buf, &dst_sa.sin6_addr), + error)); + goto bad; + } + ip6->ip6_src = src_in; + } + } else { + /* + * Source address for DAD packet must always be IPv6 + * unspecified address. (0::0) + * We actually don't have to 0-clear the address (we did it + * above), but we do so here explicitly to make the intention + * clearer. + */ + bzero(&ip6->ip6_src, sizeof(ip6->ip6_src)); + } + nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1); + nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT; + nd_ns->nd_ns_code = 0; + nd_ns->nd_ns_reserved = 0; + nd_ns->nd_ns_target = *taddr6; + in6_clearscope(&nd_ns->nd_ns_target); /* XXX */ + + /* + * Add source link-layer address option. + * + * spec implementation + * --- --- + * DAD packet MUST NOT do not add the option + * there's no link layer address: + * impossible do not add the option + * there's link layer address: + * Multicast NS MUST add one add the option + * Unicast NS SHOULD add one add the option + */ + if (!dad && (mac = nd6_ifptomac(ifp))) { + int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; + struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); + /* 8 byte alignments... 
*/ + optlen = (optlen + 7) & ~7; + + m->m_pkthdr.len += optlen; + m->m_len += optlen; + icmp6len += optlen; + bzero((caddr_t)nd_opt, optlen); + nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; + nd_opt->nd_opt_len = optlen >> 3; + bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); + } + + ip6->ip6_plen = htons((u_short)icmp6len); + nd_ns->nd_ns_cksum = 0; + nd_ns->nd_ns_cksum = + in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len); + + ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL); + icmp6_ifstat_inc(ifp, ifs6_out_msg); + icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); + ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]); + + if (ro.ro_rt) { /* we don't cache this route. */ + RTFREE(ro.ro_rt); + } + return; + + bad: + if (ro.ro_rt) { + RTFREE(ro.ro_rt); + } + m_freem(m); + return; +} + +/* + * Neighbor advertisement input handling. + * + * Based on RFC 2461 + * Based on RFC 2462 (duplicate address detection) + * + * the following items are not implemented yet: + * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) + * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) + */ +void +nd6_na_input(struct mbuf *m, int off, int icmp6len) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct nd_neighbor_advert *nd_na; + struct in6_addr daddr6 = ip6->ip6_dst; + struct in6_addr taddr6; + int flags; + int is_router; + int is_solicited; + int is_override; + char *lladdr = NULL; + int lladdrlen = 0; + int checklink = 0; + struct ifaddr *ifa; + struct llentry *ln = NULL; + union nd_opts ndopts; + struct mbuf *chain = NULL; + struct sockaddr_in6 sin6; + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + + if (ip6->ip6_hlim != 255) { + nd6log((LOG_ERR, + "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n", + ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); + goto bad; + } + +#ifndef PULLDOWN_TEST + 
IP6_EXTHDR_CHECK(m, off, icmp6len,); + nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len); + if (nd_na == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return; + } +#endif + + flags = nd_na->nd_na_flags_reserved; + is_router = ((flags & ND_NA_FLAG_ROUTER) != 0); + is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0); + is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0); + + taddr6 = nd_na->nd_na_target; + if (in6_setscope(&taddr6, ifp, NULL)) + goto bad; /* XXX: impossible */ + + if (IN6_IS_ADDR_MULTICAST(&taddr6)) { + nd6log((LOG_ERR, + "nd6_na_input: invalid target address %s\n", + ip6_sprintf(ip6bufs, &taddr6))); + goto bad; + } + if (IN6_IS_ADDR_MULTICAST(&daddr6)) + if (is_solicited) { + nd6log((LOG_ERR, + "nd6_na_input: a solicited adv is multicasted\n")); + goto bad; + } + + icmp6len -= sizeof(*nd_na); + nd6_option_init(nd_na + 1, icmp6len, &ndopts); + if (nd6_options(&ndopts) < 0) { + nd6log((LOG_INFO, + "nd6_na_input: invalid ND option, ignored\n")); + /* nd6_options have incremented stats */ + goto freeit; + } + + if (ndopts.nd_opts_tgt_lladdr) { + lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); + lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; + } + + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); + + /* + * Target address matches one of my interface address. + * + * If my address is tentative, this means that there's somebody + * already using the same address as mine. This indicates DAD failure. + * This is defined in RFC 2462. + * + * Otherwise, process as defined in RFC 2461. + */ + if (ifa + && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) { + ifa_free(ifa); + nd6_dad_na_input(ifa); + goto freeit; + } + + /* Just for safety, maybe unnecessary. 
*/ + if (ifa) { + ifa_free(ifa); + log(LOG_ERR, + "nd6_na_input: duplicate IP6 address %s\n", + ip6_sprintf(ip6bufs, &taddr6)); + goto freeit; + } + + if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { + nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s " + "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6), + ifp->if_addrlen, lladdrlen - 2)); + goto bad; + } + + /* + * If no neighbor cache entry is found, NA SHOULD silently be + * discarded. + */ + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp); + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) { + goto freeit; + } + + if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { + /* + * If the link-layer has address, and no lladdr option came, + * discard the packet. + */ + if (ifp->if_addrlen && lladdr == NULL) { + goto freeit; + } + + /* + * Record link-layer address, and update the state. + */ + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; + if (is_solicited) { + ln->ln_state = ND6_LLINFO_REACHABLE; + ln->ln_byhint = 0; + if (!ND6_LLINFO_PERMANENT(ln)) { + nd6_llinfo_settimer_locked(ln, + (long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz); + } + } else { + ln->ln_state = ND6_LLINFO_STALE; + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + } + if ((ln->ln_router = is_router) != 0) { + /* + * This means a router's state has changed from + * non-reachable to probably reachable, and might + * affect the status of associated prefixes.. + */ + checklink = 1; + } + } else { + int llchange; + + /* + * Check if the link-layer address has changed or not. + */ + if (lladdr == NULL) + llchange = 0; + else { + if (ln->la_flags & LLE_VALID) { + if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen)) + llchange = 1; + else + llchange = 0; + } else + llchange = 1; + } + + /* + * This is VERY complex. Look at it with care. 
+ * + * override solicit lladdr llchange action + * (L: record lladdr) + * + * 0 0 n -- (2c) + * 0 0 y n (2b) L + * 0 0 y y (1) REACHABLE->STALE + * 0 1 n -- (2c) *->REACHABLE + * 0 1 y n (2b) L *->REACHABLE + * 0 1 y y (1) REACHABLE->STALE + * 1 0 n -- (2a) + * 1 0 y n (2a) L + * 1 0 y y (2a) L *->STALE + * 1 1 n -- (2a) *->REACHABLE + * 1 1 y n (2a) L *->REACHABLE + * 1 1 y y (2a) L *->REACHABLE + */ + if (!is_override && (lladdr != NULL && llchange)) { /* (1) */ + /* + * If state is REACHABLE, make it STALE. + * no other updates should be done. + */ + if (ln->ln_state == ND6_LLINFO_REACHABLE) { + ln->ln_state = ND6_LLINFO_STALE; + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + } + goto freeit; + } else if (is_override /* (2a) */ + || (!is_override && (lladdr != NULL && !llchange)) /* (2b) */ + || lladdr == NULL) { /* (2c) */ + /* + * Update link-local address, if any. + */ + if (lladdr != NULL) { + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; + } + + /* + * If solicited, make the state REACHABLE. + * If not solicited and the link-layer address was + * changed, make it STALE. + */ + if (is_solicited) { + ln->ln_state = ND6_LLINFO_REACHABLE; + ln->ln_byhint = 0; + if (!ND6_LLINFO_PERMANENT(ln)) { + nd6_llinfo_settimer_locked(ln, + (long)ND_IFINFO(ifp)->reachable * hz); + } + } else { + if (lladdr != NULL && llchange) { + ln->ln_state = ND6_LLINFO_STALE; + nd6_llinfo_settimer_locked(ln, + (long)V_nd6_gctimer * hz); + } + } + } + + if (ln->ln_router && !is_router) { + /* + * The peer dropped the router flag. + * Remove the sender from the Default Router List and + * update the Destination Cache entries. + */ + struct nd_defrouter *dr; + struct in6_addr *in6; + + in6 = &L3_ADDR_SIN6(ln)->sin6_addr; + + /* + * Lock to protect the default router list. + * XXX: this might be unnecessary, since this function + * is only called under the network software interrupt + * context. However, we keep it just for safety. 
+ */ + dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp); + if (dr) + defrtrlist_del(dr); + else if (!V_ip6_forwarding) { + /* + * Even if the neighbor is not in the default + * router list, the neighbor may be used + * as a next hop for some destinations + * (e.g. redirect case). So we must + * call rt6_flush explicitly. + */ + rt6_flush(&ip6->ip6_src, ifp); + } + } + ln->ln_router = is_router; + } + /* XXX - QL + * Does this matter? + * rt->rt_flags &= ~RTF_REJECT; + */ + ln->la_asked = 0; + if (ln->la_hold) { + struct mbuf *m_hold, *m_hold_next; + + /* + * reset the la_hold in advance, to explicitly + * prevent a la_hold lookup in nd6_output() + * (wouldn't happen, though...) + */ + for (m_hold = ln->la_hold, ln->la_hold = NULL; + m_hold; m_hold = m_hold_next) { + m_hold_next = m_hold->m_nextpkt; + m_hold->m_nextpkt = NULL; + /* + * we assume ifp is not a loopback here, so just set + * the 2nd argument as the 1st one. + */ + nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); + } + } + freeit: + if (ln != NULL) { + if (chain) + memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); + LLE_WUNLOCK(ln); + + if (chain) + nd6_output_flush(ifp, ifp, chain, &sin6, NULL); + } + if (checklink) + pfxlist_onlink_check(); + + m_freem(m); + return; + + bad: + if (ln != NULL) + LLE_WUNLOCK(ln); + + ICMP6STAT_INC(icp6s_badna); + m_freem(m); +} + +/* + * Neighbor advertisement output handling. 
+ * + * Based on RFC 2461 + * + * the following items are not implemented yet: + * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) + * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) + * + * tlladdr - 1 if include target link-layer address + * sdl0 - sockaddr_dl (= proxy NA) or NULL + */ +void +nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, + const struct in6_addr *taddr6, u_long flags, int tlladdr, + struct sockaddr *sdl0) +{ + struct mbuf *m; + struct ip6_hdr *ip6; + struct nd_neighbor_advert *nd_na; + struct ip6_moptions im6o; + struct in6_addr src, daddr6; + struct sockaddr_in6 dst_sa; + int icmp6len, maxlen, error; + caddr_t mac = NULL; + struct route_in6 ro; + + bzero(&ro, sizeof(ro)); + + daddr6 = *daddr6_0; /* make a local copy for modification */ + + /* estimate the size of message */ + maxlen = sizeof(*ip6) + sizeof(*nd_na); + maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; + if (max_linkhdr + maxlen >= MCLBYTES) { +#ifdef DIAGNOSTIC + printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES " + "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES); +#endif + return; + } + + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m && max_linkhdr + maxlen >= MHLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + m = NULL; + } + } + if (m == NULL) + return; + m->m_pkthdr.rcvif = NULL; + + if (IN6_IS_ADDR_MULTICAST(&daddr6)) { + m->m_flags |= M_MCAST; + im6o.im6o_multicast_ifp = ifp; + im6o.im6o_multicast_hlim = 255; + im6o.im6o_multicast_loop = 0; + } + + icmp6len = sizeof(*nd_na); + m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len; + m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? 
*/ + + /* fill neighbor advertisement packet */ + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_nxt = IPPROTO_ICMPV6; + ip6->ip6_hlim = 255; + if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) { + /* reply to DAD */ + daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; + daddr6.s6_addr16[1] = 0; + daddr6.s6_addr32[1] = 0; + daddr6.s6_addr32[2] = 0; + daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE; + if (in6_setscope(&daddr6, ifp, NULL)) + goto bad; + + flags &= ~ND_NA_FLAG_SOLICITED; + } + ip6->ip6_dst = daddr6; + bzero(&dst_sa, sizeof(struct sockaddr_in6)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(struct sockaddr_in6); + dst_sa.sin6_addr = daddr6; + + /* + * Select a source whose scope is the same as that of the dest. + */ + bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa)); + error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, NULL, &src); + if (error) { + char ip6buf[INET6_ADDRSTRLEN]; + nd6log((LOG_DEBUG, "nd6_na_output: source can't be " + "determined: dst=%s, error=%d\n", + ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); + goto bad; + } + ip6->ip6_src = src; + nd_na = (struct nd_neighbor_advert *)(ip6 + 1); + nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; + nd_na->nd_na_code = 0; + nd_na->nd_na_target = *taddr6; + in6_clearscope(&nd_na->nd_na_target); /* XXX */ + + /* + * "tlladdr" indicates NS's condition for adding tlladdr or not. + * see nd6_ns_input() for details. + * Basically, if NS packet is sent to unicast/anycast addr, + * target lladdr option SHOULD NOT be included. + */ + if (tlladdr) { + /* + * sdl0 != NULL indicates proxy NA. If we do proxy, use + * lladdr in sdl0. If we are not proxying (sending NA for + * my address) use lladdr configured for the interface. 
+ */ + if (sdl0 == NULL) { + if (ifp->if_carp) + mac = (*carp_macmatch6_p)(ifp, m, taddr6); + if (mac == NULL) + mac = nd6_ifptomac(ifp); + } else if (sdl0->sa_family == AF_LINK) { + struct sockaddr_dl *sdl; + sdl = (struct sockaddr_dl *)sdl0; + if (sdl->sdl_alen == ifp->if_addrlen) + mac = LLADDR(sdl); + } + } + if (tlladdr && mac) { + int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; + struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1); + + /* roundup to 8 bytes alignment! */ + optlen = (optlen + 7) & ~7; + + m->m_pkthdr.len += optlen; + m->m_len += optlen; + icmp6len += optlen; + bzero((caddr_t)nd_opt, optlen); + nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; + nd_opt->nd_opt_len = optlen >> 3; + bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); + } else + flags &= ~ND_NA_FLAG_OVERRIDE; + + ip6->ip6_plen = htons((u_short)icmp6len); + nd_na->nd_na_flags_reserved = flags; + nd_na->nd_na_cksum = 0; + nd_na->nd_na_cksum = + in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len); + + ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL); + icmp6_ifstat_inc(ifp, ifs6_out_msg); + icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); + ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]); + + if (ro.ro_rt) { /* we don't cache this route. 
*/ + RTFREE(ro.ro_rt); + } + return; + + bad: + if (ro.ro_rt) { + RTFREE(ro.ro_rt); + } + m_freem(m); + return; +} + +caddr_t +nd6_ifptomac(struct ifnet *ifp) +{ + switch (ifp->if_type) { + case IFT_ARCNET: + case IFT_ETHER: + case IFT_FDDI: + case IFT_IEEE1394: +#ifdef IFT_L2VLAN + case IFT_L2VLAN: +#endif +#ifdef IFT_IEEE80211 + case IFT_IEEE80211: +#endif +#ifdef IFT_CARP + case IFT_CARP: +#endif + case IFT_BRIDGE: + case IFT_ISO88025: + return IF_LLADDR(ifp); + default: + return NULL; + } +} + +struct dadq { + TAILQ_ENTRY(dadq) dad_list; + struct ifaddr *dad_ifa; + int dad_count; /* max NS to send */ + int dad_ns_tcount; /* # of trials to send NS */ + int dad_ns_ocount; /* NS sent so far */ + int dad_ns_icount; + int dad_na_icount; + struct callout dad_timer_ch; + struct vnet *dad_vnet; +}; + +static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq); +VNET_DEFINE(int, dad_init) = 0; +#define V_dadq VNET(dadq) +#define V_dad_init VNET(dad_init) + +static struct dadq * +nd6_dad_find(struct ifaddr *ifa) +{ + struct dadq *dp; + + for (dp = V_dadq.tqh_first; dp; dp = dp->dad_list.tqe_next) { + if (dp->dad_ifa == ifa) + return dp; + } + return NULL; +} + +static void +nd6_dad_starttimer(struct dadq *dp, int ticks) +{ + + callout_reset(&dp->dad_timer_ch, ticks, + (void (*)(void *))nd6_dad_timer, (void *)dp); +} + +static void +nd6_dad_stoptimer(struct dadq *dp) +{ + + callout_stop(&dp->dad_timer_ch); +} + +/* + * Start Duplicate Address Detection (DAD) for specified interface address. + */ +void +nd6_dad_start(struct ifaddr *ifa, int delay) +{ + struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; + struct dadq *dp; + char ip6buf[INET6_ADDRSTRLEN]; + + if (!V_dad_init) { + TAILQ_INIT(&V_dadq); + V_dad_init++; + } + + /* + * If we don't need DAD, don't do it. 
+ * There are several cases: + * - DAD is disabled (ip6_dad_count == 0) + * - the interface address is anycast + */ + if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) { + log(LOG_DEBUG, + "nd6_dad_start: called with non-tentative address " + "%s(%s)\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), + ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); + return; + } + if (ia->ia6_flags & IN6_IFF_ANYCAST) { + ia->ia6_flags &= ~IN6_IFF_TENTATIVE; + return; + } + if (!V_ip6_dad_count) { + ia->ia6_flags &= ~IN6_IFF_TENTATIVE; + return; + } + if (ifa->ifa_ifp == NULL) + panic("nd6_dad_start: ifa->ifa_ifp == NULL"); + if (!(ifa->ifa_ifp->if_flags & IFF_UP)) { + return; + } + if (nd6_dad_find(ifa) != NULL) { + /* DAD already in progress */ + return; + } + + dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT); + if (dp == NULL) { + log(LOG_ERR, "nd6_dad_start: memory allocation failed for " + "%s(%s)\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), + ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); + return; + } + bzero(dp, sizeof(*dp)); + callout_init(&dp->dad_timer_ch, 0); +#ifdef VIMAGE + dp->dad_vnet = curvnet; +#endif + TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list); + + nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp), + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); + + /* + * Send NS packet for DAD, ip6_dad_count times. + * Note that we must delay the first transmission, if this is the + * first packet to be sent from the interface after interface + * (re)initialization. + */ + dp->dad_ifa = ifa; + ifa_ref(ifa); /* just for safety */ + dp->dad_count = V_ip6_dad_count; + dp->dad_ns_icount = dp->dad_na_icount = 0; + dp->dad_ns_ocount = dp->dad_ns_tcount = 0; + if (delay == 0) { + nd6_dad_ns_output(dp, ifa); + nd6_dad_starttimer(dp, + (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000); + } else { + nd6_dad_starttimer(dp, delay); + } +} + +/* + * terminate DAD unconditionally. used for address removals. 
+ */ +void +nd6_dad_stop(struct ifaddr *ifa) +{ + struct dadq *dp; + + if (!V_dad_init) + return; + dp = nd6_dad_find(ifa); + if (!dp) { + /* DAD wasn't started yet */ + return; + } + + nd6_dad_stoptimer(dp); + + TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); + free(dp, M_IP6NDP); + dp = NULL; + ifa_free(ifa); +} + +static void +nd6_dad_timer(struct dadq *dp) +{ + CURVNET_SET(dp->dad_vnet); + int s; + struct ifaddr *ifa = dp->dad_ifa; + struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; + char ip6buf[INET6_ADDRSTRLEN]; + + s = splnet(); /* XXX */ + + /* Sanity check */ + if (ia == NULL) { + log(LOG_ERR, "nd6_dad_timer: called with null parameter\n"); + goto done; + } + if (ia->ia6_flags & IN6_IFF_DUPLICATED) { + log(LOG_ERR, "nd6_dad_timer: called with duplicated address " + "%s(%s)\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), + ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); + goto done; + } + if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) { + log(LOG_ERR, "nd6_dad_timer: called with non-tentative address " + "%s(%s)\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), + ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); + goto done; + } + + /* timeouted with IFF_{RUNNING,UP} check */ + if (dp->dad_ns_tcount > V_dad_maxtry) { + nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n", + if_name(ifa->ifa_ifp))); + + TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); + free(dp, M_IP6NDP); + dp = NULL; + ifa_free(ifa); + goto done; + } + + /* Need more checks? */ + if (dp->dad_ns_ocount < dp->dad_count) { + /* + * We have more NS to go. Send NS packet for DAD. + */ + nd6_dad_ns_output(dp, ifa); + nd6_dad_starttimer(dp, + (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000); + } else { + /* + * We have transmitted sufficient number of DAD packets. + * See what we've got. 
+ */ + int duplicate; + + duplicate = 0; + + if (dp->dad_na_icount) { + /* + * the check is in nd6_dad_na_input(), + * but just in case + */ + duplicate++; + } + + if (dp->dad_ns_icount) { + /* We've seen NS, means DAD has failed. */ + duplicate++; + } + + if (duplicate) { + /* (*dp) will be freed in nd6_dad_duplicated() */ + dp = NULL; + nd6_dad_duplicated(ifa); + } else { + /* + * We are done with DAD. No NA came, no NS came. + * No duplicate address found. + */ + ia->ia6_flags &= ~IN6_IFF_TENTATIVE; + + nd6log((LOG_DEBUG, + "%s: DAD complete for %s - no duplicates found\n", + if_name(ifa->ifa_ifp), + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); + + TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); + free(dp, M_IP6NDP); + dp = NULL; + ifa_free(ifa); + } + } + +done: + splx(s); + CURVNET_RESTORE(); +} + +void +nd6_dad_duplicated(struct ifaddr *ifa) +{ + struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; + struct ifnet *ifp; + struct dadq *dp; + char ip6buf[INET6_ADDRSTRLEN]; + + dp = nd6_dad_find(ifa); + if (dp == NULL) { + log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n"); + return; + } + + log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: " + "NS in/out=%d/%d, NA in=%d\n", + if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), + dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount); + + ia->ia6_flags &= ~IN6_IFF_TENTATIVE; + ia->ia6_flags |= IN6_IFF_DUPLICATED; + + /* We are done with DAD, with duplicate address found. 
(failure) */ + nd6_dad_stoptimer(dp); + + ifp = ifa->ifa_ifp; + log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n", + if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)); + log(LOG_ERR, "%s: manual intervention required\n", + if_name(ifp)); + + /* + * If the address is a link-local address formed from an interface + * identifier based on the hardware address which is supposed to be + * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP + * operation on the interface SHOULD be disabled. + * [rfc2462bis-03 Section 5.4.5] + */ + if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { + struct in6_addr in6; + + /* + * To avoid over-reaction, we only apply this logic when we are + * very sure that hardware addresses are supposed to be unique. + */ + switch (ifp->if_type) { + case IFT_ETHER: + case IFT_FDDI: + case IFT_ATM: + case IFT_IEEE1394: +#ifdef IFT_IEEE80211 + case IFT_IEEE80211: +#endif + in6 = ia->ia_addr.sin6_addr; + if (in6_get_hw_ifid(ifp, &in6) == 0 && + IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) { + ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; + log(LOG_ERR, "%s: possible hardware address " + "duplication detected, disable IPv6\n", + if_name(ifp)); + } + break; + } + } + + TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); + free(dp, M_IP6NDP); + dp = NULL; + ifa_free(ifa); +} + +static void +nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa) +{ + struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; + struct ifnet *ifp = ifa->ifa_ifp; + + dp->dad_ns_tcount++; + if ((ifp->if_flags & IFF_UP) == 0) { + return; + } + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + return; + } + + dp->dad_ns_ocount++; + nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1); +} + +static void +nd6_dad_ns_input(struct ifaddr *ifa) +{ + struct in6_ifaddr *ia; + struct ifnet *ifp; + const struct in6_addr *taddr6; + struct dadq *dp; + int duplicate; + + if (ifa == NULL) + panic("ifa == NULL in nd6_dad_ns_input"); + + ia = (struct in6_ifaddr 
*)ifa; + ifp = ifa->ifa_ifp; + taddr6 = &ia->ia_addr.sin6_addr; + duplicate = 0; + dp = nd6_dad_find(ifa); + + /* Quickhack - completely ignore DAD NS packets */ + if (V_dad_ignore_ns) { + char ip6buf[INET6_ADDRSTRLEN]; + nd6log((LOG_INFO, + "nd6_dad_ns_input: ignoring DAD NS packet for " + "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6), + if_name(ifa->ifa_ifp))); + return; + } + + /* + * if I'm yet to start DAD, someone else started using this address + * first. I have a duplicate and you win. + */ + if (dp == NULL || dp->dad_ns_ocount == 0) + duplicate++; + + /* XXX more checks for loopback situation - see nd6_dad_timer too */ + + if (duplicate) { + dp = NULL; /* will be freed in nd6_dad_duplicated() */ + nd6_dad_duplicated(ifa); + } else { + /* + * not sure if I got a duplicate. + * increment ns count and see what happens. + */ + if (dp) + dp->dad_ns_icount++; + } +} + +static void +nd6_dad_na_input(struct ifaddr *ifa) +{ + struct dadq *dp; + + if (ifa == NULL) + panic("ifa == NULL in nd6_dad_na_input"); + + dp = nd6_dad_find(ifa); + if (dp) + dp->dad_na_icount++; + + /* remove the address. */ + nd6_dad_duplicated(ifa); +} diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c new file mode 100644 index 00000000..97aa15fb --- /dev/null +++ b/freebsd/sys/netinet6/nd6_rtr.c @@ -0,0 +1,2162 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/sockio.h> +#include <freebsd/sys/time.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/rwlock.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/queue.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/if_dl.h> +#include <freebsd/net/route.h> +#include <freebsd/net/radix.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/net/if_llatbl.h> +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet6/in6_ifattach.h> +#include <freebsd/netinet/ip6.h> +#include 
<freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet6/scope6_var.h> + +static int rtpref(struct nd_defrouter *); +static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *); +static int prelist_update __P((struct nd_prefixctl *, struct nd_defrouter *, + struct mbuf *, int)); +static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int); +static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *, + struct nd_defrouter *)); +static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *); +static void pfxrtr_del(struct nd_pfxrouter *); +static struct nd_pfxrouter *find_pfxlist_reachable_router +(struct nd_prefix *); +static void defrouter_delreq(struct nd_defrouter *); +static void nd6_rtmsg(int, struct rtentry *); + +static int in6_init_prefix_ltimes(struct nd_prefix *); +static void in6_init_address_ltimes __P((struct nd_prefix *, + struct in6_addrlifetime *)); + +static int rt6_deleteroute(struct radix_node *, void *); + +VNET_DECLARE(int, nd6_recalc_reachtm_interval); +#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) + +static VNET_DEFINE(struct ifnet *, nd6_defifp); +VNET_DEFINE(int, nd6_defifindex); +#define V_nd6_defifp VNET(nd6_defifp) + +VNET_DEFINE(int, ip6_use_tempaddr) = 0; + +VNET_DEFINE(int, ip6_desync_factor); +VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME; +VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME; + +VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE; + +/* RTPREF_MEDIUM has to be 0! */ +#define RTPREF_HIGH 1 +#define RTPREF_MEDIUM 0 +#define RTPREF_LOW (-1) +#define RTPREF_RESERVED (-2) +#define RTPREF_INVALID (-3) /* internal */ + +/* + * Receive Router Solicitation Message - just for routers. + * Router solicitation/advertisement is mostly managed by userland program + * (rtadvd) so here we have no function like nd6_ra_output(). 
+ * + * Based on RFC 2461 + */ +void +nd6_rs_input(struct mbuf *m, int off, int icmp6len) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct nd_router_solicit *nd_rs; + struct in6_addr saddr6 = ip6->ip6_src; + char *lladdr = NULL; + int lladdrlen = 0; + union nd_opts ndopts; + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + + /* If I'm not a router, ignore it. */ + if (V_ip6_accept_rtadv != 0 || V_ip6_forwarding != 1) + goto freeit; + + /* Sanity checks */ + if (ip6->ip6_hlim != 255) { + nd6log((LOG_ERR, + "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n", + ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); + goto bad; + } + + /* + * Don't update the neighbor cache, if src = ::. + * This indicates that the src has no IP address assigned yet. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) + goto freeit; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, icmp6len,); + nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len); + if (nd_rs == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return; + } +#endif + + icmp6len -= sizeof(*nd_rs); + nd6_option_init(nd_rs + 1, icmp6len, &ndopts); + if (nd6_options(&ndopts) < 0) { + nd6log((LOG_INFO, + "nd6_rs_input: invalid ND option, ignored\n")); + /* nd6_options have incremented stats */ + goto freeit; + } + + if (ndopts.nd_opts_src_lladdr) { + lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); + lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; + } + + if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { + nd6log((LOG_INFO, + "nd6_rs_input: lladdrlen mismatch for %s " + "(if %d, RS packet %d)\n", + ip6_sprintf(ip6bufs, &saddr6), + ifp->if_addrlen, lladdrlen - 2)); + goto bad; + } + + nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0); + + freeit: + m_freem(m); + return; + + bad: + 
ICMP6STAT_INC(icp6s_badrs); + m_freem(m); +} + +/* + * Receive Router Advertisement Message. + * + * Based on RFC 2461 + * TODO: on-link bit on prefix information + * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing + */ +void +nd6_ra_input(struct mbuf *m, int off, int icmp6len) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct nd_ifinfo *ndi = ND_IFINFO(ifp); + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct nd_router_advert *nd_ra; + struct in6_addr saddr6 = ip6->ip6_src; + int mcast = 0; + union nd_opts ndopts; + struct nd_defrouter *dr; + char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + + /* + * We only accept RAs only when + * the system-wide variable allows the acceptance, and + * per-interface variable allows RAs on the receiving interface. + */ + if (V_ip6_accept_rtadv == 0) + goto freeit; + if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV)) + goto freeit; + + if (ip6->ip6_hlim != 255) { + nd6log((LOG_ERR, + "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n", + ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), + ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); + goto bad; + } + + if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) { + nd6log((LOG_ERR, + "nd6_ra_input: src %s is not link-local\n", + ip6_sprintf(ip6bufs, &saddr6))); + goto bad; + } + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, icmp6len,); + nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len); + if (nd_ra == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return; + } +#endif + + icmp6len -= sizeof(*nd_ra); + nd6_option_init(nd_ra + 1, icmp6len, &ndopts); + if (nd6_options(&ndopts) < 0) { + nd6log((LOG_INFO, + "nd6_ra_input: invalid ND option, ignored\n")); + /* nd6_options have incremented stats */ + goto freeit; + } + + { + struct nd_defrouter dr0; + u_int32_t advreachable = nd_ra->nd_ra_reachable; + + /* remember if this is a multicasted advertisement */ + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) + 
mcast = 1; + + bzero(&dr0, sizeof(dr0)); + dr0.rtaddr = saddr6; + dr0.flags = nd_ra->nd_ra_flags_reserved; + dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); + dr0.expire = time_second + dr0.rtlifetime; + dr0.ifp = ifp; + /* unspecified or not? (RFC 2461 6.3.4) */ + if (advreachable) { + advreachable = ntohl(advreachable); + if (advreachable <= MAX_REACHABLE_TIME && + ndi->basereachable != advreachable) { + ndi->basereachable = advreachable; + ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable); + ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */ + } + } + if (nd_ra->nd_ra_retransmit) + ndi->retrans = ntohl(nd_ra->nd_ra_retransmit); + if (nd_ra->nd_ra_curhoplimit) + ndi->chlim = nd_ra->nd_ra_curhoplimit; + dr = defrtrlist_update(&dr0); + } + + /* + * prefix + */ + if (ndopts.nd_opts_pi) { + struct nd_opt_hdr *pt; + struct nd_opt_prefix_info *pi = NULL; + struct nd_prefixctl pr; + + for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi; + pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end; + pt = (struct nd_opt_hdr *)((caddr_t)pt + + (pt->nd_opt_len << 3))) { + if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION) + continue; + pi = (struct nd_opt_prefix_info *)pt; + + if (pi->nd_opt_pi_len != 4) { + nd6log((LOG_INFO, + "nd6_ra_input: invalid option " + "len %d for prefix information option, " + "ignored\n", pi->nd_opt_pi_len)); + continue; + } + + if (128 < pi->nd_opt_pi_prefix_len) { + nd6log((LOG_INFO, + "nd6_ra_input: invalid prefix " + "len %d for prefix information option, " + "ignored\n", pi->nd_opt_pi_prefix_len)); + continue; + } + + if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix) + || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) { + nd6log((LOG_INFO, + "nd6_ra_input: invalid prefix " + "%s, ignored\n", + ip6_sprintf(ip6bufs, + &pi->nd_opt_pi_prefix))); + continue; + } + + bzero(&pr, sizeof(pr)); + pr.ndpr_prefix.sin6_family = AF_INET6; + pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix); + pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix; + 
pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif; + + pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved & + ND_OPT_PI_FLAG_ONLINK) ? 1 : 0; + pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved & + ND_OPT_PI_FLAG_AUTO) ? 1 : 0; + pr.ndpr_plen = pi->nd_opt_pi_prefix_len; + pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time); + pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time); + (void)prelist_update(&pr, dr, m, mcast); + } + } + + /* + * MTU + */ + if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) { + u_long mtu; + u_long maxmtu; + + mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu); + + /* lower bound */ + if (mtu < IPV6_MMTU) { + nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option " + "mtu=%lu sent from %s, ignoring\n", + mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src))); + goto skip; + } + + /* upper bound */ + maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu) + ? ndi->maxmtu : ifp->if_mtu; + if (mtu <= maxmtu) { + int change = (ndi->linkmtu != mtu); + + ndi->linkmtu = mtu; + if (change) /* in6_maxmtu may change */ + in6_setmaxmtu(); + } else { + nd6log((LOG_INFO, "nd6_ra_input: bogus mtu " + "mtu=%lu sent from %s; " + "exceeds maxmtu %lu, ignoring\n", + mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu)); + } + } + + skip: + + /* + * Source link layer address + */ + { + char *lladdr = NULL; + int lladdrlen = 0; + + if (ndopts.nd_opts_src_lladdr) { + lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); + lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; + } + + if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { + nd6log((LOG_INFO, + "nd6_ra_input: lladdrlen mismatch for %s " + "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6), + ifp->if_addrlen, lladdrlen - 2)); + goto bad; + } + + nd6_cache_lladdr(ifp, &saddr6, lladdr, + lladdrlen, ND_ROUTER_ADVERT, 0); + + /* + * Installing a link-layer address might change the state of the + * router's neighbor cache, which might also affect our on-link + * detection of adveritsed prefixes. 
+ */ + pfxlist_onlink_check(); + } + + freeit: + m_freem(m); + return; + + bad: + ICMP6STAT_INC(icp6s_badra); + m_freem(m); +} + +/* + * default router list proccessing sub routines + */ + +/* tell the change to user processes watching the routing socket. */ +static void +nd6_rtmsg(int cmd, struct rtentry *rt) +{ + struct rt_addrinfo info; + struct ifnet *ifp; + struct ifaddr *ifa; + + bzero((caddr_t)&info, sizeof(info)); + info.rti_info[RTAX_DST] = rt_key(rt); + info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info.rti_info[RTAX_NETMASK] = rt_mask(rt); + ifp = rt->rt_ifp; + if (ifp != NULL) { + IF_ADDR_LOCK(ifp); + ifa = TAILQ_FIRST(&ifp->if_addrhead); + info.rti_info[RTAX_IFP] = ifa->ifa_addr; + ifa_ref(ifa); + IF_ADDR_UNLOCK(ifp); + info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; + } else + ifa = NULL; + + rt_missmsg(cmd, &info, rt->rt_flags, 0); + if (ifa != NULL) + ifa_free(ifa); +} + +void +defrouter_addreq(struct nd_defrouter *new) +{ + struct sockaddr_in6 def, mask, gate; + struct rtentry *newrt = NULL; + int s; + int error; + + bzero(&def, sizeof(def)); + bzero(&mask, sizeof(mask)); + bzero(&gate, sizeof(gate)); + + def.sin6_len = mask.sin6_len = gate.sin6_len = + sizeof(struct sockaddr_in6); + def.sin6_family = gate.sin6_family = AF_INET6; + gate.sin6_addr = new->rtaddr; + + s = splnet(); + error = rtrequest(RTM_ADD, (struct sockaddr *)&def, + (struct sockaddr *)&gate, (struct sockaddr *)&mask, + RTF_GATEWAY, &newrt); + if (newrt) { + nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ + RTFREE(newrt); + } + if (error == 0) + new->installed = 1; + splx(s); + return; +} + +struct nd_defrouter * +defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) +{ + struct nd_defrouter *dr; + + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; + dr = TAILQ_NEXT(dr, dr_entry)) { + if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) + return (dr); + } + + return (NULL); /* search failed */ +} + +/* + * Remove the default route for a given router. 
+ * This is just a subroutine function for defrouter_select(), and should + * not be called from anywhere else. + */ +static void +defrouter_delreq(struct nd_defrouter *dr) +{ + struct sockaddr_in6 def, mask, gate; + struct rtentry *oldrt = NULL; + + bzero(&def, sizeof(def)); + bzero(&mask, sizeof(mask)); + bzero(&gate, sizeof(gate)); + + def.sin6_len = mask.sin6_len = gate.sin6_len = + sizeof(struct sockaddr_in6); + def.sin6_family = gate.sin6_family = AF_INET6; + gate.sin6_addr = dr->rtaddr; + + rtrequest(RTM_DELETE, (struct sockaddr *)&def, + (struct sockaddr *)&gate, + (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt); + if (oldrt) { + nd6_rtmsg(RTM_DELETE, oldrt); + RTFREE(oldrt); + } + + dr->installed = 0; +} + +/* + * remove all default routes from default router list + */ +void +defrouter_reset(void) +{ + struct nd_defrouter *dr; + + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; + dr = TAILQ_NEXT(dr, dr_entry)) + defrouter_delreq(dr); + + /* + * XXX should we also nuke any default routers in the kernel, by + * going through them by rtalloc1()? + */ +} + +void +defrtrlist_del(struct nd_defrouter *dr) +{ + struct nd_defrouter *deldr = NULL; + struct nd_prefix *pr; + + /* + * Flush all the routing table entries that use the router + * as a next hop. + */ + if (!V_ip6_forwarding && V_ip6_accept_rtadv) /* XXX: better condition? */ + rt6_flush(&dr->rtaddr, dr->ifp); + + if (dr->installed) { + deldr = dr; + defrouter_delreq(dr); + } + TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); + + /* + * Also delete all the pointers to the router in each prefix lists. + */ + for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + struct nd_pfxrouter *pfxrtr; + if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) + pfxrtr_del(pfxrtr); + } + pfxlist_onlink_check(); + + /* + * If the router is the primary one, choose a new one. + * Note that defrouter_select() will remove the current gateway + * from the routing table. 
+ */ + if (deldr) + defrouter_select(); + + free(dr, M_IP6NDP); +} + +/* + * Default Router Selection according to Section 6.3.6 of RFC 2461 and + * draft-ietf-ipngwg-router-selection: + * 1) Routers that are reachable or probably reachable should be preferred. + * If we have more than one (probably) reachable router, prefer ones + * with the highest router preference. + * 2) When no routers on the list are known to be reachable or + * probably reachable, routers SHOULD be selected in a round-robin + * fashion, regardless of router preference values. + * 3) If the Default Router List is empty, assume that all + * destinations are on-link. + * + * We assume nd_defrouter is sorted by router preference value. + * Since the code below covers both with and without router preference cases, + * we do not need to classify the cases by ifdef. + * + * At this moment, we do not try to install more than one default router, + * even when the multipath routing is available, because we're not sure about + * the benefits for stub hosts comparing to the risk of making the code + * complicated and the possibility of introducing bugs. + */ +void +defrouter_select(void) +{ + int s = splnet(); + struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL; + struct llentry *ln = NULL; + + /* + * This function should be called only when acting as an autoconfigured + * host. Although the remaining part of this function is not effective + * if the node is not an autoconfigured host, we explicitly exclude + * such cases here for safety. + */ + if (V_ip6_forwarding || !V_ip6_accept_rtadv) { + nd6log((LOG_WARNING, + "defrouter_select: called unexpectedly (forwarding=%d, " + "accept_rtadv=%d)\n", V_ip6_forwarding, V_ip6_accept_rtadv)); + splx(s); + return; + } + + /* + * Let's handle easy case (3) first: + * If default router list is empty, there's nothing to be done. 
+ */ + if (!TAILQ_FIRST(&V_nd_defrouter)) { + splx(s); + return; + } + + /* + * Search for a (probably) reachable router from the list. + * We just pick up the first reachable one (if any), assuming that + * the ordering rule of the list described in defrtrlist_update(). + */ + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; + dr = TAILQ_NEXT(dr, dr_entry)) { + IF_AFDATA_LOCK(dr->ifp); + if (selected_dr == NULL && + (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && + ND6_IS_LLINFO_PROBREACH(ln)) { + selected_dr = dr; + } + IF_AFDATA_UNLOCK(dr->ifp); + if (ln != NULL) { + LLE_RUNLOCK(ln); + ln = NULL; + } + + if (dr->installed && installed_dr == NULL) + installed_dr = dr; + else if (dr->installed && installed_dr) { + /* this should not happen. warn for diagnosis. */ + log(LOG_ERR, "defrouter_select: more than one router" + " is installed\n"); + } + } + /* + * If none of the default routers was found to be reachable, + * round-robin the list regardless of preference. + * Otherwise, if we have an installed router, check if the selected + * (reachable) router should really be preferred to the installed one. + * We only prefer the new router when the old one is not reachable + * or when the new one has a really higher preference value. + */ + if (selected_dr == NULL) { + if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry)) + selected_dr = TAILQ_FIRST(&V_nd_defrouter); + else + selected_dr = TAILQ_NEXT(installed_dr, dr_entry); + } else if (installed_dr) { + IF_AFDATA_LOCK(installed_dr->ifp); + if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && + ND6_IS_LLINFO_PROBREACH(ln) && + rtpref(selected_dr) <= rtpref(installed_dr)) { + selected_dr = installed_dr; + } + IF_AFDATA_UNLOCK(installed_dr->ifp); + if (ln != NULL) + LLE_RUNLOCK(ln); + } + + /* + * If the selected router is different than the installed one, + * remove the installed router and install the selected one. + * Note that the selected router is never NULL here. 
+ */ + if (installed_dr != selected_dr) { + if (installed_dr) + defrouter_delreq(installed_dr); + defrouter_addreq(selected_dr); + } + + splx(s); + return; +} + +/* + * for default router selection + * regards router-preference field as a 2-bit signed integer + */ +static int +rtpref(struct nd_defrouter *dr) +{ + switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) { + case ND_RA_FLAG_RTPREF_HIGH: + return (RTPREF_HIGH); + case ND_RA_FLAG_RTPREF_MEDIUM: + case ND_RA_FLAG_RTPREF_RSV: + return (RTPREF_MEDIUM); + case ND_RA_FLAG_RTPREF_LOW: + return (RTPREF_LOW); + default: + /* + * This case should never happen. If it did, it would mean a + * serious bug of kernel internal. We thus always bark here. + * Or, can we even panic? + */ + log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags); + return (RTPREF_INVALID); + } + /* NOTREACHED */ +} + +static struct nd_defrouter * +defrtrlist_update(struct nd_defrouter *new) +{ + struct nd_defrouter *dr, *n; + int s = splnet(); + + if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { + /* entry exists */ + if (new->rtlifetime == 0) { + defrtrlist_del(dr); + dr = NULL; + } else { + int oldpref = rtpref(dr); + + /* override */ + dr->flags = new->flags; /* xxx flag check */ + dr->rtlifetime = new->rtlifetime; + dr->expire = new->expire; + + /* + * If the preference does not change, there's no need + * to sort the entries. + */ + if (rtpref(new) == oldpref) { + splx(s); + return (dr); + } + + /* + * preferred router may be changed, so relocate + * this router. + * XXX: calling TAILQ_REMOVE directly is a bad manner. + * However, since defrtrlist_del() has many side + * effects, we intentionally do so here. + * defrouter_select() below will handle routing + * changes later. 
+ */ + TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); + n = dr; + goto insert; + } + splx(s); + return (dr); + } + + /* entry does not exist */ + if (new->rtlifetime == 0) { + splx(s); + return (NULL); + } + + n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT); + if (n == NULL) { + splx(s); + return (NULL); + } + bzero(n, sizeof(*n)); + *n = *new; + +insert: + /* + * Insert the new router in the Default Router List; + * The Default Router List should be in the descending order + * of router-preferece. Routers with the same preference are + * sorted in the arriving time order. + */ + + /* insert at the end of the group */ + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; + dr = TAILQ_NEXT(dr, dr_entry)) { + if (rtpref(n) > rtpref(dr)) + break; + } + if (dr) + TAILQ_INSERT_BEFORE(dr, n, dr_entry); + else + TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry); + + defrouter_select(); + + splx(s); + + return (n); +} + +static struct nd_pfxrouter * +pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr) +{ + struct nd_pfxrouter *search; + + for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) { + if (search->router == dr) + break; + } + + return (search); +} + +static void +pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) +{ + struct nd_pfxrouter *new; + + new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); + if (new == NULL) + return; + bzero(new, sizeof(*new)); + new->router = dr; + + LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); + + pfxlist_onlink_check(); +} + +static void +pfxrtr_del(struct nd_pfxrouter *pfr) +{ + LIST_REMOVE(pfr, pfr_entry); + free(pfr, M_IP6NDP); +} + +struct nd_prefix * +nd6_prefix_lookup(struct nd_prefixctl *key) +{ + struct nd_prefix *search; + + for (search = V_nd_prefix.lh_first; + search; search = search->ndpr_next) { + if (key->ndpr_ifp == search->ndpr_ifp && + key->ndpr_plen == search->ndpr_plen && + in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr, + 
&search->ndpr_prefix.sin6_addr, key->ndpr_plen)) { + break; + } + } + + return (search); +} + +int +nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, + struct nd_prefix **newp) +{ + struct nd_prefix *new = NULL; + int error = 0; + int i, s; + char ip6buf[INET6_ADDRSTRLEN]; + + new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); + if (new == NULL) + return(ENOMEM); + bzero(new, sizeof(*new)); + new->ndpr_ifp = pr->ndpr_ifp; + new->ndpr_prefix = pr->ndpr_prefix; + new->ndpr_plen = pr->ndpr_plen; + new->ndpr_vltime = pr->ndpr_vltime; + new->ndpr_pltime = pr->ndpr_pltime; + new->ndpr_flags = pr->ndpr_flags; + if ((error = in6_init_prefix_ltimes(new)) != 0) { + free(new, M_IP6NDP); + return(error); + } + new->ndpr_lastupdate = time_second; + if (newp != NULL) + *newp = new; + + /* initialization */ + LIST_INIT(&new->ndpr_advrtrs); + in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen); + /* make prefix in the canonical form */ + for (i = 0; i < 4; i++) + new->ndpr_prefix.sin6_addr.s6_addr32[i] &= + new->ndpr_mask.s6_addr32[i]; + + s = splnet(); + /* link ndpr_entry to nd_prefix list */ + LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry); + splx(s); + + /* ND_OPT_PI_FLAG_ONLINK processing */ + if (new->ndpr_raf_onlink) { + int e; + + if ((e = nd6_prefix_onlink(new)) != 0) { + nd6log((LOG_ERR, "nd6_prelist_add: failed to make " + "the prefix %s/%d on-link on %s (errno=%d)\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); + /* proceed anyway. XXX: is it correct? */ + } + } + + if (dr) + pfxrtr_add(new, dr); + + return 0; +} + +void +prelist_remove(struct nd_prefix *pr) +{ + struct nd_pfxrouter *pfr, *next; + int e, s; + char ip6buf[INET6_ADDRSTRLEN]; + + /* make sure to invalidate the prefix until it is really freed. 
*/ + pr->ndpr_vltime = 0; + pr->ndpr_pltime = 0; + + /* + * Though these flags are now meaningless, we'd rather keep the value + * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users + * when executing "ndp -p". + */ + + if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 && + (e = nd6_prefix_offlink(pr)) != 0) { + nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink " + "on %s, errno=%d\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); + /* what should we do? */ + } + + if (pr->ndpr_refcnt > 0) + return; /* notice here? */ + + s = splnet(); + + /* unlink ndpr_entry from nd_prefix list */ + LIST_REMOVE(pr, ndpr_entry); + + /* free list of routers that adversed the prefix */ + for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) { + next = pfr->pfr_next; + + free(pfr, M_IP6NDP); + } + splx(s); + + free(pr, M_IP6NDP); + + pfxlist_onlink_check(); +} + +/* + * dr - may be NULL + */ + +static int +prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, + struct mbuf *m, int mcast) +{ + struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL; + struct ifaddr *ifa; + struct ifnet *ifp = new->ndpr_ifp; + struct nd_prefix *pr; + int s = splnet(); + int error = 0; + int newprefix = 0; + int auth; + struct in6_addrlifetime lt6_tmp; + char ip6buf[INET6_ADDRSTRLEN]; + + auth = 0; + if (m) { + /* + * Authenticity for NA consists authentication for + * both IP header and IP datagrams, doesn't it ? + */ +#if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM) + auth = ((m->m_flags & M_AUTHIPHDR) && + (m->m_flags & M_AUTHIPDGM)); +#endif + } + + if ((pr = nd6_prefix_lookup(new)) != NULL) { + /* + * nd6_prefix_lookup() ensures that pr and new have the same + * prefix on a same interface. + */ + + /* + * Update prefix information. Note that the on-link (L) bit + * and the autonomous (A) bit should NOT be changed from 1 + * to 0. 
+ */ + if (new->ndpr_raf_onlink == 1) + pr->ndpr_raf_onlink = 1; + if (new->ndpr_raf_auto == 1) + pr->ndpr_raf_auto = 1; + if (new->ndpr_raf_onlink) { + pr->ndpr_vltime = new->ndpr_vltime; + pr->ndpr_pltime = new->ndpr_pltime; + (void)in6_init_prefix_ltimes(pr); /* XXX error case? */ + pr->ndpr_lastupdate = time_second; + } + + if (new->ndpr_raf_onlink && + (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { + int e; + + if ((e = nd6_prefix_onlink(pr)) != 0) { + nd6log((LOG_ERR, + "prelist_update: failed to make " + "the prefix %s/%d on-link on %s " + "(errno=%d)\n", + ip6_sprintf(ip6buf, + &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); + /* proceed anyway. XXX: is it correct? */ + } + } + + if (dr && pfxrtr_lookup(pr, dr) == NULL) + pfxrtr_add(pr, dr); + } else { + struct nd_prefix *newpr = NULL; + + newprefix = 1; + + if (new->ndpr_vltime == 0) + goto end; + if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0) + goto end; + + error = nd6_prelist_add(new, dr, &newpr); + if (error != 0 || newpr == NULL) { + nd6log((LOG_NOTICE, "prelist_update: " + "nd6_prelist_add failed for %s/%d on %s " + "errno=%d, returnpr=%p\n", + ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr), + new->ndpr_plen, if_name(new->ndpr_ifp), + error, newpr)); + goto end; /* we should just give up in this case. */ + } + + /* + * XXX: from the ND point of view, we can ignore a prefix + * with the on-link bit being zero. However, we need a + * prefix structure for references from autoconfigured + * addresses. Thus, we explicitly make sure that the prefix + * itself expires now. + */ + if (newpr->ndpr_raf_onlink == 0) { + newpr->ndpr_vltime = 0; + newpr->ndpr_pltime = 0; + in6_init_prefix_ltimes(newpr); + } + + pr = newpr; + } + + /* + * Address autoconfiguration based on Section 5.5.3 of RFC 2462. + * Note that pr must be non NULL at this point. + */ + + /* 5.5.3 (a). Ignore the prefix without the A bit set. */ + if (!new->ndpr_raf_auto) + goto end; + + /* + * 5.5.3 (b). 
the link-local prefix should have been ignored in + * nd6_ra_input. + */ + + /* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */ + if (new->ndpr_pltime > new->ndpr_vltime) { + error = EINVAL; /* XXX: won't be used */ + goto end; + } + + /* + * 5.5.3 (d). If the prefix advertised is not equal to the prefix of + * an address configured by stateless autoconfiguration already in the + * list of addresses associated with the interface, and the Valid + * Lifetime is not 0, form an address. We first check if we have + * a matching prefix. + * Note: we apply a clarification in rfc2462bis-02 here. We only + * consider autoconfigured addresses while RFC2462 simply said + * "address". + */ + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + struct in6_ifaddr *ifa6; + u_int32_t remaininglifetime; + + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + + ifa6 = (struct in6_ifaddr *)ifa; + + /* + * We only consider autoconfigured addresses as per rfc2462bis. + */ + if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF)) + continue; + + /* + * Spec is not clear here, but I believe we should concentrate + * on unicast (i.e. not anycast) addresses. + * XXX: other ia6_flags? detached or duplicated? + */ + if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0) + continue; + + /* + * Ignore the address if it is not associated with a prefix + * or is associated with a prefix that is different from this + * one. (pr is never NULL here) + */ + if (ifa6->ia6_ndpr != pr) + continue; + + if (ia6_match == NULL) /* remember the first one */ + ia6_match = ifa6; + + /* + * An already autoconfigured address matched. Now that we + * are sure there is at least one matched address, we can + * proceed to 5.5.3. (e): update the lifetimes according to the + * "two hours" rule and the privacy extension. 
+		 * We apply some clarifications in rfc2462bis:
+		 * - use remaininglifetime instead of storedlifetime as a
+		 *   variable name
+		 * - remove the dead code in the "two-hour" rule
+		 */
+#define TWOHOUR		(120*60)
+		lt6_tmp = ifa6->ia6_lifetime;
+
+		/*
+		 * Work out how much of the stored valid lifetime is still
+		 * left, counted from the last update of this address.
+		 */
+		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
+			remaininglifetime = ND6_INFINITE_LIFETIME;
+		else if (time_second - ifa6->ia6_updatetime >
+			 lt6_tmp.ia6t_vltime) {
+			/*
+			 * The case of "invalid" address.  We should usually
+			 * not see this case.
+			 */
+			remaininglifetime = 0;
+		} else
+			remaininglifetime = lt6_tmp.ia6t_vltime -
+			    (time_second - ifa6->ia6_updatetime);
+
+		/* when not updating, keep the current stored lifetime. */
+		lt6_tmp.ia6t_vltime = remaininglifetime;
+
+		/*
+		 * "Two-hour" rule: accept the advertised valid lifetime when
+		 * it is longer than two hours or longer than what remains;
+		 * a shorter one is only accepted from an authenticated RA
+		 * (when at most two hours remain) and otherwise the lifetime
+		 * is clamped to two hours.
+		 */
+		if (TWOHOUR < new->ndpr_vltime ||
+		    remaininglifetime < new->ndpr_vltime) {
+			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
+		} else if (remaininglifetime <= TWOHOUR) {
+			if (auth) {
+				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
+			}
+		} else {
+			/*
+			 * new->ndpr_vltime <= TWOHOUR &&
+			 * TWOHOUR < remaininglifetime
+			 */
+			lt6_tmp.ia6t_vltime = TWOHOUR;
+		}
+
+		/* The 2 hour rule is not imposed for preferred lifetime. */
+		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
+
+		/*
+		 * Hand over the address of the scratch copy so that its
+		 * ia6t_expire and ia6t_preferred fields are recomputed in
+		 * place from the lifetimes chosen above.
+		 */
+		in6_init_address_ltimes(pr, &lt6_tmp);
+
+		/*
+		 * We need to treat lifetimes for temporary addresses
+		 * differently, according to
+		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
+		 * we only update the lifetimes when they are in the maximum
+ */ + if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { + u_int32_t maxvltime, maxpltime; + + if (V_ip6_temp_valid_lifetime > + (u_int32_t)((time_second - ifa6->ia6_createtime) + + V_ip6_desync_factor)) { + maxvltime = V_ip6_temp_valid_lifetime - + (time_second - ifa6->ia6_createtime) - + V_ip6_desync_factor; + } else + maxvltime = 0; + if (V_ip6_temp_preferred_lifetime > + (u_int32_t)((time_second - ifa6->ia6_createtime) + + V_ip6_desync_factor)) { + maxpltime = V_ip6_temp_preferred_lifetime - + (time_second - ifa6->ia6_createtime) - + V_ip6_desync_factor; + } else + maxpltime = 0; + + if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME || + lt6_tmp.ia6t_vltime > maxvltime) { + lt6_tmp.ia6t_vltime = maxvltime; + } + if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME || + lt6_tmp.ia6t_pltime > maxpltime) { + lt6_tmp.ia6t_pltime = maxpltime; + } + } + ifa6->ia6_lifetime = lt6_tmp; + ifa6->ia6_updatetime = time_second; + } + IF_ADDR_UNLOCK(ifp); + if (ia6_match == NULL && new->ndpr_vltime) { + int ifidlen; + + /* + * 5.5.3 (d) (continued) + * No address matched and the valid lifetime is non-zero. + * Create a new address. + */ + + /* + * Prefix Length check: + * If the sum of the prefix length and interface identifier + * length does not equal 128 bits, the Prefix Information + * option MUST be ignored. The length of the interface + * identifier is defined in a separate link-type specific + * document. + */ + ifidlen = in6_if2idlen(ifp); + if (ifidlen < 0) { + /* this should not happen, so we always log it. */ + log(LOG_ERR, "prelist_update: IFID undefined (%s)\n", + if_name(ifp)); + goto end; + } + if (ifidlen + pr->ndpr_plen != 128) { + nd6log((LOG_INFO, + "prelist_update: invalid prefixlen " + "%d for %s, ignored\n", + pr->ndpr_plen, if_name(ifp))); + goto end; + } + + if ((ia6 = in6_ifadd(new, mcast)) != NULL) { + /* + * note that we should use pr (not new) for reference. + */ + pr->ndpr_refcnt++; + ia6->ia6_ndpr = pr; + + /* + * RFC 3041 3.3 (2). 
+			 * When a new public address is created as described
+			 * in RFC2462, also create a new temporary address.
+			 *
+			 * RFC 3041 3.5.
+			 * When an interface connects to a new link, a new
+			 * randomized interface identifier should be generated
+			 * immediately together with a new set of temporary
+			 * addresses.  Thus, we specify 1 as the 2nd arg of
+			 * in6_tmpifadd().
+			 */
+			if (V_ip6_use_tempaddr) {
+				int e;
+				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
+					nd6log((LOG_NOTICE, "prelist_update: "
+					    "failed to create a temporary "
+					    "address, errno=%d\n",
+					    e));
+				}
+			}
+			ifa_free(&ia6->ia_ifa);
+
+			/*
+			 * A newly added address might affect the status
+			 * of other addresses, so we check and update it.
+			 * XXX: what if address duplication happens?
+			 */
+			pfxlist_onlink_check();
+		} else {
+			/* just set an error. do not bark here. */
+			error = EADDRNOTAVAIL; /* XXX: might be unused. */
+		}
+	}
+
+ end:
+	splx(s);
+	return error;
+}
+
+/*
+ * A supplement function used in the on-link detection below;
+ * detect if a given prefix has a (probably) reachable advertising router.
+ *
+ * The list of routers that advertised the prefix (pr->ndpr_advrtrs) is
+ * walked in order.  For each router its address is looked up with
+ * nd6_lookup() while IF_AFDATA_LOCK is held on the router's interface;
+ * routers without an entry are skipped.  The ND6_IS_LLINFO_PROBREACH()
+ * result is sampled before the entry is released with LLE_RUNLOCK()
+ * (presumably nd6_lookup() hands the entry back read-locked -- confirm
+ * against its definition).
+ *
+ * Returns the first router entry that passes the test, or NULL when the
+ * list is exhausted without a match.
+ * XXX: lengthy function name...
+ */
+static struct nd_pfxrouter *
+find_pfxlist_reachable_router(struct nd_prefix *pr)
+{
+	struct nd_pfxrouter *pfxrtr;
+	struct llentry *ln;
+	int canreach;
+
+	for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr != NULL;
+	     pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) {
+		IF_AFDATA_LOCK(pfxrtr->router->ifp);
+		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
+		IF_AFDATA_UNLOCK(pfxrtr->router->ifp);
+		if (ln == NULL)
+			continue;
+		canreach = ND6_IS_LLINFO_PROBREACH(ln);
+		LLE_RUNLOCK(ln);
+		if (canreach)
+			break;
+	}
+	return (pfxrtr);
+}
+
+/*
+ * Check if each prefix in the prefix list has at least one available router
+ * that advertised the prefix (a router is "available" if its neighbor cache
+ * entry is reachable or probably reachable).
+ * If the check fails, the prefix may be off-link, because, for example, + * we have moved from the network but the lifetime of the prefix has not + * expired yet. So we should not use the prefix if there is another prefix + * that has an available router. + * But, if there is no prefix that has an available router, we still regards + * all the prefixes as on-link. This is because we can't tell if all the + * routers are simply dead or if we really moved from the network and there + * is no router around us. + */ +void +pfxlist_onlink_check() +{ + struct nd_prefix *pr; + struct in6_ifaddr *ifa; + struct nd_defrouter *dr; + struct nd_pfxrouter *pfxrtr = NULL; + + /* + * Check if there is a prefix that has a reachable advertising + * router. + */ + for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr)) + break; + } + + /* + * If we have no such prefix, check whether we still have a router + * that does not advertise any prefixes. + */ + if (pr == NULL) { + for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; + dr = TAILQ_NEXT(dr, dr_entry)) { + struct nd_prefix *pr0; + + for (pr0 = V_nd_prefix.lh_first; pr0; + pr0 = pr0->ndpr_next) { + if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL) + break; + } + if (pfxrtr != NULL) + break; + } + } + if (pr != NULL || (TAILQ_FIRST(&V_nd_defrouter) && pfxrtr == NULL)) { + /* + * There is at least one prefix that has a reachable router, + * or at least a router which probably does not advertise + * any prefixes. The latter would be the case when we move + * to a new link where we have a router that does not provide + * prefixes and we configure an address by hand. + * Detach prefixes which have no reachable advertising + * router, and attach other prefixes. 
+ */ + for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + /* XXX: a link-local prefix should never be detached */ + if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) + continue; + + /* + * we aren't interested in prefixes without the L bit + * set. + */ + if (pr->ndpr_raf_onlink == 0) + continue; + + if (pr->ndpr_raf_auto == 0) + continue; + + if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && + find_pfxlist_reachable_router(pr) == NULL) + pr->ndpr_stateflags |= NDPRF_DETACHED; + if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 && + find_pfxlist_reachable_router(pr) != 0) + pr->ndpr_stateflags &= ~NDPRF_DETACHED; + } + } else { + /* there is no prefix that has a reachable router */ + for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) + continue; + + if (pr->ndpr_raf_onlink == 0) + continue; + + if (pr->ndpr_raf_auto == 0) + continue; + + if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0) + pr->ndpr_stateflags &= ~NDPRF_DETACHED; + } + } + + /* + * Remove each interface route associated with a (just) detached + * prefix, and reinstall the interface route for a (just) attached + * prefix. Note that all attempt of reinstallation does not + * necessarily success, when a same prefix is shared among multiple + * interfaces. Such cases will be handled in nd6_prefix_onlink, + * so we don't have to care about them. 
+ */ + for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + int e; + char ip6buf[INET6_ADDRSTRLEN]; + + if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) + continue; + + if (pr->ndpr_raf_onlink == 0) + continue; + + if (pr->ndpr_raf_auto == 0) + continue; + + if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 && + (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { + if ((e = nd6_prefix_offlink(pr)) != 0) { + nd6log((LOG_ERR, + "pfxlist_onlink_check: failed to " + "make %s/%d offlink, errno=%d\n", + ip6_sprintf(ip6buf, + &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, e)); + } + } + if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && + (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 && + pr->ndpr_raf_onlink) { + if ((e = nd6_prefix_onlink(pr)) != 0) { + nd6log((LOG_ERR, + "pfxlist_onlink_check: failed to " + "make %s/%d onlink, errno=%d\n", + ip6_sprintf(ip6buf, + &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, e)); + } + } + } + + /* + * Changes on the prefix status might affect address status as well. + * Make sure that all addresses derived from an attached prefix are + * attached, and that all addresses derived from a detached prefix are + * detached. Note, however, that a manually configured address should + * always be attached. + * The precise detection logic is same as the one for prefixes. + * + * XXXRW: in6_ifaddrhead locking. + */ + TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) { + if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF)) + continue; + + if (ifa->ia6_ndpr == NULL) { + /* + * This can happen when we first configure the address + * (i.e. the address exists, but the prefix does not). + * XXX: complicated relationships... + */ + continue; + } + + if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) + break; + } + if (ifa) { + TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) { + if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) + continue; + + if (ifa->ia6_ndpr == NULL) /* XXX: see above. 
*/ + continue; + + if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) { + if (ifa->ia6_flags & IN6_IFF_DETACHED) { + ifa->ia6_flags &= ~IN6_IFF_DETACHED; + ifa->ia6_flags |= IN6_IFF_TENTATIVE; + nd6_dad_start((struct ifaddr *)ifa, 0); + } + } else { + ifa->ia6_flags |= IN6_IFF_DETACHED; + } + } + } + else { + TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) { + if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) + continue; + + if (ifa->ia6_flags & IN6_IFF_DETACHED) { + ifa->ia6_flags &= ~IN6_IFF_DETACHED; + ifa->ia6_flags |= IN6_IFF_TENTATIVE; + /* Do we need a delay in this case? */ + nd6_dad_start((struct ifaddr *)ifa, 0); + } + } + } +} + +int +nd6_prefix_onlink(struct nd_prefix *pr) +{ + struct ifaddr *ifa; + struct ifnet *ifp = pr->ndpr_ifp; + struct sockaddr_in6 mask6; + struct nd_prefix *opr; + u_long rtflags; + int error = 0; + struct radix_node_head *rnh; + struct rtentry *rt = NULL; + char ip6buf[INET6_ADDRSTRLEN]; + struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; + + /* sanity check */ + if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { + nd6log((LOG_ERR, + "nd6_prefix_onlink: %s/%d is already on-link\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen)); + return (EEXIST); + } + + /* + * Add the interface route associated with the prefix. Before + * installing the route, check if there's the same prefix on another + * interface, and the prefix has already installed the interface route. + * Although such a configuration is expected to be rare, we explicitly + * allow it. + */ + for (opr = V_nd_prefix.lh_first; opr; opr = opr->ndpr_next) { + if (opr == pr) + continue; + + if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0) + continue; + + if (opr->ndpr_plen == pr->ndpr_plen && + in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, + &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) + return (0); + } + + /* + * We prefer link-local addresses as the associated interface address. 
+ */ + /* search for a link-local addr */ + ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, + IN6_IFF_NOTREADY | IN6_IFF_ANYCAST); + if (ifa == NULL) { + /* XXX: freebsd does not have ifa_ifwithaf */ + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family == AF_INET6) + break; + } + if (ifa != NULL) + ifa_ref(ifa); + IF_ADDR_UNLOCK(ifp); + /* should we care about ia6_flags? */ + } + if (ifa == NULL) { + /* + * This can still happen, when, for example, we receive an RA + * containing a prefix with the L bit set and the A bit clear, + * after removing all IPv6 addresses on the receiving + * interface. This should, of course, be rare though. + */ + nd6log((LOG_NOTICE, + "nd6_prefix_onlink: failed to find any ifaddr" + " to add route for a prefix(%s/%d) on %s\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(ifp))); + return (0); + } + + /* + * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs. + * ifa->ifa_rtrequest = nd6_rtrequest; + */ + bzero(&mask6, sizeof(mask6)); + mask6.sin6_len = sizeof(mask6); + mask6.sin6_addr = pr->ndpr_mask; + rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP; + error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, + ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); + if (error == 0) { + if (rt != NULL) /* this should be non NULL, though */ { + rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6); + /* XXX what if rhn == NULL? 
*/ + RADIX_NODE_HEAD_LOCK(rnh); + RT_LOCK(rt); + if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) { + ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = + rt->rt_ifp->if_type; + ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = + rt->rt_ifp->if_index; + } + RADIX_NODE_HEAD_UNLOCK(rnh); + nd6_rtmsg(RTM_ADD, rt); + RT_UNLOCK(rt); + } + pr->ndpr_stateflags |= NDPRF_ONLINK; + } else { + char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN]; + nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a" + " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx " + "errno = %d\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(ifp), + ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr), + ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error)); + } + + if (rt != NULL) { + RT_LOCK(rt); + RT_REMREF(rt); + RT_UNLOCK(rt); + } + if (ifa != NULL) + ifa_free(ifa); + + return (error); +} + +int +nd6_prefix_offlink(struct nd_prefix *pr) +{ + int error = 0; + struct ifnet *ifp = pr->ndpr_ifp; + struct nd_prefix *opr; + struct sockaddr_in6 sa6, mask6; + struct rtentry *rt = NULL; + char ip6buf[INET6_ADDRSTRLEN]; + + /* sanity check */ + if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { + nd6log((LOG_ERR, + "nd6_prefix_offlink: %s/%d is already off-link\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen)); + return (EEXIST); + } + + bzero(&sa6, sizeof(sa6)); + sa6.sin6_family = AF_INET6; + sa6.sin6_len = sizeof(sa6); + bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr, + sizeof(struct in6_addr)); + bzero(&mask6, sizeof(mask6)); + mask6.sin6_family = AF_INET6; + mask6.sin6_len = sizeof(sa6); + bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); + error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, + (struct sockaddr *)&mask6, 0, &rt); + if (error == 0) { + pr->ndpr_stateflags &= ~NDPRF_ONLINK; + + /* report the route deletion to the routing socket. 
*/ + if (rt != NULL) + nd6_rtmsg(RTM_DELETE, rt); + + /* + * There might be the same prefix on another interface, + * the prefix which could not be on-link just because we have + * the interface route (see comments in nd6_prefix_onlink). + * If there's one, try to make the prefix on-link on the + * interface. + */ + for (opr = V_nd_prefix.lh_first; opr; opr = opr->ndpr_next) { + if (opr == pr) + continue; + + if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0) + continue; + + /* + * KAME specific: detached prefixes should not be + * on-link. + */ + if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0) + continue; + + if (opr->ndpr_plen == pr->ndpr_plen && + in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, + &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) { + int e; + + if ((e = nd6_prefix_onlink(opr)) != 0) { + nd6log((LOG_ERR, + "nd6_prefix_offlink: failed to " + "recover a prefix %s/%d from %s " + "to %s (errno = %d)\n", + ip6_sprintf(ip6buf, + &opr->ndpr_prefix.sin6_addr), + opr->ndpr_plen, if_name(ifp), + if_name(opr->ndpr_ifp), e)); + } + } + } + } else { + /* XXX: can we still set the NDPRF_ONLINK flag? */ + nd6log((LOG_ERR, + "nd6_prefix_offlink: failed to delete route: " + "%s/%d on %s (errno = %d)\n", + ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen, + if_name(ifp), error)); + } + + if (rt != NULL) { + RTFREE(rt); + } + + return (error); +} + +static struct in6_ifaddr * +in6_ifadd(struct nd_prefixctl *pr, int mcast) +{ + struct ifnet *ifp = pr->ndpr_ifp; + struct ifaddr *ifa; + struct in6_aliasreq ifra; + struct in6_ifaddr *ia, *ib; + int error, plen0; + struct in6_addr mask; + int prefixlen = pr->ndpr_plen; + int updateflags; + char ip6buf[INET6_ADDRSTRLEN]; + + in6_prefixlen2mask(&mask, prefixlen); + + /* + * find a link-local address (will be interface ID). + * Is it really mandatory? Theoretically, a global or a site-local + * address can be configured without a link-local address, if we + * have a unique interface identifier... 
+ * + * it is not mandatory to have a link-local address, we can generate + * interface identifier on the fly. we do this because: + * (1) it should be the easiest way to find interface identifier. + * (2) RFC2462 5.4 suggesting the use of the same interface identifier + * for multiple addresses on a single interface, and possible shortcut + * of DAD. we omitted DAD for this reason in the past. + * (3) a user can prevent autoconfiguration of global address + * by removing link-local address by hand (this is partly because we + * don't have other way to control the use of IPv6 on an interface. + * this has been our design choice - cf. NRL's "ifconfig auto"). + * (4) it is easier to manage when an interface has addresses + * with the same interface identifier, than to have multiple addresses + * with different interface identifiers. + */ + ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */ + if (ifa) + ib = (struct in6_ifaddr *)ifa; + else + return NULL; + + /* prefixlen + ifidlen must be equal to 128 */ + plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL); + if (prefixlen != plen0) { + ifa_free(ifa); + nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s " + "(prefix=%d ifid=%d)\n", + if_name(ifp), prefixlen, 128 - plen0)); + return NULL; + } + + /* make ifaddr */ + + bzero(&ifra, sizeof(ifra)); + /* + * in6_update_ifa() does not use ifra_name, but we accurately set it + * for safety. 
+ */ + strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); + ifra.ifra_addr.sin6_family = AF_INET6; + ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); + /* prefix */ + ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr; + ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0]; + ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1]; + ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2]; + ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3]; + + /* interface ID */ + ifra.ifra_addr.sin6_addr.s6_addr32[0] |= + (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]); + ifra.ifra_addr.sin6_addr.s6_addr32[1] |= + (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]); + ifra.ifra_addr.sin6_addr.s6_addr32[2] |= + (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]); + ifra.ifra_addr.sin6_addr.s6_addr32[3] |= + (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]); + ifa_free(ifa); + + /* new prefix mask. */ + ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_prefixmask.sin6_family = AF_INET6; + bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr, + sizeof(ifra.ifra_prefixmask.sin6_addr)); + + /* lifetimes. */ + ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime; + ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime; + + /* XXX: scope zone ID? */ + + ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */ + + /* + * Make sure that we do not have this address already. This should + * usually not happen, but we can still see this case, e.g., if we + * have manually configured the exact address to be configured. + */ + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, + &ifra.ifra_addr.sin6_addr); + if (ifa != NULL) { + ifa_free(ifa); + /* this should be rare enough to make an explicit log */ + log(LOG_INFO, "in6_ifadd: %s is already configured\n", + ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr)); + return (NULL); + } + + /* + * Allocate ifaddr structure, link into chain, etc. 
+ * If we are going to create a new address upon receiving a multicasted + * RA, we need to impose a random delay before starting DAD. + * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2] + */ + updateflags = 0; + if (mcast) + updateflags |= IN6_IFAUPDATE_DADDELAY; + if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) { + nd6log((LOG_ERR, + "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n", + ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr), + if_name(ifp), error)); + return (NULL); /* ifaddr must not have been allocated. */ + } + + ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr); + /* + * XXXRW: Assumption of non-NULLness here might not be true with + * fine-grained locking -- should we validate it? Or just return + * earlier ifa rather than looking it up again? + */ + return (ia); /* this is always non-NULL and referenced. */ +} + +/* + * ia0 - corresponding public address + */ +int +in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay) +{ + struct ifnet *ifp = ia0->ia_ifa.ifa_ifp; + struct in6_ifaddr *newia, *ia; + struct in6_aliasreq ifra; + int i, error; + int trylimit = 3; /* XXX: adhoc value */ + int updateflags; + u_int32_t randid[2]; + time_t vltime0, pltime0; + + bzero(&ifra, sizeof(ifra)); + strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); + ifra.ifra_addr = ia0->ia_addr; + /* copy prefix mask */ + ifra.ifra_prefixmask = ia0->ia_prefixmask; + /* clear the old IFID */ + for (i = 0; i < 4; i++) { + ifra.ifra_addr.sin6_addr.s6_addr32[i] &= + ifra.ifra_prefixmask.sin6_addr.s6_addr32[i]; + } + + again: + if (in6_get_tmpifid(ifp, (u_int8_t *)randid, + (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) { + nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good " + "random IFID\n")); + return (EINVAL); + } + ifra.ifra_addr.sin6_addr.s6_addr32[2] |= + (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2])); + ifra.ifra_addr.sin6_addr.s6_addr32[3] |= + (randid[1] & 
~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3])); + + /* + * in6_get_tmpifid() quite likely provided a unique interface ID. + * However, we may still have a chance to see collision, because + * there may be a time lag between generation of the ID and generation + * of the address. So, we'll do one more sanity check. + */ + IN6_IFADDR_RLOCK(); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, + &ifra.ifra_addr.sin6_addr)) { + if (trylimit-- == 0) { + IN6_IFADDR_RUNLOCK(); + /* + * Give up. Something strange should have + * happened. + */ + nd6log((LOG_NOTICE, "in6_tmpifadd: failed to " + "find a unique random IFID\n")); + return (EEXIST); + } + IN6_IFADDR_RUNLOCK(); + forcegen = 1; + goto again; + } + } + IN6_IFADDR_RUNLOCK(); + + /* + * The Valid Lifetime is the lower of the Valid Lifetime of the + * public address or TEMP_VALID_LIFETIME. + * The Preferred Lifetime is the lower of the Preferred Lifetime + * of the public address or TEMP_PREFERRED_LIFETIME - + * DESYNC_FACTOR. + */ + if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { + vltime0 = IFA6_IS_INVALID(ia0) ? 0 : + (ia0->ia6_lifetime.ia6t_vltime - + (time_second - ia0->ia6_updatetime)); + if (vltime0 > V_ip6_temp_valid_lifetime) + vltime0 = V_ip6_temp_valid_lifetime; + } else + vltime0 = V_ip6_temp_valid_lifetime; + if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { + pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 : + (ia0->ia6_lifetime.ia6t_pltime - + (time_second - ia0->ia6_updatetime)); + if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){ + pltime0 = V_ip6_temp_preferred_lifetime - + V_ip6_desync_factor; + } + } else + pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor; + ifra.ifra_lifetime.ia6t_vltime = vltime0; + ifra.ifra_lifetime.ia6t_pltime = pltime0; + + /* + * A temporary address is created only if this calculated Preferred + * Lifetime is greater than REGEN_ADVANCE time units. 
+ */ + if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance) + return (0); + + /* XXX: scope zone ID? */ + + ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY); + + /* allocate ifaddr structure, link into chain, etc. */ + updateflags = 0; + if (delay) + updateflags |= IN6_IFAUPDATE_DADDELAY; + if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) + return (error); + + newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr); + if (newia == NULL) { /* XXX: can it happen? */ + nd6log((LOG_ERR, + "in6_tmpifadd: ifa update succeeded, but we got " + "no ifaddr\n")); + return (EINVAL); /* XXX */ + } + newia->ia6_ndpr = ia0->ia6_ndpr; + newia->ia6_ndpr->ndpr_refcnt++; + ifa_free(&newia->ia_ifa); + + /* + * A newly added address might affect the status of other addresses. + * XXX: when the temporary address is generated with a new public + * address, the onlink check is redundant. However, it would be safe + * to do the check explicitly everywhere a new address is generated, + * and, in fact, we surely need the check when we create a new + * temporary address due to deprecation of an old temporary address. 
+	 */
+	pfxlist_onlink_check();
+
+	return (0);
+}
+
+/*
+ * Convert the relative preferred and valid lifetimes of a prefix into the
+ * absolute ndpr_preferred and ndpr_expire times, using time_second as the
+ * base.  An infinite lifetime is recorded as 0.  Always returns 0.
+ */
+static int
+in6_init_prefix_ltimes(struct nd_prefix *ndpr)
+{
+	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
+		ndpr->ndpr_preferred = 0;
+	else
+		ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
+	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
+		ndpr->ndpr_expire = 0;
+	else
+		ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
+
+	return 0;
+}
+
+/*
+ * Same conversion for an address lifetime structure: ia6t_expire and
+ * ia6t_preferred are derived from ia6t_vltime and ia6t_pltime relative to
+ * time_second, with 0 standing for an infinite lifetime.
+ * The "new" argument is not referenced by the body.
+ */
+static void
+in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
+{
+	/* init ia6t_expire */
+	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
+		lt6->ia6t_expire = 0;
+	else {
+		lt6->ia6t_expire = time_second;
+		lt6->ia6t_expire += lt6->ia6t_vltime;
+	}
+
+	/* init ia6t_preferred */
+	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
+		lt6->ia6t_preferred = 0;
+	else {
+		lt6->ia6t_preferred = time_second;
+		lt6->ia6t_preferred += lt6->ia6t_pltime;
+	}
+}
+
+/*
+ * Delete all the routing table entries that use the specified gateway.
+ * Only link-local gateways are handled; for any other address the function
+ * returns without touching the table.  The table obtained from
+ * rt_tables_get_rnh(0, AF_INET6) is walked with rt6_deleteroute() while the
+ * radix node head lock is held.  The "ifp" argument is not referenced by
+ * the body.
+ * XXX: this function causes search through all entries of routing table, so
+ * it shouldn't be called when acting as a router.
+ */
+void
+rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
+{
+	struct radix_node_head *rnh;
+	int s = splnet();
+
+	/* We'll care only link-local addresses */
+	if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
+		splx(s);
+		return;
+	}
+
+	rnh = rt_tables_get_rnh(0, AF_INET6);
+	if (rnh == NULL) {
+		/* Undo splnet() on this exit path too, like the others. */
+		splx(s);
+		return;
+	}
+
+	RADIX_NODE_HEAD_LOCK(rnh);
+	rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
+	RADIX_NODE_HEAD_UNLOCK(rnh);
+	splx(s);
+}
+
+/*
+ * Tree-walk callback used by rt6_flush(); "arg" is the gateway address
+ * handed to rnh_walktree().  Entries whose gateway is missing, is not
+ * AF_INET6, or differs from that address are left alone (return 0).
+ */
+static int
+rt6_deleteroute(struct radix_node *rn, void *arg)
+{
+#define SIN6(s)	((struct sockaddr_in6 *)s)
+	struct rtentry *rt = (struct rtentry *)rn;
+	struct in6_addr *gate = (struct in6_addr *)arg;
+
+	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
+		return (0);
+
+	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
+		return (0);
+	}
+
+	/*
+	 * Do not delete a static route.
+	 * XXX: this seems to be a bit ad-hoc. Should we consider the
+	 * 'cloned' bit instead?
+	 */
+	if ((rt->rt_flags & RTF_STATIC) != 0)
+		return (0);
+
+	/*
+	 * We delete only host route. This means, in particular, we don't
+	 * delete default route.
+	 */
+	if ((rt->rt_flags & RTF_HOST) == 0)
+		return (0);
+
+	return (rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
+	    rt_mask(rt), rt->rt_flags, 0));
+#undef SIN6
+}
+
+/*
+ * Select the interface with index "ifindex" as the ND6 default interface;
+ * an index of 0 clears the selection (V_nd6_defifp becomes NULL).
+ *
+ * Returns EINVAL when ifindex is negative, larger than V_if_index, or
+ * non-zero without a matching entry in ifnet_byindex(); returns 0
+ * otherwise.  Nothing is changed when the index equals the current
+ * V_nd6_defifindex.  On a change, the new interface pointer is also passed
+ * to scope6_setdefault().
+ */
+int
+nd6_setdefaultiface(int ifindex)
+{
+	int error = 0;	/* never reassigned below; the success path returns 0 */
+
+	if (ifindex < 0 || V_if_index < ifindex)
+		return (EINVAL);
+	if (ifindex != 0 && !ifnet_byindex(ifindex))
+		return (EINVAL);
+
+	if (V_nd6_defifindex != ifindex) {
+		V_nd6_defifindex = ifindex;
+		if (V_nd6_defifindex > 0)
+			V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
+		else
+			V_nd6_defifp = NULL;
+
+		/*
+		 * Our current implementation assumes one-to-one mapping between
+		 * interfaces and links, so it would be natural to use the
+		 * default interface as the default link.
+		 */
+		scope6_setdefault(V_nd6_defifp);
+	}
+
+	return (error);
+}
diff --git a/freebsd/sys/netinet6/pim6.h b/freebsd/sys/netinet6/pim6.h
new file mode 100644
index 00000000..dec84cf2
--- /dev/null
+++ b/freebsd/sys/netinet6/pim6.h
@@ -0,0 +1,69 @@
+/*-
+ * Copyright (C) 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: pim6.h,v 1.3 2000/03/25 07:23:58 sumikawa Exp $ + * $FreeBSD$ + */ +/* + * Protocol Independent Multicast (PIM) definitions + * + * Written by Ahmed Helmy, SGI, July 1996 + * + * MULTICAST + */ + +/* + * PIM packet header. + * + * pim_type and pim_ver are 4-bit bit-fields declared together as u_char; + * their declaration order is swapped when BYTE_ORDER is LITTLE_ENDIAN. + * They are followed by the pim_rsv byte and the u_short pim_cksum field. + */ +#define PIM_VERSION 2 +struct pim { +#if defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN) + u_char pim_type:4, /* the PIM message type, currently they are: + * Hello, Register, Register-Stop, Join/Prune, + * Bootstrap, Assert, Graft (PIM-DM only), + * Graft-Ack (PIM-DM only), C-RP-Adv + */ + pim_ver:4; /* PIM version number; 2 for PIMv2 */ +#else + u_char pim_ver:4, /* PIM version number; 2 for PIMv2 */ + pim_type:4; /* PIM message type (see the list above) */ +#endif + u_char pim_rsv; /* Reserved */ + u_short pim_cksum; /* IP-style checksum */ +}; + +#define PIM_MINLEN 8 /* The header min.
length is 8 */ +#define PIM6_REG_MINLEN (PIM_MINLEN+40) /* Register message + inner IP6 header */ + +/* + * Message types + */ +#define PIM_REGISTER 1 /* PIM Register type is 1 */ + +/* second bit in reg_head is the null bit */ +#define PIM_NULL_REGISTER 0x40000000 diff --git a/freebsd/sys/netinet6/pim6_var.h b/freebsd/sys/netinet6/pim6_var.h new file mode 100644 index 00000000..8c63b5cc --- /dev/null +++ b/freebsd/sys/netinet6/pim6_var.h @@ -0,0 +1,68 @@ +/*- + * Copyright (C) 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: pim6_var.h,v 1.8 2000/06/06 08:07:43 jinmei Exp $ + * $FreeBSD$ + */ + +/* + * Protocol Independent Multicast (PIM), + * implementation-specific definitions. + * + * struct pim6stat below holds the counters for received PIM messages and + * for Register messages received and sent. + * + * Written by George Edmond Eddy (Rusty), ISI, February 1998 + * Modified by Pavlin Ivanov Radoslavov, USC/ISI, May 1998 + */ + +#ifndef _NETINET6_PIM6_VAR_HH_ +#define _NETINET6_PIM6_VAR_HH_ + +struct pim6stat { + u_quad_t pim6s_rcv_total; /* total PIM messages received */ + u_quad_t pim6s_rcv_tooshort; /* received with too few bytes */ + u_quad_t pim6s_rcv_badsum; /* received with bad checksum */ + u_quad_t pim6s_rcv_badversion; /* received bad PIM version */ + u_quad_t pim6s_rcv_registers; /* received registers */ + u_quad_t pim6s_rcv_badregisters; /* received invalid registers */ + u_quad_t pim6s_snd_registers; /* sent registers */ +}; + +#if (defined(KERNEL)) || (defined(_KERNEL)) +int pim6_input __P((struct mbuf **, int*, int)); +#endif /* KERNEL || _KERNEL */ + +/* + * Names for PIM sysctl objects. + * + * PIM6CTL_NAMES supplies one initializer per id below PIM6CTL_MAXID; both + * entries are { 0, 0 }. + */ +#define PIM6CTL_STATS 1 /* statistics (read-only) */ +#define PIM6CTL_MAXID 2 + +#define PIM6CTL_NAMES { \ + { 0, 0 }, \ + { 0, 0 }, \ +} +#endif /* _NETINET6_PIM6_VAR_HH_ */ diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c new file mode 100644 index 00000000..71a8add8 --- /dev/null +++ b/freebsd/sys/netinet6/raw_ip6.c @@ -0,0 +1,905 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE
Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_ipsec.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/errno.h> +#include <freebsd/sys/jail.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/priv.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/signalvar.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/sx.h> +#include <freebsd/sys/syslog.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/route.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/in_systm.h> +#include <freebsd/netinet/in_pcb.h> + +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6protosw.h> +#include <freebsd/netinet6/ip6_mroute.h> +#include <freebsd/netinet6/in6_pcb.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/nd6.h> +#include <freebsd/netinet6/raw_ip6.h> +#include <freebsd/netinet6/scope6_var.h> + +#ifdef IPSEC +#include <freebsd/netipsec/ipsec.h> +#include <freebsd/netipsec/ipsec6.h> +#endif /* IPSEC */ + +#include <freebsd/machine/stdarg.h> + +#define satosin6(sa) ((struct sockaddr_in6 *)(sa)) +#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) + +/* + * Raw interface to IP6 protocol. + */ + +VNET_DECLARE(struct inpcbhead, ripcb); +VNET_DECLARE(struct inpcbinfo, ripcbinfo); +#define V_ripcb VNET(ripcb) +#define V_ripcbinfo VNET(ripcbinfo) + +extern u_long rip_sendspace; +extern u_long rip_recvspace; + +VNET_DEFINE(struct rip6stat, rip6stat); + +/* + * Hooks for multicast routing. They all default to NULL, so leave them not + * initialized and rely on BSS being set to 0. 
+ */ + +/* + * The socket used to communicate with the multicast routing daemon. + */ +VNET_DEFINE(struct socket *, ip6_mrouter); + +/* + * The various mrouter functions. + */ +int (*ip6_mrouter_set)(struct socket *, struct sockopt *); +int (*ip6_mrouter_get)(struct socket *, struct sockopt *); +int (*ip6_mrouter_done)(void); +int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); +int (*mrt6_ioctl)(u_long, caddr_t); + +/* + * Setup generic address and protocol structures for raw_input routine, then + * pass them along with mbuf chain. + */ +int +rip6_input(struct mbuf **mp, int *offp, int proto) +{ + struct ifnet *ifp; + struct mbuf *m = *mp; + register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + register struct inpcb *in6p; + struct inpcb *last = 0; + struct mbuf *opts = NULL; + struct sockaddr_in6 fromsa; + + V_rip6stat.rip6s_ipackets++; + + if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { + /* XXX Send icmp6 host/port unreach? */ + m_freem(m); + return (IPPROTO_DONE); + } + + init_sin6(&fromsa, m); /* general init */ + + ifp = m->m_pkthdr.rcvif; + + INP_INFO_RLOCK(&V_ripcbinfo); + LIST_FOREACH(in6p, &V_ripcb, inp_list) { + /* XXX inp locking */ + if ((in6p->inp_vflag & INP_IPV6) == 0) + continue; + if (in6p->inp_ip_p && + in6p->inp_ip_p != proto) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && + !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && + !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) + continue; + if (jailed_without_vnet(in6p->inp_cred)) { + /* + * Allow raw socket in jail to receive multicast; + * assume process had PRIV_NETINET_RAW at attach, + * and fall through into normal filter path if so. 
+ */ + if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && + prison_check_ip6(in6p->inp_cred, + &ip6->ip6_dst) != 0) + continue; + } + if (in6p->in6p_cksum != -1) { + V_rip6stat.rip6s_isum++; + if (in6_cksum(m, proto, *offp, + m->m_pkthdr.len - *offp)) { + INP_RUNLOCK(in6p); + V_rip6stat.rip6s_badsum++; + continue; + } + } + INP_RLOCK(in6p); + /* + * If this raw socket has multicast state, and we + * have received a multicast, check if this socket + * should receive it, as multicast filtering is now + * the responsibility of the transport layer. + */ + if (in6p->in6p_moptions && + IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + /* + * If the incoming datagram is for MLD, allow it + * through unconditionally to the raw socket. + * + * Use the M_RTALERT_MLD flag to check for MLD + * traffic without having to inspect the mbuf chain + * more deeply, as all MLDv1/v2 host messages MUST + * contain the Router Alert option. + * + * In the case of MLDv1, we may not have explicitly + * joined the group, and may have set IFF_ALLMULTI + * on the interface. im6o_mc_filter() may discard + * control traffic we actually need to see. + * + * Userland multicast routing daemons should continue + * filter the control traffic appropriately. + */ + int blocked; + + blocked = MCAST_PASS; + if ((m->m_flags & M_RTALERT_MLD) == 0) { + struct sockaddr_in6 mcaddr; + + bzero(&mcaddr, sizeof(struct sockaddr_in6)); + mcaddr.sin6_len = sizeof(struct sockaddr_in6); + mcaddr.sin6_family = AF_INET6; + mcaddr.sin6_addr = ip6->ip6_dst; + + blocked = im6o_mc_filter(in6p->in6p_moptions, + ifp, + (struct sockaddr *)&mcaddr, + (struct sockaddr *)&fromsa); + } + if (blocked != MCAST_PASS) { + IP6STAT_INC(ip6s_notmember); + INP_RUNLOCK(in6p); + continue; + } + } + if (last != NULL) { + struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); + +#ifdef IPSEC + /* + * Check AH/ESP integrity. + */ + if (n && ipsec6_in_reject(n, last)) { + m_freem(n); + V_ipsec6stat.in_polvio++; + /* Do not inject data into pcb. 
*/ + } else +#endif /* IPSEC */ + if (n) { + if (last->inp_flags & INP_CONTROLOPTS || + last->inp_socket->so_options & SO_TIMESTAMP) + ip6_savecontrol(last, n, &opts); + /* strip intermediate headers */ + m_adj(n, *offp); + if (sbappendaddr(&last->inp_socket->so_rcv, + (struct sockaddr *)&fromsa, + n, opts) == 0) { + m_freem(n); + if (opts) + m_freem(opts); + V_rip6stat.rip6s_fullsock++; + } else + sorwakeup(last->inp_socket); + opts = NULL; + } + INP_RUNLOCK(last); + } + last = in6p; + } + INP_INFO_RUNLOCK(&V_ripcbinfo); +#ifdef IPSEC + /* + * Check AH/ESP integrity. + */ + if ((last != NULL) && ipsec6_in_reject(m, last)) { + m_freem(m); + V_ipsec6stat.in_polvio++; + V_ip6stat.ip6s_delivered--; + /* Do not inject data into pcb. */ + INP_RUNLOCK(last); + } else +#endif /* IPSEC */ + if (last != NULL) { + if (last->inp_flags & INP_CONTROLOPTS || + last->inp_socket->so_options & SO_TIMESTAMP) + ip6_savecontrol(last, m, &opts); + /* Strip intermediate headers. */ + m_adj(m, *offp); + if (sbappendaddr(&last->inp_socket->so_rcv, + (struct sockaddr *)&fromsa, m, opts) == 0) { + m_freem(m); + if (opts) + m_freem(opts); + V_rip6stat.rip6s_fullsock++; + } else + sorwakeup(last->inp_socket); + INP_RUNLOCK(last); + } else { + V_rip6stat.rip6s_nosock++; + if (m->m_flags & M_MCAST) + V_rip6stat.rip6s_nosockmcast++; + if (proto == IPPROTO_NONE) + m_freem(m); + else { + char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */ + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_NEXTHEADER, + prvnxtp - mtod(m, char *)); + } + V_ip6stat.ip6s_delivered--; + } + return (IPPROTO_DONE); +} + +void +rip6_ctlinput(int cmd, struct sockaddr *sa, void *d) +{ + struct ip6_hdr *ip6; + struct mbuf *m; + int off = 0; + struct ip6ctlparam *ip6cp = NULL; + const struct sockaddr_in6 *sa6_src = NULL; + void *cmdarg; + struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange; + + if (sa->sa_family != AF_INET6 || + sa->sa_len != sizeof(struct sockaddr_in6)) + return; + + if ((unsigned)cmd >= 
PRC_NCMDS) + return; + if (PRC_IS_REDIRECT(cmd)) + notify = in6_rtchange, d = NULL; + else if (cmd == PRC_HOSTDEAD) + d = NULL; + else if (inet6ctlerrmap[cmd] == 0) + return; + + /* + * If the parameter is from icmp6, decode it. + */ + if (d != NULL) { + ip6cp = (struct ip6ctlparam *)d; + m = ip6cp->ip6c_m; + ip6 = ip6cp->ip6c_ip6; + off = ip6cp->ip6c_off; + cmdarg = ip6cp->ip6c_cmdarg; + sa6_src = ip6cp->ip6c_src; + } else { + m = NULL; + ip6 = NULL; + cmdarg = NULL; + sa6_src = &sa6_any; + } + + (void) in6_pcbnotify(&V_ripcbinfo, sa, 0, + (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); +} + +/* + * Generate IPv6 header and pass packet to ip6_output. Tack on options user + * may have setup with control call. + */ +int +#if __STDC__ +rip6_output(struct mbuf *m, ...) +#else +rip6_output(m, va_alist) + struct mbuf *m; + va_dcl +#endif +{ + struct mbuf *control; + struct socket *so; + struct sockaddr_in6 *dstsock; + struct in6_addr *dst; + struct ip6_hdr *ip6; + struct inpcb *in6p; + u_int plen = m->m_pkthdr.len; + int error = 0; + struct ip6_pktopts opt, *optp; + struct ifnet *oifp = NULL; + int type = 0, code = 0; /* for ICMPv6 output statistics only */ + int scope_ambiguous = 0; + int use_defzone = 0; + struct in6_addr in6a; + va_list ap; + + va_start(ap, m); + so = va_arg(ap, struct socket *); + dstsock = va_arg(ap, struct sockaddr_in6 *); + control = va_arg(ap, struct mbuf *); + va_end(ap); + + in6p = sotoinpcb(so); + INP_WLOCK(in6p); + + dst = &dstsock->sin6_addr; + if (control != NULL) { + if ((error = ip6_setpktopts(control, &opt, + in6p->in6p_outputopts, so->so_cred, + so->so_proto->pr_protocol)) != 0) { + goto bad; + } + optp = &opt; + } else + optp = in6p->in6p_outputopts; + + /* + * Check and convert scope zone ID into internal form. + * + * XXX: we may still need to determine the zone later. 
+ */ + if (!(so->so_state & SS_ISCONNECTED)) { + if (!optp || !optp->ip6po_pktinfo || + !optp->ip6po_pktinfo->ipi6_ifindex) + use_defzone = V_ip6_use_defzone; + if (dstsock->sin6_scope_id == 0 && !use_defzone) + scope_ambiguous = 1; + if ((error = sa6_embedscope(dstsock, use_defzone)) != 0) + goto bad; + } + + /* + * For an ICMPv6 packet, we should know its type and code to update + * statistics. + */ + if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { + struct icmp6_hdr *icmp6; + if (m->m_len < sizeof(struct icmp6_hdr) && + (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { + error = ENOBUFS; + goto bad; + } + icmp6 = mtod(m, struct icmp6_hdr *); + type = icmp6->icmp6_type; + code = icmp6->icmp6_code; + } + + M_PREPEND(m, sizeof(*ip6), M_DONTWAIT); + if (m == NULL) { + error = ENOBUFS; + goto bad; + } + ip6 = mtod(m, struct ip6_hdr *); + + /* + * Source address selection. + */ + error = in6_selectsrc(dstsock, optp, in6p, NULL, so->so_cred, + &oifp, &in6a); + if (error) + goto bad; + error = prison_check_ip6(in6p->inp_cred, &in6a); + if (error != 0) + goto bad; + ip6->ip6_src = in6a; + + if (oifp && scope_ambiguous) { + /* + * Application should provide a proper zone ID or the use of + * default zone IDs should be enabled. Unfortunately, some + * applications do not behave as it should, so we need a + * workaround. Even if an appropriate ID is not determined + * (when it's required), if we can determine the outgoing + * interface. determine the zone ID based on the interface. + */ + error = in6_setscope(&dstsock->sin6_addr, oifp, NULL); + if (error != 0) + goto bad; + } + ip6->ip6_dst = dstsock->sin6_addr; + + /* + * Fill in the rest of the IPv6 header fields. + */ + ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | + (in6p->inp_flow & IPV6_FLOWINFO_MASK); + ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | + (IPV6_VERSION & IPV6_VERSION_MASK); + + /* + * ip6_plen will be filled in ip6_output, so not fill it here. 
+ */ + ip6->ip6_nxt = in6p->inp_ip_p; + ip6->ip6_hlim = in6_selecthlim(in6p, oifp); + + if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || + in6p->in6p_cksum != -1) { + struct mbuf *n; + int off; + u_int16_t *p; + + /* Compute checksum. */ + if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) + off = offsetof(struct icmp6_hdr, icmp6_cksum); + else + off = in6p->in6p_cksum; + if (plen < off + 1) { + error = EINVAL; + goto bad; + } + off += sizeof(struct ip6_hdr); + + n = m; + while (n && n->m_len <= off) { + off -= n->m_len; + n = n->m_next; + } + if (!n) + goto bad; + p = (u_int16_t *)(mtod(n, caddr_t) + off); + *p = 0; + *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); + } + + error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p); + if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { + if (oifp) + icmp6_ifoutstat_inc(oifp, type, code); + ICMP6STAT_INC(icp6s_outhist[type]); + } else + V_rip6stat.rip6s_opackets++; + + goto freectl; + + bad: + if (m) + m_freem(m); + + freectl: + if (control != NULL) { + ip6_clearpktopts(&opt, -1); + m_freem(control); + } + INP_WUNLOCK(in6p); + return (error); +} + +/* + * Raw IPv6 socket option processing. + */ +int +rip6_ctloutput(struct socket *so, struct sockopt *sopt) +{ + int error; + + if (sopt->sopt_level == IPPROTO_ICMPV6) + /* + * XXX: is it better to call icmp6_ctloutput() directly + * from protosw? + */ + return (icmp6_ctloutput(so, sopt)); + else if (sopt->sopt_level != IPPROTO_IPV6) + return (EINVAL); + + error = 0; + + switch (sopt->sopt_dir) { + case SOPT_GET: + switch (sopt->sopt_name) { + case MRT6_INIT: + case MRT6_DONE: + case MRT6_ADD_MIF: + case MRT6_DEL_MIF: + case MRT6_ADD_MFC: + case MRT6_DEL_MFC: + case MRT6_PIM: + error = ip6_mrouter_get ? 
ip6_mrouter_get(so, sopt) : + EOPNOTSUPP; + break; + case IPV6_CHECKSUM: + error = ip6_raw_ctloutput(so, sopt); + break; + default: + error = ip6_ctloutput(so, sopt); + break; + } + break; + + case SOPT_SET: + switch (sopt->sopt_name) { + case MRT6_INIT: + case MRT6_DONE: + case MRT6_ADD_MIF: + case MRT6_DEL_MIF: + case MRT6_ADD_MFC: + case MRT6_DEL_MFC: + case MRT6_PIM: + error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) : + EOPNOTSUPP; + break; + case IPV6_CHECKSUM: + error = ip6_raw_ctloutput(so, sopt); + break; + default: + error = ip6_ctloutput(so, sopt); + break; + } + break; + } + + return (error); +} + +static int +rip6_attach(struct socket *so, int proto, struct thread *td) +{ + struct inpcb *inp; + struct icmp6_filter *filter; + int error; + + inp = sotoinpcb(so); + KASSERT(inp == NULL, ("rip6_attach: inp != NULL")); + + error = priv_check(td, PRIV_NETINET_RAW); + if (error) + return (error); + error = soreserve(so, rip_sendspace, rip_recvspace); + if (error) + return (error); + filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT); + if (filter == NULL) + return (ENOMEM); + INP_INFO_WLOCK(&V_ripcbinfo); + error = in_pcballoc(so, &V_ripcbinfo); + if (error) { + INP_INFO_WUNLOCK(&V_ripcbinfo); + free(filter, M_PCB); + return (error); + } + inp = (struct inpcb *)so->so_pcb; + INP_INFO_WUNLOCK(&V_ripcbinfo); + inp->inp_vflag |= INP_IPV6; + inp->inp_ip_p = (long)proto; + inp->in6p_hops = -1; /* use kernel default */ + inp->in6p_cksum = -1; + inp->in6p_icmp6filt = filter; + ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt); + INP_WUNLOCK(inp); + return (0); +} + +static void +rip6_detach(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("rip6_detach: inp == NULL")); + + if (so == V_ip6_mrouter && ip6_mrouter_done) + ip6_mrouter_done(); + /* xxx: RSVP */ + INP_INFO_WLOCK(&V_ripcbinfo); + INP_WLOCK(inp); + free(inp->in6p_icmp6filt, M_PCB); + in_pcbdetach(inp); + in_pcbfree(inp); + INP_INFO_WUNLOCK(&V_ripcbinfo); +} 
+ +/* XXXRW: This can't ever be called. */ +static void +rip6_abort(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("rip6_abort: inp == NULL")); + + soisdisconnected(so); +} + +static void +rip6_close(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("rip6_close: inp == NULL")); + + soisdisconnected(so); +} + +static int +rip6_disconnect(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL")); + + if ((so->so_state & SS_ISCONNECTED) == 0) + return (ENOTCONN); + inp->in6p_faddr = in6addr_any; + rip6_abort(so); + return (0); +} + +static int +rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) +{ + struct inpcb *inp; + struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; + struct ifaddr *ifa = NULL; + int error = 0; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("rip6_bind: inp == NULL")); + + if (nam->sa_len != sizeof(*addr)) + return (EINVAL); + if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0) + return (error); + if (TAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6) + return (EADDRNOTAVAIL); + if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) + return (error); + + if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && + (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) + return (EADDRNOTAVAIL); + if (ifa != NULL && + ((struct in6_ifaddr *)ifa)->ia6_flags & + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| + IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { + ifa_free(ifa); + return (EADDRNOTAVAIL); + } + if (ifa != NULL) + ifa_free(ifa); + INP_INFO_WLOCK(&V_ripcbinfo); + INP_WLOCK(inp); + inp->in6p_laddr = addr->sin6_addr; + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_ripcbinfo); + return (0); +} + +static int +rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) +{ + struct inpcb *inp; + struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; + struct 
in6_addr in6a; + struct ifnet *ifp = NULL; + int error = 0, scope_ambiguous = 0; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("rip6_connect: inp == NULL")); + + if (nam->sa_len != sizeof(*addr)) + return (EINVAL); + if (TAILQ_EMPTY(&V_ifnet)) + return (EADDRNOTAVAIL); + if (addr->sin6_family != AF_INET6) + return (EAFNOSUPPORT); + + /* + * Application should provide a proper zone ID or the use of default + * zone IDs should be enabled. Unfortunately, some applications do + * not behave as it should, so we need a workaround. Even if an + * appropriate ID is not determined, we'll see if we can determine + * the outgoing interface. If we can, determine the zone ID based on + * the interface below. + */ + if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone) + scope_ambiguous = 1; + if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) + return (error); + + INP_INFO_WLOCK(&V_ripcbinfo); + INP_WLOCK(inp); + /* Source address selection. XXX: need pcblookup? */ + error = in6_selectsrc(addr, inp->in6p_outputopts, + inp, NULL, so->so_cred, &ifp, &in6a); + if (error) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_ripcbinfo); + return (error); + } + + /* XXX: see above */ + if (ifp && scope_ambiguous && + (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_ripcbinfo); + return (error); + } + inp->in6p_faddr = addr->sin6_addr; + inp->in6p_laddr = in6a; + soisconnected(so); + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_ripcbinfo); + return (0); +} + +static int +rip6_shutdown(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL")); + + INP_WLOCK(inp); + socantsendmore(so); + INP_WUNLOCK(inp); + return (0); +} + +static int +rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, + struct mbuf *control, struct thread *td) +{ + struct inpcb *inp; + struct sockaddr_in6 tmp; + struct sockaddr_in6 *dst; + int ret; + + inp = sotoinpcb(so); + 
KASSERT(inp != NULL, ("rip6_send: inp == NULL")); + + /* Always copy sockaddr to avoid overwrites. */ + /* Unlocked read. */ + if (so->so_state & SS_ISCONNECTED) { + if (nam) { + m_freem(m); + return (EISCONN); + } + /* XXX */ + bzero(&tmp, sizeof(tmp)); + tmp.sin6_family = AF_INET6; + tmp.sin6_len = sizeof(struct sockaddr_in6); + INP_RLOCK(inp); + bcopy(&inp->in6p_faddr, &tmp.sin6_addr, + sizeof(struct in6_addr)); + INP_RUNLOCK(inp); + dst = &tmp; + } else { + if (nam == NULL) { + m_freem(m); + return (ENOTCONN); + } + if (nam->sa_len != sizeof(struct sockaddr_in6)) { + m_freem(m); + return (EINVAL); + } + tmp = *(struct sockaddr_in6 *)nam; + dst = &tmp; + + if (dst->sin6_family == AF_UNSPEC) { + /* + * XXX: we allow this case for backward + * compatibility to buggy applications that + * rely on old (and wrong) kernel behavior. + */ + log(LOG_INFO, "rip6 SEND: address family is " + "unspec. Assume AF_INET6\n"); + dst->sin6_family = AF_INET6; + } else if (dst->sin6_family != AF_INET6) { + m_freem(m); + return(EAFNOSUPPORT); + } + } + ret = rip6_output(m, so, dst, control); + return (ret); +} + +struct pr_usrreqs rip6_usrreqs = { + .pru_abort = rip6_abort, + .pru_attach = rip6_attach, + .pru_bind = rip6_bind, + .pru_connect = rip6_connect, + .pru_control = in6_control, + .pru_detach = rip6_detach, + .pru_disconnect = rip6_disconnect, + .pru_peeraddr = in6_getpeeraddr, + .pru_send = rip6_send, + .pru_shutdown = rip6_shutdown, + .pru_sockaddr = in6_getsockaddr, + .pru_close = rip6_close, +}; diff --git a/freebsd/sys/netinet6/raw_ip6.h b/freebsd/sys/netinet6/raw_ip6.h new file mode 100644 index 00000000..c4491d01 --- /dev/null +++ b/freebsd/sys/netinet6/raw_ip6.h @@ -0,0 +1,55 @@ +/*- + * Copyright (C) 2001 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: raw_ip6.h,v 1.2 2001/05/27 13:28:35 itojun Exp $ + * $FreeBSD$ + */ + +#ifndef _NETINET6_RAW_IP6_HH_ +#define _NETINET6_RAW_IP6_HH_ + +/* + * ICMPv6 stat is counted separately. 
see netinet/icmp6.h + */ +struct rip6stat { + u_quad_t rip6s_ipackets; /* total input packets */ + u_quad_t rip6s_isum; /* input checksum computations */ + u_quad_t rip6s_badsum; /* of the checksum computations above, how many failed */ + u_quad_t rip6s_nosock; /* no matching socket */ + u_quad_t rip6s_nosockmcast; /* of the no-socket packets above, arrived as multicast */ + u_quad_t rip6s_fullsock; /* not delivered, input socket full */ + + u_quad_t rip6s_opackets; /* total output packets */ +}; + +#ifdef _KERNEL +VNET_DECLARE(struct rip6stat, rip6stat); +#define V_rip6stat VNET(rip6stat) +#endif /* _KERNEL */ + +#endif /* _NETINET6_RAW_IP6_HH_ */ diff --git a/freebsd/sys/netinet6/route6.c b/freebsd/sys/netinet6/route6.c new file mode 100644 index 00000000..72ad9fae --- /dev/null +++ b/freebsd/sys/netinet6/route6.c @@ -0,0 +1,111 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: route6.c,v 1.24 2001/03/14 03:07:05 itojun Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/queue.h> + +#include <freebsd/net/if.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> + +#include <freebsd/netinet/icmp6.h> + +/* + * proto - is unused + */ + +int +route6_input(struct mbuf **mp, int *offp, int proto) +{ + struct ip6_hdr *ip6; + struct mbuf *m = *mp; + struct ip6_rthdr *rh; + int off = *offp, rhlen; + struct ip6aux *ip6a; + + ip6a = ip6_findaux(m); + if (ip6a) { + /* XXX reject home-address option before rthdr */ + if (ip6a->ip6a_flags & IP6A_SWAP) { + V_ip6stat.ip6s_badoptions++; + m_freem(m); + return IPPROTO_DONE; + } + } + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, sizeof(*rh), IPPROTO_DONE); + ip6 = mtod(m, struct ip6_hdr *); + rh = (struct ip6_rthdr *)((caddr_t)ip6 + off); +#else + ip6 = mtod(m, struct ip6_hdr *); + IP6_EXTHDR_GET(rh, struct ip6_rthdr *, m, off, sizeof(*rh)); + if (rh == NULL) { + V_ip6stat.ip6s_tooshort++; + return IPPROTO_DONE; + } +#endif + + /* + * While this switch may look gratuitous, leave it in + 
* in favour of RH2 implementations, etc. + */ + switch (rh->ip6r_type) { + default: + /* Unknown routing header type. */ + if (rh->ip6r_segleft == 0) { + rhlen = (rh->ip6r_len + 1) << 3; + break; /* Final dst. Just ignore the header. */ + } + V_ip6stat.ip6s_badoptions++; + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + (caddr_t)&rh->ip6r_type - (caddr_t)ip6); + return (IPPROTO_DONE); + } + + *offp += rhlen; + return (rh->ip6r_nxt); +} diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c new file mode 100644 index 00000000..d9a19d56 --- /dev/null +++ b/freebsd/sys/netinet6/scope6.c @@ -0,0 +1,498 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 2000 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: scope6.c,v 1.10 2000/07/24 13:29:31 itojun Exp $ + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/sys/param.h> +#include <freebsd/sys/malloc.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/systm.h> +#include <freebsd/sys/queue.h> +#include <freebsd/sys/syslog.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/vnet.h> + +#include <freebsd/netinet/in.h> + +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet6/in6_var.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/scope6_var.h> + +#ifdef ENABLE_DEFAULT_SCOPE +VNET_DEFINE(int, ip6_use_defzone) = 1; +#else +VNET_DEFINE(int, ip6_use_defzone) = 0; +#endif + +/* + * The scope6_lock protects the global sid default stored in + * sid_default below. 
+ */ +static struct mtx scope6_lock; +#define SCOPE6_LOCK_INIT() mtx_init(&scope6_lock, "scope6_lock", NULL, MTX_DEF) +#define SCOPE6_LOCK() mtx_lock(&scope6_lock) +#define SCOPE6_UNLOCK() mtx_unlock(&scope6_lock) +#define SCOPE6_LOCK_ASSERT() mtx_assert(&scope6_lock, MA_OWNED) + +static VNET_DEFINE(struct scope6_id, sid_default); +#define V_sid_default VNET(sid_default) + +#define SID(ifp) \ + (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->scope6_id) + +void +scope6_init(void) +{ + + bzero(&V_sid_default, sizeof(V_sid_default)); + + if (!IS_DEFAULT_VNET(curvnet)) + return; + + SCOPE6_LOCK_INIT(); +} + +struct scope6_id * +scope6_ifattach(struct ifnet *ifp) +{ + struct scope6_id *sid; + + sid = (struct scope6_id *)malloc(sizeof(*sid), M_IFADDR, M_WAITOK); + bzero(sid, sizeof(*sid)); + + /* + * XXX: IPV6_ADDR_SCOPE_xxx macros are not standard. + * Should we rather hardcode here? + */ + sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = ifp->if_index; + sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index; +#ifdef MULTI_SCOPE + /* by default, we don't care about scope boundary for these scopes. */ + sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL] = 1; + sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL] = 1; +#endif + + return sid; +} + +void +scope6_ifdetach(struct scope6_id *sid) +{ + + free(sid, M_IFADDR); +} + +int +scope6_set(struct ifnet *ifp, struct scope6_id *idlist) +{ + int i; + int error = 0; + struct scope6_id *sid = NULL; + + IF_AFDATA_LOCK(ifp); + sid = SID(ifp); + + if (!sid) { /* paranoid? */ + IF_AFDATA_UNLOCK(ifp); + return (EINVAL); + } + + /* + * XXX: We need more consistency checks of the relationship among + * scopes (e.g. an organization should be larger than a site). + */ + + /* + * TODO(XXX): after setting, we should reflect the changes to + * interface addresses, routing table entries, PCB entries... 
+ */ + + SCOPE6_LOCK(); + for (i = 0; i < 16; i++) { + if (idlist->s6id_list[i] && + idlist->s6id_list[i] != sid->s6id_list[i]) { + /* + * An interface zone ID must be the corresponding + * interface index by definition. + */ + if (i == IPV6_ADDR_SCOPE_INTFACELOCAL && + idlist->s6id_list[i] != ifp->if_index) { + IF_AFDATA_UNLOCK(ifp); + SCOPE6_UNLOCK(); + return (EINVAL); + } + + if (i == IPV6_ADDR_SCOPE_LINKLOCAL && + idlist->s6id_list[i] > V_if_index) { + /* + * XXX: theoretically, there should be no + * relationship between link IDs and interface + * IDs, but we check the consistency for + * safety in later use. + */ + IF_AFDATA_UNLOCK(ifp); + SCOPE6_UNLOCK(); + return (EINVAL); + } + + /* + * XXX: we must need lots of work in this case, + * but we simply set the new value in this initial + * implementation. + */ + sid->s6id_list[i] = idlist->s6id_list[i]; + } + } + SCOPE6_UNLOCK(); + IF_AFDATA_UNLOCK(ifp); + + return (error); +} + +int +scope6_get(struct ifnet *ifp, struct scope6_id *idlist) +{ + /* We only need to lock the interface's afdata for SID() to work. */ + IF_AFDATA_LOCK(ifp); + struct scope6_id *sid = SID(ifp); + + if (sid == NULL) { /* paranoid? */ + IF_AFDATA_UNLOCK(ifp); + return (EINVAL); + } + + SCOPE6_LOCK(); + *idlist = *sid; + SCOPE6_UNLOCK(); + + IF_AFDATA_UNLOCK(ifp); + return (0); +} + + +/* + * Get a scope of the address. Node-local, link-local, site-local or global. + */ +int +in6_addrscope(struct in6_addr *addr) +{ + int scope; + + if (addr->s6_addr[0] == 0xfe) { + scope = addr->s6_addr[1] & 0xc0; + + switch (scope) { + case 0x80: + return IPV6_ADDR_SCOPE_LINKLOCAL; + break; + case 0xc0: + return IPV6_ADDR_SCOPE_SITELOCAL; + break; + default: + return IPV6_ADDR_SCOPE_GLOBAL; /* just in case */ + break; + } + } + + + if (addr->s6_addr[0] == 0xff) { + scope = addr->s6_addr[1] & 0x0f; + + /* + * due to other scope such as reserved, + * return scope doesn't work. 
+ */ + switch (scope) { + case IPV6_ADDR_SCOPE_INTFACELOCAL: + return IPV6_ADDR_SCOPE_INTFACELOCAL; + break; + case IPV6_ADDR_SCOPE_LINKLOCAL: + return IPV6_ADDR_SCOPE_LINKLOCAL; + break; + case IPV6_ADDR_SCOPE_SITELOCAL: + return IPV6_ADDR_SCOPE_SITELOCAL; + break; + default: + return IPV6_ADDR_SCOPE_GLOBAL; + break; + } + } + + /* + * Regard loopback and unspecified addresses as global, since + * they have no ambiguity. + */ + if (bcmp(&in6addr_loopback, addr, sizeof(*addr) - 1) == 0) { + if (addr->s6_addr[15] == 1) /* loopback */ + return IPV6_ADDR_SCOPE_LINKLOCAL; + if (addr->s6_addr[15] == 0) /* unspecified */ + return IPV6_ADDR_SCOPE_GLOBAL; /* XXX: correct? */ + } + + return IPV6_ADDR_SCOPE_GLOBAL; +} + +/* + * ifp - note that this might be NULL + */ + +void +scope6_setdefault(struct ifnet *ifp) +{ + + /* + * Currently, this function just sets the default "interfaces" + * and "links" according to the given interface. + * We might eventually have to separate the notion of "link" from + * "interface" and provide a user interface to set the default. + */ + SCOPE6_LOCK(); + if (ifp) { + V_sid_default.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = + ifp->if_index; + V_sid_default.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = + ifp->if_index; + } else { + V_sid_default.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = 0; + V_sid_default.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = 0; + } + SCOPE6_UNLOCK(); +} + +int +scope6_get_default(struct scope6_id *idlist) +{ + + SCOPE6_LOCK(); + *idlist = V_sid_default; + SCOPE6_UNLOCK(); + + return (0); +} + +u_int32_t +scope6_addr2default(struct in6_addr *addr) +{ + u_int32_t id; + + /* + * special case: The loopback address should be considered as + * link-local, but there's no ambiguity in the syntax. + */ + if (IN6_IS_ADDR_LOOPBACK(addr)) + return (0); + + /* + * XXX: 32-bit read is atomic on all our platforms, is it OK + * not to lock here? 
+ */ + SCOPE6_LOCK(); + id = V_sid_default.s6id_list[in6_addrscope(addr)]; + SCOPE6_UNLOCK(); + return (id); +} + +/* + * Validate the specified scope zone ID in the sin6_scope_id field. If the ID + * is unspecified (=0), needs to be specified, and the default zone ID can be + * used, the default value will be used. + * This routine then generates the kernel-internal form: if the address scope + * of is interface-local or link-local, embed the interface index in the + * address. + */ +int +sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok) +{ + struct ifnet *ifp; + u_int32_t zoneid; + + if ((zoneid = sin6->sin6_scope_id) == 0 && defaultok) + zoneid = scope6_addr2default(&sin6->sin6_addr); + + if (zoneid != 0 && + (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) || + IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr))) { + /* + * At this moment, we only check interface-local and + * link-local scope IDs, and use interface indices as the + * zone IDs assuming a one-to-one mapping between interfaces + * and links. + */ + if (V_if_index < zoneid) + return (ENXIO); + ifp = ifnet_byindex(zoneid); + if (ifp == NULL) /* XXX: this can happen for some OS */ + return (ENXIO); + + /* XXX assignment to 16bit from 32bit variable */ + sin6->sin6_addr.s6_addr16[1] = htons(zoneid & 0xffff); + + sin6->sin6_scope_id = 0; + } + + return 0; +} + +/* + * generate standard sockaddr_in6 from embedded form. + */ +int +sa6_recoverscope(struct sockaddr_in6 *sin6) +{ + char ip6buf[INET6_ADDRSTRLEN]; + u_int32_t zoneid; + + if (sin6->sin6_scope_id != 0) { + log(LOG_NOTICE, + "sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n", + ip6_sprintf(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id); + /* XXX: proceed anyway... 
*/ + } + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) || + IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr)) { + /* + * KAME assumption: link id == interface id + */ + zoneid = ntohs(sin6->sin6_addr.s6_addr16[1]); + if (zoneid) { + /* sanity check */ + if (zoneid < 0 || V_if_index < zoneid) + return (ENXIO); + if (!ifnet_byindex(zoneid)) + return (ENXIO); + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = zoneid; + } + } + + return 0; +} + +/* + * Determine the appropriate scope zone ID for in6 and ifp. If ret_id is + * non NULL, it is set to the zone ID. If the zone ID needs to be embedded + * in the in6_addr structure, in6 will be modified. + * + * ret_id - unnecessary? + */ +int +in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id) +{ + int scope; + u_int32_t zoneid = 0; + struct scope6_id *sid; + + IF_AFDATA_LOCK(ifp); + + sid = SID(ifp); + +#ifdef DIAGNOSTIC + if (sid == NULL) { /* should not happen */ + panic("in6_setscope: scope array is NULL"); + /* NOTREACHED */ + } +#endif + + /* + * special case: the loopback address can only belong to a loopback + * interface. + */ + if (IN6_IS_ADDR_LOOPBACK(in6)) { + if (!(ifp->if_flags & IFF_LOOPBACK)) { + IF_AFDATA_UNLOCK(ifp); + return (EINVAL); + } else { + if (ret_id != NULL) + *ret_id = 0; /* there's no ambiguity */ + IF_AFDATA_UNLOCK(ifp); + return (0); + } + } + + scope = in6_addrscope(in6); + + SCOPE6_LOCK(); + switch (scope) { + case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */ + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL]; + break; + + case IPV6_ADDR_SCOPE_LINKLOCAL: + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]; + break; + + case IPV6_ADDR_SCOPE_SITELOCAL: + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]; + break; + + case IPV6_ADDR_SCOPE_ORGLOCAL: + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]; + break; + + default: + zoneid = 0; /* XXX: treat as global. 
*/ + break; + } + SCOPE6_UNLOCK(); + IF_AFDATA_UNLOCK(ifp); + + if (ret_id != NULL) + *ret_id = zoneid; + + if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) + in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */ + + return (0); +} + +/* + * Just clear the embedded scope identifier. Return 0 if the original address + * is intact; return non 0 if the address is modified. + */ +int +in6_clearscope(struct in6_addr *in6) +{ + int modified = 0; + + if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { + if (in6->s6_addr16[1] != 0) + modified = 1; + in6->s6_addr16[1] = 0; + } + + return (modified); +} diff --git a/freebsd/sys/netinet6/scope6_var.h b/freebsd/sys/netinet6/scope6_var.h new file mode 100644 index 00000000..a87aa57c --- /dev/null +++ b/freebsd/sys/netinet6/scope6_var.h @@ -0,0 +1,60 @@ +/*- + * Copyright (C) 2000 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
#ifndef _NETINET6_SCOPE6_VAR_HH_
#define _NETINET6_SCOPE6_VAR_HH_

#ifdef _KERNEL
/* Table of zone IDs, indexed by scope value. */
struct scope6_id {
	/*
	 * 16 entries, corresponding to the 4-bit multicast scope field,
	 * i.e. from node-local to global with some reserved/unassigned types.
	 */
	u_int32_t s6id_list[16];
};

/* Initialise the default zone table (and, for the default vnet, its lock). */
void	scope6_init __P((void));
/* Allocate / release the zone table attached to an interface. */
struct scope6_id *scope6_ifattach __P((struct ifnet *));
void	scope6_ifdetach __P((struct scope6_id *));
/* Update / copy out an interface's zone table; return 0 or EINVAL. */
int	scope6_set __P((struct ifnet *, struct scope6_id *));
int	scope6_get __P((struct ifnet *, struct scope6_id *));
/* Set (the interface may be NULL) / copy out the default zone table. */
void	scope6_setdefault __P((struct ifnet *));
int	scope6_get_default __P((struct scope6_id *));
/*
 * NOTE(review): scope6.c defines in6_addrscope() (returning int) but no
 * function named scope6_in6_addrscope is visible there -- confirm
 * whether this prototype is stale.
 */
u_int32_t scope6_in6_addrscope __P((struct in6_addr *));
/* Default zone ID for the scope of an address. */
u_int32_t scope6_addr2default __P((struct in6_addr *));
/* Convert between sin6_scope_id and the zone ID embedded in the address. */
int	sa6_embedscope __P((struct sockaddr_in6 *, int));
int	sa6_recoverscope __P((struct sockaddr_in6 *));
/* Embed / clear the zone ID inside an in6_addr. */
int	in6_setscope __P((struct in6_addr *, struct ifnet *, u_int32_t *));
int	in6_clearscope __P((struct in6_addr *));
#endif /* _KERNEL */

#endif /* _NETINET6_SCOPE6_VAR_HH_ */
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
/* $KAME: sctp6_usrreq.c,v 1.38 2005/08/24 08:08:56 suz Exp $	 */

#include <freebsd/sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <freebsd/netinet/sctp_os.h>
#include <freebsd/sys/proc.h>
#include <freebsd/netinet/sctp_pcb.h>
#include <freebsd/netinet/sctp_header.h>
#include <freebsd/netinet/sctp_var.h>
#if defined(INET6)
#include <freebsd/netinet6/sctp6_var.h>
#endif
#include <freebsd/netinet/sctp_sysctl.h>
#include <freebsd/netinet/sctp_output.h>
#include <freebsd/netinet/sctp_uio.h>
#include <freebsd/netinet/sctp_asconf.h>
#include <freebsd/netinet/sctputil.h>
#include <freebsd/netinet/sctp_indata.h>
#include <freebsd/netinet/sctp_timer.h>
#include <freebsd/netinet/sctp_auth.h>
#include <freebsd/netinet/sctp_input.h>
#include <freebsd/netinet/sctp_output.h>
#include <freebsd/netinet/sctp_bsd_addr.h>
#include <freebsd/netinet/sctp_crc32.h>
#include <freebsd/netinet/udp.h>

#ifdef IPSEC
#include <freebsd/netipsec/ipsec.h>
#if defined(INET6)
#include <freebsd/netipsec/ipsec6.h>
#endif				/* INET6 */
#endif				/* IPSEC */

extern struct protosw inetsw[];

/*
 * IPv6 input entry point for SCTP.
 *
 * i_pak - in: *i_pak is the received packet
 * offp  - in: offset of the SCTP common header inside the packet
 * proto - unused
 *
 * Steps, in order: fetch the SCTP common header and first chunk header,
 * reject multicast destinations and destination port 0, verify the
 * checksum (unless SCTP_WITH_NO_CSUM, a CSUM_SCTP_VALID flag, or the
 * zero-checksum loopback exemption applies), look up the endpoint and
 * association with sctp_findassociation_addr(), optionally apply the
 * IPsec policy check, and hand the packet to
 * sctp_common_input_processing().
 *
 * Returns IPPROTO_DONE on every path except a SCTP_GET_PKT_VRFID()
 * failure, which returns -1.
 */
int
sctp6_input(struct mbuf **i_pak, int *offp, int proto)
{
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct sctphdr *sh;
	struct sctp_inpcb *in6p = NULL;
	struct sctp_nets *net;
	/* Set when this function must drop the endpoint reference itself. */
	int refcount_up = 0;
	uint32_t vrf_id = 0;

#ifdef IPSEC
	struct inpcb *in6p_ip;

#endif
	struct sctp_chunkhdr *ch;
	int length, offset, iphlen;
	uint8_t ecn_bits;
	struct sctp_tcb *stcb = NULL;
	int pkt_len = 0;

#if !defined(SCTP_WITH_NO_CSUM)
	uint32_t check, calc_check;

#endif
	int off = *offp;
	/*
	 * NOTE(review): port is never assigned after this initialisation,
	 * so the "(net) && (port)" blocks below cannot execute as written.
	 */
	uint16_t port = 0;

	/* get the VRF and table id's */
	if (SCTP_GET_PKT_VRFID(*i_pak, vrf_id)) {
		SCTP_RELEASE_PKT(*i_pak);
		/*
		 * NOTE(review): every other exit returns IPPROTO_DONE;
		 * confirm that callers handle -1.
		 */
		return (-1);
	}
	m = SCTP_HEADER_TO_CHAIN(*i_pak);
	pkt_len = SCTP_HEADER_LEN((*i_pak));

#ifdef SCTP_PACKET_LOGGING
	sctp_packet_log(m, pkt_len);
#endif
	ip6 = mtod(m, struct ip6_hdr *);
	/* Ensure that (sctphdr + sctp_chunkhdr) are contiguous. */
	IP6_EXTHDR_GET(sh, struct sctphdr *, m, off,
	    (int)(sizeof(*sh) + sizeof(*ch)));
	if (sh == NULL) {
		SCTP_STAT_INCR(sctps_hdrops);
		return IPPROTO_DONE;
	}
	/* The first chunk header directly follows the common header. */
	ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
	iphlen = off;
	/* offset now points just past the first chunk header. */
	offset = iphlen + sizeof(*sh) + sizeof(*ch);
	SCTPDBG(SCTP_DEBUG_INPUT1,
	    "sctp6_input() length:%d iphlen:%d\n", pkt_len, iphlen);


#if defined(NFAITH) && NFAITH > 0

	if (faithprefix_p != NULL && (*faithprefix_p) (&ip6->ip6_dst)) {
		/* XXX send icmp6 host/port unreach? */
		goto bad;
	}
#endif				/* NFAITH defined and > 0 */
	SCTP_STAT_INCR(sctps_recvpackets);
	SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
	SCTPDBG(SCTP_DEBUG_INPUT1, "V6 input gets a packet iphlen:%d pktlen:%d\n",
	    iphlen, pkt_len);
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
		/* No multi-cast support in SCTP */
		goto bad;
	}
	/* destination port of 0 is illegal, based on RFC2960. */
	if (sh->dest_port == 0)
		goto bad;

	SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
	    "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
	    m->m_pkthdr.len,
	    if_name(m->m_pkthdr.rcvif),
	    m->m_pkthdr.csum_flags);
#if defined(SCTP_WITH_NO_CSUM)
	SCTP_STAT_INCR(sctps_recvnocrc);
#else
	if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
		/* The mbuf is already flagged as checksum-valid. */
		SCTP_STAT_INCR(sctps_recvhwcrc);
		goto sctp_skip_csum;
	}
	check = sh->checksum;	/* save incoming checksum */
	if ((check == 0) && (SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback)) &&
	    (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6->ip6_dst))) {
		/* Zero checksum accepted when source == destination. */
		SCTP_STAT_INCR(sctps_recvnocrc);
		goto sctp_skip_csum;
	}
	sh->checksum = 0;	/* prepare for calc */
	calc_check = sctp_calculate_cksum(m, iphlen);
	SCTP_STAT_INCR(sctps_recvswcrc);
	if (calc_check != check) {
		SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p phlen:%d\n",
		    calc_check, check, m, iphlen);
		/*
		 * Look the association up anyway so that a packet-dropped
		 * report can be sent for it.
		 */
		stcb = sctp_findassociation_addr(m, iphlen, offset - sizeof(*ch),
		    sh, ch, &in6p, &net, vrf_id);
		if ((net) && (port)) {
			if (net->port == 0) {
				sctp_pathmtu_adjustment(in6p, stcb, net, net->mtu - sizeof(struct udphdr));
			}
			net->port = port;
		}
		/* in6p's ref-count increased && stcb locked */
		if ((in6p) && (stcb)) {
			sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
			sctp_chunk_output((struct sctp_inpcb *)in6p, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
		} else if ((in6p != NULL) && (stcb == NULL)) {
			refcount_up = 1;
		}
		SCTP_STAT_INCR(sctps_badsum);
		SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
		goto bad;
	}
	/* Put the verified checksum back into the header. */
	sh->checksum = calc_check;

sctp_skip_csum:
#endif
	net = NULL;
	/*
	 * Locate pcb and tcb for datagram sctp_findassociation_addr() wants
	 * IP/SCTP/first chunk header...
	 */
	stcb = sctp_findassociation_addr(m, iphlen, offset - sizeof(*ch),
	    sh, ch, &in6p, &net, vrf_id);
	if ((net) && (port)) {
		if (net->port == 0) {
			sctp_pathmtu_adjustment(in6p, stcb, net, net->mtu - sizeof(struct udphdr));
		}
		net->port = port;
	}
	/* in6p's ref-count increased */
	if (in6p == NULL) {
		/* No endpoint matched: answer according to the chunk type. */
		struct sctp_init_chunk *init_chk, chunk_buf;

		SCTP_STAT_INCR(sctps_noport);
		if (ch->chunk_type == SCTP_INITIATION) {
			/*
			 * we do a trick here to get the INIT tag, dig in
			 * and get the tag from the INIT and put it in the
			 * common header.
			 */
			init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
			    iphlen + sizeof(*sh), sizeof(*init_chk),
			    (uint8_t *) & chunk_buf);
			if (init_chk)
				sh->v_tag = init_chk->init.initiate_tag;
			else
				sh->v_tag = 0;
		}
		if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
			sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id, port);
			goto bad;
		}
		if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
			goto bad;
		}
		/* An ABORT is never answered with another ABORT. */
		if (ch->chunk_type != SCTP_ABORT_ASSOCIATION)
			sctp_send_abort(m, iphlen, sh, 0, NULL, vrf_id, port);
		goto bad;
	} else if (stcb == NULL) {
		refcount_up = 1;
	}
#ifdef IPSEC
	/*
	 * Check AH/ESP integrity.
	 */
	in6p_ip = (struct inpcb *)in6p;
	if (in6p_ip && (ipsec6_in_reject(m, in6p_ip))) {
/* XXX */
		MODULE_GLOBAL(ipsec6stat).in_polvio++;
		goto bad;
	}
#endif				/* IPSEC */

	/*
	 * CONTROL chunk processing
	 */
	/* Step back so that offset points at the first chunk header again. */
	offset -= sizeof(*ch);
	/* Bits 20..27 of the host-order flow word. */
	ecn_bits = ((ntohl(ip6->ip6_flow) >> 20) & 0x000000ff);

	/* Length now holds the total packet length payload + iphlen */
	length = ntohs(ip6->ip6_plen) + iphlen;

	/* sa_ignore NO_NULL_CHK */
	sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
	    in6p, stcb, net, ecn_bits, vrf_id, port);
	/* inp's ref-count reduced && stcb unlocked */
	/* XXX this stuff below gets moved to appropriate parts later... */
	if (m)
		sctp_m_freem(m);
	if ((in6p) && refcount_up) {
		/* reduce ref-count */
		SCTP_INP_WLOCK(in6p);
		SCTP_INP_DECR_REF(in6p);
		SCTP_INP_WUNLOCK(in6p);
	}
	return IPPROTO_DONE;

bad:
	/* Common error exit: unlock the TCB, drop our inp ref, free the packet. */
	if (stcb) {
		SCTP_TCB_UNLOCK(stcb);
	}
	if ((in6p) && refcount_up) {
		/* reduce ref-count */
		SCTP_INP_WLOCK(in6p);
		SCTP_INP_DECR_REF(in6p);
		SCTP_INP_WUNLOCK(in6p);
	}
	if (m)
		sctp_m_freem(m);
	return IPPROTO_DONE;
}


/*
 * Handle an ICMPv6 "packet too big" report for an association.
 *
 * Does nothing beyond unlocking unless all arguments are non-NULL, the
 * verification tag in sh matches the association's peer tag, and the
 * ICMPv6 type is ICMP6_PACKET_TOO_BIG.  Otherwise lowers net->mtu and
 * the association's smallest_mtu to the reported MTU where they are
 * larger, marks oversized queued chunks as fragmentable, and marks
 * oversized already-sent chunks for retransmission.
 *
 * stcb, when non-NULL, is unlocked before returning on every path.
 */
static void
sctp6_notify_mbuf(struct sctp_inpcb *inp, struct icmp6_hdr *icmp6,
    struct sctphdr *sh, struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t nxtsz;

	if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
	    (icmp6 == NULL) || (sh == NULL)) {
		goto out;
	}
	/* First do we even look at it? */
	if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag))
		goto out;

	if (icmp6->icmp6_type != ICMP6_PACKET_TOO_BIG) {
		/* not PACKET TOO BIG */
		goto out;
	}
	/*
	 * ok we need to look closely. We could even get smarter and look at
	 * anyone that we sent to in case we get a different ICMP that tells
	 * us there is no way to reach a host, but for this impl, all we
	 * care about is MTU discovery.
	 */
	nxtsz = ntohl(icmp6->icmp6_mtu);
	/* Stop any PMTU timer */
	sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL, SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1);

	/* Adjust destination size limit */
	if (net->mtu > nxtsz) {
		net->mtu = nxtsz;
		if (net->port) {
			/* A non-zero net->port costs one UDP header. */
			net->mtu -= sizeof(struct udphdr);
		}
	}
	/* now what about the ep? */
	if (stcb->asoc.smallest_mtu > nxtsz) {
		struct sctp_tmit_chunk *chk;

		/* Adjust that too */
		stcb->asoc.smallest_mtu = nxtsz;
		/* now off to subtract IP_DF flag if needed */

		TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
			if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) {
				chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
			}
		}
		TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
			if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) {
				/*
				 * For this guy we also mark for immediate
				 * resend since we sent too big a chunk
				 */
				chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
				if (chk->sent != SCTP_DATAGRAM_RESEND)
					stcb->asoc.sent_queue_retran_cnt++;
				chk->sent = SCTP_DATAGRAM_RESEND;
				chk->rec.data.doing_fast_retransmit = 0;

				/*
				 * NOTE(review): same assignment as two
				 * statements above; looks redundant.
				 */
				chk->sent = SCTP_DATAGRAM_RESEND;
				/* Clear any time so NO RTT is being done */
				chk->sent_rcv_time.tv_sec = 0;
				chk->sent_rcv_time.tv_usec = 0;
				stcb->asoc.total_flight -= chk->send_size;
				net->flight_size -= chk->send_size;
			}
		}
	}
	sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL);
out:
	if (stcb) {
		SCTP_TCB_UNLOCK(stcb);
	}
}


/*
 * Handle an ICMPv6 error (other than the MTU case above) for an
 * association.
 *
 * Does nothing beyond unlocking unless inp, stcb, net, sh and to are
 * non-NULL and the verification tag in sh matches the association's
 * peer tag.  icmph is dereferenced without a NULL check.  Depending on
 * the ICMP code the destination is marked unreachable and the upper
 * layer is notified, or the association is aborted and freed.
 *
 * stcb is unlocked on every path except the one that frees the
 * association.
 *
 * NOTE(review): the type and code tests below use ICMP_UNREACH and
 * ICMP_UNREACH_* names, which look like ICMPv4 constants, against the
 * fields of an ICMPv6 header.  The ICMP6_DST_UNREACH family looks like
 * what was intended -- confirm against netinet/icmp6.h.
 */
void
sctp6_notify(struct sctp_inpcb *inp,
    struct icmp6_hdr *icmph,
    struct sctphdr *sh,
    struct sockaddr *to,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
	struct socket *so;

#endif
	/* protection */
	int reason;


	if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
	    (sh == NULL) || (to == NULL)) {
		if (stcb)
			SCTP_TCB_UNLOCK(stcb);
		return;
	}
	/* First job is to verify the vtag matches what I would send */
	if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
		SCTP_TCB_UNLOCK(stcb);
		return;
	}
	if (icmph->icmp6_type != ICMP_UNREACH) {
		/* We only care about unreachable */
		SCTP_TCB_UNLOCK(stcb);
		return;
	}
	if ((icmph->icmp6_code == ICMP_UNREACH_NET) ||
	    (icmph->icmp6_code == ICMP_UNREACH_HOST) ||
	    (icmph->icmp6_code == ICMP_UNREACH_NET_UNKNOWN) ||
	    (icmph->icmp6_code == ICMP_UNREACH_HOST_UNKNOWN) ||
	    (icmph->icmp6_code == ICMP_UNREACH_ISOLATED) ||
	    (icmph->icmp6_code == ICMP_UNREACH_NET_PROHIB) ||
	    (icmph->icmp6_code == ICMP_UNREACH_HOST_PROHIB) ||
	    (icmph->icmp6_code == ICMP_UNREACH_FILTER_PROHIB)) {

		/*
		 * Hmm reachability problems we must examine closely. If it's
		 * not reachable, we may have lost a network. Or if there is
		 * NO protocol at the other end named SCTP. well we consider
		 * it a OOTB abort.
		 */
		if (net->dest_state & SCTP_ADDR_REACHABLE) {
			/* Ok that destination is NOT reachable */
			SCTP_PRINTF("ICMP (thresh %d/%d) takes interface %p down\n",
			    net->error_count,
			    net->failure_threshold,
			    net);

			net->dest_state &= ~SCTP_ADDR_REACHABLE;
			net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
			/*
			 * JRS 5/14/07 - If a destination is unreachable,
			 * the PF bit is turned off.  This allows an
			 * unambiguous use of the PF bit for destinations
			 * that are reachable but potentially failed. If the
			 * destination is set to the unreachable state, also
			 * set the destination to the PF state.
			 */
			/*
			 * Add debug message here if destination is not in
			 * PF state.
			 */
			/* Stop any running T3 timers here? */
			if ((stcb->asoc.sctp_cmt_on_off == 1) &&
			    (stcb->asoc.sctp_cmt_pf > 0)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
				    net);
			}
			/* Push the error count past the failure threshold. */
			net->error_count = net->failure_threshold + 1;
			sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
			    stcb, SCTP_FAILED_THRESHOLD,
			    (void *)net, SCTP_SO_NOT_LOCKED);
		}
		SCTP_TCB_UNLOCK(stcb);
	} else if ((icmph->icmp6_code == ICMP_UNREACH_PROTOCOL) ||
	    (icmph->icmp6_code == ICMP_UNREACH_PORT)) {
		/*
		 * Here the peer is either playing tricks on us, including
		 * an address that belongs to someone who does not support
		 * SCTP OR was a userland implementation that shutdown and
		 * now is dead. In either case treat it like a OOTB abort
		 * with no TCB
		 */
		reason = SCTP_PEER_FAULTY;
		sctp_abort_notification(stcb, reason, SCTP_SO_NOT_LOCKED);
#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
		/*
		 * Take the socket lock before the TCB lock; the refcnt
		 * bump is held across the window in which the TCB is
		 * unlocked.
		 */
		so = SCTP_INP_SO(inp);
		atomic_add_int(&stcb->asoc.refcnt, 1);
		SCTP_TCB_UNLOCK(stcb);
		SCTP_SOCKET_LOCK(so, 1);
		SCTP_TCB_LOCK(stcb);
		atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
		/*
		 * NOTE(review): the location tag is SCTP_FROM_SCTP_USRREQ,
		 * while the rest of this file uses SCTP_FROM_SCTP6_USRREQ
		 * -- confirm which is intended.
		 */
		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
		SCTP_SOCKET_UNLOCK(so, 1);
		/* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
#endif
		/* no need to unlock here, since the TCB is gone */
	} else {
		SCTP_TCB_UNLOCK(stcb);
	}
}



/*
 * Control-input hook: route an ICMPv6-derived notification to the
 * matching SCTP association.
 *
 * cmd    - PRC_* command; values >= PRC_NCMDS are ignored
 * pktdst - must be an AF_INET6 sockaddr of sockaddr_in6 size
 * d      - struct ip6ctlparam describing the offending packet, or NULL
 *
 * The SCTP common header is copied out of the offending packet to learn
 * the ports, the association is looked up, and PRC_MSGSIZE goes to
 * sctp6_notify_mbuf() while every other command goes to sctp6_notify().
 */
void
sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
{
	struct sctphdr sh;
	struct ip6ctlparam *ip6cp = NULL;
	uint32_t vrf_id;

	vrf_id = SCTP_DEFAULT_VRFID;

	if (pktdst->sa_family != AF_INET6 ||
	    pktdst->sa_len != sizeof(struct sockaddr_in6))
		return;

	if ((unsigned)cmd >= PRC_NCMDS)
		return;
	if (PRC_IS_REDIRECT(cmd)) {
		/*
		 * NOTE(review): with d cleared, ip6cp stays NULL and the
		 * whole block below is skipped, so the PRC_IS_REDIRECT()
		 * test inside it cannot be true.
		 */
		d = NULL;
	} else if (inet6ctlerrmap[cmd] == 0) {
		return;
	}
	/* if the parameter is from icmp6, decode it. */
	if (d != NULL) {
		ip6cp = (struct ip6ctlparam *)d;
	} else {
		ip6cp = (struct ip6ctlparam *)NULL;
	}

	if (ip6cp) {
		/*
		 * XXX: We assume that when IPV6 is non NULL, M and OFF are
		 * valid.
		 */
		/* check if we can safely examine src and dst ports */
		struct sctp_inpcb *inp = NULL;
		struct sctp_tcb *stcb = NULL;
		struct sctp_nets *net = NULL;
		struct sockaddr_in6 final;

		if (ip6cp->ip6c_m == NULL)
			return;

		bzero(&sh, sizeof(sh));
		bzero(&final, sizeof(final));
		inp = NULL;
		net = NULL;
		/* Copy the SCTP common header out of the offending packet. */
		m_copydata(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(sh),
		    (caddr_t)&sh);
		ip6cp->ip6c_src->sin6_port = sh.src_port;
		final.sin6_len = sizeof(final);
		final.sin6_family = AF_INET6;
		final.sin6_addr = ((struct sockaddr_in6 *)pktdst)->sin6_addr;
		final.sin6_port = sh.dest_port;
		stcb = sctp_findassociation_addr_sa((struct sockaddr *)ip6cp->ip6c_src,
		    (struct sockaddr *)&final,
		    &inp, &net, 1, vrf_id);
		/* inp's ref-count increased && stcb locked */
		/*
		 * NOTE(review): the bodies of sctp6_notify_mbuf() and
		 * sctp6_notify() unlock the TCB but contain no
		 * SCTP_INP_DECR_REF -- confirm where the inp reference
		 * taken above is released on the two notify paths.
		 */
		if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
			if (cmd == PRC_MSGSIZE) {
				sctp6_notify_mbuf(inp,
				    ip6cp->ip6c_icmp6,
				    &sh,
				    stcb,
				    net);
				/* inp's ref-count reduced && stcb unlocked */
			} else {
				sctp6_notify(inp, ip6cp->ip6c_icmp6, &sh,
				    (struct sockaddr *)&final,
				    stcb, net);
				/* inp's ref-count reduced && stcb unlocked */
			}
		} else {
			if (PRC_IS_REDIRECT(cmd) && inp) {
				in6_rtchange((struct in6pcb *)inp,
				    inet6ctlerrmap[cmd]);
			}
			if (inp) {
				/* reduce inp's ref-count */
				SCTP_INP_WLOCK(inp);
				SCTP_INP_DECR_REF(inp);
				SCTP_INP_WUNLOCK(inp);
			}
			if (stcb)
				SCTP_TCB_UNLOCK(stcb);
		}
	}
}
SCTP_DEFAULT_VRFID; + + error = priv_check(req->td, PRIV_NETINET_GETCRED); + if (error) + return (error); + + if (req->newlen != sizeof(addrs)) { + SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return (EINVAL); + } + if (req->oldlen != sizeof(struct ucred)) { + SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return (EINVAL); + } + error = SYSCTL_IN(req, addrs, sizeof(addrs)); + if (error) + return (error); + + stcb = sctp_findassociation_addr_sa(sin6tosa(&addrs[0]), + sin6tosa(&addrs[1]), + &inp, &net, 1, vrf_id); + if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) { + if ((inp != NULL) && (stcb == NULL)) { + /* reduce ref-count */ + SCTP_INP_WLOCK(inp); + SCTP_INP_DECR_REF(inp); + goto cred_can_cont; + } + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); + error = ENOENT; + goto out; + } + SCTP_TCB_UNLOCK(stcb); + /* + * We use the write lock here, only since in the error leg we need + * it. If we used RLOCK, then we would have to + * wlock/decr/unlock/rlock. Which in theory could create a hole. + * Better to use higher wlock. 
+ */ + SCTP_INP_WLOCK(inp); +cred_can_cont: + error = cr_canseesocket(req->td->td_ucred, inp->sctp_socket); + if (error) { + SCTP_INP_WUNLOCK(inp); + goto out; + } + cru2x(inp->sctp_socket->so_cred, &xuc); + SCTP_INP_WUNLOCK(inp); + error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); +out: + return (error); +} + +SYSCTL_PROC(_net_inet6_sctp6, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW, + 0, 0, + sctp6_getcred, "S,ucred", "Get the ucred of a SCTP6 connection"); + + +/* This is the same as the sctp_abort() could be made common */ +static void +sctp6_abort(struct socket *so) +{ + struct sctp_inpcb *inp; + uint32_t flags; + + inp = (struct sctp_inpcb *)so->so_pcb; + if (inp == 0) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return; + } +sctp_must_try_again: + flags = inp->sctp_flags; +#ifdef SCTP_LOG_CLOSING + sctp_log_closing(inp, NULL, 17); +#endif + if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) && + (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) { +#ifdef SCTP_LOG_CLOSING + sctp_log_closing(inp, NULL, 16); +#endif + sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT, + SCTP_CALLED_AFTER_CMPSET_OFCLOSE); + SOCK_LOCK(so); + SCTP_SB_CLEAR(so->so_snd); + /* + * same for the rcv ones, they are only here for the + * accounting/select. + */ + SCTP_SB_CLEAR(so->so_rcv); + /* Now null out the reference, we are completely detached. 
*/ + so->so_pcb = NULL; + SOCK_UNLOCK(so); + } else { + flags = inp->sctp_flags; + if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) { + goto sctp_must_try_again; + } + } + return; +} + +static int +sctp6_attach(struct socket *so, int proto, struct thread *p) +{ + struct in6pcb *inp6; + int error; + struct sctp_inpcb *inp; + uint32_t vrf_id = SCTP_DEFAULT_VRFID; + + inp = (struct sctp_inpcb *)so->so_pcb; + if (inp != NULL) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace)); + if (error) + return error; + } + error = sctp_inpcb_alloc(so, vrf_id); + if (error) + return error; + inp = (struct sctp_inpcb *)so->so_pcb; + SCTP_INP_WLOCK(inp); + inp->sctp_flags |= SCTP_PCB_FLAGS_BOUND_V6; /* I'm v6! */ + inp6 = (struct in6pcb *)inp; + + inp6->inp_vflag |= INP_IPV6; + inp6->in6p_hops = -1; /* use kernel default */ + inp6->in6p_cksum = -1; /* just to be sure */ +#ifdef INET + /* + * XXX: ugly!! IPv4 TTL initialization is necessary for an IPv6 + * socket as well, because the socket may be bound to an IPv6 + * wildcard address, which may match an IPv4-mapped IPv6 address. + */ + inp6->inp_ip_ttl = MODULE_GLOBAL(ip_defttl); +#endif + /* + * Hmm what about the IPSEC stuff that is missing here but in + * sctp_attach()? 
+ */ + SCTP_INP_WUNLOCK(inp); + return 0; +} + +static int +sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p) +{ + struct sctp_inpcb *inp; + struct in6pcb *inp6; + int error; + + inp = (struct sctp_inpcb *)so->so_pcb; + if (inp == 0) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + if (addr) { + if ((addr->sa_family == AF_INET6) && + (addr->sa_len != sizeof(struct sockaddr_in6))) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + if ((addr->sa_family == AF_INET) && + (addr->sa_len != sizeof(struct sockaddr_in))) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + } + inp6 = (struct in6pcb *)inp; + inp6->inp_vflag &= ~INP_IPV4; + inp6->inp_vflag |= INP_IPV6; + if ((addr != NULL) && (SCTP_IPV6_V6ONLY(inp6) == 0)) { + if (addr->sa_family == AF_INET) { + /* binding v4 addr to v6 socket, so reset flags */ + inp6->inp_vflag |= INP_IPV4; + inp6->inp_vflag &= ~INP_IPV6; + } else { + struct sockaddr_in6 *sin6_p; + + sin6_p = (struct sockaddr_in6 *)addr; + + if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) { + inp6->inp_vflag |= INP_IPV4; + } else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { + struct sockaddr_in sin; + + in6_sin6_2_sin(&sin, sin6_p); + inp6->inp_vflag |= INP_IPV4; + inp6->inp_vflag &= ~INP_IPV6; + error = sctp_inpcb_bind(so, (struct sockaddr *)&sin, NULL, p); + return error; + } + } + } else if (addr != NULL) { + /* IPV6_V6ONLY socket */ + if (addr->sa_family == AF_INET) { + /* can't bind v4 addr to v6 only socket! */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } else { + struct sockaddr_in6 *sin6_p; + + sin6_p = (struct sockaddr_in6 *)addr; + + if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { + /* can't bind v4-mapped addrs either! 
*/ + /* NOTE: we don't support SIIT */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + } + } + error = sctp_inpcb_bind(so, addr, NULL, p); + return error; +} + + +static void +sctp6_close(struct socket *so) +{ + sctp_close(so); +} + +/* This could be made common with sctp_detach() since they are identical */ + +static +int +sctp6_disconnect(struct socket *so) +{ + return (sctp_disconnect(so)); +} + + +int +sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, + struct mbuf *control, struct thread *p); + + +static int +sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, + struct mbuf *control, struct thread *p) +{ + struct sctp_inpcb *inp; + struct in6pcb *inp6; + +#ifdef INET + struct sockaddr_in6 *sin6; + +#endif /* INET */ + /* No SPL needed since sctp_output does this */ + + inp = (struct sctp_inpcb *)so->so_pcb; + if (inp == NULL) { + if (control) { + SCTP_RELEASE_PKT(control); + control = NULL; + } + SCTP_RELEASE_PKT(m); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + inp6 = (struct in6pcb *)inp; + /* + * For the TCP model we may get a NULL addr, if we are a connected + * socket thats ok. 
+ */ + if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) && + (addr == NULL)) { + goto connected_type; + } + if (addr == NULL) { + SCTP_RELEASE_PKT(m); + if (control) { + SCTP_RELEASE_PKT(control); + control = NULL; + } + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EDESTADDRREQ); + return (EDESTADDRREQ); + } +#ifdef INET + sin6 = (struct sockaddr_in6 *)addr; + if (SCTP_IPV6_V6ONLY(inp6)) { + /* + * if IPV6_V6ONLY flag, we discard datagrams destined to a + * v4 addr or v4-mapped addr + */ + if (addr->sa_family == AF_INET) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + } + if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + if (!MODULE_GLOBAL(ip6_v6only)) { + struct sockaddr_in sin; + + /* convert v4-mapped into v4 addr and send */ + in6_sin6_2_sin(&sin, sin6); + return sctp_sendm(so, flags, m, (struct sockaddr *)&sin, + control, p); + } else { + /* mapped addresses aren't enabled */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + } +#endif /* INET */ +connected_type: + /* now what about control */ + if (control) { + if (inp->control) { + SCTP_PRINTF("huh? control set?\n"); + SCTP_RELEASE_PKT(inp->control); + inp->control = NULL; + } + inp->control = control; + } + /* Place the data */ + if (inp->pkt) { + SCTP_BUF_NEXT(inp->pkt_last) = m; + inp->pkt_last = m; + } else { + inp->pkt_last = inp->pkt = m; + } + if ( + /* FreeBSD and MacOSX uses a flag passed */ + ((flags & PRUS_MORETOCOME) == 0) + ) { + /* + * note with the current version this code will only be used + * by OpenBSD, NetBSD and FreeBSD have methods for + * re-defining sosend() to use sctp_sosend(). One can + * optionaly switch back to this code (by changing back the + * defininitions but this is not advisable. 
+ */ + int ret; + + ret = sctp_output(inp, inp->pkt, addr, inp->control, p, flags); + inp->pkt = NULL; + inp->control = NULL; + return (ret); + } else { + return (0); + } +} + +static int +sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) +{ + uint32_t vrf_id; + int error = 0; + struct sctp_inpcb *inp; + struct in6pcb *inp6; + struct sctp_tcb *stcb; + +#ifdef INET + struct sockaddr_in6 *sin6; + struct sockaddr_storage ss; + +#endif /* INET */ + + inp6 = (struct in6pcb *)so->so_pcb; + inp = (struct sctp_inpcb *)so->so_pcb; + if (inp == 0) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET); + return (ECONNRESET); /* I made the same as TCP since we are + * not setup? */ + } + if (addr == NULL) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return (EINVAL); + } + if ((addr->sa_family == AF_INET6) && (addr->sa_len != sizeof(struct sockaddr_in6))) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return (EINVAL); + } + if ((addr->sa_family == AF_INET) && (addr->sa_len != sizeof(struct sockaddr_in))) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return (EINVAL); + } + vrf_id = inp->def_vrf_id; + SCTP_ASOC_CREATE_LOCK(inp); + SCTP_INP_RLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == + SCTP_PCB_FLAGS_UNBOUND) { + /* Bind a ephemeral port */ + SCTP_INP_RUNLOCK(inp); + error = sctp6_bind(so, NULL, p); + if (error) { + SCTP_ASOC_CREATE_UNLOCK(inp); + + return (error); + } + SCTP_INP_RLOCK(inp); + } + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && + (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) { + /* We are already connected AND the TCP model */ + SCTP_INP_RUNLOCK(inp); + SCTP_ASOC_CREATE_UNLOCK(inp); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EADDRINUSE); + return (EADDRINUSE); + } +#ifdef INET + sin6 = (struct sockaddr_in6 *)addr; + if (SCTP_IPV6_V6ONLY(inp6)) { + /* + * if IPV6_V6ONLY flag, ignore 
connections destined to a v4 + * addr or v4-mapped addr + */ + if (addr->sa_family == AF_INET) { + SCTP_INP_RUNLOCK(inp); + SCTP_ASOC_CREATE_UNLOCK(inp); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + SCTP_INP_RUNLOCK(inp); + SCTP_ASOC_CREATE_UNLOCK(inp); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + } + if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + if (!MODULE_GLOBAL(ip6_v6only)) { + /* convert v4-mapped into v4 addr */ + in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6); + addr = (struct sockaddr *)&ss; + } else { + /* mapped addresses aren't enabled */ + SCTP_INP_RUNLOCK(inp); + SCTP_ASOC_CREATE_UNLOCK(inp); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + } else +#endif /* INET */ + addr = addr; /* for true v6 address case */ + + /* Now do we connect? */ + if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { + stcb = LIST_FIRST(&inp->sctp_asoc_list); + if (stcb) { + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_INP_RUNLOCK(inp); + SCTP_INP_WLOCK(inp); + SCTP_INP_INCR_REF(inp); + SCTP_INP_WUNLOCK(inp); + stcb = sctp_findassociation_ep_addr(&inp, addr, NULL, NULL, NULL); + if (stcb == NULL) { + SCTP_INP_WLOCK(inp); + SCTP_INP_DECR_REF(inp); + SCTP_INP_WUNLOCK(inp); + } + } + + if (stcb != NULL) { + /* Already have or am bring up an association */ + SCTP_ASOC_CREATE_UNLOCK(inp); + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EALREADY); + return (EALREADY); + } + /* We are GOOD to go */ + stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p); + SCTP_ASOC_CREATE_UNLOCK(inp); + if (stcb == NULL) { + /* Gak! 
no memory */ + return (error); + } + if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { + stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; + /* Set the connected flag so we can queue data */ + soisconnecting(so); + } + stcb->asoc.state = SCTP_STATE_COOKIE_WAIT; + (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); + + /* initialize authentication parameters for the assoc */ + sctp_initialize_auth_params(inp, stcb); + + sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED); + SCTP_TCB_UNLOCK(stcb); + return error; +} + +static int +sctp6_getaddr(struct socket *so, struct sockaddr **addr) +{ + struct sockaddr_in6 *sin6; + struct sctp_inpcb *inp; + uint32_t vrf_id; + struct sctp_ifa *sctp_ifa; + + int error; + + /* + * Do the malloc first in case it blocks. + */ + SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6); + if (sin6 == NULL) + return ENOMEM; + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + + inp = (struct sctp_inpcb *)so->so_pcb; + if (inp == NULL) { + SCTP_FREE_SONAME(sin6); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET); + return ECONNRESET; + } + SCTP_INP_RLOCK(inp); + sin6->sin6_port = inp->sctp_lport; + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { + /* For the bound all case you get back 0 */ + if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { + struct sctp_tcb *stcb; + struct sockaddr_in6 *sin_a6; + struct sctp_nets *net; + int fnd; + + stcb = LIST_FIRST(&inp->sctp_asoc_list); + if (stcb == NULL) { + goto notConn6; + } + fnd = 0; + sin_a6 = NULL; + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + sin_a6 = (struct sockaddr_in6 *)&net->ro._l_addr; + if (sin_a6 == NULL) + /* this will make coverity happy */ + continue; + + if (sin_a6->sin6_family == AF_INET6) { + fnd = 1; + break; + } + } + if ((!fnd) || (sin_a6 == NULL)) { + /* punt */ + goto notConn6; + } + vrf_id = inp->def_vrf_id; + sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *) & net->ro, net, 0, vrf_id); + 
if (sctp_ifa) { + sin6->sin6_addr = sctp_ifa->address.sin6.sin6_addr; + } + } else { + /* For the bound all case you get back 0 */ + notConn6: + memset(&sin6->sin6_addr, 0, sizeof(sin6->sin6_addr)); + } + } else { + /* Take the first IPv6 address in the list */ + struct sctp_laddr *laddr; + int fnd = 0; + + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + if (laddr->ifa->address.sa.sa_family == AF_INET6) { + struct sockaddr_in6 *sin_a; + + sin_a = (struct sockaddr_in6 *)&laddr->ifa->address.sin6; + sin6->sin6_addr = sin_a->sin6_addr; + fnd = 1; + break; + } + } + if (!fnd) { + SCTP_FREE_SONAME(sin6); + SCTP_INP_RUNLOCK(inp); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); + return ENOENT; + } + } + SCTP_INP_RUNLOCK(inp); + /* Scoping things for v6 */ + if ((error = sa6_recoverscope(sin6)) != 0) { + SCTP_FREE_SONAME(sin6); + return (error); + } + (*addr) = (struct sockaddr *)sin6; + return (0); +} + +static int +sctp6_peeraddr(struct socket *so, struct sockaddr **addr) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)*addr; + int fnd; + struct sockaddr_in6 *sin_a6; + struct sctp_inpcb *inp; + struct sctp_tcb *stcb; + struct sctp_nets *net; + + int error; + + /* + * Do the malloc first in case it blocks. 
+ */ + inp = (struct sctp_inpcb *)so->so_pcb; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) { + /* UDP type and listeners will drop out here */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOTCONN); + return (ENOTCONN); + } + SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6); + if (sin6 == NULL) + return (ENOMEM); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + + /* We must recapture incase we blocked */ + inp = (struct sctp_inpcb *)so->so_pcb; + if (inp == NULL) { + SCTP_FREE_SONAME(sin6); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET); + return ECONNRESET; + } + SCTP_INP_RLOCK(inp); + stcb = LIST_FIRST(&inp->sctp_asoc_list); + if (stcb) { + SCTP_TCB_LOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + if (stcb == NULL) { + SCTP_FREE_SONAME(sin6); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET); + return ECONNRESET; + } + fnd = 0; + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + sin_a6 = (struct sockaddr_in6 *)&net->ro._l_addr; + if (sin_a6->sin6_family == AF_INET6) { + fnd = 1; + sin6->sin6_port = stcb->rport; + sin6->sin6_addr = sin_a6->sin6_addr; + break; + } + } + SCTP_TCB_UNLOCK(stcb); + if (!fnd) { + /* No IPv4 address */ + SCTP_FREE_SONAME(sin6); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); + return ENOENT; + } + if ((error = sa6_recoverscope(sin6)) != 0) + return (error); + *addr = (struct sockaddr *)sin6; + return (0); +} + +static int +sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) +{ + struct sockaddr *addr; + struct in6pcb *inp6 = sotoin6pcb(so); + int error; + + if (inp6 == NULL) { + SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + /* allow v6 addresses precedence */ + error = sctp6_getaddr(so, nam); + if (error) { + /* try v4 next if v6 failed */ + error = sctp_ingetaddr(so, nam); + if (error) { + return (error); + } + addr = *nam; + /* if I'm V6ONLY, 
convert it to v4-mapped */ + if (SCTP_IPV6_V6ONLY(inp6)) { + struct sockaddr_in6 sin6; + + in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6); + memcpy(addr, &sin6, sizeof(struct sockaddr_in6)); + + } + } + return (error); +} + + +static int +sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam) +{ + struct sockaddr *addr = *nam; + struct in6pcb *inp6 = sotoin6pcb(so); + int error; + + if (inp6 == NULL) { + SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); + return EINVAL; + } + /* allow v6 addresses precedence */ + error = sctp6_peeraddr(so, nam); + if (error) { + /* try v4 next if v6 failed */ + error = sctp_peeraddr(so, nam); + if (error) { + return (error); + } + /* if I'm V6ONLY, convert it to v4-mapped */ + if (SCTP_IPV6_V6ONLY(inp6)) { + struct sockaddr_in6 sin6; + + in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6); + memcpy(addr, &sin6, sizeof(struct sockaddr_in6)); + } + } + return error; +} + +struct pr_usrreqs sctp6_usrreqs = { + .pru_abort = sctp6_abort, + .pru_accept = sctp_accept, + .pru_attach = sctp6_attach, + .pru_bind = sctp6_bind, + .pru_connect = sctp6_connect, + .pru_control = in6_control, + .pru_close = sctp6_close, + .pru_detach = sctp6_close, + .pru_sopoll = sopoll_generic, + .pru_flush = sctp_flush, + .pru_disconnect = sctp6_disconnect, + .pru_listen = sctp_listen, + .pru_peeraddr = sctp6_getpeeraddr, + .pru_send = sctp6_send, + .pru_shutdown = sctp_shutdown, + .pru_sockaddr = sctp6_in6getaddr, + .pru_sosend = sctp_sosend, + .pru_soreceive = sctp_soreceive +}; diff --git a/freebsd/sys/netinet6/sctp6_var.h b/freebsd/sys/netinet6/sctp6_var.h new file mode 100644 index 00000000..b692fdbd --- /dev/null +++ b/freebsd/sys/netinet6/sctp6_var.h @@ -0,0 +1,61 @@ +/*- + * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +/* $KAME: sctp6_var.h,v 1.7 2004/08/17 04:06:22 itojun Exp $ */ + +#ifndef _NETINET6_SCTP6_VAR_HH_ +#define _NETINET6_SCTP6_VAR_HH_ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* TODO __Userspace__ IPv6 stuff... 
*/ +#if defined(_KERNEL) + +SYSCTL_DECL(_net_inet6_sctp6); +extern struct pr_usrreqs sctp6_usrreqs; + + +int sctp6_input __P((struct mbuf **, int *, int)); +int sctp6_output +__P((struct sctp_inpcb *, struct mbuf *, struct sockaddr *, + struct mbuf *, struct proc *)); + void sctp6_ctlinput __P((int, struct sockaddr *, void *)); + + + extern void sctp6_notify(struct sctp_inpcb *inp, + struct icmp6_hdr *icmph, + struct sctphdr *sh, + struct sockaddr *to, + struct sctp_tcb *stcb, + struct sctp_nets *net); + + +#endif /* _KERNEL */ +#endif diff --git a/freebsd/sys/netinet6/tcp6_var.h b/freebsd/sys/netinet6/tcp6_var.h new file mode 100644 index 00000000..e0373b3d --- /dev/null +++ b/freebsd/sys/netinet6/tcp6_var.h @@ -0,0 +1,83 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1982, 1986, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 + * $FreeBSD$ + */ + +#ifndef _NETINET_TCP6_VAR_HH_ +#define _NETINET_TCP6_VAR_HH_ + +#ifdef _KERNEL +#ifdef SYSCTL_DECL +SYSCTL_DECL(_net_inet6_tcp6); + +VNET_DECLARE(int, tcp_v6mssdflt); /* XXX */ +#define V_tcp_v6mssdflt VNET(tcp_v6mssdflt) +#endif + +struct ip6_hdr; +void tcp6_ctlinput __P((int, struct sockaddr *, void *)); +void tcp6_init __P((void)); +int tcp6_input __P((struct mbuf **, int *, int)); +struct rtentry *tcp_rtlookup6(struct in_conninfo *); + +extern struct pr_usrreqs tcp6_usrreqs; + +#endif /* _KERNEL */ + +#endif /* _NETINET_TCP6_VAR_HH_ */ diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c new file mode 100644 index 00000000..735c795b --- /dev/null +++ b/freebsd/sys/netinet6/udp6_usrreq.c @@ -0,0 +1,1112 @@ +#include <freebsd/machine/rtems-bsd-config.h> + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ + * $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 + */ + +#include <freebsd/sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <freebsd/local/opt_inet.h> +#include <freebsd/local/opt_inet6.h> +#include <freebsd/local/opt_ipsec.h> + +#include <freebsd/sys/param.h> +#include <freebsd/sys/jail.h> +#include <freebsd/sys/kernel.h> +#include <freebsd/sys/lock.h> +#include <freebsd/sys/mbuf.h> +#include <freebsd/sys/priv.h> +#include <freebsd/sys/proc.h> +#include <freebsd/sys/protosw.h> +#include <freebsd/sys/signalvar.h> +#include <freebsd/sys/socket.h> +#include <freebsd/sys/socketvar.h> +#include <freebsd/sys/sx.h> +#include <freebsd/sys/sysctl.h> +#include <freebsd/sys/syslog.h> +#include <freebsd/sys/systm.h> + +#include <freebsd/net/if.h> +#include <freebsd/net/if_types.h> +#include <freebsd/net/route.h> + +#include <freebsd/netinet/in.h> +#include <freebsd/netinet/in_pcb.h> +#include <freebsd/netinet/in_systm.h> +#include <freebsd/netinet/in_var.h> +#include <freebsd/netinet/ip.h> +#include 
<freebsd/netinet/ip_icmp.h> +#include <freebsd/netinet/ip6.h> +#include <freebsd/netinet/icmp_var.h> +#include <freebsd/netinet/icmp6.h> +#include <freebsd/netinet/ip_var.h> +#include <freebsd/netinet/udp.h> +#include <freebsd/netinet/udp_var.h> + +#include <freebsd/netinet6/ip6protosw.h> +#include <freebsd/netinet6/ip6_var.h> +#include <freebsd/netinet6/in6_pcb.h> +#include <freebsd/netinet6/udp6_var.h> +#include <freebsd/netinet6/scope6_var.h> + +#ifdef IPSEC +#include <freebsd/netipsec/ipsec.h> +#include <freebsd/netipsec/ipsec6.h> +#endif /* IPSEC */ + +#include <freebsd/security/mac/mac_framework.h> + +/* + * UDP protocol implementation. + * Per RFC 768, August, 1980. + */ + +extern struct protosw inetsw[]; +static void udp6_detach(struct socket *so); + +static void +udp6_append(struct inpcb *inp, struct mbuf *n, int off, + struct sockaddr_in6 *fromsa) +{ + struct socket *so; + struct mbuf *opts; + + INP_LOCK_ASSERT(inp); + +#ifdef IPSEC + /* Check AH/ESP integrity. */ + if (ipsec6_in_reject(n, inp)) { + m_freem(n); + V_ipsec6stat.in_polvio++; + return; + } +#endif /* IPSEC */ +#ifdef MAC + if (mac_inpcb_check_deliver(inp, n) != 0) { + m_freem(n); + return; + } +#endif + opts = NULL; + if (inp->inp_flags & INP_CONTROLOPTS || + inp->inp_socket->so_options & SO_TIMESTAMP) + ip6_savecontrol(inp, n, &opts); + m_adj(n, off + sizeof(struct udphdr)); + + so = inp->inp_socket; + SOCKBUF_LOCK(&so->so_rcv); + if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n, + opts) == 0) { + SOCKBUF_UNLOCK(&so->so_rcv); + m_freem(n); + if (opts) + m_freem(opts); + UDPSTAT_INC(udps_fullsock); + } else + sorwakeup_locked(so); +} + +int +udp6_input(struct mbuf **mp, int *offp, int proto) +{ + struct mbuf *m = *mp; + struct ifnet *ifp; + struct ip6_hdr *ip6; + struct udphdr *uh; + struct inpcb *inp; + struct udpcb *up; + int off = *offp; + int plen, ulen; + struct sockaddr_in6 fromsa; + + ifp = m->m_pkthdr.rcvif; + ip6 = mtod(m, struct ip6_hdr *); + + if (faithprefix_p != 
NULL && (*faithprefix_p)(&ip6->ip6_dst)) { + /* XXX send icmp6 host/port unreach? */ + m_freem(m); + return (IPPROTO_DONE); + } + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); + ip6 = mtod(m, struct ip6_hdr *); + uh = (struct udphdr *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh)); + if (!uh) + return (IPPROTO_DONE); +#endif + + UDPSTAT_INC(udps_ipackets); + + /* + * Destination port of 0 is illegal, based on RFC768. + */ + if (uh->uh_dport == 0) + goto badunlocked; + + plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); + ulen = ntohs((u_short)uh->uh_ulen); + + if (plen != ulen) { + UDPSTAT_INC(udps_badlen); + goto badunlocked; + } + + /* + * Checksum extended UDP header and data. + */ + if (uh->uh_sum == 0) { + UDPSTAT_INC(udps_nosum); + goto badunlocked; + } + if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) { + UDPSTAT_INC(udps_badsum); + goto badunlocked; + } + + /* + * Construct sockaddr format source address. + */ + init_sin6(&fromsa, m); + fromsa.sin6_port = uh->uh_sport; + + INP_INFO_RLOCK(&V_udbinfo); + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + struct inpcb *last; + struct ip6_moptions *imo; + + /* + * In the event that laddr should be set to the link-local + * address (this happens in RIPng), the multicast address + * specified in the received packet will not match laddr. To + * handle this situation, matching is relaxed if the + * receiving interface is the same as one specified in the + * socket and if the destination multicast address matches + * one of the multicast groups specified in the socket. + */ + + /* + * KAME note: traditionally we dropped udpiphdr from mbuf + * here. We need udphdr for IPsec processing so we do that + * later. 
+ */ + last = NULL; + LIST_FOREACH(inp, &V_udb, inp_list) { + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (inp->inp_lport != uh->uh_dport) + continue; + if (inp->inp_fport != 0 && + inp->inp_fport != uh->uh_sport) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, + &ip6->ip6_dst)) + continue; + } + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, + &ip6->ip6_src) || + inp->inp_fport != uh->uh_sport) + continue; + } + + /* + * Handle socket delivery policy for any-source + * and source-specific multicast. [RFC3678] + */ + imo = inp->in6p_moptions; + if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + struct sockaddr_in6 mcaddr; + int blocked; + + INP_RLOCK(inp); + + bzero(&mcaddr, sizeof(struct sockaddr_in6)); + mcaddr.sin6_len = sizeof(struct sockaddr_in6); + mcaddr.sin6_family = AF_INET6; + mcaddr.sin6_addr = ip6->ip6_dst; + + blocked = im6o_mc_filter(imo, ifp, + (struct sockaddr *)&mcaddr, + (struct sockaddr *)&fromsa); + if (blocked != MCAST_PASS) { + if (blocked == MCAST_NOTGMEMBER) + IP6STAT_INC(ip6s_notmember); + if (blocked == MCAST_NOTSMEMBER || + blocked == MCAST_MUTED) + UDPSTAT_INC(udps_filtermcast); + INP_RUNLOCK(inp); /* XXX */ + continue; + } + + INP_RUNLOCK(inp); + } + if (last != NULL) { + struct mbuf *n; + + if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { + INP_RLOCK(last); + up = intoudpcb(last); + if (up->u_tun_func == NULL) { + udp6_append(last, n, off, &fromsa); + } else { + /* + * Engage the tunneling + * protocol we will have to + * leave the info_lock up, + * since we are hunting + * through multiple UDP's. + * + */ + (*up->u_tun_func)(n, off, last); + } + INP_RUNLOCK(last); + } + } + last = inp; + /* + * Don't look for additional matches if this one does + * not have either the SO_REUSEPORT or SO_REUSEADDR + * socket options set. This heuristic avoids + * searching through all pcbs in the common case of a + * non-shared port. 
It assumes that an application + * will never clear these options after setting them. + */ + if ((last->inp_socket->so_options & + (SO_REUSEPORT|SO_REUSEADDR)) == 0) + break; + } + + if (last == NULL) { + /* + * No matching pcb found; discard datagram. (No need + * to send an ICMP Port Unreachable for a broadcast + * or multicast datgram.) + */ + UDPSTAT_INC(udps_noport); + UDPSTAT_INC(udps_noportmcast); + goto badheadlocked; + } + INP_RLOCK(last); + INP_INFO_RUNLOCK(&V_udbinfo); + up = intoudpcb(last); + if (up->u_tun_func == NULL) { + udp6_append(last, m, off, &fromsa); + } else { + /* + * Engage the tunneling protocol. + */ + (*up->u_tun_func)(m, off, last); + } + INP_RUNLOCK(last); + return (IPPROTO_DONE); + } + /* + * Locate pcb for datagram. + */ + inp = in6_pcblookup_hash(&V_udbinfo, &ip6->ip6_src, uh->uh_sport, + &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); + if (inp == NULL) { + if (udp_log_in_vain) { + char ip6bufs[INET6_ADDRSTRLEN]; + char ip6bufd[INET6_ADDRSTRLEN]; + + log(LOG_INFO, + "Connection attempt to UDP [%s]:%d from [%s]:%d\n", + ip6_sprintf(ip6bufd, &ip6->ip6_dst), + ntohs(uh->uh_dport), + ip6_sprintf(ip6bufs, &ip6->ip6_src), + ntohs(uh->uh_sport)); + } + UDPSTAT_INC(udps_noport); + if (m->m_flags & M_MCAST) { + printf("UDP6: M_MCAST is set in a unicast packet.\n"); + UDPSTAT_INC(udps_noportmcast); + goto badheadlocked; + } + INP_INFO_RUNLOCK(&V_udbinfo); + if (V_udp_blackhole) + goto badunlocked; + if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) + goto badunlocked; + icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); + return (IPPROTO_DONE); + } + INP_RLOCK(inp); + INP_INFO_RUNLOCK(&V_udbinfo); + up = intoudpcb(inp); + if (up->u_tun_func == NULL) { + udp6_append(inp, m, off, &fromsa); + } else { + /* + * Engage the tunneling protocol. 
+ */ + + (*up->u_tun_func)(m, off, inp); + } + INP_RUNLOCK(inp); + return (IPPROTO_DONE); + +badheadlocked: + INP_INFO_RUNLOCK(&V_udbinfo); +badunlocked: + if (m) + m_freem(m); + return (IPPROTO_DONE); +} + +void +udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) +{ + struct udphdr uh; + struct ip6_hdr *ip6; + struct mbuf *m; + int off = 0; + struct ip6ctlparam *ip6cp = NULL; + const struct sockaddr_in6 *sa6_src = NULL; + void *cmdarg; + struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; + struct udp_portonly { + u_int16_t uh_sport; + u_int16_t uh_dport; + } *uhp; + + if (sa->sa_family != AF_INET6 || + sa->sa_len != sizeof(struct sockaddr_in6)) + return; + + if ((unsigned)cmd >= PRC_NCMDS) + return; + if (PRC_IS_REDIRECT(cmd)) + notify = in6_rtchange, d = NULL; + else if (cmd == PRC_HOSTDEAD) + d = NULL; + else if (inet6ctlerrmap[cmd] == 0) + return; + + /* if the parameter is from icmp6, decode it. */ + if (d != NULL) { + ip6cp = (struct ip6ctlparam *)d; + m = ip6cp->ip6c_m; + ip6 = ip6cp->ip6c_ip6; + off = ip6cp->ip6c_off; + cmdarg = ip6cp->ip6c_cmdarg; + sa6_src = ip6cp->ip6c_src; + } else { + m = NULL; + ip6 = NULL; + cmdarg = NULL; + sa6_src = &sa6_any; + } + + if (ip6) { + /* + * XXX: We assume that when IPV6 is non NULL, + * M and OFF are valid. + */ + + /* Check if we can safely examine src and dst ports. 
*/ + if (m->m_pkthdr.len < off + sizeof(*uhp)) + return; + + bzero(&uh, sizeof(uh)); + m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); + + (void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport, + (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, + cmdarg, notify); + } else + (void) in6_pcbnotify(&V_udbinfo, sa, 0, + (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); +} + +static int +udp6_getcred(SYSCTL_HANDLER_ARGS) +{ + struct xucred xuc; + struct sockaddr_in6 addrs[2]; + struct inpcb *inp; + int error; + + error = priv_check(req->td, PRIV_NETINET_GETCRED); + if (error) + return (error); + + if (req->newlen != sizeof(addrs)) + return (EINVAL); + if (req->oldlen != sizeof(struct xucred)) + return (EINVAL); + error = SYSCTL_IN(req, addrs, sizeof(addrs)); + if (error) + return (error); + if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || + (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { + return (error); + } + INP_INFO_RLOCK(&V_udbinfo); + inp = in6_pcblookup_hash(&V_udbinfo, &addrs[1].sin6_addr, + addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1, + NULL); + if (inp != NULL) { + INP_RLOCK(inp); + INP_INFO_RUNLOCK(&V_udbinfo); + if (inp->inp_socket == NULL) + error = ENOENT; + if (error == 0) + error = cr_canseesocket(req->td->td_ucred, + inp->inp_socket); + if (error == 0) + cru2x(inp->inp_cred, &xuc); + INP_RUNLOCK(inp); + } else { + INP_INFO_RUNLOCK(&V_udbinfo); + error = ENOENT; + } + if (error == 0) + error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); + return (error); +} + +SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, + 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); + +static int +udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, + struct mbuf *control, struct thread *td) +{ + u_int32_t ulen = m->m_pkthdr.len; + u_int32_t plen = sizeof(struct udphdr) + ulen; + struct ip6_hdr *ip6; + struct udphdr *udp6; + struct in6_addr *laddr, 
*faddr, in6a; + struct sockaddr_in6 *sin6 = NULL; + struct ifnet *oifp = NULL; + int scope_ambiguous = 0; + u_short fport; + int error = 0; + struct ip6_pktopts *optp, opt; + int af = AF_INET6, hlen = sizeof(struct ip6_hdr); + int flags; + struct sockaddr_in6 tmp; + + INP_WLOCK_ASSERT(inp); + + if (addr6) { + /* addr6 has been validated in udp6_send(). */ + sin6 = (struct sockaddr_in6 *)addr6; + + /* protect *sin6 from overwrites */ + tmp = *sin6; + sin6 = &tmp; + + /* + * Application should provide a proper zone ID or the use of + * default zone IDs should be enabled. Unfortunately, some + * applications do not behave as it should, so we need a + * workaround. Even if an appropriate ID is not determined, + * we'll see if we can determine the outgoing interface. If we + * can, determine the zone ID based on the interface below. + */ + if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) + scope_ambiguous = 1; + if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) + return (error); + } + + if (control) { + if ((error = ip6_setpktopts(control, &opt, + inp->in6p_outputopts, td->td_ucred, IPPROTO_UDP)) != 0) + goto release; + optp = &opt; + } else + optp = inp->in6p_outputopts; + + if (sin6) { + faddr = &sin6->sin6_addr; + + /* + * IPv4 version of udp_output calls in_pcbconnect in this case, + * which needs splnet and affects performance. + * Since we saw no essential reason for calling in_pcbconnect, + * we get rid of such kind of logic, and call in6_selectsrc + * and in6_pcbsetport in order to fill in the local address + * and the local port. + */ + if (sin6->sin6_port == 0) { + error = EADDRNOTAVAIL; + goto release; + } + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + /* how about ::ffff:0.0.0.0 case? 
*/ + error = EISCONN; + goto release; + } + + fport = sin6->sin6_port; /* allow 0 port */ + + if (IN6_IS_ADDR_V4MAPPED(faddr)) { + if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { + /* + * I believe we should explicitly discard the + * packet when mapped addresses are disabled, + * rather than send the packet as an IPv6 one. + * If we chose the latter approach, the packet + * might be sent out on the wire based on the + * default route, the situation which we'd + * probably want to avoid. + * (20010421 jinmei@kame.net) + */ + error = EINVAL; + goto release; + } + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && + !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { + /* + * when remote addr is an IPv4-mapped address, + * local addr should not be an IPv6 address, + * since you cannot determine how to map IPv6 + * source address to IPv4. + */ + error = EINVAL; + goto release; + } + + af = AF_INET; + } + + if (!IN6_IS_ADDR_V4MAPPED(faddr)) { + error = in6_selectsrc(sin6, optp, inp, NULL, + td->td_ucred, &oifp, &in6a); + if (error) + goto release; + if (oifp && scope_ambiguous && + (error = in6_setscope(&sin6->sin6_addr, + oifp, NULL))) { + goto release; + } + laddr = &in6a; + } else + laddr = &inp->in6p_laddr; /* XXX */ + if (laddr == NULL) { + if (error == 0) + error = EADDRNOTAVAIL; + goto release; + } + if (inp->inp_lport == 0 && + (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) + goto release; + } else { + if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + error = ENOTCONN; + goto release; + } + if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { + if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { + /* + * XXX: this case would happen when the + * application sets the V6ONLY flag after + * connecting the foreign address. + * Such applications should be fixed, + * so we bark here. 
+ */ + log(LOG_INFO, "udp6_output: IPV6_V6ONLY " + "option was set for a connected socket\n"); + error = EINVAL; + goto release; + } else + af = AF_INET; + } + laddr = &inp->in6p_laddr; + faddr = &inp->in6p_faddr; + fport = inp->inp_fport; + } + + if (af == AF_INET) + hlen = sizeof(struct ip); + + /* + * Calculate data length and get a mbuf + * for UDP and IP6 headers. + */ + M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT); + if (m == 0) { + error = ENOBUFS; + goto release; + } + + /* + * Stuff checksum and output datagram. + */ + udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); + udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ + udp6->uh_dport = fport; + if (plen <= 0xffff) + udp6->uh_ulen = htons((u_short)plen); + else + udp6->uh_ulen = 0; + udp6->uh_sum = 0; + + switch (af) { + case AF_INET6: + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; +#if 0 /* ip6_plen will be filled in ip6_output. 
*/ + ip6->ip6_plen = htons((u_short)plen); +#endif + ip6->ip6_nxt = IPPROTO_UDP; + ip6->ip6_hlim = in6_selecthlim(inp, NULL); + ip6->ip6_src = *laddr; + ip6->ip6_dst = *faddr; + + if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP, + sizeof(struct ip6_hdr), plen)) == 0) { + udp6->uh_sum = 0xffff; + } + + flags = 0; + + UDPSTAT_INC(udps_opackets); + error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions, + NULL, inp); + break; + case AF_INET: + error = EAFNOSUPPORT; + goto release; + } + goto releaseopt; + +release: + m_freem(m); + +releaseopt: + if (control) { + ip6_clearpktopts(&opt, -1); + m_freem(control); + } + return (error); +} + +static void +udp6_abort(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); + +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + struct pr_usrreqs *pru; + + pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; + (*pru->pru_abort)(so); + return; + } +#endif + + INP_INFO_WLOCK(&V_udbinfo); + INP_WLOCK(inp); + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + in6_pcbdisconnect(inp); + inp->in6p_laddr = in6addr_any; + soisdisconnected(so); + } + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); +} + +static int +udp6_attach(struct socket *so, int proto, struct thread *td) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); + + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + error = soreserve(so, udp_sendspace, udp_recvspace); + if (error) + return (error); + } + INP_INFO_WLOCK(&V_udbinfo); + error = in_pcballoc(so, &V_udbinfo); + if (error) { + INP_INFO_WUNLOCK(&V_udbinfo); + return (error); + } + inp = (struct inpcb *)so->so_pcb; + inp->inp_vflag |= INP_IPV6; + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) + inp->inp_vflag |= INP_IPV4; + inp->in6p_hops = -1; /* use kernel default */ + inp->in6p_cksum = -1; /* just to be sure */ + /* + * XXX: ugly!! 
+ * IPv4 TTL initialization is necessary for an IPv6 socket as well, + * because the socket may be bound to an IPv6 wildcard address, + * which may match an IPv4-mapped IPv6 address. + */ + inp->inp_ip_ttl = V_ip_defttl; + + error = udp_newudpcb(inp); + if (error) { + in_pcbdetach(inp); + in_pcbfree(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + return (error); + } + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + return (0); +} + +static int +udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); + + INP_INFO_WLOCK(&V_udbinfo); + INP_WLOCK(inp); + inp->inp_vflag &= ~INP_IPV4; + inp->inp_vflag |= INP_IPV6; + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { + struct sockaddr_in6 *sin6_p; + + sin6_p = (struct sockaddr_in6 *)nam; + + if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) + inp->inp_vflag |= INP_IPV4; + else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { + struct sockaddr_in sin; + + in6_sin6_2_sin(&sin, sin6_p); + inp->inp_vflag |= INP_IPV4; + inp->inp_vflag &= ~INP_IPV6; + error = in_pcbbind(inp, (struct sockaddr *)&sin, + td->td_ucred); + goto out; + } + } + + error = in6_pcbbind(inp, nam, td->td_ucred); +out: + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + return (error); +} + +static void +udp6_close(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_close: inp == NULL")); + +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + struct pr_usrreqs *pru; + + pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; + (*pru->pru_disconnect)(so); + return; + } +#endif + INP_INFO_WLOCK(&V_udbinfo); + INP_WLOCK(inp); + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + in6_pcbdisconnect(inp); + inp->in6p_laddr = in6addr_any; + soisdisconnected(so); + } + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); +} + +static int +udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) +{ 
+ struct inpcb *inp; + struct sockaddr_in6 *sin6; + int error; + + inp = sotoinpcb(so); + sin6 = (struct sockaddr_in6 *)nam; + KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); + + INP_INFO_WLOCK(&V_udbinfo); + INP_WLOCK(inp); + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && + IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + struct sockaddr_in sin; + + if (inp->inp_faddr.s_addr != INADDR_ANY) { + error = EISCONN; + goto out; + } + in6_sin6_2_sin(&sin, sin6); + error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); + if (error != 0) + goto out; + error = in_pcbconnect(inp, (struct sockaddr *)&sin, + td->td_ucred); + if (error == 0) { + inp->inp_vflag |= INP_IPV4; + inp->inp_vflag &= ~INP_IPV6; + soisconnected(so); + } + goto out; + } + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + error = EISCONN; + goto out; + } + error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); + if (error != 0) + goto out; + error = in6_pcbconnect(inp, nam, td->td_ucred); + if (error == 0) { + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { + /* should be non mapped addr */ + inp->inp_vflag &= ~INP_IPV4; + inp->inp_vflag |= INP_IPV6; + } + soisconnected(so); + } +out: + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + return (error); +} + +static void +udp6_detach(struct socket *so) +{ + struct inpcb *inp; + struct udpcb *up; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); + + INP_INFO_WLOCK(&V_udbinfo); + INP_WLOCK(inp); + up = intoudpcb(inp); + KASSERT(up != NULL, ("%s: up == NULL", __func__)); + in_pcbdetach(inp); + in_pcbfree(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + udp_discardcb(up); +} + +static int +udp6_disconnect(struct socket *so) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); + + INP_INFO_WLOCK(&V_udbinfo); + INP_WLOCK(inp); + +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + struct pr_usrreqs *pru; + + pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; + error = 
(*pru->pru_disconnect)(so); + goto out; + } +#endif + + if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + error = ENOTCONN; + goto out; + } + + in6_pcbdisconnect(inp); + inp->in6p_laddr = in6addr_any; + SOCK_LOCK(so); + so->so_state &= ~SS_ISCONNECTED; /* XXX */ + SOCK_UNLOCK(so); +out: + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + return (0); +} + +static int +udp6_send(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, struct thread *td) +{ + struct inpcb *inp; + int error = 0; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_send: inp == NULL")); + + INP_INFO_WLOCK(&V_udbinfo); + INP_WLOCK(inp); + if (addr) { + if (addr->sa_len != sizeof(struct sockaddr_in6)) { + error = EINVAL; + goto bad; + } + if (addr->sa_family != AF_INET6) { + error = EAFNOSUPPORT; + goto bad; + } + } + +#ifdef INET + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { + int hasv4addr; + struct sockaddr_in6 *sin6 = 0; + + if (addr == 0) + hasv4addr = (inp->inp_vflag & INP_IPV4); + else { + sin6 = (struct sockaddr_in6 *)addr; + hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) + ? 1 : 0; + } + if (hasv4addr) { + struct pr_usrreqs *pru; + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && + !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { + /* + * When remote addr is IPv4-mapped address, + * local addr should not be an IPv6 address; + * since you cannot determine how to map IPv6 + * source address to IPv4. + */ + error = EINVAL; + goto out; + } + + /* + * XXXRW: We release UDP-layer locks before calling + * udp_send() in order to avoid recursion. However, + * this does mean there is a short window where inp's + * fields are unstable. Could this lead to a + * potential race in which the factors causing us to + * select the UDPv4 output routine are invalidated? 
+ */ + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + if (sin6) + in6_sin6_2_sin_in_sock(addr); + pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; + /* addr will just be freed in sendit(). */ + return ((*pru->pru_send)(so, flags, m, addr, control, + td)); + } + } +#endif +#ifdef MAC + mac_inpcb_create_mbuf(inp, m); +#endif + error = udp6_output(inp, m, addr, control, td); +out: + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + return (error); + +bad: + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + m_freem(m); + return (error); +} + +struct pr_usrreqs udp6_usrreqs = { + .pru_abort = udp6_abort, + .pru_attach = udp6_attach, + .pru_bind = udp6_bind, + .pru_connect = udp6_connect, + .pru_control = in6_control, + .pru_detach = udp6_detach, + .pru_disconnect = udp6_disconnect, + .pru_peeraddr = in6_mapped_peeraddr, + .pru_send = udp6_send, + .pru_shutdown = udp_shutdown, + .pru_sockaddr = in6_mapped_sockaddr, + .pru_soreceive = soreceive_dgram, + .pru_sosend = sosend_dgram, + .pru_sosetlabel = in_pcbsosetlabel, + .pru_close = udp6_close +}; diff --git a/freebsd/sys/netinet6/udp6_var.h b/freebsd/sys/netinet6/udp6_var.h new file mode 100644 index 00000000..f52503cd --- /dev/null +++ b/freebsd/sys/netinet6/udp6_var.h @@ -0,0 +1,75 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NETINET6_UDP6_VAR_HH_ +#define _NETINET6_UDP6_VAR_HH_ + +#ifdef _KERNEL +SYSCTL_DECL(_net_inet6_udp6); + +extern struct pr_usrreqs udp6_usrreqs; + +void udp6_ctlinput(int, struct sockaddr *, void *); +int udp6_input(struct mbuf **, int *, int); +#endif + +#endif /*_NETINET6_UDP6_VAR_HH_*/ |