diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2016-10-07 15:10:20 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2017-01-10 09:53:31 +0100 |
commit | c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f (patch) | |
tree | ad4f2519067709f00ab98b3c591186c26dc3a21f /freebsd/sys/netinet6/ip6_input.c | |
parent | userspace-header-gen.py: Simplify program ports (diff) | |
download | rtems-libbsd-c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f.tar.bz2 |
Update to FreeBSD head 2016-08-23
Git mirror commit 9fe7c416e6abb28b1398fd3e5687099846800cfd.
Diffstat (limited to 'freebsd/sys/netinet6/ip6_input.c')
-rw-r--r-- | freebsd/sys/netinet6/ip6_input.c | 712 |
1 files changed, 297 insertions, 415 deletions
diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c index 10536316..c7ffe759 100644 --- a/freebsd/sys/netinet6/ip6_input.c +++ b/freebsd/sys/netinet6/ip6_input.c @@ -67,33 +67,41 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipfw.h> #include <rtems/bsd/local/opt_ipsec.h> #include <rtems/bsd/local/opt_route.h> +#include <rtems/bsd/local/opt_rss.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> +#include <sys/hhook.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/proc.h> #include <sys/domain.h> #include <sys/protosw.h> +#include <sys/sdt.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <rtems/bsd/sys/errno.h> #include <sys/time.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rmlock.h> #include <sys/syslog.h> +#include <sys/sysctl.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_types.h> #include <net/if_dl.h> #include <net/route.h> #include <net/netisr.h> +#include <net/rss_config.h> #include <net/pfil.h> #include <net/vnet.h> #include <netinet/in.h> +#include <netinet/in_kdtrace.h> #include <netinet/ip_var.h> #include <netinet/in_systm.h> #include <net/if_llatbl.h> @@ -108,7 +116,9 @@ __FBSDID("$FreeBSD$"); #include <netinet/icmp6.h> #include <netinet6/scope6_var.h> #include <netinet6/in6_ifattach.h> +#include <netinet6/mld6_var.h> #include <netinet6/nd6.h> +#include <netinet6/in6_rss.h> #ifdef IPSEC #include <netipsec/ipsec.h> @@ -118,39 +128,84 @@ __FBSDID("$FreeBSD$"); #include <netinet6/ip6protosw.h> -#ifdef FLOWTABLE -#include <net/flowtable.h> -VNET_DECLARE(int, ip6_output_flowtable_size); -#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size) -#endif - extern struct domain inet6domain; u_char ip6_protox[IPPROTO_MAX]; VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); +VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl); +VNET_DEFINE(u_long, in6_ifaddrhmask); static struct netisr_handler ip6_nh = { .nh_name = "ip6", .nh_handler = ip6_input, .nh_proto = NETISR_IPV6, +#ifdef RSS + .nh_m2cpuid = rss_soft_m2cpuid_v6, + .nh_policy = NETISR_POLICY_CPU, + .nh_dispatch = NETISR_DISPATCH_HYBRID, +#else .nh_policy = NETISR_POLICY_FLOW, +#endif }; -VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch); -#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) +static int +sysctl_netinet6_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) +{ + int error, qlimit; + + netisr_getqlimit(&ip6_nh, &qlimit); + error = sysctl_handle_int(oidp, &qlimit, 0, req); + if (error || !req->newptr) + return (error); + if (qlimit < 1) + return (EINVAL); + return (netisr_setqlimit(&ip6_nh, qlimit)); +} +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRQMAXLEN, intr_queue_maxlen, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_queue_maxlen, "I", + "Maximum size of the IPv6 input queue"); + +#ifdef RSS +static struct netisr_handler ip6_direct_nh = { + .nh_name = "ip6_direct", + .nh_handler = ip6_direct_input, + .nh_proto = NETISR_IPV6_DIRECT, + .nh_m2cpuid = rss_soft_m2cpuid_v6, + .nh_policy = NETISR_POLICY_CPU, + .nh_dispatch = NETISR_DISPATCH_HYBRID, +}; + +static int +sysctl_netinet6_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) +{ + int error, qlimit; + + netisr_getqlimit(&ip6_direct_nh, &qlimit); + error = sysctl_handle_int(oidp, &qlimit, 0, req); + if (error || !req->newptr) + return (error); + if (qlimit < 1) + return (EINVAL); + return (netisr_setqlimit(&ip6_direct_nh, qlimit)); +} +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_direct_queue_maxlen, + "I", "Maximum size of the IPv6 direct input queue"); + +#endif VNET_DEFINE(struct pfil_head, inet6_pfil_hook); -VNET_DEFINE(struct ip6stat, ip6stat); +VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); +VNET_PCPUSTAT_SYSINIT(ip6stat); +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(ip6stat); +#endif /* VIMAGE */ -struct rwlock in6_ifaddr_lock; -RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); +struct rmlock in6_ifaddr_lock; +RM_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); -static void ip6_init2(void *); -static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); -static struct ip6aux *ip6_addaux(struct mbuf *); -static struct ip6aux *ip6_findaux(struct mbuf *m); -static void ip6_delaux (struct mbuf *); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); #ifdef PULLDOWN_TEST static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); @@ -163,7 +218,7 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); void ip6_init(void) { - struct ip6protosw *pr; + struct protosw *pr; int i; TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", @@ -172,6 +227,8 @@ ip6_init(void) TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr); TAILQ_INIT(&V_in6_ifaddrhead); + V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR, + &V_in6_ifaddrhmask); /* Initialize packet filter hooks. */ V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; @@ -180,40 +237,36 @@ ip6_init(void) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); + if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, + &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], + HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) + printf("%s: WARNING: unable to register input helper hook\n", + __func__); + if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET6, + &V_ipsec_hhh_out[HHOOK_IPSEC_INET6], + HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) + printf("%s: WARNING: unable to register output helper hook\n", + __func__); + scope6_init(); addrsel_policy_init(); nd6_init(); frag6_init(); -#ifdef FLOWTABLE - if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size", - &V_ip6_output_flowtable_size)) { - if (V_ip6_output_flowtable_size < 256) - V_ip6_output_flowtable_size = 256; - if (!powerof2(V_ip6_output_flowtable_size)) { - printf("flowtable must be power of 2 size\n"); - V_ip6_output_flowtable_size = 2048; - } - } else { - /* - * round up to the next power of 2 - */ - V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1); - } - V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU); -#endif - V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; /* Skip global initialization stuff for non-default instances. */ - if (!IS_DEFAULT_VNET(curvnet)) +#ifdef VIMAGE + if (!IS_DEFAULT_VNET(curvnet)) { + netisr_register_vnet(&ip6_nh); +#ifdef RSS + netisr_register_vnet(&ip6_direct_nh); +#endif return; - -#ifdef DIAGNOSTIC - if (sizeof(struct protosw) != sizeof(struct ip6protosw)) - panic("sizeof(protosw) != sizeof(ip6protosw)"); + } #endif - pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + + pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) panic("ip6_init"); @@ -224,8 +277,8 @@ ip6_init(void) * Cycle through IP protocols and put them into the appropriate place * in ip6_protox[]. */ - for (pr = (struct ip6protosw *)inet6domain.dom_protosw; - pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) + for (pr = inet6domain.dom_protosw; + pr < inet6domain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ @@ -234,6 +287,9 @@ ip6_init(void) } netisr_register(&ip6_nh); +#ifdef RSS + netisr_register(&ip6_direct_nh); +#endif } /* @@ -243,7 +299,7 @@ ip6_init(void) int ip6proto_register(short ip6proto) { - struct ip6protosw *pr; + struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) @@ -253,7 +309,7 @@ ip6proto_register(short ip6proto) * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ - pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] != pr - inet6sw) /* IPPROTO_RAW */ @@ -262,8 +318,8 @@ ip6proto_register(short ip6proto) /* * Find the protocol position in inet6sw[] and set the index. */ - for (pr = (struct ip6protosw *)inet6domain.dom_protosw; - pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) { + for (pr = inet6domain.dom_protosw; + pr < inet6domain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol == ip6proto) { ip6_protox[pr->pr_protocol] = pr - inet6sw; @@ -276,14 +332,14 @@ ip6proto_register(short ip6proto) int ip6proto_unregister(short ip6proto) { - struct ip6protosw *pr; + struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. */ - pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] == pr - inet6sw) /* IPPROTO_RAW */ @@ -295,43 +351,61 @@ ip6proto_unregister(short ip6proto) } #ifdef VIMAGE -void -ip6_destroy() +static void +ip6_destroy(void *unused __unused) { + struct ifaddr *ifa, *nifa; + struct ifnet *ifp; + int error; - nd6_destroy(); - callout_drain(&V_in6_tmpaddrtimer_ch); -} +#ifdef RSS + netisr_unregister_vnet(&ip6_direct_nh); #endif + netisr_unregister_vnet(&ip6_nh); + + if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0) + printf("%s: WARNING: unable to unregister pfil hook, " + "error %d\n", __func__, error); + error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]); + if (error != 0) { + printf("%s: WARNING: unable to deregister input helper hook " + "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET6: " + "error %d returned\n", __func__, error); + } + error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET6]); + if (error != 0) { + printf("%s: WARNING: unable to deregister output helper hook " + "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: " + "error %d returned\n", __func__, error); + } -static int -ip6_init2_vnet(const void *unused __unused) -{ - - /* nd6_timer_init */ - callout_init(&V_nd6_timer_ch, 0); - callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet); - - /* timer for regeneranation of temporary addresses randomize ID */ - callout_init(&V_in6_tmpaddrtimer_ch, 0); - callout_reset(&V_in6_tmpaddrtimer_ch, - (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - - V_ip6_temp_regen_advance) * hz, - in6_tmpaddrtimer, curvnet); + /* Cleanup addresses. */ + IFNET_RLOCK(); + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { + /* Cannot lock here - lock recursion. */ + /* IF_ADDR_LOCK(ifp); */ + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { - return (0); -} + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + in6_purgeaddr(ifa); + } + /* IF_ADDR_UNLOCK(ifp); */ + in6_ifdetach_destroy(ifp); + mld_domifdetach(ifp); + /* Make sure any routes are gone as well. */ + rt_flushifroutes_af(ifp, AF_INET6); + } + IFNET_RUNLOCK(); -static void -ip6_init2(void *dummy) -{ + nd6_destroy(); + in6_ifattach_destroy(); - ip6_init2_vnet(NULL); + hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask); } -/* cheat */ -/* This must be after route_init(), which is now SI_ORDER_THIRD */ -SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL); +VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL); +#endif static int ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off, @@ -410,22 +484,78 @@ out: return (1); } +#ifdef RSS +/* + * IPv6 direct input routine. + * + * This is called when reinjecting completed fragments where + * all of the previous checking and book-keeping has been done. + */ +void +ip6_direct_input(struct mbuf *m) +{ + int off, nxt; + int nest; + struct m_tag *mtag; + struct ip6_direct_ctx *ip6dc; + + mtag = m_tag_locate(m, MTAG_ABI_IPV6, IPV6_TAG_DIRECT, NULL); + KASSERT(mtag != NULL, ("Reinjected packet w/o direct ctx tag!")); + + ip6dc = (struct ip6_direct_ctx *)(mtag + 1); + nxt = ip6dc->ip6dc_nxt; + off = ip6dc->ip6dc_off; + + nest = 0; + + m_tag_delete(m, mtag); + + while (nxt != IPPROTO_DONE) { + if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { + IP6STAT_INC(ip6s_toomanyhdr); + goto bad; + } + + /* + * protection against faulty packet - there should be + * more sanity checks in header chain processing. + */ + if (m->m_pkthdr.len < off) { + IP6STAT_INC(ip6s_tooshort); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); + goto bad; + } + +#ifdef IPSEC + /* + * enforce IPsec policy checking if we are seeing last header. + * note that we do not visit this with protocols with pcb layer + * code - like udp/tcp/raw ip. + */ + if (ip6_ipsec_input(m, nxt)) + goto bad; +#endif /* IPSEC */ + + nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); + } + return; +bad: + m_freem(m); +} +#endif + void ip6_input(struct mbuf *m) { + struct in6_addr odst; struct ip6_hdr *ip6; - int off = sizeof(struct ip6_hdr), nest; + struct in6_ifaddr *ia; u_int32_t plen; u_int32_t rtalert = ~0; + int off = sizeof(struct ip6_hdr), nest; int nxt, ours = 0; - struct ifnet *deliverifp = NULL, *ifp = NULL; - struct in6_addr odst; - struct route_in6 rin6; int srcrt = 0; - struct llentry *lle = NULL; - struct sockaddr_in6 dst6, *dst; - bzero(&rin6, sizeof(struct route_in6)); #ifdef IPSEC /* * should the inner packet be considered authentic? @@ -438,18 +568,12 @@ ip6_input(struct mbuf *m) #endif /* IPSEC */ - /* - * make sure we don't have onion peering information into m_tag. - */ - ip6_delaux(m); - if (m->m_flags & M_FASTFWD_OURS) { /* * Firewall changed destination to local. */ m->m_flags &= ~M_FASTFWD_OURS; ours = 1; - deliverifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); goto hbhcheck; } @@ -476,10 +600,8 @@ ip6_input(struct mbuf *m) } /* drop the packet if IPv6 operation is disabled on the IF */ - if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) { - m_freem(m); - return; - } + if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) + goto bad; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); IP6STAT_INC(ip6s_total); @@ -493,21 +615,16 @@ ip6_input(struct mbuf *m) if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; - MGETHDR(n, M_DONTWAIT, MT_HEADER); - if (n) - M_MOVE_PKTHDR(n, m); - if (n && n->m_pkthdr.len > MHLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_freem(n); - n = NULL; - } - } + if (m->m_pkthdr.len > MHLEN) + n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + else + n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; /* ENOBUFS */ } + m_move_pkthdr(n, m); m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = n->m_pkthdr.len; m_freem(m); @@ -536,6 +653,8 @@ ip6_input(struct mbuf *m) IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]); + IP_PROBE(receive, NULL, NULL, ip6, m->m_pkthdr.rcvif, NULL, ip6); + /* * Check against address spoofing/corruption. */ @@ -643,7 +762,6 @@ ip6_input(struct mbuf *m) if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ours = 1; - deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } if ((m->m_flags & M_IP6_NEXTHOP) && @@ -654,7 +772,7 @@ ip6_input(struct mbuf *m) * connected host. */ ip6_forward(m, 1); - goto out; + return; } passin: @@ -677,7 +795,6 @@ passin: IP6STAT_INC(ip6s_badscope); goto bad; } - /* * Multicast check. Assume packet is for us to avoid * prematurely taking locks. @@ -685,167 +802,16 @@ passin: if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ours = 1; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); - deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } - - /* - * Unicast check - */ - - bzero(&dst6, sizeof(dst6)); - dst6.sin6_family = AF_INET6; - dst6.sin6_len = sizeof(struct sockaddr_in6); - dst6.sin6_addr = ip6->ip6_dst; - ifp = m->m_pkthdr.rcvif; - IF_AFDATA_RLOCK(ifp); - lle = lla_lookup(LLTABLE6(ifp), 0, - (struct sockaddr *)&dst6); - IF_AFDATA_RUNLOCK(ifp); - if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) { - struct ifaddr *ifa; - struct in6_ifaddr *ia6; - int bad; - - bad = 1; -#define sa_equal(a1, a2) \ - (bcmp((a1), (a2), ((a1))->sin6_len) == 0) - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != dst6.sin6_family) - continue; - if (sa_equal(&dst6, ifa->ifa_addr)) - break; - } - KASSERT(ifa != NULL, ("%s: ifa not found for lle %p", - __func__, lle)); -#undef sa_equal - - ia6 = (struct in6_ifaddr *)ifa; - if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { - /* Count the packet in the ip address stats */ - ia6->ia_ifa.if_ipackets++; - ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; - - /* - * record address information into m_tag. - */ - (void)ip6_setdstifaddr(m, ia6); - - bad = 0; - } else { - char ip6bufs[INET6_ADDRSTRLEN]; - char ip6bufd[INET6_ADDRSTRLEN]; - /* address is not ready, so discard the packet. */ - nd6log((LOG_INFO, - "ip6_input: packet to an unready address %s->%s\n", - ip6_sprintf(ip6bufs, &ip6->ip6_src), - ip6_sprintf(ip6bufd, &ip6->ip6_dst))); - } - IF_ADDR_RUNLOCK(ifp); - LLE_RUNLOCK(lle); - if (bad) - goto bad; - else { - ours = 1; - deliverifp = ifp; - goto hbhcheck; - } - } - if (lle != NULL) - LLE_RUNLOCK(lle); - - dst = &rin6.ro_dst; - dst->sin6_len = sizeof(struct sockaddr_in6); - dst->sin6_family = AF_INET6; - dst->sin6_addr = ip6->ip6_dst; - rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); - if (rin6.ro_rt) - RT_UNLOCK(rin6.ro_rt); - -#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) - /* - * Accept the packet if the forwarding interface to the destination - * according to the routing table is the loopback interface, - * unless the associated route has a gateway. - * Note that this approach causes to accept a packet if there is a - * route to the loopback interface for the destination of the packet. - * But we think it's even useful in some situations, e.g. when using - * a special daemon which wants to intercept the packet. - * - * XXX: some OSes automatically make a cloned route for the destination - * of an outgoing packet. If the outgoing interface of the packet - * is a loopback one, the kernel would consider the packet to be - * accepted, even if we have no such address assinged on the interface. - * We check the cloned flag of the route entry to reject such cases, - * assuming that route entries for our own addresses are not made by - * cloning (it should be true because in6_addloop explicitly installs - * the host route). However, we might have to do an explicit check - * while it would be less efficient. Or, should we rather install a - * reject route for such a case? + * Unicast check + * XXX: For now we keep link-local IPv6 addresses with embedded + * scope zone id, therefore we use zero zoneid here. */ - if (rin6.ro_rt && - (rin6.ro_rt->rt_flags & - (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && -#ifdef RTF_WASCLONED - !(rin6.ro_rt->rt_flags & RTF_WASCLONED) && -#endif -#ifdef RTF_CLONED - !(rin6.ro_rt->rt_flags & RTF_CLONED) && -#endif -#if 0 - /* - * The check below is redundant since the comparison of - * the destination and the key of the rtentry has - * already done through looking up the routing table. - */ - IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, - &rt6_key(rin6.ro_rt)->sin6_addr) -#endif - rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) { - int free_ia6 = 0; - struct in6_ifaddr *ia6; - - /* - * found the loopback route to the interface address - */ - if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) { - struct sockaddr_in6 dest6; - - bzero(&dest6, sizeof(dest6)); - dest6.sin6_family = AF_INET6; - dest6.sin6_len = sizeof(dest6); - dest6.sin6_addr = ip6->ip6_dst; - ia6 = (struct in6_ifaddr *) - ifa_ifwithaddr((struct sockaddr *)&dest6); - if (ia6 == NULL) - goto bad; - free_ia6 = 1; - } - else - ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa; - - /* - * record address information into m_tag. - */ - (void)ip6_setdstifaddr(m, ia6); - - /* - * packets to a tentative, duplicated, or somehow invalid - * address must not be accepted. - */ - if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { - /* this address is ready */ - ours = 1; - deliverifp = ia6->ia_ifp; /* correct? */ - /* Count the packet in the ip address stats */ - ia6->ia_ifa.if_ipackets++; - ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; - if (ia6 != NULL && free_ia6 != 0) - ifa_free(&ia6->ia_ifa); - goto hbhcheck; - } else { + ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia != NULL) { + if (ia->ia6_flags & IN6_IFF_NOTREADY) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; /* address is not ready, so discard the packet. */ @@ -853,24 +819,15 @@ passin: "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst))); - - if (ia6 != NULL && free_ia6 != 0) - ifa_free(&ia6->ia_ifa); + ifa_free(&ia->ia_ifa); goto bad; } - } - - /* - * FAITH (Firewall Aided Internet Translator) - */ - if (V_ip6_keepfaith) { - if (rin6.ro_rt && rin6.ro_rt->rt_ifp && - rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) { - /* XXX do we need more sanity checks? */ - ours = 1; - deliverifp = rin6.ro_rt->rt_ifp; /* faith */ - goto hbhcheck; - } + /* Count the packet in the ip address stats */ + counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); + counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); + ifa_free(&ia->ia_ifa); + ours = 1; + goto hbhcheck; } /* @@ -885,47 +842,25 @@ passin: hbhcheck: /* - * record address information into m_tag, if we don't have one yet. - * note that we are unable to record it, if the address is not listed - * as our interface address (e.g. multicast addresses, addresses - * within FAITH prefixes and such). - */ - if (deliverifp) { - struct in6_ifaddr *ia6; - - if ((ia6 = ip6_getdstifaddr(m)) != NULL) { - ifa_free(&ia6->ia_ifa); - } else { - ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); - if (ia6) { - if (!ip6_setdstifaddr(m, ia6)) { - /* - * XXX maybe we should drop the packet here, - * as we could not provide enough information - * to the upper layers. - */ - } - ifa_free(&ia6->ia_ifa); - } - } - } - - /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). * If a JumboPayload option is included, plen will also be modified. */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { - int error; - - error = ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours); - if (error != 0) - goto out; + if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0) + return; } else nxt = ip6->ip6_nxt; /* + * Use mbuf flags to propagate Router Alert option to + * ICMPv6 layer, as hop-by-hop options have been stripped. + */ + if (rtalert != ~0) + m->m_flags |= M_RTALERT_MLD; + + /* * Check that the amount of data in the buffers * is as at least much as the IPv6 header would have us expect. * Trim mbufs if longer than we expect. @@ -968,7 +903,7 @@ passin: } } else if (!ours) { ip6_forward(m, srcrt); - goto out; + return; } ip6 = mtod(m, struct ip6_hdr *); @@ -993,7 +928,7 @@ passin: * Tell launch routine the next header */ IP6STAT_INC(ip6s_delivered); - in6_ifstat_inc(deliverifp, ifs6_in_deliver); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_deliver); nest = 0; while (nxt != IPPROTO_DONE) { @@ -1022,56 +957,11 @@ passin: goto bad; #endif /* IPSEC */ - /* - * Use mbuf flags to propagate Router Alert option to - * ICMPv6 layer, as hop-by-hop options have been stripped. - */ - if (nxt == IPPROTO_ICMPV6 && rtalert != ~0) - m->m_flags |= M_RTALERT_MLD; - nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } - goto out; + return; bad: m_freem(m); -out: - if (rin6.ro_rt) - RTFREE(rin6.ro_rt); -} - -/* - * set/grab in6_ifaddr correspond to IPv6 destination address. - * XXX backward compatibility wrapper - * - * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag, - * and then bump it when the tag is copied, and release it when the tag is - * freed. Unfortunately, m_tags don't support deep copies (yet), so instead - * we just bump the ia refcount when we receive it. This should be fixed. - */ -static struct ip6aux * -ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6) -{ - struct ip6aux *ip6a; - - ip6a = ip6_addaux(m); - if (ip6a) - ip6a->ip6a_dstia6 = ia6; - return ip6a; /* NULL if failed to set */ -} - -struct in6_ifaddr * -ip6_getdstifaddr(struct mbuf *m) -{ - struct ip6aux *ip6a; - struct in6_ifaddr *ia; - - ip6a = ip6_findaux(m); - if (ip6a) { - ia = ip6a->ip6a_dstia6; - ifa_ref(&ia->ia_ifa); - return ia; - } else - return NULL; } /* @@ -1601,6 +1491,44 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) loopend: ; } + + if (in6p->inp_flags2 & INP_RECVFLOWID) { + uint32_t flowid, flow_type; + + flowid = m->m_pkthdr.flowid; + flow_type = M_HASHTYPE_GET(m); + + /* + * XXX should handle the failure of one or the + * other - don't populate both? + */ + *mp = sbcreatecontrol((caddr_t) &flowid, + sizeof(uint32_t), IPV6_FLOWID, IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + *mp = sbcreatecontrol((caddr_t) &flow_type, + sizeof(uint32_t), IPV6_FLOWTYPE, IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + } + +#ifdef RSS + if (in6p->inp_flags2 & INP_RECVRSSBUCKETID) { + uint32_t flowid, flow_type; + uint32_t rss_bucketid; + + flowid = m->m_pkthdr.flowid; + flow_type = M_HASHTYPE_GET(m); + + if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { + *mp = sbcreatecontrol((caddr_t) &rss_bucketid, + sizeof(uint32_t), IPV6_RSSBUCKETID, IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + } + } +#endif + } #undef IS2292 @@ -1674,22 +1602,12 @@ ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) else elen = (ip6e.ip6e_len + 1) << 3; - MGET(n, M_DONTWAIT, MT_DATA); - if (n && elen >= MLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_free(n); - n = NULL; - } - } - if (!n) - return NULL; - - n->m_len = 0; - if (elen >= M_TRAILINGSPACE(n)) { - m_free(n); + if (elen > MLEN) + n = m_getcl(M_NOWAIT, MT_DATA, 0); + else + n = m_get(M_NOWAIT, MT_DATA); + if (n == NULL) return NULL; - } m_copydata(m, off, elen, mtod(n, caddr_t)); n->m_len = elen; @@ -1710,7 +1628,7 @@ ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) * we develop `neater' mechanism to process extension headers. */ char * -ip6_get_prevhdr(struct mbuf *m, int off) +ip6_get_prevhdr(const struct mbuf *m, int off) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); @@ -1749,7 +1667,7 @@ ip6_get_prevhdr(struct mbuf *m, int off) * get next header offset. m will be retained. */ int -ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp) +ip6_nexthdr(const struct mbuf *m, int off, int proto, int *nxtp) { struct ip6_hdr ip6; struct ip6_ext ip6e; @@ -1817,14 +1735,14 @@ ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp) return -1; } - return -1; + /* NOTREACHED */ } /* * get offset for the last header in the chain. m will be kept untainted. */ int -ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) +ip6_lasthdr(const struct mbuf *m, int off, int proto, int *nxtp) { int newoff; int nxt; @@ -1847,42 +1765,6 @@ ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) } } -static struct ip6aux * -ip6_addaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - if (!mtag) { - mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux), - M_NOWAIT); - if (mtag) { - m_tag_prepend(m, mtag); - bzero(mtag + 1, sizeof(struct ip6aux)); - } - } - return mtag ? (struct ip6aux *)(mtag + 1) : NULL; -} - -static struct ip6aux * -ip6_findaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - return mtag ? (struct ip6aux *)(mtag + 1) : NULL; -} - -static void -ip6_delaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - if (mtag) - m_tag_delete(m, mtag); -} - /* * System control for IP6 */ |