summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/netinet
diff options
context:
space:
mode:
authorSebastian Huber <sebastian.huber@embedded-brains.de>2018-08-22 14:59:50 +0200
committerSebastian Huber <sebastian.huber@embedded-brains.de>2018-09-21 10:29:41 +0200
commit3489e3b6396ee9944a6a2e19e675ca54c36993b4 (patch)
treecd55cfac1c96ff4b888a9606fd6a0d8eb65bb446 /freebsd/sys/netinet
parentck: Define CK_MD_PPC32_LWSYNC if available (diff)
downloadrtems-libbsd-3489e3b6396ee9944a6a2e19e675ca54c36993b4.tar.bz2
Update to FreeBSD head 2018-09-17
Git mirror commit 6c2192b1ef8c50788c751f878552526800b1e319. Update #3472.
Diffstat (limited to 'freebsd/sys/netinet')
-rw-r--r--freebsd/sys/netinet/cc/cc_newreno.c8
-rw-r--r--freebsd/sys/netinet/if_ether.c12
-rw-r--r--freebsd/sys/netinet/igmp.c26
-rw-r--r--freebsd/sys/netinet/in.c9
-rw-r--r--freebsd/sys/netinet/in_fib.c4
-rw-r--r--freebsd/sys/netinet/in_gif.c311
-rw-r--r--freebsd/sys/netinet/in_gif.h45
-rw-r--r--freebsd/sys/netinet/in_kdtrace.h23
-rw-r--r--freebsd/sys/netinet/in_mcast.c39
-rw-r--r--freebsd/sys/netinet/in_pcb.c589
-rw-r--r--freebsd/sys/netinet/in_pcb.h167
-rw-r--r--freebsd/sys/netinet/ip.h5
-rw-r--r--freebsd/sys/netinet/ip6.h1
-rw-r--r--freebsd/sys/netinet/ip_carp.c46
-rw-r--r--freebsd/sys/netinet/ip_divert.c49
-rw-r--r--freebsd/sys/netinet/ip_encap.c470
-rw-r--r--freebsd/sys/netinet/ip_encap.h47
-rw-r--r--freebsd/sys/netinet/ip_fastfwd.c26
-rw-r--r--freebsd/sys/netinet/ip_fw.h2
-rw-r--r--freebsd/sys/netinet/ip_gre.c243
-rw-r--r--freebsd/sys/netinet/ip_icmp.c24
-rw-r--r--freebsd/sys/netinet/ip_id.c20
-rw-r--r--freebsd/sys/netinet/ip_input.c10
-rw-r--r--freebsd/sys/netinet/ip_mroute.c86
-rw-r--r--freebsd/sys/netinet/ip_options.c4
-rw-r--r--freebsd/sys/netinet/ip_output.c71
-rw-r--r--freebsd/sys/netinet/ip_reass.c203
-rw-r--r--freebsd/sys/netinet/libalias/alias.c12
-rw-r--r--freebsd/sys/netinet/libalias/alias_irc.c3
-rw-r--r--freebsd/sys/netinet/libalias/alias_mod.h8
-rw-r--r--freebsd/sys/netinet/pim_var.h2
-rw-r--r--freebsd/sys/netinet/raw_ip.c102
-rw-r--r--freebsd/sys/netinet/sctp.h2
-rw-r--r--freebsd/sys/netinet/sctp_asconf.c39
-rw-r--r--freebsd/sys/netinet/sctp_asconf.h4
-rw-r--r--freebsd/sys/netinet/sctp_auth.c8
-rw-r--r--freebsd/sys/netinet/sctp_auth.h40
-rw-r--r--freebsd/sys/netinet/sctp_bsd_addr.c2
-rw-r--r--freebsd/sys/netinet/sctp_cc_functions.c10
-rw-r--r--freebsd/sys/netinet/sctp_constants.h12
-rw-r--r--freebsd/sys/netinet/sctp_dtrace_define.h2
-rw-r--r--freebsd/sys/netinet/sctp_header.h26
-rw-r--r--freebsd/sys/netinet/sctp_indata.c66
-rw-r--r--freebsd/sys/netinet/sctp_indata.h3
-rw-r--r--freebsd/sys/netinet/sctp_input.c154
-rw-r--r--freebsd/sys/netinet/sctp_input.h2
-rw-r--r--freebsd/sys/netinet/sctp_os_bsd.h2
-rw-r--r--freebsd/sys/netinet/sctp_output.c145
-rw-r--r--freebsd/sys/netinet/sctp_output.h10
-rw-r--r--freebsd/sys/netinet/sctp_pcb.c67
-rw-r--r--freebsd/sys/netinet/sctp_pcb.h14
-rw-r--r--freebsd/sys/netinet/sctp_peeloff.c6
-rw-r--r--freebsd/sys/netinet/sctp_structs.h57
-rw-r--r--freebsd/sys/netinet/sctp_sysctl.c2
-rw-r--r--freebsd/sys/netinet/sctp_timer.c23
-rw-r--r--freebsd/sys/netinet/sctp_uio.h71
-rw-r--r--freebsd/sys/netinet/sctp_usrreq.c71
-rw-r--r--freebsd/sys/netinet/sctp_var.h4
-rw-r--r--freebsd/sys/netinet/sctputil.c112
-rw-r--r--freebsd/sys/netinet/sctputil.h31
-rw-r--r--freebsd/sys/netinet/tcp_hostcache.c4
-rw-r--r--freebsd/sys/netinet/tcp_hpts.h4
-rw-r--r--freebsd/sys/netinet/tcp_input.c167
-rw-r--r--freebsd/sys/netinet/tcp_log_buf.h6
-rw-r--r--freebsd/sys/netinet/tcp_output.c245
-rw-r--r--freebsd/sys/netinet/tcp_reass.c975
-rw-r--r--freebsd/sys/netinet/tcp_subr.c165
-rw-r--r--freebsd/sys/netinet/tcp_syncache.c92
-rw-r--r--freebsd/sys/netinet/tcp_syncache.h1
-rw-r--r--freebsd/sys/netinet/tcp_timer.c197
-rw-r--r--freebsd/sys/netinet/tcp_timer.h17
-rw-r--r--freebsd/sys/netinet/tcp_timewait.c116
-rw-r--r--freebsd/sys/netinet/tcp_usrreq.c238
-rw-r--r--freebsd/sys/netinet/tcp_var.h56
-rw-r--r--freebsd/sys/netinet/udp_usrreq.c105
-rw-r--r--freebsd/sys/netinet/udplite.h11
76 files changed, 4060 insertions, 1991 deletions
diff --git a/freebsd/sys/netinet/cc/cc_newreno.c b/freebsd/sys/netinet/cc/cc_newreno.c
index 4d5f8644..2450f08e 100644
--- a/freebsd/sys/netinet/cc/cc_newreno.c
+++ b/freebsd/sys/netinet/cc/cc_newreno.c
@@ -90,8 +90,8 @@ static void newreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void newreno_post_recovery(struct cc_var *ccv);
static int newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf);
-static VNET_DEFINE(uint32_t, newreno_beta) = 50;
-static VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
+VNET_DEFINE_STATIC(uint32_t, newreno_beta) = 50;
+VNET_DEFINE_STATIC(uint32_t, newreno_beta_ecn) = 80;
#define V_newreno_beta VNET(newreno_beta)
#define V_newreno_beta_ecn VNET(newreno_beta_ecn)
@@ -129,9 +129,7 @@ newreno_malloc(struct cc_var *ccv)
static void
newreno_cb_destroy(struct cc_var *ccv)
{
-
- if (ccv->cc_data != NULL)
- free(ccv->cc_data, M_NEWRENO);
+ free(ccv->cc_data, M_NEWRENO);
}
static void
diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c
index 0d608180..6ee6b71c 100644
--- a/freebsd/sys/netinet/if_ether.c
+++ b/freebsd/sys/netinet/if_ether.c
@@ -96,13 +96,13 @@ static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
/* timer values */
-static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20
+VNET_DEFINE_STATIC(int, arpt_keep) = (20*60); /* once resolved, good for 20
* minutes */
-static VNET_DEFINE(int, arp_maxtries) = 5;
-static VNET_DEFINE(int, arp_proxyall) = 0;
-static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for
+VNET_DEFINE_STATIC(int, arp_maxtries) = 5;
+VNET_DEFINE_STATIC(int, arp_proxyall) = 0;
+VNET_DEFINE_STATIC(int, arpt_down) = 20; /* keep incomplete entries for
* 20 seconds */
-static VNET_DEFINE(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/
+VNET_DEFINE_STATIC(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/
VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
VNET_PCPUSTAT_SYSINIT(arpstat);
@@ -110,7 +110,7 @@ VNET_PCPUSTAT_SYSINIT(arpstat);
VNET_PCPUSTAT_SYSUNINIT(arpstat);
#endif /* VIMAGE */
-static VNET_DEFINE(int, arp_maxhold) = 1;
+VNET_DEFINE_STATIC(int, arp_maxhold) = 1;
#define V_arpt_keep VNET(arpt_keep)
#define V_arpt_down VNET(arpt_down)
diff --git a/freebsd/sys/netinet/igmp.c b/freebsd/sys/netinet/igmp.c
index a4b99f62..970a01a0 100644
--- a/freebsd/sys/netinet/igmp.c
+++ b/freebsd/sys/netinet/igmp.c
@@ -219,11 +219,11 @@ static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
* FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
* policy to control the address used by IGMP on the link.
*/
-static VNET_DEFINE(int, interface_timers_running); /* IGMPv3 general
+VNET_DEFINE_STATIC(int, interface_timers_running); /* IGMPv3 general
* query response */
-static VNET_DEFINE(int, state_change_timers_running); /* IGMPv3 state-change
+VNET_DEFINE_STATIC(int, state_change_timers_running); /* IGMPv3 state-change
* retransmit */
-static VNET_DEFINE(int, current_state_timers_running); /* IGMPv1/v2 host
+VNET_DEFINE_STATIC(int, current_state_timers_running); /* IGMPv1/v2 host
* report; IGMPv3 g/sg
* query response */
@@ -231,25 +231,25 @@ static VNET_DEFINE(int, current_state_timers_running); /* IGMPv1/v2 host
#define V_state_change_timers_running VNET(state_change_timers_running)
#define V_current_state_timers_running VNET(current_state_timers_running)
-static VNET_DEFINE(LIST_HEAD(, igmp_ifsoftc), igi_head) =
+VNET_DEFINE_STATIC(LIST_HEAD(, igmp_ifsoftc), igi_head) =
LIST_HEAD_INITIALIZER(igi_head);
-static VNET_DEFINE(struct igmpstat, igmpstat) = {
+VNET_DEFINE_STATIC(struct igmpstat, igmpstat) = {
.igps_version = IGPS_VERSION_3,
.igps_len = sizeof(struct igmpstat),
};
-static VNET_DEFINE(struct timeval, igmp_gsrdelay) = {10, 0};
+VNET_DEFINE_STATIC(struct timeval, igmp_gsrdelay) = {10, 0};
#define V_igi_head VNET(igi_head)
#define V_igmpstat VNET(igmpstat)
#define V_igmp_gsrdelay VNET(igmp_gsrdelay)
-static VNET_DEFINE(int, igmp_recvifkludge) = 1;
-static VNET_DEFINE(int, igmp_sendra) = 1;
-static VNET_DEFINE(int, igmp_sendlocal) = 1;
-static VNET_DEFINE(int, igmp_v1enable) = 1;
-static VNET_DEFINE(int, igmp_v2enable) = 1;
-static VNET_DEFINE(int, igmp_legacysupp);
-static VNET_DEFINE(int, igmp_default_version) = IGMP_VERSION_3;
+VNET_DEFINE_STATIC(int, igmp_recvifkludge) = 1;
+VNET_DEFINE_STATIC(int, igmp_sendra) = 1;
+VNET_DEFINE_STATIC(int, igmp_sendlocal) = 1;
+VNET_DEFINE_STATIC(int, igmp_v1enable) = 1;
+VNET_DEFINE_STATIC(int, igmp_v2enable) = 1;
+VNET_DEFINE_STATIC(int, igmp_legacysupp);
+VNET_DEFINE_STATIC(int, igmp_default_version) = IGMP_VERSION_3;
#define V_igmp_recvifkludge VNET(igmp_recvifkludge)
#define V_igmp_sendra VNET(igmp_sendra)
diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c
index 7233f9a2..78fd00c0 100644
--- a/freebsd/sys/netinet/in.c
+++ b/freebsd/sys/netinet/in.c
@@ -80,7 +80,7 @@ static int in_difaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
static void in_socktrim(struct sockaddr_in *);
static void in_purgemaddrs(struct ifnet *);
-static VNET_DEFINE(int, nosameprefix);
+VNET_DEFINE_STATIC(int, nosameprefix);
#define V_nosameprefix VNET(nosameprefix)
SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(nosameprefix), 0,
@@ -624,8 +624,7 @@ in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
in_ifadown(&ia->ia_ifa, 1);
if (ia->ia_ifa.ifa_carp)
- (*carp_detach_p)(&ia->ia_ifa,
- (cmd == SIOCDIFADDR) ? false : true);
+ (*carp_detach_p)(&ia->ia_ifa, cmd == SIOCAIFADDR);
/*
* If this is the last IPv4 address configured on this
@@ -1169,10 +1168,6 @@ in_lltable_free_entry(struct lltable *llt, struct llentry *lle)
lltable_unlink_entry(llt, lle);
}
- /* cancel timer */
- if (callout_stop(&lle->lle_timer) > 0)
- LLE_REMREF(lle);
-
/* Drop hold queue */
pkts_dropped = llentry_free(lle);
ARPSTAT_ADD(dropped, pkts_dropped);
diff --git a/freebsd/sys/netinet/in_fib.c b/freebsd/sys/netinet/in_fib.c
index f62bc4a1..f61909ea 100644
--- a/freebsd/sys/netinet/in_fib.c
+++ b/freebsd/sys/netinet/in_fib.c
@@ -39,7 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -136,6 +136,7 @@ int
fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
uint32_t flowid, struct nhop4_basic *pnh4)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in sin;
@@ -184,6 +185,7 @@ int
fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
uint32_t flowid, struct nhop4_extended *pnh4)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in sin;
diff --git a/freebsd/sys/netinet/in_gif.c b/freebsd/sys/netinet/in_gif.c
index d072161f..03aaaf08 100644
--- a/freebsd/sys/netinet/in_gif.c
+++ b/freebsd/sys/netinet/in_gif.c
@@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,18 +41,18 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
#include <sys/systm.h>
+#include <sys/jail.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
-#include <sys/protosw.h>
#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
@@ -72,35 +73,161 @@ __FBSDID("$FreeBSD$");
#include <net/if_gif.h>
-static int in_gif_input(struct mbuf **, int *, int);
-
-extern struct domain inetdomain;
-static struct protosw in_gif_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = 0/* IPPROTO_IPV[46] */,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = in_gif_input,
- .pr_output = rip_output,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
#define GIF_TTL 30
-static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
+VNET_DEFINE_STATIC(int, ip_gif_ttl) = GIF_TTL;
#define V_ip_gif_ttl VNET(ip_gif_ttl)
SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(ip_gif_ttl), 0, "");
+ &VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets");
+
+/*
+ * We keep interfaces in a hash table using src+dst as key.
+ * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list.
+ */
+VNET_DEFINE_STATIC(struct gif_list *, ipv4_hashtbl) = NULL;
+VNET_DEFINE_STATIC(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER();
+#define V_ipv4_hashtbl VNET(ipv4_hashtbl)
+#define V_ipv4_list VNET(ipv4_list)
+
+#define GIF_HASH(src, dst) (V_ipv4_hashtbl[\
+ in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)])
+#define GIF_HASH_SC(sc) GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\
+ (sc)->gif_iphdr->ip_dst.s_addr)
+static uint32_t
+in_gif_hashval(in_addr_t src, in_addr_t dst)
+{
+ uint32_t ret;
+
+ ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
+ return (fnv_32_buf(&dst, sizeof(dst), ret));
+}
+
+static int
+in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst)
+{
+ struct gif_softc *tmp;
+
+ if (sc->gif_family == AF_INET &&
+ sc->gif_iphdr->ip_src.s_addr == src &&
+ sc->gif_iphdr->ip_dst.s_addr == dst)
+ return (EEXIST);
+
+ CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) {
+ if (tmp == sc)
+ continue;
+ if (tmp->gif_iphdr->ip_src.s_addr == src &&
+ tmp->gif_iphdr->ip_dst.s_addr == dst)
+ return (EADDRNOTAVAIL);
+ }
+ return (0);
+}
+
+static void
+in_gif_attach(struct gif_softc *sc)
+{
+
+ if (sc->gif_options & GIF_IGNORE_SOURCE)
+ CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain);
+ else
+ CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain);
+}
+
+int
+in_gif_setopts(struct gif_softc *sc, u_int options)
+{
+
+ /* NOTE: we are protected with gif_ioctl_sx lock */
+ MPASS(sc->gif_family == AF_INET);
+ MPASS(sc->gif_options != options);
+
+ if ((options & GIF_IGNORE_SOURCE) !=
+ (sc->gif_options & GIF_IGNORE_SOURCE)) {
+ CK_LIST_REMOVE(sc, chain);
+ sc->gif_options = options;
+ in_gif_attach(sc);
+ }
+ return (0);
+}
+
+int
+in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr_in *dst, *src;
+ struct ip *ip;
+ int error;
+
+ /* NOTE: we are protected with gif_ioctl_sx lock */
+ error = EINVAL;
+ switch (cmd) {
+ case SIOCSIFPHYADDR:
+ src = &((struct in_aliasreq *)data)->ifra_addr;
+ dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
+
+ /* sanity checks */
+ if (src->sin_family != dst->sin_family ||
+ src->sin_family != AF_INET ||
+ src->sin_len != dst->sin_len ||
+ src->sin_len != sizeof(*src))
+ break;
+ if (src->sin_addr.s_addr == INADDR_ANY ||
+ dst->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ if (V_ipv4_hashtbl == NULL)
+ V_ipv4_hashtbl = gif_hashinit();
+ error = in_gif_checkdup(sc, src->sin_addr.s_addr,
+ dst->sin_addr.s_addr);
+ if (error == EADDRNOTAVAIL)
+ break;
+ if (error == EEXIST) {
+ /* Addresses are the same. Just return. */
+ error = 0;
+ break;
+ }
+ ip = malloc(sizeof(*ip), M_GIF, M_WAITOK | M_ZERO);
+ ip->ip_src.s_addr = src->sin_addr.s_addr;
+ ip->ip_dst.s_addr = dst->sin_addr.s_addr;
+ if (sc->gif_family != 0) {
+ /* Detach existing tunnel first */
+ CK_LIST_REMOVE(sc, chain);
+ GIF_WAIT();
+ free(sc->gif_hdr, M_GIF);
+ /* XXX: should we notify about link state change? */
+ }
+ sc->gif_family = AF_INET;
+ sc->gif_iphdr = ip;
+ in_gif_attach(sc);
+ break;
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ if (sc->gif_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ src = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(src, 0, sizeof(*src));
+ src->sin_family = AF_INET;
+ src->sin_len = sizeof(*src);
+ src->sin_addr = (cmd == SIOCGIFPSRCADDR) ?
+ sc->gif_iphdr->ip_src: sc->gif_iphdr->ip_dst;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
+ if (error != 0)
+ memset(src, 0, sizeof(*src));
+ break;
+ }
+ return (error);
+}
int
in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
- GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
struct ip *ip;
int len;
/* prepend new IP header */
+ MPASS(in_epoch(net_epoch_preempt));
len = sizeof(struct ip);
#ifndef __NO_STRICT_ALIGNMENT
if (proto == IPPROTO_ETHERIP)
@@ -119,15 +246,9 @@ in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
}
#endif
ip = mtod(m, struct ip *);
- GIF_RLOCK(sc);
- if (sc->gif_family != AF_INET) {
- m_freem(m);
- GIF_RUNLOCK(sc);
- return (ENETDOWN);
- }
- bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
- GIF_RUNLOCK(sc);
+ MPASS(sc->gif_family == AF_INET);
+ bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
ip->ip_p = proto;
/* version will be set in ip_output() */
ip->ip_ttl = V_ip_gif_ttl;
@@ -138,15 +259,14 @@ in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
}
static int
-in_gif_input(struct mbuf **mp, int *offp, int proto)
+in_gif_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct mbuf *m = *mp;
- struct gif_softc *sc;
+ struct gif_softc *sc = arg;
struct ifnet *gifp;
struct ip *ip;
uint8_t ecn;
- sc = encap_getarg(m);
+ MPASS(in_epoch(net_epoch_preempt));
if (sc == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_nogif);
@@ -156,7 +276,7 @@ in_gif_input(struct mbuf **mp, int *offp, int proto)
if ((gifp->if_flags & IFF_UP) != 0) {
ip = mtod(m, struct ip *);
ecn = ip->ip_tos;
- m_adj(m, *offp);
+ m_adj(m, off);
gif_input(m, gifp, proto, ecn);
} else {
m_freem(m);
@@ -165,56 +285,125 @@ in_gif_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}
-/*
- * we know that we are in IFF_UP, outer address available, and outer family
- * matched the physical addr family. see gif_encapcheck().
- */
-int
-in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+static int
+in_gif_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
const struct ip *ip;
struct gif_softc *sc;
int ret;
- /* sanity check done in caller */
- sc = (struct gif_softc *)arg;
- GIF_RLOCK_ASSERT(sc);
+ if (V_ipv4_hashtbl == NULL)
+ return (0);
- /* check for address match */
+ MPASS(in_epoch(net_epoch_preempt));
ip = mtod(m, const struct ip *);
- if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr)
+ /*
+ * NOTE: it is safe to iterate without any locking here, because softc
+ * can be reclaimed only when we are not within net_epoch_preempt
+ * section, but ip_encap lookup+input are executed in epoch section.
+ */
+ ret = 0;
+ CK_LIST_FOREACH(sc, &GIF_HASH(ip->ip_dst.s_addr,
+ ip->ip_src.s_addr), chain) {
+ /*
+ * This is an inbound packet, its ip_dst is source address
+ * in softc.
+ */
+ if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr &&
+ sc->gif_iphdr->ip_dst.s_addr == ip->ip_src.s_addr) {
+ ret = ENCAP_DRV_LOOKUP;
+ goto done;
+ }
+ }
+ /*
+ * No exact match.
+ * Check the list of interfaces with GIF_IGNORE_SOURCE flag.
+ */
+ CK_LIST_FOREACH(sc, &V_ipv4_list, chain) {
+ if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr) {
+ ret = 32 + 8; /* src + proto */
+ goto done;
+ }
+ }
+ return (0);
+done:
+ if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
return (0);
- ret = 32;
- if (sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) {
- if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
- return (0);
- } else
- ret += 32;
-
/* ingress filters on outer source */
if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
struct nhop4_basic nh4;
struct in_addr dst;
dst = ip->ip_src;
-
if (fib4_lookup_nh_basic(sc->gif_fibnum, dst, 0, 0, &nh4) != 0)
return (0);
-
if (nh4.nh_ifp != m->m_pkthdr.rcvif)
return (0);
}
+ *arg = sc;
return (ret);
}
-int
-in_gif_attach(struct gif_softc *sc)
+static struct {
+ const struct encap_config encap;
+ const struct encaptab *cookie;
+} ipv4_encap_cfg[] = {
+ {
+ .encap = {
+ .proto = IPPROTO_IPV4,
+ .min_length = 2 * sizeof(struct ip),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in_gif_lookup,
+ .input = in_gif_input
+ },
+ },
+#ifdef INET6
+ {
+ .encap = {
+ .proto = IPPROTO_IPV6,
+ .min_length = sizeof(struct ip) +
+ sizeof(struct ip6_hdr),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in_gif_lookup,
+ .input = in_gif_input
+ },
+ },
+#endif
+ {
+ .encap = {
+ .proto = IPPROTO_ETHERIP,
+ .min_length = sizeof(struct ip) +
+ sizeof(struct etherip_header) +
+ sizeof(struct ether_header),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in_gif_lookup,
+ .input = in_gif_input
+ },
+ }
+};
+
+void
+in_gif_init(void)
{
+ int i;
- KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
- sc->gif_ecookie = encap_attach_func(AF_INET, -1, gif_encapcheck,
- &in_gif_protosw, sc);
- if (sc->gif_ecookie == NULL)
- return (EEXIST);
- return (0);
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+ for (i = 0; i < nitems(ipv4_encap_cfg); i++)
+ ipv4_encap_cfg[i].cookie = ip_encap_attach(
+ &ipv4_encap_cfg[i].encap, NULL, M_WAITOK);
+}
+
+void
+in_gif_uninit(void)
+{
+ int i;
+
+ if (IS_DEFAULT_VNET(curvnet)) {
+ for (i = 0; i < nitems(ipv4_encap_cfg); i++)
+ ip_encap_detach(ipv4_encap_cfg[i].cookie);
+ }
+ if (V_ipv4_hashtbl != NULL)
+ gif_hashdestroy(V_ipv4_hashtbl);
}
+
diff --git a/freebsd/sys/netinet/in_gif.h b/freebsd/sys/netinet/in_gif.h
deleted file mode 100644
index e1f4ae48..00000000
--- a/freebsd/sys/netinet/in_gif.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* $FreeBSD$ */
-/* $KAME: in_gif.h,v 1.5 2000/04/14 08:36:02 itojun Exp $ */
-
-/*-
- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the project nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _NETINET_IN_GIF_H_
-#define _NETINET_IN_GIF_H_
-
-#define GIF_TTL 30
-
-struct gif_softc;
-void in_gif_input(struct mbuf *, int);
-int in_gif_output(struct ifnet *, int, struct mbuf *);
-int gif_encapcheck4(const struct mbuf *, int, int, void *);
-int in_gif_attach(struct gif_softc *);
-int in_gif_detach(struct gif_softc *);
-
-#endif /*_NETINET_IN_GIF_H_*/
diff --git a/freebsd/sys/netinet/in_kdtrace.h b/freebsd/sys/netinet/in_kdtrace.h
index ba63a9a9..ccf53833 100644
--- a/freebsd/sys/netinet/in_kdtrace.h
+++ b/freebsd/sys/netinet/in_kdtrace.h
@@ -34,6 +34,8 @@
SDT_PROBE6(ip, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
#define UDP_PROBE(probe, arg0, arg1, arg2, arg3, arg4) \
SDT_PROBE5(udp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define UDPLITE_PROBE(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(udplite, , , probe, arg0, arg1, arg2, arg3, arg4)
#define TCP_PROBE1(probe, arg0) \
SDT_PROBE1(tcp, , , probe, arg0)
#define TCP_PROBE2(probe, arg0, arg1) \
@@ -46,14 +48,32 @@
SDT_PROBE5(tcp, , , probe, arg0, arg1, arg2, arg3, arg4)
#define TCP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
SDT_PROBE6(tcp, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
+#define SCTP_PROBE1(probe, arg0) \
+ SDT_PROBE1(sctp, , , probe, arg0)
+#define SCTP_PROBE2(probe, arg0, arg1) \
+ SDT_PROBE2(sctp, , , probe, arg0, arg1)
+#define SCTP_PROBE3(probe, arg0, arg1, arg2) \
+ SDT_PROBE3(sctp, , , probe, arg0, arg1, arg2)
+#define SCTP_PROBE4(probe, arg0, arg1, arg2, arg3) \
+ SDT_PROBE4(sctp, , , probe, arg0, arg1, arg2, arg3)
+#define SCTP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(sctp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define SCTP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
+ SDT_PROBE6(sctp, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
SDT_PROVIDER_DECLARE(ip);
+SDT_PROVIDER_DECLARE(sctp);
SDT_PROVIDER_DECLARE(tcp);
SDT_PROVIDER_DECLARE(udp);
+SDT_PROVIDER_DECLARE(udplite);
SDT_PROBE_DECLARE(ip, , , receive);
SDT_PROBE_DECLARE(ip, , , send);
+SDT_PROBE_DECLARE(sctp, , , receive);
+SDT_PROBE_DECLARE(sctp, , , send);
+SDT_PROBE_DECLARE(sctp, , , state__change);
+
SDT_PROBE_DECLARE(tcp, , , accept__established);
SDT_PROBE_DECLARE(tcp, , , accept__refused);
SDT_PROBE_DECLARE(tcp, , , connect__established);
@@ -72,4 +92,7 @@ SDT_PROBE_DECLARE(tcp, , , receive__autoresize);
SDT_PROBE_DECLARE(udp, , , receive);
SDT_PROBE_DECLARE(udp, , , send);
+SDT_PROBE_DECLARE(udplite, , , receive);
+SDT_PROBE_DECLARE(udplite, , , send);
+
#endif
diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c
index ea4779fc..e0fd4c37 100644
--- a/freebsd/sys/netinet/in_mcast.c
+++ b/freebsd/sys/netinet/in_mcast.c
@@ -233,8 +233,13 @@ static void inm_init(void)
taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task");
}
+#ifdef EARLY_AP_STARTUP
SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST,
inm_init, NULL);
+#else
+SYSINIT(inm_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_FIRST,
+ inm_init, NULL);
+#endif
void
@@ -260,7 +265,10 @@ inm_disconnect(struct in_multi *inm)
ifma = inm->inm_ifma;
if_ref(ifp);
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
+ if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
+ ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
if ((ll_ifma = ifma->ifma_llifma) != NULL) {
MPASS(ifma != ll_ifma);
@@ -268,7 +276,10 @@ inm_disconnect(struct in_multi *inm)
MPASS(ll_ifma->ifma_llifma == NULL);
MPASS(ll_ifma->ifma_ifp == ifp);
if (--ll_ifma->ifma_refcount == 0) {
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
+ if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
+ ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
if_freemulti(ll_ifma);
ifma_restart = true;
@@ -1581,23 +1592,24 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
* Begin state merge transaction at IGMP layer.
*/
IN_MULTI_LOCK();
- IN_MULTI_LIST_LOCK();
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ IN_MULTI_LIST_LOCK();
error = inm_merge(inm, imf);
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
+ IN_MULTI_LIST_UNLOCK();
goto out_in_multi_locked;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
error = igmp_change_state(inm);
+ IN_MULTI_LIST_UNLOCK();
if (error)
CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
out_in_multi_locked:
IN_MULTI_UNLOCK();
- IN_MULTI_UNLOCK();
out_imf_rollback:
if (error)
imf_rollback(imf);
@@ -1664,16 +1676,13 @@ inp_findmoptions(struct inpcb *inp)
}
static void
-inp_gcmoptions(epoch_context_t ctx)
+inp_gcmoptions(struct ip_moptions *imo)
{
- struct ip_moptions *imo;
struct in_mfilter *imf;
struct in_multi *inm;
struct ifnet *ifp;
size_t idx, nmships;
- imo = __containerof(ctx, struct ip_moptions, imo_epoch_ctx);
-
nmships = imo->imo_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
@@ -1709,7 +1718,7 @@ inp_freemoptions(struct ip_moptions *imo)
{
if (imo == NULL)
return;
- epoch_call(net_epoch_preempt, &imo->imo_epoch_ctx, inp_gcmoptions);
+ inp_gcmoptions(imo);
}
/*
@@ -2261,7 +2270,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
__func__);
IN_MULTI_LIST_UNLOCK();
goto out_imo_free;
- }
+ }
+ inm_acquire(inm);
imo->imo_membership[idx] = inm;
} else {
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
@@ -2301,6 +2311,12 @@ out_in_multi_locked:
out_imo_free:
if (error && is_new) {
+ inm = imo->imo_membership[idx];
+ if (inm != NULL) {
+ IN_MULTI_LIST_LOCK();
+ inm_release_deferred(inm);
+ IN_MULTI_LIST_UNLOCK();
+ }
imo->imo_membership[idx] = NULL;
--imo->imo_num_memberships;
}
@@ -2494,6 +2510,7 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
__func__);
+ IN_MULTI_LIST_UNLOCK();
goto out_in_multi_locked;
}
@@ -2738,12 +2755,12 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
INP_WLOCK_ASSERT(inp);
IN_MULTI_LOCK();
- IN_MULTI_LIST_LOCK();
/*
* Begin state merge transaction at IGMP layer.
*/
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ IN_MULTI_LIST_LOCK();
error = inm_merge(inm, imf);
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c
index f89487b6..5ba918fa 100644
--- a/freebsd/sys/netinet/in_pcb.c
+++ b/freebsd/sys/netinet/in_pcb.c
@@ -114,6 +114,9 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
+#define INPCBLBGROUP_SIZMIN 8
+#define INPCBLBGROUP_SIZMAX 256
+
static struct callout ipport_tick_callout;
/*
@@ -141,7 +144,7 @@ VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */
VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */
VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */
VNET_DEFINE(int, ipport_tcpallocs);
-static VNET_DEFINE(int, ipport_tcplastcount);
+VNET_DEFINE_STATIC(int, ipport_tcplastcount);
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
@@ -223,6 +226,222 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
* functions often modify hash chains or addresses in pcbs.
*/
+/*
+ * Allocate a zeroed load balance group with room for 'size' inpcb slots,
+ * record its identity (vflag, local port, local address) and capacity, and
+ * link it at the head of the hash chain 'hdr'.
+ *
+ * The allocation is M_NOWAIT; NULL is returned when it fails, in which case
+ * 'hdr' is left untouched.
+ */
+static struct inpcblbgroup *
+in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, u_char vflag,
+    uint16_t port, const union in_dependaddr *addr, int size)
+{
+	struct inpcblbgroup *lbg;
+
+	/* Header plus 'size' trailing il_inp[] entries. */
+	lbg = malloc(__offsetof(struct inpcblbgroup, il_inp[size]), M_PCB,
+	    M_NOWAIT | M_ZERO);
+	if (lbg == NULL)
+		return (NULL);
+
+	lbg->il_inpsiz = size;
+	lbg->il_dependladdr = *addr;
+	lbg->il_lport = port;
+	lbg->il_vflag = vflag;
+
+	/* Link onto the chain only after every field above is set. */
+	CK_LIST_INSERT_HEAD(hdr, lbg, il_list);
+	return (lbg);
+}
+
+/*
+ * Epoch callback: recover the load balance group that embeds 'ctx' as its
+ * il_epoch_ctx member and release its memory.
+ */
+static void
+in_pcblbgroup_free_deferred(epoch_context_t ctx)
+{
+
+	free(__containerof(ctx, struct inpcblbgroup, il_epoch_ctx), M_PCB);
+}
+
+/*
+ * Unlink 'grp' from its hash chain and hand it to net_epoch_preempt, so the
+ * memory is released later by in_pcblbgroup_free_deferred() instead of
+ * being freed on the spot.
+ */
+static void
+in_pcblbgroup_free(struct inpcblbgroup *grp)
+{
+	epoch_context_t ctx = &grp->il_epoch_ctx;
+
+	CK_LIST_REMOVE(grp, il_list);
+	epoch_call(net_epoch_preempt, ctx, in_pcblbgroup_free_deferred);
+}
+
+/*
+ * Replace 'old_grp' with a newly allocated group of capacity 'size' on the
+ * chain 'hdr'.  The replacement inherits the old group's vflag, local port,
+ * local address and members (in the same order); the old group is then
+ * unlinked and freed through in_pcblbgroup_free().
+ *
+ * Returns the new group, or NULL when the allocation failed, in which case
+ * 'old_grp' is left exactly as it was.
+ */
+static struct inpcblbgroup *
+in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
+    struct inpcblbgroup *old_grp, int size)
+{
+	struct inpcblbgroup *new_grp;
+	int slot;
+
+	new_grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag,
+	    old_grp->il_lport, &old_grp->il_dependladdr, size);
+	if (new_grp == NULL)
+		return (NULL);
+
+	KASSERT(old_grp->il_inpcnt < new_grp->il_inpsiz,
+	    ("invalid new local group size %d and old local group count %d",
+	     new_grp->il_inpsiz, old_grp->il_inpcnt));
+
+	/* Carry the members over, then publish how many there are. */
+	slot = 0;
+	while (slot < old_grp->il_inpcnt) {
+		new_grp->il_inp[slot] = old_grp->il_inp[slot];
+		slot++;
+	}
+	new_grp->il_inpcnt = old_grp->il_inpcnt;
+
+	in_pcblbgroup_free(old_grp);
+	return (new_grp);
+}
+
+/*
+ * Drop the PCB stored at il_inp[i] from the group by moving every later
+ * entry down one slot, then halve the group's capacity when it is larger
+ * than INPCBLBGROUP_SIZMIN and at most a quarter full.  When the shrink
+ * succeeds *grpp is updated to the replacement group; when the resize
+ * fails the existing group simply stays in use.
+ */
+static void
+in_pcblbgroup_reorder(struct inpcblbgrouphead *hdr, struct inpcblbgroup **grpp,
+    int i)
+{
+	struct inpcblbgroup *cur, *shrunk;
+
+	cur = *grpp;
+	while (i + 1 < cur->il_inpcnt) {
+		cur->il_inp[i] = cur->il_inp[i + 1];
+		i++;
+	}
+	cur->il_inpcnt--;
+
+	/* Nothing more to do unless the group is oversized and sparse. */
+	if (cur->il_inpsiz <= INPCBLBGROUP_SIZMIN ||
+	    cur->il_inpcnt > (cur->il_inpsiz / 4))
+		return;
+
+	shrunk = in_pcblbgroup_resize(hdr, cur, cur->il_inpsiz / 2);
+	if (shrunk != NULL)
+		*grpp = shrunk;
+}
+
+/*
+ * Add PCB to load balance group for SO_REUSEPORT_LB option.
+ *
+ * Groups live on the chain selected by hashing the PCB's local port and are
+ * matched on (il_vflag, il_lport, il_dependladdr).  A missing group is
+ * created with INPCBLBGROUP_SIZMIN slots; a full group is doubled in size
+ * unless it has already reached INPCBLBGROUP_SIZMAX.
+ *
+ * Returns 0 when the PCB was added and also when it was deliberately left
+ * out (no LB hash table, jailed socket, IPv4 mapped INET6 wild socket, or
+ * group already at its size limit).  Returns ENOBUFS when a group could not
+ * be allocated or grown.
+ */
+static int
+in_pcbinslbgrouphash(struct inpcb *inp)
+{
+	static const struct timeval interval = { 60, 0 };
+	static struct timeval lastprint;
+	struct inpcbinfo *pcbinfo;
+	struct inpcblbgrouphead *hdr;
+	struct inpcblbgroup *grp;
+	struct ucred *cred;
+
+	pcbinfo = inp->inp_pcbinfo;
+
+	INP_WLOCK_ASSERT(inp);
+	INP_HASH_WLOCK_ASSERT(pcbinfo);
+
+	if (pcbinfo->ipi_lbgrouphashbase == NULL)
+		return (0);
+
+	/*
+	 * Don't allow jailed socket to join local group.
+	 */
+	if (inp->inp_socket != NULL)
+		cred = inp->inp_socket->so_cred;
+	else
+		cred = NULL;
+	if (cred != NULL && jailed(cred))
+		return (0);
+
+#ifdef INET6
+	/*
+	 * Don't allow IPv4 mapped INET6 wild socket.
+	 */
+	if ((inp->inp_vflag & INP_IPV4) &&
+	    inp->inp_laddr.s_addr == INADDR_ANY &&
+	    INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) {
+		return (0);
+	}
+#endif
+
+	/* Locate the chain for this local port and look for our group. */
+	hdr = &pcbinfo->ipi_lbgrouphashbase[
+	    INP_PCBLBGROUP_PORTHASH(inp->inp_lport,
+	    pcbinfo->ipi_lbgrouphashmask)];
+	CK_LIST_FOREACH(grp, hdr, il_list) {
+		if (grp->il_vflag == inp->inp_vflag &&
+		    grp->il_lport == inp->inp_lport &&
+		    memcmp(&grp->il_dependladdr,
+		    &inp->inp_inc.inc_ie.ie_dependladdr,
+		    sizeof(grp->il_dependladdr)) == 0) {
+			break;
+		}
+	}
+	if (grp == NULL) {
+		/* Create new load balance group. */
+		grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag,
+		    inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
+		    INPCBLBGROUP_SIZMIN);
+		if (!grp)
+			return (ENOBUFS);
+	} else if (grp->il_inpcnt == grp->il_inpsiz) {
+		if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
+			/* Full and at the cap: report via ratecheck(). */
+			if (ratecheck(&lastprint, &interval))
+				printf("lb group port %d, limit reached\n",
+				    ntohs(grp->il_lport));
+			return (0);
+		}
+
+		/* Expand this local group. */
+		grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
+		if (!grp)
+			return (ENOBUFS);
+	}
+
+	KASSERT(grp->il_inpcnt < grp->il_inpsiz,
+	    ("invalid local group size %d and count %d",
+	     grp->il_inpsiz, grp->il_inpcnt));
+
+	/* Append the PCB in the first free slot. */
+	grp->il_inp[grp->il_inpcnt] = inp;
+	grp->il_inpcnt++;
+	return (0);
+}
+
+/*
+ * Remove PCB from load balance group.
+ *
+ * Walks every group on the chain selected by the PCB's local port and acts
+ * on the first slot that holds 'inp': a group whose only member is 'inp' is
+ * freed, otherwise the remaining members are pulled up (and the group may
+ * be shrunk).  Nothing happens when there is no LB hash table or when the
+ * PCB is not a member of any group on that chain.
+ */
+static void
+in_pcbremlbgrouphash(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	struct inpcblbgrouphead *hdr;
+	struct inpcblbgroup *grp;
+	int slot;
+
+	INP_WLOCK_ASSERT(inp);
+	INP_HASH_WLOCK_ASSERT(pcbinfo);
+
+	if (pcbinfo->ipi_lbgrouphashbase == NULL)
+		return;
+
+	hdr = &pcbinfo->ipi_lbgrouphashbase[
+	    INP_PCBLBGROUP_PORTHASH(inp->inp_lport,
+	    pcbinfo->ipi_lbgrouphashmask)];
+
+	CK_LIST_FOREACH(grp, hdr, il_list) {
+		for (slot = 0; slot < grp->il_inpcnt; slot++) {
+			if (grp->il_inp[slot] == inp)
+				goto found;
+		}
+	}
+	return;
+
+found:
+	if (grp->il_inpcnt == 1) {
+		/* We are the last, free this local group. */
+		in_pcblbgroup_free(grp);
+	} else {
+		/* Pull up inpcbs, shrink group if possible. */
+		in_pcblbgroup_reorder(hdr, &grp, slot);
+	}
+}
+
/*
* Different protocols initialize their inpcbs differently - giving
* different name to the lock. But they all are disposed the same.
@@ -252,12 +471,14 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
pcbinfo->ipi_vnet = curvnet;
#endif
pcbinfo->ipi_listhead = listhead;
- LIST_INIT(pcbinfo->ipi_listhead);
+ CK_LIST_INIT(pcbinfo->ipi_listhead);
pcbinfo->ipi_count = 0;
pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_porthashmask);
+ pcbinfo->ipi_lbgrouphashbase = hashinit(hash_nelements, M_PCB,
+ &pcbinfo->ipi_lbgrouphashmask);
#ifdef PCBGROUP
in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
#endif
@@ -281,6 +502,8 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
+ hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
+ pcbinfo->ipi_lbgrouphashmask);
#ifdef PCBGROUP
in_pcbgroup_destroy(pcbinfo);
#endif
@@ -341,7 +564,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
#endif
INP_WLOCK(inp);
INP_LIST_WLOCK(pcbinfo);
- LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
+ CK_LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
pcbinfo->ipi_count++;
so->so_pcb = (caddr_t)inp;
#ifdef INET6
@@ -519,18 +742,20 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
/*
* Return cached socket options.
*/
-short
+int
inp_so_options(const struct inpcb *inp)
{
- short so_options;
+ int so_options;
- so_options = 0;
+ so_options = 0;
- if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
- so_options |= SO_REUSEPORT;
- if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
- so_options |= SO_REUSEADDR;
- return (so_options);
+ if ((inp->inp_flags2 & INP_REUSEPORT_LB) != 0)
+ so_options |= SO_REUSEPORT_LB;
+ if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
+ so_options |= SO_REUSEPORT;
+ if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
+ so_options |= SO_REUSEADDR;
+ return (so_options);
}
#endif /* INET || INET6 */
@@ -589,6 +814,12 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
int error;
/*
+ * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
+ * so that we don't have to add to the (already messy) code below.
+ */
+ int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
+
+ /*
* No state changes, so read locks are sufficient here.
*/
INP_LOCK_ASSERT(inp);
@@ -599,7 +830,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
laddr.s_addr = *laddrp;
if (nam != NULL && laddr.s_addr != INADDR_ANY)
return (EINVAL);
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
lookupflags = INPLOOKUP_WILDCARD;
if (nam == NULL) {
if ((error = prison_local_ip4(cred, &laddr)) != 0)
@@ -636,16 +867,23 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
*/
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
reuseport = SO_REUSEADDR|SO_REUSEPORT;
+ /*
+ * XXX: How to deal with SO_REUSEPORT_LB here?
+ * Treat same as SO_REUSEPORT for now.
+ */
+ if ((so->so_options &
+ (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
+ reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
} else if (sin->sin_addr.s_addr != INADDR_ANY) {
sin->sin_port = 0; /* yech... */
bzero(&sin->sin_zero, sizeof(sin->sin_zero));
/*
- * Is the address a local IP address?
+ * Is the address a local IP address?
* If INP_BINDANY is set, then the socket may be bound
* to any endpoint address, local or not.
*/
if ((inp->inp_flags & INP_BINDANY) == 0 &&
- ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
+ ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
return (EADDRNOTAVAIL);
}
laddr = sin->sin_addr;
@@ -675,7 +913,8 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_flags2 & INP_REUSEPORT) == 0) &&
+ (t->inp_flags2 & INP_REUSEPORT) ||
+ (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
#ifndef __rtems__
(inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
@@ -704,11 +943,15 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
*/
tw = intotw(t);
if (tw == NULL ||
- (reuseport & tw->tw_so_options) == 0)
+ ((reuseport & tw->tw_so_options) == 0 &&
+ (reuseport_lb &
+ tw->tw_so_options) == 0)) {
return (EADDRINUSE);
+ }
} else if (t &&
- ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
- (reuseport & inp_so_options(t)) == 0) {
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
+ (reuseport & inp_so_options(t)) == 0 &&
+ (reuseport_lb & inp_so_options(t)) == 0) {
#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
@@ -717,7 +960,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
(inp->inp_vflag & INP_IPV6PROTO) == 0 ||
(t->inp_vflag & INP_IPV6PROTO) == 0)
#endif
- return (EADDRINUSE);
+ return (EADDRINUSE);
if (t && (! in_pcbbind_check_bindmulti(inp, t)))
return (EADDRINUSE);
}
@@ -862,7 +1105,6 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
ifp = ia->ia_ifp;
ia = NULL;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr;
@@ -876,10 +1118,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- IF_ADDR_RUNLOCK(ifp);
goto done;
}
- IF_ADDR_RUNLOCK(ifp);
/* 3. As a last resort return the 'default' jail address. */
error = prison_get_ip4(cred, laddr);
@@ -921,7 +1161,6 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
*/
ia = NULL;
ifp = sro.ro_rt->rt_ifp;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET)
@@ -934,10 +1173,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- IF_ADDR_RUNLOCK(ifp);
goto done;
}
- IF_ADDR_RUNLOCK(ifp);
/* 3. As a last resort return the 'default' jail address. */
error = prison_get_ip4(cred, laddr);
@@ -985,9 +1222,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
ifp = ia->ia_ifp;
ia = NULL;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-
sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET)
continue;
@@ -1000,10 +1235,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- IF_ADDR_RUNLOCK(ifp);
goto done;
}
- IF_ADDR_RUNLOCK(ifp);
}
/* 3. As a last resort return the 'default' jail address. */
@@ -1347,6 +1580,58 @@ in_pcblist_rele_rlocked(epoch_context_t ctx)
free(il, M_TEMP);
}
+/*
+ * Epoch callback: recover the inpcbport that embeds 'ctx' as its
+ * phd_epoch_ctx member and release its memory.
+ */
+static void
+inpcbport_free(epoch_context_t ctx)
+{
+
+	free(__containerof(ctx, struct inpcbport, phd_epoch_ctx), M_PCB);
+}
+
+/*
+ * Epoch callback scheduled by in_pcbfree(): finish tearing down the inpcb
+ * that embeds 'ctx' as its inp_epoch_ctx member.
+ *
+ * With the inpcb write-locked, the multicast options are detached, the
+ * IPsec policy, IPv6 output options, IP options mbuf, credential and MAC
+ * label are released, and a reference is dropped with
+ * in_pcbrele_wlocked().  The detached multicast options are freed last,
+ * after that call.
+ */
+static void
+in_pcbfree_deferred(epoch_context_t ctx)
+{
+ struct inpcb *inp;
+ int released __unused;
+
+ inp = __containerof(ctx, struct inpcb, inp_epoch_ctx);
+
+ INP_WLOCK(inp);
+#ifdef INET
+ /* Detach the IPv4 multicast options; they are freed at the end. */
+ struct ip_moptions *imo = inp->inp_moptions;
+ inp->inp_moptions = NULL;
+#endif
+ /* XXXRW: Do as much as possible here. */
+#if defined(IPSEC) || defined(IPSEC_SUPPORT)
+ if (inp->inp_sp != NULL)
+ ipsec_delete_pcbpolicy(inp);
+#endif
+#ifdef INET6
+ /* Same for IPv6: free output options now, keep moptions for later. */
+ struct ip6_moptions *im6o = NULL;
+ if (inp->inp_vflag & INP_IPV6PROTO) {
+ ip6_freepcbopts(inp->in6p_outputopts);
+ im6o = inp->in6p_moptions;
+ inp->in6p_moptions = NULL;
+ }
+#endif
+ if (inp->inp_options)
+ (void)m_free(inp->inp_options);
+ inp->inp_vflag = 0;
+ crfree(inp->inp_cred);
+#ifdef MAC
+ mac_inpcb_destroy(inp);
+#endif
+ /*
+ * The return value is only checked by MPASS(), which expects it to be
+ * non-zero.  NOTE(review): presumably non-zero means the inpcb was
+ * released and the lock taken above is no longer held -- confirm
+ * against in_pcbrele_wlocked().
+ */
+ released = in_pcbrele_wlocked(inp);
+ MPASS(released);
+ /* 'inp' is not touched past this point; only the saved pointers are. */
+#ifdef INET6
+ ip6_freemoptions(im6o);
+#endif
+#ifdef INET
+ inp_freemoptions(imo);
+#endif
+}
+
/*
* Unconditionally schedule an inpcb to be freed by decrementing its
* reference count, which should occur only after the inpcb has been detached
@@ -1361,14 +1646,7 @@ in_pcbfree(struct inpcb *inp)
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
-#ifdef INET6
- struct ip6_moptions *im6o = NULL;
-#endif
-#ifdef INET
- struct ip_moptions *imo = NULL;
-#endif
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
-
KASSERT((inp->inp_flags2 & INP_FREED) == 0,
("%s: called twice for pcb %p", __func__, inp));
if (inp->inp_flags2 & INP_FREED) {
@@ -1384,45 +1662,14 @@ in_pcbfree(struct inpcb *inp)
}
#endif
INP_WLOCK_ASSERT(inp);
-
-#ifdef INET
- imo = inp->inp_moptions;
- inp->inp_moptions = NULL;
-#endif
- /* XXXRW: Do as much as possible here. */
-#if defined(IPSEC) || defined(IPSEC_SUPPORT)
- if (inp->inp_sp != NULL)
- ipsec_delete_pcbpolicy(inp);
-#endif
INP_LIST_WLOCK(pcbinfo);
- inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
in_pcbremlists(inp);
INP_LIST_WUNLOCK(pcbinfo);
-#ifdef INET6
- if (inp->inp_vflag & INP_IPV6PROTO) {
- ip6_freepcbopts(inp->in6p_outputopts);
- im6o = inp->in6p_moptions;
- inp->in6p_moptions = NULL;
- }
-#endif
- if (inp->inp_options)
- (void)m_free(inp->inp_options);
RO_INVALIDATE_CACHE(&inp->inp_route);
-
- inp->inp_vflag = 0;
+ /* mark as destruction in progress */
inp->inp_flags2 |= INP_FREED;
- crfree(inp->inp_cred);
-#ifdef MAC
- mac_inpcb_destroy(inp);
-#endif
-#ifdef INET6
- ip6_freemoptions(im6o);
-#endif
-#ifdef INET
- inp_freemoptions(imo);
-#endif
- if (!in_pcbrele_wlocked(inp))
- INP_WUNLOCK(inp);
+ INP_WUNLOCK(inp);
+ epoch_call(net_epoch_preempt, &inp->inp_epoch_ctx, in_pcbfree_deferred);
}
/*
@@ -1444,6 +1691,10 @@ in_pcbdrop(struct inpcb *inp)
{
INP_WLOCK_ASSERT(inp);
+#ifdef INVARIANTS
+ if (inp->inp_socket != NULL && inp->inp_ppcb != NULL)
+ MPASS(inp->inp_refcount > 1);
+#endif
/*
* XXXRW: Possibly we should protect the setting of INP_DROPPED with
@@ -1454,11 +1705,12 @@ in_pcbdrop(struct inpcb *inp)
struct inpcbport *phd = inp->inp_phd;
INP_HASH_WLOCK(inp->inp_pcbinfo);
- LIST_REMOVE(inp, inp_hash);
- LIST_REMOVE(inp, inp_portlist);
- if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
- LIST_REMOVE(phd, phd_hash);
- free(phd, M_PCB);
+ in_pcbremlbgrouphash(inp);
+ CK_LIST_REMOVE(inp, inp_hash);
+ CK_LIST_REMOVE(inp, inp_portlist);
+ if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
+ CK_LIST_REMOVE(phd, phd_hash);
+ epoch_call(net_epoch_preempt, &phd->phd_epoch_ctx, inpcbport_free);
}
INP_HASH_WUNLOCK(inp->inp_pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
@@ -1532,7 +1784,7 @@ in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
struct inpcb *inp, *inp_temp;
INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
+ CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
INP_WLOCK(inp);
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0) {
@@ -1559,7 +1811,7 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
int i, gap;
INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
+ CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(inp);
imo = inp->inp_moptions;
if ((inp->inp_vflag & INP_IPV4) &&
@@ -1624,7 +1876,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
*/
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
0, pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1658,7 +1910,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
*/
porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
pcbinfo->ipi_porthashmask)];
- LIST_FOREACH(phd, porthash, phd_hash) {
+ CK_LIST_FOREACH(phd, porthash, phd_hash) {
if (phd->phd_port == lport)
break;
}
@@ -1667,7 +1919,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
* Port is in use by one or more PCBs. Look for best
* fit.
*/
- LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
+ CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
wildcard = 0;
if (cred != NULL &&
!prison_equal_ip4(inp->inp_cred->cr_prison,
@@ -1717,6 +1969,50 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
}
#undef INP_LOOKUP_MAPPED_PCB_COST
+/*
+ * Pick an IPv4 PCB for local port 'lport' from the load balance groups on
+ * the chain that 'lport' hashes to.  Within a matching group the member is
+ * chosen by hashing (faddr, lport, fport) modulo the group's member count.
+ *
+ * A group whose local address equals 'laddr' wins immediately.  A group
+ * bound to INADDR_ANY is remembered as a fallback, but only when
+ * 'lookupflags' contains INPLOOKUP_WILDCARD.  Returns NULL when neither
+ * kind of group is found.
+ */
+static struct inpcb *
+in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+    const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
+    uint16_t fport, int lookupflags)
+{
+	const struct inpcblbgrouphead *hdr;
+	struct inpcblbgroup *grp;
+	struct inpcb *wild_match;
+	uint32_t pick;
+
+	INP_HASH_LOCK_ASSERT(pcbinfo);
+
+	hdr = &pcbinfo->ipi_lbgrouphashbase[INP_PCBLBGROUP_PORTHASH(lport,
+	    pcbinfo->ipi_lbgrouphashmask)];
+
+	/*
+	 * Order of socket selection:
+	 * 1. non-wild.
+	 * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
+	 *
+	 * NOTE:
+	 * - Load balanced group does not contain jailed sockets
+	 * - Load balanced group does not contain IPv4 mapped INET6 wild sockets
+	 */
+	wild_match = NULL;
+	CK_LIST_FOREACH(grp, hdr, il_list) {
+#ifdef INET6
+		if ((grp->il_vflag & INP_IPV4) == 0)
+			continue;
+#endif
+		if (grp->il_lport != lport)
+			continue;
+
+		/*
+		 * NOTE(review): assumes every group on the chain has
+		 * il_inpcnt > 0, otherwise this is a division by zero --
+		 * TODO confirm against the paths that insert groups.
+		 */
+		pick = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport) %
+		    grp->il_inpcnt;
+		if (grp->il_laddr.s_addr == laddr->s_addr)
+			return (grp->il_inp[pick]);
+		if ((lookupflags & INPLOOKUP_WILDCARD) != 0 &&
+		    grp->il_laddr.s_addr == INADDR_ANY)
+			wild_match = grp->il_inp[pick];
+	}
+	return (wild_match);
+}
+
#ifdef PCBGROUP
/*
* Lookup PCB in hash list, using pcbgroup tables.
@@ -1738,7 +2034,7 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
INP_GROUP_LOCK(pcbgroup);
head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
pcbgroup->ipg_hashmask)];
- LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1788,7 +2084,7 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
head = &pcbgroup->ipg_hashbase[INP_PCBHASH(INADDR_ANY,
lport, 0, pcbgroup->ipg_hashmask)];
- LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1862,7 +2158,7 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
*/
head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
0, pcbinfo->ipi_wildmask)];
- LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1922,7 +2218,13 @@ found:
locked = INP_TRY_RLOCK(inp);
else
panic("%s: locking bug", __func__);
- if (!locked)
+ if (__predict_false(locked && (inp->inp_flags2 & INP_FREED))) {
+ if (lookupflags & INPLOOKUP_WLOCKPCB)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+ return (NULL);
+ } else if (!locked)
in_pcbref(inp);
INP_GROUP_UNLOCK(pcbgroup);
if (!locked) {
@@ -1960,18 +2262,19 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
+#ifdef INVARIANTS
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
-
- INP_HASH_LOCK_ASSERT(pcbinfo);
-
+ if (!mtx_owned(&pcbinfo->ipi_hash_lock))
+ MPASS(in_epoch_verbose(net_epoch_preempt, 1));
+#endif
/*
* First look for an exact match.
*/
tmpinp = NULL;
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1996,6 +2299,18 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
return (tmpinp);
/*
+ * Then look in lb group (for wildcard match).
+ */
+ if (pcbinfo->ipi_lbgrouphashbase != NULL &&
+ (lookupflags & INPLOOKUP_WILDCARD)) {
+ inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr,
+ fport, lookupflags);
+ if (inp != NULL) {
+ return (inp);
+ }
+ }
+
+ /*
* Then look for a wildcard match, if requested.
*/
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
@@ -2016,7 +2331,7 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
0, pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -2080,40 +2395,35 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
struct ifnet *ifp)
{
struct inpcb *inp;
- bool locked;
INP_HASH_RLOCK(pcbinfo);
inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
(lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
if (inp != NULL) {
- if (lookupflags & INPLOOKUP_WLOCKPCB)
- locked = INP_TRY_WLOCK(inp);
- else if (lookupflags & INPLOOKUP_RLOCKPCB)
- locked = INP_TRY_RLOCK(inp);
- else
- panic("%s: locking bug", __func__);
- if (!locked)
- in_pcbref(inp);
- INP_HASH_RUNLOCK(pcbinfo);
- if (!locked) {
- if (lookupflags & INPLOOKUP_WLOCKPCB) {
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- return (NULL);
- } else {
- INP_RLOCK(inp);
- if (in_pcbrele_rlocked(inp))
- return (NULL);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_WUNLOCK(inp);
+ inp = NULL;
}
- }
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(inp);
+ inp = NULL;
+ }
+ } else
+ panic("%s: locking bug", __func__);
#ifdef INVARIANTS
- if (lookupflags & INPLOOKUP_WLOCKPCB)
- INP_WLOCK_ASSERT(inp);
- else
- INP_RLOCK_ASSERT(inp);
+ if (inp != NULL) {
+ if (lookupflags & INPLOOKUP_WLOCKPCB)
+ INP_WLOCK_ASSERT(inp);
+ else
+ INP_RLOCK_ASSERT(inp);
+ }
#endif
- } else
- INP_HASH_RUNLOCK(pcbinfo);
+ }
+ INP_HASH_RUNLOCK(pcbinfo);
return (inp);
}
@@ -2212,6 +2522,7 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct inpcbport *phd;
u_int32_t hashkey_faddr;
+ int so_options;
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(pcbinfo);
@@ -2233,9 +2544,22 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
/*
+ * Add entry to load balance group.
+ * Only do this if SO_REUSEPORT_LB is set.
+ */
+ so_options = inp_so_options(inp);
+ if (so_options & SO_REUSEPORT_LB) {
+ int ret = in_pcbinslbgrouphash(inp);
+ if (ret) {
+ /* pcb lb group malloc fail (ret=ENOBUFS). */
+ return (ret);
+ }
+ }
+
+ /*
* Go through port list and look for a head for this lport.
*/
- LIST_FOREACH(phd, pcbporthash, phd_hash) {
+ CK_LIST_FOREACH(phd, pcbporthash, phd_hash) {
if (phd->phd_port == inp->inp_lport)
break;
}
@@ -2247,13 +2571,14 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
if (phd == NULL) {
return (ENOBUFS); /* XXX */
}
+ bzero(&phd->phd_epoch_ctx, sizeof(struct epoch_context));
phd->phd_port = inp->inp_lport;
- LIST_INIT(&phd->phd_pcblist);
- LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
+ CK_LIST_INIT(&phd->phd_pcblist);
+ CK_LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
}
inp->inp_phd = phd;
- LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
- LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
+ CK_LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
+ CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
inp->inp_flags |= INP_INHASHLIST;
#ifdef PCBGROUP
if (do_pcbgroup_update)
@@ -2316,8 +2641,8 @@ in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
- LIST_REMOVE(inp, inp_hash);
- LIST_INSERT_HEAD(head, inp, inp_hash);
+ CK_LIST_REMOVE(inp, inp_hash);
+ CK_LIST_INSERT_HEAD(head, inp, inp_hash);
#ifdef PCBGROUP
if (m != NULL)
@@ -2358,16 +2683,20 @@ in_pcbremlists(struct inpcb *inp)
struct inpcbport *phd = inp->inp_phd;
INP_HASH_WLOCK(pcbinfo);
- LIST_REMOVE(inp, inp_hash);
- LIST_REMOVE(inp, inp_portlist);
- if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
- LIST_REMOVE(phd, phd_hash);
- free(phd, M_PCB);
+
+ /* XXX: Only do if SO_REUSEPORT_LB set? */
+ in_pcbremlbgrouphash(inp);
+
+ CK_LIST_REMOVE(inp, inp_hash);
+ CK_LIST_REMOVE(inp, inp_portlist);
+ if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
+ CK_LIST_REMOVE(phd, phd_hash);
+ epoch_call(net_epoch_preempt, &phd->phd_epoch_ctx, inpcbport_free);
}
INP_HASH_WUNLOCK(pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
}
- LIST_REMOVE(inp, inp_list);
+ CK_LIST_REMOVE(inp, inp_list);
pcbinfo->ipi_count--;
#ifdef PCBGROUP
in_pcbgroup_remove(inp);
@@ -2511,7 +2840,7 @@ inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
struct inpcb *inp;
INP_INFO_WLOCK(&V_tcbinfo);
- LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
+ CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inp);
func(inp, arg);
INP_WUNLOCK(inp);
@@ -2594,7 +2923,7 @@ in_pcbtoxinpcb(const struct inpcb *inp, struct xinpcb *xi)
bzero(&xi->xi_socket, sizeof(struct xsocket));
bcopy(&inp->inp_inc, &xi->inp_inc, sizeof(struct in_conninfo));
xi->inp_gencnt = inp->inp_gencnt;
- xi->inp_ppcb = inp->inp_ppcb;
+ xi->inp_ppcb = (uintptr_t)inp->inp_ppcb;
xi->inp_flow = inp->inp_flow;
xi->inp_flowid = inp->inp_flowid;
xi->inp_flowtype = inp->inp_flowtype;
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h
index d00dd456..86c9705c 100644
--- a/freebsd/sys/netinet/in_pcb.h
+++ b/freebsd/sys/netinet/in_pcb.h
@@ -51,8 +51,11 @@
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
+#include <net/if.h>
+#include <net/if_var.h>
#include <vm/uma.h>
#endif
+#include <sys/ck.h>
#define in6pcb inpcb /* for KAME src sync over BSD*'s */
#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */
@@ -65,8 +68,9 @@
* numbers, and pointers up (to a socket structure) and down (to a
* protocol-specific control block) are stored here.
*/
-LIST_HEAD(inpcbhead, inpcb);
-LIST_HEAD(inpcbporthead, inpcbport);
+CK_LIST_HEAD(inpcbhead, inpcb);
+CK_LIST_HEAD(inpcbporthead, inpcbport);
+CK_LIST_HEAD(inpcblbgrouphead, inpcblbgroup);
typedef uint64_t inp_gen_t;
/*
@@ -79,6 +83,11 @@ struct in_addr_4in6 {
struct in_addr ia46_addr4;
};
+/*
+ * Protocol dependent address storage: one slot viewed either as an IPv4
+ * address carried in a struct in_addr_4in6 or as a struct in6_addr.
+ */
+union in_dependaddr {
+ struct in_addr_4in6 id46_addr; /* IPv4 view; address is .ia46_addr4 */
+ struct in6_addr id6_addr; /* IPv6 view */
+};
+
/*
* NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
* some extra padding to accomplish this.
@@ -89,22 +98,14 @@ struct in_endpoints {
u_int16_t ie_fport; /* foreign port */
u_int16_t ie_lport; /* local port */
/* protocol dependent part, local and foreign addr */
- union {
- /* foreign host table entry */
- struct in_addr_4in6 ie46_foreign;
- struct in6_addr ie6_foreign;
- } ie_dependfaddr;
- union {
- /* local host table entry */
- struct in_addr_4in6 ie46_local;
- struct in6_addr ie6_local;
- } ie_dependladdr;
+ union in_dependaddr ie_dependfaddr; /* foreign host table entry */
+ union in_dependaddr ie_dependladdr; /* local host table entry */
+#define ie_faddr ie_dependfaddr.id46_addr.ia46_addr4
+#define ie_laddr ie_dependladdr.id46_addr.ia46_addr4
+#define ie6_faddr ie_dependfaddr.id6_addr
+#define ie6_laddr ie_dependladdr.id6_addr
u_int32_t ie6_zoneid; /* scope zone id */
};
-#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4
-#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4
-#define ie6_faddr ie_dependfaddr.ie6_foreign
-#define ie6_laddr ie_dependladdr.ie6_local
/*
* XXX The defines for inc_* are hacks and should be changed to direct
@@ -122,8 +123,8 @@ struct in_conninfo {
* Flags for inc_flags.
*/
#define INC_ISIPV6 0x01
+#define INC_IPV6MINMTU 0x02
-#define inc_isipv6 inc_flags /* temp compatibility */
#define inc_fport inc_ie.ie_fport
#define inc_lport inc_ie.ie_lport
#define inc_faddr inc_ie.ie_faddr
@@ -159,6 +160,7 @@ struct in_conninfo {
* Key:
* (b) - Protected by the hpts lock.
* (c) - Constant after initialization
+ * (e) - Protected by the net_epoch_preempt epoch
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
@@ -233,8 +235,8 @@ struct inpcbpolicy;
struct m_snd_tag;
struct inpcb {
/* Cache line #1 (amd64) */
- LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
- LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
+ CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */
+ CK_LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
#define inp_start_zero inp_hpts
@@ -278,7 +280,7 @@ struct inpcb {
TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
- LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
+ CK_LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
u_char inp_vflag; /* (i) IP version flag (v4/v6) */
@@ -316,18 +318,19 @@ struct inpcb {
int in6p_cksum;
short in6p_hops;
};
- LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
+ CK_LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
struct inpcbport *inp_phd; /* (i/h) head of this list */
inp_gen_t inp_gencnt; /* (c) generation count */
- struct llentry *inp_lle; /* cached L2 information */
+ void *spare_ptr; /* Spare pointer. */
rt_gen_t inp_rt_cookie; /* generation for route entry */
union { /* cached L3 information */
struct route inp_route;
struct route_in6 inp_route6;
};
- LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
- /* (p[w]) for list iteration */
- /* (p[r]/l) for addition/removal */
+ CK_LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
+ /* (e[r]) for list iteration */
+ /* (p[w]/l) for addition/removal */
+ struct epoch_context inp_epoch_ctx;
};
#endif /* _KERNEL */
@@ -364,14 +367,11 @@ struct inpcb {
*/
#ifdef _SYS_SOCKETVAR_H_
struct xinpcb {
- size_t xi_len; /* length of this structure */
+ ksize_t xi_len; /* length of this structure */
struct xsocket xi_socket; /* (s,p) */
struct in_conninfo inp_inc; /* (s,p) */
uint64_t inp_gencnt; /* (s,p) */
- union {
- void *inp_ppcb; /* (s) netstat(1) */
- int64_t ph_ppcb;
- };
+ kvaddr_t inp_ppcb; /* (s) netstat(1) */
int64_t inp_spare64[4];
uint32_t inp_flow; /* (s) */
uint32_t inp_flowid; /* (s) */
@@ -392,10 +392,12 @@ struct xinpcb {
} __aligned(8);
struct xinpgen {
- size_t xig_len; /* length of this structure */
+ ksize_t xig_len; /* length of this structure */
u_int xig_count; /* number of PCBs at this time */
+ uint32_t _xig_spare32;
inp_gen_t xig_gen; /* generation count at this time */
so_gen_t xig_sogen; /* socket generation count this time */
+ uint64_t _xig_spare64[4];
} __aligned(8);
#ifdef _KERNEL
void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *);
@@ -403,7 +405,8 @@ void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *);
#endif /* _SYS_SOCKETVAR_H_ */
struct inpcbport {
- LIST_ENTRY(inpcbport) phd_hash;
+ struct epoch_context phd_epoch_ctx;
+ CK_LIST_ENTRY(inpcbport) phd_hash;
struct inpcbhead phd_pcblist;
u_short phd_port;
};
@@ -436,22 +439,23 @@ struct in_pcblist {
* Locking key:
*
* (c) Constant or nearly constant after initialisation
+ * (e) Protected by the net_epoch_preempt epoch
* (g) Locked by ipi_lock
* (l) Locked by ipi_list_lock
- * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting full inpcb list traversal
+ * Global lock protecting inpcb list modification
*/
- struct rwlock ipi_lock;
+ struct mtx ipi_lock;
/*
* Global list of inpcbs on the protocol.
*/
- struct inpcbhead *ipi_listhead; /* (g/l) */
+ struct inpcbhead *ipi_listhead; /* [r](e) [w](g/l) */
u_int ipi_count; /* (l) */
/*
@@ -482,9 +486,9 @@ struct inpcbinfo {
u_int ipi_hashfields; /* (c) */
/*
- * Global lock protecting non-pcbgroup hash lookup tables.
+ * Global lock protecting modification of non-pcbgroup hash lookup tables.
*/
- struct rwlock ipi_hash_lock;
+ struct mtx ipi_hash_lock;
/*
* Global hash of inpcbs, hashed by local and foreign addresses and
@@ -508,6 +512,13 @@ struct inpcbinfo {
u_long ipi_wildmask; /* (p) */
/*
+ * Load balance groups used for the SO_REUSEPORT_LB option,
+ * hashed by local port.
+ */
+ struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (h) */
+ u_long ipi_lbgrouphashmask; /* (h) */
+
+ /*
* Pointer to network stack instance
*/
struct vnet *ipi_vnet; /* (c) */
@@ -549,6 +560,27 @@ struct inpcbgroup {
struct mtx ipg_lock;
} __aligned(CACHE_LINE_SIZE);
+/*
+ * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group
+ * (or unique address:port combination) can be re-used at most
+ * INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which
+ * is dynamically resized as processes bind/unbind to that specific group.
+ */
+struct inpcblbgroup {
+ CK_LIST_ENTRY(inpcblbgroup) il_list;
+ struct epoch_context il_epoch_ctx;
+ uint16_t il_lport; /* (c) */
+ u_char il_vflag; /* (c) */
+ u_char il_pad;
+ uint32_t il_pad2;
+ union in_dependaddr il_dependladdr; /* (c) */
+#define il_laddr il_dependladdr.id46_addr.ia46_addr4
+#define il6_laddr il_dependladdr.id6_addr
+ uint32_t il_inpsiz; /* max count in il_inp[] (h) */
+ uint32_t il_inpcnt; /* cur count in il_inp[] (h) */
+ struct inpcb *il_inp[]; /* (h) */
+};
+
#define INP_LOCK_INIT(inp, d, t) \
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
@@ -593,25 +625,24 @@ struct tcpcb *
inp_inpcbtotcpcb(struct inpcb *inp);
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
uint32_t *faddr, uint16_t *fp);
-short inp_so_options(const struct inpcb *inp);
+int inp_so_options(const struct inpcb *inp);
#endif /* _KERNEL */
#define INP_INFO_LOCK_INIT(ipi, d) \
- rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
-#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock)
-#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock)
-#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock)
-#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock)
-#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock)
-#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock)
-#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
-#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED)
-#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
-#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+ mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
+#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
+#define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER_ET((et))
+#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
+#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
+#define INP_INFO_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et))
+#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT_ET(*(tp)->t_inpcb->inp_et)
+#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
+#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock))
+#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt))
+#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
+#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock))
#define INP_LIST_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
@@ -632,17 +663,16 @@ short inp_so_options(const struct inpcb *inp);
#define INP_LIST_UNLOCK_ASSERT(ipi) \
rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
-#define INP_HASH_LOCK_INIT(ipi, d) \
- rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
-#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
- RA_LOCKED)
-#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
- RA_WLOCKED)
+#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF)
+#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RLOCK(ipi) struct epoch_tracker inp_hash_et; epoch_enter_preempt(net_epoch_preempt, &inp_hash_et)
+#define INP_HASH_RLOCK_ET(ipi, et) epoch_enter_preempt(net_epoch_preempt, &(et))
+#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT_ET(inp_hash_et)
+#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et))
+#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock))
+#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED) /* asserts the caller owns ipi_hash_lock */
#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
MTX_DEF | MTX_DUPOK)
@@ -656,6 +686,10 @@ short inp_so_options(const struct inpcb *inp);
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
(ntohs((lport)) & (mask))
+#define INP_PCBLBGROUP_PORTHASH(lport, mask) \
+ (ntohs((lport)) & (mask))
+#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \
+ ((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport)))
#define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3])
/*
@@ -711,8 +745,8 @@ short inp_so_options(const struct inpcb *inp);
/*
* Flags for inp_flags2.
*/
-#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
-#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
+#define INP_2UNUSED1 0x00000001
+#define INP_2UNUSED2 0x00000002
#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
#define INP_FREED 0x00000010 /* inp itself is not valid */
@@ -724,6 +758,7 @@ short inp_so_options(const struct inpcb *inp);
#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */
#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */
#define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */
+#define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */
/*
* Flags passed to in_pcblookup*() functions.
diff --git a/freebsd/sys/netinet/ip.h b/freebsd/sys/netinet/ip.h
index 6d902fe4..934bd812 100644
--- a/freebsd/sys/netinet/ip.h
+++ b/freebsd/sys/netinet/ip.h
@@ -94,6 +94,11 @@ struct ip {
#define IPTOS_PREC_ROUTINE IPTOS_DSCP_CS0
/*
+ * Number of bits a DiffServ code point is shifted left to form the TOS value.
+ */
+#define IPTOS_DSCP_OFFSET 2
+
+/*
* Definitions for DiffServ Codepoints as per RFC2474 and RFC5865.
*/
#define IPTOS_DSCP_CS0 0x00
diff --git a/freebsd/sys/netinet/ip6.h b/freebsd/sys/netinet/ip6.h
index a0dfcb0f..1f4be3fd 100644
--- a/freebsd/sys/netinet/ip6.h
+++ b/freebsd/sys/netinet/ip6.h
@@ -104,6 +104,7 @@ struct ip6_hdr {
#define IPV6_FLOWLABEL_MASK 0xffff0f00 /* flow label (20 bits) */
#endif /* LITTLE_ENDIAN */
#endif
+#define IPV6_FLOWLABEL_LEN 20
#if 1
/* ECN bits proposed by Sally Floyd */
#define IP6TOS_CE 0x01 /* congestion experienced */
diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c
index 6f5160e0..8f7f6edf 100644
--- a/freebsd/sys/netinet/ip_carp.c
+++ b/freebsd/sys/netinet/ip_carp.c
@@ -189,36 +189,44 @@ static int proto_reg[] = {-1, -1};
*/
/* Accept incoming CARP packets. */
-static VNET_DEFINE(int, carp_allow) = 1;
+VNET_DEFINE_STATIC(int, carp_allow) = 1;
#define V_carp_allow VNET(carp_allow)
+/* Set DSCP in outgoing CARP packets. */
+VNET_DEFINE_STATIC(int, carp_dscp) = 56;
+#define V_carp_dscp VNET(carp_dscp)
+
/* Preempt slower nodes. */
-static VNET_DEFINE(int, carp_preempt) = 0;
+VNET_DEFINE_STATIC(int, carp_preempt) = 0;
#define V_carp_preempt VNET(carp_preempt)
/* Log level. */
-static VNET_DEFINE(int, carp_log) = 1;
+VNET_DEFINE_STATIC(int, carp_log) = 1;
#define V_carp_log VNET(carp_log)
/* Global advskew demotion. */
-static VNET_DEFINE(int, carp_demotion) = 0;
+VNET_DEFINE_STATIC(int, carp_demotion) = 0;
#define V_carp_demotion VNET(carp_demotion)
/* Send error demotion factor. */
-static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
+VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
#define V_carp_senderr_adj VNET(carp_senderr_adj)
/* Iface down demotion factor. */
-static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
+VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
#define V_carp_ifdown_adj VNET(carp_ifdown_adj)
static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
+static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_allow_sysctl, "I",
"Accept incoming CARP packets");
+SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_dscp_sysctl, "I",
+ "DSCP value for carp packets");
SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
@@ -935,7 +943,7 @@ carp_send_ad_locked(struct carp_softc *sc)
ip = mtod(m, struct ip *);
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(*ip) >> 2;
- ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
ip->ip_len = htons(len);
ip->ip_off = htons(IP_DF);
ip->ip_ttl = CARP_DFLTTL;
@@ -985,6 +993,10 @@ carp_send_ad_locked(struct carp_softc *sc)
ip6 = mtod(m, struct ip6_hdr *);
bzero(ip6, sizeof(*ip6));
ip6->ip6_vfc |= IPV6_VERSION;
+ /* Traffic class isn't defined in the ip6 struct; instead
+ * it is shifted into the ip6_flow field */
+ ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
+ IPTOS_DSCP_OFFSET));
ip6->ip6_hlim = CARP_DFLTTL;
ip6->ip6_nxt = IPPROTO_CARP;
@@ -1413,6 +1425,7 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
free(im6o->im6o_membership, M_CARP);
break;
}
+ in6m_acquire(in6m);
im6o->im6o_membership[0] = in6m;
im6o->im6o_num_memberships++;
@@ -1434,6 +1447,7 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
free(im6o->im6o_membership, M_CARP);
break;
}
+ in6m_acquire(in6m);
im6o->im6o_membership[1] = in6m;
im6o->im6o_num_memberships++;
break;
@@ -2104,6 +2118,24 @@ carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
}
static int
+carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ int new, error;
+
+ new = V_carp_dscp;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ if (new < 0 || new > 63)
+ return (EINVAL);
+
+ V_carp_dscp = new;
+
+ return (0);
+}
+
+static int
carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
{
int new, error;
diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c
index 84f39023..fbf74ca1 100644
--- a/freebsd/sys/netinet/ip_divert.c
+++ b/freebsd/sys/netinet/ip_divert.c
@@ -113,8 +113,8 @@ __FBSDID("$FreeBSD$");
*/
/* Internal variables. */
-static VNET_DEFINE(struct inpcbhead, divcb);
-static VNET_DEFINE(struct inpcbinfo, divcbinfo);
+VNET_DEFINE_STATIC(struct inpcbhead, divcb);
+VNET_DEFINE_STATIC(struct inpcbinfo, divcbinfo);
#define V_divcb VNET(divcb)
#define V_divcbinfo VNET(divcbinfo)
@@ -194,6 +194,7 @@ divert_packet(struct mbuf *m, int incoming)
u_int16_t nport;
struct sockaddr_in divsrc;
struct m_tag *mtag;
+ struct epoch_tracker et;
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
if (mtag == NULL) {
@@ -274,8 +275,8 @@ divert_packet(struct mbuf *m, int incoming)
/* Put packet on socket queue, if any */
sa = NULL;
nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info));
- INP_INFO_RLOCK(&V_divcbinfo);
- LIST_FOREACH(inp, &V_divcb, inp_list) {
+ INP_INFO_RLOCK_ET(&V_divcbinfo, et);
+ CK_LIST_FOREACH(inp, &V_divcb, inp_list) {
/* XXX why does only one socket match? */
if (inp->inp_lport == nport) {
INP_RLOCK(inp);
@@ -292,7 +293,7 @@ divert_packet(struct mbuf *m, int incoming)
break;
}
}
- INP_INFO_RUNLOCK(&V_divcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
if (sa == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_noproto);
@@ -554,7 +555,6 @@ div_detach(struct socket *so)
KASSERT(inp != NULL, ("div_detach: inp == NULL"));
INP_INFO_WLOCK(&V_divcbinfo);
INP_WLOCK(inp);
- /* XXX defer destruction to epoch_call */
in_pcbdetach(inp);
in_pcbfree(inp);
INP_INFO_WUNLOCK(&V_divcbinfo);
@@ -634,10 +634,10 @@ static int
div_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
- struct in_pcblist *il;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
+ struct epoch_tracker et;
/*
* The process of preparing the TCB list is too time-consuming and
@@ -656,10 +656,10 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_divcbinfo);
+ INP_INFO_WLOCK(&V_divcbinfo);
gencnt = V_divcbinfo.ipi_gencnt;
n = V_divcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_divcbinfo);
+ INP_INFO_WUNLOCK(&V_divcbinfo);
error = sysctl_wire_old_buffer(req,
2 * sizeof(xig) + n*sizeof(struct xinpcb));
@@ -674,12 +674,13 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
if (error)
return error;
- il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS);
- inp_list = il->il_inp_list;
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == NULL)
+ return ENOMEM;
- INP_INFO_RLOCK(&V_divcbinfo);
- for (inp = LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = LIST_NEXT(inp, inp_list)) {
+ INP_INFO_RLOCK_ET(&V_divcbinfo, et);
+ for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
@@ -688,7 +689,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_divcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
n = i;
error = 0;
@@ -704,11 +705,17 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- il->il_count = n;
- il->il_pcbinfo = &V_divcbinfo;
- epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked);
+ INP_INFO_WLOCK(&V_divcbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_divcbinfo);
if (!error) {
+ struct epoch_tracker et;
/*
* Give the user an updated idea of our state.
* If the generation differs from what we told
@@ -716,13 +723,14 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_divcbinfo);
+ INP_INFO_RLOCK_ET(&V_divcbinfo, et);
xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_divcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_divcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
+ free(inp_list, M_TEMP);
return error;
}
@@ -802,7 +810,6 @@ div_modevent(module_t mod, int type, void *unused)
break;
}
ip_divert_ptr = NULL;
- /* XXX defer to epoch_call ? */
err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
INP_INFO_WUNLOCK(&V_divcbinfo);
#ifndef VIMAGE
diff --git a/freebsd/sys/netinet/ip_encap.c b/freebsd/sys/netinet/ip_encap.c
index 52cd0b40..1e794f73 100644
--- a/freebsd/sys/netinet/ip_encap.c
+++ b/freebsd/sys/netinet/ip_encap.c
@@ -6,6 +6,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,417 +59,214 @@
* So, clearly good old protosw does not work for protocol #4 and #41.
* The code will let you match protocol via src/dst address pair.
*/
-/* XXX is M_NETADDR correct? */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_mrouting.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/mutex.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
-#include <sys/protosw.h>
-#include <sys/queue.h>
+#include <sys/socket.h>
#include <net/if.h>
-#include <net/route.h>
+#include <net/if_var.h>
#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_encap.h>
#ifdef INET6
-#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
-#include <machine/stdarg.h>
+static MALLOC_DEFINE(M_NETADDR, "encap_export_host",
+ "Export host address structure");
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure");
+struct encaptab {
+ CK_LIST_ENTRY(encaptab) chain;
+ int proto;
+ int min_length;
+ int exact_match;
+ void *arg;
-static void encap_add(struct encaptab *);
-static int mask_match(const struct encaptab *, const struct sockaddr *,
- const struct sockaddr *);
-static void encap_fillarg(struct mbuf *, void *);
+ encap_lookup_t lookup;
+ encap_check_t check;
+ encap_input_t input;
+};
+
+CK_LIST_HEAD(encaptab_head, encaptab);
+#ifdef INET
+static struct encaptab_head ipv4_encaptab = CK_LIST_HEAD_INITIALIZER();
+#endif
+#ifdef INET6
+static struct encaptab_head ipv6_encaptab = CK_LIST_HEAD_INITIALIZER();
+#endif
-/*
- * All global variables in ip_encap.c are locked using encapmtx.
- */
static struct mtx encapmtx;
MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF);
-static LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab);
-
-#ifdef INET
-int
-encap4_input(struct mbuf **mp, int *offp, int proto)
+#define ENCAP_WLOCK() mtx_lock(&encapmtx)
+#define ENCAP_WUNLOCK() mtx_unlock(&encapmtx)
+#define ENCAP_RLOCK() struct epoch_tracker encap_et; epoch_enter_preempt(net_epoch_preempt, &encap_et)
+#define ENCAP_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &encap_et)
+#define ENCAP_WAIT() epoch_wait_preempt(net_epoch_preempt)
+
+static struct encaptab *
+encap_attach(struct encaptab_head *head, const struct encap_config *cfg,
+ void *arg, int mflags)
{
- struct ip *ip;
- struct mbuf *m;
- struct sockaddr_in s, d;
- const struct protosw *psw;
- struct encaptab *ep, *match;
- void *arg;
- int matchprio, off, prio;
-
- m = *mp;
- off = *offp;
- ip = mtod(m, struct ip *);
-
- bzero(&s, sizeof(s));
- s.sin_family = AF_INET;
- s.sin_len = sizeof(struct sockaddr_in);
- s.sin_addr = ip->ip_src;
- bzero(&d, sizeof(d));
- d.sin_family = AF_INET;
- d.sin_len = sizeof(struct sockaddr_in);
- d.sin_addr = ip->ip_dst;
-
- arg = NULL;
- psw = NULL;
- match = NULL;
- matchprio = 0;
- mtx_lock(&encapmtx);
- LIST_FOREACH(ep, &encaptab, chain) {
- if (ep->af != AF_INET)
- continue;
- if (ep->proto >= 0 && ep->proto != proto)
- continue;
- if (ep->func)
- prio = (*ep->func)(m, off, proto, ep->arg);
- else {
- /*
- * it's inbound traffic, we need to match in reverse
- * order
- */
- prio = mask_match(ep, (struct sockaddr *)&d,
- (struct sockaddr *)&s);
- }
+ struct encaptab *ep, *tmp;
- /*
- * We prioritize the matches by using bit length of the
- * matches. mask_match() and user-supplied matching function
- * should return the bit length of the matches (for example,
- * if both src/dst are matched for IPv4, 64 should be returned).
- * 0 or negative return value means "it did not match".
- *
- * The question is, since we have two "mask" portion, we
- * cannot really define total order between entries.
- * For example, which of these should be preferred?
- * mask_match() returns 48 (32 + 16) for both of them.
- * src=3ffe::/16, dst=3ffe:501::/32
- * src=3ffe:501::/32, dst=3ffe::/16
- *
- * We need to loop through all the possible candidates
- * to get the best match - the search takes O(n) for
- * n attachments (i.e. interfaces).
- */
- if (prio <= 0)
- continue;
- if (prio > matchprio) {
- matchprio = prio;
- match = ep;
- }
- }
- if (match != NULL) {
- psw = match->psw;
- arg = match->arg;
- }
- mtx_unlock(&encapmtx);
+ if (cfg == NULL || cfg->input == NULL ||
+ (cfg->check == NULL && cfg->lookup == NULL) ||
+ (cfg->lookup != NULL && cfg->exact_match != ENCAP_DRV_LOOKUP) ||
+ (cfg->exact_match == ENCAP_DRV_LOOKUP && cfg->lookup == NULL))
+ return (NULL);
- if (match != NULL) {
- /* found a match, "match" has the best one */
- if (psw != NULL && psw->pr_input != NULL) {
- encap_fillarg(m, arg);
- (*psw->pr_input)(mp, offp, proto);
- } else
- m_freem(m);
- return (IPPROTO_DONE);
+ ep = malloc(sizeof(*ep), M_NETADDR, mflags);
+ if (ep == NULL)
+ return (NULL);
+
+ ep->proto = cfg->proto;
+ ep->min_length = cfg->min_length;
+ ep->exact_match = cfg->exact_match;
+ ep->arg = arg;
+ ep->lookup = cfg->exact_match == ENCAP_DRV_LOOKUP ? cfg->lookup: NULL;
+ ep->check = cfg->exact_match != ENCAP_DRV_LOOKUP ? cfg->check: NULL;
+ ep->input = cfg->input;
+
+ ENCAP_WLOCK();
+ CK_LIST_FOREACH(tmp, head, chain) {
+ if (tmp->exact_match <= ep->exact_match)
+ break;
}
+ if (tmp == NULL)
+ CK_LIST_INSERT_HEAD(head, ep, chain);
+ else
+ CK_LIST_INSERT_BEFORE(tmp, ep, chain);
+ ENCAP_WUNLOCK();
+ return (ep);
+}
+
+static int
+encap_detach(struct encaptab_head *head, const struct encaptab *cookie)
+{
+ struct encaptab *ep;
- /* last resort: inject to raw socket */
- return (rip_input(mp, offp, proto));
+ ENCAP_WLOCK();
+ CK_LIST_FOREACH(ep, head, chain) {
+ if (ep == cookie) {
+ CK_LIST_REMOVE(ep, chain);
+ ENCAP_WUNLOCK();
+ ENCAP_WAIT();
+ free(ep, M_NETADDR);
+ return (0);
+ }
+ }
+ ENCAP_WUNLOCK();
+ return (EINVAL);
}
-#endif
-#ifdef INET6
-int
-encap6_input(struct mbuf **mp, int *offp, int proto)
+static int
+encap_input(struct encaptab_head *head, struct mbuf *m, int off, int proto)
{
- struct mbuf *m = *mp;
- struct ip6_hdr *ip6;
- struct sockaddr_in6 s, d;
- const struct protosw *psw;
struct encaptab *ep, *match;
void *arg;
- int prio, matchprio;
-
- ip6 = mtod(m, struct ip6_hdr *);
+ int matchprio, ret;
- bzero(&s, sizeof(s));
- s.sin6_family = AF_INET6;
- s.sin6_len = sizeof(struct sockaddr_in6);
- s.sin6_addr = ip6->ip6_src;
- bzero(&d, sizeof(d));
- d.sin6_family = AF_INET6;
- d.sin6_len = sizeof(struct sockaddr_in6);
- d.sin6_addr = ip6->ip6_dst;
-
- arg = NULL;
- psw = NULL;
match = NULL;
matchprio = 0;
- mtx_lock(&encapmtx);
- LIST_FOREACH(ep, &encaptab, chain) {
- if (ep->af != AF_INET6)
- continue;
+
+ ENCAP_RLOCK();
+ CK_LIST_FOREACH(ep, head, chain) {
if (ep->proto >= 0 && ep->proto != proto)
continue;
- if (ep->func)
- prio = (*ep->func)(m, *offp, proto, ep->arg);
- else {
- /*
- * it's inbound traffic, we need to match in reverse
- * order
- */
- prio = mask_match(ep, (struct sockaddr *)&d,
- (struct sockaddr *)&s);
- }
-
- /* see encap4_input() for issues here */
- if (prio <= 0)
+ if (ep->min_length > m->m_pkthdr.len)
continue;
- if (prio > matchprio) {
- matchprio = prio;
+ if (ep->exact_match == ENCAP_DRV_LOOKUP)
+ ret = (*ep->lookup)(m, off, proto, &arg);
+ else
+ ret = (*ep->check)(m, off, proto, ep->arg);
+ if (ret <= 0)
+ continue;
+ if (ret > matchprio) {
match = ep;
+ if (ep->exact_match != ENCAP_DRV_LOOKUP)
+ arg = ep->arg;
+ /*
+ * No need to continue the search, we got the
+ * exact match.
+ */
+ if (ret >= ep->exact_match)
+ break;
+ matchprio = ret;
}
}
- if (match != NULL) {
- psw = match->psw;
- arg = match->arg;
- }
- mtx_unlock(&encapmtx);
if (match != NULL) {
- /* found a match */
- if (psw != NULL && psw->pr_input != NULL) {
- encap_fillarg(m, arg);
- return (*psw->pr_input)(mp, offp, proto);
- } else {
- m_freem(m);
- return (IPPROTO_DONE);
- }
+ /* found a match, "match" has the best one */
+ ret = (*match->input)(m, off, proto, arg);
+ ENCAP_RUNLOCK();
+ MPASS(ret == IPPROTO_DONE);
+ return (IPPROTO_DONE);
}
-
- /* last resort: inject to raw socket */
- return rip6_input(mp, offp, proto);
-}
-#endif
-
-/*lint -sem(encap_add, custodial(1)) */
-static void
-encap_add(struct encaptab *ep)
-{
-
- mtx_assert(&encapmtx, MA_OWNED);
- LIST_INSERT_HEAD(&encaptab, ep, chain);
+ ENCAP_RUNLOCK();
+ return (0);
}
-/*
- * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
- * length of mask (sm and dm) is assumed to be same as sp/dp.
- * Return value will be necessary as input (cookie) for encap_detach().
- */
+#ifdef INET
const struct encaptab *
-encap_attach(int af, int proto, const struct sockaddr *sp,
- const struct sockaddr *sm, const struct sockaddr *dp,
- const struct sockaddr *dm, const struct protosw *psw, void *arg)
+ip_encap_attach(const struct encap_config *cfg, void *arg, int mflags)
{
- struct encaptab *ep;
-
- /* sanity check on args */
- if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst))
- return (NULL);
- if (sp->sa_len != dp->sa_len)
- return (NULL);
- if (af != sp->sa_family || af != dp->sa_family)
- return (NULL);
- /* check if anyone have already attached with exactly same config */
- mtx_lock(&encapmtx);
- LIST_FOREACH(ep, &encaptab, chain) {
- if (ep->af != af)
- continue;
- if (ep->proto != proto)
- continue;
- if (ep->src.ss_len != sp->sa_len ||
- bcmp(&ep->src, sp, sp->sa_len) != 0 ||
- bcmp(&ep->srcmask, sm, sp->sa_len) != 0)
- continue;
- if (ep->dst.ss_len != dp->sa_len ||
- bcmp(&ep->dst, dp, dp->sa_len) != 0 ||
- bcmp(&ep->dstmask, dm, dp->sa_len) != 0)
- continue;
-
- mtx_unlock(&encapmtx);
- return (NULL);
- }
-
- ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/
- if (ep == NULL) {
- mtx_unlock(&encapmtx);
- return (NULL);
- }
- bzero(ep, sizeof(*ep));
-
- ep->af = af;
- ep->proto = proto;
- bcopy(sp, &ep->src, sp->sa_len);
- bcopy(sm, &ep->srcmask, sp->sa_len);
- bcopy(dp, &ep->dst, dp->sa_len);
- bcopy(dm, &ep->dstmask, dp->sa_len);
- ep->psw = psw;
- ep->arg = arg;
-
- encap_add(ep);
- mtx_unlock(&encapmtx);
- return (ep);
+ return (encap_attach(&ipv4_encaptab, cfg, arg, mflags));
}
-const struct encaptab *
-encap_attach_func(int af, int proto,
- int (*func)(const struct mbuf *, int, int, void *),
- const struct protosw *psw, void *arg)
+int
+ip_encap_detach(const struct encaptab *cookie)
{
- struct encaptab *ep;
- /* sanity check on args */
- if (!func)
- return (NULL);
-
- ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/
- if (ep == NULL)
- return (NULL);
- bzero(ep, sizeof(*ep));
-
- ep->af = af;
- ep->proto = proto;
- ep->func = func;
- ep->psw = psw;
- ep->arg = arg;
-
- mtx_lock(&encapmtx);
- encap_add(ep);
- mtx_unlock(&encapmtx);
- return (ep);
+ return (encap_detach(&ipv4_encaptab, cookie));
}
int
-encap_detach(const struct encaptab *cookie)
+encap4_input(struct mbuf **mp, int *offp, int proto)
{
- const struct encaptab *ep = cookie;
- struct encaptab *p;
-
- mtx_lock(&encapmtx);
- LIST_FOREACH(p, &encaptab, chain) {
- if (p == ep) {
- LIST_REMOVE(p, chain);
- mtx_unlock(&encapmtx);
- free(p, M_NETADDR); /*XXX*/
- return 0;
- }
- }
- mtx_unlock(&encapmtx);
- return EINVAL;
+ if (encap_input(&ipv4_encaptab, *mp, *offp, proto) != IPPROTO_DONE)
+ return (rip_input(mp, offp, proto));
+ return (IPPROTO_DONE);
}
+#endif /* INET */
-static int
-mask_match(const struct encaptab *ep, const struct sockaddr *sp,
- const struct sockaddr *dp)
+#ifdef INET6
+const struct encaptab *
+ip6_encap_attach(const struct encap_config *cfg, void *arg, int mflags)
{
- struct sockaddr_storage s;
- struct sockaddr_storage d;
- int i;
- const u_int8_t *p, *q;
- u_int8_t *r;
- int matchlen;
-
- if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
- return 0;
- if (sp->sa_family != ep->af || dp->sa_family != ep->af)
- return 0;
- if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len)
- return 0;
-
- matchlen = 0;
-
- p = (const u_int8_t *)sp;
- q = (const u_int8_t *)&ep->srcmask;
- r = (u_int8_t *)&s;
- for (i = 0 ; i < sp->sa_len; i++) {
- r[i] = p[i] & q[i];
- /* XXX estimate */
- matchlen += (q[i] ? 8 : 0);
- }
- p = (const u_int8_t *)dp;
- q = (const u_int8_t *)&ep->dstmask;
- r = (u_int8_t *)&d;
- for (i = 0 ; i < dp->sa_len; i++) {
- r[i] = p[i] & q[i];
- /* XXX rough estimate */
- matchlen += (q[i] ? 8 : 0);
- }
-
- /* need to overwrite len/family portion as we don't compare them */
- s.ss_len = sp->sa_len;
- s.ss_family = sp->sa_family;
- d.ss_len = dp->sa_len;
- d.ss_family = dp->sa_family;
-
- if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 &&
- bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) {
- return matchlen;
- } else
- return 0;
+ return (encap_attach(&ipv6_encaptab, cfg, arg, mflags));
}
-static void
-encap_fillarg(struct mbuf *m, void *arg)
+int
+ip6_encap_detach(const struct encaptab *cookie)
{
- struct m_tag *tag;
- if (arg != NULL) {
- tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT);
- if (tag != NULL) {
- *(void**)(tag+1) = arg;
- m_tag_prepend(m, tag);
- }
- }
+ return (encap_detach(&ipv6_encaptab, cookie));
}
-void *
-encap_getarg(struct mbuf *m)
+int
+encap6_input(struct mbuf **mp, int *offp, int proto)
{
- void *p = NULL;
- struct m_tag *tag;
- tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL);
- if (tag) {
- p = *(void**)(tag+1);
- m_tag_delete(m, tag);
- }
- return p;
+ if (encap_input(&ipv6_encaptab, *mp, *offp, proto) != IPPROTO_DONE)
+ return (rip6_input(mp, offp, proto));
+ return (IPPROTO_DONE);
}
+#endif /* INET6 */
diff --git a/freebsd/sys/netinet/ip_encap.h b/freebsd/sys/netinet/ip_encap.h
index ef232189..f3d1d3af 100644
--- a/freebsd/sys/netinet/ip_encap.h
+++ b/freebsd/sys/netinet/ip_encap.h
@@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,29 +38,33 @@
#ifdef _KERNEL
-struct encaptab {
- LIST_ENTRY(encaptab) chain;
- int af;
- int proto; /* -1: don't care, I'll check myself */
- struct sockaddr_storage src; /* my addr */
- struct sockaddr_storage srcmask;
- struct sockaddr_storage dst; /* remote addr */
- struct sockaddr_storage dstmask;
- int (*func)(const struct mbuf *, int, int, void *);
- const struct protosw *psw; /* only pr_input will be used */
- void *arg; /* passed via m->m_pkthdr.aux */
-};
-
int encap4_input(struct mbuf **, int *, int);
int encap6_input(struct mbuf **, int *, int);
-const struct encaptab *encap_attach(int, int, const struct sockaddr *,
- const struct sockaddr *, const struct sockaddr *,
- const struct sockaddr *, const struct protosw *, void *);
-const struct encaptab *encap_attach_func(int, int,
- int (*)(const struct mbuf *, int, int, void *),
- const struct protosw *, void *);
-int encap_detach(const struct encaptab *);
-void *encap_getarg(struct mbuf *);
+
+typedef int (*encap_lookup_t)(const struct mbuf *, int, int, void **);
+typedef int (*encap_check_t)(const struct mbuf *, int, int, void *);
+typedef int (*encap_input_t)(struct mbuf *, int , int, void *);
+
+/*
+ * Handler description handed to ip_encap_attach()/ip6_encap_attach():
+ * the protocol to match plus the driver callbacks.
+ */
+struct encap_config {
+ int proto; /* protocol */
+ int min_length; /* minimum packet length */
+ int exact_match; /* a packet is exactly matched */
+#define ENCAP_DRV_LOOKUP 0x7fffffff
+
+ /*
+ * lookup receives a void ** argument, check a plain void *; see the
+ * encap_lookup_t/encap_check_t typedefs.
+ * NOTE(review): presumably a handler supplies either lookup or check,
+ * not both -- confirm against ip_encap.c.
+ */
+ encap_lookup_t lookup;
+ encap_check_t check;
+ encap_input_t input;
+};
+
+struct encaptab;
+
+const struct encaptab *ip_encap_attach(const struct encap_config *,
+ void *arg, int mflags);
+const struct encaptab *ip6_encap_attach(const struct encap_config *,
+ void *arg, int mflags);
+
+int ip_encap_detach(const struct encaptab *);
+int ip6_encap_detach(const struct encaptab *);
#endif
#endif /*_NETINET_IP_ENCAP_H_*/
diff --git a/freebsd/sys/netinet/ip_fastfwd.c b/freebsd/sys/netinet/ip_fastfwd.c
index b084fdc6..05deb4d8 100644
--- a/freebsd/sys/netinet/ip_fastfwd.c
+++ b/freebsd/sys/netinet/ip_fastfwd.c
@@ -155,7 +155,7 @@ ip_tryforward(struct mbuf *m)
struct mbuf *m0 = NULL;
struct nhop4_basic nh;
struct sockaddr_in dst;
- struct in_addr odest, dest;
+ struct in_addr dest, odest, rtdest;
uint16_t ip_len, ip_off;
int error = 0;
struct m_tag *fwd_tag = NULL;
@@ -296,12 +296,31 @@ passin:
#endif
/*
+ * Next hop forced by pfil(9) hook?
+ */
+ if ((m->m_flags & M_IP_NEXTHOP) &&
+ ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
+ /*
+ * Now we will find route to forced destination.
+ */
+ dest.s_addr = ((struct sockaddr_in *)
+ (fwd_tag + 1))->sin_addr.s_addr;
+ m_tag_delete(m, fwd_tag);
+ m->m_flags &= ~M_IP_NEXTHOP;
+ }
+
+ /*
* Find route to destination.
*/
if (ip_findroute(&nh, dest, m) != 0)
return (NULL); /* icmp unreach already sent */
/*
+ * Avoid second route lookup by caching destination.
+ */
+ rtdest.s_addr = dest.s_addr;
+
+ /*
* Step 5: outgoing firewall packet processing
*/
if (!PFIL_HOOKED(&V_inet_pfil_hook))
@@ -323,6 +342,8 @@ passin:
*/
if (m->m_flags & M_IP_NEXTHOP)
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+ else
+ fwd_tag = NULL;
if (odest.s_addr != dest.s_addr || fwd_tag != NULL) {
/*
* Is it now for a local address on this host?
@@ -344,7 +365,8 @@ forwardlocal:
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP_NEXTHOP;
}
- if (ip_findroute(&nh, dest, m) != 0)
+ if (dest.s_addr != rtdest.s_addr &&
+ ip_findroute(&nh, dest, m) != 0)
return (NULL); /* icmp unreach already sent */
}
diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h
index 286eb03f..a7bf5b4d 100644
--- a/freebsd/sys/netinet/ip_fw.h
+++ b/freebsd/sys/netinet/ip_fw.h
@@ -285,6 +285,8 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_EXTERNAL_INSTANCE, /* arg1=id of eaction handler instance */
O_EXTERNAL_DATA, /* variable length data */
+ O_SKIP_ACTION, /* none */
+
O_LAST_OPCODE /* not an opcode! */
};
diff --git a/freebsd/sys/netinet/ip_gre.c b/freebsd/sys/netinet/ip_gre.c
index 673e23d5..65ab0ab9 100644
--- a/freebsd/sys/netinet/ip_gre.c
+++ b/freebsd/sys/netinet/ip_gre.c
@@ -4,7 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
*
* Copyright (c) 1998 The NetBSD Foundation, Inc.
- * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
+ * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -43,18 +43,17 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
+#include <sys/jail.h>
#include <sys/systm.h>
-#include <sys/mbuf.h>
#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/protosw.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
#include <sys/errno.h>
-#include <sys/time.h>
#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
#include <sys/sysctl.h>
-#include <net/ethernet.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
#include <net/if.h>
#include <net/if_var.h>
#include <net/vnet.h>
@@ -71,61 +70,177 @@ __FBSDID("$FreeBSD$");
#include <net/if_gre.h>
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_GRE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_input,
- .pr_output = rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
#define GRE_TTL 30
VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
#define V_ip_gre_ttl VNET(ip_gre_ttl)
SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(ip_gre_ttl), 0, "");
+ &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets");
+
+VNET_DEFINE_STATIC(struct gre_list *, ipv4_hashtbl) = NULL;
+#define V_ipv4_hashtbl VNET(ipv4_hashtbl)
+#define GRE_HASH(src, dst) (V_ipv4_hashtbl[\
+ in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)])
+#define GRE_HASH_SC(sc) GRE_HASH((sc)->gre_oip.ip_src.s_addr,\
+ (sc)->gre_oip.ip_dst.s_addr)
+
+/*
+ * Hash a (src, dst) IPv4 address pair: src is hashed first with the
+ * FNV1_32_INIT seed, then dst is folded in using that result as seed.
+ */
+static uint32_t
+in_gre_hashval(in_addr_t src, in_addr_t dst)
+{
+ uint32_t hash;
+
+ hash = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
+ hash = fnv_32_buf(&dst, sizeof(dst), hash);
+ return (hash);
+}
+
+/*
+ * Check whether the (src, dst) pair may be assigned to sc.
+ * Returns EEXIST when sc is already an AF_INET tunnel with exactly this
+ * pair, EADDRNOTAVAIL when a different softc in the matching hash bucket
+ * uses the pair, and 0 otherwise.
+ */
+static int
+in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst)
+{
+ struct gre_softc *other;
+
+ /* Nothing would change for this tunnel. */
+ if (sc->gre_family == AF_INET && sc->gre_oip.ip_src.s_addr == src &&
+ sc->gre_oip.ip_dst.s_addr == dst)
+ return (EEXIST);
+
+ /* Look for another tunnel that already owns the pair. */
+ CK_LIST_FOREACH(other, &GRE_HASH(src, dst), chain) {
+ if (other != sc &&
+ other->gre_oip.ip_src.s_addr == src &&
+ other->gre_oip.ip_dst.s_addr == dst)
+ return (EADDRNOTAVAIL);
+ }
+ return (0);
+}
+/*
+ * Lookup callback for IPv4 GRE (installed as ipv4_encap_cfg.lookup).
+ * Walks the hash bucket selected by the packet's (ip_dst, ip_src) and
+ * looks for a softc whose outer source equals ip_dst and whose outer
+ * destination equals ip_src.
+ * Returns 0 when the hash table has not been allocated, when no softc
+ * matches, or when the matching interface is not IFF_UP. On a match the
+ * softc is stored in *arg and ENCAP_DRV_LOOKUP is returned.
+ */
static int
-in_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+in_gre_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
- GRE_RLOCK_TRACKER;
+ const struct ip *ip;
struct gre_softc *sc;
- struct ip *ip;
- sc = (struct gre_softc *)arg;
- if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+ if (V_ipv4_hashtbl == NULL)
return (0);
- M_ASSERTPKTHDR(m);
- /*
- * We expect that payload contains at least IPv4
- * or IPv6 packet.
- */
- if (m->m_pkthdr.len < sizeof(struct greip) + sizeof(struct ip))
- return (0);
+ MPASS(in_epoch(net_epoch_preempt));
+ ip = mtod(m, const struct ip *);
+ CK_LIST_FOREACH(sc, &GRE_HASH(ip->ip_dst.s_addr,
+ ip->ip_src.s_addr), chain) {
+ /*
+ * This is an inbound packet, its ip_dst is source address
+ * in softc.
+ */
+ if (sc->gre_oip.ip_src.s_addr == ip->ip_dst.s_addr &&
+ sc->gre_oip.ip_dst.s_addr == ip->ip_src.s_addr) {
+ if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
+ *arg = sc;
+ return (ENCAP_DRV_LOOKUP);
+ }
+ }
+ return (0);
+}
- GRE_RLOCK(sc);
- if (sc->gre_family == 0)
- goto bad;
+/*
+ * Finish setting up the outer IPv4 header of sc (header length, IP
+ * version, IP header length, protocol IPPROTO_GRE), let gre_updatehdr()
+ * fill in the GRE part, then link sc into the hash bucket chosen by its
+ * outer source/destination addresses.
+ */
+static void
+in_gre_attach(struct gre_softc *sc)
+{
- KASSERT(sc->gre_family == AF_INET,
- ("wrong gre_family: %d", sc->gre_family));
+ sc->gre_hlen = sizeof(struct greip);
+ sc->gre_oip.ip_v = IPVERSION;
+ sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
+ sc->gre_oip.ip_p = IPPROTO_GRE;
+ gre_updatehdr(sc, &sc->gre_gihdr->gi_gre);
+ CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
+}
- ip = mtod(m, struct ip *);
- if (sc->gre_oip.ip_src.s_addr != ip->ip_dst.s_addr ||
- sc->gre_oip.ip_dst.s_addr != ip->ip_src.s_addr)
- goto bad;
+/*
+ * Change the GRE key (cmd == GRESKEY) or the GRE options
+ * (cmd == GRESOPTS) of an AF_INET tunnel. The softc is unlinked from its
+ * hash chain and GRE_WAIT() is called before the field is modified;
+ * in_gre_attach() then rebuilds the header and links the softc back in.
+ * NOTE(review): GRE_WAIT() presumably waits for concurrent list readers
+ * to finish -- confirm against if_gre.h.
+ */
+void
+in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value)
+{
- GRE_RUNLOCK(sc);
- return (32 * 2);
-bad:
- GRE_RUNLOCK(sc);
- return (0);
+ MPASS(cmd == GRESKEY || cmd == GRESOPTS);
+
+ /* NOTE: we are protected with gre_ioctl_sx lock */
+ MPASS(sc->gre_family == AF_INET);
+ CK_LIST_REMOVE(sc, chain);
+ GRE_WAIT();
+ if (cmd == GRESKEY)
+ sc->gre_key = value;
+ else
+ sc->gre_options = value;
+ in_gre_attach(sc);
+}
+
+/*
+ * IPv4 tunnel endpoint ioctls for a GRE softc.
+ *
+ * SIOCSIFPHYADDR: data is a struct in_aliasreq. The source/destination
+ * must both be AF_INET sockaddr_in of the expected length (else EINVAL)
+ * and must not be INADDR_ANY (else EADDRNOTAVAIL). The hash table is
+ * created on first use. If another softc already uses the pair,
+ * EADDRNOTAVAIL is returned; if sc already has exactly this pair, 0 is
+ * returned without changes. Otherwise a new zeroed header is allocated,
+ * any previous configuration is unlinked and its header freed, and the
+ * softc is attached with the new addresses.
+ *
+ * SIOCGIFPSRCADDR / SIOCGIFPDSTADDR: data is a struct ifreq. Returns
+ * EADDRNOTAVAIL unless the tunnel is AF_INET; otherwise ifr_addr is
+ * filled with the outer source or destination address and the result of
+ * prison_if() is returned (the address is zeroed again on failure).
+ *
+ * Any other cmd returns EINVAL.
+ */
+int
+in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr_in *dst, *src;
+ struct ip *ip;
+ int error;
+
+ /* NOTE: we are protected with gre_ioctl_sx lock */
+ error = EINVAL;
+ switch (cmd) {
+ case SIOCSIFPHYADDR:
+ src = &((struct in_aliasreq *)data)->ifra_addr;
+ dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
+
+ /* sanity checks */
+ if (src->sin_family != dst->sin_family ||
+ src->sin_family != AF_INET ||
+ src->sin_len != dst->sin_len ||
+ src->sin_len != sizeof(*src))
+ break;
+ if (src->sin_addr.s_addr == INADDR_ANY ||
+ dst->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ if (V_ipv4_hashtbl == NULL)
+ V_ipv4_hashtbl = gre_hashinit();
+ error = in_gre_checkdup(sc, src->sin_addr.s_addr,
+ dst->sin_addr.s_addr);
+ if (error == EADDRNOTAVAIL)
+ break;
+ if (error == EEXIST) {
+ /* Addresses are the same. Just return. */
+ error = 0;
+ break;
+ }
+ /*
+ * NOTE(review): the extra 3 * sizeof(uint32_t) presumably leaves
+ * room for optional GRE header words -- confirm in if_gre.c.
+ */
+ ip = malloc(sizeof(struct greip) + 3 * sizeof(uint32_t),
+ M_GRE, M_WAITOK | M_ZERO);
+ ip->ip_src.s_addr = src->sin_addr.s_addr;
+ ip->ip_dst.s_addr = dst->sin_addr.s_addr;
+ if (sc->gre_family != 0) {
+ /* Detach existing tunnel first */
+ CK_LIST_REMOVE(sc, chain);
+ GRE_WAIT();
+ free(sc->gre_hdr, M_GRE);
+ /* XXX: should we notify about link state change? */
+ }
+ sc->gre_family = AF_INET;
+ sc->gre_hdr = ip;
+ sc->gre_oseq = 0;
+ sc->gre_iseq = UINT32_MAX;
+ in_gre_attach(sc);
+ break;
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ if (sc->gre_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ src = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(src, 0, sizeof(*src));
+ src->sin_family = AF_INET;
+ src->sin_len = sizeof(*src);
+ src->sin_addr = (cmd == SIOCGIFPSRCADDR) ?
+ sc->gre_oip.ip_src: sc->gre_oip.ip_dst;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
+ /* Do not leak the address if the caller may not see it. */
+ if (error != 0)
+ memset(src, 0, sizeof(*src));
+ break;
+ }
+ return (error);
}
int
@@ -158,14 +273,30 @@ in_gre_output(struct mbuf *m, int af, int hlen)
return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL));
}
-int
-in_gre_attach(struct gre_softc *sc)
+static const struct encaptab *ecookie = NULL;
+static const struct encap_config ipv4_encap_cfg = {
+ .proto = IPPROTO_GRE,
+ .min_length = sizeof(struct greip) + sizeof(struct ip),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in_gre_lookup,
+ .input = gre_input
+};
+
+/*
+ * Register the IPv4 GRE handler (ipv4_encap_cfg) with ip_encap_attach()
+ * and remember the returned cookie in ecookie. Only done when running in
+ * the default vnet; in any other vnet this is a no-op.
+ */
+void
+in_gre_init(void)
{
- KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
- sc->gre_ecookie = encap_attach_func(AF_INET, IPPROTO_GRE,
- in_gre_encapcheck, &in_gre_protosw, sc);
- if (sc->gre_ecookie == NULL)
- return (EEXIST);
- return (0);
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+ ecookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK);
+}
+
+/*
+ * Counterpart of in_gre_init(): in the default vnet the encap handler
+ * registered under ecookie is detached; in every vnet the IPv4 hash
+ * table is destroyed if it was ever allocated.
+ */
+void
+in_gre_uninit(void)
+{
+
+ if (IS_DEFAULT_VNET(curvnet))
+ ip_encap_detach(ecookie);
+ if (V_ipv4_hashtbl != NULL)
+ gre_hashdestroy(V_ipv4_hashtbl);
}
diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c
index 3fc59a14..414e3812 100644
--- a/freebsd/sys/netinet/ip_icmp.c
+++ b/freebsd/sys/netinet/ip_icmp.c
@@ -84,13 +84,13 @@ __FBSDID("$FreeBSD$");
* routines to turnaround packets back to the originator, and
* host table maintenance routines.
*/
-static VNET_DEFINE(int, icmplim) = 200;
+VNET_DEFINE_STATIC(int, icmplim) = 200;
#define V_icmplim VNET(icmplim)
SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmplim), 0,
"Maximum number of ICMP responses per second");
-static VNET_DEFINE(int, icmplim_output) = 1;
+VNET_DEFINE_STATIC(int, icmplim_output) = 1;
#define V_icmplim_output VNET(icmplim_output)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmplim_output), 0,
@@ -106,13 +106,13 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_icmp, ICMPCTL_STATS, stats, struct icmpstat,
VNET_PCPUSTAT_SYSUNINIT(icmpstat);
#endif /* VIMAGE */
-static VNET_DEFINE(int, icmpmaskrepl) = 0;
+VNET_DEFINE_STATIC(int, icmpmaskrepl) = 0;
#define V_icmpmaskrepl VNET(icmpmaskrepl)
SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpmaskrepl), 0,
"Reply to ICMP Address Mask Request packets");
-static VNET_DEFINE(u_int, icmpmaskfake) = 0;
+VNET_DEFINE_STATIC(u_int, icmpmaskfake) = 0;
#define V_icmpmaskfake VNET(icmpmaskfake)
SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpmaskfake), 0,
@@ -124,37 +124,37 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(drop_redirect), 0,
"Ignore ICMP redirects");
-static VNET_DEFINE(int, log_redirect) = 0;
+VNET_DEFINE_STATIC(int, log_redirect) = 0;
#define V_log_redirect VNET(log_redirect)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(log_redirect), 0,
"Log ICMP redirects to the console");
-static VNET_DEFINE(char, reply_src[IFNAMSIZ]);
+VNET_DEFINE_STATIC(char, reply_src[IFNAMSIZ]);
#define V_reply_src VNET(reply_src)
SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(reply_src), IFNAMSIZ,
"ICMP reply source for non-local packets");
-static VNET_DEFINE(int, icmp_rfi) = 0;
+VNET_DEFINE_STATIC(int, icmp_rfi) = 0;
#define V_icmp_rfi VNET(icmp_rfi)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_rfi), 0,
"ICMP reply from incoming interface for non-local packets");
-
-static VNET_DEFINE(int, icmp_quotelen) = 8;
+/* Router requirements RFC 1812 section 4.3.2.3 requires 576 - 28. */
+VNET_DEFINE_STATIC(int, icmp_quotelen) = 548;
#define V_icmp_quotelen VNET(icmp_quotelen)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_quotelen), 0,
"Number of bytes from original packet to quote in ICMP reply");
-static VNET_DEFINE(int, icmpbmcastecho) = 0;
+VNET_DEFINE_STATIC(int, icmpbmcastecho) = 0;
#define V_icmpbmcastecho VNET(icmpbmcastecho)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpbmcastecho), 0,
"Reply to multicast ICMP Echo Request and Timestamp packets");
-static VNET_DEFINE(int, icmptstamprepl) = 1;
+VNET_DEFINE_STATIC(int, icmptstamprepl) = 1;
#define V_icmptstamprepl VNET(icmptstamprepl)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW,
&VNET_NAME(icmptstamprepl), 0,
@@ -1003,7 +1003,7 @@ struct icmp_rate {
const char *descr;
struct counter_rate cr;
};
-static VNET_DEFINE(struct icmp_rate, icmp_rates[BANDLIM_MAX]) = {
+VNET_DEFINE_STATIC(struct icmp_rate, icmp_rates[BANDLIM_MAX]) = {
{ "icmp unreach response" },
{ "icmp ping response" },
{ "icmp tstamp response" },
diff --git a/freebsd/sys/netinet/ip_id.c b/freebsd/sys/netinet/ip_id.c
index 02bf2c5b..85a67612 100644
--- a/freebsd/sys/netinet/ip_id.c
+++ b/freebsd/sys/netinet/ip_id.c
@@ -100,8 +100,8 @@ __FBSDID("$FreeBSD$");
* suggested by RFC6864. We use per-CPU counter for that, or if
* user wants to, we can turn on random ID generation.
*/
-static VNET_DEFINE(int, ip_rfc6864) = 1;
-static VNET_DEFINE(int, ip_do_randomid) = 0;
+VNET_DEFINE_STATIC(int, ip_rfc6864) = 1;
+VNET_DEFINE_STATIC(int, ip_do_randomid) = 0;
#define V_ip_rfc6864 VNET(ip_rfc6864)
#define V_ip_do_randomid VNET(ip_do_randomid)
@@ -109,13 +109,13 @@ static VNET_DEFINE(int, ip_do_randomid) = 0;
* Random ID state engine.
*/
static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
-static VNET_DEFINE(uint16_t *, id_array);
-static VNET_DEFINE(bitstr_t *, id_bits);
-static VNET_DEFINE(int, array_ptr);
-static VNET_DEFINE(int, array_size);
-static VNET_DEFINE(int, random_id_collisions);
-static VNET_DEFINE(int, random_id_total);
-static VNET_DEFINE(struct mtx, ip_id_mtx);
+VNET_DEFINE_STATIC(uint16_t *, id_array);
+VNET_DEFINE_STATIC(bitstr_t *, id_bits);
+VNET_DEFINE_STATIC(int, array_ptr);
+VNET_DEFINE_STATIC(int, array_size);
+VNET_DEFINE_STATIC(int, random_id_collisions);
+VNET_DEFINE_STATIC(int, random_id_total);
+VNET_DEFINE_STATIC(struct mtx, ip_id_mtx);
#define V_id_array VNET(id_array)
#define V_id_bits VNET(id_bits)
#define V_array_ptr VNET(array_ptr)
@@ -127,7 +127,7 @@ static VNET_DEFINE(struct mtx, ip_id_mtx);
/*
* Non-random ID state engine is simply a per-cpu counter.
*/
-static VNET_DEFINE(counter_u64_t, ip_id);
+VNET_DEFINE_STATIC(counter_u64_t, ip_id);
#define V_ip_id VNET(ip_id)
static int sysctl_ip_randomid(SYSCTL_HANDLER_ARGS);
diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c
index 343eec5e..2852b52e 100644
--- a/freebsd/sys/netinet/ip_input.c
+++ b/freebsd/sys/netinet/ip_input.c
@@ -111,7 +111,7 @@ SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW
&VNET_NAME(ipforwarding), 0,
"Enable IP forwarding between interfaces");
-static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */
+VNET_DEFINE_STATIC(int, ipsendredirects) = 1; /* XXX */
#define V_ipsendredirects VNET(ipsendredirects)
SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ipsendredirects), 0,
@@ -130,7 +130,7 @@ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_R
* to the loopback interface instead of the interface where the
* packets for those addresses are received.
*/
-static VNET_DEFINE(int, ip_checkinterface);
+VNET_DEFINE_STATIC(int, ip_checkinterface);
#define V_ip_checkinterface VNET(ip_checkinterface)
SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_checkinterface), 0,
@@ -559,13 +559,15 @@ tooshort:
/*
* Try to forward the packet, but if we fail continue.
+ * ip_tryforward() does not generate redirects, so fall
+ * through to normal processing if redirects are required.
* ip_tryforward() does inbound and outbound packet firewall
* processing. If firewall has decided that destination becomes
* our local address, it sets M_FASTFWD_OURS flag. In this
* case skip another inbound firewall processing and update
* ip pointer.
*/
- if (V_ipforwarding != 0
+ if (V_ipforwarding != 0 && V_ipsendredirects == 0
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
&& (!IPSEC_ENABLED(ipv4) ||
IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
@@ -1349,7 +1351,7 @@ makedummy:
* locking. This code remains in ip_input.c as ip_mroute.c is optionally
* compiled.
*/
-static VNET_DEFINE(int, ip_rsvp_on);
+VNET_DEFINE_STATIC(int, ip_rsvp_on);
VNET_DEFINE(struct socket *, ip_rsvpd);
#define V_ip_rsvp_on VNET(ip_rsvp_on)
diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c
index ac901601..987549c6 100644
--- a/freebsd/sys/netinet/ip_mroute.c
+++ b/freebsd/sys/netinet/ip_mroute.c
@@ -127,7 +127,7 @@ __FBSDID("$FreeBSD$");
#define VIFI_INVALID ((vifi_t) -1)
-static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */
+VNET_DEFINE_STATIC(uint32_t, last_tv_sec); /* last time we processed this */
#define V_last_tv_sec VNET(last_tv_sec)
static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
@@ -151,14 +151,14 @@ static struct mtx mrouter_mtx;
static int ip_mrouter_cnt; /* # of vnets with active mrouters */
static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
-static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
+VNET_PCPUSTAT_DEFINE_STATIC(struct mrtstat, mrtstat);
VNET_PCPUSTAT_SYSINIT(mrtstat);
VNET_PCPUSTAT_SYSUNINIT(mrtstat);
SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
"netinet/ip_mroute.h)");
-static VNET_DEFINE(u_long, mfchash);
+VNET_DEFINE_STATIC(u_long, mfchash);
#define V_mfchash VNET(mfchash)
#define MFCHASH(a, g) \
((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
@@ -166,9 +166,9 @@ static VNET_DEFINE(u_long, mfchash);
#define MFCHASHSIZE 256
static u_long mfchashsize; /* Hash size */
-static VNET_DEFINE(u_char *, nexpire); /* 0..mfchashsize-1 */
+VNET_DEFINE_STATIC(u_char *, nexpire); /* 0..mfchashsize-1 */
#define V_nexpire VNET(nexpire)
-static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
+VNET_DEFINE_STATIC(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
#define V_mfchashtbl VNET(mfchashtbl)
static struct mtx mfc_mtx;
@@ -179,9 +179,9 @@ static struct mtx mfc_mtx;
mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx)
-static VNET_DEFINE(vifi_t, numvifs);
+VNET_DEFINE_STATIC(vifi_t, numvifs);
#define V_numvifs VNET(numvifs)
-static VNET_DEFINE(struct vif, viftable[MAXVIFS]);
+VNET_DEFINE_STATIC(struct vif, viftable[MAXVIFS]);
#define V_viftable VNET(viftable)
SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
@@ -197,7 +197,7 @@ static struct mtx vif_mtx;
static eventhandler_tag if_detach_event_tag = NULL;
-static VNET_DEFINE(struct callout, expire_upcalls_ch);
+VNET_DEFINE_STATIC(struct callout, expire_upcalls_ch);
#define V_expire_upcalls_ch VNET(expire_upcalls_ch)
#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
@@ -212,9 +212,9 @@ static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
* expiration time. Periodically, the entries are analysed and processed.
*/
#define BW_METER_BUCKETS 1024
-static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
+VNET_DEFINE_STATIC(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
#define V_bw_meter_timers VNET(bw_meter_timers)
-static VNET_DEFINE(struct callout, bw_meter_ch);
+VNET_DEFINE_STATIC(struct callout, bw_meter_ch);
#define V_bw_meter_ch VNET(bw_meter_ch)
#define BW_METER_PERIOD (hz) /* periodical handling of bw meters */
@@ -222,16 +222,16 @@ static VNET_DEFINE(struct callout, bw_meter_ch);
* Pending upcalls are stored in a vector which is flushed when
* full, or periodically
*/
-static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
+VNET_DEFINE_STATIC(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
#define V_bw_upcalls VNET(bw_upcalls)
-static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */
+VNET_DEFINE_STATIC(u_int, bw_upcalls_n); /* # of pending upcalls */
#define V_bw_upcalls_n VNET(bw_upcalls_n)
-static VNET_DEFINE(struct callout, bw_upcalls_ch);
+VNET_DEFINE_STATIC(struct callout, bw_upcalls_ch);
#define V_bw_upcalls_ch VNET(bw_upcalls_ch)
#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */
-static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
+VNET_PCPUSTAT_DEFINE_STATIC(struct pimstat, pimstat);
VNET_PCPUSTAT_SYSINIT(pimstat);
VNET_PCPUSTAT_SYSUNINIT(pimstat);
@@ -244,20 +244,17 @@ SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
&pim_squelch_wholepkt, 0,
"Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
-extern struct domain inetdomain;
-static const struct protosw in_pim_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_PIM,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = pim_input,
- .pr_output = rip_output,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
static const struct encaptab *pim_encap_cookie;
-
static int pim_encapcheck(const struct mbuf *, int, int, void *);
+static int pim_input(struct mbuf *, int, int, void *);
+
+static const struct encap_config ipv4_encap_cfg = {
+ .proto = IPPROTO_PIM,
+ .min_length = sizeof(struct ip) + PIM_MINLEN,
+ .exact_match = 8,
+ .check = pim_encapcheck,
+ .input = pim_input
+};
/*
* Note: the PIM Register encapsulation adds the following in front of a
@@ -302,9 +299,9 @@ static struct pim_encap_pimhdr pim_encap_pimhdr = {
0 /* flags */
};
-static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID;
+VNET_DEFINE_STATIC(vifi_t, reg_vif_num) = VIFI_INVALID;
#define V_reg_vif_num VNET(reg_vif_num)
-static VNET_DEFINE(struct ifnet, multicast_register_if);
+VNET_DEFINE_STATIC(struct ifnet, multicast_register_if);
#define V_multicast_register_if VNET(multicast_register_if)
/*
@@ -373,9 +370,9 @@ static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
MRT_MFC_FLAGS_BORDER_VIF |
MRT_MFC_RP |
MRT_MFC_BW_UPCALL);
-static VNET_DEFINE(uint32_t, mrt_api_config);
+VNET_DEFINE_STATIC(uint32_t, mrt_api_config);
#define V_mrt_api_config VNET(mrt_api_config)
-static VNET_DEFINE(int, pim_assert_enabled);
+VNET_DEFINE_STATIC(int, pim_assert_enabled);
#define V_pim_assert_enabled VNET(pim_assert_enabled)
static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */
@@ -2546,16 +2543,12 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
* into the kernel.
*/
static int
-pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+pim_encapcheck(const struct mbuf *m __unused, int off __unused,
+ int proto __unused, void *arg __unused)
{
-#ifdef DIAGNOSTIC
+ /*
+ * No per-packet inspection is done here: proto is only asserted, and
+ * the constant returned below is the same value as
+ * ipv4_encap_cfg.exact_match.
+ */
KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
-#endif
- if (proto != IPPROTO_PIM)
- return 0; /* not for us; reject the datagram. */
-
- return 64; /* claim the datagram. */
+ return (8); /* claim the datagram. */
}
/*
@@ -2566,18 +2559,15 @@ pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
* (used by PIM-SM): the PIM header is stripped off, and the inner packet
* is passed to if_simloop().
*/
-int
-pim_input(struct mbuf **mp, int *offp, int proto)
+static int
+pim_input(struct mbuf *m, int off, int proto, void *arg __unused)
{
- struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
struct pim *pim;
- int iphlen = *offp;
+ int iphlen = off;
int minlen;
int datalen = ntohs(ip->ip_len) - iphlen;
int ip_tos;
-
- *mp = NULL;
/* Keep statistics */
PIMSTAT_INC(pims_rcv_total_msgs);
@@ -2781,10 +2771,7 @@ pim_input_to_daemon:
* XXX: the outer IP header pkt size of a Register is not adjust to
* reflect the fact that the inner multicast data is truncated.
*/
- *mp = m;
- rip_input(mp, offp, proto);
-
- return (IPPROTO_DONE);
+ return (rip_input(&m, &off, proto));
}
static int
@@ -2877,8 +2864,7 @@ ip_mroute_modevent(module_t mod, int type, void *unused)
TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
&pim_squelch_wholepkt);
- pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
- pim_encapcheck, &in_pim_protosw, NULL);
+ pim_encap_cookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK);
if (pim_encap_cookie == NULL) {
printf("ip_mroute: unable to attach pim encap\n");
VIF_LOCK_DESTROY();
@@ -2921,7 +2907,7 @@ ip_mroute_modevent(module_t mod, int type, void *unused)
EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
if (pim_encap_cookie) {
- encap_detach(pim_encap_cookie);
+ ip_encap_detach(pim_encap_cookie);
pim_encap_cookie = NULL;
}
diff --git a/freebsd/sys/netinet/ip_options.c b/freebsd/sys/netinet/ip_options.c
index cc2f3eed..7c189bdb 100644
--- a/freebsd/sys/netinet/ip_options.c
+++ b/freebsd/sys/netinet/ip_options.c
@@ -70,13 +70,13 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
-static VNET_DEFINE(int, ip_dosourceroute);
+VNET_DEFINE_STATIC(int, ip_dosourceroute);
SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_dosourceroute), 0,
"Enable forwarding source routed IP packets");
#define V_ip_dosourceroute VNET(ip_dosourceroute)
-static VNET_DEFINE(int, ip_acceptsourceroute);
+VNET_DEFINE_STATIC(int, ip_acceptsourceroute);
SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_acceptsourceroute), 0,
"Enable accepting source routed IP packets");
diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c
index 792f2311..5f643746 100644
--- a/freebsd/sys/netinet/ip_output.c
+++ b/freebsd/sys/netinet/ip_output.c
@@ -82,6 +82,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
+
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
#ifdef SCTP
#include <netinet/sctp.h>
#include <netinet/sctp_crc32.h>
@@ -922,24 +926,34 @@ void
in_delayed_cksum(struct mbuf *m)
{
struct ip *ip;
- uint16_t csum, offset, ip_len;
+ struct udphdr *uh;
+ uint16_t cklen, csum, offset;
ip = mtod(m, struct ip *);
offset = ip->ip_hl << 2 ;
- ip_len = ntohs(ip->ip_len);
- csum = in_cksum_skip(m, ip_len, offset);
- if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
- csum = 0xffff;
- offset += m->m_pkthdr.csum_data; /* checksum offset */
- /* find the mbuf in the chain where the checksum starts*/
- while ((m != NULL) && (offset >= m->m_len)) {
- offset -= m->m_len;
- m = m->m_next;
+ if (m->m_pkthdr.csum_flags & CSUM_UDP) {
+ /*
+ * For UDP the checksum covers uh_ulen bytes after the IP
+ * header. If the UDP header is not entirely in the first
+ * mbuf, copy uh_ulen out of the chain. The field is in
+ * network byte order on both paths, so both must convert
+ * it with ntohs() before using it as a length.
+ */
+ if (offset + sizeof(struct udphdr) > m->m_len) {
+ m_copydata(m, offset + offsetof(struct udphdr,
+ uh_ulen), sizeof(cklen), (caddr_t)&cklen);
+ cklen = ntohs(cklen);
+ } else {
+ uh = (struct udphdr *)mtodo(m, offset);
+ cklen = ntohs(uh->uh_ulen);
+ }
+ csum = in_cksum_skip(m, cklen + offset, offset);
+ /* A computed UDP checksum of 0 is sent as 0xffff. */
+ if (csum == 0)
+ csum = 0xffff;
+ } else {
+ cklen = ntohs(ip->ip_len);
+ csum = in_cksum_skip(m, cklen, offset);
}
- KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain."));
- KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs."));
- *(u_short *)(m->m_data + offset) = csum;
+ offset += m->m_pkthdr.csum_data; /* checksum offset */
+
+ /* Store the result; use m_copyback() if the field is not in the first mbuf. */
+ if (offset + sizeof(csum) > m->m_len)
+ m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
+ else
+ *(u_short *)mtodo(m, offset) = csum;
}
/*
@@ -980,6 +994,15 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(inp);
error = 0;
break;
+ case SO_REUSEPORT_LB:
+ INP_WLOCK(inp);
+ if ((so->so_options & SO_REUSEPORT_LB) != 0)
+ inp->inp_flags2 |= INP_REUSEPORT_LB;
+ else
+ inp->inp_flags2 &= ~INP_REUSEPORT_LB;
+ INP_WUNLOCK(inp);
+ error = 0;
+ break;
case SO_SETFIB:
INP_WLOCK(inp);
inp->inp_inc.inc_fibnum = so->so_fibnum;
@@ -1235,13 +1258,23 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
switch (sopt->sopt_name) {
case IP_OPTIONS:
case IP_RETOPTS:
- if (inp->inp_options)
- error = sooptcopyout(sopt,
- mtod(inp->inp_options,
- char *),
- inp->inp_options->m_len);
- else
+ INP_RLOCK(inp);
+ if (inp->inp_options) {
+ struct mbuf *options;
+
+ options = m_dup(inp->inp_options, M_NOWAIT);
+ INP_RUNLOCK(inp);
+ if (options != NULL) {
+ error = sooptcopyout(sopt,
+ mtod(options, char *),
+ options->m_len);
+ m_freem(options);
+ } else
+ error = ENOMEM;
+ } else {
+ INP_RUNLOCK(inp);
sopt->sopt_valsize = 0;
+ }
break;
case IP_TOS:
diff --git a/freebsd/sys/netinet/ip_reass.c b/freebsd/sys/netinet/ip_reass.c
index 64660228..95603390 100644
--- a/freebsd/sys/netinet/ip_reass.c
+++ b/freebsd/sys/netinet/ip_reass.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/hash.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
@@ -65,18 +66,19 @@ SYSCTL_DECL(_net_inet_ip);
/*
* Reassembly headers are stored in hash buckets.
*/
-#define IPREASS_NHASH_LOG2 6
+#define IPREASS_NHASH_LOG2 10
#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
#define IPREASS_HMASK (IPREASS_NHASH - 1)
struct ipqbucket {
TAILQ_HEAD(ipqhead, ipq) head;
struct mtx lock;
+ int count;
};
-static VNET_DEFINE(struct ipqbucket, ipq[IPREASS_NHASH]);
+VNET_DEFINE_STATIC(struct ipqbucket, ipq[IPREASS_NHASH]);
#define V_ipq VNET(ipq)
-static VNET_DEFINE(uint32_t, ipq_hashseed);
+VNET_DEFINE_STATIC(uint32_t, ipq_hashseed);
#define V_ipq_hashseed VNET(ipq_hashseed)
#define IPQ_LOCK(i) mtx_lock(&V_ipq[i].lock)
@@ -84,6 +86,9 @@ static VNET_DEFINE(uint32_t, ipq_hashseed);
#define IPQ_UNLOCK(i) mtx_unlock(&V_ipq[i].lock)
#define IPQ_LOCK_ASSERT(i) mtx_assert(&V_ipq[i].lock, MA_OWNED)
+VNET_DEFINE_STATIC(int, ipreass_maxbucketsize);
+#define V_ipreass_maxbucketsize VNET(ipreass_maxbucketsize)
+
void ipreass_init(void);
void ipreass_drain(void);
void ipreass_slowtimo(void);
@@ -91,28 +96,54 @@ void ipreass_slowtimo(void);
void ipreass_destroy(void);
#endif
static int sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS);
+static int sysctl_maxfragbucketsize(SYSCTL_HANDLER_ARGS);
static void ipreass_zone_change(void *);
static void ipreass_drain_tomax(void);
-static void ipq_free(struct ipqhead *, struct ipq *);
+static void ipq_free(struct ipqbucket *, struct ipq *);
static struct ipq * ipq_reuse(int);
static inline void
-ipq_timeout(struct ipqhead *head, struct ipq *fp)
+ipq_timeout(struct ipqbucket *bucket, struct ipq *fp)
{
IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
- ipq_free(head, fp);
+ ipq_free(bucket, fp);
}
static inline void
-ipq_drop(struct ipqhead *head, struct ipq *fp)
+ipq_drop(struct ipqbucket *bucket, struct ipq *fp)
{
IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
- ipq_free(head, fp);
+ ipq_free(bucket, fp);
}
-static VNET_DEFINE(uma_zone_t, ipq_zone);
+/*
+ * By default, limit the number of IP fragments across all reassembly
+ * queues to 1/32 of the total number of mbuf clusters.
+ *
+ * Limit the total number of reassembly queues per VNET to the
+ * IP fragment limit, but ensure the limit will not allow any bucket
+ * to grow above 100 items. (The bucket limit is
+ * IP_MAXFRAGPACKETS / (IPREASS_NHASH / 2), so the 50 is the correct
+ * multiplier to reach a 100-item limit.)
+ * The 100-item limit was chosen as brief testing seems to show that
+ * this produces "reasonable" performance on some subset of systems
+ * under DoS attack.
+ */
+#define IP_MAXFRAGS (nmbclusters / 32)
+#define IP_MAXFRAGPACKETS (imin(IP_MAXFRAGS, IPREASS_NHASH * 50))
+
+static int maxfrags;
+static volatile u_int nfrags;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW,
+ &maxfrags, 0,
+ "Maximum number of IPv4 fragments allowed across all reassembly queues");
+SYSCTL_UINT(_net_inet_ip, OID_AUTO, curfrags, CTLFLAG_RD,
+ __DEVOLATILE(u_int *, &nfrags), 0,
+ "Current number of IPv4 fragments across all reassembly queues");
+
+VNET_DEFINE_STATIC(uma_zone_t, ipq_zone);
#define V_ipq_zone VNET(ipq_zone)
SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_VNET |
CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_maxfragpackets, "I",
@@ -121,14 +152,18 @@ SYSCTL_UMA_CUR(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_VNET,
&VNET_NAME(ipq_zone),
"Current number of IPv4 fragment reassembly queue entries");
-static VNET_DEFINE(int, noreass);
+VNET_DEFINE_STATIC(int, noreass);
#define V_noreass VNET(noreass)
-static VNET_DEFINE(int, maxfragsperpacket);
+VNET_DEFINE_STATIC(int, maxfragsperpacket);
#define V_maxfragsperpacket VNET(maxfragsperpacket)
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(maxfragsperpacket), 0,
"Maximum number of IPv4 fragments allowed per packet");
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragbucketsize,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
+ sysctl_maxfragbucketsize, "I",
+ "Maximum number of IPv4 fragment reassembly queue entries per bucket");
/*
* Take incoming datagram fragment and try to reassemble it into
@@ -148,9 +183,9 @@ ip_reass(struct mbuf *m)
struct mbuf *p, *q, *nq, *t;
struct ipq *fp;
struct ipqhead *head;
- int i, hlen, next;
+ int i, hlen, next, tmpmax;
u_int8_t ecn, ecn0;
- uint32_t hash;
+ uint32_t hash, hashkey[3];
#ifdef RSS
uint32_t rss_hash, rss_type;
#endif
@@ -158,8 +193,12 @@ ip_reass(struct mbuf *m)
/*
* If no reassembling or maxfragsperpacket are 0,
* never accept fragments.
+ * Also, drop packet if it would exceed the maximum
+ * number of fragments.
*/
- if (V_noreass == 1 || V_maxfragsperpacket == 0) {
+ tmpmax = maxfrags;
+ if (V_noreass == 1 || V_maxfragsperpacket == 0 ||
+ (tmpmax >= 0 && atomic_load_int(&nfrags) >= (u_int)tmpmax)) {
IPSTAT_INC(ips_fragments);
IPSTAT_INC(ips_fragdropped);
m_freem(m);
@@ -204,8 +243,12 @@ ip_reass(struct mbuf *m)
m->m_data += hlen;
m->m_len -= hlen;
- hash = ip->ip_src.s_addr ^ ip->ip_id;
- hash = jenkins_hash32(&hash, 1, V_ipq_hashseed) & IPREASS_HMASK;
+ hashkey[0] = ip->ip_src.s_addr;
+ hashkey[1] = ip->ip_dst.s_addr;
+ hashkey[2] = (uint32_t)ip->ip_p << 16;
+ hashkey[2] += ip->ip_id;
+ hash = jenkins_hash32(hashkey, nitems(hashkey), V_ipq_hashseed);
+ hash &= IPREASS_HMASK;
head = &V_ipq[hash].head;
IPQ_LOCK(hash);
@@ -226,9 +269,12 @@ ip_reass(struct mbuf *m)
* If first fragment to arrive, create a reassembly queue.
*/
if (fp == NULL) {
- fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
+ if (V_ipq[hash].count < V_ipreass_maxbucketsize)
+ fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
if (fp == NULL)
fp = ipq_reuse(hash);
+ if (fp == NULL)
+ goto dropfrag;
#ifdef MAC
if (mac_ipq_init(fp, M_NOWAIT) != 0) {
uma_zfree(V_ipq_zone, fp);
@@ -238,7 +284,9 @@ ip_reass(struct mbuf *m)
mac_ipq_create(m, fp);
#endif
TAILQ_INSERT_HEAD(head, fp, ipq_list);
+ V_ipq[hash].count++;
fp->ipq_nfrags = 1;
+ atomic_add_int(&nfrags, 1);
fp->ipq_ttl = IPFRAGTTL;
fp->ipq_p = ip->ip_p;
fp->ipq_id = ip->ip_id;
@@ -249,6 +297,7 @@ ip_reass(struct mbuf *m)
goto done;
} else {
fp->ipq_nfrags++;
+ atomic_add_int(&nfrags, 1);
#ifdef MAC
mac_ipq_update(m, fp);
#endif
@@ -325,6 +374,7 @@ ip_reass(struct mbuf *m)
m->m_nextpkt = nq;
IPSTAT_INC(ips_fragdropped);
fp->ipq_nfrags--;
+ atomic_subtract_int(&nfrags, 1);
m_freem(q);
}
@@ -342,7 +392,7 @@ ip_reass(struct mbuf *m)
for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
if (ntohs(GETIP(q)->ip_off) != next) {
if (fp->ipq_nfrags > V_maxfragsperpacket)
- ipq_drop(head, fp);
+ ipq_drop(&V_ipq[hash], fp);
goto done;
}
next += ntohs(GETIP(q)->ip_len);
@@ -350,7 +400,7 @@ ip_reass(struct mbuf *m)
/* Make sure the last packet didn't have the IP_MF flag */
if (p->m_flags & M_IP_FRAG) {
if (fp->ipq_nfrags > V_maxfragsperpacket)
- ipq_drop(head, fp);
+ ipq_drop(&V_ipq[hash], fp);
goto done;
}
@@ -361,7 +411,7 @@ ip_reass(struct mbuf *m)
ip = GETIP(q);
if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
IPSTAT_INC(ips_toolong);
- ipq_drop(head, fp);
+ ipq_drop(&V_ipq[hash], fp);
goto done;
}
@@ -390,6 +440,7 @@ ip_reass(struct mbuf *m)
while (m->m_pkthdr.csum_data & 0xffff0000)
m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
(m->m_pkthdr.csum_data >> 16);
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags);
#ifdef MAC
mac_ipq_reassemble(fp, m);
mac_ipq_destroy(fp);
@@ -404,6 +455,7 @@ ip_reass(struct mbuf *m)
ip->ip_src = fp->ipq_src;
ip->ip_dst = fp->ipq_dst;
TAILQ_REMOVE(head, fp, ipq_list);
+ V_ipq[hash].count--;
uma_zfree(V_ipq_zone, fp);
m->m_len += (ip->ip_hl << 2);
m->m_data -= (ip->ip_hl << 2);
@@ -449,8 +501,10 @@ ip_reass(struct mbuf *m)
dropfrag:
IPSTAT_INC(ips_fragdropped);
- if (fp != NULL)
+ if (fp != NULL) {
fp->ipq_nfrags--;
+ atomic_subtract_int(&nfrags, 1);
+ }
m_freem(m);
done:
IPQ_UNLOCK(hash);
@@ -465,21 +519,27 @@ done:
void
ipreass_init(void)
{
+ int max;
for (int i = 0; i < IPREASS_NHASH; i++) {
TAILQ_INIT(&V_ipq[i].head);
mtx_init(&V_ipq[i].lock, "IP reassembly", NULL,
MTX_DEF | MTX_DUPOK);
+ V_ipq[i].count = 0;
}
V_ipq_hashseed = arc4random();
V_maxfragsperpacket = 16;
V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
NULL, UMA_ALIGN_PTR, 0);
- uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
+ max = IP_MAXFRAGPACKETS;
+ max = uma_zone_set_max(V_ipq_zone, max);
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
- if (IS_DEFAULT_VNET(curvnet))
+ if (IS_DEFAULT_VNET(curvnet)) {
+ maxfrags = IP_MAXFRAGS;
EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
+ }
}
/*
@@ -494,7 +554,7 @@ ipreass_slowtimo(void)
IPQ_LOCK(i);
TAILQ_FOREACH_SAFE(fp, &V_ipq[i].head, ipq_list, tmp)
if (--fp->ipq_ttl == 0)
- ipq_timeout(&V_ipq[i].head, fp);
+ ipq_timeout(&V_ipq[i], fp);
IPQ_UNLOCK(i);
}
}
@@ -509,7 +569,10 @@ ipreass_drain(void)
for (int i = 0; i < IPREASS_NHASH; i++) {
IPQ_LOCK(i);
while(!TAILQ_EMPTY(&V_ipq[i].head))
- ipq_drop(&V_ipq[i].head, TAILQ_FIRST(&V_ipq[i].head));
+ ipq_drop(&V_ipq[i], TAILQ_FIRST(&V_ipq[i].head));
+ KASSERT(V_ipq[i].count == 0,
+ ("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i,
+ V_ipq[i].count, V_ipq));
IPQ_UNLOCK(i);
}
}
@@ -537,9 +600,23 @@ ipreass_destroy(void)
static void
ipreass_drain_tomax(void)
{
+ struct ipq *fp;
int target;
/*
+ * Make sure each bucket is under the new limit. If
+ * necessary, drop enough of the oldest elements from
+ * each bucket to get under the new limit.
+ */
+ for (int i = 0; i < IPREASS_NHASH; i++) {
+ IPQ_LOCK(i);
+ while (V_ipq[i].count > V_ipreass_maxbucketsize &&
+ (fp = TAILQ_LAST(&V_ipq[i].head, ipqhead)) != NULL)
+ ipq_timeout(&V_ipq[i], fp);
+ IPQ_UNLOCK(i);
+ }
+
+ /*
* If we are over the maximum number of fragments,
* drain off enough to get down to the new limit,
* stripping off last elements on queues. Every
@@ -547,13 +624,11 @@ ipreass_drain_tomax(void)
*/
target = uma_zone_get_max(V_ipq_zone);
while (uma_zone_get_cur(V_ipq_zone) > target) {
- struct ipq *fp;
-
for (int i = 0; i < IPREASS_NHASH; i++) {
IPQ_LOCK(i);
fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
if (fp != NULL)
- ipq_timeout(&V_ipq[i].head, fp);
+ ipq_timeout(&V_ipq[i], fp);
IPQ_UNLOCK(i);
}
}
@@ -562,9 +637,20 @@ ipreass_drain_tomax(void)
static void
ipreass_zone_change(void *tag)
{
-
- uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
- ipreass_drain_tomax();
+ VNET_ITERATOR_DECL(vnet_iter);
+ int max;
+
+ maxfrags = IP_MAXFRAGS;
+ max = IP_MAXFRAGPACKETS;
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ max = uma_zone_set_max(V_ipq_zone, max);
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
+ ipreass_drain_tomax();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@@ -592,6 +678,7 @@ sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS)
* and place an extreme upper bound.
*/
max = uma_zone_set_max(V_ipq_zone, max);
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
ipreass_drain_tomax();
V_noreass = 0;
} else if (max == 0) {
@@ -600,6 +687,7 @@ sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS)
} else if (max == -1) {
V_noreass = 0;
uma_zone_set_max(V_ipq_zone, 0);
+ V_ipreass_maxbucketsize = INT_MAX;
} else
return (EINVAL);
return (0);
@@ -613,49 +701,72 @@ static struct ipq *
ipq_reuse(int start)
{
struct ipq *fp;
- int i;
+ int bucket, i;
IPQ_LOCK_ASSERT(start);
- for (i = start;; i++) {
- if (i == IPREASS_NHASH)
- i = 0;
- if (i != start && IPQ_TRYLOCK(i) == 0)
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ bucket = (start + i) % IPREASS_NHASH;
+ if (bucket != start && IPQ_TRYLOCK(bucket) == 0)
continue;
- fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
+ fp = TAILQ_LAST(&V_ipq[bucket].head, ipqhead);
if (fp) {
struct mbuf *m;
IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags);
while (fp->ipq_frags) {
m = fp->ipq_frags;
fp->ipq_frags = m->m_nextpkt;
m_freem(m);
}
- TAILQ_REMOVE(&V_ipq[i].head, fp, ipq_list);
- if (i != start)
- IPQ_UNLOCK(i);
- IPQ_LOCK_ASSERT(start);
- return (fp);
+ TAILQ_REMOVE(&V_ipq[bucket].head, fp, ipq_list);
+ V_ipq[bucket].count--;
+ if (bucket != start)
+ IPQ_UNLOCK(bucket);
+ break;
}
- if (i != start)
- IPQ_UNLOCK(i);
+ if (bucket != start)
+ IPQ_UNLOCK(bucket);
}
+ IPQ_LOCK_ASSERT(start);
+ return (fp);
}
/*
* Free a fragment reassembly header and all associated datagrams.
*/
static void
-ipq_free(struct ipqhead *fhp, struct ipq *fp)
+ipq_free(struct ipqbucket *bucket, struct ipq *fp)
{
struct mbuf *q;
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags); /* fragments of this queue leave the global nfrags total */
while (fp->ipq_frags) {
q = fp->ipq_frags;
fp->ipq_frags = q->m_nextpkt;
m_freem(q);
}
- TAILQ_REMOVE(fhp, fp, ipq_list);
+ TAILQ_REMOVE(&bucket->head, fp, ipq_list); /* unlink from the list owned by this bucket */
+ bucket->count--; /* keep the per-bucket queue count in step with the list */
uma_zfree(V_ipq_zone, fp);
}
+
+/*
+ * Get or set the maximum number of reassembly queues per bucket.
+ *
+ * The current limit is handed to sysctl_handle_int().  If that call
+ * fails, or the request carries no new value (req->newptr is NULL),
+ * the handler returns without changing anything.  A new value that is
+ * zero or negative is rejected with EINVAL.  Otherwise the new limit
+ * is stored and ipreass_drain_tomax() is called to apply it.
+ */
+static int
+sysctl_maxfragbucketsize(SYSCTL_HANDLER_ARGS)
+{
+ int error, max;
+
+ max = V_ipreass_maxbucketsize;
+ error = sysctl_handle_int(oidp, &max, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (max <= 0)
+ return (EINVAL);
+ V_ipreass_maxbucketsize = max;
+ ipreass_drain_tomax();
+ return (0);
+}
diff --git a/freebsd/sys/netinet/libalias/alias.c b/freebsd/sys/netinet/libalias/alias.c
index 2dd5b999..d4eeb040 100644
--- a/freebsd/sys/netinet/libalias/alias.c
+++ b/freebsd/sys/netinet/libalias/alias.c
@@ -1753,7 +1753,8 @@ LibAliasUnLoadAllModule(void)
* the input packet, on failure NULL. The input packet is always consumed.
*/
struct mbuf *
-m_megapullup(struct mbuf *m, int len) {
+m_megapullup(struct mbuf *m, int len)
+{
struct mbuf *mcl;
if (len > m->m_pkthdr.len)
@@ -1762,7 +1763,14 @@ m_megapullup(struct mbuf *m, int len) {
if (m->m_next == NULL && M_WRITABLE(m))
return (m);
- mcl = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (len <= MJUMPAGESIZE)
+ mcl = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
+ else if (len <= MJUM9BYTES)
+ mcl = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
+ else if (len <= MJUM16BYTES)
+ mcl = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
+ else
+ goto bad;
if (mcl == NULL)
goto bad;
m_align(mcl, len);
diff --git a/freebsd/sys/netinet/libalias/alias_irc.c b/freebsd/sys/netinet/libalias/alias_irc.c
index 1dbb9ddf..19337121 100644
--- a/freebsd/sys/netinet/libalias/alias_irc.c
+++ b/freebsd/sys/netinet/libalias/alias_irc.c
@@ -100,8 +100,7 @@ static int
fingerprint(struct libalias *la, struct alias_data *ah)
{
- if (ah->dport == NULL || ah->dport == NULL || ah->lnk == NULL ||
- ah->maxpktsize == 0)
+ if (ah->dport == NULL || ah->lnk == NULL || ah->maxpktsize == 0)
return (-1);
if (ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_1
|| ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_2)
diff --git a/freebsd/sys/netinet/libalias/alias_mod.h b/freebsd/sys/netinet/libalias/alias_mod.h
index c646f794..a894b6de 100644
--- a/freebsd/sys/netinet/libalias/alias_mod.h
+++ b/freebsd/sys/netinet/libalias/alias_mod.h
@@ -41,17 +41,17 @@ MALLOC_DECLARE(M_ALIAS);
/* Use kernel allocator. */
#if defined(_SYS_MALLOC_H_)
+#undef malloc
#ifndef __rtems__
#define malloc(x) malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
#define calloc(n, x) mallocarray((n), (x), M_ALIAS, M_NOWAIT|M_ZERO)
#define free(x) free(x, M_ALIAS)
#else /* __rtems__ */
-#undef malloc
#undef calloc
#undef free
-#define malloc(x) _bsd_malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
-#define calloc(x, n) malloc(x*n)
-#define free(x) _bsd_free(x, M_ALIAS)
+#define malloc(x) _bsd_malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
+#define calloc(n, x) mallocarray((n), (x), M_ALIAS, M_NOWAIT|M_ZERO)
+#define free(x) _bsd_free(x, M_ALIAS)
#endif /* __rtems__ */
#endif
#endif
diff --git a/freebsd/sys/netinet/pim_var.h b/freebsd/sys/netinet/pim_var.h
index e6398a4d..dfb06928 100644
--- a/freebsd/sys/netinet/pim_var.h
+++ b/freebsd/sys/netinet/pim_var.h
@@ -73,8 +73,6 @@ struct pimstat {
#define PIMCTL_STATS 1 /* statistics (read-only) */
#ifdef _KERNEL
-
-int pim_input(struct mbuf **, int *, int);
SYSCTL_DECL(_net_inet_pim);
#endif
diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c
index 7dea3ec1..a97eadae 100644
--- a/freebsd/sys/netinet/raw_ip.c
+++ b/freebsd/sys/netinet/raw_ip.c
@@ -172,7 +172,7 @@ rip_inshash(struct inpcb *inp)
} else
hash = 0;
pcbhash = &pcbinfo->ipi_hashbase[hash];
- LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
+ CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
}
static void
@@ -182,7 +182,7 @@ rip_delhash(struct inpcb *inp)
INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
- LIST_REMOVE(inp, inp_hash);
+ CK_LIST_REMOVE(inp, inp_hash);
}
#endif /* INET */
@@ -287,6 +287,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
struct ip *ip = mtod(m, struct ip *);
struct inpcb *inp, *last;
struct sockaddr_in ripsrc;
+ struct epoch_tracker et;
int hash;
*mp = NULL;
@@ -301,8 +302,8 @@ rip_input(struct mbuf **mp, int *offp, int proto)
hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
- INP_INFO_RLOCK(&V_ripcbinfo);
- LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
+ CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
if (inp->inp_ip_p != proto)
continue;
#ifdef INET6
@@ -314,27 +315,33 @@ rip_input(struct mbuf **mp, int *offp, int proto)
continue;
if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
continue;
- if (jailed_without_vnet(inp->inp_cred)) {
- /*
- * XXX: If faddr was bound to multicast group,
- * jailed raw socket will drop datagram.
- */
- if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
- continue;
- }
if (last != NULL) {
struct mbuf *n;
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (n != NULL)
- (void) rip_append(last, ip, n, &ripsrc);
+ (void) rip_append(last, ip, n, &ripsrc);
/* XXX count dropped packet */
INP_RUNLOCK(last);
+ last = NULL;
}
INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED))
+ goto skip_1;
+ if (jailed_without_vnet(inp->inp_cred)) {
+ /*
+ * XXX: If faddr was bound to multicast group,
+ * jailed raw socket will drop datagram.
+ */
+ if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
+ goto skip_1;
+ }
last = inp;
+ continue;
+ skip_1:
+ INP_RUNLOCK(inp);
}
- LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
+ CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
if (inp->inp_ip_p && inp->inp_ip_p != proto)
continue;
#ifdef INET6
@@ -348,6 +355,19 @@ rip_input(struct mbuf **mp, int *offp, int proto)
if (!in_nullhost(inp->inp_faddr) &&
!in_hosteq(inp->inp_faddr, ip->ip_src))
continue;
+ if (last != NULL) {
+ struct mbuf *n;
+
+ n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+ if (n != NULL)
+ (void) rip_append(last, ip, n, &ripsrc);
+ /* XXX count dropped packet */
+ INP_RUNLOCK(last);
+ last = NULL;
+ }
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED))
+ goto skip_2;
if (jailed_without_vnet(inp->inp_cred)) {
/*
* Allow raw socket in jail to receive multicast;
@@ -356,7 +376,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
*/
if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
- continue;
+ goto skip_2;
}
/*
* If this raw socket has multicast state, and we
@@ -397,22 +417,15 @@ rip_input(struct mbuf **mp, int *offp, int proto)
if (blocked != MCAST_PASS) {
IPSTAT_INC(ips_notmember);
- continue;
+ goto skip_2;
}
}
- if (last != NULL) {
- struct mbuf *n;
-
- n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
- if (n != NULL)
- (void) rip_append(last, ip, n, &ripsrc);
- /* XXX count dropped packet */
- INP_RUNLOCK(last);
- }
- INP_RLOCK(inp);
last = inp;
+ continue;
+ skip_2:
+ INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
if (rip_append(last, ip, m, &ripsrc) != 0)
IPSTAT_INC(ips_delivered);
@@ -853,7 +866,6 @@ rip_detach(struct socket *so)
ip_rsvp_force_done(so);
if (so == V_ip_rsvpd)
ip_rsvp_done();
- /* XXX defer to epoch_call */
in_pcbdetach(inp);
in_pcbfree(inp);
INP_INFO_WUNLOCK(&V_ripcbinfo);
@@ -1023,10 +1035,10 @@ static int
rip_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
- struct in_pcblist *il;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
+ struct epoch_tracker et;
/*
* The process of preparing the TCB list is too time-consuming and
@@ -1045,10 +1057,10 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_ripcbinfo);
+ INP_INFO_WLOCK(&V_ripcbinfo);
gencnt = V_ripcbinfo.ipi_gencnt;
n = V_ripcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
xig.xig_len = sizeof xig;
xig.xig_count = n;
@@ -1058,12 +1070,11 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
if (error)
return (error);
- il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS);
- inp_list = il->il_inp_list;
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- INP_INFO_RLOCK(&V_ripcbinfo);
- for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = LIST_NEXT(inp, inp_list)) {
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
+ for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
@@ -1072,7 +1083,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
n = i;
error = 0;
@@ -1088,24 +1099,31 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- il->il_count = n;
- il->il_pcbinfo = &V_ripcbinfo;
- epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked);
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
if (!error) {
+ struct epoch_tracker et;
/*
* Give the user an updated idea of our state. If the
* generation differs from what we told her before, she knows
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_ripcbinfo);
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_ripcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
+ free(inp_list, M_TEMP);
return (error);
}
diff --git a/freebsd/sys/netinet/sctp.h b/freebsd/sys/netinet/sctp.h
index 5a86f108..64fd5442 100644
--- a/freebsd/sys/netinet/sctp.h
+++ b/freebsd/sys/netinet/sctp.h
@@ -419,7 +419,7 @@ struct sctp_error_unresolv_addr {
struct sctp_error_unrecognized_chunk {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNRECOG_CHUNK */
- struct sctp_chunkhdr ch;/* header from chunk in error */
+ struct sctp_chunkhdr ch; /* header from chunk in error */
} SCTP_PACKED;
struct sctp_error_no_user_data {
diff --git a/freebsd/sys/netinet/sctp_asconf.c b/freebsd/sys/netinet/sctp_asconf.c
index d2d990e1..c21e3251 100644
--- a/freebsd/sys/netinet/sctp_asconf.c
+++ b/freebsd/sys/netinet/sctp_asconf.c
@@ -279,6 +279,7 @@ sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src)
/* not found */
return (-1);
}
+
/* delete all destination addresses except the source */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
if (net != src_net) {
@@ -385,6 +386,7 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
aparam_length);
return (m_reply);
}
+
/* if deleting 0.0.0.0/::0, delete all addresses except src addr */
if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
result = sctp_asconf_del_remote_addrs_except(stcb, src);
@@ -403,6 +405,7 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
}
return (m_reply);
}
+
/* delete the address */
result = sctp_del_remote_addr(stcb, sa);
/*
@@ -618,6 +621,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
serial_num, asoc->asconf_seq_in + 1);
return;
}
+
/* it's the expected "next" sequence number, so process it */
asoc->asconf_seq_in = serial_num; /* update sequence */
/* get length of all the param's in the ASCONF */
@@ -642,6 +646,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asconf_ack), ack);
}
}
+
m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0,
M_NOWAIT, 1, MT_DATA);
if (m_ack == NULL) {
@@ -976,6 +981,7 @@ sctp_assoc_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *dstnet)
if (stcb->asoc.deleted_primary == NULL) {
return;
}
+
if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
SCTPDBG(SCTP_DEBUG_ASCONF1, "assoc_immediate_retrans: Deleted primary is ");
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
@@ -1079,6 +1085,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
}
return;
}
+
/* Multiple local addresses exist in the association. */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
/* clear any cached route and source address */
@@ -1325,6 +1332,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
if (stcb->asoc.asconf_supported == 0) {
return (-1);
}
+
/*
* if this is deleting the last address from the assoc, mark it as
* pending.
@@ -1345,6 +1353,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
return (-1);
}
}
+
/* queue an asconf parameter */
status = sctp_asconf_queue_mgmt(stcb, ifa, type);
@@ -1366,6 +1375,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
stcb->asoc.asconf_addr_del_pending = NULL;
}
}
+
if (pending_delete_queued) {
struct sctp_nets *net;
@@ -1390,6 +1400,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
SCTP_FROM_SCTP_ASCONF,
__LINE__);
}
+
/* queue in an advisory set primary too */
(void)sctp_asconf_queue_mgmt(stcb, ifa, SCTP_SET_PRIM_ADDR);
/* let caller know we should send this out immediately */
@@ -1687,11 +1698,13 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset,
serial_num, asoc->asconf_seq_out_acked + 1);
return;
}
+
if (serial_num == asoc->asconf_seq_out - 1) {
/* stop our timer */
sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
SCTP_FROM_SCTP_ASCONF + SCTP_LOC_5);
}
+
/* process the ASCONF-ACK contents */
ack_length = ntohs(cp->ch.chunk_length) -
sizeof(struct sctp_asconf_ack_chunk);
@@ -1780,7 +1793,7 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset,
* at any given time
*/
if (last_error_id == 0)
- last_error_id--;/* set to "max" value */
+ last_error_id--; /* set to "max" value */
TAILQ_FOREACH_SAFE(aa, &stcb->asoc.asconf_queue, next, aa_next) {
if (aa->sent == 1) {
/*
@@ -1980,8 +1993,8 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* sent when the state goes open.
*/
if (status == 0 &&
- ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED))) {
+ ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED))) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
stcb, stcb->asoc.primary_destination);
@@ -2060,6 +2073,7 @@ sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val SCTP
laddr->action = 0;
break;
}
+
}
} else if (l->action == SCTP_DEL_IP_ADDRESS) {
LIST_FOREACH_SAFE(laddr, &inp->sctp_addr_list, sctp_nxt_addr, nladdr) {
@@ -2093,6 +2107,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
if (ifa->vrf_id != stcb->asoc.vrf_id) {
continue;
}
+
/* Same checks again for assoc */
switch (ifa->address.sa.sa_family) {
#ifdef INET6
@@ -2229,8 +2244,8 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* count of queued params. If in the non-open
* state, these get sent when the assoc goes open.
*/
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
if (status >= 0) {
num_queued++;
}
@@ -2283,6 +2298,7 @@ sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa)
/* Invalid address */
return (-1);
}
+
/* queue an ASCONF:SET_PRIM_ADDR to be sent */
if (!sctp_asconf_queue_add(stcb, ifa, SCTP_SET_PRIM_ADDR)) {
/* set primary queuing succeeded */
@@ -2290,8 +2306,8 @@ sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa)
"set_primary_ip_address_sa: queued on tcb=%p, ",
(void *)stcb);
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
stcb->sctp_ep, stcb,
@@ -2361,11 +2377,13 @@ sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa)
SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: param length(%u) too short\n", param_length);
break;
}
+
aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(chk->data, offset, param_length, aparam_buf);
if (aph == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: couldn't get entire param\n");
break;
}
+
ph = (struct sctp_paramhdr *)(aph + 1);
if (sctp_addr_match(ph, &sctp_ifa->address.sa) != 0) {
switch (param_type) {
@@ -2380,6 +2398,7 @@ sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa)
}
last_param_type = param_type;
}
+
offset += SCTP_SIZE32(param_length);
if (offset >= asconf_limit) {
/* no more data in the mbuf chain */
@@ -2463,6 +2482,7 @@ sctp_find_valid_localaddr(struct sctp_tcb *stcb, int addr_locked)
if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
continue;
}
+
sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/*
@@ -2826,8 +2846,7 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
* out the ASCONF.
*/
if (status == 0 &&
- SCTP_GET_STATE(&stcb->asoc) ==
- SCTP_STATE_OPEN) {
+ SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
stcb->sctp_ep, stcb,
@@ -2838,6 +2857,7 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
}
}
}
+
next_addr:
/*
* Sanity check: Make sure the length isn't 0, otherwise
@@ -3372,6 +3392,7 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb,
if (vrf == NULL) {
goto skip_rest;
}
+
SCTP_IPI_ADDR_RLOCK();
LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
diff --git a/freebsd/sys/netinet/sctp_asconf.h b/freebsd/sys/netinet/sctp_asconf.h
index 2a372205..581d504c 100644
--- a/freebsd/sys/netinet/sctp_asconf.h
+++ b/freebsd/sys/netinet/sctp_asconf.h
@@ -60,10 +60,10 @@ sctp_addr_mgmt_ep_sa(struct sctp_inpcb *, struct sockaddr *,
uint32_t, uint32_t, struct sctp_ifa *);
-extern int
+extern int
sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr,
uint32_t val);
-extern void
+extern void
sctp_asconf_iterator_stcb(struct sctp_inpcb *inp,
struct sctp_tcb *stcb,
void *ptr, uint32_t type);
diff --git a/freebsd/sys/netinet/sctp_auth.c b/freebsd/sys/netinet/sctp_auth.c
index d8fbcf6e..0fc076e1 100644
--- a/freebsd/sys/netinet/sctp_auth.c
+++ b/freebsd/sys/netinet/sctp_auth.c
@@ -1311,6 +1311,7 @@ sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid)
/* can't reactivate a deactivated key with other refcounts */
return (-1);
}
+
/* set the (new) active key */
stcb->asoc.authinfo.active_keyid = keyid;
/* reset the deactivated flag */
@@ -1365,6 +1366,7 @@ sctp_deact_sharedkey(struct sctp_tcb *stcb, uint16_t keyid)
sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb, keyid, 0,
SCTP_SO_LOCKED);
}
+
/* mark the key as deactivated */
skey->deactivated = 1;
@@ -1506,6 +1508,8 @@ sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
if (p_random != NULL) {
keylen = sizeof(*p_random) + random_len;
memcpy(new_key->key, p_random, keylen);
+ } else {
+ keylen = 0;
}
/* append in the AUTH chunks */
if (chunks != NULL) {
@@ -1582,6 +1586,7 @@ sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
"Assoc Key");
#endif
}
+
/* set in the active key id */
auth->shared_key_id = htons(keyid);
@@ -1769,6 +1774,7 @@ sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication,
/* If the socket is gone we are out of here */
return;
}
+
if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_AUTHEVNT))
/* event not enabled */
return;
@@ -1929,6 +1935,7 @@ sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit)
if (num_chunks)
got_chklist = 1;
}
+
offset += SCTP_SIZE32(plen);
if (offset >= limit) {
break;
@@ -2023,6 +2030,7 @@ sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
new_key->key[keylen++] = i;
}
}
+
/* append in the HMACs */
ph = (struct sctp_paramhdr *)(new_key->key + keylen);
ph->param_type = htons(SCTP_HMAC_LIST);
diff --git a/freebsd/sys/netinet/sctp_auth.h b/freebsd/sys/netinet/sctp_auth.h
index 66990c30..44126e3e 100644
--- a/freebsd/sys/netinet/sctp_auth.h
+++ b/freebsd/sys/netinet/sctp_auth.h
@@ -87,7 +87,7 @@ typedef struct sctp_hmaclist {
typedef struct sctp_authinformation {
sctp_key_t *random; /* local random key (concatenated) */
uint32_t random_len; /* local random number length for param */
- sctp_key_t *peer_random;/* peer's random key (concatenated) */
+ sctp_key_t *peer_random; /* peer's random key (concatenated) */
sctp_key_t *assoc_key; /* cached concatenated send key */
sctp_key_t *recv_key; /* cached concatenated recv key */
uint16_t active_keyid; /* active send keyid */
@@ -114,13 +114,13 @@ extern sctp_auth_chklist_t *sctp_copy_chunklist(sctp_auth_chklist_t *chklist);
extern int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t *list);
extern int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t *list);
extern size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t *list);
-extern int
+extern int
sctp_serialize_auth_chunks(const sctp_auth_chklist_t *list,
uint8_t *ptr);
-extern int
+extern int
sctp_pack_auth_chunks(const sctp_auth_chklist_t *list,
uint8_t *ptr);
-extern int
+extern int
sctp_unpack_auth_chunks(const uint8_t *ptr, uint8_t num_chunks,
sctp_auth_chklist_t *list);
@@ -141,16 +141,16 @@ extern void sctp_free_sharedkey(sctp_sharedkey_t *skey);
extern sctp_sharedkey_t *
sctp_find_sharedkey(struct sctp_keyhead *shared_keys,
uint16_t key_id);
-extern int
+extern int
sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
sctp_sharedkey_t *new_skey);
-extern int
+extern int
sctp_copy_skeylist(const struct sctp_keyhead *src,
struct sctp_keyhead *dest);
/* ref counts on shared keys, by key id */
extern void sctp_auth_key_acquire(struct sctp_tcb *stcb, uint16_t keyid);
-extern void
+extern void
sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t keyid,
int so_locked);
@@ -161,11 +161,11 @@ extern void sctp_free_hmaclist(sctp_hmaclist_t *list);
extern int sctp_auth_add_hmacid(sctp_hmaclist_t *list, uint16_t hmac_id);
extern sctp_hmaclist_t *sctp_copy_hmaclist(sctp_hmaclist_t *list);
extern sctp_hmaclist_t *sctp_default_supported_hmaclist(void);
-extern uint16_t
+extern uint16_t
sctp_negotiate_hmacid(sctp_hmaclist_t *peer,
sctp_hmaclist_t *local);
extern int sctp_serialize_hmaclist(sctp_hmaclist_t *list, uint8_t *ptr);
-extern int
+extern int
sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs,
uint32_t num_hmacs);
@@ -175,22 +175,22 @@ extern void sctp_free_authinfo(sctp_authinfo_t *authinfo);
/* keyed-HMAC functions */
extern uint32_t sctp_get_auth_chunk_len(uint16_t hmac_algo);
extern uint32_t sctp_get_hmac_digest_len(uint16_t hmac_algo);
-extern uint32_t
+extern uint32_t
sctp_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
uint8_t *text, uint32_t textlen, uint8_t *digest);
-extern int
+extern int
sctp_verify_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
uint8_t *text, uint32_t textlen, uint8_t *digest, uint32_t digestlen);
-extern uint32_t
+extern uint32_t
sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t *key,
uint8_t *text, uint32_t textlen, uint8_t *digest);
extern int sctp_auth_is_supported_hmac(sctp_hmaclist_t *list, uint16_t id);
/* mbuf versions */
-extern uint32_t
+extern uint32_t
sctp_hmac_m(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
struct mbuf *m, uint32_t m_offset, uint8_t *digest, uint32_t trailer);
-extern uint32_t
+extern uint32_t
sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t *key,
struct mbuf *m, uint32_t m_offset, uint8_t *digest);
@@ -206,26 +206,26 @@ extern int sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid);
extern int sctp_deact_sharedkey(struct sctp_tcb *stcb, uint16_t keyid);
extern int sctp_deact_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid);
-extern void
+extern void
sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
uint32_t offset, uint32_t length);
-extern void
+extern void
sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
struct sctp_auth_chunk *auth, struct sctp_tcb *stcb, uint16_t key_id);
extern struct mbuf *
sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
struct sctp_auth_chunk **auth_ret, uint32_t *offset,
struct sctp_tcb *stcb, uint8_t chunk);
-extern int
+extern int
sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *ch,
struct mbuf *m, uint32_t offset);
-extern void
+extern void
sctp_notify_authentication(struct sctp_tcb *stcb,
uint32_t indication, uint16_t keyid, uint16_t alt_keyid, int so_locked);
-extern int
+extern int
sctp_validate_init_auth_params(struct mbuf *m, int offset,
int limit);
-extern void
+extern void
sctp_initialize_auth_params(struct sctp_inpcb *inp,
struct sctp_tcb *stcb);
diff --git a/freebsd/sys/netinet/sctp_bsd_addr.c b/freebsd/sys/netinet/sctp_bsd_addr.c
index 94c23bff..0f0ddd89 100644
--- a/freebsd/sys/netinet/sctp_bsd_addr.c
+++ b/freebsd/sys/netinet/sctp_bsd_addr.c
@@ -307,10 +307,12 @@ sctp_addr_change(struct ifaddr *ifa, int cmd)
SCTP_BASE_VAR(first_time) = 1;
sctp_init_ifns_for_vrf(SCTP_DEFAULT_VRFID);
}
+
if ((cmd != RTM_ADD) && (cmd != RTM_DELETE)) {
/* don't know what to do with this */
return;
}
+
if (ifa->ifa_addr == NULL) {
return;
}
diff --git a/freebsd/sys/netinet/sctp_cc_functions.c b/freebsd/sys/netinet/sctp_cc_functions.c
index e8d6a354..1163cb91 100644
--- a/freebsd/sys/netinet/sctp_cc_functions.c
+++ b/freebsd/sys/netinet/sctp_cc_functions.c
@@ -133,6 +133,7 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
t_ucwnd_sbw = 1;
}
}
+
/*-
* CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
* (net->fast_retran_loss_recovery == 0)))
@@ -1121,6 +1122,7 @@ sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
}
+
}
SCTP_STAT_INCR(sctps_ecnereducedcwnd);
} else {
@@ -1320,7 +1322,7 @@ sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *ne
static
-void
+void
sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net,
struct sctp_tmit_chunk *tp1)
{
@@ -1937,6 +1939,7 @@ measure_achieved_throughput(struct sctp_nets *net)
net->cc_mod.htcp_ca.lasttime = now;
return;
}
+
net->cc_mod.htcp_ca.bytecount += net->net_ack;
if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) &&
(now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) &&
@@ -1973,6 +1976,7 @@ htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
return;
}
}
+
if (ca->modeswitch && minRTT > (uint32_t)MSEC_TO_TICKS(10) && maxRTT) {
ca->beta = (minRTT << 7) / maxRTT;
if (ca->beta < BETA_MIN)
@@ -1996,6 +2000,7 @@ htcp_alpha_update(struct htcp *ca)
diff -= hz;
factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
}
+
if (use_rtt_scaling && minRTT) {
uint32_t scale = (hz << 3) / (10 * minRTT);
@@ -2005,6 +2010,7 @@ htcp_alpha_update(struct htcp *ca)
if (!factor)
factor = 1;
}
+
ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
if (!ca->alpha)
ca->alpha = ALPHA_BASE;
@@ -2059,12 +2065,14 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
sctp_log_cwnd(stcb, net, net->mtu,
SCTP_CWND_LOG_FROM_SS);
}
+
} else {
net->cwnd += net->net_ack;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, net->net_ack,
SCTP_CWND_LOG_FROM_SS);
}
+
}
sctp_enforce_cwnd_limit(&stcb->asoc, net);
} else {
diff --git a/freebsd/sys/netinet/sctp_constants.h b/freebsd/sys/netinet/sctp_constants.h
index 018cd282..d07381d5 100644
--- a/freebsd/sys/netinet/sctp_constants.h
+++ b/freebsd/sys/netinet/sctp_constants.h
@@ -470,10 +470,14 @@ __FBSDID("$FreeBSD$");
#define SCTP_STATE_IN_ACCEPT_QUEUE 0x1000
#define SCTP_STATE_MASK 0x007f
-#define SCTP_GET_STATE(asoc) ((asoc)->state & SCTP_STATE_MASK)
-#define SCTP_SET_STATE(asoc, newstate) ((asoc)->state = ((asoc)->state & ~SCTP_STATE_MASK) | newstate)
-#define SCTP_CLEAR_SUBSTATE(asoc, substate) ((asoc)->state &= ~substate)
-#define SCTP_ADD_SUBSTATE(asoc, substate) ((asoc)->state |= substate)
+#define SCTP_GET_STATE(_stcb) \
+ ((_stcb)->asoc.state & SCTP_STATE_MASK)
+#define SCTP_SET_STATE(_stcb, _state) \
+ sctp_set_state(_stcb, _state)
+#define SCTP_CLEAR_SUBSTATE(_stcb, _substate) \
+ (_stcb)->asoc.state &= ~(_substate)
+#define SCTP_ADD_SUBSTATE(_stcb, _substate) \
+ sctp_add_substate(_stcb, _substate)
/* SCTP reachability state for each address */
#define SCTP_ADDR_REACHABLE 0x001
diff --git a/freebsd/sys/netinet/sctp_dtrace_define.h b/freebsd/sys/netinet/sctp_dtrace_define.h
index 53451d20..ad7c8526 100644
--- a/freebsd/sys/netinet/sctp_dtrace_define.h
+++ b/freebsd/sys/netinet/sctp_dtrace_define.h
@@ -40,7 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/sdt.h>
-SDT_PROVIDER_DEFINE(sctp);
+SDT_PROVIDER_DECLARE(sctp);
/********************************************************/
/* Cwnd probe - tracks changes in the congestion window on a netp */
diff --git a/freebsd/sys/netinet/sctp_header.h b/freebsd/sys/netinet/sctp_header.h
index 685ed78a..8c4137a5 100644
--- a/freebsd/sys/netinet/sctp_header.h
+++ b/freebsd/sys/netinet/sctp_header.h
@@ -48,7 +48,7 @@ __FBSDID("$FreeBSD$");
* Parameter structures
*/
struct sctp_ipv4addr_param {
- struct sctp_paramhdr ph;/* type=SCTP_IPV4_PARAM_TYPE, len=8 */
+ struct sctp_paramhdr ph; /* type=SCTP_IPV4_PARAM_TYPE, len=8 */
uint32_t addr; /* IPV4 address */
} SCTP_PACKED;
@@ -56,20 +56,20 @@ struct sctp_ipv4addr_param {
struct sctp_ipv6addr_param {
- struct sctp_paramhdr ph;/* type=SCTP_IPV6_PARAM_TYPE, len=20 */
+ struct sctp_paramhdr ph; /* type=SCTP_IPV6_PARAM_TYPE, len=20 */
uint8_t addr[SCTP_V6_ADDR_BYTES]; /* IPV6 address */
} SCTP_PACKED;
/* Cookie Preservative */
struct sctp_cookie_perserve_param {
- struct sctp_paramhdr ph;/* type=SCTP_COOKIE_PRESERVE, len=8 */
+ struct sctp_paramhdr ph; /* type=SCTP_COOKIE_PRESERVE, len=8 */
uint32_t time; /* time in ms to extend cookie */
} SCTP_PACKED;
#define SCTP_ARRAY_MIN_LEN 1
/* Host Name Address */
struct sctp_host_name_param {
- struct sctp_paramhdr ph;/* type=SCTP_HOSTNAME_ADDRESS */
+ struct sctp_paramhdr ph; /* type=SCTP_HOSTNAME_ADDRESS */
char name[SCTP_ARRAY_MIN_LEN]; /* host name */
} SCTP_PACKED;
@@ -80,7 +80,7 @@ struct sctp_host_name_param {
#define SCTP_MAX_ADDR_PARAMS_SIZE 12
/* supported address type */
struct sctp_supported_addr_param {
- struct sctp_paramhdr ph;/* type=SCTP_SUPPORTED_ADDRTYPE */
+ struct sctp_paramhdr ph; /* type=SCTP_SUPPORTED_ADDRTYPE */
uint16_t addr_type[2]; /* array of supported address types */
} SCTP_PACKED;
@@ -108,8 +108,8 @@ struct sctp_prsctp_supported_param {
/* draft-ietf-tsvwg-addip-sctp */
struct sctp_asconf_paramhdr { /* an ASCONF "parameter" */
- struct sctp_paramhdr ph;/* a SCTP parameter header */
- uint32_t correlation_id;/* correlation id for this param */
+ struct sctp_paramhdr ph; /* a SCTP parameter header */
+ uint32_t correlation_id; /* correlation id for this param */
} SCTP_PACKED;
struct sctp_asconf_addr_param { /* an ASCONF address parameter */
@@ -133,7 +133,7 @@ struct sctp_asconf_addrv4_param { /* an ASCONF address (v4) parameter */
#define SCTP_MAX_SUPPORTED_EXT 256
struct sctp_supported_chunk_types_param {
- struct sctp_paramhdr ph;/* type = 0x8008 len = x */
+ struct sctp_paramhdr ph; /* type = 0x8008 len = x */
uint8_t chunk_types[];
} SCTP_PACKED;
@@ -206,8 +206,8 @@ struct sctp_state_cookie { /* this is our definition... */
uint16_t peerport; /* port address of the peer in the INIT */
uint16_t myport; /* my port address used in the INIT */
- uint8_t ipv4_addr_legal;/* Are V4 addr legal? */
- uint8_t ipv6_addr_legal;/* Are V6 addr legal? */
+ uint8_t ipv4_addr_legal; /* Are V4 addr legal? */
+ uint8_t ipv6_addr_legal; /* Are V6 addr legal? */
uint8_t local_scope; /* IPv6 local scope flag */
uint8_t site_scope; /* IPv6 site scope flag */
@@ -512,17 +512,17 @@ struct sctp_stream_reset_resp_tsn {
/* Should we make the max be 32? */
#define SCTP_RANDOM_MAX_SIZE 256
struct sctp_auth_random {
- struct sctp_paramhdr ph;/* type = 0x8002 */
+ struct sctp_paramhdr ph; /* type = 0x8002 */
uint8_t random_data[];
} SCTP_PACKED;
struct sctp_auth_chunk_list {
- struct sctp_paramhdr ph;/* type = 0x8003 */
+ struct sctp_paramhdr ph; /* type = 0x8003 */
uint8_t chunk_types[];
} SCTP_PACKED;
struct sctp_auth_hmac_algo {
- struct sctp_paramhdr ph;/* type = 0x8004 */
+ struct sctp_paramhdr ph; /* type = 0x8004 */
uint16_t hmac_ids[];
} SCTP_PACKED;
diff --git a/freebsd/sys/netinet/sctp_indata.c b/freebsd/sys/netinet/sctp_indata.c
index 98b397a2..28e3f5b2 100644
--- a/freebsd/sys/netinet/sctp_indata.c
+++ b/freebsd/sys/netinet/sctp_indata.c
@@ -92,6 +92,7 @@ sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
if (stcb->sctp_socket == NULL) {
return (calc);
}
+
KASSERT(asoc->cnt_on_reasm_queue > 0 || asoc->size_on_reasm_queue == 0,
("size_on_reasm_queue is %u", asoc->size_on_reasm_queue));
KASSERT(asoc->cnt_on_all_streams > 0 || asoc->size_on_all_streams == 0,
@@ -117,6 +118,7 @@ sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
/* out of space */
return (calc);
}
+
/* what is the overhead of all these rwnd's */
calc = sctp_sbspace_sub(calc, stcb->asoc.my_rwnd_control_len);
/*
@@ -187,6 +189,7 @@ sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo)
/* user does not want any ancillary data */
return (NULL);
}
+
len = 0;
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) {
len += CMSG_SPACE(sizeof(struct sctp_rcvinfo));
@@ -1046,6 +1049,7 @@ place_chunk:
SCTP_FROM_SCTP_INDATA + SCTP_LOC_5);
return;
}
+
}
if (inserted == 0) {
/* Its at the end */
@@ -2140,6 +2144,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
control = NULL;
goto finish_express_del;
}
+
/* Now will we need a chunk too? */
if ((chk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) {
sctp_alloc_a_chunk(stcb, chk);
@@ -2570,7 +2575,7 @@ sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap)
* Now we need to see if we need to queue a sack or just start the
* timer (if allowed).
*/
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) {
/*
* Ok special case, in SHUTDOWN-SENT case. here we maker
* sure SACK timer is off and instead send a SHUTDOWN and a
@@ -2927,7 +2932,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_last_rcvd);
}
/* now service all of the reassm queue if needed */
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) {
/* Assure that we ack right away */
stcb->asoc.send_sack = 1;
}
@@ -3075,7 +3080,7 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
tp1->whoTo->net_ack += tp1->send_size;
if (tp1->snd_count < 2) {
/*-
- * True non-retransmited chunk
+ * True non-retransmitted chunk
*/
tp1->whoTo->net_ack2 += tp1->send_size;
@@ -3098,6 +3103,7 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
tp1->do_rtt = 0;
}
}
+
}
if (tp1->sent <= SCTP_DATAGRAM_RESEND) {
if (SCTP_TSN_GT(tp1->rec.data.tsn,
@@ -3363,6 +3369,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
continue;
}
}
+
}
if (SCTP_TSN_GT(tp1->rec.data.tsn, asoc->this_sack_highest_gap) &&
!(accum_moved && asoc->fast_retran_loss_recovery)) {
@@ -3598,6 +3605,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
tp1);
}
}
+
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
sctp_log_rwnd(SCTP_INCREASE_PEER_RWND,
asoc->peers_rwnd, tp1->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
@@ -3679,6 +3687,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
tp1->whoTo->find_pseudo_cumack = 1;
tp1->whoTo->find_rtx_pseudo_cumack = 1;
}
+
} else { /* CMT is OFF */
#ifdef SCTP_FR_TO_ALTERNATE
@@ -3967,6 +3976,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
}
return;
}
+
/* First setup for CC stuff */
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
if (SCTP_TSN_GT(cumack, net->cwr_window_tsn)) {
@@ -4048,7 +4058,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
tp1->whoTo->net_ack += tp1->send_size;
if (tp1->snd_count < 2) {
/*
- * True non-retransmited
+ * True non-retransmitted
* chunk
*/
tp1->whoTo->net_ack2 +=
@@ -4232,6 +4242,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
asoc->total_flight = 0;
asoc->total_flight_count = 0;
}
+
/* RWND update */
asoc->peers_rwnd = sctp_sbspace_sub(rwnd,
(uint32_t)(asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh))));
@@ -4320,12 +4331,12 @@ again:
/* clean up */
if ((asoc->stream_queue_cnt == 1) &&
((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc))) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
(asoc->stream_queue_cnt == 1) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
struct mbuf *op_err;
@@ -4341,12 +4352,11 @@ again:
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (asoc->alternate) {
netp = asoc->alternate;
@@ -4358,13 +4368,12 @@ again:
stcb->sctp_ep, stcb, netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, netp);
- } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ } else if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_ACK_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (asoc->alternate) {
netp = asoc->alternate;
@@ -4484,6 +4493,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
sctp_misc_ints(SCTP_SACK_LOG_NORMAL, cum_ack,
rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd);
}
+
old_rwnd = stcb->asoc.peers_rwnd;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -4555,6 +4565,7 @@ hopeless_peer:
/* acking something behind */
return;
}
+
/* update the Rwnd of the peer */
if (TAILQ_EMPTY(&asoc->sent_queue) &&
TAILQ_EMPTY(&asoc->send_queue) &&
@@ -4608,6 +4619,7 @@ hopeless_peer:
if (stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) {
(*stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) (stcb, net);
}
+
/*
* CMT: SFR algo (and HTNA) - this_sack_highest_newack has
* to be greater than the cumack. Also reset saw_newack to 0
@@ -4664,7 +4676,7 @@ hopeless_peer:
if (tp1->snd_count < 2) {
/*
- * True non-retransmited
+ * True non-retransmitted
* chunk
*/
tp1->whoTo->net_ack2 +=
@@ -4843,6 +4855,7 @@ hopeless_peer:
#endif
asoc->total_flight = 0;
}
+
/* sa_ignore NO_NULL_CHK */
if ((wake_him) && (stcb->sctp_socket)) {
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
@@ -4947,6 +4960,7 @@ hopeless_peer:
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
0, (void *)net, SCTP_SO_NOT_LOCKED);
}
+
if (net == stcb->asoc.primary_destination) {
if (stcb->asoc.alternate) {
/*
@@ -4957,6 +4971,7 @@ hopeless_peer:
stcb->asoc.alternate = NULL;
}
}
+
if (net->dest_state & SCTP_ADDR_PF) {
net->dest_state &= ~SCTP_ADDR_PF;
sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
@@ -4979,6 +4994,7 @@ hopeless_peer:
}
asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery);
}
+
if (TAILQ_EMPTY(&asoc->sent_queue)) {
/* nothing left in-flight */
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
@@ -4992,6 +5008,7 @@ hopeless_peer:
asoc->total_flight = 0;
asoc->total_flight_count = 0;
}
+
/**********************************/
/* Now what about shutdown issues */
/**********************************/
@@ -5009,12 +5026,12 @@ hopeless_peer:
/* clean up */
if ((asoc->stream_queue_cnt == 1) &&
((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc))) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
(asoc->stream_queue_cnt == 1) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
struct mbuf *op_err;
@@ -5030,12 +5047,11 @@ hopeless_peer:
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (asoc->alternate) {
netp = asoc->alternate;
@@ -5048,13 +5064,12 @@ hopeless_peer:
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, netp);
return;
- } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ } else if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_ACK_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (asoc->alternate) {
netp = asoc->alternate;
@@ -5126,6 +5141,7 @@ hopeless_peer:
if (asoc->peers_rwnd > old_rwnd) {
win_probe_recovery = 1;
}
+
/*
* Now we must setup so we have a timer up for anyone with
* outstanding data.
diff --git a/freebsd/sys/netinet/sctp_indata.h b/freebsd/sys/netinet/sctp_indata.h
index 10b18d0b..59ceac3a 100644
--- a/freebsd/sys/netinet/sctp_indata.h
+++ b/freebsd/sys/netinet/sctp_indata.h
@@ -99,8 +99,7 @@ void
sctp_handle_forward_tsn(struct sctp_tcb *,
struct sctp_forward_tsn_chunk *, int *, struct mbuf *, int);
-struct sctp_tmit_chunk *
- sctp_try_advance_peer_ack_point(struct sctp_tcb *, struct sctp_association *);
+struct sctp_tmit_chunk *sctp_try_advance_peer_ack_point(struct sctp_tcb *, struct sctp_association *);
void sctp_service_queues(struct sctp_tcb *, struct sctp_association *);
diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c
index ee206551..c7e86e78 100644
--- a/freebsd/sys/netinet/sctp_input.c
+++ b/freebsd/sys/netinet/sctp_input.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
#endif
+#include <netinet/in_kdtrace.h>
#include <sys/smp.h>
@@ -192,7 +193,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
goto outnow;
}
if ((stcb != NULL) &&
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT)) {
SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending SHUTDOWN-ACK\n");
sctp_send_shutdown_ack(stcb, NULL);
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
@@ -307,6 +308,7 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_INITIALIZATION);
}
+
}
}
SCTP_TCB_SEND_LOCK(stcb);
@@ -493,6 +495,7 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
SCTP_FREE(param, SCTP_M_ASC_ADDR);
}
}
+
stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
stcb->asoc.local_hmacs);
if (op_err) {
@@ -555,6 +558,7 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
}
return (retval);
}
+
return (0);
}
@@ -572,6 +576,7 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
/* Invalid length */
return;
}
+
memset(&store, 0, sizeof(store));
switch (cp->heartbeat.hb_info.addr_family) {
#ifdef INET
@@ -711,15 +716,15 @@ sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
*/
struct sctpasochead *head;
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_INP_INFO_WLOCK();
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) {
/* generate a new vtag and send init */
LIST_REMOVE(stcb, sctp_asocs);
stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
@@ -733,15 +738,14 @@ sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
SCTP_INP_INFO_WUNLOCK();
return (1);
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
/*
* treat like a case where the cookie expired i.e.: - dump
* current cookie. - generate a new vtag. - resend init.
*/
/* generate a new vtag and send init */
LIST_REMOVE(stcb, sctp_asocs);
- stcb->asoc.state &= ~SCTP_STATE_COOKIE_ECHOED;
- stcb->asoc.state |= SCTP_STATE_COOKIE_WAIT;
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
sctp_stop_all_cookie_timers(stcb);
sctp_toss_old_cookies(stcb, &stcb->asoc);
stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
@@ -823,8 +827,8 @@ sctp_handle_abort(struct sctp_abort_chunk *abort,
sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED);
/* free the tcb */
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
#ifdef SCTP_ASOCLOG_OF_TSNS
@@ -838,7 +842,7 @@ sctp_handle_abort(struct sctp_abort_chunk *abort,
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_WAS_ABORTED);
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
@@ -893,15 +897,15 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
if (stcb == NULL)
return;
asoc = &stcb->asoc;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
return;
}
if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
/* Shutdown NOT the expected size */
return;
}
- old_state = SCTP_GET_STATE(asoc);
+ old_state = SCTP_GET_STATE(stcb);
sctp_update_acked(stcb, cp, abort_flag);
if (*abort_flag) {
return;
@@ -957,11 +961,10 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
}
/* goto SHUTDOWN_RECEIVED state to block new requests */
if (stcb->sctp_socket) {
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_RECEIVED);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT)) {
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_RECEIVED);
/*
* notify upper layer that peer has initiated a
* shutdown
@@ -972,7 +975,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
}
}
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) {
/*
* stop the shutdown timer, since we WILL move to
* SHUTDOWN-ACK-SENT.
@@ -992,13 +995,12 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
/* no outstanding data to send, so move on... */
/* send SHUTDOWN-ACK */
/* move to SHUTDOWN-ACK-SENT state */
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
- if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_ACK_SENT);
sctp_stop_timers_for_shutdown(stcb);
sctp_send_shutdown_ack(stcb, net);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
@@ -1027,15 +1029,15 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED,
asoc = &stcb->asoc;
/* process according to association state */
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
/* unexpected SHUTDOWN-ACK... do OOTB handling... */
sctp_send_shutdown_complete(stcb, net, 1);
SCTP_TCB_UNLOCK(stcb);
return;
}
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
/* unexpected SHUTDOWN-ACK... so ignore... */
SCTP_TCB_UNLOCK(stcb);
return;
@@ -1231,7 +1233,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch,
* waiting.
*/
if ((cause_length >= sizeof(struct sctp_error_stale_cookie)) &&
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
struct sctp_error_stale_cookie *stale_cookie;
stale_cookie = (struct sctp_error_stale_cookie *)cause;
@@ -1264,8 +1266,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch,
}
/* blast back to INIT state */
sctp_toss_old_cookies(stcb, &stcb->asoc);
- asoc->state &= ~SCTP_STATE_COOKIE_ECHOED;
- asoc->state |= SCTP_STATE_COOKIE_WAIT;
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
sctp_stop_all_cookie_timers(stcb);
sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
}
@@ -1416,7 +1417,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
return (-1);
}
/* process according to association state... */
- switch (stcb->asoc.state & SCTP_STATE_MASK) {
+ switch (SCTP_GET_STATE(stcb)) {
case SCTP_STATE_COOKIE_WAIT:
/* this is the expected state for this chunk */
/* process the INIT-ACK parameters */
@@ -1442,7 +1443,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
}
/* update our state */
SCTPDBG(SCTP_DEBUG_INPUT2, "moving to COOKIE-ECHOED state\n");
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_ECHOED);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_ECHOED);
/* reset the RTO calc */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
@@ -1536,7 +1537,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
if (how_indx < sizeof(asoc->cookie_how)) {
asoc->cookie_how[how_indx] = 1;
}
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
/* SHUTDOWN came in after sending INIT-ACK */
sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
op_err = sctp_generate_cause(SCTP_CAUSE_COOKIE_IN_SHUTDOWN, "");
@@ -1605,7 +1606,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
return (NULL);
}
- switch (SCTP_GET_STATE(asoc)) {
+ switch (SCTP_GET_STATE(stcb)) {
case SCTP_STATE_COOKIE_WAIT:
case SCTP_STATE_COOKIE_ECHOED:
/*
@@ -1629,12 +1630,12 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
stcb, net,
SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
/* update current state */
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)
SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
else
SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, asoc->primary_destination);
@@ -1718,6 +1719,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
asoc->cookie_how[how_indx] = 5;
return (stcb);
}
+
if (ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag &&
cookie->tie_tag_my_vtag == 0 &&
@@ -1733,7 +1735,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
* If nat support, and the below and stcb is established, send back
* a ABORT(colliding state) if we are established.
*/
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) &&
(asoc->peer_supports_nat) &&
((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
((ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) ||
@@ -1838,8 +1840,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
asoc->cookie_how[how_indx] = 10;
return (NULL);
}
- if ((asoc->state & SCTP_STATE_COOKIE_WAIT) ||
- (asoc->state & SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
*notification = SCTP_NOTIFY_ASSOC_UP;
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
@@ -1867,17 +1869,17 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)
SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
else
SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
SCTP_STAT_INCR_GAUGE32(sctps_currestab);
- } else if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ } else if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
SCTP_STAT_INCR_COUNTER32(sctps_restartestab);
} else {
SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, asoc->primary_destination);
@@ -1937,24 +1939,24 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
/* notify upper layer */
*notification = SCTP_NOTIFY_ASSOC_RESTART;
atomic_add_int(&stcb->asoc.refcnt, 1);
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT)) {
SCTP_STAT_INCR_GAUGE32(sctps_currestab);
}
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
SCTP_STAT_INCR_GAUGE32(sctps_restartestab);
- } else if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ } else if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) {
SCTP_STAT_INCR_GAUGE32(sctps_collisionestab);
}
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, asoc->primary_destination);
- } else if (!(asoc->state & SCTP_STATE_SHUTDOWN_SENT)) {
+ } else if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) {
/* move to OPEN state, if not in SHUTDOWN_SENT */
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
}
asoc->pre_open_streams =
ntohs(initack_cp->init.num_outbound_streams);
@@ -2293,6 +2295,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
stcb->asoc.authenticated = 1;
}
}
+
/*
* if we're doing ASCONFs, check to see if we have any new local
* addresses that need to get added to the peer (eg. addresses
@@ -2342,7 +2345,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
/* update current state */
SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, asoc->primary_destination);
@@ -2590,6 +2593,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
(uint32_t)offset, cookie_offset, sig_offset);
return (NULL);
}
+
/*
* check the cookie timestamps to be sure it's not stale
*/
@@ -2710,6 +2714,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
}
}
}
+
cookie_len -= SCTP_SIGNATURE_SIZE;
if (*stcb == NULL) {
/* this is the "normal" case... get a new TCB */
@@ -2877,7 +2882,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
* the accept state waiting for the accept!
*/
if (*stcb) {
- (*stcb)->asoc.state |= SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_ADD_SUBSTATE(*stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
}
sctp_move_pcb_and_assoc(*inp_p, inp, *stcb);
@@ -2912,6 +2917,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
(*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
}
+
/*
* Pull it from the incomplete queue and wake the
* guy
@@ -2952,6 +2958,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
if ((stcb == NULL) || (net == NULL)) {
return;
}
+
asoc = &stcb->asoc;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -2963,10 +2970,10 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
asoc->overall_error_count = 0;
sctp_stop_all_cookie_timers(stcb);
/* process according to association state */
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
/* state change only needed when I am in right state */
SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
sctp_start_net_timers(stcb);
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
@@ -3018,6 +3025,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
*/
goto closed_socket;
}
+
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
stcb, net);
@@ -3224,7 +3232,6 @@ static void
sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSED,
struct sctp_tcb *stcb, struct sctp_nets *net)
{
- struct sctp_association *asoc;
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -3234,9 +3241,8 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSE
if (stcb == NULL)
return;
- asoc = &stcb->asoc;
/* process according to association state */
- if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
/* unexpected SHUTDOWN-COMPLETE... so ignore... */
SCTPDBG(SCTP_DEBUG_INPUT2,
"sctp_handle_shutdown_complete: not in SCTP_STATE_SHUTDOWN_ACK_SENT --- ignore\n");
@@ -3248,8 +3254,8 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSE
sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
}
#ifdef INVARIANTS
- if (!TAILQ_EMPTY(&asoc->send_queue) ||
- !TAILQ_EMPTY(&asoc->sent_queue) ||
+ if (!TAILQ_EMPTY(&stcb->asoc.send_queue) ||
+ !TAILQ_EMPTY(&stcb->asoc.sent_queue) ||
sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED)) {
panic("Queues are not empty when handling SHUTDOWN-COMPLETE");
}
@@ -3796,6 +3802,7 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
sctp_log_map(0, 7, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
}
+
stcb->asoc.tsn_last_delivered = stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
stcb->asoc.mapping_array_base_tsn = ntohl(resp->senders_next_tsn);
memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
@@ -4395,6 +4402,7 @@ sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp,
if (trunc_len > limit) {
trunc_len = limit;
}
+
/* now the chunks themselves */
while ((ch != NULL) && (chlen >= sizeof(struct sctp_chunkhdr))) {
desc.chunk_type = ch->chunk_type;
@@ -4654,6 +4662,7 @@ sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
*/
SCTP_INP_DECR_REF(inp);
}
+
/* now go back and verify any auth chunk to be sure */
if (auth_skipped && (stcb != NULL)) {
struct sctp_auth_chunk *auth;
@@ -4748,11 +4757,12 @@ sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
if (((ch->chunk_type == SCTP_SELECTIVE_ACK) ||
(ch->chunk_type == SCTP_NR_SELECTIVE_ACK) ||
(ch->chunk_type == SCTP_HEARTBEAT_REQUEST)) &&
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
/* implied cookie-ack.. we must have lost the ack */
sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb,
*netp);
}
+
process_control_chunks:
while (IS_SCTP_CONTROL(ch)) {
/* validate chunk length */
@@ -4792,6 +4802,7 @@ process_control_chunks:
}
return (NULL);
}
+
num_chunks++;
/* Save off the last place we got a control from */
if (stcb != NULL) {
@@ -4811,7 +4822,6 @@ process_control_chunks:
/* check to see if this chunk required auth, but isn't */
if ((stcb != NULL) &&
- (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(ch->chunk_type, stcb->asoc.local_auth_chunks) &&
!stcb->asoc.authenticated) {
/* "silently" ignore */
@@ -4941,7 +4951,7 @@ process_control_chunks:
break;
}
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
/*-
* If we have sent a shutdown-ack, we will pay no
* attention to a sack sent in to us since
@@ -5159,6 +5169,7 @@ process_control_chunks:
goto abend;
}
}
+
if (netp != NULL) {
struct sctp_tcb *locked_stcb;
@@ -5331,6 +5342,7 @@ process_control_chunks:
*offset = length;
return (stcb);
}
+
if (stcb != NULL) {
int abort_flag = 0;
@@ -5393,6 +5405,7 @@ process_control_chunks:
*offset = length;
return (stcb);
}
+
if ((ch != NULL) && (stcb != NULL) && (netp != NULL) && (*netp != NULL)) {
if (stcb->asoc.pktdrop_supported == 0) {
goto unknown_chunk;
@@ -5559,6 +5572,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
net->flowtype = mflowtype;
net->flowid = mflowid;
}
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
if ((inp != NULL) && (stcb != NULL)) {
sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1);
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
@@ -5599,6 +5613,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
net->flowid = mflowid;
}
if (inp == NULL) {
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
SCTP_STAT_INCR(sctps_noport);
if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0) {
goto out;
@@ -5647,6 +5662,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
SCTP_TCB_UNLOCK(stcb);
stcb = NULL;
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
@@ -5655,6 +5671,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
vrf_id, port);
goto out;
}
+
}
if (IS_SCTP_CONTROL(ch)) {
/* process the control portion of the SCTP packet */
@@ -5700,14 +5717,15 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
* chunks
*/
if ((stcb != NULL) &&
- (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) {
/* "silently" ignore */
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
SCTP_STAT_INCR(sctps_recvauthmissing);
goto out;
}
if (stcb == NULL) {
/* out of the blue DATA chunk */
+ SCTP_PROBE5(receive, NULL, NULL, m, NULL, sh);
snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
@@ -5718,11 +5736,13 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
}
if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
/* v_tag mismatch! */
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
SCTP_STAT_INCR(sctps_badvtag);
goto out;
}
}
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
if (stcb == NULL) {
/*
* no valid TCB for this packet, or we found it's a bad
@@ -5731,6 +5751,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
goto out;
}
+
/*
* DATA chunk processing
*/
@@ -5742,7 +5763,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
if ((length > offset) &&
(stcb != NULL) &&
- (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks) &&
!stcb->asoc.authenticated) {
/* "silently" ignore */
@@ -5759,7 +5779,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
* not get here unless we really did have a tag, so we don't
* abort if this happens, just dump the chunk silently.
*/
- switch (SCTP_GET_STATE(&stcb->asoc)) {
+ switch (SCTP_GET_STATE(stcb)) {
case SCTP_STATE_COOKIE_ECHOED:
/*
* we consider data with valid tags in this state
@@ -5810,6 +5830,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
* process_data
*/
}
+
/* take care of ecn */
if ((data_processed == 1) &&
(stcb->asoc.ecn_supported == 1) &&
@@ -5817,6 +5838,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
/* Yep, we need to add a ECNE */
sctp_send_ecn_echo(stcb, net, high_tsn);
}
+
if ((data_processed == 0) && (fwd_tsn_seen)) {
int was_a_gap;
uint32_t highest_tsn;
diff --git a/freebsd/sys/netinet/sctp_input.h b/freebsd/sys/netinet/sctp_input.h
index f393ad89..72908e11 100644
--- a/freebsd/sys/netinet/sctp_input.h
+++ b/freebsd/sys/netinet/sctp_input.h
@@ -52,7 +52,7 @@ struct sctp_stream_reset_request *
sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
struct sctp_tmit_chunk **bchk);
-void
+void
sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries,
uint16_t *list);
diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h
index d8d9e6e8..abe8e2c9 100644
--- a/freebsd/sys/netinet/sctp_os_bsd.h
+++ b/freebsd/sys/netinet/sctp_os_bsd.h
@@ -445,7 +445,7 @@ sctp_get_mbuf_for_msg(unsigned int space_needed,
/*
* SCTP AUTH
*/
-#define SCTP_READ_RANDOM(buf, len) read_random(buf, len)
+#define SCTP_READ_RANDOM(buf, len) arc4rand(buf, len, 0)
/* map standard crypto API names */
#define SCTP_SHA1_CTX SHA1_CTX
diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c
index bdef958c..8f0c8aa4 100644
--- a/freebsd/sys/netinet/sctp_output.c
+++ b/freebsd/sys/netinet/sctp_output.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
+#include <netinet/in_kdtrace.h>
@@ -2547,6 +2548,7 @@ once_again:
inp->next_addr_touse = NULL;
goto once_again;
}
+
inp->next_addr_touse = starting_point;
resettotop = 0;
once_again_too:
@@ -2554,6 +2556,7 @@ once_again_too:
inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
resettotop = 1;
}
+
/* ok, what about an acceptable address in the inp */
for (laddr = inp->next_addr_touse; laddr;
laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
@@ -2576,6 +2579,7 @@ once_again_too:
inp->next_addr_touse = NULL;
goto once_again_too;
}
+
/*
* no address bound can be a source for the destination we are in
* trouble
@@ -3990,8 +3994,8 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
int so_locked
#endif
)
-/* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
{
+/* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
/**
* Given a mbuf chain (via SCTP_BUF_NEXT()) that holds a packet header
* WITH an SCTPHDR but no IP header, endpoint inp and sa structure:
@@ -4038,6 +4042,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
if ((auth != NULL) && (stcb != NULL)) {
sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid);
}
+
if (net) {
tos_value = net->dscp;
} else if (stcb) {
@@ -4249,6 +4254,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTP_SOCKET_UNLOCK(so, 0);
}
#endif
+ SCTP_PROBE5(send, NULL, stcb, ip, stcb, sctphdr);
SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
@@ -4394,7 +4400,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
} else {
ip6h->ip6_nxt = IPPROTO_SCTP;
}
- ip6h->ip6_plen = (uint16_t)(packet_length - sizeof(struct ip6_hdr));
+ ip6h->ip6_plen = htons((uint16_t)(packet_length - sizeof(struct ip6_hdr)));
ip6h->ip6_dst = sin6->sin6_addr;
/*
@@ -4552,6 +4558,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
prev_scope = sin6->sin6_scope_id;
prev_port = sin6->sin6_port;
}
+
if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
/* failed to prepend data, give up */
sctp_m_freem(m);
@@ -4581,6 +4588,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
sctp_packet_log(o_pak);
#endif
+ SCTP_PROBE5(send, NULL, stcb, ip6h, stcb, sctphdr);
SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
@@ -4740,6 +4748,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
chunk_len += parameter_len;
}
+
/* ECN parameter */
if (stcb->asoc.ecn_supported == 1) {
parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
@@ -4748,6 +4757,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* PR-SCTP supported parameter */
if (stcb->asoc.prsctp_supported == 1) {
parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
@@ -4756,6 +4766,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* Add NAT friendly parameter. */
if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
@@ -4764,6 +4775,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* And now tell the peer which extensions we support */
num_ext = 0;
pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
@@ -4854,6 +4866,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
chunk_len += parameter_len;
}
}
+
/* now any cookie time extensions */
if (stcb->asoc.cookie_preserve_req) {
struct sctp_cookie_perserve_param *cookie_preserve;
@@ -4871,6 +4884,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
stcb->asoc.cookie_preserve_req = 0;
chunk_len += parameter_len;
}
+
if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) {
uint8_t i;
@@ -4899,6 +4913,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
padding_len = 4 - 2 * i;
chunk_len += parameter_len;
}
+
SCTP_BUF_LEN(m) = chunk_len;
/* now the addresses */
/*
@@ -5519,7 +5534,7 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asoc = NULL;
}
if ((asoc != NULL) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT)) {
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT)) {
if (sctp_are_there_new_addresses(asoc, init_pkt, offset, src)) {
/*
* new addresses, out of here in non-cookie-wait
@@ -5822,9 +5837,9 @@ do_a_abort:
initack->ch.chunk_length = 0;
/* place in my tag */
if ((asoc != NULL) &&
- ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_INUSE) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED))) {
+ ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_INUSE) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED))) {
/* re-use the v-tags and init-seq here */
initack->init.initiate_tag = htonl(asoc->my_vtag);
initack->init.initial_tsn = htonl(asoc->init_seq_number);
@@ -5904,6 +5919,7 @@ do_a_abort:
ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
chunk_len += parameter_len;
}
+
/* ECN parameter */
if (((asoc != NULL) && (asoc->ecn_supported == 1)) ||
((asoc == NULL) && (inp->ecn_supported == 1))) {
@@ -5913,6 +5929,7 @@ do_a_abort:
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* PR-SCTP supported parameter */
if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
((asoc == NULL) && (inp->prsctp_supported == 1))) {
@@ -5922,6 +5939,7 @@ do_a_abort:
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* Add NAT friendly parameter */
if (nat_friendly) {
parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
@@ -5930,6 +5948,7 @@ do_a_abort:
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* And now tell the peer which extensions we support */
num_ext = 0;
pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
@@ -5973,6 +5992,7 @@ do_a_abort:
padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
chunk_len += parameter_len;
}
+
/* add authentication parameters */
if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
((asoc == NULL) && (inp->auth_supported == 1))) {
@@ -6050,6 +6070,7 @@ do_a_abort:
SCTP_BUF_LEN(m) += padding_len;
padding_len = 0;
}
+
/* tack on the operational error if present */
if (op_err) {
parameter_len = 0;
@@ -6347,9 +6368,9 @@ sctp_msg_append(struct sctp_tcb *stcb,
}
strm = &stcb->asoc.strmout[srcv->sinfo_stream];
/* Now can we send this? */
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
(stcb->asoc.state & SCTP_STATE_SHUTDOWN_PENDING)) {
/* got data while shutting down */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
@@ -6682,18 +6703,17 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
* there is nothing queued to send, so I'm
* done...
*/
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
/*
* only send SHUTDOWN the first time
* through
*/
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
sctp_send_shutdown(stcb, net);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
@@ -6714,13 +6734,13 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
* we will allow user data to be sent first
* and move to SHUTDOWN-PENDING
*/
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
- asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
@@ -7433,6 +7453,7 @@ dont_do_it:
chk->last_mbuf = SCTP_BUF_NEXT(chk->last_mbuf);
}
}
+
if (to_move > length) {
/*- This should not happen either
* since we always lower to_move to the size
@@ -7839,7 +7860,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp,
*reason_code = 0;
auth_keyid = stcb->asoc.authinfo.active_keyid;
if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
(sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
eeor_mode = 1;
} else {
@@ -7970,6 +7991,7 @@ nothing_to_send:
*reason_code = 8;
return (0);
}
+
if (asoc->sctp_cmt_on_off > 0) {
/* get the last start point */
start_at = asoc->last_net_cmt_send_started;
@@ -8596,8 +8618,8 @@ again_one_more_time:
omtu = 0;
break;
}
- if ((((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ if ((((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
(skip_data_for_this_net == 0)) ||
(cookie)) {
TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
@@ -8616,6 +8638,7 @@ again_one_more_time:
/* Don't send the chunk on this net */
continue;
}
+
if (asoc->sctp_cmt_on_off == 0) {
if ((asoc->alternate) &&
(asoc->alternate != net) &&
@@ -8645,7 +8668,7 @@ again_one_more_time:
chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
}
if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) &&
- ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) == SCTP_STATE_SHUTDOWN_PENDING)) {
+ (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
struct sctp_data_chunk *dchkh;
dchkh = mtod(chk->data, struct sctp_data_chunk *);
@@ -8879,6 +8902,7 @@ no_data_fill:
if (old_start_at)
goto again_one_more_time;
}
+
/*
* At the end there should be no NON timed chunks hanging on this
* queue.
@@ -9272,17 +9296,20 @@ sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked)
/* can't send a new one if there is one in flight already */
return;
}
+
/* compose an ASCONF chunk, maximum length is PMTU */
m_asconf = sctp_compose_asconf(stcb, &len, addr_locked);
if (m_asconf == NULL) {
return;
}
+
sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
/* no memory */
sctp_m_freem(m_asconf);
return;
}
+
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_ASCONF;
chk->rec.chunk_id.can_take_data = 0;
@@ -9353,6 +9380,7 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb)
if (ack->data == NULL) {
continue;
}
+
/* copy the asconf_ack */
m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_NOWAIT);
if (m_ack == NULL) {
@@ -9541,8 +9569,8 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp,
if (TAILQ_EMPTY(&asoc->sent_queue)) {
return (SCTP_RETRAN_DONE);
}
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT)) {
/* not yet open, resend the cookie and that is it */
return (1);
}
@@ -10241,6 +10269,7 @@ sctp_output(
SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
}
+
if (inp->sctp_socket == NULL) {
SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
@@ -11228,12 +11257,13 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
sctp_packet_log(o_pak);
}
#endif
+ SCTP_PROBE5(send, NULL, NULL, ip, NULL, shout);
SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id);
break;
#endif
#ifdef INET6
case AF_INET6:
- ip6->ip6_plen = (uint16_t)(len - sizeof(struct ip6_hdr));
+ ip6->ip6_plen = htons((uint16_t)(len - sizeof(struct ip6_hdr)));
if (port) {
shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
SCTP_STAT_INCR(sctps_sendswcrc);
@@ -11250,6 +11280,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
sctp_packet_log(o_pak);
}
#endif
+ SCTP_PROBE5(send, NULL, NULL, ip6, NULL, shout);
SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id);
break;
#endif
@@ -11314,6 +11345,7 @@ sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked
SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n");
return;
}
+
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
chk->rec.chunk_id.can_take_data = 1;
@@ -12332,6 +12364,7 @@ sctp_copy_one(struct sctp_stream_queue_pending *sp,
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
return (ENOBUFS);
}
+
sp->tail_mbuf = m_last(sp->data);
return (0);
}
@@ -12348,6 +12381,7 @@ sctp_copy_it_in(struct sctp_tcb *stcb,
int user_marks_eor,
int *error)
{
+
/*-
* This routine must be very careful in its work. Protocol
* processing is up and running so care must be taken to spl...()
@@ -12360,9 +12394,9 @@ sctp_copy_it_in(struct sctp_tcb *stcb,
*error = 0;
/* Now can we send this? */
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
(asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
/* got data while shutting down */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
@@ -12748,7 +12782,7 @@ sctp_lower_sosend(struct socket *so,
*/
queue_only = 1;
asoc = &stcb->asoc;
- SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
/* initialize authentication params for the assoc */
@@ -12870,8 +12904,8 @@ sctp_lower_sosend(struct socket *so,
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, error);
goto out_unlocked;
}
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
queue_only = 1;
}
/* we are now done with all control */
@@ -12879,9 +12913,9 @@ sctp_lower_sosend(struct socket *so,
sctp_m_freem(control);
control = NULL;
}
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
(asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
if (srcv->sinfo_flags & SCTP_ABORT) {
;
@@ -12903,8 +12937,8 @@ sctp_lower_sosend(struct socket *so,
int tot_demand, tot_out = 0, max_out;
SCTP_STAT_INCR(sctps_sends_with_abort);
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
/* It has to be up before we abort */
/* how big is the user initiated abort? */
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
@@ -13014,6 +13048,7 @@ sctp_lower_sosend(struct socket *so,
error = EFAULT;
goto out_unlocked;
}
+
/* Unless E_EOR mode is on, we must make a send FIT in one call. */
if ((user_marks_eor == 0) &&
(sndlen > SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
@@ -13031,6 +13066,7 @@ sctp_lower_sosend(struct socket *so,
error = EINVAL;
goto out_unlocked;
}
+
if (user_marks_eor) {
local_add_more = min(SCTP_SB_LIMIT_SND(so), SCTP_BASE_SYSCTL(sctp_add_more_threshold));
} else {
@@ -13095,6 +13131,7 @@ sctp_lower_sosend(struct socket *so,
}
SOCKBUF_UNLOCK(&so->so_snd);
}
+
skip_preblock:
if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
goto out_unlocked;
@@ -13231,6 +13268,7 @@ skip_preblock:
if (srcv->sinfo_flags & SCTP_SACK_IMMEDIATELY) {
sp->sinfo_flags |= SCTP_SACK_IMMEDIATELY;
}
+
/* Did we reach EOR? */
if ((uio->uio_resid == 0) &&
((user_marks_eor == 0) ||
@@ -13279,12 +13317,12 @@ skip_preblock:
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
/* a collision took us forward? */
queue_only = 0;
} else {
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
- SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
queue_only = 1;
}
}
@@ -13396,6 +13434,7 @@ skip_preblock:
SOCKBUF_UNLOCK(&so->so_snd);
goto out_unlocked;
}
+
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
asoc, stcb->asoc.total_output_queue_size);
@@ -13466,17 +13505,16 @@ dataless_eof:
goto abort_anyway;
}
/* there is nothing queued to send, so I'm done... */
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
struct sctp_nets *netp;
/* only send SHUTDOWN the first time through */
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
@@ -13500,17 +13538,17 @@ dataless_eof:
* data to be sent first and move to
* SHUTDOWN-PENDING
*/
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
if (hold_tcblock == 0) {
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
- asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
@@ -13551,12 +13589,12 @@ skip_out_eof:
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
/* a collision took us forward? */
queue_only = 0;
} else {
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
queue_only = 1;
}
}
@@ -13761,6 +13799,7 @@ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t *ro)
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
return (0);
}
+
SCTPDBG(SCTP_DEBUG_OUTPUT2, "v6src_match_nexthop(), Prefix entry is ");
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
diff --git a/freebsd/sys/netinet/sctp_output.h b/freebsd/sys/netinet/sctp_output.h
index e6222e3f..1b3d22d9 100644
--- a/freebsd/sys/netinet/sctp_output.h
+++ b/freebsd/sys/netinet/sctp_output.h
@@ -74,7 +74,7 @@ int
int
sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro);
-void
+void
sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -117,7 +117,7 @@ void sctp_send_shutdown_ack(struct sctp_tcb *, struct sctp_nets *);
void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *, int);
-void
+void
sctp_send_shutdown_complete2(struct sockaddr *, struct sockaddr *,
struct sctphdr *,
uint8_t, uint32_t, uint16_t,
@@ -146,13 +146,13 @@ int
sctp_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
struct mbuf *, struct thread *, int);
-void
+void
sctp_chunk_output(struct sctp_inpcb *, struct sctp_tcb *, int, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
);
-void
+void
sctp_send_abort_tcb(struct sctp_tcb *, struct mbuf *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -201,7 +201,7 @@ sctp_send_abort(struct mbuf *, int, struct sockaddr *, struct sockaddr *,
uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
-void
+void
sctp_send_operr_to(struct sockaddr *, struct sockaddr *,
struct sctphdr *, uint32_t, struct mbuf *,
uint8_t, uint32_t, uint16_t,
diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c
index cf993d64..782e5f1d 100644
--- a/freebsd/sys/netinet/sctp_pcb.c
+++ b/freebsd/sys/netinet/sctp_pcb.c
@@ -187,6 +187,7 @@ sctp_allocate_vrf(int vrf_id)
SCTP_FREE(vrf, SCTP_M_VRF);
return (NULL);
}
+
/* Add it to the hash table */
bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))];
LIST_INSERT_HEAD(bucket, vrf, next_vrf);
@@ -738,6 +739,7 @@ sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr,
SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
goto out_now;
}
+
#ifdef SCTP_DEBUG
SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: deleting address:", vrf_id);
SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
@@ -866,6 +868,7 @@ sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
SCTP_IPI_ADDR_RUNLOCK();
return (0);
}
+
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
if ((loopback_scope == 0) &&
@@ -1027,6 +1030,7 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
if ((to == NULL) || (from == NULL)) {
return (NULL);
}
+
switch (to->sa_family) {
#ifdef INET
case AF_INET:
@@ -1389,6 +1393,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote,
if (locked_tcb) {
atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
}
+
SCTP_INP_WUNLOCK(inp);
SCTP_INP_INFO_RUNLOCK();
return (stcb);
@@ -2254,6 +2259,7 @@ sctp_findassociation_addr(struct mbuf *m, int offset,
return (stcb);
}
}
+
if (inp_p) {
stcb = sctp_findassociation_addr_sa(src, dst, inp_p, netp,
1, vrf_id);
@@ -2849,6 +2855,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
return (EINVAL);
}
+
sin = (struct sockaddr_in *)addr;
lport = sin->sin_port;
/*
@@ -3368,14 +3375,14 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
* was not closed. So go ahead and
* start it now.
*/
- asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_IN_ACCEPT_QUEUE);
sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL);
}
SCTP_TCB_UNLOCK(asoc);
continue;
}
- if (((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_ECHOED)) &&
+ if (((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) &&
(asoc->asoc.total_output_queue_size == 0)) {
/*
* If we have data in queue, we don't want
@@ -3392,7 +3399,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
}
/* Disconnect the socket please */
asoc->sctp_socket = NULL;
- asoc->asoc.state |= SCTP_STATE_CLOSED_SOCKET;
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_CLOSED_SOCKET);
if ((asoc->asoc.size_on_reasm_queue > 0) ||
(asoc->asoc.control_pdapi) ||
(asoc->asoc.size_on_all_streams > 0) ||
@@ -3404,8 +3411,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
if (sctp_free_assoc(inp, asoc,
@@ -3419,20 +3426,19 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
if ((*asoc->asoc.ss_functions.sctp_ss_is_user_msgs_incomplete) (asoc, &asoc->asoc)) {
goto abort_anyway;
}
- if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
struct sctp_nets *netp;
/*
* there is nothing queued to send,
* so I send shutdown
*/
- if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(asoc);
if (asoc->asoc.alternate) {
netp = asoc->asoc.alternate;
@@ -3448,11 +3454,11 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
}
} else {
/* mark into shutdown pending */
- asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING;
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
asoc->asoc.primary_destination);
if ((*asoc->asoc.ss_functions.sctp_ss_is_user_msgs_incomplete) (asoc, &asoc->asoc)) {
- asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
@@ -3464,8 +3470,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
if (sctp_free_assoc(inp, asoc,
@@ -3503,6 +3509,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
LIST_REMOVE(inp, sctp_hash);
inp->sctp_flags |= SCTP_PCB_FLAGS_UNBOUND;
}
+
/*
* If there is a timer running to kill us, forget it, since it may
* have a contest on the INP lock.. which would cause us to die ...
@@ -3512,7 +3519,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
SCTP_TCB_LOCK(asoc);
if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) {
- asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_IN_ACCEPT_QUEUE);
sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL);
}
cnt++;
@@ -3520,7 +3527,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
continue;
}
/* Free associations that are NOT killing us */
- if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) {
struct mbuf *op_err;
@@ -3533,8 +3540,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
SCTP_TCB_UNLOCK(asoc);
continue;
}
- if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE,
@@ -3637,6 +3644,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
(void)sctp_m_free(ip_pcb->inp_options);
ip_pcb->inp_options = 0;
}
+
+
#ifdef INET6
if (ip_pcb->inp_vflag & INP_IPV6) {
struct in6pcb *in6p;
@@ -4797,7 +4806,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
}
/* Now the read queue needs to be cleaned up (only once) */
if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) {
- stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_ABOUT_TO_BE_FREED);
SCTP_INP_READ_LOCK(inp);
TAILQ_FOREACH(sq, &inp->read_queue, next) {
if (sq->stcb == stcb) {
@@ -4851,7 +4860,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
if ((stcb->asoc.refcnt) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
- stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
}
SCTP_TCB_UNLOCK(stcb);
@@ -4864,6 +4873,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
sctp_sorwakeup(inp, so);
sctp_sowwakeup(inp, so);
}
+
#ifdef SCTP_LOG_CLOSING
sctp_log_closing(inp, stcb, 9);
#endif
@@ -4922,6 +4932,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
}
}
}
+
/*
* Make it invalid too, that way if its about to run it will abort
* and return.
@@ -4931,7 +4942,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
atomic_add_int(&stcb->asoc.refcnt, -1);
}
if (stcb->asoc.refcnt) {
- stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
if (from_inpcbfree == SCTP_NORMAL_PROC) {
SCTP_INP_INFO_WUNLOCK();
@@ -5339,6 +5350,7 @@ sctp_update_ep_vflag(struct sctp_inpcb *inp)
__func__);
continue;
}
+
if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
continue;
}
@@ -5752,6 +5764,7 @@ sctp_startup_mcore_threads(void)
i++;
}
}
+
/* Now start them all */
CPU_FOREACH(cpu) {
(void)kproc_create(sctp_mcore_thread,
@@ -6267,7 +6280,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
* assoc? straighten out locks.
*/
if (stcb_tmp) {
- if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb_tmp) == SCTP_STATE_COOKIE_WAIT) {
struct mbuf *op_err;
char msg[SCTP_DIAG_INFO_LEN];
@@ -6286,6 +6299,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
}
SCTP_TCB_UNLOCK(stcb_tmp);
}
+
if (stcb->asoc.state == 0) {
/* the assoc was freed? */
return (-12);
@@ -6366,7 +6380,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
* assoc? straighten out locks.
*/
if (stcb_tmp) {
- if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb_tmp) == SCTP_STATE_COOKIE_WAIT) {
struct mbuf *op_err;
char msg[SCTP_DIAG_INFO_LEN];
@@ -6708,6 +6722,8 @@ next_param:
if (p_random != NULL) {
keylen = sizeof(*p_random) + random_len;
memcpy(new_key->key, p_random, keylen);
+ } else {
+ keylen = 0;
}
/* append in the AUTH chunks */
if (chunks != NULL) {
@@ -7003,6 +7019,7 @@ sctp_drain_mbufs(struct sctp_tcb *stcb)
if (!fnd) {
asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn - 1;
}
+
/*
* Question, should we go through the delivery queue? The
* only reason things are on here is the app not reading OR
diff --git a/freebsd/sys/netinet/sctp_pcb.h b/freebsd/sys/netinet/sctp_pcb.h
index 3fc03399..5b41ae8a 100644
--- a/freebsd/sys/netinet/sctp_pcb.h
+++ b/freebsd/sys/netinet/sctp_pcb.h
@@ -363,7 +363,7 @@ struct sctp_inpcb {
union {
struct inpcb inp;
char align[(sizeof(struct in6pcb) + SCTP_ALIGNM1) &
- ~SCTP_ALIGNM1];
+ ~SCTP_ALIGNM1];
} ip_inp;
@@ -389,7 +389,7 @@ struct sctp_inpcb {
uint64_t sctp_features; /* Feature flags */
uint32_t sctp_flags; /* INP state flag set */
uint32_t sctp_mobility_features; /* Mobility Feature flags */
- struct sctp_pcb sctp_ep;/* SCTP ep data */
+ struct sctp_pcb sctp_ep; /* SCTP ep data */
/* head of the hash of all associations */
struct sctpasochead *sctp_tcbhash;
u_long sctp_hashmark;
@@ -492,8 +492,7 @@ int SCTP6_ARE_ADDR_EQUAL(struct sockaddr_in6 *a, struct sockaddr_in6 *b);
void sctp_fill_pcbinfo(struct sctp_pcbinfo *);
-struct sctp_ifn *
- sctp_find_ifn(void *ifn, uint32_t ifn_index);
+struct sctp_ifn *sctp_find_ifn(void *ifn, uint32_t ifn_index);
struct sctp_vrf *sctp_allocate_vrf(int vrfid);
struct sctp_vrf *sctp_find_vrf(uint32_t vrfid);
@@ -524,7 +523,7 @@ void sctp_free_ifn(struct sctp_ifn *sctp_ifnp);
void sctp_free_ifa(struct sctp_ifa *sctp_ifap);
-void
+void
sctp_del_addr_from_vrf(uint32_t vrfid, struct sockaddr *addr,
uint32_t ifn_index, const char *if_name);
@@ -534,7 +533,7 @@ struct sctp_nets *sctp_findnet(struct sctp_tcb *, struct sockaddr *);
struct sctp_inpcb *sctp_pcb_findep(struct sockaddr *, int, int, uint32_t);
-int
+int
sctp_inpcb_bind(struct socket *, struct sockaddr *,
struct sctp_ifa *, struct thread *);
@@ -563,8 +562,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **,
struct sockaddr *, struct sctp_nets **, struct sockaddr *,
struct sctp_tcb *);
-struct sctp_tcb *
- sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock);
+struct sctp_tcb *sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock);
struct sctp_tcb *
sctp_findassociation_ep_asocid(struct sctp_inpcb *,
diff --git a/freebsd/sys/netinet/sctp_peeloff.c b/freebsd/sys/netinet/sctp_peeloff.c
index ad96b88c..14a7c381 100644
--- a/freebsd/sys/netinet/sctp_peeloff.c
+++ b/freebsd/sys/netinet/sctp_peeloff.c
@@ -76,7 +76,7 @@ sctp_can_peel_off(struct socket *head, sctp_assoc_t assoc_id)
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOENT);
return (ENOENT);
}
- state = SCTP_GET_STATE((&stcb->asoc));
+ state = SCTP_GET_STATE(stcb);
if ((state == SCTP_STATE_EMPTY) ||
(state == SCTP_STATE_INUSE)) {
SCTP_TCB_UNLOCK(stcb);
@@ -105,13 +105,15 @@ sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
return (ENOTCONN);
}
- state = SCTP_GET_STATE((&stcb->asoc));
+
+ state = SCTP_GET_STATE(stcb);
if ((state == SCTP_STATE_EMPTY) ||
(state == SCTP_STATE_INUSE)) {
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
return (ENOTCONN);
}
+
n_inp = (struct sctp_inpcb *)so->so_pcb;
n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
SCTP_PCB_FLAGS_CONNECTED |
diff --git a/freebsd/sys/netinet/sctp_structs.h b/freebsd/sys/netinet/sctp_structs.h
index d60705b4..c4eafc26 100644
--- a/freebsd/sys/netinet/sctp_structs.h
+++ b/freebsd/sys/netinet/sctp_structs.h
@@ -105,7 +105,7 @@ TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list);
#define SCTP_ASOC_ANY_STATE 0x00000000
typedef void (*asoc_func) (struct sctp_inpcb *, struct sctp_tcb *, void *ptr,
- uint32_t val);
+ uint32_t val);
typedef int (*inp_func) (struct sctp_inpcb *, void *ptr, uint32_t val);
typedef void (*end_func) (void *ptr, uint32_t val);
@@ -144,7 +144,7 @@ struct sctp_iterator {
asoc_func function_assoc; /* per assoc function */
inp_func function_inp; /* per endpoint function */
inp_func function_inp_end; /* end INP function */
- end_func function_atend;/* iterator completion function */
+ end_func function_atend; /* iterator completion function */
void *pointer; /* pointer for apply func to use */
uint32_t val; /* value for apply func to use */
uint32_t pcb_flags; /* endpoint flags being checked */
@@ -231,7 +231,7 @@ struct rtcc_cc {
uint64_t bw_tot_time; /* The total time since sending began */
uint64_t new_tot_time; /* temp holding the new value */
uint64_t bw_bytes_at_last_rttc; /* What bw_bytes was at last rtt calc */
- uint32_t cwnd_at_bw_set;/* Cwnd at last bw saved - lbw */
+ uint32_t cwnd_at_bw_set; /* Cwnd at last bw saved - lbw */
uint32_t vol_reduce; /* cnt of voluntary reductions */
uint16_t steady_step; /* The number required to be in steady state */
uint16_t step_cnt; /* The current number */
@@ -240,7 +240,8 @@ struct rtcc_cc {
uint8_t use_dccc_ecn; /* Flag to enable DCCC ECN */
uint8_t tls_needs_set; /* Flag to indicate we need to set tls 0 or 1
* means set at send 2 not */
- uint8_t last_step_state;/* Last state if steady state stepdown is on */
+ uint8_t last_step_state; /* Last state if steady state stepdown
+ * is on */
uint8_t rtt_set_this_sack; /* Flag saying this sack had RTT calc
* on it */
uint8_t last_inst_ind; /* Last saved inst indication */
@@ -331,8 +332,8 @@ struct sctp_nets {
uint8_t dscp;
struct timeval start_time; /* time when this net was created */
- uint32_t marked_retrans;/* number or DATA chunks marked for timer
- * based retransmissions */
+ uint32_t marked_retrans; /* number or DATA chunks marked for
+ * timer based retransmissions */
uint32_t marked_fastretrans;
uint32_t heart_beat_delay; /* Heart Beat delay in ms */
@@ -706,28 +707,28 @@ struct sctp_nonpad_sndrcvinfo {
struct sctp_cc_functions {
void (*sctp_set_initial_cc_param) (struct sctp_tcb *stcb, struct sctp_nets *net);
void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *stcb,
- struct sctp_association *asoc,
- int accum_moved, int reneged_all, int will_exit);
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
void (*sctp_cwnd_update_exit_pf) (struct sctp_tcb *stcb, struct sctp_nets *net);
void (*sctp_cwnd_update_after_fr) (struct sctp_tcb *stcb,
- struct sctp_association *asoc);
+ struct sctp_association *asoc);
void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *stcb,
- struct sctp_nets *net);
+ struct sctp_nets *net);
void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *stcb,
- struct sctp_nets *net, int in_window, int num_pkt_lost);
+ struct sctp_nets *net, int in_window, int num_pkt_lost);
void (*sctp_cwnd_update_after_packet_dropped) (struct sctp_tcb *stcb,
- struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
- uint32_t *bottle_bw, uint32_t *on_queue);
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t *bottle_bw, uint32_t *on_queue);
void (*sctp_cwnd_update_after_output) (struct sctp_tcb *stcb,
- struct sctp_nets *net, int burst_limit);
+ struct sctp_nets *net, int burst_limit);
void (*sctp_cwnd_update_packet_transmitted) (struct sctp_tcb *stcb,
- struct sctp_nets *net);
+ struct sctp_nets *net);
void (*sctp_cwnd_update_tsn_acknowledged) (struct sctp_nets *net,
- struct sctp_tmit_chunk *);
+ struct sctp_tmit_chunk *);
void (*sctp_cwnd_new_transmission_begins) (struct sctp_tcb *stcb,
- struct sctp_nets *net);
+ struct sctp_nets *net);
void (*sctp_cwnd_prepare_net_for_sack) (struct sctp_tcb *stcb,
- struct sctp_nets *net);
+ struct sctp_nets *net);
int (*sctp_cwnd_socket_option) (struct sctp_tcb *stcb, int set, struct sctp_cc_option *);
void (*sctp_rtt_calculated) (struct sctp_tcb *, struct sctp_nets *, struct timeval *);
};
@@ -738,25 +739,25 @@ struct sctp_cc_functions {
*/
struct sctp_ss_functions {
void (*sctp_ss_init) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- int holds_lock);
+ int holds_lock);
void (*sctp_ss_clear) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- int clear_values, int holds_lock);
+ int clear_values, int holds_lock);
void (*sctp_ss_init_stream) (struct sctp_tcb *stcb, struct sctp_stream_out *strq, struct sctp_stream_out *with_strq);
void (*sctp_ss_add_to_stream) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
+ struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
int (*sctp_ss_is_empty) (struct sctp_tcb *stcb, struct sctp_association *asoc);
void (*sctp_ss_remove_from_stream) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
- struct sctp_stream_out *(*sctp_ss_select_stream) (struct sctp_tcb *stcb,
- struct sctp_nets *net, struct sctp_association *asoc);
+ struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
+struct sctp_stream_out *(*sctp_ss_select_stream) (struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_association *asoc);
void (*sctp_ss_scheduled) (struct sctp_tcb *stcb, struct sctp_nets *net,
- struct sctp_association *asoc, struct sctp_stream_out *strq, int moved_how_much);
+ struct sctp_association *asoc, struct sctp_stream_out *strq, int moved_how_much);
void (*sctp_ss_packet_done) (struct sctp_tcb *stcb, struct sctp_nets *net,
- struct sctp_association *asoc);
+ struct sctp_association *asoc);
int (*sctp_ss_get_value) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_stream_out *strq, uint16_t *value);
+ struct sctp_stream_out *strq, uint16_t *value);
int (*sctp_ss_set_value) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_stream_out *strq, uint16_t value);
+ struct sctp_stream_out *strq, uint16_t value);
int (*sctp_ss_is_user_msgs_incomplete) (struct sctp_tcb *stcb, struct sctp_association *asoc);
};
diff --git a/freebsd/sys/netinet/sctp_sysctl.c b/freebsd/sys/netinet/sctp_sysctl.c
index f1a8d1d5..a4343cbe 100644
--- a/freebsd/sys/netinet/sctp_sysctl.c
+++ b/freebsd/sys/netinet/sctp_sysctl.c
@@ -411,7 +411,7 @@ sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS)
xinpcb.total_recvs = inp->total_recvs;
xinpcb.total_nospaces = inp->total_nospaces;
xinpcb.fragmentation_point = inp->sctp_frag_point;
- xinpcb.socket = inp->sctp_socket;
+ xinpcb.socket = (uintptr_t)inp->sctp_socket;
so = inp->sctp_socket;
if ((so == NULL) ||
(!SCTP_IS_LISTENING(inp)) ||
diff --git a/freebsd/sys/netinet/sctp_timer.c b/freebsd/sys/netinet/sctp_timer.c
index c0253840..86ed4d0d 100644
--- a/freebsd/sys/netinet/sctp_timer.c
+++ b/freebsd/sys/netinet/sctp_timer.c
@@ -651,6 +651,7 @@ start_again:
sctp_log_fr(chk->rec.data.tsn, chk->snd_count,
0, SCTP_FR_T3_MARKED);
}
+
if (chk->rec.data.chunk_was_revoked) {
/* deflate the cwnd */
chk->whoTo->cwnd -= chk->book_size;
@@ -717,6 +718,7 @@ start_again:
/* we did not subtract the same things? */
audit_tf = 1;
}
+
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
sctp_log_fr(tsnfirst, tsnlast, num_mk, SCTP_FR_T3_TIMEOUT);
}
@@ -791,6 +793,7 @@ start_again:
(uint32_t)(uintptr_t)chk->whoTo,
chk->rec.data.tsn);
}
+
sctp_flight_size_increase(chk);
sctp_total_flight_increase(stcb, chk);
}
@@ -911,6 +914,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp,
(net->flight_size == 0)) {
(*stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) (stcb, net);
}
+
/*
* setup the sat loss recovery that prevents satellite cwnd advance.
*/
@@ -939,6 +943,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp,
RTFREE(net->ro.ro_rt);
net->ro.ro_rt = NULL;
}
+
/* Was it our primary? */
if ((stcb->asoc.primary_destination == net) && (alt != net)) {
/*
@@ -959,7 +964,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp,
* Special case for cookie-echo'ed case, we don't do output but must
* await the COOKIE-ACK before retransmission
*/
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
/*
* Here we just reset the timer and start again since we
* have not established the asoc
@@ -1001,7 +1006,7 @@ sctp_t1init_timer(struct sctp_inpcb *inp,
sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
return (0);
}
- if (SCTP_GET_STATE((&stcb->asoc)) != SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) {
return (0);
}
if (sctp_threshold_management(inp, stcb, net,
@@ -1049,7 +1054,7 @@ sctp_cookie_timer(struct sctp_inpcb *inp,
}
}
if (cookie == NULL) {
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
/* FOOBAR! */
struct mbuf *op_err;
@@ -1061,7 +1066,7 @@ sctp_cookie_timer(struct sctp_inpcb *inp,
#ifdef INVARIANTS
panic("Cookie timer expires in wrong state?");
#else
- SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(&stcb->asoc));
+ SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(stcb));
return (0);
#endif
}
@@ -1212,6 +1217,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asconf->whoTo = alt;
atomic_add_int(&alt->ref_count, 1);
}
+
/* See if an ECN Echo is also stranded */
TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
if ((chk->whoTo == net) &&
@@ -1554,16 +1560,15 @@ sctp_autoclose_timer(struct sctp_inpcb *inp,
* there is nothing queued to send, so I'm
* done...
*/
- if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) {
/* only send SHUTDOWN 1st time thru */
struct sctp_nets *netp;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
diff --git a/freebsd/sys/netinet/sctp_uio.h b/freebsd/sys/netinet/sctp_uio.h
index 8732219c..c91e0414 100644
--- a/freebsd/sys/netinet/sctp_uio.h
+++ b/freebsd/sys/netinet/sctp_uio.h
@@ -258,13 +258,14 @@ struct sctp_snd_all_completes {
/* for the endpoint */
/* The lower four bits is an enumeration of PR-SCTP policies */
-#define SCTP_PR_SCTP_NONE 0x0000/* Reliable transfer */
-#define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */
-#define SCTP_PR_SCTP_PRIO 0x0002/* Buffer based PR-SCTP */
+#define SCTP_PR_SCTP_NONE 0x0000 /* Reliable transfer */
+#define SCTP_PR_SCTP_TTL 0x0001 /* Time based PR-SCTP */
+#define SCTP_PR_SCTP_PRIO 0x0002 /* Buffer based PR-SCTP */
#define SCTP_PR_SCTP_BUF SCTP_PR_SCTP_PRIO /* For backwards compatibility */
-#define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */
+#define SCTP_PR_SCTP_RTX 0x0003 /* Number of retransmissions based
+ * PR-SCTP */
#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_RTX
-#define SCTP_PR_SCTP_ALL 0x000f/* Used for aggregated stats */
+#define SCTP_PR_SCTP_ALL 0x000f /* Used for aggregated stats */
#define PR_SCTP_POLICY(x) ((x) & 0x0f)
#define PR_SCTP_ENABLED(x) ((PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE) && \
@@ -744,7 +745,7 @@ struct sctp_prstatus {
struct sctp_cwnd_args {
struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */
- uint32_t cwnd_new_value;/* cwnd in k */
+ uint32_t cwnd_new_value; /* cwnd in k */
uint32_t pseudo_cumack;
uint16_t inflight; /* flightsize in k */
uint16_t cwnd_augment; /* increment to it */
@@ -758,9 +759,9 @@ struct sctp_blk_args {
uint32_t onsb; /* in 1k bytes */
uint32_t sndlen; /* len of send being attempted */
uint32_t peer_rwnd; /* rwnd of peer */
- uint16_t send_sent_qcnt;/* chnk cnt */
+ uint16_t send_sent_qcnt; /* chnk cnt */
uint16_t stream_qcnt; /* chnk cnt */
- uint16_t chunks_on_oque;/* chunks out */
+ uint16_t chunks_on_oque; /* chunks out */
uint16_t flight_size; /* flight size in k */
};
@@ -952,7 +953,7 @@ struct sctpstat {
uint32_t sctps_collisionestab;
uint32_t sctps_passiveestab; /* sctpStats 3 (Counter32) */
uint32_t sctps_aborted; /* sctpStats 4 (Counter32) */
- uint32_t sctps_shutdown;/* sctpStats 5 (Counter32) */
+ uint32_t sctps_shutdown; /* sctpStats 5 (Counter32) */
uint32_t sctps_outoftheblue; /* sctpStats 6 (Counter32) */
uint32_t sctps_checksumerrors; /* sctpStats 7 (Counter32) */
uint32_t sctps_outcontrolchunks; /* sctpStats 8 (Counter64) */
@@ -971,12 +972,12 @@ struct sctpstat {
uint32_t sctps_recvdatagrams; /* total input datagrams */
uint32_t sctps_recvpktwithdata; /* total packets that had data */
uint32_t sctps_recvsacks; /* total input SACK chunks */
- uint32_t sctps_recvdata;/* total input DATA chunks */
+ uint32_t sctps_recvdata; /* total input DATA chunks */
uint32_t sctps_recvdupdata; /* total input duplicate DATA chunks */
uint32_t sctps_recvheartbeat; /* total input HB chunks */
uint32_t sctps_recvheartbeatack; /* total input HB-ACK chunks */
- uint32_t sctps_recvecne;/* total input ECNE chunks */
- uint32_t sctps_recvauth;/* total input AUTH chunks */
+ uint32_t sctps_recvecne; /* total input ECNE chunks */
+ uint32_t sctps_recvauth; /* total input AUTH chunks */
uint32_t sctps_recvauthmissing; /* total input chunks missing AUTH */
uint32_t sctps_recvivalhmacid; /* total number of invalid HMAC ids
* received */
@@ -993,7 +994,7 @@ struct sctpstat {
/* output statistics: */
uint32_t sctps_sendpackets; /* total output packets */
uint32_t sctps_sendsacks; /* total output SACKs */
- uint32_t sctps_senddata;/* total output DATA chunks */
+ uint32_t sctps_senddata; /* total output DATA chunks */
uint32_t sctps_sendretransdata; /* total output retransmitted DATA
* chunks */
uint32_t sctps_sendfastretrans; /* total output fast retransmitted
@@ -1003,8 +1004,8 @@ struct sctpstat {
* chunk (u-del multi-fr
* algo). */
uint32_t sctps_sendheartbeat; /* total output HB chunks */
- uint32_t sctps_sendecne;/* total output ECNE chunks */
- uint32_t sctps_sendauth;/* total output AUTH chunks FIXME */
+ uint32_t sctps_sendecne; /* total output ECNE chunks */
+ uint32_t sctps_sendauth; /* total output AUTH chunks FIXME */
uint32_t sctps_senderrors; /* ip_output error counter */
uint32_t sctps_send_spare; /* formerly sctps_sendnocrc */
uint32_t sctps_sendswcrc;
@@ -1012,8 +1013,8 @@ struct sctpstat {
/* PCKDROPREP statistics: */
uint32_t sctps_pdrpfmbox; /* Packet drop from middle box */
uint32_t sctps_pdrpfehos; /* P-drop from end host */
- uint32_t sctps_pdrpmbda;/* P-drops with data */
- uint32_t sctps_pdrpmbct;/* P-drops, non-data, non-endhost */
+ uint32_t sctps_pdrpmbda; /* P-drops with data */
+ uint32_t sctps_pdrpmbct; /* P-drops, non-data, non-endhost */
uint32_t sctps_pdrpbwrpt; /* P-drop, non-endhost, bandwidth rep
* only */
uint32_t sctps_pdrpcrupt; /* P-drop, not enough for chunk header */
@@ -1024,16 +1025,17 @@ struct sctpstat {
uint32_t sctps_pdrpdnfnd; /* P-drop, attempt reverse TSN lookup */
uint32_t sctps_pdrpdiwnp; /* P-drop, e-host confirms zero-rwnd */
uint32_t sctps_pdrpdizrw; /* P-drop, midbox confirms no space */
- uint32_t sctps_pdrpbadd;/* P-drop, data did not match TSN */
- uint32_t sctps_pdrpmark;/* P-drop, TSN's marked for Fast Retran */
+ uint32_t sctps_pdrpbadd; /* P-drop, data did not match TSN */
+ uint32_t sctps_pdrpmark; /* P-drop, TSN's marked for Fast
+ * Retran */
/* timeouts */
uint32_t sctps_timoiterator; /* Number of iterator timers that
* fired */
- uint32_t sctps_timodata;/* Number of T3 data time outs */
+ uint32_t sctps_timodata; /* Number of T3 data time outs */
uint32_t sctps_timowindowprobe; /* Number of window probe (T3) timers
* that fired */
- uint32_t sctps_timoinit;/* Number of INIT timers that fired */
- uint32_t sctps_timosack;/* Number of sack timers that fired */
+ uint32_t sctps_timoinit; /* Number of INIT timers that fired */
+ uint32_t sctps_timosack; /* Number of sack timers that fired */
uint32_t sctps_timoshutdown; /* Number of shutdown timers that
* fired */
uint32_t sctps_timoheartbeat; /* Number of heartbeat timers that
@@ -1175,14 +1177,11 @@ struct xsctp_inpcb {
uint16_t local_port;
uint16_t qlen_old;
uint16_t maxqlen_old;
- void *socket;
+ uint16_t __spare16;
+ kvaddr_t socket;
uint32_t qlen;
uint32_t maxqlen;
-#if defined(__LP64__)
- uint32_t extra_padding[27]; /* future */
-#else
- uint32_t extra_padding[28]; /* future */
-#endif
+ uint32_t extra_padding[26]; /* future */
};
struct xsctp_tcb {
@@ -1192,7 +1191,7 @@ struct xsctp_tcb {
uint32_t state; /* sctpAssocEntry 8 */
uint32_t in_streams; /* sctpAssocEntry 9 */
uint32_t out_streams; /* sctpAssocEntry 10 */
- uint32_t max_nr_retrans;/* sctpAssocEntry 11 */
+ uint32_t max_nr_retrans; /* sctpAssocEntry 11 */
uint32_t primary_process; /* sctpAssocEntry 12 */
uint32_t T1_expireries; /* sctpAssocEntry 13 */
uint32_t T2_expireries; /* sctpAssocEntry 14 */
@@ -1305,37 +1304,37 @@ void sctp_freeladdrs(struct sockaddr *);
int sctp_opt_info(int, sctp_assoc_t, int, void *, socklen_t *);
/* deprecated */
-ssize_t
+ssize_t
sctp_sendmsg(int, const void *, size_t, const struct sockaddr *,
socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
/* deprecated */
-ssize_t
+ssize_t
sctp_send(int, const void *, size_t,
const struct sctp_sndrcvinfo *, int);
/* deprecated */
-ssize_t
+ssize_t
sctp_sendx(int, const void *, size_t, struct sockaddr *,
int, struct sctp_sndrcvinfo *, int);
/* deprecated */
-ssize_t
+ssize_t
sctp_sendmsgx(int sd, const void *, size_t, struct sockaddr *,
int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
sctp_assoc_t sctp_getassocid(int, struct sockaddr *);
/* deprecated */
-ssize_t
+ssize_t
sctp_recvmsg(int, void *, size_t, struct sockaddr *, socklen_t *,
struct sctp_sndrcvinfo *, int *);
-ssize_t
+ssize_t
sctp_sendv(int, const struct iovec *, int, struct sockaddr *,
int, void *, socklen_t, unsigned int, int);
-ssize_t
+ssize_t
sctp_recvv(int, const struct iovec *, int, struct sockaddr *,
socklen_t *, void *, socklen_t *, unsigned int *, int *);
diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c
index 071d44c2..b519971c 100644
--- a/freebsd/sys/netinet/sctp_usrreq.c
+++ b/freebsd/sys/netinet/sctp_usrreq.c
@@ -391,6 +391,7 @@ sctp_getcred(SYSCTL_HANDLER_ARGS)
SCTP_INP_DECR_REF(inp);
goto cred_can_cont;
}
+
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
error = ENOENT;
goto out;
@@ -431,6 +432,7 @@ sctp_abort(struct socket *so)
if (inp == NULL) {
return;
}
+
sctp_must_try_again:
flags = inp->sctp_flags;
#ifdef SCTP_LOG_CLOSING
@@ -704,8 +706,7 @@ sctp_disconnect(struct socket *so)
if (((so->so_options & SO_LINGER) &&
(so->so_linger == 0)) ||
(so->so_rcv.sb_cc > 0)) {
- if (SCTP_GET_STATE(asoc) !=
- SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) {
/* Left with Data unread */
struct mbuf *op_err;
@@ -714,8 +715,8 @@ sctp_disconnect(struct socket *so)
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
}
SCTP_INP_RUNLOCK(inp);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
@@ -730,17 +731,16 @@ sctp_disconnect(struct socket *so)
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
goto abort_anyway;
}
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
/* only send SHUTDOWN 1st time thru */
struct sctp_nets *netp;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
@@ -773,11 +773,11 @@ sctp_disconnect(struct socket *so)
netp = stcb->asoc.primary_destination;
}
- asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
netp);
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
@@ -789,8 +789,8 @@ sctp_disconnect(struct socket *so)
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
SCTP_INP_RUNLOCK(inp);
@@ -921,9 +921,9 @@ sctp_shutdown(struct socket *so)
SCTP_INP_RUNLOCK(inp);
return (0);
}
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_ECHOED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_ECHOED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN)) {
/*
* If we are not in or before ESTABLISHED, there is
* no protocol action required.
@@ -937,7 +937,7 @@ sctp_shutdown(struct socket *so)
} else {
netp = stcb->asoc.primary_destination;
}
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) &&
TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->stream_queue_cnt == 0)) {
@@ -946,8 +946,7 @@ sctp_shutdown(struct socket *so)
}
/* there is nothing queued to send, so I'm done... */
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
sctp_send_shutdown(stcb, netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
@@ -957,9 +956,9 @@ sctp_shutdown(struct socket *so)
* We still got (or just got) data to send, so set
* SHUTDOWN_PENDING.
*/
- SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
- SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_PARTIAL_MSG_LEFT);
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
@@ -1369,11 +1368,13 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
return (EADDRINUSE);
}
+
if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
(sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE))) {
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
+
if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
SCTP_INP_RLOCK(inp);
stcb = LIST_FIRST(&inp->sctp_asoc_list);
@@ -1438,6 +1439,7 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
goto out_now;
}
}
+
/* FIX ME: do we want to pass in a vrf on the connect call? */
vrf_id = inp->def_vrf_id;
@@ -1457,7 +1459,7 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
/* Set the connected flag so we can queue data */
soisconnecting(so);
}
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
/* move to second address */
switch (sa->sa_family) {
#ifdef INET
@@ -1549,6 +1551,7 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
+
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -2393,6 +2396,7 @@ flags_out:
break;
}
}
+
if (stcb != NULL) {
/* Applies to the specific association */
paddrp->spp_flags = 0;
@@ -3262,6 +3266,7 @@ flags_out:
break;
}
}
+
if (stcb != NULL) {
if (net != NULL) {
thlds->spt_pathmaxrxt = net->failure_threshold;
@@ -3374,6 +3379,7 @@ flags_out:
break;
}
}
+
if (stcb != NULL) {
if (net) {
encaps->sue_port = net->port;
@@ -4252,6 +4258,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (sctp_auth_add_chunk(sauth->sauth_chunk, inp->sctp_ep.local_auth_chunks)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
+ } else {
+ inp->auth_supported = 1;
}
SCTP_INP_WUNLOCK(inp);
break;
@@ -4397,6 +4405,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
error = EINVAL;
break;
}
+
hmaclist = sctp_alloc_hmaclist((uint16_t)shmac->shmac_number_of_idents);
if (hmaclist == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
@@ -4589,6 +4598,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_INP_RUNLOCK(inp);
}
+
}
break;
}
@@ -5272,12 +5282,14 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
+
if ((paddrp->spp_flags & SPP_PMTUD_ENABLE) && (paddrp->spp_flags & SPP_PMTUD_DISABLE)) {
if (stcb)
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
+
if (stcb != NULL) {
/************************TCB SPECIFIC SET ******************/
if (net != NULL) {
@@ -5413,6 +5425,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
net->failure_threshold = paddrp->spp_pathmaxrxt;
}
}
+
if (paddrp->spp_flags & SPP_HB_ENABLE) {
if (paddrp->spp_hbinterval != 0) {
stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
@@ -5523,6 +5536,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (paddrp->spp_pathmaxrxt != 0) {
inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
}
+
if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
else if (paddrp->spp_hbinterval != 0) {
@@ -5530,6 +5544,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
}
+
if (paddrp->spp_flags & SPP_HB_ENABLE) {
if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
@@ -6482,6 +6497,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
break;
}
}
+
if (stcb != NULL) {
if (net != NULL) {
net->port = encaps->sue_port;
@@ -6865,6 +6881,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return EINVAL;
}
+
switch (addr->sa_family) {
#ifdef INET6
case AF_INET6:
@@ -6970,6 +6987,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
error = EALREADY;
goto out_now;
}
+
vrf_id = inp->def_vrf_id;
/* We are GOOD to go */
stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
@@ -6984,7 +7002,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
/* Set the connected flag so we can queue data */
soisconnecting(so);
}
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
/* initialize authentication parameters for the assoc */
@@ -6996,6 +7014,7 @@ out_now:
if (create_lock_on) {
SCTP_ASOC_CREATE_UNLOCK(inp);
}
+
SCTP_INP_DECR_REF(inp);
return (error);
}
@@ -7134,6 +7153,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
return (EADDRINUSE);
}
}
+
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
/* We are already connected AND the TCP model */
@@ -7201,7 +7221,7 @@ sctp_accept(struct socket *so, struct sockaddr **addr)
SCTP_TCB_LOCK(stcb);
SCTP_INP_RUNLOCK(inp);
store = stcb->asoc.primary_destination->ro._l_addr;
- stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
SCTP_TCB_UNLOCK(stcb);
switch (store.sa.sa_family) {
#ifdef INET
@@ -7336,6 +7356,7 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr)
SCTP_TCB_UNLOCK(stcb);
goto notConn;
}
+
vrf_id = inp->def_vrf_id;
sctp_ifa = sctp_source_address_selection(inp,
stcb,
diff --git a/freebsd/sys/netinet/sctp_var.h b/freebsd/sys/netinet/sctp_var.h
index 84cbfc88..175888c3 100644
--- a/freebsd/sys/netinet/sctp_var.h
+++ b/freebsd/sys/netinet/sctp_var.h
@@ -341,12 +341,12 @@ int sctp_input(struct mbuf **, int *, int);
void sctp_pathmtu_adjustment(struct sctp_tcb *, uint16_t);
void sctp_drain(void);
void sctp_init(void);
-void
+void
sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
uint8_t, uint8_t, uint16_t, uint32_t);
int sctp_flush(struct socket *, int);
int sctp_shutdown(struct socket *);
-int
+int
sctp_bindx(struct socket *, int, struct sockaddr_storage *,
int, int, struct proc *);
diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c
index aad1e19d..c3cb115e 100644
--- a/freebsd/sys/netinet/sctputil.c
+++ b/freebsd/sys/netinet/sctputil.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#include <netinet/in_kdtrace.h>
#include <sys/proc.h>
#ifdef INET6
#include <netinet/icmp6.h>
@@ -1016,7 +1017,7 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asoc = &stcb->asoc;
/* init all variables to a known value. */
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE);
+ SCTP_SET_STATE(stcb, SCTP_STATE_INUSE);
asoc->max_burst = inp->sctp_ep.max_burst;
asoc->fr_max_burst = inp->sctp_ep.fr_max_burst;
asoc->heart_beat_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
@@ -1435,6 +1436,7 @@ select_a_new_ep:
atomic_add_int(&it->stcb->asoc.refcnt, -1);
iteration_count = 0;
}
+
/* run function on this one */
(*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
@@ -1788,6 +1790,7 @@ sctp_timeout_handler(void *t)
if ((stcb == NULL) || (inp == NULL)) {
break;
}
+
if (sctp_cookie_timer(inp, stcb, net)) {
/* no need to unlock on tcb its gone */
goto out_decr;
@@ -1983,6 +1986,7 @@ out_decr:
if (inp) {
SCTP_INP_DECR_REF(inp);
}
+
out_no_decr:
SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type = %d)\n", type);
CURVNET_RESTORE();
@@ -2498,9 +2502,8 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
}
timevalsub(&now, old);
/* store the current RTT in us */
- net->rtt = (uint64_t)1000000 *(uint64_t)now.tv_sec +
- (uint64_t)now.tv_usec;
-
+ net->rtt = (uint64_t)1000000 * (uint64_t)now.tv_sec +
+ (uint64_t)now.tv_usec;
/* compute rtt in ms */
rtt = (int32_t)(net->rtt / 1000);
if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) {
@@ -2522,6 +2525,7 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
net->lan_type = SCTP_LAN_LOCAL;
}
}
+
/***************************/
/* 2. update RTTVAR & SRTT */
/***************************/
@@ -2798,7 +2802,7 @@ set_error:
((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
SOCK_LOCK(stcb->sctp_socket);
if (from_peer) {
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
stcb->sctp_socket->so_error = ECONNREFUSED;
} else {
@@ -2806,8 +2810,8 @@ set_error:
stcb->sctp_socket->so_error = ECONNRESET;
}
} else {
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ETIMEDOUT);
stcb->sctp_socket->so_error = ETIMEDOUT;
} else {
@@ -2960,6 +2964,7 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint8_t sent, uint32_t error,
/* event not enabled */
return;
}
+
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
notifhdr_len = sizeof(struct sctp_send_failed_event);
} else {
@@ -3188,6 +3193,7 @@ sctp_notify_adaptation_layer(struct sctp_tcb *stcb)
/* event not enabled */
return;
}
+
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
@@ -3244,6 +3250,7 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error,
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
return;
}
+
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
@@ -3352,6 +3359,7 @@ sctp_notify_shutdown_event(struct sctp_tcb *stcb)
/* event not enabled */
return;
}
+
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
@@ -3401,6 +3409,7 @@ sctp_notify_sender_dry_event(struct sctp_tcb *stcb,
/* event not enabled */
return;
}
+
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* no space left */
@@ -3557,6 +3566,7 @@ sctp_notify_stream_reset(struct sctp_tcb *stcb,
/* event not enabled */
return;
}
+
m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
@@ -3691,8 +3701,8 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
if (stcb->sctp_socket->so_rcv.sb_state & SBS_CANTRCVMORE) {
return;
}
- if ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
- (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
(notification == SCTP_NOTIFY_INTERFACE_UP) ||
(notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
@@ -3766,16 +3776,16 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
break;
}
case SCTP_NOTIFY_ASSOC_LOC_ABORTED:
- if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
- ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 0, so_locked);
} else {
sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 0, so_locked);
}
break;
case SCTP_NOTIFY_ASSOC_REM_ABORTED:
- if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
- ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 1, so_locked);
} else {
sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 1, so_locked);
@@ -4019,7 +4029,7 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
if (stcb != NULL) {
/* We have a TCB to abort, send notification too */
sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED);
- stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_WAS_ABORTED);
/* Ok, now lets free it */
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
@@ -4030,8 +4040,8 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
@@ -4130,13 +4140,13 @@ sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
}
return;
} else {
- stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_WAS_ABORTED);
}
/* notify the peer */
sctp_send_abort_tcb(stcb, op_err, so_locked);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
/* notify the ulp */
@@ -4971,6 +4981,7 @@ sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr,
if (holds_lock == 0) {
SCTP_INP_RLOCK(inp);
}
+
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL)
continue;
@@ -5060,6 +5071,7 @@ sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock)
SCTP_IPI_ADDR_RUNLOCK();
return (NULL);
}
+
hash_of_addr = sctp_get_ifa_hash_val(addr);
hash_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
@@ -5121,9 +5133,8 @@ sctp_user_rcvd(struct sctp_tcb *stcb, uint32_t *freed_so_far, int hold_rlock,
atomic_add_int(&stcb->asoc.refcnt, 1);
- if (stcb->asoc.state & (SCTP_STATE_ABOUT_TO_BE_FREED |
- SCTP_STATE_SHUTDOWN_RECEIVED |
- SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (stcb->asoc.state & (SCTP_STATE_ABOUT_TO_BE_FREED | SCTP_STATE_SHUTDOWN_RECEIVED))) {
/* Pre-check If we are freeing no update */
goto no_lock;
}
@@ -5184,6 +5195,7 @@ out:
if (so && r_unlocked && hold_rlock) {
SCTP_INP_READ_LOCK(stcb->sctp_ep);
}
+
SCTP_INP_DECR_REF(stcb->sctp_ep);
no_lock:
atomic_add_int(&stcb->asoc.refcnt, -1);
@@ -5233,6 +5245,7 @@ sctp_sorecvmsg(struct socket *so,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
return (EINVAL);
}
+
if (msg_flags) {
in_flags = *msg_flags;
if (in_flags & MSG_PEEK)
@@ -5276,6 +5289,8 @@ sctp_sorecvmsg(struct socket *so,
sctp_misc_ints(SCTP_SORECV_ENTERPL,
rwnd_req, block_allowed, so->so_rcv.sb_cc, (uint32_t)uio->uio_resid);
}
+
+
error = sblock(&so->so_rcv, (block_allowed ? SBL_WAIT : 0));
if (error) {
goto release_unlocked;
@@ -5385,6 +5400,7 @@ restart_nosblocks:
hold_rlock = 0;
goto restart;
}
+
if ((control->length == 0) &&
(control->do_not_ref_stcb)) {
/*
@@ -5568,6 +5584,7 @@ found_one:
control->do_not_ref_stcb == 0) {
stcb->asoc.strmin[control->sinfo_stream].delivery_started = 1;
}
+
/* First lets get off the sinfo and sockaddr info */
if ((sinfo != NULL) && (filling_sinfo != 0)) {
sinfo->sinfo_stream = control->sinfo_stream;
@@ -5729,6 +5746,7 @@ get_more_data:
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
goto release;
}
+
if ((control->do_not_ref_stcb == 0) && stcb &&
stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
no_rcv_needed = 1;
@@ -5941,6 +5959,7 @@ wait_some_more:
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
goto release;
}
+
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)
goto release;
@@ -6069,6 +6088,7 @@ release:
SOCKBUF_UNLOCK(&so->so_rcv);
hold_sblock = 0;
}
+
sbunlock(&so->so_rcv);
sockbuf_lock = 0;
@@ -6106,6 +6126,7 @@ out:
if (sockbuf_lock) {
sbunlock(&so->so_rcv);
}
+
if (freecnt_applied) {
/*
* The lock on the socket buffer protects us so the free
@@ -6703,6 +6724,7 @@ sctp_local_addr_count(struct sctp_tcb *stcb)
SCTP_IPI_ADDR_RUNLOCK();
return (0);
}
+
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
/*
* bound all case: go through all ifns on the vrf
@@ -7362,3 +7384,49 @@ sctp_hc_get_mtu(union sctp_sockstore *addr, uint16_t fibnum)
}
return ((uint32_t)tcp_hc_getmtu(&inc));
}
+
+void
+sctp_set_state(struct sctp_tcb *stcb, int new_state)
+{ /* Replace the bits of stcb->asoc.state covered by SCTP_STATE_MASK with new_state; all other bits are kept. */
+#if defined(KDTRACE_HOOKS)
+ int old_state = stcb->asoc.state; /* complete pre-change value, later handed to the probe */
+#endif /* KDTRACE_HOOKS */
+
+ KASSERT((new_state & ~SCTP_STATE_MASK) == 0, /* new_state must not carry bits outside SCTP_STATE_MASK */
+ ("sctp_set_state: Can't set substate (new_state = %x)",
+ new_state));
+ stcb->asoc.state = (stcb->asoc.state & ~SCTP_STATE_MASK) | new_state; /* swap only the masked bits */
+ if ((new_state == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (new_state == SCTP_STATE_SHUTDOWN_SENT) ||
+ (new_state == SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING); /* for these three targets SHUTDOWN_PENDING is cleared here, so callers need not do it */
+ }
+#if defined(KDTRACE_HOOKS)
+ if (((old_state & SCTP_STATE_MASK) != new_state) && /* probe only when the masked state really changed ... */
+ !(((old_state & SCTP_STATE_MASK) == SCTP_STATE_EMPTY) && /* ... and skip the EMPTY -> INUSE transition */
+ (new_state == SCTP_STATE_INUSE))) {
+ SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state);
+ }
+#endif /* KDTRACE_HOOKS */
+}
+
+void
+sctp_add_substate(struct sctp_tcb *stcb, int substate)
+{ /* OR the given substate bits into stcb->asoc.state; the bits under SCTP_STATE_MASK are left as they are. */
+#if defined(KDTRACE_HOOKS)
+ int old_state = stcb->asoc.state; /* complete pre-change value, later handed to the probe */
+#endif /* KDTRACE_HOOKS */
+
+ KASSERT((substate & SCTP_STATE_MASK) == 0, /* substate must not carry bits inside SCTP_STATE_MASK */
+ ("sctp_add_substate: Can't set state (substate = %x)",
+ substate));
+ stcb->asoc.state |= substate;
+#if defined(KDTRACE_HOOKS)
+ if (((substate & SCTP_STATE_ABOUT_TO_BE_FREED) && /* probe only when ABOUT_TO_BE_FREED ... */
+ ((old_state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) ||
+ ((substate & SCTP_STATE_SHUTDOWN_PENDING) && /* ... or SHUTDOWN_PENDING goes from unset to set */
+ ((old_state & SCTP_STATE_SHUTDOWN_PENDING) == 0))) {
+ SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state);
+ }
+#endif /* KDTRACE_HOOKS */
+}
diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h
index 61d34591..c12fb210 100644
--- a/freebsd/sys/netinet/sctputil.h
+++ b/freebsd/sys/netinet/sctputil.h
@@ -72,11 +72,9 @@ int32_t
uint32_t
sctp_get_ifa_hash_val(struct sockaddr *addr);
-struct sctp_ifa *
- sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int hold_lock);
+struct sctp_ifa *sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int hold_lock);
-struct sctp_ifa *
- sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock);
+struct sctp_ifa *sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock);
uint32_t sctp_select_initial_TSN(struct sctp_pcb *);
@@ -147,13 +145,11 @@ struct sctp_paramhdr *
sctp_get_next_param(struct mbuf *, int,
struct sctp_paramhdr *, int);
-struct mbuf *
- sctp_add_pad_tombuf(struct mbuf *, int);
+struct mbuf *sctp_add_pad_tombuf(struct mbuf *, int);
-struct mbuf *
- sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
+struct mbuf *sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
-void
+void
sctp_ulp_notify(uint32_t, struct sctp_tcb *, uint32_t, void *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -168,7 +164,7 @@ sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
void sctp_stop_timers_for_shutdown(struct sctp_tcb *);
-void
+void
sctp_report_all_outbound(struct sctp_tcb *, uint16_t, int, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -177,7 +173,7 @@ sctp_report_all_outbound(struct sctp_tcb *, uint16_t, int, int
int sctp_expand_mapping_array(struct sctp_association *, uint32_t);
-void
+void
sctp_abort_notification(struct sctp_tcb *, uint8_t, uint16_t,
struct sctp_abort_chunk *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
@@ -203,7 +199,7 @@ sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *,
#endif
);
-void
+void
sctp_handle_ootb(struct mbuf *, int, int,
struct sockaddr *, struct sockaddr *,
struct sctphdr *, struct sctp_inpcb *,
@@ -211,7 +207,7 @@ sctp_handle_ootb(struct mbuf *, int, int,
uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
-int
+int
sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
int totaddr, int *error);
@@ -224,8 +220,7 @@ int sctp_is_there_an_abort_here(struct mbuf *, int, uint32_t *);
#ifdef INET6
uint32_t sctp_is_same_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
-struct sockaddr_in6 *
- sctp_recover_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
+struct sockaddr_in6 *sctp_recover_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
#define sctp_recover_scope_mac(addr, store) do { \
if ((addr->sin6_family == AF_INET6) && \
@@ -258,11 +253,11 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *,
struct mbuf *sctp_generate_cause(uint16_t, char *);
struct mbuf *sctp_generate_no_user_data_cause(uint32_t);
-void
+void
sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
struct sockaddr *sa, sctp_assoc_t assoc_id,
uint32_t vrf_id, int *error, void *p);
-void
+void
sctp_bindx_delete_address(struct sctp_inpcb *inp,
struct sockaddr *sa, sctp_assoc_t assoc_id,
uint32_t vrf_id, int *error);
@@ -393,5 +388,7 @@ void sctp_audit_log(uint8_t, uint8_t);
uint32_t sctp_min_mtu(uint32_t, uint32_t, uint32_t);
void sctp_hc_set_mtu(union sctp_sockstore *, uint16_t, uint32_t);
uint32_t sctp_hc_get_mtu(union sctp_sockstore *, uint16_t);
+void sctp_set_state(struct sctp_tcb *, int);
+void sctp_add_substate(struct sctp_tcb *, int);
#endif /* _KERNEL */
#endif
diff --git a/freebsd/sys/netinet/tcp_hostcache.c b/freebsd/sys/netinet/tcp_hostcache.c
index d1de3f33..f2e3d875 100644
--- a/freebsd/sys/netinet/tcp_hostcache.c
+++ b/freebsd/sys/netinet/tcp_hostcache.c
@@ -114,10 +114,10 @@ __FBSDID("$FreeBSD$");
#define TCP_HOSTCACHE_EXPIRE 60*60 /* one hour */
#define TCP_HOSTCACHE_PRUNE 5*60 /* every 5 minutes */
-static VNET_DEFINE(struct tcp_hostcache, tcp_hostcache);
+VNET_DEFINE_STATIC(struct tcp_hostcache, tcp_hostcache);
#define V_tcp_hostcache VNET(tcp_hostcache)
-static VNET_DEFINE(struct callout, tcp_hc_callout);
+VNET_DEFINE_STATIC(struct callout, tcp_hc_callout);
#define V_tcp_hc_callout VNET(tcp_hc_callout)
static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *);
diff --git a/freebsd/sys/netinet/tcp_hpts.h b/freebsd/sys/netinet/tcp_hpts.h
index c52a1d78..04c86769 100644
--- a/freebsd/sys/netinet/tcp_hpts.h
+++ b/freebsd/sys/netinet/tcp_hpts.h
@@ -238,10 +238,10 @@ int
#define tcp_queue_to_input_locked(a, b) __tcp_queue_to_input_locked(a, b, __LINE__);
void
tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
- int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked);
+ int32_t tlen, int32_t drop_hdrlen, uint8_t iptos);
int
__tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
- int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked, int32_t line);
+ int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line);
#define tcp_queue_to_input(a, b, c, d, e, f, g) __tcp_queue_to_input(a, b, c, d, e, f, g, __LINE__)
uint16_t tcp_hpts_delayedby(struct inpcb *inp);
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
index 20bea2de..2c6c3048 100644
--- a/freebsd/sys/netinet/tcp_input.c
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -585,6 +585,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
int rstreason = 0; /* For badport_bandlim accounting purposes */
uint8_t iptos;
struct m_tag *fwd_tag = NULL;
+ struct epoch_tracker et;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6;
@@ -775,7 +776,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
if ((thflags & (TH_FIN | TH_RST)) != 0) {
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED;
} else
ti_locked = TI_UNLOCKED;
@@ -962,25 +963,10 @@ findpcb:
*
* XXXRW: It may be time to rethink timewait locking.
*/
-relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- inp = NULL;
- goto findpcb;
- } else if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- inp = NULL;
- goto findpcb;
- }
- } else
- ti_locked = TI_RLOCKED;
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -991,7 +977,7 @@ relocked:
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -1028,23 +1014,8 @@ relocked:
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
!IS_FASTOPEN(tp->t_flags)))) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- inp = NULL;
- goto findpcb;
- } else if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- inp = NULL;
- goto findpcb;
- }
- goto relocked;
- } else
- ti_locked = TI_RLOCKED;
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
@@ -1082,6 +1053,8 @@ relocked:
#ifdef INET6
if (isipv6) {
inc.inc_flags |= INC_ISIPV6;
+ if (inp->inp_inc.inc_flags & INC_IPV6MINMTU)
+ inc.inc_flags |= INC_IPV6MINMTU;
inc.inc6_faddr = ip6->ip6_src;
inc.inc6_laddr = ip6->ip6_dst;
} else
@@ -1176,9 +1149,11 @@ tfo_socket_result:
* contains. tcp_do_segment() consumes
* the mbuf chain and unlocks the inpcb.
*/
+ TCP_PROBE5(receive, NULL, tp, m, tp, th);
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
- iptos, ti_locked);
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ iptos);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -1382,7 +1357,7 @@ tfo_socket_result:
* Only the listen socket is unlocked by syncache_add().
*/
if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
@@ -1416,15 +1391,16 @@ tfo_socket_result:
* state. tcp_do_segment() always consumes the mbuf chain, unlocks
* the inpcb, and unlocks pcbinfo.
*/
- tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
dropwithreset:
TCP_PROBE5(receive, NULL, tp, m, tp, th);
if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1448,7 +1424,7 @@ dropunlock:
TCP_PROBE5(receive, NULL, tp, m, tp, th);
if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1535,8 +1511,7 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
- struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
- int ti_locked)
+ struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
{
int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
int rstreason, todrop, win;
@@ -1562,7 +1537,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->sackhint.last_sack_ack = 0;
sack_changed = 0;
nsegs = max(1, m->m_pkthdr.lro_nsegs);
-
/*
* If this is either a state-changing packet or current state isn't
* established, we require a write lock on tcbinfo. Otherwise, we
@@ -1571,19 +1545,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
- KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
- "SYN/FIN/RST/!EST", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- } else {
-#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- else {
- KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
- "ti_locked: %d", __func__, ti_locked));
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
}
INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
@@ -1717,10 +1679,19 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
(to.to_flags & TOF_SACKPERM) == 0)
tp->t_flags &= ~TF_SACK_PERMIT;
if (IS_FASTOPEN(tp->t_flags)) {
- if (to.to_flags & TOF_FASTOPEN)
- tcp_fastopen_update_cache(tp, to.to_mss,
+ if (to.to_flags & TOF_FASTOPEN) {
+ uint16_t mss;
+
+ if (to.to_flags & TOF_MSS)
+ mss = to.to_mss;
+ else
+ if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
+ mss = TCP6_MSS;
+ else
+ mss = TCP_MSS;
+ tcp_fastopen_update_cache(tp, mss,
to.to_tfo_len, to.to_tfo_cookie);
- else
+ } else
tcp_fastopen_disable_path(tp);
}
}
@@ -1767,7 +1738,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->snd_nxt == tp->snd_max &&
tiwin && tiwin == tp->snd_wnd &&
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
- LIST_EMPTY(&tp->t_segq) &&
+ SEGQ_EMPTY(tp) &&
((to.to_flags & TOF_TS) == 0 ||
TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
@@ -1792,10 +1763,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
TCPSTAT_INC(tcps_predack);
/*
@@ -1899,10 +1866,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
/* Clean receiver SACK report if present */
if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
tcp_clean_sackreport(tp);
@@ -2104,8 +2067,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
- "ti_locked %d", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -2180,9 +2141,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED,
- ("%s: TH_RST ti_locked %d, th %p tp %p",
- __func__, ti_locked, th, tp));
KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp));
@@ -2225,8 +2183,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
tp->t_state != TCPS_SYN_RECEIVED) {
- KASSERT(ti_locked == TI_RLOCKED,
- ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
TCPSTAT_INC(tcps_badsyn);
@@ -2340,8 +2296,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
- "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
@@ -2457,6 +2411,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* SYN-RECEIVED* -> FIN-WAIT-1
*/
tp->t_starttime = ticks;
+ if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+
+ /*
+ * Account for the ACK of our SYN prior to
+ * regular ACK processing below.
+ */
+ tp->snd_una++;
+ }
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
@@ -2464,16 +2428,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_ESTABLISHED);
TCP_PROBE5(accept__established, NULL, tp,
m, tp, th);
- if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
- tcp_fastopen_decrement_counter(tp->t_tfo_pending);
- tp->t_tfo_pending = NULL;
-
- /*
- * Account for the ACK of our SYN prior to
- * regular ACK processing below.
- */
- tp->snd_una++;
- }
/*
* TFO connections call cc_conn_init() during SYN
* processing. Calling it again here for such
@@ -2490,7 +2444,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* later; if not, do so now to pass queued data to user.
*/
if (tlen == 0 && (thflags & TH_FIN) == 0)
- (void) tcp_reass(tp, (struct tcphdr *)0, 0,
+ (void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
(struct mbuf *)0);
tp->snd_wl1 = th->th_seq - 1;
/* FALLTHROUGH */
@@ -2931,7 +2885,6 @@ process_ACK:
if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return;
}
@@ -3068,7 +3021,7 @@ dodata: /* XXX */
* fast retransmit can work).
*/
if (th->th_seq == tp->rcv_nxt &&
- LIST_EMPTY(&tp->t_segq) &&
+ SEGQ_EMPTY(tp) &&
(TCPS_HAVEESTABLISHED(tp->t_state) ||
tfo_syn)) {
if (DELAY_ACK(tp, tlen) || tfo_syn)
@@ -3093,7 +3046,7 @@ dodata: /* XXX */
* m_adj() doesn't actually frees any mbufs
* when trimming from the head.
*/
- thflags = tcp_reass(tp, th, &tlen, m);
+ thflags = tcp_reass(tp, th, &save_start, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
@@ -3163,19 +3116,11 @@ dodata: /* XXX */
*/
case TCPS_FIN_WAIT_2:
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
- "TCP_FIN_WAIT_2 ti_locked: %d", __func__,
- ti_locked));
tcp_twstart(tp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
return;
}
}
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
@@ -3190,9 +3135,6 @@ dodata: /* XXX */
(void) tp->t_fb->tfb_tcp_output(tp);
check_delack:
- KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
- __func__, ti_locked));
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) {
@@ -3230,10 +3172,6 @@ dropafterack:
&tcp_savetcp, 0);
#endif
TCP_PROBE3(debug__input, tp, th, m);
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
tp->t_flags |= TF_ACKNOW;
(void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(tp->t_inpcb);
@@ -3241,10 +3179,6 @@ dropafterack:
return;
dropwithreset:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
if (tp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason);
INP_WUNLOCK(tp->t_inpcb);
@@ -3253,15 +3187,6 @@ dropwithreset:
return;
drop:
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
- }
-#ifdef INVARIANTS
- else
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
-#endif
-
/*
* Drop space held by incoming segment and return.
*/
diff --git a/freebsd/sys/netinet/tcp_log_buf.h b/freebsd/sys/netinet/tcp_log_buf.h
index 58713fe5..e569395a 100644
--- a/freebsd/sys/netinet/tcp_log_buf.h
+++ b/freebsd/sys/netinet/tcp_log_buf.h
@@ -94,7 +94,7 @@ struct tcp_log_bbr {
uint16_t flex7;
uint8_t bbr_state;
uint8_t bbr_substate;
- uint8_t inpacer;
+ uint8_t inhpts;
uint8_t ininput;
uint8_t use_lt_bw;
uint8_t flex8;
@@ -217,7 +217,9 @@ enum tcp_log_events {
BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/
TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */
BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */
- TCP_LOG_END /* End (keep at end) 49 */
+ BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining 49 */
+ TCP_LOG_REASS, /* Reassembly buffer logging 50 */
+ TCP_LOG_END /* End (keep at end) 51 */
};
enum tcp_log_states {
diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c
index bdbfe984..8f83440d 100644
--- a/freebsd/sys/netinet/tcp_output.c
+++ b/freebsd/sys/netinet/tcp_output.c
@@ -145,18 +145,13 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat, CTLFLAG_VNET | CTLFLAG_R
tcp_timer_active((tp), TT_PERSIST), \
("neither rexmt nor persist timer is set"))
-#ifdef TCP_HHOOK
-static void inline hhook_run_tcp_est_out(struct tcpcb *tp,
- struct tcphdr *th, struct tcpopt *to,
- uint32_t len, int tso);
-#endif
static void inline cc_after_idle(struct tcpcb *tp);
#ifdef TCP_HHOOK
/*
* Wrapper for the TCP established output helper hook.
*/
-static void inline
+void
hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
struct tcpopt *to, uint32_t len, int tso)
{
@@ -197,6 +192,8 @@ tcp_output(struct tcpcb *tp)
int32_t len;
uint32_t recwin, sendwin;
int off, flags, error = 0; /* Keep compiler happy */
+ u_int if_hw_tsomaxsegcount = 0;
+ u_int if_hw_tsomaxsegsize;
struct mbuf *m;
struct ip *ip = NULL;
#ifdef TCPDEBUG
@@ -233,13 +230,15 @@ tcp_output(struct tcpcb *tp)
#endif
/*
- * For TFO connections in SYN_RECEIVED, only allow the initial
- * SYN|ACK and those sent by the retransmit timer.
+ * For TFO connections in SYN_SENT or SYN_RECEIVED,
+ * only allow the initial SYN or SYN|ACK and those sent
+ * by the retransmit timer.
*/
if (IS_FASTOPEN(tp->t_flags) &&
- (tp->t_state == TCPS_SYN_RECEIVED) &&
- SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */
- (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
+ ((tp->t_state == TCPS_SYN_SENT) ||
+ (tp->t_state == TCPS_SYN_RECEIVED)) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */
+ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
return (0);
/*
@@ -867,9 +866,6 @@ send:
if (tso) {
u_int if_hw_tsomax;
- u_int if_hw_tsomaxsegcount;
- u_int if_hw_tsomaxsegsize;
- struct mbuf *mb;
u_int moff;
int max_len;
@@ -901,65 +897,6 @@ send:
len = max_len;
}
}
-
- /*
- * Check if we should limit by maximum segment
- * size and count:
- */
- if (if_hw_tsomaxsegcount != 0 &&
- if_hw_tsomaxsegsize != 0) {
- /*
- * Subtract one segment for the LINK
- * and TCP/IP headers mbuf that will
- * be prepended to this mbuf chain
- * after the code in this section
- * limits the number of mbufs in the
- * chain to if_hw_tsomaxsegcount.
- */
- if_hw_tsomaxsegcount -= 1;
- max_len = 0;
- mb = sbsndmbuf(&so->so_snd, off, &moff);
-
- while (mb != NULL && max_len < len) {
- u_int mlen;
- u_int frags;
-
- /*
- * Get length of mbuf fragment
- * and how many hardware frags,
- * rounded up, it would use:
- */
- mlen = (mb->m_len - moff);
- frags = howmany(mlen,
- if_hw_tsomaxsegsize);
-
- /* Handle special case: Zero Length Mbuf */
- if (frags == 0)
- frags = 1;
-
- /*
- * Check if the fragment limit
- * will be reached or exceeded:
- */
- if (frags >= if_hw_tsomaxsegcount) {
- max_len += min(mlen,
- if_hw_tsomaxsegcount *
- if_hw_tsomaxsegsize);
- break;
- }
- max_len += mlen;
- if_hw_tsomaxsegcount -= frags;
- moff = 0;
- mb = mb->m_next;
- }
- if (max_len <= 0) {
- len = 0;
- } else if (len > max_len) {
- sendalot = 1;
- len = max_len;
- }
- }
-
/*
* Prevent the last segment from being
* fractional unless the send sockbuf can be
@@ -994,7 +931,6 @@ send:
*/
if (tp->t_flags & TF_NEEDFIN)
sendalot = 1;
-
} else {
len = tp->t_maxseg - optlen - ipoptlen;
sendalot = 1;
@@ -1029,6 +965,7 @@ send:
*/
if (len) {
struct mbuf *mb;
+ struct sockbuf *msb;
u_int moff;
if ((tp->t_flags & TF_FORCEDATA) && len == 1)
@@ -1062,14 +999,30 @@ send:
* Start the m_copy functions from the closest mbuf
* to the offset in the socket buffer chain.
*/
- mb = sbsndptr(&so->so_snd, off, len, &moff);
-
+ mb = sbsndptr_noadv(&so->so_snd, off, &moff);
if (len <= MHLEN - hdrlen - max_linkhdr) {
m_copydata(mb, moff, len,
mtod(m, caddr_t) + hdrlen);
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ sbsndptr_adv(&so->so_snd, mb, len);
m->m_len += len;
} else {
- m->m_next = m_copym(mb, moff, len, M_NOWAIT);
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ msb = NULL;
+ else
+ msb = &so->so_snd;
+ m->m_next = tcp_m_copym(mb, moff,
+ &len, if_hw_tsomaxsegcount,
+ if_hw_tsomaxsegsize, msb);
+ if (len <= (tp->t_maxseg - optlen)) {
+ /*
+ * Must have run out of mbufs for the copy;
+ * shorten it so it no longer needs TSO. Let's
+ * not set sendalot since we are low on
+ * mbufs.
+ */
+ tso = 0;
+ }
if (m->m_next == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
(void) m_free(m);
@@ -1853,6 +1806,144 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
return (optlen);
}
+/*
+ * This is a copy of m_copym(), taking the TSO segment size/limit
+ * constraints into account, and advancing the sndptr as it goes.
+ *
+ * m         Head of the mbuf chain to copy from.
+ * off0      Byte offset into the chain at which copying starts (>= 0).
+ * plen      In: number of bytes requested (>= 0, or M_COPYALL).
+ *           Out: rewritten to the number of bytes actually placed in the
+ *           new chain whenever the copy is cut short (source chain
+ *           exhausted, or segment limit reached); otherwise left as is.
+ * seglimit  Segment budget for the copy; 0 disables all segment
+ *           accounting below.
+ * segsize   Maximum size of one segment, used to estimate how many
+ *           segments each mbuf consumes.  Only read when seglimit != 0.
+ * sb        If non-NULL, its sb_sndptr/sb_sndptroff are moved forward past
+ *           source mbufs that have been completely consumed.
+ *
+ * Returns the head of the newly built chain.  Returns NULL if an mbuf or
+ * packet header could not be allocated (any partial chain is freed first);
+ * the result is also NULL when no mbuf was copied at all.
+ */
+struct mbuf *
+tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
+ int32_t seglimit, int32_t segsize, struct sockbuf *sb)
+{
+ struct mbuf *n, **np;
+ struct mbuf *top;
+ int32_t off = off0;
+ int32_t len = *plen;
+ int32_t fragsize;
+ int32_t len_cp = 0;
+ int32_t *pkthdrlen;
+ uint32_t mlen, frags;
+ bool copyhdr;
+
+
+ KASSERT(off >= 0, ("tcp_m_copym, negative off %d", off));
+ KASSERT(len >= 0, ("tcp_m_copym, negative len %d", len));
+ /* A packet header is duplicated only when copying from the very start. */
+ if (off == 0 && m->m_flags & M_PKTHDR)
+ copyhdr = true;
+ else
+ copyhdr = false;
+ /*
+ * Skip source mbufs that lie entirely before the requested offset,
+ * moving the socket buffer's send pointer along with us.
+ */
+ while (off > 0) {
+ KASSERT(m != NULL, ("tcp_m_copym, offset > size of mbuf chain"));
+ if (off < m->m_len)
+ break;
+ off -= m->m_len;
+ if ((sb) && (m == sb->sb_sndptr)) {
+ sb->sb_sndptroff += m->m_len;
+ sb->sb_sndptr = m->m_next;
+ }
+ m = m->m_next;
+ }
+ /* np always points at the link where the next new mbuf is attached. */
+ np = &top;
+ top = NULL;
+ /* Set once a packet header has been copied, so its length can be fixed up. */
+ pkthdrlen = NULL;
+ while (len > 0) {
+ if (m == NULL) {
+ /* Source exhausted: report how much was actually copied. */
+ KASSERT(len == M_COPYALL,
+ ("tcp_m_copym, length > size of mbuf chain"));
+ *plen = len_cp;
+ if (pkthdrlen != NULL)
+ *pkthdrlen = len_cp;
+ break;
+ }
+ mlen = min(len, m->m_len - off);
+ if (seglimit) {
+ /*
+ * For M_NOMAP mbufs, add 3 segments
+ * + 1 in case we are crossing page boundaries
+ * + 2 in case the TLS hdr/trailer are used
+ * It is cheaper to just add the segments
+ * than it is to take the cache miss to look
+ * at the mbuf ext_pgs state in detail.
+ */
+ if (m->m_flags & M_NOMAP) {
+ fragsize = min(segsize, PAGE_SIZE);
+ frags = 3;
+ } else {
+ fragsize = segsize;
+ frags = 0;
+ }
+
+ /* Break if we really can't fit anymore. */
+ if ((frags + 1) >= seglimit) {
+ *plen = len_cp;
+ if (pkthdrlen != NULL)
+ *pkthdrlen = len_cp;
+ break;
+ }
+
+ /*
+ * Reduce size if you can't copy the whole
+ * mbuf. If we can't copy the whole mbuf, also
+ * adjust len so the loop will end after this
+ * mbuf.
+ */
+ if ((frags + howmany(mlen, fragsize)) >= seglimit) {
+ mlen = (seglimit - frags - 1) * fragsize;
+ len = mlen;
+ *plen = len_cp + len;
+ if (pkthdrlen != NULL)
+ *pkthdrlen = *plen;
+ }
+ /* Charge this mbuf (at least one segment) against the budget. */
+ frags += howmany(mlen, fragsize);
+ if (frags == 0)
+ frags++;
+ seglimit -= frags;
+ KASSERT(seglimit > 0,
+ ("%s: seglimit went too low", __func__));
+ }
+ if (copyhdr)
+ n = m_gethdr(M_NOWAIT, m->m_type);
+ else
+ n = m_get(M_NOWAIT, m->m_type);
+ /* Link n in before checking it, so nospace frees everything built so far. */
+ *np = n;
+ if (n == NULL)
+ goto nospace;
+ if (copyhdr) {
+ if (!m_dup_pkthdr(n, m, M_NOWAIT))
+ goto nospace;
+ if (len == M_COPYALL)
+ n->m_pkthdr.len -= off0;
+ else
+ n->m_pkthdr.len = len;
+ pkthdrlen = &n->m_pkthdr.len;
+ copyhdr = false;
+ }
+ n->m_len = mlen;
+ len_cp += n->m_len;
+ if (m->m_flags & M_EXT) {
+ /* External storage: point into m's data and hand off to mb_dupcl(); no byte copy here. */
+ n->m_data = m->m_data + off;
+ mb_dupcl(n, m);
+ } else
+ bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
+ (u_int)n->m_len);
+
+ /*
+ * Advance the send pointer only when this source mbuf was consumed
+ * to its end and there is a following mbuf to point at.
+ */
+ if (sb && (sb->sb_sndptr == m) &&
+ ((n->m_len + off) >= m->m_len) && m->m_next) {
+ sb->sb_sndptroff += m->m_len;
+ sb->sb_sndptr = m->m_next;
+ }
+ /* The offset applies to the first copied mbuf only. */
+ off = 0;
+ if (len != M_COPYALL) {
+ len -= n->m_len;
+ }
+ m = m->m_next;
+ np = &n->m_next;
+ }
+ return (top);
+nospace:
+ m_freem(top);
+ return (NULL);
+}
+
void
tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin)
{
diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c
index dbb61299..4776a808 100644
--- a/freebsd/sys/netinet/tcp_reass.c
+++ b/freebsd/sys/netinet/tcp_reass.c
@@ -74,15 +74,37 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_log_buf.h>
+#include <netinet/tcp_hpts.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
+/*
+ * Action codes for reassembly logging.  In the uses visible in this file
+ * they are passed as the `action' argument of tcp_log_reassm() (or the
+ * `logval' argument of tcp_reass_log_new_in(), which forwards it).
+ */
+#define TCP_R_LOG_ADD 1
+#define TCP_R_LOG_LIMIT_REACHED 2
+#define TCP_R_LOG_APPEND 3
+#define TCP_R_LOG_PREPEND 4
+#define TCP_R_LOG_REPLACE 5
+#define TCP_R_LOG_MERGE_INTO 6
+#define TCP_R_LOG_NEW_ENTRY 7
+#define TCP_R_LOG_READ 8
+#define TCP_R_LOG_ZERO 9
+#define TCP_R_LOG_DUMP 10
+#define TCP_R_LOG_TRIM 11
+
+/* For debugging we want counters and BB logging */
+/* #define TCP_REASS_COUNTERS 1 */
+/* #define TCP_REASS_LOGGING 1 */
+
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
+static SYSCTL_NODE(_net_inet_tcp_reass, OID_AUTO, stats, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly stats");
+
+
static int tcp_reass_maxseg = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
&tcp_reass_maxseg, 0,
@@ -93,6 +115,77 @@ SYSCTL_UMA_CUR(_net_inet_tcp_reass, OID_AUTO, cursegments, 0,
&tcp_reass_zone,
"Global number of TCP Segments currently in Reassembly Queue");
+static u_int tcp_reass_maxqueuelen = 100;
+SYSCTL_UINT(_net_inet_tcp_reass, OID_AUTO, maxqueuelen, CTLFLAG_RWTUN,
+ &tcp_reass_maxqueuelen, 0,
+ "Maximum number of TCP Segments per Reassembly Queue");
+
+static int tcp_new_limits = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, new_limit, CTLFLAG_RWTUN,
+ &tcp_new_limits, 0,
+ "Do we use the new limit method we are discussing?");
+
+/*
+ * Reassembly queue length at which guard mode is entered (see the sysctl
+ * description).  The default comes from the variable's initializer; the
+ * `val' argument of SYSCTL_UINT is not used when a backing pointer is
+ * supplied, so pass 0 as the maxqueuelen knob does.
+ */
+static u_int tcp_reass_queue_guard = 16;
+SYSCTL_UINT(_net_inet_tcp_reass, OID_AUTO, queueguard, CTLFLAG_RWTUN,
+    &tcp_reass_queue_guard, 0,
+    "Number of TCP Segments in Reassembly Queue where we flip over to guard mode");
+
+#ifdef TCP_REASS_COUNTERS
+
+/*
+ * Debug-only statistics, built only when TCP_REASS_COUNTERS is defined and
+ * exported read-only (CTLFLAG_RD) under the "stats" sysctl node declared
+ * above.  Each counter is allocated in tcp_reass_global_init().
+ */
+counter_u64_t reass_entry;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, entry, CTLFLAG_RD,
+ &reass_entry, "A segment entered reassembly ");
+
+counter_u64_t reass_path1;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path1, CTLFLAG_RD,
+ &reass_path1, "Took path 1");
+
+counter_u64_t reass_path2;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path2, CTLFLAG_RD,
+ &reass_path2, "Took path 2");
+
+counter_u64_t reass_path3;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path3, CTLFLAG_RD,
+ &reass_path3, "Took path 3");
+
+counter_u64_t reass_path4;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path4, CTLFLAG_RD,
+ &reass_path4, "Took path 4");
+
+counter_u64_t reass_path5;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path5, CTLFLAG_RD,
+ &reass_path5, "Took path 5");
+
+counter_u64_t reass_path6;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path6, CTLFLAG_RD,
+ &reass_path6, "Took path 6");
+
+counter_u64_t reass_path7;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path7, CTLFLAG_RD,
+ &reass_path7, "Took path 7");
+
+counter_u64_t reass_fullwalk;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, fullwalk, CTLFLAG_RD,
+ &reass_fullwalk, "Took a full walk ");
+
+counter_u64_t reass_nospace;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, nospace, CTLFLAG_RD,
+ &reass_nospace, "Had no mbuf capacity ");
+
+counter_u64_t merge_fwd;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, merge_fwd, CTLFLAG_RD,
+ &merge_fwd, "Ran merge fwd");
+
+counter_u64_t merge_into;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, merge_into, CTLFLAG_RD,
+ &merge_into, "Ran merge into");
+
+counter_u64_t tcp_zero_input;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, zero_input, CTLFLAG_RD,
+ &tcp_zero_input, "The reassembly buffer saw a zero len segment etc");
+
+#endif
+
/* Initialize TCP reassembly queue */
static void
tcp_reass_zone_change(void *tag)
@@ -104,6 +197,77 @@ tcp_reass_zone_change(void *tag)
tcp_reass_maxseg);
}
+#ifdef TCP_REASS_LOGGING
+
+/*
+ * Emit one TCP_LOG_REASS log record describing a reassembly-queue
+ * operation.  Does nothing when logging is off for the connection
+ * (tp->t_logstate == TCP_LOG_STATE_OFF).
+ *
+ * q, p      Queue entries involved; either may be NULL.  Their addresses
+ *           are always recorded and, when non-NULL, so are their
+ *           start/len/mbuf_cnt/flags fields.
+ * seq       Sequence number for the record (stored in flex1).
+ * len       Length handed to TCP_LOG_EVENTP().
+ * action    Action code such as TCP_R_LOG_APPEND (stored in flex8).
+ * instance  Caller-chosen value (stored in flex7).
+ */
+static void
+tcp_log_reassm(struct tcpcb *tp, struct tseg_qent *q, struct tseg_qent *p,
+    tcp_seq seq, int len, uint8_t action, int instance)
+{
+	uint32_t cts;
+	struct timeval tv;
+
+	if (tp->t_logstate != TCP_LOG_STATE_OFF) {
+		union tcp_log_stackspecific log;
+
+		memset(&log, 0, sizeof(log));
+		cts = tcp_get_usecs(&tv);
+		log.u_bbr.flex1 = seq;
+		/*
+		 * Convert via uintptr_t: casting a pointer directly to a
+		 * 64-bit integer is a size-mismatched conversion on 32-bit
+		 * targets and triggers a compiler warning there.
+		 */
+		log.u_bbr.cur_del_rate = (uint64_t)(uintptr_t)q;
+		log.u_bbr.delRate = (uint64_t)(uintptr_t)p;
+		if (q != NULL) {
+			log.u_bbr.flex2 = q->tqe_start;
+			log.u_bbr.flex3 = q->tqe_len;
+			log.u_bbr.flex4 = q->tqe_mbuf_cnt;
+			log.u_bbr.hptsi_gain = q->tqe_flags;
+		}
+		if (p != NULL) {
+			log.u_bbr.flex5 = p->tqe_start;
+			log.u_bbr.pkts_out = p->tqe_len;
+			log.u_bbr.epoch = p->tqe_mbuf_cnt;
+			log.u_bbr.cwnd_gain = p->tqe_flags;
+		}
+		log.u_bbr.flex6 = tp->t_segqmbuflen;
+		log.u_bbr.flex7 = instance;
+		log.u_bbr.flex8 = action;
+		log.u_bbr.timeStamp = cts;
+		TCP_LOG_EVENTP(tp, NULL,
+		    &tp->t_inpcb->inp_socket->so_rcv,
+		    &tp->t_inpcb->inp_socket->so_snd,
+		    TCP_LOG_REASS, 0,
+		    len, &log, false, &tv);
+	}
+}
+
+/*
+ * Log every entry currently on tp's reassembly queue as a
+ * TCP_R_LOG_DUMP record.  No-op when logging is off for the connection.
+ */
+static void
+tcp_reass_log_dump(struct tcpcb *tp)
+{
+	struct tseg_qent *q;
+
+	if (tp->t_logstate == TCP_LOG_STATE_OFF)
+		return;
+	TAILQ_FOREACH(q, &tp->t_segq, tqe_q) {
+		tcp_log_reassm(tp, q, NULL, q->tqe_start, q->tqe_len, TCP_R_LOG_DUMP, 0);
+	}
+}
+
+/*
+ * Log a reassembly event for chain `m'.  The sum of m_len over every mbuf
+ * in the chain is passed to tcp_log_reassm() as its `instance' argument;
+ * seq, len, logval and q are forwarded unchanged.
+ */
+static void
+tcp_reass_log_new_in(struct tcpcb *tp, tcp_seq seq, int len, struct mbuf *m,
+    int logval, struct tseg_qent *q)
+{
+	struct mbuf *cur;
+	int total = 0;
+
+	/* Walk the chain once, accumulating the per-mbuf data lengths. */
+	for (cur = m; cur != NULL; cur = cur->m_next)
+		total += cur->m_len;
+	tcp_log_reassm(tp, q, NULL, seq, len, logval, total);
+}
+
+#endif
+
void
tcp_reass_global_init(void)
{
@@ -116,8 +280,24 @@ tcp_reass_global_init(void)
/* Set the zone limit and read back the effective value. */
tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
tcp_reass_maxseg);
+#ifdef TCP_REASS_COUNTERS
+ reass_path1 = counter_u64_alloc(M_WAITOK);
+ reass_path2 = counter_u64_alloc(M_WAITOK);
+ reass_path3 = counter_u64_alloc(M_WAITOK);
+ reass_path4 = counter_u64_alloc(M_WAITOK);
+ reass_path5 = counter_u64_alloc(M_WAITOK);
+ reass_path6 = counter_u64_alloc(M_WAITOK);
+ reass_path7 = counter_u64_alloc(M_WAITOK);
+ reass_fullwalk = counter_u64_alloc(M_WAITOK);
+ reass_nospace = counter_u64_alloc(M_WAITOK);
+ reass_entry = counter_u64_alloc(M_WAITOK);
+ merge_fwd = counter_u64_alloc(M_WAITOK);
+ merge_into = counter_u64_alloc(M_WAITOK);
+ tcp_zero_input = counter_u64_alloc(M_WAITOK);
+#endif
EVENTHANDLER_REGISTER(nmbclusters_change,
tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+
}
void
@@ -127,32 +307,237 @@ tcp_reass_flush(struct tcpcb *tp)
INP_WLOCK_ASSERT(tp->t_inpcb);
- while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
- LIST_REMOVE(qe, tqe_q);
+ while ((qe = TAILQ_FIRST(&tp->t_segq)) != NULL) {
+ TAILQ_REMOVE(&tp->t_segq, qe, tqe_q);
m_freem(qe->tqe_m);
uma_zfree(tcp_reass_zone, qe);
tp->t_segqlen--;
}
-
+ tp->t_segqmbuflen = 0;
KASSERT((tp->t_segqlen == 0),
("TCP reass queue %p segment count is %d instead of 0 after flush.",
tp, tp->t_segqlen));
}
+/*
+ * Append the chain m..mlast (tlen bytes of data, lenofoh bytes of mbuf
+ * overhead) to the tail of the existing queue entry "last".  Only the
+ * entry's own tqe_mbuf_cnt is updated here; tp->t_segqmbuflen is not
+ * touched by this function.
+ */
+static void
+tcp_reass_append(struct tcpcb *tp, struct tseg_qent *last,
+    struct mbuf *m, struct tcphdr *th, int tlen,
+    struct mbuf *mlast, int lenofoh)
+{
+
+#ifdef TCP_REASS_LOGGING
+	tcp_log_reassm(tp, last, NULL, th->th_seq, tlen, TCP_R_LOG_APPEND, 0);
+#endif
+	/* Link the new chain behind the current tail, then move the tail. */
+	last->tqe_last->m_next = m;
+	last->tqe_last = mlast;
+
+	/* Account for the added payload and mbuf overhead. */
+	last->tqe_len += tlen;
+	last->tqe_m->m_pkthdr.len += tlen;
+	last->tqe_mbuf_cnt += lenofoh;
+
+	/* A FIN carried by the new segment is kept on the entry. */
+	last->tqe_flags |= (th->th_flags & TH_FIN);
+
+	/* Out-of-order receive statistics. */
+	tp->t_rcvoopack++;
+	TCPSTAT_INC(tcps_rcvoopack);
+	TCPSTAT_ADD(tcps_rcvoobyte, tlen);
+#ifdef TCP_REASS_LOGGING
+	tcp_reass_log_new_in(tp, last->tqe_start, lenofoh, last->tqe_m,
+	    TCP_R_LOG_APPEND, last);
+#endif
+}
+
+/*
+ * Put the chain m..mlast (tlen bytes starting at th->th_seq, lenofoh
+ * bytes of mbuf overhead) in front of the data already held by the
+ * queue entry "first".  Where the new data runs into the start of the
+ * old data, the overlap is trimmed off the front of the OLD chain, so
+ * the newly received bytes are the ones kept.  Only the entry's own
+ * tqe_mbuf_cnt is updated; tp->t_segqmbuflen is not touched here.
+ */
+static void
+tcp_reass_prepend(struct tcpcb *tp, struct tseg_qent *first, struct mbuf *m, struct tcphdr *th,
+ int tlen, struct mbuf *mlast, int lenofoh)
+{
+ int i;
+
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, first, NULL, th->th_seq, tlen, TCP_R_LOG_PREPEND, 0);
+#endif
+ if (SEQ_GT((th->th_seq + tlen), first->tqe_start)) {
+ /* The new data overlaps into the old */
+ /* i = number of bytes by which the new segment overruns first's start */
+ i = (th->th_seq + tlen) - first->tqe_start;
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, first, NULL, 0, i, TCP_R_LOG_TRIM, 1);
+#endif
+ /* Positive m_adj(): drop i bytes from the head of the old chain */
+ m_adj(first->tqe_m, i);
+ first->tqe_len -= i;
+ /* tqe_start is overwritten with th->th_seq below */
+ first->tqe_start += i;
+ }
+ /* Ok now setup our chain to point to the old first */
+ mlast->m_next = first->tqe_m;
+ first->tqe_m = m;
+ first->tqe_len += tlen;
+ first->tqe_start = th->th_seq;
+ /* m is the new head mbuf, so it carries the packet-header length */
+ first->tqe_m->m_pkthdr.len = first->tqe_len;
+ first->tqe_mbuf_cnt += lenofoh;
+ tp->t_rcvoopack++;
+ TCPSTAT_INC(tcps_rcvoopack);
+ TCPSTAT_ADD(tcps_rcvoobyte, tlen);
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_new_in(tp, first->tqe_start, lenofoh, first->tqe_m,
+ TCP_R_LOG_PREPEND,
+ first);
+#endif
+}
+
+/*
+ * Discard the mbuf chain held by q and make q describe the new segment
+ * instead (chain m..mlast, len bytes starting at seq).  The connection's
+ * t_segqmbuflen is adjusted here for both the old and the new overhead.
+ */
+static void
+tcp_reass_replace(struct tcpcb *tp, struct tseg_qent *q, struct mbuf *m,
+    tcp_seq seq, int len, struct mbuf *mlast, int mbufoh, uint8_t flags)
+{
+	int grown;
+
+#ifdef TCP_REASS_LOGGING
+	tcp_log_reassm(tp, q, NULL, seq, len, TCP_R_LOG_REPLACE, 0);
+#endif
+	/* Release the old data and take its overhead off the books. */
+	m_freem(q->tqe_m);
+	KASSERT(tp->t_segqmbuflen >= q->tqe_mbuf_cnt,
+	    ("Tp:%p seg queue goes negative", tp));
+	tp->t_segqmbuflen -= q->tqe_mbuf_cnt;
+
+	/* Only bytes beyond the old length count as new out-of-order data. */
+	grown = (len > q->tqe_len) ? len - q->tqe_len : 0;
+	tp->t_rcvoopack++;
+	TCPSTAT_INC(tcps_rcvoopack);
+	TCPSTAT_ADD(tcps_rcvoobyte, grown);
+
+	/* Point the entry at the replacement chain. */
+	q->tqe_m = m;
+	q->tqe_last = mlast;
+	q->tqe_start = seq;
+	q->tqe_len = len;
+	q->tqe_flags = (flags & TH_FIN);
+	q->tqe_mbuf_cnt = mbufoh;
+	q->tqe_m->m_pkthdr.len = q->tqe_len;
+	tp->t_segqmbuflen += mbufoh;
+}
+
+/*
+ * Fold queue entry q into ent: q's mbuf chain is linked behind ent's,
+ * its length, mbuf overhead and FIN flag are carried over, and q itself
+ * is then unlinked from the reassembly queue and returned to the zone.
+ */
+static void
+tcp_reass_merge_into(struct tcpcb *tp, struct tseg_qent *ent,
+    struct tseg_qent *q)
+{
+#ifdef TCP_REASS_LOGGING
+	tcp_log_reassm(tp, q, ent, 0, 0, TCP_R_LOG_MERGE_INTO, 0);
+#endif
+#ifdef TCP_REASS_COUNTERS
+	counter_u64_add(merge_into, 1);
+#endif
+	/* Splice the chains; ent's tail becomes q's tail. */
+	ent->tqe_last->m_next = q->tqe_m;
+	ent->tqe_last = q->tqe_last;
+
+	/* Carry q's accounting over to ent. */
+	ent->tqe_m->m_pkthdr.len += q->tqe_len;
+	ent->tqe_len += q->tqe_len;
+	ent->tqe_mbuf_cnt += q->tqe_mbuf_cnt;
+	ent->tqe_flags |= (q->tqe_flags & TH_FIN);
+
+	/* q no longer owns any mbufs; drop it from the queue. */
+	tp->t_segqlen--;
+	TAILQ_REMOVE(&tp->t_segq, q, tqe_q);
+	uma_zfree(tcp_reass_zone, q);
+}
+
+/*
+ * Starting at the entry after ent, absorb every following queue entry
+ * that ent reaches: entries lying entirely inside ent are freed, and an
+ * entry that ent only partly covers (or that starts exactly where ent
+ * ends) is trimmed and merged into ent via tcp_reass_merge_into().
+ * The walk stops at the first entry that starts beyond ent's end.
+ */
+static void
+tcp_reass_merge_forward(struct tcpcb *tp, struct tseg_qent *ent)
+{
+ struct tseg_qent *q, *qtmp;
+ int i;
+ tcp_seq max;
+ /*
+ * Given an entry merge forward anyplace
+ * that ent overlaps forward.
+ */
+
+ /* max = sequence number just past the last byte held by ent */
+ max = ent->tqe_start + ent->tqe_len;
+ q = TAILQ_NEXT(ent, tqe_q);
+ if (q == NULL) {
+ /* Nothing left; note the merge_fwd counter below is not bumped */
+ /*
+ * NOTE(review): per queue(3) the _FROM iterators start at the
+ * list head when handed a NULL element, which is presumably why
+ * this explicit check precedes the loop -- confirm.
+ */
+ return;
+ }
+ TAILQ_FOREACH_FROM_SAFE(q, &tp->t_segq, tqe_q, qtmp) {
+ if (SEQ_GT(q->tqe_start, max)) {
+ /* q starts beyond the end of ent: there is a gap, stop */
+ break;
+ }
+ /* We have some or all that are overlapping */
+ if (SEQ_GEQ(max, (q->tqe_start + q->tqe_len))) {
+ /* It consumes it all */
+ /* q's data is discarded, so its overhead leaves the total */
+ tp->t_segqmbuflen -= q->tqe_mbuf_cnt;
+ m_freem(q->tqe_m);
+ TAILQ_REMOVE(&tp->t_segq, q, tqe_q);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ continue;
+ }
+ /*
+ * Trim the q entry to dovetail to this one
+ * and then merge q into ent updating max
+ * in the process.
+ */
+ /* i is 0 when q starts exactly at max (abutting, no overlap) */
+ i = max - q->tqe_start;
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, q, NULL, 0, i, TCP_R_LOG_TRIM, 2);
+#endif
+ m_adj(q->tqe_m, i);
+ q->tqe_len -= i;
+ q->tqe_start += i;
+ /* merge_into unlinks and frees q; qtmp keeps the walk valid */
+ tcp_reass_merge_into(tp, ent, q);
+ max = ent->tqe_start + ent->tqe_len;
+ }
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(merge_fwd, 1);
+#endif
+}
+
+/*
+ * Walk the chain that starts at m and return its memory footprint:
+ * MSIZE for every mbuf plus ext_size for each one flagged M_EXT.  The
+ * final mbuf of the chain is handed back through *mlast.  m is
+ * dereferenced unconditionally and so must not be NULL.
+ */
+static int
+tcp_reass_overhead_of_chain(struct mbuf *m, struct mbuf **mlast)
+{
+	int total = 0;
+
+	for (;;) {
+		total += MSIZE;
+		if (m->m_flags & M_EXT)
+			total += m->m_ext.ext_size;
+		if (m->m_next == NULL)
+			break;
+		m = m->m_next;
+	}
+	*mlast = m;
+	return (total);
+}
+
+
+/*
+ * NOTE!!! the new tcp-reassembly code *must not* use
+ * m_adj() with a negative index. That alters the chain
+ * of mbufs (by possibly chopping trailing mbufs). At
+ * the front of tcp_reass we count the mbuf overhead
+ * and setup the tail pointer. If we use m_adj(m, -5)
+ * we could corrupt the tail pointer. Currently the
+ * code only uses m_adj(m, positive-num). If this
+ * changes, corresponding changes to update mlast would
+ * be needed.
+ */
int
-tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
+tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start,
+ int *tlenp, struct mbuf *m)
{
- struct tseg_qent *q;
+ struct tseg_qent *q, *last, *first;
struct tseg_qent *p = NULL;
- struct tseg_qent *nq;
+ struct tseg_qent *nq = NULL;
struct tseg_qent *te = NULL;
+ struct tseg_qent tqs;
+ struct mbuf *mlast = NULL;
+ struct sockbuf *sb;
struct socket *so = tp->t_inpcb->inp_socket;
char *s = NULL;
- int flags;
- struct tseg_qent tqs;
+ int flags, i, lenofoh;
INP_WLOCK_ASSERT(tp->t_inpcb);
-
/*
* XXX: tcp_reass() is rather inefficient with its data structures
* and should be rewritten (see NetBSD for optimizations).
@@ -164,149 +549,475 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
*/
if (th == NULL)
goto present;
-
+ KASSERT(SEQ_GEQ(th->th_seq, tp->rcv_nxt),
+ ("Attempt to add old entry to reassembly queue (th=%p, tp=%p)",
+ th, tp));
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_new_in(tp, th->th_seq, *tlenp, m, TCP_R_LOG_ADD, NULL);
+#endif
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_entry, 1);
+#endif
/*
- * Limit the number of segments that can be queued to reduce the
- * potential for mbuf exhaustion. For best performance, we want to be
- * able to queue a full window's worth of segments. The size of the
- * socket receive buffer determines our advertised window and grows
- * automatically when socket buffer autotuning is enabled. Use it as the
- * basis for our queue limit.
- * Always let the missing segment through which caused this queue.
- * NB: Access to the socket buffer is left intentionally unlocked as we
- * can tolerate stale information here.
- *
- * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
- * should work but causes packets to be dropped when they shouldn't.
- * Investigate why and re-evaluate the below limit after the behaviour
- * is understood.
+ * Check for zero length data.
+ */
+ if ((*tlenp == 0) && ((th->th_flags & TH_FIN) == 0)) {
+ /*
+ * A zero length segment does no
+ * one any good. We could check
+ * the rcv_nxt <-> rcv_wnd but thats
+ * already done for us by the caller.
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(tcp_zero_input, 1);
+#endif
+ m_freem(m);
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
+ return (0);
+ }
+ /*
+ * Will it fit?
*/
- if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
- tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
+ lenofoh = tcp_reass_overhead_of_chain(m, &mlast);
+ sb = &tp->t_inpcb->inp_socket->so_rcv;
+ if ((sb->sb_mbcnt + tp->t_segqmbuflen + lenofoh) > sb->sb_mbmax) {
+ /* No room */
TCPSTAT_INC(tcps_rcvreassfull);
- *tlenp = 0;
- if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
- log(LOG_DEBUG, "%s; %s: queue limit reached, "
- "segment dropped\n", s, __func__);
- free(s, M_TCPLOG);
- }
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_nospace, 1);
+#endif
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, NULL, NULL, th->th_seq, lenofoh, TCP_R_LOG_LIMIT_REACHED, 0);
+#endif
m_freem(m);
+ *tlenp = 0;
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
return (0);
}
-
/*
- * Allocate a new queue entry. If we can't, or hit the zone limit
- * just drop the pkt.
- *
- * Use a temporary structure on the stack for the missing segment
- * when the zone is exhausted. Otherwise we may get stuck.
+ * First lets deal with two common cases, the
+ * segment appends to the back of our collected
+ * segments. Or the segment is the next in line.
*/
- te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
- if (te == NULL) {
- if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
- TCPSTAT_INC(tcps_rcvmemdrop);
- m_freem(m);
+ last = TAILQ_LAST_FAST(&tp->t_segq, tseg_qent, tqe_q);
+ if (last != NULL) {
+ if ((th->th_flags & TH_FIN) &&
+ SEQ_LT((th->th_seq + *tlenp), (last->tqe_start + last->tqe_len))) {
+ /*
+ * Someone is trying to game us, dump
+ * the segment.
+ */
*tlenp = 0;
- if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
- NULL))) {
- log(LOG_DEBUG, "%s; %s: global zone limit "
- "reached, segment dropped\n", s, __func__);
- free(s, M_TCPLOG);
+ m_freem(m);
+ return (0);
+ }
+ if ((SEQ_GEQ(th->th_seq, last->tqe_start)) &&
+ (SEQ_GEQ((last->tqe_start + last->tqe_len), th->th_seq))) {
+ /* Common case, trailing segment is added */
+ /**
+ * +--last
+ * v
+ * reassembly buffer |---| |---| |---|
+ * new segment |---|
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path1, 1);
+#endif
+ if (SEQ_GT((last->tqe_start + last->tqe_len), th->th_seq)) {
+ i = (last->tqe_start + last->tqe_len) - th->th_seq;
+ if (i < *tlenp) {
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, last, NULL, 0, i, TCP_R_LOG_TRIM, 3);
+ th->th_seq += i;
+#endif
+ m_adj(m, i);
+ *tlenp -= i;
+ } else {
+ /* Complete overlap */
+ TCPSTAT_INC(tcps_rcvduppack);
+ TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
+ m_freem(m);
+ *tlenp = last->tqe_len;
+ *seq_start = last->tqe_start;
+ return (0);
+ }
+ }
+ if (last->tqe_flags & TH_FIN) {
+ /*
+ * We have data after the FIN on the last?
+ */
+ *tlenp = 0;
+ m_freem(m);
+ return(0);
}
+ tcp_reass_append(tp, last, m, th, *tlenp, mlast, lenofoh);
+ tp->t_segqmbuflen += lenofoh;
+ *seq_start = last->tqe_start;
+ *tlenp = last->tqe_len;
return (0);
- } else {
- bzero(&tqs, sizeof(struct tseg_qent));
- te = &tqs;
- if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
- NULL))) {
- log(LOG_DEBUG,
- "%s; %s: global zone limit reached, using "
- "stack for missing segment\n", s, __func__);
- free(s, M_TCPLOG);
+ } else if (SEQ_GT(th->th_seq, (last->tqe_start + last->tqe_len))) {
+ /*
+ * Second common case, we missed
+ * another one and have something more
+ * for the end.
+ */
+ /**
+ * +--last
+ * v
+ * reassembly buffer |---| |---| |---|
+ * new segment |---|
+ */
+ if (last->tqe_flags & TH_FIN) {
+ /*
+ * We have data after the FIN on the last?
+ */
+ *tlenp = 0;
+ m_freem(m);
+ return(0);
}
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path2, 1);
+#endif
+ p = last;
+ goto new_entry;
}
+ } else {
+ /* First segment (it's NULL). */
+ goto new_entry;
}
- tp->t_segqlen++;
+ first = TAILQ_FIRST(&tp->t_segq);
+ if (SEQ_LT(th->th_seq, first->tqe_start) &&
+ SEQ_GEQ((th->th_seq + *tlenp),first->tqe_start) &&
+ SEQ_LT((th->th_seq + *tlenp), (first->tqe_start + first->tqe_len))) {
+ /*
+ * The head of the queue is prepended by this and
+ * it may be the one I want most.
+ */
+ /**
+ * first-------+
+ * v
+ * rea: |---| |---| |---|
+ * new: |---|
+ * Note the case we do not deal with here is:
+ * rea= |---| |---| |---|
+ * new= |----|
+ * Due to the fact that it could be
+ * new |--------------------|
+ * And we might need to merge forward.
+ */
+#ifdef INVARIANTS
+ struct mbuf *firstmbuf;
+#endif
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path3, 1);
+#endif
+ if (SEQ_LT(th->th_seq, tp->rcv_nxt)) {
+ /*
+ * The resend was even before
+ * what we have. We need to trim it.
+ * Note TSNH (it should be trimmed
+ * before the call to tcp_reass()).
+ */
+#ifdef INVARIANTS
+ panic("th->th_seq:%u rcv_nxt:%u tp:%p not pre-trimmed",
+ th->th_seq, tp->rcv_nxt, tp);
+#else
+ i = tp->rcv_nxt - th->th_seq;
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, first, NULL, 0, i, TCP_R_LOG_TRIM, 4);
+#endif
+ m_adj(m, i);
+ th->th_seq += i;
+ *tlenp -= i;
+#endif
+ }
+#ifdef INVARIANTS
+ firstmbuf = first->tqe_m;
+#endif
+ tcp_reass_prepend(tp, first, m, th, *tlenp, mlast, lenofoh);
+#ifdef INVARIANTS
+ if (firstmbuf == first->tqe_m) {
+ panic("First stayed same m:%p foobar:%p first->tqe_m:%p tp:%p first:%p",
+ m, firstmbuf, first->tqe_m, tp, first);
+ } else if (first->tqe_m != m) {
+ panic("First did not change to m:%p foobar:%p first->tqe_m:%p tp:%p first:%p",
+ m, firstmbuf, first->tqe_m, tp, first);
+ }
+#endif
+ tp->t_segqmbuflen += lenofoh;
+ *seq_start = first->tqe_start;
+ *tlenp = first->tqe_len;
+ goto present;
+ } else if (SEQ_LT((th->th_seq + *tlenp), first->tqe_start)) {
+ /* New segment is before our earliest segment. */
+ /**
+ * first---->+
+ * v
+ * rea= |---| ....
+ * new" |---|
+ *
+ */
+ goto new_entry;
+ }
/*
* Find a segment which begins after this one does.
*/
- LIST_FOREACH(q, &tp->t_segq, tqe_q) {
- if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_fullwalk, 1);
+#endif
+ TAILQ_FOREACH(q, &tp->t_segq, tqe_q) {
+ if (SEQ_GT(q->tqe_start, th->th_seq))
break;
- p = q;
}
-
- /*
- * If there is a preceding segment, it may provide some of
- * our data already. If so, drop the data from the incoming
- * segment. If it provides all of our data, drop us.
+ p = TAILQ_PREV(q, tsegqe_head, tqe_q);
+ /**
+ * Now is this fit just in-between only?
+ * i.e.:
+ * p---+ +----q
+ * v v
+ * res= |--| |--| |--|
+ * nee |-|
+ */
+ if (SEQ_LT((th->th_seq + *tlenp), q->tqe_start) &&
+ ((p == NULL) || (SEQ_GT(th->th_seq, (p->tqe_start + p->tqe_len))))) {
+ /* Yep no overlap */
+ goto new_entry;
+ }
+ /**
+ * If we reach here we have some (possibly all) overlap
+ * such as:
+ * res= |--| |--| |--|
+ * new= |----|
+ * or new= |-----------------|
+ * or new= |--------|
+ * or new= |---|
+ * or new= |-----------|
*/
- if (p != NULL) {
- int i;
+ if ((p != NULL) &&
+ (SEQ_LEQ(th->th_seq, (p->tqe_start + p->tqe_len)))) {
/* conversion to int (in i) handles seq wraparound */
- i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
- if (i > 0) {
+
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path4, 1);
+#endif
+ i = p->tqe_start + p->tqe_len - th->th_seq;
+ if (i >= 0) {
if (i >= *tlenp) {
+ /**
+ * prev seg---->+
+ * v
+ * reassembly buffer |---|
+ * new segment |-|
+ */
TCPSTAT_INC(tcps_rcvduppack);
TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
+ *tlenp = p->tqe_len;
+ *seq_start = p->tqe_start;
m_freem(m);
- if (te != &tqs)
- uma_zfree(tcp_reass_zone, te);
- tp->t_segqlen--;
/*
* Try to present any queued data
* at the left window edge to the user.
* This is needed after the 3-WHS
- * completes.
+ * completes. Note this probably
+ * will not work and we will return.
*/
- goto present; /* ??? */
+ return (0);
}
- m_adj(m, i);
- *tlenp -= i;
- th->th_seq += i;
+ if (i > 0) {
+ /**
+ * prev seg---->+
+ * v
+ * reassembly buffer |---|
+ * new segment |-----|
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path5, 1);
+#endif
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, p, NULL, 0, i, TCP_R_LOG_TRIM, 5);
+#endif
+ m_adj(m, i);
+ *tlenp -= i;
+ th->th_seq += i;
+ }
+ }
+ if (th->th_seq == (p->tqe_start + p->tqe_len)) {
+ /*
+ * If dovetails in with this one
+ * append it.
+ */
+ /**
+ * prev seg---->+
+ * v
+ * reassembly buffer |--| |---|
+ * new segment |--|
+ * (note: it was trimmed above if it overlapped)
+ */
+ tcp_reass_append(tp, p, m, th, *tlenp, mlast, lenofoh);
+ tp->t_segqmbuflen += lenofoh;
+ } else {
+#ifdef INVARIANTS
+ panic("Impossible cut th_seq:%u p->seq:%u(%d) p:%p tp:%p",
+ th->th_seq, p->tqe_start, p->tqe_len,
+ p, tp);
+#endif
+ *tlenp = 0;
+ m_freem(m);
+ return (0);
+ }
+ q = p;
+ } else {
+ /*
+ * The new data runs over the
+ * top of previously sack'd data (in q).
+ * It may be partially overlapping, or
+ * it may overlap the entire segment.
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path6, 1);
+#endif
+ if (SEQ_GEQ((th->th_seq + *tlenp), (q->tqe_start + q->tqe_len))) {
+ /* It consumes it all */
+ /**
+ * next seg---->+
+ * v
+ * reassembly buffer |--| |---|
+ * new segment |----------|
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path7, 1);
+#endif
+ tcp_reass_replace(tp, q, m, th->th_seq, *tlenp, mlast, lenofoh, th->th_flags);
+ } else {
+ /*
+ * We just need to prepend the data
+ * to this. It does not overrun
+ * the end.
+ */
+ /**
+ * next seg---->+
+ * v
+ * reassembly buffer |--| |---|
+ * new segment |----------|
+ */
+ tcp_reass_prepend(tp, q, m, th, *tlenp, mlast, lenofoh);
+ tp->t_segqmbuflen += lenofoh;
}
}
- tp->t_rcvoopack++;
- TCPSTAT_INC(tcps_rcvoopack);
- TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);
+ /* Now does it go further than that? */
+ tcp_reass_merge_forward(tp, q);
+ *seq_start = q->tqe_start;
+ *tlenp = q->tqe_len;
+ goto present;
- /*
- * While we overlap succeeding segments trim them or,
- * if they are completely covered, dequeue them.
+ /*
+ * When we reach here we can't combine it
+ * with any existing segment.
+ *
+ * Limit the number of segments that can be queued to reduce the
+ * potential for mbuf exhaustion. For best performance, we want to be
+ * able to queue a full window's worth of segments. The size of the
+ * socket receive buffer determines our advertised window and grows
+ * automatically when socket buffer autotuning is enabled. Use it as the
+ * basis for our queue limit.
+ *
+ * However, allow the user to specify a ceiling for the number of
+ * segments in each queue.
+ *
+ * Always let the missing segment through which caused this queue.
+ * NB: Access to the socket buffer is left intentionally unlocked as we
+ * can tolerate stale information here.
+ *
+ * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
+ * should work but causes packets to be dropped when they shouldn't.
+ * Investigate why and re-evaluate the below limit after the behaviour
+ * is understood.
*/
- while (q) {
- int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
- if (i <= 0)
- break;
- if (i < q->tqe_len) {
- q->tqe_th->th_seq += i;
- q->tqe_len -= i;
- m_adj(q->tqe_m, i);
- break;
+new_entry:
+ if (tcp_new_limits) {
+ if ((tp->t_segqlen > tcp_reass_queue_guard) &&
+ (*tlenp < MSIZE)) {
+ /*
+ * This is really a lie, we are not full but
+ * are getting a segment that is above
+ * guard threshold. If it is and its below
+ * a mbuf size (256) we drop it if it
+ * can't fill in some place.
+ */
+ TCPSTAT_INC(tcps_rcvreassfull);
+ *tlenp = 0;
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: queue limit reached, "
+ "segment dropped\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ m_freem(m);
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
+ return (0);
}
+ } else {
- nq = LIST_NEXT(q, tqe_q);
- LIST_REMOVE(q, tqe_q);
- m_freem(q->tqe_m);
- uma_zfree(tcp_reass_zone, q);
- tp->t_segqlen--;
- q = nq;
+ if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
+ tp->t_segqlen >= min((so->so_rcv.sb_hiwat / tp->t_maxseg) + 1,
+ tcp_reass_maxqueuelen)) {
+ TCPSTAT_INC(tcps_rcvreassfull);
+ *tlenp = 0;
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: queue limit reached, "
+ "segment dropped\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ m_freem(m);
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
+ return (0);
+ }
}
-
+ /*
+ * Allocate a new queue entry. If we can't, or hit the zone limit
+ * just drop the pkt.
+ */
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
+ if (te == NULL) {
+ TCPSTAT_INC(tcps_rcvmemdrop);
+ m_freem(m);
+ *tlenp = 0;
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
+ NULL))) {
+ log(LOG_DEBUG, "%s; %s: global zone limit "
+ "reached, segment dropped\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ return (0);
+ }
+ tp->t_segqlen++;
+ tp->t_rcvoopack++;
+ TCPSTAT_INC(tcps_rcvoopack);
+ TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);
/* Insert the new segment queue entry into place. */
te->tqe_m = m;
- te->tqe_th = th;
+ te->tqe_flags = th->th_flags;
te->tqe_len = *tlenp;
-
+ te->tqe_start = th->th_seq;
+ te->tqe_last = mlast;
+ te->tqe_mbuf_cnt = lenofoh;
+ tp->t_segqmbuflen += te->tqe_mbuf_cnt;
if (p == NULL) {
- LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
+ TAILQ_INSERT_HEAD(&tp->t_segq, te, tqe_q);
} else {
- KASSERT(te != &tqs, ("%s: temporary stack based entry not "
- "first element in queue", __func__));
- LIST_INSERT_AFTER(p, te, tqe_q);
+ TAILQ_INSERT_AFTER(&tp->t_segq, p, te, tqe_q);
}
-
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_new_in(tp, th->th_seq, *tlenp, m, TCP_R_LOG_NEW_ENTRY, te);
+#endif
present:
/*
* Present data to user, advancing rcv_nxt through
@@ -314,24 +1025,56 @@ present:
*/
if (!TCPS_HAVEESTABLISHED(tp->t_state))
return (0);
- q = LIST_FIRST(&tp->t_segq);
- if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
+ q = TAILQ_FIRST(&tp->t_segq);
+ KASSERT(q == NULL || SEQ_GEQ(q->tqe_start, tp->rcv_nxt),
+ ("Reassembly queue for %p has stale entry at head", tp));
+ if (!q || q->tqe_start != tp->rcv_nxt) {
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
return (0);
+ }
SOCKBUF_LOCK(&so->so_rcv);
do {
tp->rcv_nxt += q->tqe_len;
- flags = q->tqe_th->th_flags & TH_FIN;
- nq = LIST_NEXT(q, tqe_q);
- LIST_REMOVE(q, tqe_q);
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ flags = q->tqe_flags & TH_FIN;
+ nq = TAILQ_NEXT(q, tqe_q);
+ TAILQ_REMOVE(&tp->t_segq, q, tqe_q);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
m_freem(q->tqe_m);
- else
+ } else {
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_new_in(tp, q->tqe_start, q->tqe_len, q->tqe_m, TCP_R_LOG_READ, q);
+ tcp_log_reassm(tp, q, NULL, th->th_seq, *tlenp, TCP_R_LOG_READ, 1);
+#endif
sbappendstream_locked(&so->so_rcv, q->tqe_m, 0);
- if (q != &tqs)
+ }
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, q, NULL, th->th_seq, *tlenp, TCP_R_LOG_READ, 2);
+#endif
+ KASSERT(tp->t_segqmbuflen >= q->tqe_mbuf_cnt,
+ ("tp:%p seg queue goes negative", tp));
+ tp->t_segqmbuflen -= q->tqe_mbuf_cnt;
+ if (q != &tqs)
uma_zfree(tcp_reass_zone, q);
tp->t_segqlen--;
q = nq;
- } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
+ } while (q && q->tqe_start == tp->rcv_nxt);
+ if (TAILQ_EMPTY(&tp->t_segq) &&
+ (tp->t_segqmbuflen != 0)) {
+#ifdef INVARIANTS
+ panic("tp:%p segq:%p len:%d queue empty",
+ tp, &tp->t_segq, tp->t_segqmbuflen);
+#else
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, NULL, NULL, th->th_seq, *tlenp, TCP_R_LOG_ZERO, 0);
+#endif
+ tp->t_segqmbuflen = 0;
+#endif
+ }
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
sorwakeup_locked(so);
return (flags);
}
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c
index 787213b0..4852ffaf 100644
--- a/freebsd/sys/netinet/tcp_subr.c
+++ b/freebsd/sys/netinet/tcp_subr.c
@@ -216,13 +216,13 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs");
-static VNET_DEFINE(int, icmp_may_rst) = 1;
+VNET_DEFINE_STATIC(int, icmp_may_rst) = 1;
#define V_icmp_may_rst VNET(icmp_may_rst)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_may_rst), 0,
"Certain ICMP unreachable messages may abort connections in SYN_SENT");
-static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0;
+VNET_DEFINE_STATIC(int, tcp_isn_reseed_interval) = 0;
#define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_isn_reseed_interval), 0,
@@ -239,6 +239,10 @@ VNET_DEFINE(uma_zone_t, sack_hole_zone);
VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
#endif
+#define TS_OFFSET_SECRET_LENGTH 32
+VNET_DEFINE_STATIC(u_char, ts_offset_secret[TS_OFFSET_SECRET_LENGTH]);
+#define V_ts_offset_secret VNET(ts_offset_secret)
+
static int tcp_default_fb_init(struct tcpcb *tp);
static void tcp_default_fb_fini(struct tcpcb *tp, int tcb_is_purged);
static int tcp_default_handoff_ok(struct tcpcb *tp);
@@ -701,7 +705,7 @@ struct tcpcb_mem {
#endif
};
-static VNET_DEFINE(uma_zone_t, tcpcb_zone);
+VNET_DEFINE_STATIC(uma_zone_t, tcpcb_zone);
#define V_tcpcb_zone VNET(tcpcb_zone)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
@@ -949,11 +953,10 @@ deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
rw_wunlock(&tcp_function_lock);
VNET_LIST_RLOCK();
- /* XXX handle */
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
INP_INFO_WLOCK(&V_tcbinfo);
- LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
+ CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inp);
if (inp->inp_flags & INP_TIMEWAIT) {
INP_WUNLOCK(inp);
@@ -1099,6 +1102,7 @@ tcp_init(void)
/* Initialize the TCP logging data. */
tcp_log_init();
#endif
+ arc4rand(&V_ts_offset_secret, sizeof(V_ts_offset_secret), 0);
if (tcp_soreceive_stream) {
#ifdef INET
@@ -1629,7 +1633,7 @@ tcp_newtcpcb(struct inpcb *inp)
tp->t_vnet = inp->inp_vnet;
#endif
tp->t_timers = &tm->tt;
- /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
+ TAILQ_INIT(&tp->t_segq);
tp->t_maxseg =
#ifdef INET6
isipv6 ? V_tcp_v6mssdflt :
@@ -1723,7 +1727,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
* therefore don't enter the loop below until the connection
* list has stabilised.
*/
- LIST_FOREACH(inp, &V_tcb, inp_list) {
+ CK_LIST_FOREACH(inp, &V_tcb, inp_list) {
INP_WLOCK(inp);
/* Important to skip tcptw structs. */
if (!(inp->inp_flags & INP_TIMEWAIT) &&
@@ -1737,11 +1741,18 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
*/
if (CC_ALGO(tp) == unload_algo) {
tmpalgo = CC_ALGO(tp);
- /* NewReno does not require any init. */
- CC_ALGO(tp) = &newreno_cc_algo;
- /* XXX defer to epoch_call */
if (tmpalgo->cb_destroy != NULL)
tmpalgo->cb_destroy(tp->ccv);
+ CC_DATA(tp) = NULL;
+ /*
+ * NewReno may allocate memory on
+ * demand for certain stateful
+ * configuration as needed, but is
+ * coded to never fail on memory
+ * allocation failure so it is a safe
+ * fallback.
+ */
+ CC_ALGO(tp) = &newreno_cc_algo;
}
}
INP_WUNLOCK(inp);
@@ -1893,6 +1904,7 @@ tcp_discardcb(struct tcpcb *tp)
/* Allow the CC algorithm to clean up after itself. */
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
+ CC_DATA(tp) = NULL;
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
@@ -1922,10 +1934,11 @@ tcp_timer_discard(void *ptp)
{
struct inpcb *inp;
struct tcpcb *tp;
+ struct epoch_tracker et;
tp = (struct tcpcb *)ptp;
CURVNET_SET(tp->t_vnet);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
__func__, tp));
@@ -1945,13 +1958,13 @@ tcp_timer_discard(void *ptp)
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
return;
}
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
}
@@ -2024,10 +2037,12 @@ tcp_drain(void)
* useful.
*/
INP_INFO_WLOCK(&V_tcbinfo);
- LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
- if (inpb->inp_flags & INP_TIMEWAIT)
- continue;
+ CK_LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inpb);
+ if (inpb->inp_flags & INP_TIMEWAIT) {
+ INP_WUNLOCK(inpb);
+ continue;
+ }
if ((tcpb = intotcpcb(inpb)) != NULL) {
tcp_reass_flush(tcpb);
tcp_clean_sackreport(tcpb);
@@ -2110,10 +2125,10 @@ static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, m, n, pcb_count;
- struct in_pcblist *il;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
+ struct epoch_tracker et;
/*
* The process of preparing the TCB list is too time-consuming and
@@ -2157,12 +2172,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
if (error)
return (error);
- il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS);
- inp_list = il->il_inp_list;
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
INP_INFO_WLOCK(&V_tcbinfo);
- for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
- inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
+ for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
+ inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt) {
/*
@@ -2201,10 +2215,14 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
-
- il->il_count = n;
- il->il_pcbinfo = &V_tcbinfo;
- epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (!error) {
/*
@@ -2221,6 +2239,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
INP_LIST_RUNLOCK(&V_tcbinfo);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
+ free(inp_list, M_TEMP);
return (error);
}
@@ -2342,6 +2361,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct icmp *icp;
struct in_conninfo inc;
+ struct epoch_tracker et;
tcp_seq icmp_tcp_seq;
int mtu;
@@ -2373,7 +2393,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@@ -2438,7 +2458,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
out:
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#endif /* INET */
@@ -2456,6 +2476,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
struct ip6ctlparam *ip6cp = NULL;
const struct sockaddr_in6 *sa6_src = NULL;
struct in_conninfo inc;
+ struct epoch_tracker et;
struct tcp_ports {
uint16_t th_sport;
uint16_t th_dport;
@@ -2517,7 +2538,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
}
bzero(&t_ports, sizeof(struct tcp_ports));
m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport,
&ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@@ -2589,10 +2610,45 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
out:
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#endif /* INET6 */
+/*
+ * Compute a keyed 32-bit hash over a connection's addresses and ports.
+ *
+ * The MD5 input is, in order: foreign port, local port, foreign address,
+ * local address (IPv4 or IPv6 form, selected by INC_ISIPV6 in inc_flags),
+ * followed by the len bytes at key.  The return value is the first 32-bit
+ * word of the digest as it sits in memory.
+ */
+static uint32_t
+tcp_keyed_hash(struct in_conninfo *inc, u_char *key, u_int len)
+{
+	uint32_t digest[4];
+	MD5_CTX md5;
+
+	MD5Init(&md5);
+	MD5Update(&md5, &inc->inc_fport, sizeof(uint16_t));
+	MD5Update(&md5, &inc->inc_lport, sizeof(uint16_t));
+	/* Only the address families compiled in contribute a case label. */
+	switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET6
+	case INC_ISIPV6:
+		MD5Update(&md5, &inc->inc6_faddr, sizeof(struct in6_addr));
+		MD5Update(&md5, &inc->inc6_laddr, sizeof(struct in6_addr));
+		break;
+#endif
+#ifdef INET
+	case 0:
+		MD5Update(&md5, &inc->inc_faddr, sizeof(struct in_addr));
+		MD5Update(&md5, &inc->inc_laddr, sizeof(struct in_addr));
+		break;
+#endif
+	}
+	MD5Update(&md5, key, len);
+	MD5Final((unsigned char *)digest, &md5);
+
+	return (digest[0]);
+}
+
+/*
+ * Derive the TCP timestamp offset for the connection described by inc:
+ * the keyed hash of its addresses and ports under V_ts_offset_secret.
+ */
+uint32_t
+tcp_new_ts_offset(struct in_conninfo *inc)
+{
+	uint32_t ts_offset;
+
+	ts_offset = tcp_keyed_hash(inc, V_ts_offset_secret,
+	    sizeof(V_ts_offset_secret));
+	return (ts_offset);
+}
/*
* Following is where TCP initial sequence number generation occurs.
@@ -2634,19 +2690,20 @@ out:
* as reseeding should not be necessary.
*
* Locking of the global variables isn_secret, isn_last_reseed, isn_offset,
- * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In
+ * isn_offset_old, and isn_ctx is performed using the ISN lock. In
* general, this means holding an exclusive (write) lock.
*/
#define ISN_BYTES_PER_SECOND 1048576
#define ISN_STATIC_INCREMENT 4096
#define ISN_RANDOM_INCREMENT (4096 - 1)
+#define ISN_SECRET_LENGTH 32
-static VNET_DEFINE(u_char, isn_secret[32]);
-static VNET_DEFINE(int, isn_last);
-static VNET_DEFINE(int, isn_last_reseed);
-static VNET_DEFINE(u_int32_t, isn_offset);
-static VNET_DEFINE(u_int32_t, isn_offset_old);
+VNET_DEFINE_STATIC(u_char, isn_secret[ISN_SECRET_LENGTH]);
+VNET_DEFINE_STATIC(int, isn_last);
+VNET_DEFINE_STATIC(int, isn_last_reseed);
+VNET_DEFINE_STATIC(u_int32_t, isn_offset);
+VNET_DEFINE_STATIC(u_int32_t, isn_offset_old);
#define V_isn_secret VNET(isn_secret)
#define V_isn_last VNET(isn_last)
@@ -2655,45 +2712,23 @@ static VNET_DEFINE(u_int32_t, isn_offset_old);
#define V_isn_offset_old VNET(isn_offset_old)
tcp_seq
-tcp_new_isn(struct tcpcb *tp)
+tcp_new_isn(struct in_conninfo *inc)
{
- MD5_CTX isn_ctx;
- u_int32_t md5_buffer[4];
tcp_seq new_isn;
u_int32_t projected_offset;
- INP_WLOCK_ASSERT(tp->t_inpcb);
-
ISN_LOCK();
/* Seed if this is the first use, reseed if requested. */
if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) &&
(((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz)
< (u_int)ticks))) {
- read_random(&V_isn_secret, sizeof(V_isn_secret));
+ arc4rand(&V_isn_secret, sizeof(V_isn_secret), 0);
V_isn_last_reseed = ticks;
}
/* Compute the md5 hash and return the ISN. */
- MD5Init(&isn_ctx);
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short));
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short));
-#ifdef INET6
- if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr,
- sizeof(struct in6_addr));
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr,
- sizeof(struct in6_addr));
- } else
-#endif
- {
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr,
- sizeof(struct in_addr));
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr,
- sizeof(struct in_addr));
- }
- MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret));
- MD5Final((u_char *) &md5_buffer, &isn_ctx);
- new_isn = (tcp_seq) md5_buffer[0];
+ new_isn = (tcp_seq)tcp_keyed_hash(inc, V_isn_secret,
+ sizeof(V_isn_secret));
V_isn_offset += ISN_STATIC_INCREMENT +
(arc4random() & ISN_RANDOM_INCREMENT);
if (ticks != V_isn_last) {
@@ -2840,6 +2875,9 @@ tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
+ if (inc->inc_flags & INC_IPV6MINMTU)
+ return (IPV6_MMTU);
+
if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid);
if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0,
@@ -2928,6 +2966,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
struct tcpcb *tp;
struct tcptw *tw;
struct sockaddr_in *fin, *lin;
+ struct epoch_tracker et;
#ifdef INET6
struct sockaddr_in6 *fin6, *lin6;
#endif
@@ -2987,7 +3026,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
default:
return (EINVAL);
}
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -3026,7 +3065,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
INP_WUNLOCK(inp);
} else
error = ESRCH;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error);
}
diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c
index e163aa54..6fdd859d 100644
--- a/freebsd/sys/netinet/tcp_syncache.c
+++ b/freebsd/sys/netinet/tcp_syncache.c
@@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
@@ -104,19 +105,19 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-static VNET_DEFINE(int, tcp_syncookies) = 1;
+VNET_DEFINE_STATIC(int, tcp_syncookies) = 1;
#define V_tcp_syncookies VNET(tcp_syncookies)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookies), 0,
"Use TCP SYN cookies if the syncache overflows");
-static VNET_DEFINE(int, tcp_syncookiesonly) = 0;
+VNET_DEFINE_STATIC(int, tcp_syncookiesonly) = 0;
#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
-static VNET_DEFINE(int, functions_inherit_listen_socket_stack) = 1;
+VNET_DEFINE_STATIC(int, functions_inherit_listen_socket_stack) = 1;
#define V_functions_inherit_listen_socket_stack \
VNET(functions_inherit_listen_socket_stack)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack,
@@ -164,7 +165,7 @@ static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
#define TCP_SYNCACHE_HASHSIZE 512
#define TCP_SYNCACHE_BUCKETLIMIT 30
-static VNET_DEFINE(struct tcp_syncache, tcp_syncache);
+VNET_DEFINE_STATIC(struct tcp_syncache, tcp_syncache);
#define V_tcp_syncache VNET(tcp_syncache)
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0,
@@ -185,8 +186,27 @@ SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_VNET | CTLFLAG_R
&VNET_NAME(tcp_syncache.hashsize), 0,
"Size of TCP syncache hashtable");
-SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_VNET | CTLFLAG_RW,
+/*
+ * Sysctl handler for the syncache rexmtlimit knob.
+ *
+ * Passes a copy of V_tcp_syncache.rexmt_limit through sysctl_handle_int().
+ * When the request carries a new value, it is stored only if it does not
+ * exceed TCP_MAXRXTSHIFT; anything larger is rejected with EINVAL.
+ */
+static int
+sysctl_net_inet_tcp_syncache_rexmtlimit_check(SYSCTL_HANDLER_ARGS)
+{
+	u_int limit;
+	int error;
+
+	/* Work on a copy so a rejected write leaves the live value untouched. */
+	limit = V_tcp_syncache.rexmt_limit;
+	error = sysctl_handle_int(oidp, &limit, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (limit > TCP_MAXRXTSHIFT)
+		return (EINVAL);
+	V_tcp_syncache.rexmt_limit = limit;
+	return (0);
+}
+
+SYSCTL_PROC(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
&VNET_NAME(tcp_syncache.rexmt_limit), 0,
+ sysctl_net_inet_tcp_syncache_rexmtlimit_check, "UI",
"Limit on SYN/ACK retransmissions");
VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1;
@@ -398,8 +418,14 @@ syncache_drop(struct syncache *sc, struct syncache_head *sch)
static void
syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
{
- sc->sc_rxttime = ticks +
- TCPTV_RTOBASE * (tcp_syn_backoff[sc->sc_rxmits]);
+ int rexmt;
+
+ if (sc->sc_rxmits == 0)
+ rexmt = TCPTV_RTOBASE;
+ else
+ TCPT_RANGESET(rexmt, TCPTV_RTOBASE * tcp_syn_backoff[sc->sc_rxmits],
+ tcp_rexmit_min, TCPTV_REXMTMAX);
+ sc->sc_rxttime = ticks + rexmt;
sc->sc_rxmits++;
if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
sch->sch_nextc = sc->sc_rxttime;
@@ -746,10 +772,9 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
goto abort;
}
#ifdef INET6
- if (sc->sc_inc.inc_flags & INC_ISIPV6) {
+ if (inp->inp_vflag & INP_IPV6PROTO) {
struct inpcb *oinp = sotoinpcb(lso);
- struct in6_addr laddr6;
- struct sockaddr_in6 sin6;
+
/*
* Inherit socket options from the listening socket.
* Note that in6p_inputopts are not (and should not be)
@@ -763,6 +788,11 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (oinp->in6p_outputopts)
inp->in6p_outputopts =
ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
+ }
+
+ if (sc->sc_inc.inc_flags & INC_ISIPV6) {
+ struct in6_addr laddr6;
+ struct sockaddr_in6 sin6;
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(sin6);
@@ -1153,25 +1183,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
}
- /*
- * If timestamps were negotiated, the reflected timestamp
- * must be equal to what we actually sent in the SYN|ACK
- * except in the case of 0. Some boxes are known for sending
- * broken timestamp replies during the 3whs (and potentially
- * during the connection also).
- *
- * Accept the final ACK of 3whs with reflected timestamp of 0
- * instead of sending a RST and deleting the syncache entry.
- */
- if ((to->to_flags & TOF_TS) && to->to_tsecr &&
- to->to_tsecr != sc->sc_ts) {
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
- log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
- "segment rejected\n",
- s, __func__, to->to_tsecr, sc->sc_ts);
- goto failed;
- }
-
*lsop = syncache_socket(sc, *lsop, m);
if (*lsop == NULL)
@@ -1404,6 +1415,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
*/
mac_syncache_destroy(&maclabel);
#endif
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
/* Retransmit SYN|ACK and reset retransmit count. */
if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
@@ -1418,7 +1430,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
TCPSTAT_INC(tcps_sndtotal);
}
SCH_UNLOCK(sch);
- goto done;
+ goto donenoprobe;
}
if (tfo_cookie_valid) {
@@ -1498,8 +1510,8 @@ skip_alloc:
*/
if (to->to_flags & TOF_TS) {
sc->sc_tsreflect = to->to_tsval;
- sc->sc_ts = tcp_ts_getticks();
sc->sc_flags |= SCF_TIMESTAMP;
+ sc->sc_tsoff = tcp_new_ts_offset(inc);
}
if (to->to_flags & TOF_SCALE) {
int wscale = 0;
@@ -1571,6 +1583,7 @@ skip_alloc:
goto tfo_expanded;
}
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
/*
* Do a standard 3-way handshake.
*/
@@ -1586,8 +1599,11 @@ skip_alloc:
syncache_free(sc);
TCPSTAT_INC(tcps_sc_dropped);
}
+ goto donenoprobe;
done:
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
+donenoprobe:
if (m) {
*lsop = NULL;
m_freem(m);
@@ -1727,8 +1743,7 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
to.to_flags |= TOF_SCALE;
}
if (sc->sc_flags & SCF_TIMESTAMP) {
- /* Virgin timestamp or TCP cookie enhanced one. */
- to.to_tsval = sc->sc_ts;
+ to.to_tsval = sc->sc_tsoff + tcp_ts_getticks();
to.to_tsecr = sc->sc_tsreflect;
to.to_flags |= TOF_TS;
}
@@ -1799,6 +1814,7 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
return (error);
}
#endif
+ TCP_PROBE5(send, NULL, NULL, ip6, NULL, th);
error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
}
#endif
@@ -1819,6 +1835,7 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
return (error);
}
#endif
+ TCP_PROBE5(send, NULL, NULL, ip, NULL, th);
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
#endif
@@ -2033,12 +2050,6 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc)
iss = hash & ~0xff;
iss |= cookie.cookie ^ (hash >> 24);
- /* Randomize the timestamp. */
- if (sc->sc_flags & SCF_TIMESTAMP) {
- sc->sc_ts = arc4random();
- sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
- }
-
TCPSTAT_INC(tcps_sc_sendcookie);
return (iss);
}
@@ -2125,8 +2136,7 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
if (to->to_flags & TOF_TS) {
sc->sc_flags |= SCF_TIMESTAMP;
sc->sc_tsreflect = to->to_tsval;
- sc->sc_ts = to->to_tsecr;
- sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
+ sc->sc_tsoff = tcp_new_ts_offset(inc);
}
if (to->to_flags & TOF_SIGNATURE)
diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h
index 92a7c7c9..0104e528 100644
--- a/freebsd/sys/netinet/tcp_syncache.h
+++ b/freebsd/sys/netinet/tcp_syncache.h
@@ -56,7 +56,6 @@ struct syncache {
int sc_rxttime; /* retransmit time */
u_int16_t sc_rxmits; /* retransmit counter */
u_int32_t sc_tsreflect; /* timestamp to reflect */
- u_int32_t sc_ts; /* our timestamp to send */
u_int32_t sc_tsoff; /* ts offset w/ syncookies */
u_int32_t sc_flowlabel; /* IPv6 flowlabel */
tcp_seq sc_irs; /* seq from peer */
diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c
index 422e5122..c50af2bb 100644
--- a/freebsd/sys/netinet/tcp_timer.c
+++ b/freebsd/sys/netinet/tcp_timer.c
@@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_seq.h>
#include <netinet/cc/cc.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
@@ -141,7 +142,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
/* max idle probes */
int tcp_maxpersistidle;
-static int tcp_rexmit_drop_options = 0;
+int tcp_rexmit_drop_options = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
&tcp_rexmit_drop_options, 0,
"Drop TCP options from 3rd and later retransmitted SYN");
@@ -176,18 +177,13 @@ static int per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
&per_cpu_timers , 0, "run tcp timers on all cpus");
-#if 0
-#define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
- ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
-#endif
-
/*
* Map the given inp to a CPU id.
*
* This queries RSS if it's compiled in, else it defaults to the current
* CPU ID.
*/
-static inline int
+inline int
inp_to_cpuid(struct inpcb *inp)
{
u_int cpuid;
@@ -245,7 +241,7 @@ int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
{ 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
-static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
+int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
/*
* TCP timer processing.
@@ -280,55 +276,10 @@ tcp_timer_delack(void *xtp)
CURVNET_RESTORE();
}
-/*
- * When a timer wants to remove a TCB it must
- * hold the INP_INFO_RLOCK(). The timer function
- * should only have grabbed the INP_WLOCK() when
- * it entered. To safely switch to holding both the
- * INP_INFO_RLOCK() and the INP_WLOCK() we must first
- * grab a reference on the inp, which will hold the inp
- * so that it can't be removed. We then unlock the INP_WLOCK(),
- * and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK()
- * we proceed again to get the INP_WLOCK() (this preserves proper
- * lock order). After acquiring the INP_WLOCK we must check if someone
- * else deleted the pcb i.e. the inp_flags check.
- * If so we return 1 otherwise we return 0.
- *
- * No matter what the tcp_inpinfo_lock_add() function
- * returns the caller must afterwards call tcp_inpinfo_lock_del()
- * to drop the locks and reference properly.
- */
-
-int
-tcp_inpinfo_lock_add(struct inpcb *inp)
-{
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- return(1);
- }
- return(0);
-
-}
-
void
tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp)
{
- INP_INFO_RUNLOCK(&V_tcbinfo);
- if (inp && (tp == NULL)) {
- /*
- * If tcp_close/drop() gets called and tp
- * returns NULL, then the function dropped
- * the inp lock, we hold a reference keeping
- * this around, so we must re-aquire the
- * INP_WLOCK() in order to proceed with
- * our dropping the inp reference.
- */
- INP_WLOCK(inp);
- }
- if (inp && in_pcbrele_wlocked(inp) == 0)
+ if (inp && tp != NULL)
INP_WUNLOCK(inp);
}
@@ -337,6 +288,7 @@ tcp_timer_2msl(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
+ struct epoch_tracker et;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -383,11 +335,13 @@ tcp_timer_2msl(void *xtp)
tp->t_inpcb && tp->t_inpcb->inp_socket &&
(tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
TCPSTAT_INC(tcps_finwait2_drops);
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_close(tp);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
} else {
@@ -395,15 +349,17 @@ tcp_timer_2msl(void *xtp)
callout_reset(&tp->t_timers->tt_2msl,
TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
} else {
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_close(tp);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- }
+ }
#ifdef TCPDEBUG
if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
@@ -424,6 +380,7 @@ tcp_timer_keep(void *xtp)
struct tcpcb *tp = xtp;
struct tcptemp *t_template;
struct inpcb *inp;
+ struct epoch_tracker et;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -517,11 +474,11 @@ tcp_timer_keep(void *xtp)
dropit:
TCPSTAT_INC(tcps_keepdrops);
-
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT);
#ifdef TCPDEBUG
@@ -530,8 +487,9 @@ dropit:
PRU_SLOWTIMO);
#endif
TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
-out:
+ out:
CURVNET_RESTORE();
}
@@ -540,6 +498,7 @@ tcp_timer_persist(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
+ struct epoch_tracker et;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -579,11 +538,13 @@ tcp_timer_persist(void *xtp)
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
TCPSTAT_INC(tcps_persistdrop);
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -594,11 +555,13 @@ tcp_timer_persist(void *xtp)
if (tp->t_state > TCPS_CLOSE_WAIT &&
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
TCPSTAT_INC(tcps_persistdrop);
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -624,6 +587,7 @@ tcp_timer_rexmt(void * xtp)
CURVNET_SET(tp->t_vnet);
int rexmt;
struct inpcb *inp;
+ struct epoch_tracker et;
#ifdef TCPDEBUG
int ostate;
@@ -660,11 +624,13 @@ tcp_timer_rexmt(void * xtp)
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
TCPSTAT_INC(tcps_timeoutdrop);
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -950,6 +916,111 @@ tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
return callout_active(t_callout);
}
+/*
+ * Suspend one TCP timer.
+ *
+ * Sets the timer's TT_*_SUS bit in tt_flags and then stops its callout.
+ * The flag is there to force the timer never to run: stopping the callout
+ * alone could lose a race and leave the timer free to restart itself
+ * (the keep and persist timers especially do this).
+ *
+ * Returns the result of callout_stop(); panics on an unknown timer_type.
+ */
+int
+tcp_timer_suspend(struct tcpcb *tp, uint32_t timer_type)
+{
+	struct callout *co;
+	uint32_t sus_flag;
+
+	/* Map the timer type to its callout and its suspend bit. */
+	switch (timer_type) {
+	case TT_DELACK:
+		co = &tp->t_timers->tt_delack;
+		sus_flag = TT_DELACK_SUS;
+		break;
+	case TT_REXMT:
+		co = &tp->t_timers->tt_rexmt;
+		sus_flag = TT_REXMT_SUS;
+		break;
+	case TT_PERSIST:
+		co = &tp->t_timers->tt_persist;
+		sus_flag = TT_PERSIST_SUS;
+		break;
+	case TT_KEEP:
+		co = &tp->t_timers->tt_keep;
+		sus_flag = TT_KEEP_SUS;
+		break;
+	case TT_2MSL:
+		co = &tp->t_timers->tt_2msl;
+		sus_flag = TT_2MSL_SUS;
+		break;
+	default:
+		panic("tp:%p bad timer_type 0x%x", tp, timer_type);
+	}
+	/* Mark suspended before stopping the callout. */
+	tp->t_timers->tt_flags |= sus_flag;
+	return (callout_stop(co));
+}
+
+/*
+ * Undo tcp_timer_suspend() for one timer.
+ *
+ * If the timer's TT_*_SUS bit is set in tt_flags it is cleared, and the
+ * timer is re-armed when the connection state shows it should be running.
+ * A timer that is not suspended is left alone.  Panics on an unknown
+ * timer_type.
+ */
+void
+tcp_timers_unsuspend(struct tcpcb *tp, uint32_t timer_type)
+{
+	switch (timer_type) {
+	case TT_DELACK:
+		if (tp->t_timers->tt_flags & TT_DELACK_SUS) {
+			tp->t_timers->tt_flags &= ~TT_DELACK_SUS;
+			if (tp->t_flags & TF_DELACK) {
+				/* A delayed ACK was requested; restart its timer. */
+				tp->t_flags &= ~TF_DELACK;
+				tcp_timer_activate(tp, TT_DELACK,
+				    tcp_delacktime);
+			}
+		}
+		break;
+	case TT_REXMT:
+		if (tp->t_timers->tt_flags & TT_REXMT_SUS) {
+			tp->t_timers->tt_flags &= ~TT_REXMT_SUS;
+			if (SEQ_GT(tp->snd_max, tp->snd_una) &&
+			    (tcp_timer_active((tp), TT_PERSIST) == 0) &&
+			    tp->snd_wnd) {
+				/* We have outstanding data; activate a timer. */
+				tcp_timer_activate(tp, TT_REXMT,
+				    tp->t_rxtcur);
+			}
+		}
+		break;
+	case TT_PERSIST:
+		if (tp->t_timers->tt_flags & TT_PERSIST_SUS) {
+			tp->t_timers->tt_flags &= ~TT_PERSIST_SUS;
+			if (tp->snd_wnd == 0) {
+				/* Zero send window: activate the persist timer. */
+				tp->t_rxtshift = 0;
+				tcp_setpersist(tp);
+			}
+		}
+		break;
+	case TT_KEEP:
+		if (tp->t_timers->tt_flags & TT_KEEP_SUS) {
+			tp->t_timers->tt_flags &= ~TT_KEEP_SUS;
+			/* Keepalive is always re-armed once unsuspended. */
+			tcp_timer_activate(tp, TT_KEEP,
+			    TCPS_HAVEESTABLISHED(tp->t_state) ?
+			    TP_KEEPIDLE(tp) : TP_KEEPINIT(tp));
+		}
+		break;
+	case TT_2MSL:
+		/*
+		 * Test the bit with '&'.  Using '&=' here would overwrite
+		 * tt_flags and wipe every other flag bit as a side effect
+		 * of the check.
+		 */
+		if (tp->t_timers->tt_flags & TT_2MSL_SUS) {
+			tp->t_timers->tt_flags &= ~TT_2MSL_SUS;
+			if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+			    ((tp->t_inpcb->inp_socket == NULL) ||
+			    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE))) {
+				/* Start the 2MSL timer. */
+				tcp_timer_activate(tp, TT_2MSL,
+				    (tcp_fast_finwait2_recycle) ?
+				    tcp_finwait2_timeout : TP_MAXIDLE(tp));
+			}
+		}
+		break;
+	default:
+		panic("tp:%p bad timer_type 0x%x", tp, timer_type);
+	}
+}
+
void
tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
{
diff --git a/freebsd/sys/netinet/tcp_timer.h b/freebsd/sys/netinet/tcp_timer.h
index b0ff3809..a2ab6ca5 100644
--- a/freebsd/sys/netinet/tcp_timer.h
+++ b/freebsd/sys/netinet/tcp_timer.h
@@ -168,11 +168,15 @@ struct tcp_timer {
#define TT_2MSL 0x0010
#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
-#define TT_DELACK_RST 0x0100
-#define TT_REXMT_RST 0x0200
-#define TT_PERSIST_RST 0x0400
-#define TT_KEEP_RST 0x0800
-#define TT_2MSL_RST 0x1000
+/*
+ * Suspend flags - used when suspending a timer
+ * from ever running again.
+ */
+#define TT_DELACK_SUS 0x0100
+#define TT_REXMT_SUS 0x0200
+#define TT_PERSIST_SUS 0x0400
+#define TT_KEEP_SUS 0x0800
+#define TT_2MSL_SUS 0x1000
#define TT_STOPPED 0x00010000
@@ -196,6 +200,8 @@ extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
extern int tcp_syn_backoff[];
+extern int tcp_totbackoff;
+extern int tcp_rexmit_drop_options;
extern int tcp_always_keepalive;
extern int tcp_finwait2_timeout;
@@ -208,7 +214,6 @@ VNET_DECLARE(int, tcp_pmtud_blackhole_mss);
VNET_DECLARE(int, tcp_v6pmtud_blackhole_mss);
#define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss)
-int tcp_inpinfo_lock_add(struct inpcb *inp);
void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp);
void tcp_timer_init(void);
diff --git a/freebsd/sys/netinet/tcp_timewait.c b/freebsd/sys/netinet/tcp_timewait.c
index afadf7cd..8a28283f 100644
--- a/freebsd/sys/netinet/tcp_timewait.c
+++ b/freebsd/sys/netinet/tcp_timewait.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -98,7 +99,7 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-static VNET_DEFINE(uma_zone_t, tcptw_zone);
+VNET_DEFINE_STATIC(uma_zone_t, tcptw_zone);
#define V_tcptw_zone VNET(tcptw_zone)
static int maxtcptw;
@@ -113,11 +114,11 @@ static int maxtcptw;
* - a tcptw relies on its inpcb reference counting for memory stability
* - a tcptw is dereferenceable only while its inpcb is locked
*/
-static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl);
+VNET_DEFINE_STATIC(TAILQ_HEAD(, tcptw), twq_2msl);
#define V_twq_2msl VNET(twq_2msl)
/* Global timewait lock */
-static VNET_DEFINE(struct rwlock, tw_lock);
+VNET_DEFINE_STATIC(struct rwlock, tw_lock);
#define V_tw_lock VNET(tw_lock)
#define TW_LOCK_INIT(tw, d) rw_init_flags(&(tw), (d), 0)
@@ -174,7 +175,7 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
&maxtcptw, 0, sysctl_maxtcptw, "IU",
"Maximum number of compressed TCP TIME_WAIT entries");
-static VNET_DEFINE(int, nolocaltimewait) = 0;
+VNET_DEFINE_STATIC(int, nolocaltimewait) = 0;
#define V_nolocaltimewait VNET(nolocaltimewait)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(nolocaltimewait), 0,
@@ -208,11 +209,12 @@ void
tcp_tw_destroy(void)
{
struct tcptw *tw;
+ struct epoch_tracker et;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
tcp_twclose(tw, 0);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
TW_LOCK_DESTROY(V_tw_lock);
uma_zdestroy(V_tcptw_zone);
@@ -230,6 +232,7 @@ tcp_twstart(struct tcpcb *tp)
struct tcptw twlocal, *tw;
struct inpcb *inp = tp->t_inpcb;
struct socket *so;
+ uint32_t recwin;
bool acknow, local;
#ifdef INET6
bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
@@ -292,10 +295,16 @@ tcp_twstart(struct tcpcb *tp)
/*
* Recover last window size sent.
*/
- if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
- tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
- else
- tw->last_win = 0;
+ so = inp->inp_socket;
+ recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
+ (long)TCP_MAXWIN << tp->rcv_scale);
+ if (recwin < (so->so_rcv.sb_hiwat / 4) &&
+ recwin < tp->t_maxseg)
+ recwin = 0;
+ if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
+ recwin < (tp->rcv_adv - tp->rcv_nxt))
+ recwin = (tp->rcv_adv - tp->rcv_nxt);
+ tw->last_win = htons((u_short)(recwin >> tp->rcv_scale));
/*
* Set t_recent if timestamps are used on the connection.
@@ -332,7 +341,6 @@ tcp_twstart(struct tcpcb *tp)
* and might not be needed here any longer.
*/
tcp_discardcb(tp);
- so = inp->inp_socket;
soisdisconnected(so);
tw->tw_so_options = so->so_options;
inp->inp_flags |= INP_TIMEWAIT;
@@ -451,9 +459,14 @@ tcp_twcheck(struct inpcb *inp, struct tcpopt *to __unused, struct tcphdr *th,
* Acknowledge the segment if it has data or is not a duplicate ACK.
*/
if (thflags != TH_ACK || tlen != 0 ||
- th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
+ th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt) {
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
tcp_twrespond(tw, TH_ACK);
+ goto dropnoprobe;
+ }
drop:
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
+dropnoprobe:
INP_WUNLOCK(inp);
m_freem(m);
return (0);
@@ -599,6 +612,7 @@ tcp_twrespond(struct tcptw *tw, int flags)
th->th_sum = in6_cksum_pseudo(ip6,
sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ TCP_PROBE5(send, NULL, NULL, ip6, NULL, th);
error = ip6_output(m, inp->in6p_outputopts, NULL,
(tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
}
@@ -614,6 +628,7 @@ tcp_twrespond(struct tcptw *tw, int flags)
ip->ip_len = htons(m->m_pkthdr.len);
if (V_path_mtu_discovery)
ip->ip_off |= htons(IP_DF);
+ TCP_PROBE5(send, NULL, NULL, ip, NULL, th);
error = ip_output(m, inp->inp_options, NULL,
((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
NULL, inp);
@@ -676,6 +691,7 @@ tcp_tw_2msl_scan(int reuse)
{
struct tcptw *tw;
struct inpcb *inp;
+ struct epoch_tracker et;
#ifdef INVARIANTS
if (reuse) {
@@ -709,54 +725,46 @@ tcp_tw_2msl_scan(int reuse)
in_pcbref(inp);
TW_RUNLOCK(V_tw_lock);
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
-
- INP_WLOCK(inp);
- tw = intotw(inp);
- if (in_pcbrele_wlocked(inp)) {
- if (__predict_true(tw == NULL)) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
- continue;
- } else {
- /* This should not happen as in TIMEWAIT
- * state the inp should not be destroyed
- * before its tcptw. If INVARIANTS is
- * defined panic.
- */
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ INP_WLOCK(inp);
+ tw = intotw(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ if (__predict_true(tw == NULL)) {
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ continue;
+ } else {
+ /* This should not happen as in TIMEWAIT
+ * state the inp should not be destroyed
+ * before its tcptw. If INVARIANTS is
+ * defined panic.
+ */
#ifdef INVARIANTS
- panic("%s: Panic before an infinite "
- "loop: INP_TIMEWAIT && (INP_FREED "
- "|| inp last reference) && tw != "
- "NULL", __func__);
+ panic("%s: Panic before an infinite "
+ "loop: INP_TIMEWAIT && (INP_FREED "
+ "|| inp last reference) && tw != "
+ "NULL", __func__);
#else
- log(LOG_ERR, "%s: Avoid an infinite "
- "loop: INP_TIMEWAIT && (INP_FREED "
- "|| inp last reference) && tw != "
- "NULL", __func__);
+ log(LOG_ERR, "%s: Avoid an infinite "
+ "loop: INP_TIMEWAIT && (INP_FREED "
+ "|| inp last reference) && tw != "
+ "NULL", __func__);
#endif
- INP_INFO_RUNLOCK(&V_tcbinfo);
- break;
- }
- }
-
- if (tw == NULL) {
- /* tcp_twclose() has already been called */
- INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
- continue;
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ break;
}
+ }
- tcp_twclose(tw, reuse);
- INP_INFO_RUNLOCK(&V_tcbinfo);
- if (reuse)
- return tw;
- } else {
- /* INP_INFO lock is busy, continue later. */
- INP_WLOCK(inp);
- if (!in_pcbrele_wlocked(inp))
- INP_WUNLOCK(inp);
- break;
+ if (tw == NULL) {
+ /* tcp_twclose() has already been called */
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ continue;
}
+
+ tcp_twclose(tw, reuse);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ if (reuse)
+ return tw;
}
return NULL;
diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c
index bf2cff4c..617f60d0 100644
--- a/freebsd/sys/netinet/tcp_usrreq.c
+++ b/freebsd/sys/netinet/tcp_usrreq.c
@@ -278,11 +278,12 @@ tcp_usr_detach(struct socket *so)
{
struct inpcb *inp;
int rlock = 0;
+ struct epoch_tracker et;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
rlock = 1;
}
INP_WLOCK(inp);
@@ -290,7 +291,7 @@ tcp_usr_detach(struct socket *so)
("tcp_usr_detach: inp_socket == NULL"));
tcp_detach(so, inp);
if (rlock)
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#ifdef INET
@@ -379,6 +380,11 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
struct sockaddr_in sin;
in6_sin6_2_sin(&sin, sin6p);
+ if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ INP_HASH_WUNLOCK(&V_tcbinfo);
+ goto out;
+ }
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
error = in_pcbbind(inp, (struct sockaddr *)&sin,
@@ -608,6 +614,10 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
}
in6_sin6_2_sin(&sin, sin6p);
+ if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
if ((error = prison_remote_ip4(td->td_ucred,
@@ -670,10 +680,11 @@ tcp_usr_disconnect(struct socket *so)
{
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker et;
int error = 0;
TCPDEBUG0;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp);
@@ -690,7 +701,7 @@ out:
TCPDEBUG2(PRU_DISCONNECT);
TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error);
}
@@ -749,6 +760,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
struct tcpcb *tp = NULL;
struct in_addr addr;
struct in6_addr addr6;
+ struct epoch_tracker et;
in_port_t port = 0;
int v4 = 0;
TCPDEBUG0;
@@ -758,7 +770,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -785,7 +797,7 @@ out:
TCPDEBUG2(PRU_ACCEPT);
TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -805,9 +817,10 @@ tcp_usr_shutdown(struct socket *so)
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker et;
TCPDEBUG0;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp);
@@ -826,7 +839,7 @@ out:
TCPDEBUG2(PRU_SHUTDOWN);
TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error);
}
@@ -889,6 +902,13 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker net_et;
+#ifdef INET
+#ifdef INET6
+ struct sockaddr_in sin;
+#endif
+ struct sockaddr_in *sinp;
+#endif
#ifdef INET6
int isipv6;
#endif
@@ -899,7 +919,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* this call.
*/
if (flags & PRUS_EOF)
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, net_et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -915,11 +935,124 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
error = ECONNRESET;
goto out;
}
-#ifdef INET6
- isipv6 = nam && nam->sa_family == AF_INET6;
-#endif /* INET6 */
tp = intotcpcb(inp);
TCPDEBUG1();
+ if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
+ switch (nam->sa_family) {
+#ifdef INET
+ case AF_INET:
+ sinp = (struct sockaddr_in *)nam;
+ if (sinp->sin_len != sizeof(struct sockaddr_in)) {
+ if (m)
+ m_freem(m);
+ error = EINVAL;
+ goto out;
+ }
+ if ((inp->inp_vflag & INP_IPV6) != 0) {
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ if ((error = prison_remote_ip4(td->td_ucred,
+ &sinp->sin_addr))) {
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+#ifdef INET6
+ isipv6 = 0;
+#endif
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6p;
+
+ sin6p = (struct sockaddr_in6 *)nam;
+ if (sin6p->sin6_len != sizeof(struct sockaddr_in6)) {
+ if (m)
+ m_freem(m);
+ error = EINVAL;
+ goto out;
+ }
+ if (IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+#ifdef INET
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
+ error = EINVAL;
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ if ((inp->inp_vflag & INP_IPV4) == 0) {
+ error = EAFNOSUPPORT;
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ inp->inp_vflag &= ~INP_IPV6;
+ sinp = &sin;
+ in6_sin6_2_sin(sinp, sin6p);
+ if (IN_MULTICAST(
+ ntohl(sinp->sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ if ((error = prison_remote_ip4(td->td_ucred,
+ &sinp->sin_addr))) {
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ isipv6 = 0;
+#else /* !INET */
+ error = EAFNOSUPPORT;
+ if (m)
+ m_freem(m);
+ goto out;
+#endif /* INET */
+ } else {
+ if ((inp->inp_vflag & INP_IPV6) == 0) {
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ inp->inp_vflag &= ~INP_IPV4;
+ inp->inp_inc.inc_flags |= INC_ISIPV6;
+ if ((error = prison_remote_ip6(td->td_ucred,
+ &sin6p->sin6_addr))) {
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ isipv6 = 1;
+ }
+ break;
+ }
+#endif /* INET6 */
+ default:
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ }
if (control) {
/* TCP doesn't do control messages (rights, creds, etc) */
if (control->m_len) {
@@ -947,7 +1080,8 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
else
#endif
#ifdef INET
- error = tcp_connect(tp, nam, td);
+ error = tcp_connect(tp,
+ (struct sockaddr *)sinp, td);
#endif
if (error)
goto out;
@@ -1016,7 +1150,8 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
else
#endif
#ifdef INET
- error = tcp_connect(tp, nam, td);
+ error = tcp_connect(tp,
+ (struct sockaddr *)sinp, td);
#endif
if (error)
goto out;
@@ -1042,7 +1177,7 @@ out:
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
if (flags & PRUS_EOF)
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, net_et);
return (error);
}
@@ -1081,12 +1216,13 @@ tcp_usr_abort(struct socket *so)
{
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker et;
TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_abort: inp_socket == NULL"));
@@ -1112,7 +1248,7 @@ tcp_usr_abort(struct socket *so)
}
INP_WUNLOCK(inp);
dropped:
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
/*
@@ -1123,12 +1259,13 @@ tcp_usr_close(struct socket *so)
{
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker et;
TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_close: inp_socket == NULL"));
@@ -1152,7 +1289,7 @@ tcp_usr_close(struct socket *so)
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
/*
@@ -1304,7 +1441,9 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tcp_state_change(tp, TCPS_SYN_SENT);
- tp->iss = tcp_new_isn(tp);
+ tp->iss = tcp_new_isn(&inp->inp_inc);
+ if (tp->t_flags & TF_REQ_TSTMP)
+ tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
tcp_sendseqinit(tp);
return 0;
@@ -1343,7 +1482,9 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(inp->inp_socket);
TCPSTAT_INC(tcps_connattempt);
tcp_state_change(tp, TCPS_SYN_SENT);
- tp->iss = tcp_new_isn(tp);
+ tp->iss = tcp_new_isn(&inp->inp_inc);
+ if (tp->t_flags & TF_REQ_TSTMP)
+ tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
tcp_sendseqinit(tp);
return 0;
@@ -1445,6 +1586,42 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
if (inp->inp_vflag & INP_IPV6PROTO) {
INP_WUNLOCK(inp);
error = ip6_ctloutput(so, sopt);
+ /*
+ * In case of the IPV6_USE_MIN_MTU socket option,
+ * set the INC_IPV6MINMTU flag to announce a corresponding
+ * MSS during the initial handshake.
+ * If the TCP connection is not in the front states,
+ * just reduce the MSS being used.
+ * This avoids the sending of TCP segments which will
+ * be fragmented at the IPv6 layer.
+ */
+ if ((error == 0) &&
+ (sopt->sopt_dir == SOPT_SET) &&
+ (sopt->sopt_level == IPPROTO_IPV6) &&
+ (sopt->sopt_name == IPV6_USE_MIN_MTU)) {
+ INP_WLOCK(inp);
+ if ((inp->inp_flags &
+ (INP_TIMEWAIT | INP_DROPPED))) {
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
+ }
+ inp->inp_inc.inc_flags |= INC_IPV6MINMTU;
+ tp = intotcpcb(inp);
+ if ((tp->t_state >= TCPS_SYN_SENT) &&
+ (inp->inp_inc.inc_flags & INC_ISIPV6)) {
+ struct ip6_pktopts *opt;
+
+ opt = inp->in6p_outputopts;
+ if ((opt != NULL) &&
+ (opt->ip6po_minmtu ==
+ IP6PO_MINMTU_ALL)) {
+ if (tp->t_maxseg > TCP6_MSS) {
+ tp->t_maxseg = TCP6_MSS;
+ }
+ }
+ }
+ INP_WUNLOCK(inp);
+ }
}
#endif /* INET6 */
#if defined(INET6) && defined(INET)
@@ -1487,7 +1664,6 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
return (0);
}
if (tp->t_state != TCPS_CLOSED) {
- int error=EINVAL;
/*
* The user has advanced the state
* past the initial point, we may not
@@ -1500,7 +1676,8 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
* still be possible?
*/
error = (*blk->tfb_tcp_handoff_ok)(tp);
- }
+ } else
+ error = EINVAL;
if (error) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
@@ -1724,6 +1901,7 @@ unlock_and_done:
*/
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
+ CC_DATA(tp) = NULL;
CC_ALGO(tp) = algo;
/*
* If something goes pear shaped initialising the new
@@ -2045,6 +2223,7 @@ tcp_attach(struct socket *so)
{
struct tcpcb *tp;
struct inpcb *inp;
+ struct epoch_tracker et;
int error;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
@@ -2054,10 +2233,10 @@ tcp_attach(struct socket *so)
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
error = in_pcballoc(so, &V_tcbinfo);
if (error) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error);
}
inp = sotoinpcb(so);
@@ -2075,12 +2254,12 @@ tcp_attach(struct socket *so)
if (tp == NULL) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
TCPSTATES_INC(TCPS_CLOSED);
return (0);
}
@@ -2106,7 +2285,8 @@ tcp_disconnect(struct tcpcb *tp)
* Neither tcp_close() nor tcp_drop() should return NULL, as the
* socket is still open.
*/
- if (tp->t_state < TCPS_ESTABLISHED) {
+ if (tp->t_state < TCPS_ESTABLISHED &&
+ !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {
tp = tcp_close(tp);
KASSERT(tp != NULL,
("tcp_disconnect: tcp_close() returned NULL"));
@@ -2383,7 +2563,7 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
db_print_indent(indent);
db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
- LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+ TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
db_print_indent(indent);
db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h
index adaaff61..2fbe07ad 100644
--- a/freebsd/sys/netinet/tcp_var.h
+++ b/freebsd/sys/netinet/tcp_var.h
@@ -46,12 +46,15 @@
#if defined(_KERNEL) || defined(_WANT_TCPCB)
/* TCP segment queue entry */
struct tseg_qent {
- LIST_ENTRY(tseg_qent) tqe_q;
+ TAILQ_ENTRY(tseg_qent) tqe_q;
+ struct mbuf *tqe_m; /* mbuf contains packet */
+ struct mbuf *tqe_last; /* last mbuf in chain */
+ tcp_seq tqe_start; /* TCP Sequence number start */
int tqe_len; /* TCP segment data length */
- struct tcphdr *tqe_th; /* a pointer to tcp header */
- struct mbuf *tqe_m; /* mbuf contains packet */
+ uint32_t tqe_flags; /* The flags from the th->th_flags */
+ uint32_t tqe_mbuf_cnt; /* Count of mbuf overhead */
};
-LIST_HEAD(tsegqe_head, tseg_qent);
+TAILQ_HEAD(tsegqe_head, tseg_qent);
struct sackblk {
tcp_seq start; /* start seq no. of sack block */
@@ -79,6 +82,8 @@ struct sackhint {
uint64_t _pad[1]; /* TBD */
};
+#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
+
STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
/*
@@ -93,8 +98,11 @@ struct tcpcb {
void *t_fb_ptr; /* Pointer to t_fb specific data */
uint32_t t_maxseg:24, /* maximum segment size */
t_logstate:8; /* State of "black box" logging */
- uint32_t t_state:4, /* state of this connection */
- bits_spare : 24;
+ uint32_t t_port:16, /* Tunneling (over udp) port */
+ t_state:4, /* state of this connection */
+ t_idle_reduce : 1,
+ t_delayed_ack: 7, /* Delayed ack variable */
+ bits_spare : 4;
u_int t_flags;
tcp_seq snd_una; /* sent but unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
@@ -104,7 +112,7 @@ struct tcpcb {
tcp_seq snd_up; /* send urgent pointer */
uint32_t snd_wnd; /* send window */
uint32_t snd_cwnd; /* congestion-controlled window */
- uint32_t cl1_spare; /* Spare to round out CL 1 */
+ uint32_t t_peakrate_thr; /* pre-calculated peak rate threshold */
/* Cache line 2 */
u_int32_t ts_offset; /* our timestamp offset */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
@@ -128,6 +136,7 @@ struct tcpcb {
/* Cache line 3 */
tcp_seq rcv_up; /* receive urgent pointer */
int t_segqlen; /* segment reassembly queue length */
+ uint32_t t_segqmbuflen; /* Count of bytes mbufs on all entries */
struct tsegqe_head t_segq; /* segment reassembly queue */
struct mbuf *t_in_pkt;
struct mbuf *t_tail_pkt;
@@ -189,6 +198,7 @@ struct tcpcb {
struct cc_var *ccv; /* congestion control specific vars */
struct osd *osd; /* storage for Khelp module data */
int t_bytes_acked; /* # bytes acked during current RTT */
+ u_int t_maxunacktime;
u_int t_keepinit; /* time to establish connection */
u_int t_keepidle; /* time before keepalive probes begin */
u_int t_keepintvl; /* interval between keepalives */
@@ -260,12 +270,11 @@ struct tcp_function_block {
int (*tfb_tcp_output_wtime)(struct tcpcb *, const struct timeval *);
void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *,
- int, int, uint8_t,
- int);
+ int, int, uint8_t);
void (*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *,
int, int, uint8_t,
- int, int, struct timeval *);
+ int, struct timeval *);
int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
struct inpcb *inp, struct tcpcb *tp);
/* Optional memory allocation/free routine */
@@ -361,6 +370,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
#define TF2_LOG_AUTO 0x00000008 /* Session is auto-logging. */
+#define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */
/*
* Structure to hold TCP options that are only used during segment
@@ -649,6 +659,11 @@ struct tcp_hhook_data {
int tso;
tcp_seq curack;
};
+#ifdef TCP_HHOOK
+void hhook_run_tcp_est_out(struct tcpcb *tp,
+ struct tcphdr *th, struct tcpopt *to,
+ uint32_t len, int tso);
+#endif
#endif
/*
@@ -668,7 +683,7 @@ struct tcp_hhook_data {
*/
#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
struct xtcpcb {
- size_t xt_len; /* length of this structure */
+ ksize_t xt_len; /* length of this structure */
struct xinpcb xt_inp;
char xt_stack[TCP_FUNCTION_NAME_LEN_MAX]; /* (s) */
char xt_logid[TCP_LOG_ID_LEN]; /* (s) */
@@ -801,6 +816,9 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes)
#define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail)
#define V_tcp_sendspace VNET(tcp_sendspace)
+#define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead)
+#define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port)
+
#ifdef TCP_HHOOK
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
@@ -825,7 +843,7 @@ char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
const void *);
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
const void *);
-int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
+int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *);
void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
void tcp_dooptions(struct tcpopt *, u_char *, int, int);
@@ -849,8 +867,7 @@ int tcp_input(struct mbuf **, int *, int);
int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
struct tcpcb *, int);
void tcp_do_segment(struct mbuf *, struct tcphdr *,
- struct socket *, struct tcpcb *, int, int, uint8_t,
- int);
+ struct socket *, struct tcpcb *, int, int, uint8_t);
int register_tcp_functions(struct tcp_function_block *blk, int wait);
int register_tcp_functions_as_names(struct tcp_function_block *blk,
@@ -893,9 +910,12 @@ struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
+int tcp_timer_suspend(struct tcpcb *, uint32_t);
+void tcp_timers_unsuspend(struct tcpcb *, uint32_t);
int tcp_timer_active(struct tcpcb *, uint32_t);
void tcp_timer_stop(struct tcpcb *, uint32_t);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
+int inp_to_cpuid(struct inpcb *inp);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
*/
@@ -909,7 +929,9 @@ void tcp_hc_updatemtu(struct in_conninfo *, uint32_t);
void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
extern struct pr_usrreqs tcp_usrreqs;
-tcp_seq tcp_new_isn(struct tcpcb *);
+
+uint32_t tcp_new_ts_offset(struct in_conninfo *);
+tcp_seq tcp_new_isn(struct in_conninfo *);
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
@@ -921,6 +943,10 @@ void tcp_free_sackholes(struct tcpcb *tp);
int tcp_newreno(struct tcpcb *, struct tcphdr *);
int tcp_compute_pipe(struct tcpcb *);
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
+struct mbuf *
+ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
+ int32_t seglimit, int32_t segsize, struct sockbuf *sb);
+
static inline void
tcp_fields_to_host(struct tcphdr *th)
diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c
index 178a8d5e..9557c154 100644
--- a/freebsd/sys/netinet/udp_usrreq.c
+++ b/freebsd/sys/netinet/udp_usrreq.c
@@ -150,7 +150,7 @@ VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
VNET_DEFINE(struct inpcbinfo, udbinfo);
VNET_DEFINE(struct inpcbhead, ulitecb);
VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
-static VNET_DEFINE(uma_zone_t, udpcb_zone);
+VNET_DEFINE_STATIC(uma_zone_t, udpcb_zone);
#define V_udpcb_zone VNET(udpcb_zone)
#ifndef UDBHASHSIZE
@@ -405,6 +405,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
struct sockaddr_in udp_in[2];
struct mbuf *m;
struct m_tag *fwd_tag;
+ struct epoch_tracker et;
int cscov_partial, iphlen;
m = *mp;
@@ -535,10 +536,10 @@ udp_input(struct mbuf **mp, int *offp, int proto)
struct inpcbhead *pcblist;
struct ip_moptions *imo;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_RLOCK_ET(pcbinfo, et);
pcblist = udp_get_pcblist(proto);
last = NULL;
- LIST_FOREACH(inp, pcblist, inp_list) {
+ CK_LIST_FOREACH(inp, pcblist, inp_list) {
if (inp->inp_lport != uh->uh_dport)
continue;
#ifdef INET6
@@ -599,8 +600,12 @@ udp_input(struct mbuf **mp, int *offp, int proto)
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
NULL) {
- UDP_PROBE(receive, NULL, last, ip,
- last, uh);
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, last, ip,
+ last, uh);
+ else
+ UDP_PROBE(receive, NULL, last, ip, last,
+ uh);
if (udp_append(last, ip, n, iphlen,
udp_in)) {
goto inp_lost;
@@ -618,7 +623,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
* will never clear these options after setting them.
*/
if ((last->inp_socket->so_options &
- (SO_REUSEPORT|SO_REUSEADDR)) == 0)
+ (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
break;
}
@@ -631,14 +636,17 @@ udp_input(struct mbuf **mp, int *offp, int proto)
UDPSTAT_INC(udps_noportbcast);
if (inp)
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_RUNLOCK_ET(pcbinfo, et);
goto badunlocked;
}
- UDP_PROBE(receive, NULL, last, ip, last, uh);
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, last, ip, last, uh);
+ else
+ UDP_PROBE(receive, NULL, last, ip, last, uh);
if (udp_append(last, ip, m, iphlen, udp_in) == 0)
INP_RUNLOCK(last);
inp_lost:
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_RUNLOCK_ET(pcbinfo, et);
return (IPPROTO_DONE);
}
@@ -690,6 +698,10 @@ udp_input(struct mbuf **mp, int *offp, int proto)
inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport),
inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport));
}
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, NULL, ip, NULL, uh);
+ else
+ UDP_PROBE(receive, NULL, NULL, ip, NULL, uh);
UDPSTAT_INC(udps_noport);
if (m->m_flags & (M_BCAST | M_MCAST)) {
UDPSTAT_INC(udps_noportbcast);
@@ -709,6 +721,10 @@ udp_input(struct mbuf **mp, int *offp, int proto)
*/
INP_RLOCK_ASSERT(inp);
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
+ else
+ UDP_PROBE(receive, NULL, inp, ip, inp, uh);
INP_RUNLOCK(inp);
m_freem(m);
return (IPPROTO_DONE);
@@ -724,7 +740,10 @@ udp_input(struct mbuf **mp, int *offp, int proto)
}
}
- UDP_PROBE(receive, NULL, inp, ip, inp, uh);
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
+ else
+ UDP_PROBE(receive, NULL, inp, ip, inp, uh);
if (udp_append(inp, ip, m, iphlen, udp_in) == 0)
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
@@ -808,14 +827,15 @@ udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
struct udpcb *up;
+ void *ctx;
+ udp_tun_icmp_t func;
up = intoudpcb(inp);
- if (up->u_icmp_func != NULL) {
- INP_RUNLOCK(inp);
- (*up->u_icmp_func)(cmd, sa, vip, up->u_tun_ctx);
- } else {
- INP_RUNLOCK(inp);
- }
+ ctx = up->u_tun_ctx;
+ func = up->u_icmp_func;
+ INP_RUNLOCK(inp);
+ if (func != NULL)
+ (*func)(cmd, sa, vip, ctx);
}
}
} else
@@ -842,9 +862,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
struct inpcb *inp, **inp_list;
- struct in_pcblist *il;
inp_gen_t gencnt;
struct xinpgen xig;
+ struct epoch_tracker et;
/*
* The process of preparing the PCB list is too time-consuming and
@@ -863,10 +883,10 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK_ET(&V_udbinfo, et);
gencnt = V_udbinfo.ipi_gencnt;
n = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ n * sizeof(struct xinpcb));
@@ -880,12 +900,14 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS);
- inp_list = il->il_inp_list;
- INP_INFO_RLOCK(&V_udbinfo);
- for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = LIST_NEXT(inp, inp_list)) {
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == NULL)
+ return (ENOMEM);
+
+ INP_INFO_RLOCK_ET(&V_udbinfo, et);
+ for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
@@ -894,7 +916,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
n = i;
error = 0;
@@ -910,9 +932,14 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- il->il_count = n;
- il->il_pcbinfo = &V_udbinfo;
- epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked);
+ INP_INFO_WLOCK(&V_udbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_udbinfo);
if (!error) {
/*
@@ -921,13 +948,14 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK_ET(&V_udbinfo, et);
xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
+ free(inp_list, M_TEMP);
return (error);
}
@@ -1106,6 +1134,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct cmsghdr *cm;
struct inpcbinfo *pcbinfo;
struct sockaddr_in *sin, src;
+ struct epoch_tracker et;
int cscov_partial = 0;
int error = 0;
int ipflags;
@@ -1262,7 +1291,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
(inp->inp_laddr.s_addr == INADDR_ANY) ||
(inp->inp_lport == 0))) ||
(src.sin_family == AF_INET)) {
- INP_HASH_RLOCK(pcbinfo);
+ INP_HASH_RLOCK_ET(pcbinfo, et);
unlock_udbinfo = UH_RLOCKED;
} else
unlock_udbinfo = UH_UNLOCKED;
@@ -1390,6 +1419,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
*/
ui = mtod(m, struct udpiphdr *);
bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
+ ui->ui_v = IPVERSION << 4;
ui->ui_pr = pr;
ui->ui_src = laddr;
ui->ui_dst = faddr;
@@ -1412,8 +1442,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* the entire UDPLite packet is covered by the checksum.
*/
cscov_partial = (cscov == 0) ? 0 : 1;
- } else
- ui->ui_v = IPVERSION << 4;
+ }
/*
* Set the Don't Fragment bit in the IP header.
@@ -1518,8 +1547,11 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
if (unlock_udbinfo == UH_WLOCKED)
INP_HASH_WUNLOCK(pcbinfo);
else if (unlock_udbinfo == UH_RLOCKED)
- INP_HASH_RUNLOCK(pcbinfo);
- UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
+ if (pr == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
+ else
+ UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
error = ip_output(m, inp->inp_options,
(unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
inp->inp_moptions, inp);
@@ -1538,7 +1570,7 @@ release:
} else if (unlock_udbinfo == UH_RLOCKED) {
KASSERT(unlock_inp == UH_RLOCKED,
("%s: shared udbinfo lock, excl inp lock", __func__));
- INP_HASH_RUNLOCK(pcbinfo);
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
INP_RUNLOCK(inp);
} else if (unlock_inp == UH_WLOCKED)
INP_WUNLOCK(inp);
@@ -1719,7 +1751,6 @@ udp_detach(struct socket *so)
INP_WLOCK(inp);
up = intoudpcb(inp);
KASSERT(up != NULL, ("%s: up == NULL", __func__));
- /* XXX defer to epoch_call */
inp->inp_ppcb = NULL;
in_pcbdetach(inp);
in_pcbfree(inp);
diff --git a/freebsd/sys/netinet/udplite.h b/freebsd/sys/netinet/udplite.h
index 0e23cd70..57a1422a 100644
--- a/freebsd/sys/netinet/udplite.h
+++ b/freebsd/sys/netinet/udplite.h
@@ -29,6 +29,17 @@
#ifndef _NETINET_UDPLITE_H_
#define _NETINET_UDPLITE_H_
+/*
+ * UDP-Lite protocol header.
+ * Per RFC 3828, July, 2004.
+ */
+struct udplitehdr {
+ u_short udplite_sport; /* UDP-Lite source port */
+ u_short udplite_dport; /* UDP-Lite destination port */
+ u_short udplite_coverage; /* UDP-Lite checksum coverage */
+ u_short udplite_checksum; /* UDP-Lite checksum */
+};
+
/*
* User-settable options (used with setsockopt).
*/