summary | refs | log | tree | commit | diff | stats
path: root/freebsd/sys/netinet
diff options
context:
space:
mode:
author: Sebastian Huber <sebastian.huber@embedded-brains.de> 2018-08-07 12:12:37 +0200
committer: Sebastian Huber <sebastian.huber@embedded-brains.de> 2018-09-21 10:29:36 +0200
commit: de261e0404e1fe54544275fc57d5b982df4f42b4 (patch)
tree: 856cbdf23d6809b99c4d642d066bc45cd67c26e6 /freebsd/sys/netinet
parent: libbsd.txt: Use rtems_bsd_ifconfig_lo0() (diff)
download: rtems-libbsd-de261e0404e1fe54544275fc57d5b982df4f42b4.tar.bz2
Update to FreeBSD head 2017-06-01
Git mirror commit dfb26efac4ce9101dda240e94d9ab53f80a9e131. Update #3472.
Diffstat (limited to 'freebsd/sys/netinet')
-rw-r--r-- freebsd/sys/netinet/in.c 14
-rw-r--r-- freebsd/sys/netinet/in_kdtrace.h 1
-rw-r--r-- freebsd/sys/netinet/in_mcast.c 3
-rw-r--r-- freebsd/sys/netinet/in_pcb.c 20
-rw-r--r-- freebsd/sys/netinet/in_pcb.h 32
-rw-r--r-- freebsd/sys/netinet/ip_divert.c 18
-rw-r--r-- freebsd/sys/netinet/ip_icmp.c 28
-rw-r--r-- freebsd/sys/netinet/ip_input.c 8
-rw-r--r-- freebsd/sys/netinet/libalias/alias.c 4
-rw-r--r-- freebsd/sys/netinet/raw_ip.c 2
-rw-r--r-- freebsd/sys/netinet/sctp_input.c 19
-rw-r--r-- freebsd/sys/netinet/sctp_os_bsd.h 6
-rw-r--r-- freebsd/sys/netinet/sctp_output.c 4
-rw-r--r-- freebsd/sys/netinet/sctp_pcb.c 28
-rw-r--r-- freebsd/sys/netinet/sctp_sysctl.c 1
-rw-r--r-- freebsd/sys/netinet/sctp_timer.c 8
-rw-r--r-- freebsd/sys/netinet/sctp_usrreq.c 24
-rw-r--r-- freebsd/sys/netinet/sctp_var.h 2
-rw-r--r-- freebsd/sys/netinet/sctputil.c 94
-rw-r--r-- freebsd/sys/netinet/sctputil.h 5
-rw-r--r-- freebsd/sys/netinet/tcp_input.c 150
-rw-r--r-- freebsd/sys/netinet/tcp_lro.c 22
-rw-r--r-- freebsd/sys/netinet/tcp_output.c 10
-rw-r--r-- freebsd/sys/netinet/tcp_reass.c 2
-rw-r--r-- freebsd/sys/netinet/tcp_subr.c 2
-rw-r--r-- freebsd/sys/netinet/tcp_syncache.c 27
-rw-r--r-- freebsd/sys/netinet/tcp_syncache.h 1
-rw-r--r-- freebsd/sys/netinet/tcp_usrreq.c 9
-rw-r--r-- freebsd/sys/netinet/tcp_var.h 2
-rw-r--r-- freebsd/sys/netinet/udp_usrreq.c 16
30 files changed, 358 insertions, 204 deletions
diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c
index ca902fdc..0b31ff7e 100644
--- a/freebsd/sys/netinet/in.c
+++ b/freebsd/sys/netinet/in.c
@@ -98,8 +98,8 @@ int
in_localaddr(struct in_addr in)
{
struct rm_priotracker in_ifa_tracker;
- register u_long i = ntohl(in.s_addr);
- register struct in_ifaddr *ia;
+ u_long i = ntohl(in.s_addr);
+ struct in_ifaddr *ia;
IN_IFADDR_RLOCK(&in_ifa_tracker);
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
@@ -189,8 +189,8 @@ in_localip_more(struct in_ifaddr *ia)
int
in_canforward(struct in_addr in)
{
- register u_long i = ntohl(in.s_addr);
- register u_long net;
+ u_long i = ntohl(in.s_addr);
+ u_long net;
if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
return (0);
@@ -208,8 +208,8 @@ in_canforward(struct in_addr in)
static void
in_socktrim(struct sockaddr_in *ap)
{
- register char *cplim = (char *) &ap->sin_addr;
- register char *cp = (char *) (&ap->sin_addr + 1);
+ char *cplim = (char *) &ap->sin_addr;
+ char *cp = (char *) (&ap->sin_addr + 1);
ap->sin_len = 0;
while (--cp >= cplim)
@@ -966,7 +966,7 @@ in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia)
int
in_broadcast(struct in_addr in, struct ifnet *ifp)
{
- register struct ifaddr *ifa;
+ struct ifaddr *ifa;
int found;
if (in.s_addr == INADDR_BROADCAST ||
diff --git a/freebsd/sys/netinet/in_kdtrace.h b/freebsd/sys/netinet/in_kdtrace.h
index a36991ef..0825c7df 100644
--- a/freebsd/sys/netinet/in_kdtrace.h
+++ b/freebsd/sys/netinet/in_kdtrace.h
@@ -65,6 +65,7 @@ SDT_PROBE_DECLARE(tcp, , , debug__input);
SDT_PROBE_DECLARE(tcp, , , debug__output);
SDT_PROBE_DECLARE(tcp, , , debug__user);
SDT_PROBE_DECLARE(tcp, , , debug__drop);
+SDT_PROBE_DECLARE(tcp, , , receive__autoresize);
SDT_PROBE_DECLARE(udp, , , receive);
SDT_PROBE_DECLARE(udp, , , send);
diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c
index cb92a254..2ba4d9e8 100644
--- a/freebsd/sys/netinet/in_mcast.c
+++ b/freebsd/sys/netinet/in_mcast.c
@@ -1049,9 +1049,10 @@ inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
/* Decrement ASM listener count on transition out of ASM mode. */
if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
- (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
+ (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
--inm->inm_st[1].iss_asm;
+ }
}
/* Increment ASM listener count on transition to ASM mode. */
diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c
index b61b6e09..3d43ed92 100644
--- a/freebsd/sys/netinet/in_pcb.c
+++ b/freebsd/sys/netinet/in_pcb.c
@@ -218,14 +218,25 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
*/
/*
+ * Different protocols initialize their inpcbs differently - giving
+ * different names to the lock. But they are all disposed of the same way.
+ */
+static void
+inpcb_fini(void *mem, int size)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_DESTROY(inp);
+}
+
+/*
* Initialize an inpcbinfo -- we should be able to reduce the number of
* arguments in time.
*/
void
in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
- char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
- uint32_t inpcbzone_flags, u_int hashfields)
+ char *inpcbzone_name, uma_init inpcbzone_init, u_int hashfields)
{
INP_INFO_LOCK_INIT(pcbinfo, name);
@@ -245,8 +256,7 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
#endif
pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
- NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
- inpcbzone_flags);
+ NULL, NULL, inpcbzone_init, inpcb_fini, UMA_ALIGN_PTR, 0);
uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
uma_zone_set_warning(pcbinfo->ipi_zone,
"kern.ipc.maxsockets limit reached");
@@ -296,7 +306,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
return (ENOBUFS);
- bzero(inp, inp_zero_size);
+ bzero(&inp->inp_start_zero, inp_zero_size);
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
inp->inp_cred = crhold(so->so_cred);
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h
index 59de3b0f..42fd23d0 100644
--- a/freebsd/sys/netinet/in_pcb.h
+++ b/freebsd/sys/netinet/in_pcb.h
@@ -183,26 +183,29 @@ struct icmp6_filter;
struct inpcbpolicy;
struct m_snd_tag;
struct inpcb {
+ /* Cache line #1 (amd64) */
LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
- LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
- /* (p[w]) for list iteration */
- /* (p[r]/l) for addition/removal */
+ struct rwlock inp_lock;
+ /* Cache line #2 (amd64) */
+#define inp_start_zero inp_refcount
+#define inp_zero_size (sizeof(struct inpcb) - \
+ offsetof(struct inpcb, inp_start_zero))
+ u_int inp_refcount; /* (i) refcount */
+ int inp_flags; /* (i) generic IP/datagram flags */
+ int inp_flags2; /* (i) generic IP/datagram flags #2*/
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
+ struct socket *inp_socket; /* (i) back pointer to socket */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
- struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
- int inp_flags; /* (i) generic IP/datagram flags */
- int inp_flags2; /* (i) generic IP/datagram flags #2*/
u_char inp_vflag; /* (i) IP version flag (v4/v6) */
u_char inp_ip_ttl; /* (i) time to live proto */
u_char inp_ip_p; /* (c) protocol proto */
u_char inp_ip_minttl; /* (i) minimum TTL or drop */
uint32_t inp_flowid; /* (x) flow id / queue id */
- u_int inp_refcount; /* (i) refcount */
struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */
uint32_t inp_flowtype; /* (x) M_HASHTYPE value */
uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */
@@ -235,17 +238,16 @@ struct inpcb {
};
LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
struct inpcbport *inp_phd; /* (i/h) head of this list */
-#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* (c) generation count */
struct llentry *inp_lle; /* cached L2 information */
- struct rwlock inp_lock;
rt_gen_t inp_rt_cookie; /* generation for route entry */
union { /* cached L3 information */
- struct route inpu_route;
- struct route_in6 inpu_route6;
- } inp_rtu;
-#define inp_route inp_rtu.inpu_route
-#define inp_route6 inp_rtu.inpu_route6
+ struct route inp_route;
+ struct route_in6 inp_route6;
+ };
+ LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
+ /* (p[w]) for list iteration */
+ /* (p[r]/l) for addition/removal */
};
#endif /* _KERNEL */
@@ -690,7 +692,7 @@ VNET_DECLARE(int, ipport_tcpallocs);
void in_pcbinfo_destroy(struct inpcbinfo *);
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
- int, int, char *, uma_init, uma_fini, uint32_t, u_int);
+ int, int, char *, uma_init, u_int);
int in_pcbbind_check_bindmulti(const struct inpcb *ni,
const struct inpcb *oi);
diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c
index 3efae683..5d7b1635 100644
--- a/freebsd/sys/netinet/ip_divert.c
+++ b/freebsd/sys/netinet/ip_divert.c
@@ -143,14 +143,6 @@ div_inpcb_init(void *mem, int size, int flags)
}
static void
-div_inpcb_fini(void *mem, int size)
-{
- struct inpcb *inp = mem;
-
- INP_LOCK_DESTROY(inp);
-}
-
-static void
div_init(void)
{
@@ -160,7 +152,7 @@ div_init(void)
* place for hashbase == NULL.
*/
in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb",
- div_inpcb_init, div_inpcb_fini, 0, IPI_HASHFIELDS_NONE);
+ div_inpcb_init, IPI_HASHFIELDS_NONE);
}
static void
@@ -491,6 +483,14 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
/* Send packet to input processing via netisr */
switch (ip->ip_v) {
case IPVERSION:
+ /*
+ * Restore the M_MCAST or M_BCAST flag when the destination address
+ * is multicast or broadcast. It is expected by ip_tryforward().
+ */
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)))
+ m->m_flags |= M_MCAST;
+ else if (in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
+ m->m_flags |= M_BCAST;
netisr_queue_src(NETISR_IP, (uintptr_t)so, m);
break;
#ifdef INET6
diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c
index 1c32b1b8..acc2e6b6 100644
--- a/freebsd/sys/netinet/ip_icmp.c
+++ b/freebsd/sys/netinet/ip_icmp.c
@@ -187,10 +187,10 @@ kmod_icmpstat_inc(int statnum)
void
icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu)
{
- register struct ip *oip = mtod(n, struct ip *), *nip;
- register unsigned oiphlen = oip->ip_hl << 2;
- register struct icmp *icp;
- register struct mbuf *m;
+ struct ip *oip = mtod(n, struct ip *), *nip;
+ unsigned oiphlen = oip->ip_hl << 2;
+ struct icmp *icp;
+ struct mbuf *m;
unsigned icmplen, icmpelen, nlen;
KASSERT((u_int)type <= ICMP_MAXTYPE, ("%s: illegal ICMP type", __func__));
@@ -542,11 +542,10 @@ icmp_input(struct mbuf **mp, int *offp, int proto)
ICMPSTAT_INC(icps_bmcastecho);
break;
}
- icp->icmp_type = ICMP_ECHOREPLY;
if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
goto freeit;
- else
- goto reflect;
+ icp->icmp_type = ICMP_ECHOREPLY;
+ goto reflect;
case ICMP_TSTAMP:
if (V_icmptstamprepl == 0)
@@ -560,13 +559,12 @@ icmp_input(struct mbuf **mp, int *offp, int proto)
ICMPSTAT_INC(icps_badlen);
break;
}
+ if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
+ goto freeit;
icp->icmp_type = ICMP_TSTAMPREPLY;
icp->icmp_rtime = iptime();
icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
- if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
- goto freeit;
- else
- goto reflect;
+ goto reflect;
case ICMP_MASKREQ:
if (V_icmpmaskrepl == 0)
@@ -816,7 +814,7 @@ match:
ip->ip_ttl = V_ip_defttl;
if (optlen > 0) {
- register u_char *cp;
+ u_char *cp;
int opt, cnt;
u_int len;
@@ -891,9 +889,9 @@ done:
static void
icmp_send(struct mbuf *m, struct mbuf *opts)
{
- register struct ip *ip = mtod(m, struct ip *);
- register int hlen;
- register struct icmp *icp;
+ struct ip *ip = mtod(m, struct ip *);
+ int hlen;
+ struct icmp *icp;
hlen = ip->ip_hl << 2;
m->m_data += hlen;
diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c
index a9126d4b..437c281a 100644
--- a/freebsd/sys/netinet/ip_input.c
+++ b/freebsd/sys/netinet/ip_input.c
@@ -268,9 +268,9 @@ sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
return (EINVAL);
return (netisr_setqlimit(&ip_direct_nh, qlimit));
}
-SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen,
- CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
- "Maximum size of the IP direct input queue");
+SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
+ CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen,
+ "I", "Maximum size of the IP direct input queue");
static int
sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
@@ -289,7 +289,7 @@ sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops,
+SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
"Number of packets dropped from the IP direct input queue");
#endif /* RSS */
diff --git a/freebsd/sys/netinet/libalias/alias.c b/freebsd/sys/netinet/libalias/alias.c
index cd3b5e05..35343c5f 100644
--- a/freebsd/sys/netinet/libalias/alias.c
+++ b/freebsd/sys/netinet/libalias/alias.c
@@ -701,12 +701,14 @@ ProtoAliasOut(struct libalias *la, struct in_addr *ip_src,
struct alias_link *lnk;
LIBALIAS_LOCK_ASSERT(la);
- (void)create;
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
+ if (!create)
+ return (PKT_ALIAS_IGNORED);
+
lnk = FindProtoOut(la, *ip_src, ip_dst, ip_p);
if (lnk != NULL) {
struct in_addr alias_address;
diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c
index d67df1ca..689a2bc4 100644
--- a/freebsd/sys/netinet/raw_ip.c
+++ b/freebsd/sys/netinet/raw_ip.c
@@ -212,7 +212,7 @@ rip_init(void)
{
in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
- 1, "ripcb", rip_inpcb_init, NULL, 0, IPI_HASHFIELDS_NONE);
+ 1, "ripcb", rip_inpcb_init, IPI_HASHFIELDS_NONE);
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c
index d363642a..be01c38a 100644
--- a/freebsd/sys/netinet/sctp_input.c
+++ b/freebsd/sys/netinet/sctp_input.c
@@ -163,13 +163,11 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
*abort_no_unlock = 1;
goto outnow;
}
- /* We are only accepting if we have a socket with positive
- * so_qlimit. */
+ /* We are only accepting if we have a listening socket. */
if ((stcb == NULL) &&
((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
- (inp->sctp_socket == NULL) ||
- (inp->sctp_socket->so_qlimit == 0))) {
+ (!SCTP_IS_LISTENING(inp)))) {
/*
* FIX ME ?? What about TCP model and we have a
* match/restart case? Actually no fix is needed. the lookup
@@ -1607,8 +1605,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
sctp_stop_all_cookie_timers(stcb);
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
- (inp->sctp_socket->so_qlimit == 0)
- ) {
+ (!SCTP_IS_LISTENING(inp))) {
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -1808,7 +1805,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
- (inp->sctp_socket->so_qlimit == 0)) {
+ (!SCTP_IS_LISTENING(inp))) {
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -2319,7 +2316,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
*notification = SCTP_NOTIFY_ASSOC_UP;
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
- (inp->sctp_socket->so_qlimit == 0)) {
+ (!SCTP_IS_LISTENING(inp))) {
/*
* This is an endpoint that called connect() how it got a
* cookie that is NEW is a bit of a mystery. It must be that
@@ -2345,7 +2342,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
SCTP_SOCKET_UNLOCK(so, 1);
#endif
} else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
- (inp->sctp_socket->so_qlimit)) {
+ (SCTP_IS_LISTENING(inp))) {
/*
* We don't want to do anything with this one. Since it is
* the listening guy. The timer will get started for
@@ -5207,7 +5204,9 @@ process_control_chunks:
* longer listening.
*/
- if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
+ if ((stcb == NULL) &&
+ (!SCTP_IS_LISTENING(inp) ||
+ inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h
index 2da90b69..f2bea00e 100644
--- a/freebsd/sys/netinet/sctp_os_bsd.h
+++ b/freebsd/sys/netinet/sctp_os_bsd.h
@@ -462,8 +462,6 @@ sctp_get_mbuf_for_msg(unsigned int space_needed,
#define SCTP_SHA256_UPDATE SHA256_Update
#define SCTP_SHA256_FINAL(x,y) SHA256_Final((caddr_t)x, y)
-#endif
-
#define SCTP_DECREMENT_AND_CHECK_REFCOUNT(addr) (atomic_fetchadd_int(addr, -1) == 1)
#if defined(INVARIANTS)
#define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \
@@ -484,3 +482,7 @@ sctp_get_mbuf_for_msg(unsigned int space_needed,
} \
}
#endif
+
+#define SCTP_IS_LISTENING(inp) ((inp->sctp_flags & SCTP_PCB_FLAGS_ACCEPTING) != 0)
+
+#endif
diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c
index 2e6eedaf..221d0570 100644
--- a/freebsd/sys/netinet/sctp_output.c
+++ b/freebsd/sys/netinet/sctp_output.c
@@ -11149,7 +11149,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
ip->ip_v = IPVERSION;
ip->ip_hl = (sizeof(struct ip) >> 2);
ip->ip_tos = 0;
- ip->ip_off = 0;
+ ip->ip_off = htons(IP_DF);
ip_fillid(ip);
ip->ip_ttl = MODULE_GLOBAL(ip_defttl);
if (port) {
@@ -12597,7 +12597,7 @@ sctp_lower_sosend(struct socket *so,
(void *)addr,
sndlen);
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
- (inp->sctp_socket->so_qlimit)) {
+ SCTP_IS_LISTENING(inp)) {
/* The listener can NOT send */
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
error = ENOTCONN;
diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c
index 3608fd5e..e32e63f4 100644
--- a/freebsd/sys/netinet/sctp_pcb.c
+++ b/freebsd/sys/netinet/sctp_pcb.c
@@ -1313,7 +1313,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote,
* it is the acceptor, then do the special_lookup to hash
* and find the real inp.
*/
- if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) {
+ if ((inp->sctp_socket) && SCTP_IS_LISTENING(inp)) {
/* to is peer addr, from is my addr */
stcb = sctp_tcb_special_locate(inp_p, remote, local,
netp, inp->def_vrf_id);
@@ -1886,7 +1886,7 @@ sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp)
if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
continue;
}
- if (tinp->sctp_socket->so_qlimit) {
+ if (SCTP_IS_LISTENING(tinp)) {
continue;
}
SCTP_INP_WLOCK(tinp);
@@ -3937,6 +3937,7 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
stcb->asoc.vrf_id,
stcb->sctp_ep->fibnum);
+ net->src_addr_selected = 0;
if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) {
/* Get source address */
net->ro._s_addr = sctp_source_address_selection(stcb->sctp_ep,
@@ -3946,18 +3947,18 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
0,
stcb->asoc.vrf_id);
if (net->ro._s_addr != NULL) {
+ uint32_t imtu, rmtu, hcmtu;
+
net->src_addr_selected = 1;
/* Now get the interface MTU */
if (net->ro._s_addr->ifn_p != NULL) {
- net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ imtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ } else {
+ imtu = 0;
}
- } else {
- net->src_addr_selected = 0;
- }
- if (net->mtu > 0) {
- uint32_t rmtu;
-
rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt);
+ hcmtu = sctp_hc_get_mtu(&net->ro._l_addr, stcb->sctp_ep->fibnum);
+ net->mtu = sctp_min_mtu(hcmtu, rmtu, imtu);
if (rmtu == 0) {
/*
* Start things off to match mtu of
@@ -3965,17 +3966,8 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
*/
SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa,
net->ro.ro_rt, net->mtu);
- } else {
- /*
- * we take the route mtu over the interface,
- * since the route may be leading out the
- * loopback, or a different interface.
- */
- net->mtu = rmtu;
}
}
- } else {
- net->src_addr_selected = 0;
}
if (net->mtu == 0) {
switch (newaddr->sa_family) {
diff --git a/freebsd/sys/netinet/sctp_sysctl.c b/freebsd/sys/netinet/sctp_sysctl.c
index ea3b3d9c..db150112 100644
--- a/freebsd/sys/netinet/sctp_sysctl.c
+++ b/freebsd/sys/netinet/sctp_sysctl.c
@@ -412,6 +412,7 @@ sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS)
xinpcb.socket = inp->sctp_socket;
so = inp->sctp_socket;
if ((so == NULL) ||
+ (!SCTP_IS_LISTENING(inp)) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
xinpcb.qlen = 0;
xinpcb.maxqlen = 0;
diff --git a/freebsd/sys/netinet/sctp_timer.c b/freebsd/sys/netinet/sctp_timer.c
index 6ce9fc30..ecadca5b 100644
--- a/freebsd/sys/netinet/sctp_timer.c
+++ b/freebsd/sys/netinet/sctp_timer.c
@@ -669,6 +669,7 @@ start_again:
stcb->asoc.peers_rwnd += SCTP_BASE_SYSCTL(sctp_peer_chunk_oh);
}
chk->sent = SCTP_DATAGRAM_RESEND;
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
SCTP_STAT_INCR(sctps_markedretrans);
/* reset the TSN for striking and other FR stuff */
@@ -742,6 +743,7 @@ start_again:
chk->whoTo = alt;
if (chk->sent != SCTP_DATAGRAM_RESEND) {
chk->sent = SCTP_DATAGRAM_RESEND;
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
cnt_mk++;
}
@@ -1086,6 +1088,7 @@ sctp_cookie_timer(struct sctp_inpcb *inp,
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
}
cookie->sent = SCTP_DATAGRAM_RESEND;
+ cookie->flags |= CHUNK_FLAGS_FRAGMENT_OK;
/*
* Now call the output routine to kick out the cookie again, Note we
* don't mark any chunks for retran so that FR will need to kick in
@@ -1132,6 +1135,7 @@ sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
sctp_free_remote_addr(chk->whoTo);
if (chk->sent != SCTP_DATAGRAM_RESEND) {
chk->sent = SCTP_DATAGRAM_RESEND;
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
}
chk->whoTo = alt;
@@ -1149,6 +1153,7 @@ sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
if (strrst->sent != SCTP_DATAGRAM_RESEND)
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
strrst->sent = SCTP_DATAGRAM_RESEND;
+ strrst->flags |= CHUNK_FLAGS_FRAGMENT_OK;
/* restart the timer */
sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, inp, stcb, strrst->whoTo);
@@ -1213,6 +1218,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
chk->whoTo = alt;
if (chk->sent != SCTP_DATAGRAM_RESEND) {
chk->sent = SCTP_DATAGRAM_RESEND;
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
}
atomic_add_int(&alt->ref_count, 1);
@@ -1227,6 +1233,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
if (asconf->sent != SCTP_DATAGRAM_RESEND && chk->sent != SCTP_DATAGRAM_UNSENT)
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
chk->sent = SCTP_DATAGRAM_RESEND;
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
}
if (!(net->dest_state & SCTP_ADDR_REACHABLE)) {
/*
@@ -1239,6 +1246,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
if (asconf->sent != SCTP_DATAGRAM_RESEND)
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
asconf->sent = SCTP_DATAGRAM_RESEND;
+ asconf->flags |= CHUNK_FLAGS_FRAGMENT_OK;
/* send another ASCONF if any and we can do */
sctp_send_asconf(stcb, alt, SCTP_ADDR_NOT_LOCKED);
diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c
index 550926f3..b65f74d1 100644
--- a/freebsd/sys/netinet/sctp_usrreq.c
+++ b/freebsd/sys/netinet/sctp_usrreq.c
@@ -154,7 +154,7 @@ sctp_notify(struct sctp_inpcb *inp,
uint8_t icmp_type,
uint8_t icmp_code,
uint16_t ip_len,
- uint16_t next_mtu)
+ uint32_t next_mtu)
{
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
@@ -222,10 +222,15 @@ sctp_notify(struct sctp_inpcb *inp,
timer_stopped = 0;
}
/* Update the path MTU. */
+ if (net->port) {
+ next_mtu -= sizeof(struct udphdr);
+ }
if (net->mtu > next_mtu) {
net->mtu = next_mtu;
if (net->port) {
- net->mtu -= sizeof(struct udphdr);
+ sctp_hc_set_mtu(&net->ro._l_addr, inp->fibnum, next_mtu + sizeof(struct udphdr));
+ } else {
+ sctp_hc_set_mtu(&net->ro._l_addr, inp->fibnum, next_mtu);
}
}
/* Update the association MTU */
@@ -330,7 +335,7 @@ sctp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
icmp->icmp_type,
icmp->icmp_code,
ntohs(inner_ip->ip_len),
- ntohs(icmp->icmp_nextmtu));
+ (uint32_t)ntohs(icmp->icmp_nextmtu));
} else {
if ((stcb == NULL) && (inp != NULL)) {
/* reduce ref-count */
@@ -7036,7 +7041,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
if (tinp && (tinp != inp) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
- (tinp->sctp_socket->so_qlimit)) {
+ (SCTP_IS_LISTENING(tinp))) {
/*
* we have a listener already and
* its not this inp.
@@ -7080,7 +7085,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
if (tinp && (tinp != inp) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
- (tinp->sctp_socket->so_qlimit)) {
+ (SCTP_IS_LISTENING(tinp))) {
/*
* we have a listener already and its not
* this inp.
@@ -7134,6 +7139,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
return (error);
}
}
+ SCTP_INP_WLOCK(inp);
SOCK_LOCK(so);
/* It appears for 7.0 and on, we must always call this. */
solisten_proto(so, backlog);
@@ -7141,11 +7147,13 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
/* remove the ACCEPTCONN flag for one-to-many sockets */
so->so_options &= ~SO_ACCEPTCONN;
}
- if (backlog == 0) {
- /* turning off listen */
- so->so_options &= ~SO_ACCEPTCONN;
+ if (backlog > 0) {
+ inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
}
SOCK_UNLOCK(so);
+ SCTP_INP_WUNLOCK(inp);
return (error);
}
diff --git a/freebsd/sys/netinet/sctp_var.h b/freebsd/sys/netinet/sctp_var.h
index 6365dfec..9e149e68 100644
--- a/freebsd/sys/netinet/sctp_var.h
+++ b/freebsd/sys/netinet/sctp_var.h
@@ -341,7 +341,7 @@ void sctp_drain(void);
void sctp_init(void);
void
sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
- uint8_t, uint8_t, uint16_t, uint16_t);
+ uint8_t, uint8_t, uint16_t, uint32_t);
int sctp_flush(struct socket *, int);
int sctp_shutdown(struct socket *);
int
diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c
index 79bb0620..4c6ba598 100644
--- a/freebsd/sys/netinet/sctputil.c
+++ b/freebsd/sys/netinet/sctputil.c
@@ -51,6 +51,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_auth.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_bsd_addr.h>
+#if defined(INET6) || defined(INET)
+#include <netinet/tcp_var.h>
+#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <sys/proc.h>
@@ -6973,7 +6976,7 @@ sctp_recv_icmp_tunneled_packet(int cmd, struct sockaddr *sa, void *vip, void *ct
}
sctp_notify(inp, stcb, net, type, code,
ntohs(inner_ip->ip_len),
- ntohs(icmp->icmp_nextmtu));
+ (uint32_t)ntohs(icmp->icmp_nextmtu));
} else {
if ((stcb == NULL) && (inp != NULL)) {
/* reduce ref-count */
@@ -7115,7 +7118,7 @@ sctp_recv_icmp6_tunneled_packet(int cmd, struct sockaddr *sa, void *d, void *ctx
code = ICMP6_PARAMPROB_NEXTHEADER;
}
sctp6_notify(inp, stcb, net, type, code,
- (uint16_t)ntohl(ip6cp->ip6c_icmp6->icmp6_mtu));
+ ntohl(ip6cp->ip6c_icmp6->icmp6_mtu));
} else {
if ((stcb == NULL) && (inp != NULL)) {
/* reduce inp's ref-count */
@@ -7237,3 +7240,90 @@ sctp_over_udp_start(void)
#endif
return (0);
}
+
+#if defined(INET6) || defined(INET)
+
+/*
+ * sctp_min_mtu() returns the minimum of all non-zero arguments.
+ * If all arguments are zero, zero is returned.
+ */
+uint32_t
+sctp_min_mtu(uint32_t mtu1, uint32_t mtu2, uint32_t mtu3)
+{
+ if (mtu1 > 0) {
+ if (mtu2 > 0) {
+ if (mtu3 > 0) {
+ return (min(mtu1, min(mtu2, mtu3)));
+ } else {
+ return (min(mtu1, mtu2));
+ }
+ } else {
+ if (mtu3 > 0) {
+ return (min(mtu1, mtu3));
+ } else {
+ return (mtu1);
+ }
+ }
+ } else {
+ if (mtu2 > 0) {
+ if (mtu3 > 0) {
+ return (min(mtu2, mtu3));
+ } else {
+ return (mtu2);
+ }
+ } else {
+ return (mtu3);
+ }
+ }
+}
+
+void
+sctp_hc_set_mtu(union sctp_sockstore *addr, uint16_t fibnum, uint32_t mtu)
+{
+ struct in_conninfo inc;
+
+ memset(&inc, 0, sizeof(struct in_conninfo));
+ inc.inc_fibnum = fibnum;
+ switch (addr->sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ inc.inc_faddr = addr->sin.sin_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ inc.inc_flags |= INC_ISIPV6;
+ inc.inc6_faddr = addr->sin6.sin6_addr;
+ break;
+#endif
+ default:
+ return;
+ }
+ tcp_hc_updatemtu(&inc, (u_long)mtu);
+}
+
+uint32_t
+sctp_hc_get_mtu(union sctp_sockstore *addr, uint16_t fibnum)
+{
+ struct in_conninfo inc;
+
+ memset(&inc, 0, sizeof(struct in_conninfo));
+ inc.inc_fibnum = fibnum;
+ switch (addr->sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ inc.inc_faddr = addr->sin.sin_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ inc.inc_flags |= INC_ISIPV6;
+ inc.inc6_faddr = addr->sin6.sin6_addr;
+ break;
+#endif
+ default:
+ return (0);
+ }
+ return ((uint32_t)tcp_hc_getmtu(&inc));
+}
+#endif
diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h
index dd45e49a..50118b7a 100644
--- a/freebsd/sys/netinet/sctputil.h
+++ b/freebsd/sys/netinet/sctputil.h
@@ -388,5 +388,10 @@ sctp_auditing(int, struct sctp_inpcb *, struct sctp_tcb *,
void sctp_audit_log(uint8_t, uint8_t);
#endif
+#if defined(INET6) || defined(INET)
+uint32_t sctp_min_mtu(uint32_t, uint32_t, uint32_t);
+void sctp_hc_set_mtu(union sctp_sockstore *, uint16_t, uint32_t);
+uint32_t sctp_hc_get_mtu(union sctp_sockstore *, uint16_t);
+#endif
#endif /* _KERNEL */
#endif
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
index 89f2bf0c..d7091928 100644
--- a/freebsd/sys/netinet/tcp_input.c
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -1488,6 +1488,68 @@ drop:
return (IPPROTO_DONE);
}
+/*
+ * Automatic sizing of receive socket buffer. Often the send
+ * buffer size is not optimally adjusted to the actual network
+ * conditions at hand (delay bandwidth product). Setting the
+ * buffer size too small limits throughput on links with high
+ * bandwidth and high delay (e.g. trans-continental/oceanic links).
+ *
+ * On the receive side the socket buffer memory is only rarely
+ * used to any significant extent. This allows us to be much
+ * more aggressive in scaling the receive socket buffer. For
+ * the case that the buffer space is actually used to a large
+ * extent and we run out of kernel memory we can simply drop
+ * the new segments; TCP on the sender will just retransmit it
+ * later. Setting the buffer size too big may only consume too
+ * much kernel memory if the application doesn't read() from
+ * the socket or packet loss or reordering makes use of the
+ * reassembly queue.
+ *
+ * The criteria to step up the receive buffer one notch are:
+ * 1. Application has not set receive buffer size with
+ * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
+ * 2. a measurement period has completed, i.e. the time elapsed
+ * since tp->rfbuf_ts exceeds tp->t_srtt >> TCP_RTT_SHIFT
+ * (presumably the smoothed RTT in ticks -- confirm);
+ * 3. the bytes received during that period (tp->rfbuf_cnt)
+ * exceed seven eighths of the current socket buffer size;
+ * 4. receive buffer size has not hit maximal automatic size;
+ *
+ * While V_tcp_do_autorcvbuf is off, SB_AUTOSIZE is clear,
+ * tp->t_srtt or tp->rfbuf_ts is zero, or the period has not yet
+ * elapsed, a call only adds tlen to tp->rfbuf_cnt.
+ *
+ * This algorithm does one step per RTT at most and only if
+ * we receive a bulk stream w/o packet losses or reorderings.
+ * Shrinking the buffer during idle times is not necessary as
+ * it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
+ *
+ * The arguments m and th are only handed to the
+ * receive__autoresize probe. Returns the new receive buffer
+ * size, or 0 if the buffer is not to be stepped up; this
+ * function does not resize the socket buffer itself.
+ */
+int
+tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, int tlen)
+{
+ int newsize = 0; /* stays 0 unless a step up is warranted */
+
+ if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) &&
+ tp->t_srtt != 0 && tp->rfbuf_ts != 0 &&
+ TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) >
+ (tp->t_srtt >> TCP_RTT_SHIFT)) {
+ if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) &&
+ so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) {
+ newsize = min(so->so_rcv.sb_hiwat +
+ V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max);
+ }
+ TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize);
+
+ /*
+ * Start over with next RTT. The counter restarts at zero,
+ * so tlen of the current segment is not added to it.
+ */
+ tp->rfbuf_ts = 0;
+ tp->rfbuf_cnt = 0;
+ } else {
+ tp->rfbuf_cnt += tlen; /* add up */
+ }
+
+ return (newsize);
+}
+
void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
@@ -1553,6 +1615,26 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_pcap_add(th, m, &(tp->t_inpkts));
#endif
+ if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: "
+ "SYN|FIN segment ignored (based on "
+ "sysctl setting)\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ goto drop;
+ }
+
+ /*
+ * If a segment with the ACK-bit set arrives in the SYN-SENT state
+ * check SEQ.ACK first.
+ */
+ if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
+ (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
+ }
+
/*
* Segment received on connection.
* Reset idle time and keep-alive timer.
@@ -1851,62 +1933,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
#endif
TCP_PROBE3(debug__input, tp, th, m);
- /*
- * Automatic sizing of receive socket buffer. Often the send
- * buffer size is not optimally adjusted to the actual network
- * conditions at hand (delay bandwidth product). Setting the
- * buffer size too small limits throughput on links with high
- * bandwidth and high delay (eg. trans-continental/oceanic links).
- *
- * On the receive side the socket buffer memory is only rarely
- * used to any significant extent. This allows us to be much
- * more aggressive in scaling the receive socket buffer. For
- * the case that the buffer space is actually used to a large
- * extent and we run out of kernel memory we can simply drop
- * the new segments; TCP on the sender will just retransmit it
- * later. Setting the buffer size too big may only consume too
- * much kernel memory if the application doesn't read() from
- * the socket or packet loss or reordering makes use of the
- * reassembly queue.
- *
- * The criteria to step up the receive buffer one notch are:
- * 1. Application has not set receive buffer size with
- * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
- * 2. the number of bytes received during the time it takes
- * one timestamp to be reflected back to us (the RTT);
- * 3. received bytes per RTT is within seven eighth of the
- * current socket buffer size;
- * 4. receive buffer size has not hit maximal automatic size;
- *
- * This algorithm does one step per RTT at most and only if
- * we receive a bulk stream w/o packet losses or reorderings.
- * Shrinking the buffer during idle times is not necessary as
- * it doesn't consume any memory when idle.
- *
- * TODO: Only step up if the application is actually serving
- * the buffer to better manage the socket buffer resources.
- */
- if (V_tcp_do_autorcvbuf &&
- (to.to_flags & TOF_TS) &&
- to.to_tsecr &&
- (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
- if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
- to.to_tsecr - tp->rfbuf_ts < hz) {
- if (tp->rfbuf_cnt >
- (so->so_rcv.sb_hiwat / 8 * 7) &&
- so->so_rcv.sb_hiwat <
- V_tcp_autorcvbuf_max) {
- newsize =
- min(so->so_rcv.sb_hiwat +
- V_tcp_autorcvbuf_inc,
- V_tcp_autorcvbuf_max);
- }
- /* Start over with next RTT. */
- tp->rfbuf_ts = 0;
- tp->rfbuf_cnt = 0;
- } else
- tp->rfbuf_cnt += tlen; /* add up */
- }
+ newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
/* Add data to socket buffer. */
SOCKBUF_LOCK(&so->so_rcv);
@@ -1947,10 +1974,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
- /* Reset receive buffer auto scaling when not in bulk receive mode. */
- tp->rfbuf_ts = 0;
- tp->rfbuf_cnt = 0;
-
switch (tp->t_state) {
/*
@@ -1990,7 +2013,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* If the state is SYN_SENT:
- * if seg contains an ACK, but not for our SYN, drop the input.
* if seg contains a RST, then drop the connection.
* if seg does not contain SYN, then drop it.
* Otherwise this is an acceptable SYN segment
@@ -2003,12 +2025,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* continue processing rest of data/controls, beginning with URG
*/
case TCPS_SYN_SENT:
- if ((thflags & TH_ACK) &&
- (SEQ_LEQ(th->th_ack, tp->iss) ||
- SEQ_GT(th->th_ack, tp->snd_max))) {
- rstreason = BANDLIM_UNLIMITED;
- goto dropwithreset;
- }
if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
TCP_PROBE5(connect__refused, NULL, tp,
m, tp, th);
diff --git a/freebsd/sys/netinet/tcp_lro.c b/freebsd/sys/netinet/tcp_lro.c
index 13866134..91d534f1 100644
--- a/freebsd/sys/netinet/tcp_lro.c
+++ b/freebsd/sys/netinet/tcp_lro.c
@@ -117,7 +117,6 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
lc->lro_bad_csum = 0;
lc->lro_queued = 0;
lc->lro_flushed = 0;
- lc->lro_cnt = 0;
lc->lro_mbuf_count = 0;
lc->lro_mbuf_max = lro_mbufs;
lc->lro_cnt = lro_entries;
@@ -147,6 +146,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
/* check for out of memory */
if (lc->lro_mbuf_data == NULL) {
+ free(lc->lro_hash, M_LRO);
memset(lc, 0, sizeof(*lc));
return (ENOMEM);
}
@@ -177,17 +177,15 @@ tcp_lro_free(struct lro_ctrl *lc)
}
/* free hash table */
- if (lc->lro_hash != NULL) {
- free(lc->lro_hash, M_LRO);
- lc->lro_hash = NULL;
- }
+ free(lc->lro_hash, M_LRO);
+ lc->lro_hash = NULL;
lc->lro_hashsz = 0;
/* free mbuf array, if any */
for (x = 0; x != lc->lro_mbuf_count; x++)
m_freem(lc->lro_mbuf_data[x].mb);
lc->lro_mbuf_count = 0;
-
+
/* free allocated memory, if any */
free(lc->lro_mbuf_data, M_LRO);
lc->lro_mbuf_data = NULL;
@@ -957,18 +955,12 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
/* check if packet is not LRO capable */
if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
(lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
- lc->lro_flushed++;
- lc->lro_queued++;
/* input packet to network layer */
(*lc->ifp->if_input) (lc->ifp, mb);
return;
}
- /* check if array is full */
- if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max))
- tcp_lro_flush_all(lc);
-
/* create sequence number */
lc->lro_mbuf_data[lc->lro_mbuf_count].seq =
(((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
@@ -976,7 +968,11 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
((uint64_t)lc->lro_mbuf_count);
/* enter mbuf */
- lc->lro_mbuf_data[lc->lro_mbuf_count++].mb = mb;
+ lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb;
+
+ /* flush if array is full */
+ if (__predict_false(++lc->lro_mbuf_count == lc->lro_mbuf_max))
+ tcp_lro_flush_all(lc);
}
/* end */
diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c
index 53eccf11..d2606fb6 100644
--- a/freebsd/sys/netinet/tcp_output.c
+++ b/freebsd/sys/netinet/tcp_output.c
@@ -833,11 +833,13 @@ send:
to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
to.to_tsecr = tp->ts_recent;
to.to_flags |= TOF_TS;
- /* Set receive buffer autosizing timestamp. */
- if (tp->rfbuf_ts == 0 &&
- (so->so_rcv.sb_flags & SB_AUTOSIZE))
- tp->rfbuf_ts = tcp_ts_getticks();
}
+
+ /* Set receive buffer autosizing timestamp. */
+ if (tp->rfbuf_ts == 0 &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE))
+ tp->rfbuf_ts = tcp_ts_getticks();
+
/* Selective ACK's. */
if (tp->t_flags & TF_SACK_PERMIT) {
if (flags & TH_SYN)
diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c
index 779de5e0..4f944cab 100644
--- a/freebsd/sys/netinet/tcp_reass.c
+++ b/freebsd/sys/netinet/tcp_reass.c
@@ -110,7 +110,7 @@ tcp_reass_global_init(void)
TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
&tcp_reass_maxseg);
tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
/* Set the zone limit and read back the effective value. */
tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
tcp_reass_maxseg);
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c
index 48f4cfda..30464e1b 100644
--- a/freebsd/sys/netinet/tcp_subr.c
+++ b/freebsd/sys/netinet/tcp_subr.c
@@ -653,7 +653,7 @@ tcp_init(void)
hashsize);
}
in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
- "tcp_inpcb", tcp_inpcb_init, NULL, 0, IPI_HASHFIELDS_4TUPLE);
+ "tcp_inpcb", tcp_inpcb_init, IPI_HASHFIELDS_4TUPLE);
/*
* These have to be type stable for the benefit of the timers.
diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c
index 84b9d271..13170ae9 100644
--- a/freebsd/sys/netinet/tcp_syncache.c
+++ b/freebsd/sys/netinet/tcp_syncache.c
@@ -262,6 +262,8 @@ syncache_init(void)
&V_tcp_syncache.hashbase[i].sch_mtx, 0);
V_tcp_syncache.hashbase[i].sch_length = 0;
V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache;
+ V_tcp_syncache.hashbase[i].sch_last_overflow =
+ -(SYNCOOKIE_LIFETIME + 1);
}
/* Create the syncache entry zone. */
@@ -337,6 +339,7 @@ syncache_insert(struct syncache *sc, struct syncache_head *sch)
KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
("sch->sch_length incorrect"));
sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
+ sch->sch_last_overflow = time_uptime;
syncache_drop(sc2, sch);
TCPSTAT_INC(tcps_sc_bucketoverflow);
}
@@ -984,10 +987,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/*
* There is no syncache entry, so see if this ACK is
* a returning syncookie. To do this, first:
- * A. See if this socket has had a syncache entry dropped in
- * the past. We don't want to accept a bogus syncookie
- * if we've never received a SYN.
- * B. check that the syncookie is valid. If it is, then
+ * A. Check if syncookies are used in case of syncache
+ * overflows
+ * B. See if this socket has had a syncache entry dropped in
+ * the recent past. We don't want to accept a bogus
+ * syncookie if we've never received a SYN or accept it
+ * twice.
+ * C. check that the syncookie is valid. If it is, then
* cobble up a fake syncache entry, and return.
*/
if (!V_tcp_syncookies) {
@@ -998,6 +1004,15 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
s, __func__);
goto failed;
}
+ if (!V_tcp_syncookiesonly &&
+ sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
+ SCH_UNLOCK(sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected (no syncache entry)\n",
+ s, __func__);
+ goto failed;
+ }
bzero(&scs, sizeof(scs));
sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
SCH_UNLOCK(sch);
@@ -1421,8 +1436,10 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* entry and insert the new one.
*/
TCPSTAT_INC(tcps_sc_zonefail);
- if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
+ if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) {
+ sch->sch_last_overflow = time_uptime;
syncache_drop(sc, sch);
+ }
sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
if (V_tcp_syncookies) {
diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h
index 2c8c5b00..ebf9fb84 100644
--- a/freebsd/sys/netinet/tcp_syncache.h
+++ b/freebsd/sys/netinet/tcp_syncache.h
@@ -99,6 +99,7 @@ struct syncache_head {
int sch_nextc;
u_int sch_length;
struct tcp_syncache *sch_sc;
+ time_t sch_last_overflow;
};
#define SYNCOOKIE_SECRET_SIZE 16
diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c
index 05fed2d5..198291f2 100644
--- a/freebsd/sys/netinet/tcp_usrreq.c
+++ b/freebsd/sys/netinet/tcp_usrreq.c
@@ -599,6 +599,10 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = EINVAL;
goto out;
}
+ if ((inp->inp_vflag & INP_IPV4) == 0) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
in6_sin6_2_sin(&sin, sin6p);
inp->inp_vflag |= INP_IPV4;
@@ -616,6 +620,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
#endif
error = tp->t_fb->tfb_tcp_output(tp);
goto out;
+ } else {
+ if ((inp->inp_vflag & INP_IPV6) == 0) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
}
#endif
inp->inp_vflag &= ~INP_IPV4;
diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h
index 5705e553..d298c9dd 100644
--- a/freebsd/sys/netinet/tcp_var.h
+++ b/freebsd/sys/netinet/tcp_var.h
@@ -778,6 +778,8 @@ void hhook_run_tcp_est_in(struct tcpcb *tp,
#endif
int tcp_input(struct mbuf **, int *, int);
+int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
+ struct tcpcb *, int);
void tcp_do_segment(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, int, int, uint8_t,
int);
diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c
index c77439f7..af6b564f 100644
--- a/freebsd/sys/netinet/udp_usrreq.c
+++ b/freebsd/sys/netinet/udp_usrreq.c
@@ -129,12 +129,6 @@ SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(udp_blackhole), 0,
"Do not send port unreachables for refused connects");
-static VNET_DEFINE(int, udp_require_l2_bcast) = 0;
-#define V_udp_require_l2_bcast VNET(udp_require_l2_bcast)
-SYSCTL_INT(_net_inet_udp, OID_AUTO, require_l2_bcast, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(udp_require_l2_bcast), 0,
- "Only treat packets sent to an L2 broadcast address as broadcast packets");
-
u_long udp_sendspace = 9216; /* really max datagram size */
SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
&udp_sendspace, 0, "Maximum outgoing UDP datagram size");
@@ -215,8 +209,7 @@ udp_init(void)
* a 4-tuple, flip this to 4-tuple.
*/
in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
- "udp_inpcb", udp_inpcb_init, NULL, 0,
- IPI_HASHFIELDS_2TUPLE);
+ "udp_inpcb", udp_inpcb_init, IPI_HASHFIELDS_2TUPLE);
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
uma_zone_set_max(V_udpcb_zone, maxsockets);
@@ -230,8 +223,8 @@ udplite_init(void)
{
in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
- UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL,
- 0, IPI_HASHFIELDS_2TUPLE);
+ UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init,
+ IPI_HASHFIELDS_2TUPLE);
}
/*
@@ -535,8 +528,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
pcbinfo = udp_get_inpcbinfo(proto);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
- ((!V_udp_require_l2_bcast || m->m_flags & M_BCAST) &&
- in_broadcast(ip->ip_dst, ifp))) {
+ in_broadcast(ip->ip_dst, ifp)) {
struct inpcb *last;
struct inpcbhead *pcblist;
struct ip_moptions *imo;