From de261e0404e1fe54544275fc57d5b982df4f42b4 Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Tue, 7 Aug 2018 12:12:37 +0200 Subject: Update to FreeBSD head 2017-06-01 Git mirror commit dfb26efac4ce9101dda240e94d9ab53f80a9e131. Update #3472. --- freebsd/sys/netinet/in.c | 14 ++-- freebsd/sys/netinet/in_kdtrace.h | 1 + freebsd/sys/netinet/in_mcast.c | 3 +- freebsd/sys/netinet/in_pcb.c | 20 +++-- freebsd/sys/netinet/in_pcb.h | 32 ++++---- freebsd/sys/netinet/ip_divert.c | 18 ++--- freebsd/sys/netinet/ip_icmp.c | 28 +++---- freebsd/sys/netinet/ip_input.c | 8 +- freebsd/sys/netinet/libalias/alias.c | 4 +- freebsd/sys/netinet/raw_ip.c | 2 +- freebsd/sys/netinet/sctp_input.c | 19 +++-- freebsd/sys/netinet/sctp_os_bsd.h | 6 +- freebsd/sys/netinet/sctp_output.c | 4 +- freebsd/sys/netinet/sctp_pcb.c | 28 +++---- freebsd/sys/netinet/sctp_sysctl.c | 1 + freebsd/sys/netinet/sctp_timer.c | 8 ++ freebsd/sys/netinet/sctp_usrreq.c | 24 ++++-- freebsd/sys/netinet/sctp_var.h | 2 +- freebsd/sys/netinet/sctputil.c | 94 +++++++++++++++++++++- freebsd/sys/netinet/sctputil.h | 5 ++ freebsd/sys/netinet/tcp_input.c | 150 +++++++++++++++++++---------------- freebsd/sys/netinet/tcp_lro.c | 22 +++-- freebsd/sys/netinet/tcp_output.c | 10 ++- freebsd/sys/netinet/tcp_reass.c | 2 +- freebsd/sys/netinet/tcp_subr.c | 2 +- freebsd/sys/netinet/tcp_syncache.c | 27 +++++-- freebsd/sys/netinet/tcp_syncache.h | 1 + freebsd/sys/netinet/tcp_usrreq.c | 9 +++ freebsd/sys/netinet/tcp_var.h | 2 + freebsd/sys/netinet/udp_usrreq.c | 16 +--- 30 files changed, 358 insertions(+), 204 deletions(-) (limited to 'freebsd/sys/netinet') diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c index ca902fdc..0b31ff7e 100644 --- a/freebsd/sys/netinet/in.c +++ b/freebsd/sys/netinet/in.c @@ -98,8 +98,8 @@ int in_localaddr(struct in_addr in) { struct rm_priotracker in_ifa_tracker; - register u_long i = ntohl(in.s_addr); - register struct in_ifaddr *ia; + u_long i = ntohl(in.s_addr); + struct in_ifaddr *ia; IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { @@ -189,8 +189,8 @@ in_localip_more(struct in_ifaddr *ia) int in_canforward(struct in_addr in) { - register u_long i = ntohl(in.s_addr); - register u_long net; + u_long i = ntohl(in.s_addr); + u_long net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i)) return (0); @@ -208,8 +208,8 @@ in_canforward(struct in_addr in) static void in_socktrim(struct sockaddr_in *ap) { - register char *cplim = (char *) &ap->sin_addr; - register char *cp = (char *) (&ap->sin_addr + 1); + char *cplim = (char *) &ap->sin_addr; + char *cp = (char *) (&ap->sin_addr + 1); ap->sin_len = 0; while (--cp >= cplim) @@ -966,7 +966,7 @@ in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia) int in_broadcast(struct in_addr in, struct ifnet *ifp) { - register struct ifaddr *ifa; + struct ifaddr *ifa; int found; if (in.s_addr == INADDR_BROADCAST || diff --git a/freebsd/sys/netinet/in_kdtrace.h b/freebsd/sys/netinet/in_kdtrace.h index a36991ef..0825c7df 100644 --- a/freebsd/sys/netinet/in_kdtrace.h +++ b/freebsd/sys/netinet/in_kdtrace.h @@ -65,6 +65,7 @@ SDT_PROBE_DECLARE(tcp, , , debug__input); SDT_PROBE_DECLARE(tcp, , , debug__output); SDT_PROBE_DECLARE(tcp, , , debug__user); SDT_PROBE_DECLARE(tcp, , , debug__drop); +SDT_PROBE_DECLARE(tcp, , , receive__autoresize); SDT_PROBE_DECLARE(udp, , , receive); SDT_PROBE_DECLARE(udp, , , send); diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c index cb92a254..2ba4d9e8 100644 --- a/freebsd/sys/netinet/in_mcast.c +++ b/freebsd/sys/netinet/in_mcast.c @@ -1049,9 +1049,10 @@ inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) /* Decrement ASM listener count on transition out of ASM mode. */ if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { if ((imf->imf_st[1] != MCAST_EXCLUDE) || - (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) + (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); --inm->inm_st[1].iss_asm; + } } /* Increment ASM listener count on transition to ASM mode. */ diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c index b61b6e09..3d43ed92 100644 --- a/freebsd/sys/netinet/in_pcb.c +++ b/freebsd/sys/netinet/in_pcb.c @@ -217,6 +217,18 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, * functions often modify hash chains or addresses in pcbs. */ +/* + * Different protocols initialize their inpcbs differently - giving + * different name to the lock. But they all are disposed the same. + */ +static void +inpcb_fini(void *mem, int size) +{ + struct inpcb *inp = mem; + + INP_LOCK_DESTROY(inp); +} + /* * Initialize an inpcbinfo -- we should be able to reduce the number of * arguments in time. @@ -224,8 +236,7 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, void in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, struct inpcbhead *listhead, int hash_nelements, int porthash_nelements, - char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini, - uint32_t inpcbzone_flags, u_int hashfields) + char *inpcbzone_name, uma_init inpcbzone_init, u_int hashfields) { INP_INFO_LOCK_INIT(pcbinfo, name); @@ -245,8 +256,7 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, in_pcbgroup_init(pcbinfo, hashfields, hash_nelements); #endif pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), - NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR, - inpcbzone_flags); + NULL, NULL, inpcbzone_init, inpcb_fini, UMA_ALIGN_PTR, 0); uma_zone_set_max(pcbinfo->ipi_zone, maxsockets); uma_zone_set_warning(pcbinfo->ipi_zone, "kern.ipc.maxsockets limit reached"); @@ -296,7 +306,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); if (inp == NULL) return (ENOBUFS); - bzero(inp, inp_zero_size); + bzero(&inp->inp_start_zero, inp_zero_size); inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; inp->inp_cred = crhold(so->so_cred); diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h index 59de3b0f..42fd23d0 100644 --- a/freebsd/sys/netinet/in_pcb.h +++ b/freebsd/sys/netinet/in_pcb.h @@ -183,26 +183,29 @@ struct icmp6_filter; struct inpcbpolicy; struct m_snd_tag; struct inpcb { + /* Cache line #1 (amd64) */ LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */ LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ - LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */ - /* (p[w]) for list iteration */ - /* (p[r]/l) for addition/removal */ + struct rwlock inp_lock; + /* Cache line #2 (amd64) */ +#define inp_start_zero inp_refcount +#define inp_zero_size (sizeof(struct inpcb) - \ + offsetof(struct inpcb, inp_start_zero)) + u_int inp_refcount; /* (i) refcount */ + int inp_flags; /* (i) generic IP/datagram flags */ + int inp_flags2; /* (i) generic IP/datagram flags #2*/ void *inp_ppcb; /* (i) pointer to per-protocol pcb */ + struct socket *inp_socket; /* (i) back pointer to socket */ struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */ - struct socket *inp_socket; /* (i) back pointer to socket */ struct ucred *inp_cred; /* (c) cache of socket cred */ u_int32_t inp_flow; /* (i) IPv6 flow information */ - int inp_flags; /* (i) generic IP/datagram flags */ - int inp_flags2; /* (i) generic IP/datagram flags #2*/ u_char inp_vflag; /* (i) IP version flag (v4/v6) */ u_char inp_ip_ttl; /* (i) time to live proto */ u_char inp_ip_p; /* (c) protocol proto */ u_char inp_ip_minttl; /* (i) minimum TTL or drop */ uint32_t inp_flowid; /* (x) flow id / queue id */ - u_int inp_refcount; /* (i) refcount */ struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */ uint32_t inp_flowtype; /* (x) M_HASHTYPE value */ uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */ @@ -235,17 +238,16 @@ struct inpcb { }; LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */ struct inpcbport *inp_phd; /* (i/h) head of this list */ -#define inp_zero_size offsetof(struct inpcb, inp_gencnt) inp_gen_t inp_gencnt; /* (c) generation count */ struct llentry *inp_lle; /* cached L2 information */ - struct rwlock inp_lock; rt_gen_t inp_rt_cookie; /* generation for route entry */ union { /* cached L3 information */ - struct route inpu_route; - struct route_in6 inpu_route6; - } inp_rtu; -#define inp_route inp_rtu.inpu_route -#define inp_route6 inp_rtu.inpu_route6 + struct route inp_route; + struct route_in6 inp_route6; + }; + LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */ + /* (p[w]) for list iteration */ + /* (p[r]/l) for addition/removal */ }; #endif /* _KERNEL */ @@ -690,7 +692,7 @@ VNET_DECLARE(int, ipport_tcpallocs); void in_pcbinfo_destroy(struct inpcbinfo *); void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *, - int, int, char *, uma_init, uma_fini, uint32_t, u_int); + int, int, char *, uma_init, u_int); int in_pcbbind_check_bindmulti(const struct inpcb *ni, const struct inpcb *oi); diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c index 3efae683..5d7b1635 100644 --- a/freebsd/sys/netinet/ip_divert.c +++ b/freebsd/sys/netinet/ip_divert.c @@ -142,14 +142,6 @@ div_inpcb_init(void *mem, int size, int flags) return (0); } -static void -div_inpcb_fini(void *mem, int size) -{ - struct inpcb *inp = mem; - - INP_LOCK_DESTROY(inp); -} - static void div_init(void) { @@ -160,7 +152,7 @@ div_init(void) * place for hashbase == NULL. */ in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb", - div_inpcb_init, div_inpcb_fini, 0, IPI_HASHFIELDS_NONE); + div_inpcb_init, IPI_HASHFIELDS_NONE); } static void @@ -491,6 +483,14 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, /* Send packet to input processing via netisr */ switch (ip->ip_v) { case IPVERSION: + /* + * Restore M_BCAST flag when destination address is + * broadcast. It is expected by ip_tryforward(). + */ + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) + m->m_flags |= M_MCAST; + else if (in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) + m->m_flags |= M_BCAST; netisr_queue_src(NETISR_IP, (uintptr_t)so, m); break; #ifdef INET6 diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c index 1c32b1b8..acc2e6b6 100644 --- a/freebsd/sys/netinet/ip_icmp.c +++ b/freebsd/sys/netinet/ip_icmp.c @@ -187,10 +187,10 @@ kmod_icmpstat_inc(int statnum) void icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu) { - register struct ip *oip = mtod(n, struct ip *), *nip; - register unsigned oiphlen = oip->ip_hl << 2; - register struct icmp *icp; - register struct mbuf *m; + struct ip *oip = mtod(n, struct ip *), *nip; + unsigned oiphlen = oip->ip_hl << 2; + struct icmp *icp; + struct mbuf *m; unsigned icmplen, icmpelen, nlen; KASSERT((u_int)type <= ICMP_MAXTYPE, ("%s: illegal ICMP type", __func__)); @@ -542,11 +542,10 @@ icmp_input(struct mbuf **mp, int *offp, int proto) ICMPSTAT_INC(icps_bmcastecho); break; } - icp->icmp_type = ICMP_ECHOREPLY; if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0) goto freeit; - else - goto reflect; + icp->icmp_type = ICMP_ECHOREPLY; + goto reflect; case ICMP_TSTAMP: if (V_icmptstamprepl == 0) @@ -560,13 +559,12 @@ icmp_input(struct mbuf **mp, int *offp, int proto) ICMPSTAT_INC(icps_badlen); break; } + if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) + goto freeit; icp->icmp_type = ICMP_TSTAMPREPLY; icp->icmp_rtime = iptime(); icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ - if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) - goto freeit; - else - goto reflect; + goto reflect; case ICMP_MASKREQ: if (V_icmpmaskrepl == 0) @@ -816,7 +814,7 @@ match: ip->ip_ttl = V_ip_defttl; if (optlen > 0) { - register u_char *cp; + u_char *cp; int opt, cnt; u_int len; @@ -891,9 +889,9 @@ done: static void icmp_send(struct mbuf *m, struct mbuf *opts) { - register struct ip *ip = mtod(m, struct ip *); - register int hlen; - register struct icmp *icp; + struct ip *ip = mtod(m, struct ip *); + int hlen; + struct icmp *icp; hlen = ip->ip_hl << 2; m->m_data += hlen; diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c index a9126d4b..437c281a 100644 --- a/freebsd/sys/netinet/ip_input.c +++ b/freebsd/sys/netinet/ip_input.c @@ -268,9 +268,9 @@ sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) return (EINVAL); return (netisr_setqlimit(&ip_direct_nh, qlimit)); } -SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, - CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", - "Maximum size of the IP direct input queue"); +SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, + "I", "Maximum size of the IP direct input queue"); static int sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) @@ -289,7 +289,7 @@ sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) return (0); } -SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, +SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", "Number of packets dropped from the IP direct input queue"); #endif /* RSS */ diff --git a/freebsd/sys/netinet/libalias/alias.c b/freebsd/sys/netinet/libalias/alias.c index cd3b5e05..35343c5f 100644 --- a/freebsd/sys/netinet/libalias/alias.c +++ b/freebsd/sys/netinet/libalias/alias.c @@ -701,12 +701,14 @@ ProtoAliasOut(struct libalias *la, struct in_addr *ip_src, struct alias_link *lnk; LIBALIAS_LOCK_ASSERT(la); - (void)create; /* Return if proxy-only mode is enabled */ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) return (PKT_ALIAS_OK); + if (!create) + return (PKT_ALIAS_IGNORED); + lnk = FindProtoOut(la, *ip_src, ip_dst, ip_p); if (lnk != NULL) { struct in_addr alias_address; diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c index d67df1ca..689a2bc4 100644 --- a/freebsd/sys/netinet/raw_ip.c +++ b/freebsd/sys/netinet/raw_ip.c @@ -212,7 +212,7 @@ rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, - 1, "ripcb", rip_inpcb_init, NULL, 0, IPI_HASHFIELDS_NONE); + 1, "ripcb", rip_inpcb_init, IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c index d363642a..be01c38a 100644 --- a/freebsd/sys/netinet/sctp_input.c +++ b/freebsd/sys/netinet/sctp_input.c @@ -163,13 +163,11 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset, *abort_no_unlock = 1; goto outnow; } - /* We are only accepting if we have a socket with positive - * so_qlimit. */ + /* We are only accepting if we have a listening socket. */ if ((stcb == NULL) && ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || - (inp->sctp_socket == NULL) || - (inp->sctp_socket->so_qlimit == 0))) { + (!SCTP_IS_LISTENING(inp)))) { /* * FIX ME ?? What about TCP model and we have a * match/restart case? Actually no fix is needed. the lookup @@ -1607,8 +1605,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, sctp_stop_all_cookie_timers(stcb); if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && - (inp->sctp_socket->so_qlimit == 0) - ) { + (!SCTP_IS_LISTENING(inp))) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif @@ -1808,7 +1805,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && - (inp->sctp_socket->so_qlimit == 0)) { + (!SCTP_IS_LISTENING(inp))) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif @@ -2319,7 +2316,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, *notification = SCTP_NOTIFY_ASSOC_UP; if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && - (inp->sctp_socket->so_qlimit == 0)) { + (!SCTP_IS_LISTENING(inp))) { /* * This is an endpoint that called connect() how it got a * cookie that is NEW is a bit of a mystery. It must be that @@ -2345,7 +2342,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, SCTP_SOCKET_UNLOCK(so, 1); #endif } else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && - (inp->sctp_socket->so_qlimit)) { + (SCTP_IS_LISTENING(inp))) { /* * We don't want to do anything with this one. Since it is * the listening guy. The timer will get started for @@ -5207,7 +5204,9 @@ process_control_chunks: * longer listening. */ - if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) { + if ((stcb == NULL) && + (!SCTP_IS_LISTENING(inp) || + inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) { op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, ""); diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h index 2da90b69..f2bea00e 100644 --- a/freebsd/sys/netinet/sctp_os_bsd.h +++ b/freebsd/sys/netinet/sctp_os_bsd.h @@ -462,8 +462,6 @@ sctp_get_mbuf_for_msg(unsigned int space_needed, #define SCTP_SHA256_UPDATE SHA256_Update #define SCTP_SHA256_FINAL(x,y) SHA256_Final((caddr_t)x, y) -#endif - #define SCTP_DECREMENT_AND_CHECK_REFCOUNT(addr) (atomic_fetchadd_int(addr, -1) == 1) #if defined(INVARIANTS) #define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ @@ -484,3 +482,7 @@ sctp_get_mbuf_for_msg(unsigned int space_needed, } \ } #endif + +#define SCTP_IS_LISTENING(inp) ((inp->sctp_flags & SCTP_PCB_FLAGS_ACCEPTING) != 0) + +#endif diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c index 2e6eedaf..221d0570 100644 --- a/freebsd/sys/netinet/sctp_output.c +++ b/freebsd/sys/netinet/sctp_output.c @@ -11149,7 +11149,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst, ip->ip_v = IPVERSION; ip->ip_hl = (sizeof(struct ip) >> 2); ip->ip_tos = 0; - ip->ip_off = 0; + ip->ip_off = htons(IP_DF); ip_fillid(ip); ip->ip_ttl = MODULE_GLOBAL(ip_defttl); if (port) { @@ -12597,7 +12597,7 @@ sctp_lower_sosend(struct socket *so, (void *)addr, sndlen); if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && - (inp->sctp_socket->so_qlimit)) { + SCTP_IS_LISTENING(inp)) { /* The listener can NOT send */ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOTCONN); error = ENOTCONN; diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c index 3608fd5e..e32e63f4 100644 --- a/freebsd/sys/netinet/sctp_pcb.c +++ b/freebsd/sys/netinet/sctp_pcb.c @@ -1313,7 +1313,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, * it is the acceptor, then do the special_lookup to hash * and find the real inp. */ - if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) { + if ((inp->sctp_socket) && SCTP_IS_LISTENING(inp)) { /* to is peer addr, from is my addr */ stcb = sctp_tcb_special_locate(inp_p, remote, local, netp, inp->def_vrf_id); @@ -1886,7 +1886,7 @@ sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp) if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { continue; } - if (tinp->sctp_socket->so_qlimit) { + if (SCTP_IS_LISTENING(tinp)) { continue; } SCTP_INP_WLOCK(tinp); @@ -3937,6 +3937,7 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, stcb->asoc.vrf_id, stcb->sctp_ep->fibnum); + net->src_addr_selected = 0; if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) { /* Get source address */ net->ro._s_addr = sctp_source_address_selection(stcb->sctp_ep, @@ -3946,18 +3947,18 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, 0, stcb->asoc.vrf_id); if (net->ro._s_addr != NULL) { + uint32_t imtu, rmtu, hcmtu; + net->src_addr_selected = 1; /* Now get the interface MTU */ if (net->ro._s_addr->ifn_p != NULL) { - net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p); + imtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p); + } else { + imtu = 0; } - } else { - net->src_addr_selected = 0; - } - if (net->mtu > 0) { - uint32_t rmtu; - rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt); + hcmtu = sctp_hc_get_mtu(&net->ro._l_addr, stcb->sctp_ep->fibnum); + net->mtu = sctp_min_mtu(hcmtu, rmtu, imtu); if (rmtu == 0) { /* * Start things off to match mtu of @@ -3965,17 +3966,8 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, */ SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa, net->ro.ro_rt, net->mtu); - } else { - /* - * we take the route mtu over the interface, - * since the route may be leading out the - * loopback, or a different interface. - */ - net->mtu = rmtu; } } - } else { - net->src_addr_selected = 0; } if (net->mtu == 0) { switch (newaddr->sa_family) { diff --git a/freebsd/sys/netinet/sctp_sysctl.c b/freebsd/sys/netinet/sctp_sysctl.c index ea3b3d9c..db150112 100644 --- a/freebsd/sys/netinet/sctp_sysctl.c +++ b/freebsd/sys/netinet/sctp_sysctl.c @@ -412,6 +412,7 @@ sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS) xinpcb.socket = inp->sctp_socket; so = inp->sctp_socket; if ((so == NULL) || + (!SCTP_IS_LISTENING(inp)) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) { xinpcb.qlen = 0; xinpcb.maxqlen = 0; diff --git a/freebsd/sys/netinet/sctp_timer.c b/freebsd/sys/netinet/sctp_timer.c index 6ce9fc30..ecadca5b 100644 --- a/freebsd/sys/netinet/sctp_timer.c +++ b/freebsd/sys/netinet/sctp_timer.c @@ -669,6 +669,7 @@ start_again: stcb->asoc.peers_rwnd += SCTP_BASE_SYSCTL(sctp_peer_chunk_oh); } chk->sent = SCTP_DATAGRAM_RESEND; + chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; SCTP_STAT_INCR(sctps_markedretrans); /* reset the TSN for striking and other FR stuff */ @@ -742,6 +743,7 @@ start_again: chk->whoTo = alt; if (chk->sent != SCTP_DATAGRAM_RESEND) { chk->sent = SCTP_DATAGRAM_RESEND; + chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); cnt_mk++; } @@ -1086,6 +1088,7 @@ sctp_cookie_timer(struct sctp_inpcb *inp, sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); } cookie->sent = SCTP_DATAGRAM_RESEND; + cookie->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* * Now call the output routine to kick out the cookie again, Note we * don't mark any chunks for retran so that FR will need to kick in @@ -1132,6 +1135,7 @@ sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, sctp_free_remote_addr(chk->whoTo); if (chk->sent != SCTP_DATAGRAM_RESEND) { chk->sent = SCTP_DATAGRAM_RESEND; + chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); } chk->whoTo = alt; @@ -1149,6 +1153,7 @@ sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, if (strrst->sent != SCTP_DATAGRAM_RESEND) sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); strrst->sent = SCTP_DATAGRAM_RESEND; + strrst->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* restart the timer */ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, inp, stcb, strrst->whoTo); @@ -1213,6 +1218,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, chk->whoTo = alt; if (chk->sent != SCTP_DATAGRAM_RESEND) { chk->sent = SCTP_DATAGRAM_RESEND; + chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); } atomic_add_int(&alt->ref_count, 1); @@ -1227,6 +1233,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, if (asconf->sent != SCTP_DATAGRAM_RESEND && chk->sent != SCTP_DATAGRAM_UNSENT) sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); chk->sent = SCTP_DATAGRAM_RESEND; + chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; } if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { /* @@ -1239,6 +1246,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, if (asconf->sent != SCTP_DATAGRAM_RESEND) sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); asconf->sent = SCTP_DATAGRAM_RESEND; + asconf->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* send another ASCONF if any and we can do */ sctp_send_asconf(stcb, alt, SCTP_ADDR_NOT_LOCKED); diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c index 550926f3..b65f74d1 100644 --- a/freebsd/sys/netinet/sctp_usrreq.c +++ b/freebsd/sys/netinet/sctp_usrreq.c @@ -154,7 +154,7 @@ sctp_notify(struct sctp_inpcb *inp, uint8_t icmp_type, uint8_t icmp_code, uint16_t ip_len, - uint16_t next_mtu) + uint32_t next_mtu) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; @@ -222,10 +222,15 @@ sctp_notify(struct sctp_inpcb *inp, timer_stopped = 0; } /* Update the path MTU. */ + if (net->port) { + next_mtu -= sizeof(struct udphdr); + } if (net->mtu > next_mtu) { net->mtu = next_mtu; if (net->port) { - net->mtu -= sizeof(struct udphdr); + sctp_hc_set_mtu(&net->ro._l_addr, inp->fibnum, next_mtu + sizeof(struct udphdr)); + } else { + sctp_hc_set_mtu(&net->ro._l_addr, inp->fibnum, next_mtu); } } /* Update the association MTU */ @@ -330,7 +335,7 @@ sctp_ctlinput(int cmd, struct sockaddr *sa, void *vip) icmp->icmp_type, icmp->icmp_code, ntohs(inner_ip->ip_len), - ntohs(icmp->icmp_nextmtu)); + (uint32_t)ntohs(icmp->icmp_nextmtu)); } else { if ((stcb == NULL) && (inp != NULL)) { /* reduce ref-count */ @@ -7036,7 +7041,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p) if (tinp && (tinp != inp) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) && - (tinp->sctp_socket->so_qlimit)) { + (SCTP_IS_LISTENING(tinp))) { /* * we have a listener already and * its not this inp. @@ -7080,7 +7085,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p) if (tinp && (tinp != inp) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) && - (tinp->sctp_socket->so_qlimit)) { + (SCTP_IS_LISTENING(tinp))) { /* * we have a listener already and its not * this inp. @@ -7134,6 +7139,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p) return (error); } } + SCTP_INP_WLOCK(inp); SOCK_LOCK(so); /* It appears for 7.0 and on, we must always call this. */ solisten_proto(so, backlog); @@ -7141,11 +7147,13 @@ sctp_listen(struct socket *so, int backlog, struct thread *p) /* remove the ACCEPTCONN flag for one-to-many sockets */ so->so_options &= ~SO_ACCEPTCONN; } - if (backlog == 0) { - /* turning off listen */ - so->so_options &= ~SO_ACCEPTCONN; + if (backlog > 0) { + inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING; + } else { + inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING; } SOCK_UNLOCK(so); + SCTP_INP_WUNLOCK(inp); return (error); } diff --git a/freebsd/sys/netinet/sctp_var.h b/freebsd/sys/netinet/sctp_var.h index 6365dfec..9e149e68 100644 --- a/freebsd/sys/netinet/sctp_var.h +++ b/freebsd/sys/netinet/sctp_var.h @@ -341,7 +341,7 @@ void sctp_drain(void); void sctp_init(void); void sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, - uint8_t, uint8_t, uint16_t, uint16_t); + uint8_t, uint8_t, uint16_t, uint32_t); int sctp_flush(struct socket *, int); int sctp_shutdown(struct socket *); int diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c index 79bb0620..4c6ba598 100644 --- a/freebsd/sys/netinet/sctputil.c +++ b/freebsd/sys/netinet/sctputil.c @@ -51,6 +51,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#if defined(INET6) || defined(INET) +#include +#endif #include #include #include @@ -6973,7 +6976,7 @@ sctp_recv_icmp_tunneled_packet(int cmd, struct sockaddr *sa, void *vip, void *ct } sctp_notify(inp, stcb, net, type, code, ntohs(inner_ip->ip_len), - ntohs(icmp->icmp_nextmtu)); + (uint32_t)ntohs(icmp->icmp_nextmtu)); } else { if ((stcb == NULL) && (inp != NULL)) { /* reduce ref-count */ @@ -7115,7 +7118,7 @@ sctp_recv_icmp6_tunneled_packet(int cmd, struct sockaddr *sa, void *d, void *ctx code = ICMP6_PARAMPROB_NEXTHEADER; } sctp6_notify(inp, stcb, net, type, code, - (uint16_t)ntohl(ip6cp->ip6c_icmp6->icmp6_mtu)); + ntohl(ip6cp->ip6c_icmp6->icmp6_mtu)); } else { if ((stcb == NULL) && (inp != NULL)) { /* reduce inp's ref-count */ @@ -7237,3 +7240,90 @@ sctp_over_udp_start(void) #endif return (0); } + +#if defined(INET6) || defined(INET) + +/* + * sctp_min_mtu ()returns the minimum of all non-zero arguments. + * If all arguments are zero, zero is returned. + */ +uint32_t +sctp_min_mtu(uint32_t mtu1, uint32_t mtu2, uint32_t mtu3) +{ + if (mtu1 > 0) { + if (mtu2 > 0) { + if (mtu3 > 0) { + return (min(mtu1, min(mtu2, mtu3))); + } else { + return (min(mtu1, mtu2)); + } + } else { + if (mtu3 > 0) { + return (min(mtu1, mtu3)); + } else { + return (mtu1); + } + } + } else { + if (mtu2 > 0) { + if (mtu3 > 0) { + return (min(mtu2, mtu3)); + } else { + return (mtu2); + } + } else { + return (mtu3); + } + } +} + +void +sctp_hc_set_mtu(union sctp_sockstore *addr, uint16_t fibnum, uint32_t mtu) +{ + struct in_conninfo inc; + + memset(&inc, 0, sizeof(struct in_conninfo)); + inc.inc_fibnum = fibnum; + switch (addr->sa.sa_family) { +#ifdef INET + case AF_INET: + inc.inc_faddr = addr->sin.sin_addr; + break; +#endif +#ifdef INET6 + case AF_INET6: + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = addr->sin6.sin6_addr; + break; +#endif + default: + return; + } + tcp_hc_updatemtu(&inc, (u_long)mtu); +} + +uint32_t +sctp_hc_get_mtu(union sctp_sockstore *addr, uint16_t fibnum) +{ + struct in_conninfo inc; + + memset(&inc, 0, sizeof(struct in_conninfo)); + inc.inc_fibnum = fibnum; + switch (addr->sa.sa_family) { +#ifdef INET + case AF_INET: + inc.inc_faddr = addr->sin.sin_addr; + break; +#endif +#ifdef INET6 + case AF_INET6: + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = addr->sin6.sin6_addr; + break; +#endif + default: + return (0); + } + return ((uint32_t)tcp_hc_getmtu(&inc)); +} +#endif diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h index dd45e49a..50118b7a 100644 --- a/freebsd/sys/netinet/sctputil.h +++ b/freebsd/sys/netinet/sctputil.h @@ -387,6 +387,11 @@ sctp_auditing(int, struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *); void sctp_audit_log(uint8_t, uint8_t); +#endif +#if defined(INET6) || defined(INET) +uint32_t sctp_min_mtu(uint32_t, uint32_t, uint32_t); +void sctp_hc_set_mtu(union sctp_sockstore *, uint16_t, uint32_t); +uint32_t sctp_hc_get_mtu(union sctp_sockstore *, uint16_t); #endif #endif /* _KERNEL */ #endif diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c index 89f2bf0c..d7091928 100644 --- a/freebsd/sys/netinet/tcp_input.c +++ b/freebsd/sys/netinet/tcp_input.c @@ -1488,6 +1488,68 @@ drop: return (IPPROTO_DONE); } +/* + * Automatic sizing of receive socket buffer. Often the send + * buffer size is not optimally adjusted to the actual network + * conditions at hand (delay bandwidth product). Setting the + * buffer size too small limits throughput on links with high + * bandwidth and high delay (eg. trans-continental/oceanic links). + * + * On the receive side the socket buffer memory is only rarely + * used to any significant extent. This allows us to be much + * more aggressive in scaling the receive socket buffer. For + * the case that the buffer space is actually used to a large + * extent and we run out of kernel memory we can simply drop + * the new segments; TCP on the sender will just retransmit it + * later. Setting the buffer size too big may only consume too + * much kernel memory if the application doesn't read() from + * the socket or packet loss or reordering makes use of the + * reassembly queue. + * + * The criteria to step up the receive buffer one notch are: + * 1. Application has not set receive buffer size with + * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. + * 2. the number of bytes received during the time it takes + * one timestamp to be reflected back to us (the RTT); + * 3. received bytes per RTT is within seven eighth of the + * current socket buffer size; + * 4. receive buffer size has not hit maximal automatic size; + * + * This algorithm does one step per RTT at most and only if + * we receive a bulk stream w/o packet losses or reorderings. + * Shrinking the buffer during idle times is not necessary as + * it doesn't consume any memory when idle. + * + * TODO: Only step up if the application is actually serving + * the buffer to better manage the socket buffer resources. + */ +int +tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so, + struct tcpcb *tp, int tlen) +{ + int newsize = 0; + + if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) && + tp->t_srtt != 0 && tp->rfbuf_ts != 0 && + TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) > + (tp->t_srtt >> TCP_RTT_SHIFT)) { + if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) && + so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) { + newsize = min(so->so_rcv.sb_hiwat + + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); + } + TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize); + + /* Start over with next RTT. */ + tp->rfbuf_ts = 0; + tp->rfbuf_cnt = 0; + } else { + tp->rfbuf_cnt += tlen; /* add up */ + } + + return (newsize); +} + void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, @@ -1553,6 +1615,26 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_pcap_add(th, m, &(tp->t_inpkts)); #endif + if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) { + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: " + "SYN|FIN segment ignored (based on " + "sysctl setting)\n", s, __func__); + free(s, M_TCPLOG); + } + goto drop; + } + + /* + * If a segment with the ACK-bit set arrives in the SYN-SENT state + * check SEQ.ACK first. + */ + if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && + (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { + rstreason = BANDLIM_UNLIMITED; + goto dropwithreset; + } + /* * Segment received on connection. * Reset idle time and keep-alive timer. @@ -1851,62 +1933,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, #endif TCP_PROBE3(debug__input, tp, th, m); - /* - * Automatic sizing of receive socket buffer. Often the send - * buffer size is not optimally adjusted to the actual network - * conditions at hand (delay bandwidth product). Setting the - * buffer size too small limits throughput on links with high - * bandwidth and high delay (eg. trans-continental/oceanic links). - * - * On the receive side the socket buffer memory is only rarely - * used to any significant extent. This allows us to be much - * more aggressive in scaling the receive socket buffer. For - * the case that the buffer space is actually used to a large - * extent and we run out of kernel memory we can simply drop - * the new segments; TCP on the sender will just retransmit it - * later. Setting the buffer size too big may only consume too - * much kernel memory if the application doesn't read() from - * the socket or packet loss or reordering makes use of the - * reassembly queue. - * - * The criteria to step up the receive buffer one notch are: - * 1. Application has not set receive buffer size with - * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. - * 2. the number of bytes received during the time it takes - * one timestamp to be reflected back to us (the RTT); - * 3. received bytes per RTT is within seven eighth of the - * current socket buffer size; - * 4. receive buffer size has not hit maximal automatic size; - * - * This algorithm does one step per RTT at most and only if - * we receive a bulk stream w/o packet losses or reorderings. - * Shrinking the buffer during idle times is not necessary as - * it doesn't consume any memory when idle. - * - * TODO: Only step up if the application is actually serving - * the buffer to better manage the socket buffer resources. - */ - if (V_tcp_do_autorcvbuf && - (to.to_flags & TOF_TS) && - to.to_tsecr && - (so->so_rcv.sb_flags & SB_AUTOSIZE)) { - if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) && - to.to_tsecr - tp->rfbuf_ts < hz) { - if (tp->rfbuf_cnt > - (so->so_rcv.sb_hiwat / 8 * 7) && - so->so_rcv.sb_hiwat < - V_tcp_autorcvbuf_max) { - newsize = - min(so->so_rcv.sb_hiwat + - V_tcp_autorcvbuf_inc, - V_tcp_autorcvbuf_max); - } - /* Start over with next RTT. */ - tp->rfbuf_ts = 0; - tp->rfbuf_cnt = 0; - } else - tp->rfbuf_cnt += tlen; /* add up */ - } + newsize = tcp_autorcvbuf(m, th, so, tp, tlen); /* Add data to socket buffer. */ SOCKBUF_LOCK(&so->so_rcv); @@ -1947,10 +1974,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, win = 0; tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); - /* Reset receive buffer auto scaling when not in bulk receive mode. */ - tp->rfbuf_ts = 0; - tp->rfbuf_cnt = 0; - switch (tp->t_state) { /* @@ -1990,7 +2013,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, /* * If the state is SYN_SENT: - * if seg contains an ACK, but not for our SYN, drop the input. * if seg contains a RST, then drop the connection. * if seg does not contain SYN, then drop it. * Otherwise this is an acceptable SYN segment @@ -2003,12 +2025,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * continue processing rest of data/controls, beginning with URG */ case TCPS_SYN_SENT: - if ((thflags & TH_ACK) && - (SEQ_LEQ(th->th_ack, tp->iss) || - SEQ_GT(th->th_ack, tp->snd_max))) { - rstreason = BANDLIM_UNLIMITED; - goto dropwithreset; - } if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) { TCP_PROBE5(connect__refused, NULL, tp, m, tp, th); diff --git a/freebsd/sys/netinet/tcp_lro.c b/freebsd/sys/netinet/tcp_lro.c index 13866134..91d534f1 100644 --- a/freebsd/sys/netinet/tcp_lro.c +++ b/freebsd/sys/netinet/tcp_lro.c @@ -117,7 +117,6 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, lc->lro_bad_csum = 0; lc->lro_queued = 0; lc->lro_flushed = 0; - lc->lro_cnt = 0; lc->lro_mbuf_count = 0; lc->lro_mbuf_max = lro_mbufs; lc->lro_cnt = lro_entries; @@ -147,6 +146,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, /* check for out of memory */ if (lc->lro_mbuf_data == NULL) { + free(lc->lro_hash, M_LRO); memset(lc, 0, sizeof(*lc)); return (ENOMEM); } @@ -177,17 +177,15 @@ tcp_lro_free(struct lro_ctrl *lc) } /* free hash table */ - if (lc->lro_hash != NULL) { - free(lc->lro_hash, M_LRO); - lc->lro_hash = NULL; - } + free(lc->lro_hash, M_LRO); + lc->lro_hash = NULL; lc->lro_hashsz = 0; /* free mbuf array, if any */ for (x = 0; x != lc->lro_mbuf_count; x++) m_freem(lc->lro_mbuf_data[x].mb); lc->lro_mbuf_count = 0; - + /* free allocated memory, if any */ free(lc->lro_mbuf_data, M_LRO); lc->lro_mbuf_data = NULL; @@ -957,18 +955,12 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) /* check if packet is not LRO capable */ if (__predict_false(mb->m_pkthdr.csum_flags == 0 || (lc->ifp->if_capenable & IFCAP_LRO) == 0)) { - lc->lro_flushed++; - lc->lro_queued++; /* input packet to network layer */ (*lc->ifp->if_input) (lc->ifp, mb); return; } - /* check if array is full */ - if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max)) - tcp_lro_flush_all(lc); - /* create sequence number */ lc->lro_mbuf_data[lc->lro_mbuf_count].seq = (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | @@ -976,7 +968,11 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) ((uint64_t)lc->lro_mbuf_count); /* enter mbuf */ - lc->lro_mbuf_data[lc->lro_mbuf_count++].mb = mb; + lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb; + + /* flush if array is full */ + if (__predict_false(++lc->lro_mbuf_count == lc->lro_mbuf_max)) + tcp_lro_flush_all(lc); } /* end */ diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c index 53eccf11..d2606fb6 100644 --- a/freebsd/sys/netinet/tcp_output.c +++ b/freebsd/sys/netinet/tcp_output.c @@ -833,11 +833,13 @@ send: to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; - /* Set receive buffer autosizing timestamp. */ - if (tp->rfbuf_ts == 0 && - (so->so_rcv.sb_flags & SB_AUTOSIZE)) - tp->rfbuf_ts = tcp_ts_getticks(); } + + /* Set receive buffer autosizing timestamp. */ + if (tp->rfbuf_ts == 0 && + (so->so_rcv.sb_flags & SB_AUTOSIZE)) + tp->rfbuf_ts = tcp_ts_getticks(); + /* Selective ACK's. */ if (tp->t_flags & TF_SACK_PERMIT) { if (flags & TH_SYN) diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c index 779de5e0..4f944cab 100644 --- a/freebsd/sys/netinet/tcp_reass.c +++ b/freebsd/sys/netinet/tcp_reass.c @@ -110,7 +110,7 @@ tcp_reass_global_init(void) TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", &tcp_reass_maxseg); tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* Set the zone limit and read back the effective value. */ tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg); diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c index 48f4cfda..30464e1b 100644 --- a/freebsd/sys/netinet/tcp_subr.c +++ b/freebsd/sys/netinet/tcp_subr.c @@ -653,7 +653,7 @@ tcp_init(void) hashsize); } in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, - "tcp_inpcb", tcp_inpcb_init, NULL, 0, IPI_HASHFIELDS_4TUPLE); + "tcp_inpcb", tcp_inpcb_init, IPI_HASHFIELDS_4TUPLE); /* * These have to be type stable for the benefit of the timers. diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c index 84b9d271..13170ae9 100644 --- a/freebsd/sys/netinet/tcp_syncache.c +++ b/freebsd/sys/netinet/tcp_syncache.c @@ -262,6 +262,8 @@ syncache_init(void) &V_tcp_syncache.hashbase[i].sch_mtx, 0); V_tcp_syncache.hashbase[i].sch_length = 0; V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache; + V_tcp_syncache.hashbase[i].sch_last_overflow = + -(SYNCOOKIE_LIFETIME + 1); } /* Create the syncache entry zone. */ @@ -337,6 +339,7 @@ syncache_insert(struct syncache *sc, struct syncache_head *sch) KASSERT(!TAILQ_EMPTY(&sch->sch_bucket), ("sch->sch_length incorrect")); sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head); + sch->sch_last_overflow = time_uptime; syncache_drop(sc2, sch); TCPSTAT_INC(tcps_sc_bucketoverflow); } @@ -984,10 +987,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, /* * There is no syncache entry, so see if this ACK is * a returning syncookie. To do this, first: - * A. See if this socket has had a syncache entry dropped in - * the past. We don't want to accept a bogus syncookie - * if we've never received a SYN. - * B. check that the syncookie is valid. If it is, then + * A. Check if syncookies are used in case of syncache + * overflows + * B. See if this socket has had a syncache entry dropped in + * the recent past. We don't want to accept a bogus + * syncookie if we've never received a SYN or accept it + * twice. + * C. check that the syncookie is valid. If it is, then * cobble up a fake syncache entry, and return. */ if (!V_tcp_syncookies) { @@ -998,6 +1004,15 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, s, __func__); goto failed; } + if (!V_tcp_syncookiesonly && + sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) { + SCH_UNLOCK(sch); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Spurious ACK, " + "segment rejected (no syncache entry)\n", + s, __func__); + goto failed; + } bzero(&scs, sizeof(scs)); sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop); SCH_UNLOCK(sch); @@ -1421,8 +1436,10 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, * entry and insert the new one. */ TCPSTAT_INC(tcps_sc_zonefail); - if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) + if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) { + sch->sch_last_overflow = time_uptime; syncache_drop(sc, sch); + } sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); if (sc == NULL) { if (V_tcp_syncookies) { diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h index 2c8c5b00..ebf9fb84 100644 --- a/freebsd/sys/netinet/tcp_syncache.h +++ b/freebsd/sys/netinet/tcp_syncache.h @@ -99,6 +99,7 @@ struct syncache_head { int sch_nextc; u_int sch_length; struct tcp_syncache *sch_sc; + time_t sch_last_overflow; }; #define SYNCOOKIE_SECRET_SIZE 16 diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c index 05fed2d5..198291f2 100644 --- a/freebsd/sys/netinet/tcp_usrreq.c +++ b/freebsd/sys/netinet/tcp_usrreq.c @@ -599,6 +599,10 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) error = EINVAL; goto out; } + if ((inp->inp_vflag & INP_IPV4) == 0) { + error = EAFNOSUPPORT; + goto out; + } in6_sin6_2_sin(&sin, sin6p); inp->inp_vflag |= INP_IPV4; @@ -616,6 +620,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) #endif error = tp->t_fb->tfb_tcp_output(tp); goto out; + } else { + if ((inp->inp_vflag & INP_IPV6) == 0) { + error = EAFNOSUPPORT; + goto out; + } } #endif inp->inp_vflag &= ~INP_IPV4; diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h index 5705e553..d298c9dd 100644 --- a/freebsd/sys/netinet/tcp_var.h +++ b/freebsd/sys/netinet/tcp_var.h @@ -778,6 +778,8 @@ void hhook_run_tcp_est_in(struct tcpcb *tp, #endif int tcp_input(struct mbuf **, int *, int); +int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *, + struct tcpcb *, int); void tcp_do_segment(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, int); diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c index c77439f7..af6b564f 100644 --- a/freebsd/sys/netinet/udp_usrreq.c +++ b/freebsd/sys/netinet/udp_usrreq.c @@ -129,12 +129,6 @@ SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_blackhole), 0, "Do not send port unreachables for refused connects"); -static VNET_DEFINE(int, udp_require_l2_bcast) = 0; -#define V_udp_require_l2_bcast VNET(udp_require_l2_bcast) -SYSCTL_INT(_net_inet_udp, OID_AUTO, require_l2_bcast, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(udp_require_l2_bcast), 0, - "Only treat packets sent to an L2 broadcast address as broadcast packets"); - u_long udp_sendspace = 9216; /* really max datagram size */ SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); @@ -215,8 +209,7 @@ udp_init(void) * a 4-tuple, flip this to 4-tuple. */ in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE, - "udp_inpcb", udp_inpcb_init, NULL, 0, - IPI_HASHFIELDS_2TUPLE); + "udp_inpcb", udp_inpcb_init, IPI_HASHFIELDS_2TUPLE); V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_zone_set_max(V_udpcb_zone, maxsockets); @@ -230,8 +223,8 @@ udplite_init(void) { in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE, - UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL, - 0, IPI_HASHFIELDS_2TUPLE); + UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, + IPI_HASHFIELDS_2TUPLE); } /* @@ -535,8 +528,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) pcbinfo = udp_get_inpcbinfo(proto); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || - ((!V_udp_require_l2_bcast || m->m_flags & M_BCAST) && - in_broadcast(ip->ip_dst, ifp))) { + in_broadcast(ip->ip_dst, ifp)) { struct inpcb *last; struct inpcbhead *pcblist; struct ip_moptions *imo; -- cgit v1.2.3