diff options
Diffstat (limited to 'freebsd/sys/netinet/tcp_input.c')
-rw-r--r-- | freebsd/sys/netinet/tcp_input.c | 134 |
1 files changed, 94 insertions, 40 deletions
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c index d00504dc..4bf12298 100644 --- a/freebsd/sys/netinet/tcp_input.c +++ b/freebsd/sys/netinet/tcp_input.c @@ -214,11 +214,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_autorcvbuf), 0, "Enable automatic receive buffer sizing"); -VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(tcp_autorcvbuf_inc), 0, - "Incrementor step size of automatic receive buffer"); - VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_max), 0, @@ -373,31 +368,14 @@ cc_conn_init(struct tcpcb *tp) /* * Set the initial slow-start flight size. * - * RFC5681 Section 3.1 specifies the default conservative values. - * RFC3390 specifies slightly more aggressive values. - * RFC6928 increases it to ten segments. - * Support for user specified value for initial flight size. - * * If a SYN or SYN/ACK was lost and retransmitted, we have to * reduce the initial CWND to one segment as congestion is likely * requiring us to be cautious. */ if (tp->snd_cwnd == 1) tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */ - else if (V_tcp_initcwnd_segments) - tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg, - max(2 * maxseg, V_tcp_initcwnd_segments * 1460)); - else if (V_tcp_do_rfc3390) - tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380)); - else { - /* Per RFC5681 Section 3.1 */ - if (maxseg > 2190) - tp->snd_cwnd = 2 * maxseg; - else if (maxseg > 1095) - tp->snd_cwnd = 3 * maxseg; - else - tp->snd_cwnd = 4 * maxseg; - } + else + tp->snd_cwnd = tcp_compute_initwnd(maxseg); if (CC_ALGO(tp)->conn_init != NULL) CC_ALGO(tp)->conn_init(tp->ccv); @@ -578,6 +556,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) int optlen = 0; #ifdef INET int len; + uint8_t ipttl; #endif int tlen = 0, off; int drop_hdrlen; @@ -700,6 +679,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) * Checksum extended TCP header and data. */ len = off0 + tlen; + ipttl = ip->ip_ttl; bzero(ipov->ih_x1, sizeof(ipov->ih_x1)); ipov->ih_len = htons(tlen); th->th_sum = in_cksum(m, len); @@ -708,6 +688,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) /* Reset TOS bits */ ip->ip_tos = iptos; /* Re-initialization for later version check */ + ip->ip_ttl = ipttl; ip->ip_v = IPVERSION; ip->ip_hl = off0 >> 2; } @@ -1468,13 +1449,16 @@ drop: * The criteria to step up the receive buffer one notch are: * 1. Application has not set receive buffer size with * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. - * 2. the number of bytes received during the time it takes - * one timestamp to be reflected back to us (the RTT); - * 3. received bytes per RTT is within seven eighth of the - * current socket buffer size; - * 4. receive buffer size has not hit maximal automatic size; + * 2. the number of bytes received during 1/2 of an sRTT + * is at least 3/8 of the current socket buffer size. + * 3. receive buffer size has not hit maximal automatic size; + * + * If all of the criteria are met we increaset the socket buffer + * by a 1/2 (bounded by the max). This allows us to keep ahead + * of slow-start but also makes it so our peer never gets limited + * by our rwnd which we then open up causing a burst. * - * This algorithm does one step per RTT at most and only if + * This algorithm does two steps per RTT at most and only if * we receive a bulk stream w/o packet losses or reorderings. * Shrinking the buffer during idle times is not necessary as * it doesn't consume any memory when idle. @@ -1491,11 +1475,10 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so, if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) && tp->t_srtt != 0 && tp->rfbuf_ts != 0 && TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) > - (tp->t_srtt >> TCP_RTT_SHIFT)) { - if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) && + ((tp->t_srtt >> TCP_RTT_SHIFT)/2)) { + if (tp->rfbuf_cnt > ((so->so_rcv.sb_hiwat / 2)/ 4 * 3) && so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) { - newsize = min(so->so_rcv.sb_hiwat + - V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); + newsize = min((so->so_rcv.sb_hiwat + (so->so_rcv.sb_hiwat/2)), V_tcp_autorcvbuf_max); } TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize); @@ -1505,7 +1488,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so, } else { tp->rfbuf_cnt += tlen; /* add up */ } - return (newsize); } @@ -2029,7 +2011,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, else tp->t_flags |= TF_ACKNOW; - if ((thflags & TH_ECE) && V_tcp_do_ecn) { + if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && + V_tcp_do_ecn) { tp->t_flags |= TF_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } @@ -2279,6 +2262,18 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, TCPSTAT_INC(tcps_rcvpartduppack); TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); } + /* + * DSACK - add SACK block for dropped range + */ + if (tp->t_flags & TF_SACK_PERMIT) { + tcp_update_sack_list(tp, th->th_seq, + th->th_seq + todrop); + /* + * ACK now, as the next in-sequence segment + * will clear the DSACK block again + */ + tp->t_flags |= TF_ACKNOW; + } drop_hdrlen += todrop; /* drop from the top afterwards */ th->th_seq += todrop; tlen -= todrop; @@ -2403,8 +2398,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; - tp->snd_wnd = tiwin; } + tp->snd_wnd = tiwin; /* * Make transitions: * SYN-RECEIVED -> ESTABLISHED @@ -3007,6 +3002,8 @@ dodata: /* XXX */ if ((tlen || (thflags & TH_FIN) || tfo_syn) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; + tcp_seq save_rnxt = tp->rcv_nxt; + int save_tlen = tlen; m_adj(m, drop_hdrlen); /* delayed header drop */ /* * Insert segment which includes th into TCP reassembly queue @@ -3046,11 +3043,41 @@ dodata: /* XXX */ * m_adj() doesn't actually frees any mbufs * when trimming from the head. */ - thflags = tcp_reass(tp, th, &save_start, &tlen, m); + tcp_seq temp = save_start; + thflags = tcp_reass(tp, th, &temp, &tlen, m); tp->t_flags |= TF_ACKNOW; } - if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT)) - tcp_update_sack_list(tp, save_start, save_start + tlen); + if ((tp->t_flags & TF_SACK_PERMIT) && (save_tlen > 0)) { + if ((tlen == 0) && (SEQ_LT(save_start, save_rnxt))) { + /* + * DSACK actually handled in the fastpath + * above. + */ + tcp_update_sack_list(tp, save_start, + save_start + save_tlen); + } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) { + if ((tp->rcv_numsacks >= 1) && + (tp->sackblks[0].end == save_start)) { + /* + * Partial overlap, recorded at todrop + * above. + */ + tcp_update_sack_list(tp, + tp->sackblks[0].start, + tp->sackblks[0].end); + } else { + tcp_update_dsack_list(tp, save_start, + save_start + save_tlen); + } + } else if (tlen >= save_tlen) { + /* Update of sackblks. */ + tcp_update_dsack_list(tp, save_start, + save_start + save_tlen); + } else if (tlen > 0) { + tcp_update_dsack_list(tp, save_start, + save_start + tlen); + } + } #if 0 /* * Note the amount of data that peer has sent into @@ -3820,3 +3847,30 @@ tcp_compute_pipe(struct tcpcb *tp) tp->sackhint.sack_bytes_rexmit - tp->sackhint.sacked_bytes); } + +uint32_t +tcp_compute_initwnd(uint32_t maxseg) +{ + /* + * Calculate the Initial Window, also used as Restart Window + * + * RFC5681 Section 3.1 specifies the default conservative values. + * RFC3390 specifies slightly more aggressive values. + * RFC6928 increases it to ten segments. + * Support for user specified value for initial flight size. + */ + if (V_tcp_initcwnd_segments) + return min(V_tcp_initcwnd_segments * maxseg, + max(2 * maxseg, V_tcp_initcwnd_segments * 1460)); + else if (V_tcp_do_rfc3390) + return min(4 * maxseg, max(2 * maxseg, 4380)); + else { + /* Per RFC5681 Section 3.1 */ + if (maxseg > 2190) + return (2 * maxseg); + else if (maxseg > 1095) + return (3 * maxseg); + else + return (4 * maxseg); + } +} |