summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/netinet/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/netinet/tcp_input.c')
-rw-r--r--freebsd/sys/netinet/tcp_input.c134
1 files changed, 94 insertions, 40 deletions
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
index d00504dc..4bf12298 100644
--- a/freebsd/sys/netinet/tcp_input.c
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -214,11 +214,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_autorcvbuf), 0,
"Enable automatic receive buffer sizing");
-VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_autorcvbuf_inc), 0,
- "Incrementor step size of automatic receive buffer");
-
VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_max), 0,
@@ -373,31 +368,14 @@ cc_conn_init(struct tcpcb *tp)
/*
* Set the initial slow-start flight size.
*
- * RFC5681 Section 3.1 specifies the default conservative values.
- * RFC3390 specifies slightly more aggressive values.
- * RFC6928 increases it to ten segments.
- * Support for user specified value for initial flight size.
- *
* If a SYN or SYN/ACK was lost and retransmitted, we have to
* reduce the initial CWND to one segment as congestion is likely
* requiring us to be cautious.
*/
if (tp->snd_cwnd == 1)
tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */
- else if (V_tcp_initcwnd_segments)
- tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg,
- max(2 * maxseg, V_tcp_initcwnd_segments * 1460));
- else if (V_tcp_do_rfc3390)
- tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380));
- else {
- /* Per RFC5681 Section 3.1 */
- if (maxseg > 2190)
- tp->snd_cwnd = 2 * maxseg;
- else if (maxseg > 1095)
- tp->snd_cwnd = 3 * maxseg;
- else
- tp->snd_cwnd = 4 * maxseg;
- }
+ else
+ tp->snd_cwnd = tcp_compute_initwnd(maxseg);
if (CC_ALGO(tp)->conn_init != NULL)
CC_ALGO(tp)->conn_init(tp->ccv);
@@ -578,6 +556,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
int optlen = 0;
#ifdef INET
int len;
+ uint8_t ipttl;
#endif
int tlen = 0, off;
int drop_hdrlen;
@@ -700,6 +679,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* Checksum extended TCP header and data.
*/
len = off0 + tlen;
+ ipttl = ip->ip_ttl;
bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
ipov->ih_len = htons(tlen);
th->th_sum = in_cksum(m, len);
@@ -708,6 +688,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
/* Reset TOS bits */
ip->ip_tos = iptos;
/* Re-initialization for later version check */
+ ip->ip_ttl = ipttl;
ip->ip_v = IPVERSION;
ip->ip_hl = off0 >> 2;
}
@@ -1468,13 +1449,16 @@ drop:
* The criteria to step up the receive buffer one notch are:
* 1. Application has not set receive buffer size with
* SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
- * 2. the number of bytes received during the time it takes
- * one timestamp to be reflected back to us (the RTT);
- * 3. received bytes per RTT is within seven eighth of the
- * current socket buffer size;
- * 4. receive buffer size has not hit maximal automatic size;
+ * 2. the number of bytes received during 1/2 of an sRTT
+ * is more than 3/8 of the current socket buffer size;
+ * 3. receive buffer size has not hit maximal automatic size.
+ *
+ * If all of the criteria are met we increase the socket buffer
+ * by 1/2 (bounded by the max). This allows us to keep ahead
+ * of slow-start, and it keeps our peer from being limited by our
+ * rwnd, which would cause a burst once we then opened it up.
*
- * This algorithm does one step per RTT at most and only if
+ * This algorithm does two steps per RTT at most and only if
* we receive a bulk stream w/o packet losses or reorderings.
* Shrinking the buffer during idle times is not necessary as
* it doesn't consume any memory when idle.
@@ -1491,11 +1475,10 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) &&
tp->t_srtt != 0 && tp->rfbuf_ts != 0 &&
TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) >
- (tp->t_srtt >> TCP_RTT_SHIFT)) {
- if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) &&
+ ((tp->t_srtt >> TCP_RTT_SHIFT)/2)) {
+ if (tp->rfbuf_cnt > ((so->so_rcv.sb_hiwat / 2)/ 4 * 3) &&
so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) {
- newsize = min(so->so_rcv.sb_hiwat +
- V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max);
+ newsize = min((so->so_rcv.sb_hiwat + (so->so_rcv.sb_hiwat/2)), V_tcp_autorcvbuf_max);
}
TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize);
@@ -1505,7 +1488,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else {
tp->rfbuf_cnt += tlen; /* add up */
}
-
return (newsize);
}
@@ -2029,7 +2011,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
else
tp->t_flags |= TF_ACKNOW;
- if ((thflags & TH_ECE) && V_tcp_do_ecn) {
+ if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+ V_tcp_do_ecn) {
tp->t_flags |= TF_ECN_PERMIT;
TCPSTAT_INC(tcps_ecn_shs);
}
@@ -2279,6 +2262,18 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
TCPSTAT_INC(tcps_rcvpartduppack);
TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
}
+ /*
+ * DSACK - add SACK block for dropped range
+ */
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ tcp_update_sack_list(tp, th->th_seq,
+ th->th_seq + todrop);
+ /*
+ * ACK now, as the next in-sequence segment
+ * will clear the DSACK block again
+ */
+ tp->t_flags |= TF_ACKNOW;
+ }
drop_hdrlen += todrop; /* drop from the top afterwards */
th->th_seq += todrop;
tlen -= todrop;
@@ -2403,8 +2398,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->rcv_scale = tp->request_r_scale;
- tp->snd_wnd = tiwin;
}
+ tp->snd_wnd = tiwin;
/*
* Make transitions:
* SYN-RECEIVED -> ESTABLISHED
@@ -3007,6 +3002,8 @@ dodata: /* XXX */
if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
+ tcp_seq save_rnxt = tp->rcv_nxt;
+ int save_tlen = tlen;
m_adj(m, drop_hdrlen); /* delayed header drop */
/*
* Insert segment which includes th into TCP reassembly queue
@@ -3046,11 +3043,41 @@ dodata: /* XXX */
* m_adj() doesn't actually frees any mbufs
* when trimming from the head.
*/
- thflags = tcp_reass(tp, th, &save_start, &tlen, m);
+ tcp_seq temp = save_start;
+ thflags = tcp_reass(tp, th, &temp, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
- if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
- tcp_update_sack_list(tp, save_start, save_start + tlen);
+ if ((tp->t_flags & TF_SACK_PERMIT) && (save_tlen > 0)) {
+ if ((tlen == 0) && (SEQ_LT(save_start, save_rnxt))) {
+ /*
+ * DSACK actually handled in the fastpath
+ * above.
+ */
+ tcp_update_sack_list(tp, save_start,
+ save_start + save_tlen);
+ } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
+ if ((tp->rcv_numsacks >= 1) &&
+ (tp->sackblks[0].end == save_start)) {
+ /*
+ * Partial overlap, recorded at todrop
+ * above.
+ */
+ tcp_update_sack_list(tp,
+ tp->sackblks[0].start,
+ tp->sackblks[0].end);
+ } else {
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
+ }
+ } else if (tlen >= save_tlen) {
+ /* Update of sackblks. */
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
+ } else if (tlen > 0) {
+ tcp_update_dsack_list(tp, save_start,
+ save_start + tlen);
+ }
+ }
#if 0
/*
* Note the amount of data that peer has sent into
@@ -3820,3 +3847,30 @@ tcp_compute_pipe(struct tcpcb *tp)
tp->sackhint.sack_bytes_rexmit -
tp->sackhint.sacked_bytes);
}
+
+uint32_t
+tcp_compute_initwnd(uint32_t maxseg)
+{
+ /*
+ * Calculate the Initial Window, also used as Restart Window.
+ * A non-zero V_tcp_initcwnd_segments (user specified initial
+ * flight size; RFC6928 increases it to ten segments) wins.
+ * Otherwise V_tcp_do_rfc3390 selects the slightly more aggressive
+ * RFC3390 values, and the fallback is the conservative default
+ * of RFC5681 Section 3.1. Every branch scales with maxseg.
+ */
+ if (V_tcp_initcwnd_segments) /* user specified segment count */
+ return min(V_tcp_initcwnd_segments * maxseg,
+ max(2 * maxseg, V_tcp_initcwnd_segments * 1460));
+ else if (V_tcp_do_rfc3390) /* RFC3390: min(4*MSS, max(2*MSS, 4380)) */
+ return min(4 * maxseg, max(2 * maxseg, 4380));
+ else {
+ /* Per RFC5681 Section 3.1: larger segments get fewer of them */
+ if (maxseg > 2190)
+ return (2 * maxseg);
+ else if (maxseg > 1095)
+ return (3 * maxseg);
+ else
+ return (4 * maxseg);
+ }
+}