diff options
Diffstat (limited to 'freebsd/sys/netinet/sctp_cc_functions.c')
-rw-r--r-- | freebsd/sys/netinet/sctp_cc_functions.c | 1829 |
1 files changed, 1308 insertions, 521 deletions
diff --git a/freebsd/sys/netinet/sctp_cc_functions.c b/freebsd/sys/netinet/sctp_cc_functions.c index e0f8beae..36818feb 100644 --- a/freebsd/sys/netinet/sctp_cc_functions.c +++ b/freebsd/sys/netinet/sctp_cc_functions.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,6 +32,9 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <netinet/sctp_os.h> #include <netinet/sctp_var.h> #include <netinet/sctp_sysctl.h> @@ -43,47 +48,82 @@ #include <netinet/sctp_timer.h> #include <netinet/sctp_auth.h> #include <netinet/sctp_asconf.h> -#include <netinet/sctp_cc_functions.h> -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +#include <netinet/sctp_dtrace_declare.h> + +#define SHIFT_MPTCP_MULTI_N 40 +#define SHIFT_MPTCP_MULTI_Z 16 +#define SHIFT_MPTCP_MULTI 8 -void +static void sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *assoc; uint32_t cwnd_in_mtu; assoc = &stcb->asoc; - /* - * We take the minimum of the burst limit and the initial congestion - * window. The initial congestion window is at least two times the - * MTU. - */ cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd); - if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst)) - cwnd_in_mtu = assoc->max_burst; - net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; + if (cwnd_in_mtu == 0) { + /* Using 0 means that the value of RFC 4960 is used. */ + net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + } else { + /* + * We take the minimum of the burst limit and the initial + * congestion window. + */ + if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst)) + cwnd_in_mtu = assoc->max_burst; + net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; + } + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) { + /* In case of resource pooling initialize appropriately */ + net->cwnd /= assoc->numnets; + if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) { + net->cwnd = net->mtu - sizeof(struct sctphdr); + } + } net->ssthresh = assoc->peers_rwnd; - + SDT_PROBE(sctp, cwnd, net, init, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + 0, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); } } -void +static void sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; - + uint32_t t_ssthresh, t_cwnd; + uint64_t t_ucwnd_sbw; + + /* MT FIXME: Don't compute this over and over again */ + t_ssthresh = 0; + t_cwnd = 0; + t_ucwnd_sbw = 0; + if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) || + (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) { + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + t_ssthresh += net->ssthresh; + t_cwnd += net->cwnd; + if (net->lastsa > 0) { + t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) net->lastsa; + } + } + if (t_ucwnd_sbw == 0) { + t_ucwnd_sbw = 1; + } + } /*- - * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && + * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || - (asoc->sctp_cmt_on_off == 1)) { + (asoc->sctp_cmt_on_off > 0)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* @@ -95,11 +135,54 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_tmit_chunk *lchk; int old_cwnd = net->cwnd; - net->ssthresh = net->cwnd / 2; - if (net->ssthresh < (net->mtu * 2)) { - net->ssthresh = 2 * net->mtu; + if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) || + (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) { + if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) { + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + + } + if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) { + uint32_t srtt; + + srtt = net->lastsa; + /* + * lastsa>>3; we don't need + * to devide ... + */ + if (srtt == 0) { + srtt = 1; + } + /* + * Short Version => Equal to + * Contel Version MBe + */ + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * + t_ucwnd_sbw)); + /* INCREASE FACTOR */ ; + } + if ((net->cwnd > t_cwnd / 2) && + (net->ssthresh < net->cwnd - t_cwnd / 2)) { + net->ssthresh = net->cwnd - t_cwnd / 2; + } + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + } + } else { + net->ssthresh = net->cwnd / 2; + if (net->ssthresh < (net->mtu * 2)) { + net->ssthresh = 2 * net->mtu; + } } net->cwnd = net->ssthresh; + SDT_PROBE(sctp, cwnd, net, fr, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR); @@ -129,13 +212,6 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } - /* - * Disable Nonce Sum Checking and store the - * resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, @@ -151,13 +227,493 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, } } -void -sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, +/* Defines for instantaneous bw decisions */ +#define SCTP_INST_LOOSING 1 /* Loosing to other flows */ +#define SCTP_INST_NEUTRAL 2 /* Neutral, no indication */ +#define SCTP_INST_GAINING 3 /* Gaining, step down possible */ + + +static int +cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, + uint64_t rtt_offset, uint64_t vtag, uint8_t inst_ind) +{ + uint64_t oth, probepoint; + + probepoint = (((uint64_t) net->cwnd) << 32); + if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) { + /* + * rtt increased we don't update bw.. so we don't update the + * rtt either. + */ + /* Probe point 5 */ + probepoint |= ((5 << 16) | 1); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) { + if (net->cc_mod.rtcc.last_step_state == 5) + net->cc_mod.rtcc.step_cnt++; + else + net->cc_mod.rtcc.step_cnt = 1; + net->cc_mod.rtcc.last_step_state = 5; + if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) || + ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) && + ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) { + /* Try a step down */ + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + if (net->cwnd > (4 * net->mtu)) { + net->cwnd -= net->mtu; + net->cc_mod.rtcc.vol_reduce++; + } else { + net->cc_mod.rtcc.step_cnt = 0; + } + } + } + return (1); + } + if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) { + /* + * rtt decreased, there could be more room. we update both + * the bw and the rtt here to lock this in as a good step + * down. + */ + /* Probe point 6 */ + probepoint |= ((6 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + if ((net->cc_mod.rtcc.last_step_state == 5) && + (net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step)) { + /* Step down worked */ + net->cc_mod.rtcc.step_cnt = 0; + return (1); + } else { + net->cc_mod.rtcc.last_step_state = 6; + net->cc_mod.rtcc.step_cnt = 0; + } + } + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + if (inst_ind == SCTP_INST_GAINING) + return (1); + else if (inst_ind == SCTP_INST_NEUTRAL) + return (1); + else + return (0); + } + /* + * Ok bw and rtt remained the same .. no update to any + */ + /* Probe point 7 */ + probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) { + if (net->cc_mod.rtcc.last_step_state == 5) + net->cc_mod.rtcc.step_cnt++; + else + net->cc_mod.rtcc.step_cnt = 1; + net->cc_mod.rtcc.last_step_state = 5; + if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) || + ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) && + ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) { + /* Try a step down */ + if (net->cwnd > (4 * net->mtu)) { + net->cwnd -= net->mtu; + net->cc_mod.rtcc.vol_reduce++; + return (1); + } else { + net->cc_mod.rtcc.step_cnt = 0; + } + } + } + if (inst_ind == SCTP_INST_GAINING) + return (1); + else if (inst_ind == SCTP_INST_NEUTRAL) + return (1); + else + return ((int)net->cc_mod.rtcc.ret_from_eq); +} + +static int +cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t rtt_offset, + uint64_t vtag, uint8_t inst_ind) +{ + uint64_t oth, probepoint; + + /* Bandwidth decreased. */ + probepoint = (((uint64_t) net->cwnd) << 32); + if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) { + /* rtt increased */ + /* Did we add more */ + if ((net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) && + (inst_ind != SCTP_INST_LOOSING)) { + /* We caused it maybe.. back off? */ + /* PROBE POINT 1 */ + probepoint |= ((1 << 16) | 1); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.ret_from_eq) { + /* + * Switch over to CA if we are less + * aggressive + */ + net->ssthresh = net->cwnd - 1; + net->partial_bytes_acked = 0; + } + return (1); + } + /* Probe point 2 */ + probepoint |= ((2 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + /* Someone else - fight for more? */ + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + /* + * Did we voluntarily give up some? if so take one + * back please + */ + if ((net->cc_mod.rtcc.vol_reduce) && + (inst_ind != SCTP_INST_GAINING)) { + net->cwnd += net->mtu; + net->cc_mod.rtcc.vol_reduce--; + } + net->cc_mod.rtcc.last_step_state = 2; + net->cc_mod.rtcc.step_cnt = 0; + } + goto out_decision; + } else if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) { + /* bw & rtt decreased */ + /* Probe point 3 */ + probepoint |= ((3 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + if ((net->cc_mod.rtcc.vol_reduce) && + (inst_ind != SCTP_INST_GAINING)) { + net->cwnd += net->mtu; + net->cc_mod.rtcc.vol_reduce--; + } + net->cc_mod.rtcc.last_step_state = 3; + net->cc_mod.rtcc.step_cnt = 0; + } + goto out_decision; + } + /* The bw decreased but rtt stayed the same */ + /* Probe point 4 */ + probepoint |= ((4 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + if ((net->cc_mod.rtcc.vol_reduce) && + (inst_ind != SCTP_INST_GAINING)) { + net->cwnd += net->mtu; + net->cc_mod.rtcc.vol_reduce--; + } + net->cc_mod.rtcc.last_step_state = 4; + net->cc_mod.rtcc.step_cnt = 0; + } +out_decision: + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + if (inst_ind == SCTP_INST_GAINING) { + return (1); + } else { + return (0); + } +} + +static int +cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t vtag) +{ + uint64_t oth, probepoint; + + /* + * BW increased, so update and return 0, since all actions in our + * table say to do the normal CC update. Note that we pay no + * attention to the inst_ind since our overall sum is increasing. + */ + /* PROBE POINT 0 */ + probepoint = (((uint64_t) net->cwnd) << 32); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + net->cc_mod.rtcc.last_step_state = 0; + net->cc_mod.rtcc.step_cnt = 0; + net->cc_mod.rtcc.vol_reduce = 0; + } + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + return (0); +} + +/* RTCC Algoritm to limit growth of cwnd, return + * true if you want to NOT allow cwnd growth + */ +static int +cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw) +{ + uint64_t bw_offset, rtt_offset; + uint64_t probepoint, rtt, vtag; + uint64_t bytes_for_this_rtt, inst_bw; + uint64_t div, inst_off; + int bw_shift; + uint8_t inst_ind; + int ret; + + /*- + * Here we need to see if we want + * to limit cwnd growth due to increase + * in overall rtt but no increase in bw. + * We use the following table to figure + * out what we should do. When we return + * 0, cc update goes on as planned. If we + * return 1, then no cc update happens and cwnd + * stays where it is at. + * ---------------------------------- + * BW | RTT | Action + * ********************************* + * INC | INC | return 0 + * ---------------------------------- + * INC | SAME | return 0 + * ---------------------------------- + * INC | DECR | return 0 + * ---------------------------------- + * SAME | INC | return 1 + * ---------------------------------- + * SAME | SAME | return 1 + * ---------------------------------- + * SAME | DECR | return 0 + * ---------------------------------- + * DECR | INC | return 0 or 1 based on if we caused. + * ---------------------------------- + * DECR | SAME | return 0 + * ---------------------------------- + * DECR | DECR | return 0 + * ---------------------------------- + * + * We are a bit fuzz on what an increase or + * decrease is. For BW it is the same if + * it did not change within 1/64th. For + * RTT it stayed the same if it did not + * change within 1/32nd + */ + bw_shift = SCTP_BASE_SYSCTL(sctp_rttvar_bw); + rtt = stcb->asoc.my_vtag; + vtag = (rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport); + probepoint = (((uint64_t) net->cwnd) << 32); + rtt = net->rtt; + if (net->cc_mod.rtcc.rtt_set_this_sack) { + net->cc_mod.rtcc.rtt_set_this_sack = 0; + bytes_for_this_rtt = net->cc_mod.rtcc.bw_bytes - net->cc_mod.rtcc.bw_bytes_at_last_rttc; + net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes; + if (net->rtt) { + div = net->rtt / 1000; + if (div) { + inst_bw = bytes_for_this_rtt / div; + inst_off = inst_bw >> bw_shift; + if (inst_bw > nbw) + inst_ind = SCTP_INST_GAINING; + else if ((inst_bw + inst_off) < nbw) + inst_ind = SCTP_INST_LOOSING; + else + inst_ind = SCTP_INST_NEUTRAL; + probepoint |= ((0xb << 16) | inst_ind); + } else { + inst_ind = net->cc_mod.rtcc.last_inst_ind; + inst_bw = bytes_for_this_rtt / (uint64_t) (net->rtt); + /* Can't determine do not change */ + probepoint |= ((0xc << 16) | inst_ind); + } + } else { + inst_ind = net->cc_mod.rtcc.last_inst_ind; + inst_bw = bytes_for_this_rtt; + /* Can't determine do not change */ + probepoint |= ((0xd << 16) | inst_ind); + } + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((nbw << 32) | inst_bw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt), + net->flight_size, + probepoint); + } else { + /* No rtt measurement, use last one */ + inst_ind = net->cc_mod.rtcc.last_inst_ind; + } + bw_offset = net->cc_mod.rtcc.lbw >> bw_shift; + if (nbw > net->cc_mod.rtcc.lbw + bw_offset) { + ret = cc_bw_increase(stcb, net, nbw, vtag); + goto out; + } + rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt); + if (nbw < net->cc_mod.rtcc.lbw - bw_offset) { + ret = cc_bw_decrease(stcb, net, nbw, rtt_offset, vtag, inst_ind); + goto out; + } + /* + * If we reach here then we are in a situation where the bw stayed + * the same. + */ + ret = cc_bw_same(stcb, net, nbw, rtt_offset, vtag, inst_ind); +out: + net->cc_mod.rtcc.last_inst_ind = inst_ind; + return (ret); +} + +static void +sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb, struct sctp_association *asoc, - int accum_moved, int reneged_all, int will_exit) + int accum_moved, int reneged_all SCTP_UNUSED, int will_exit, int use_rtcc) { struct sctp_nets *net; - + int old_cwnd; + uint32_t t_ssthresh, t_cwnd, incr; + uint64_t t_ucwnd_sbw; + uint64_t t_path_mptcp; + uint64_t mptcp_like_alpha; + uint32_t srtt; + uint64_t max_path; + + /* MT FIXME: Don't compute this over and over again */ + t_ssthresh = 0; + t_cwnd = 0; + t_ucwnd_sbw = 0; + t_path_mptcp = 0; + mptcp_like_alpha = 1; + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) { + max_path = 0; + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + t_ssthresh += net->ssthresh; + t_cwnd += net->cwnd; + /* lastsa>>3; we don't need to devide ... */ + srtt = net->lastsa; + if (srtt > 0) { + uint64_t tmp; + + t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) srtt; + t_path_mptcp += (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_Z) / + (((uint64_t) net->mtu) * (uint64_t) srtt); + tmp = (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_N) / + ((uint64_t) net->mtu * (uint64_t) (srtt * srtt)); + if (tmp > max_path) { + max_path = tmp; + } + } + } + if (t_path_mptcp > 0) { + mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp); + } else { + mptcp_like_alpha = 1; + } + } + if (t_ssthresh == 0) { + t_ssthresh = 1; + } + if (t_ucwnd_sbw == 0) { + t_ucwnd_sbw = 1; + } /******************************/ /* update cwnd and Early FR */ /******************************/ @@ -168,49 +724,12 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { - if (compare_with_wrap(asoc->last_acked_seq, - net->fast_recovery_tsn, MAX_TSN) || - (asoc->last_acked_seq == net->fast_recovery_tsn) || - compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || - (net->pseudo_cumack == net->fast_recovery_tsn)) { + if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) || + SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - /* - * So, first of all do we need to have a Early FR - * timer running? - */ - if ((!TAILQ_EMPTY(&asoc->sent_queue) && - (net->ref_count > 1) && - (net->flight_size < net->cwnd)) || - (reneged_all)) { - /* - * yes, so in this case stop it if its - * running, and then restart it. Reneging - * all is a special case where we want to - * run the Early FR timer and then force the - * last few unacked to be sent, causing us - * to illicit a sack with gaps to force out - * the others. - */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck2); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); - } - SCTP_STAT_INCR(sctps_earlyfrstrid); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); - } else { - /* No, stop it if its running */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck3); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); - } - } - } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { @@ -218,56 +737,14 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, } continue; } - if (net->net_ack2 > 0) { - /* - * Karn's rule applies to clearing error count, this - * is optional. - */ - net->error_count = 0; - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == - SCTP_ADDR_NOT_REACHABLE) { - /* addr came good */ - net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; - net->dest_state |= SCTP_ADDR_REACHABLE; - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, - SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); - /* now was it the primary? if so restore */ - if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { - (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); - } - } - /* - * JRS 5/14/07 - If CMT PF is on and the destination - * is in PF state, set the destination to active - * state and set the cwnd to one or two MTU's based - * on whether PF1 or PF2 is being used. - * - * Should we stop any running T3 timer here? - */ - if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - net->dest_state &= ~SCTP_ADDR_PF; - net->cwnd = net->mtu * asoc->sctp_cmt_pf; - SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", - net, net->cwnd); - /* - * Since the cwnd value is explicitly set, - * skip the code that updates the cwnd - * value. - */ - goto skip_cwnd_update; - } - } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* - * if (sctp_cmt_on_off == 1 && - * net->fast_retran_loss_recovery && - * net->will_exit_fast_recovery == 0) { @@@ Do something } - * else if (sctp_cmt_on_off == 0 && + * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery + * && net->will_exit_fast_recovery == 0) { @@@ Do something + * } else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif @@ -279,31 +756,143 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, * If we are in loss recovery we skip any cwnd * update */ - goto skip_cwnd_update; + return; + } + /* + * Did any measurements go on for this network? + */ + if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) { + uint64_t nbw; + + /* + * At this point our bw_bytes has been updated by + * incoming sack information. + * + * But our bw may not yet be set. + * + */ + if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) { + nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000); + } else { + nbw = net->cc_mod.rtcc.bw_bytes; + } + if (net->cc_mod.rtcc.lbw) { + if (cc_bw_limit(stcb, net, nbw)) { + /* Hold here, no update */ + continue; + } + } else { + uint64_t vtag, probepoint; + + probepoint = (((uint64_t) net->cwnd) << 32); + probepoint |= ((0xa << 16) | 0); + vtag = (net->rtt << 32) | + (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + nbw, + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + if (net->cc_mod.rtcc.rtt_set_this_sack) { + net->cc_mod.rtcc.rtt_set_this_sack = 0; + net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes; + } + } } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || - ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { + ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { /* If the cumulative ack moved we can proceed */ if (net->cwnd <= net->ssthresh) { /* We are in slow start */ if (net->flight_size + net->net_ack >= net->cwnd) { - if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) { - net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)); - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->mtu, - SCTP_CWND_LOG_FROM_SS); + uint32_t limit; + + old_cwnd = net->cwnd; + switch (asoc->sctp_cmt_on_off) { + case SCTP_CMT_RPV1: + limit = (uint32_t) (((uint64_t) net->mtu * + (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + incr = (uint32_t) (((uint64_t) net->net_ack * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + if (incr > limit) { + incr = limit; + } + if (incr == 0) { + incr = 1; } - } else { - net->cwnd += net->net_ack; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->net_ack, - SCTP_CWND_LOG_FROM_SS); + break; + case SCTP_CMT_RPV2: + /* + * lastsa>>3; we don't need + * to divide ... + */ + srtt = net->lastsa; + if (srtt == 0) { + srtt = 1; } + limit = (uint32_t) (((uint64_t) net->mtu * + (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * t_ucwnd_sbw)); + /* INCREASE FACTOR */ + incr = (uint32_t) (((uint64_t) net->net_ack * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * t_ucwnd_sbw)); + /* INCREASE FACTOR */ + if (incr > limit) { + incr = limit; + } + if (incr == 0) { + incr = 1; + } + break; + case SCTP_CMT_MPTCP: + limit = (uint32_t) (((uint64_t) net->mtu * + mptcp_like_alpha * + (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >> + SHIFT_MPTCP_MULTI); + incr = (uint32_t) (((uint64_t) net->net_ack * + mptcp_like_alpha) >> + SHIFT_MPTCP_MULTI); + if (incr > limit) { + incr = limit; + } + if (incr > net->net_ack) { + incr = net->net_ack; + } + if (incr > net->mtu) { + incr = net->mtu; + } + break; + default: + incr = net->net_ack; + if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) { + incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable); + } + break; } + net->cwnd += incr; + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, incr, + SCTP_CWND_LOG_FROM_SS); + } + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, @@ -320,7 +909,52 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, if ((net->flight_size + net->net_ack >= net->cwnd) && (net->partial_bytes_acked >= net->cwnd)) { net->partial_bytes_acked -= net->cwnd; - net->cwnd += net->mtu; + old_cwnd = net->cwnd; + switch (asoc->sctp_cmt_on_off) { + case SCTP_CMT_RPV1: + incr = (uint32_t) (((uint64_t) net->mtu * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + if (incr == 0) { + incr = 1; + } + break; + case SCTP_CMT_RPV2: + /* + * lastsa>>3; we don't need + * to divide ... + */ + srtt = net->lastsa; + if (srtt == 0) { + srtt = 1; + } + incr = (uint32_t) ((uint64_t) net->mtu * + (uint64_t) net->cwnd / + ((uint64_t) srtt * + t_ucwnd_sbw)); + /* INCREASE FACTOR */ + if (incr == 0) { + incr = 1; + } + break; + case SCTP_CMT_MPTCP: + incr = (uint32_t) ((mptcp_like_alpha * + (uint64_t) net->cwnd) >> + SHIFT_MPTCP_MULTI); + if (incr > net->mtu) { + incr = net->mtu; + } + break; + default: + incr = net->mtu; + break; + } + net->cwnd += incr; + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_CA); @@ -338,69 +972,172 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, SCTP_CWND_LOG_NO_CUMACK); } } -skip_cwnd_update: - /* - * NOW, according to Karn's rule do we need to restore the - * RTO timer back? Check our net_ack2. If not set then we - * have a ambiguity.. i.e. all data ack'd was sent to more - * than one place. - */ - if (net->net_ack2) { - /* restore any doubled timers */ - net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; - if (net->RTO < stcb->asoc.minrto) { - net->RTO = stcb->asoc.minrto; - } - if (net->RTO > stcb->asoc.maxrto) { - net->RTO = stcb->asoc.maxrto; - } - } } } -void +static void +sctp_cwnd_update_exit_pf_common(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + int old_cwnd; + + old_cwnd = net->cwnd; + net->cwnd = net->mtu; + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + old_cwnd, net->cwnd); + SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", + (void *)net, net->cwnd); +} + + +static void sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd = net->cwnd; + uint32_t t_ssthresh, t_cwnd; + uint64_t t_ucwnd_sbw; + + /* MT FIXME: Don't compute this over and over again */ + t_ssthresh = 0; + t_cwnd = 0; + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) { + struct sctp_nets *lnet; + uint32_t srtt; + + t_ucwnd_sbw = 0; + TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) { + t_ssthresh += lnet->ssthresh; + t_cwnd += lnet->cwnd; + srtt = lnet->lastsa; + /* lastsa>>3; we don't need to divide ... */ + if (srtt > 0) { + t_ucwnd_sbw += (uint64_t) lnet->cwnd / (uint64_t) srtt; + } + } + if (t_ssthresh < 1) { + t_ssthresh = 1; + } + if (t_ucwnd_sbw < 1) { + t_ucwnd_sbw = 1; + } + if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) { + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + } else { + uint64_t cc_delta; - net->ssthresh = max(net->cwnd / 2, 4 * net->mtu); + srtt = net->lastsa; + /* lastsa>>3; we don't need to divide ... */ + if (srtt == 0) { + srtt = 1; + } + cc_delta = t_ucwnd_sbw * (uint64_t) srtt / 2; + if (cc_delta < t_cwnd) { + net->ssthresh = (uint32_t) ((uint64_t) t_cwnd - cc_delta); + } else { + net->ssthresh = net->mtu; + } + } + if ((net->cwnd > t_cwnd / 2) && + (net->ssthresh < net->cwnd - t_cwnd / 2)) { + net->ssthresh = net->cwnd - t_cwnd / 2; + } + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + } + } else { + net->ssthresh = max(net->cwnd / 2, 4 * net->mtu); + } net->cwnd = net->mtu; net->partial_bytes_acked = 0; - + SDT_PROBE(sctp, cwnd, net, to, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); } } -void -sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net) +static void +sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost, int use_rtcc) { int old_cwnd = net->cwnd; - SCTP_STAT_INCR(sctps_ecnereducedcwnd); - net->ssthresh = net->cwnd / 2; - if (net->ssthresh < net->mtu) { - net->ssthresh = net->mtu; - /* here back off the timer as well, to slow us down */ - net->RTO <<= 1; - } - net->cwnd = net->ssthresh; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) { + /* Data center Congestion Control */ + if (in_window == 0) { + /* + * Go to CA with the cwnd at the point we sent the + * TSN that was marked with a CE. + */ + if (net->ecn_prev_cwnd < net->cwnd) { + /* Restore to prev cwnd */ + net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost); + } else { + /* Just cut in 1/2 */ + net->cwnd /= 2; + } + /* Drop to CA */ + net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } + } else { + /* + * Further tuning down required over the drastic + * orginal cut + */ + net->ssthresh -= (net->mtu * num_pkt_lost); + net->cwnd -= (net->mtu * num_pkt_lost); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } + } + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + } else { + if (in_window == 0) { + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + net->ssthresh = net->cwnd / 2; + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + /* + * here back off the timer as well, to slow + * us down + */ + net->RTO <<= 1; + } + net->cwnd = net->ssthresh; + SDT_PROBE(sctp, cwnd, net, ecn, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } + } } + } -void +static void sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, uint32_t * bottle_bw, uint32_t * on_queue) { uint32_t bw_avail; - int rtt, incr; + int rtt; + unsigned int incr; int old_cwnd = net->cwnd; - /* need real RTT for this calc */ - rtt = ((net->lastsa >> 2) + net->lastsv) >> 1; + /* need real RTT in msd for this calc */ + rtt = net->rtt / 1000; /* get bottle neck bw */ *bottle_bw = ntohl(cp->bottle_bw); /* and whats on queue */ @@ -480,8 +1217,11 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, * Take 1/4 of the space left or max burst up .. whichever * is less. */ - incr = min((bw_avail - *on_queue) >> 2, - stcb->asoc.max_burst * net->mtu); + incr = (bw_avail - *on_queue) >> 2; + if ((stcb->asoc.max_burst > 0) && + (stcb->asoc.max_burst * net->mtu < incr)) { + incr = stcb->asoc.max_burst * net->mtu; + } net->cwnd += incr; } if (net->cwnd > bw_avail) { @@ -494,6 +1234,11 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, } if (net->cwnd - old_cwnd != 0) { /* log only changes */ + SDT_PROBE(sctp, cwnd, net, pd, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); @@ -501,7 +1246,7 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, } } -void +static void sctp_cwnd_update_after_output(struct sctp_tcb *stcb, struct sctp_nets *net, int burst_limit) { @@ -509,34 +1254,258 @@ sctp_cwnd_update_after_output(struct sctp_tcb *stcb, if (net->ssthresh < net->cwnd) net->ssthresh = net->cwnd; - net->cwnd = (net->flight_size + (burst_limit * net->mtu)); + if (burst_limit) { + net->cwnd = (net->flight_size + (burst_limit * net->mtu)); + SDT_PROBE(sctp, cwnd, net, bl, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); + } + } +} - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); +static void +sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + /* Passing a zero argument in last disables the rtcc algoritm */ + sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0); +} + +static void +sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost) +{ + /* Passing a zero argument in last disables the rtcc algoritm */ + sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0); +} + +/* Here starts the RTCCVAR type CC invented by RRS which + * is a slight mod to RFC2581. We reuse a common routine or + * two since these algoritms are so close and need to + * remain the same. + */ +static void +sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost) +{ + sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 1); +} + + +static +void +sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net, + struct sctp_tmit_chunk *tp1) +{ + net->cc_mod.rtcc.bw_bytes += tp1->send_size; +} + +static void +sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb SCTP_UNUSED, + struct sctp_nets *net) +{ + if (net->cc_mod.rtcc.tls_needs_set > 0) { + /* We had a bw measurment going on */ + struct timeval ltls; + + SCTP_GETPTIME_TIMEVAL(<ls); + timevalsub(<ls, &net->cc_mod.rtcc.tls); + net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec; } } -void -sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, struct sctp_nets *net) +static void +sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb, + struct sctp_nets *net) { - int old_cwnd = net->cwnd; + uint64_t vtag, probepoint; + + if (net->cc_mod.rtcc.lbw) { + /* Clear the old bw.. we went to 0 in-flight */ + vtag = (net->rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + probepoint = (((uint64_t) net->cwnd) << 32); + /* Probe point 8 */ + probepoint |= ((8 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | 0), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + net->cc_mod.rtcc.lbw_rtt = 0; + net->cc_mod.rtcc.cwnd_at_bw_set = 0; + net->cc_mod.rtcc.lbw = 0; + net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0; + net->cc_mod.rtcc.vol_reduce = 0; + net->cc_mod.rtcc.bw_tot_time = 0; + net->cc_mod.rtcc.bw_bytes = 0; + net->cc_mod.rtcc.tls_needs_set = 0; + if (net->cc_mod.rtcc.steady_step) { + net->cc_mod.rtcc.vol_reduce = 0; + net->cc_mod.rtcc.step_cnt = 0; + net->cc_mod.rtcc.last_step_state = 0; + } + if (net->cc_mod.rtcc.ret_from_eq) { + /* less aggressive one - reset cwnd too */ + uint32_t cwnd_in_mtu, cwnd; - sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); - /* - * make a small adjustment to cwnd and force to CA. - */ - if (net->cwnd > net->mtu) - /* drop down one MTU after sending */ - net->cwnd -= net->mtu; - if (net->cwnd < net->ssthresh) - /* still in SS move to CA */ - net->ssthresh = net->cwnd - 1; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); + cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd); + if (cwnd_in_mtu == 0) { + /* + * Using 0 means that the value of RFC 4960 + * is used. + */ + cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + } else { + /* + * We take the minimum of the burst limit + * and the initial congestion window. + */ + if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst)) + cwnd_in_mtu = stcb->asoc.max_burst; + cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; + } + if (net->cwnd > cwnd) { + /* + * Only set if we are not a timeout (i.e. + * down to 1 mtu) + */ + net->cwnd = cwnd; + } + } + } +} + +static void +sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + uint64_t vtag, probepoint; + + sctp_set_initial_cc_param(stcb, net); + stcb->asoc.use_precise_time = 1; + probepoint = (((uint64_t) net->cwnd) << 32); + probepoint |= ((9 << 16) | 0); + vtag = (net->rtt << 32) | + (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + 0, + 0, + 0, + probepoint); + net->cc_mod.rtcc.lbw_rtt = 0; + net->cc_mod.rtcc.cwnd_at_bw_set = 0; + net->cc_mod.rtcc.vol_reduce = 0; + net->cc_mod.rtcc.lbw = 0; + net->cc_mod.rtcc.vol_reduce = 0; + net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0; + net->cc_mod.rtcc.bw_tot_time = 0; + net->cc_mod.rtcc.bw_bytes = 0; + net->cc_mod.rtcc.tls_needs_set = 0; + net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret); + net->cc_mod.rtcc.steady_step = SCTP_BASE_SYSCTL(sctp_steady_step); + net->cc_mod.rtcc.use_dccc_ecn = SCTP_BASE_SYSCTL(sctp_use_dccc_ecn); + net->cc_mod.rtcc.step_cnt = 0; + net->cc_mod.rtcc.last_step_state = 0; + + +} + +static int +sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget, + struct sctp_cc_option *cc_opt) +{ + struct sctp_nets *net; + + if (setorget == 1) { + /* a set */ + if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) { + if ((cc_opt->aid_value.assoc_value != 0) && + (cc_opt->aid_value.assoc_value != 1)) { + return (EINVAL); + } + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value; + } + } else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) { + if ((cc_opt->aid_value.assoc_value != 0) && + (cc_opt->aid_value.assoc_value != 1)) { + return (EINVAL); + } + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value; + } + } else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) { + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->cc_mod.rtcc.steady_step = cc_opt->aid_value.assoc_value; + } + } else { + return (EINVAL); + } + } else { + /* a get */ + if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) { + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + return (EFAULT); + } + cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq; + } else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) { + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + return (EFAULT); + } + cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn; + } else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) { + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + return (EFAULT); + } + cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.steady_step; + } else { + return (EINVAL); + } + } + return (0); +} + +static void +sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb SCTP_UNUSED, + struct sctp_nets *net) +{ + if (net->cc_mod.rtcc.tls_needs_set == 0) { + SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls); + net->cc_mod.rtcc.tls_needs_set = 2; } } +static void +sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + /* Passing a one argument at the last enables the rtcc algoritm */ + sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 1); +} + +static void +sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb SCTP_UNUSED, + struct sctp_nets *net, + struct timeval *now SCTP_UNUSED) +{ + net->cc_mod.rtcc.rtt_set_this_sack = 1; +} + +/* Here starts Sally Floyds HS-TCP */ + struct sctp_hs_raise_drop { int32_t cwnd; int32_t increase; @@ -628,9 +1597,7 @@ sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net) cur_val = net->cwnd >> 10; indx = SCTP_HS_TABLE_SIZE - 1; -#ifdef SCTP_DEBUG - printf("HS CC CAlled.\n"); -#endif + if (cur_val < sctp_cwnd_adjust[0].cwnd) { /* normal mode */ if (net->net_ack > net->mtu) { @@ -700,19 +1667,19 @@ sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net) } } -void +static void sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; /* - * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && + * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || - (asoc->sctp_cmt_on_off == 1)) { + (asoc->sctp_cmt_on_off > 0)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* @@ -750,13 +1717,6 @@ sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } - /* - * Disable Nonce Sum Checking and store the - * resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, @@ -772,10 +1732,10 @@ sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, } } -void +static void sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, struct sctp_association *asoc, - int accum_moved, int reneged_all, int will_exit) + int accum_moved, int reneged_all SCTP_UNUSED, int will_exit) { struct sctp_nets *net; @@ -789,49 +1749,12 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { - if (compare_with_wrap(asoc->last_acked_seq, - net->fast_recovery_tsn, MAX_TSN) || - (asoc->last_acked_seq == net->fast_recovery_tsn) || - compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || - (net->pseudo_cumack == net->fast_recovery_tsn)) { + if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) || + SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - /* - * So, first of all do we need to have a Early FR - * timer running? - */ - if ((!TAILQ_EMPTY(&asoc->sent_queue) && - (net->ref_count > 1) && - (net->flight_size < net->cwnd)) || - (reneged_all)) { - /* - * yes, so in this case stop it if its - * running, and then restart it. Reneging - * all is a special case where we want to - * run the Early FR timer and then force the - * last few unacked to be sent, causing us - * to illicit a sack with gaps to force out - * the others. - */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck2); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); - } - SCTP_STAT_INCR(sctps_earlyfrstrid); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); - } else { - /* No, stop it if its running */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck3); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); - } - } - } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { @@ -839,56 +1762,14 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, } continue; } - if (net->net_ack2 > 0) { - /* - * Karn's rule applies to clearing error count, this - * is optional. - */ - net->error_count = 0; - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == - SCTP_ADDR_NOT_REACHABLE) { - /* addr came good */ - net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; - net->dest_state |= SCTP_ADDR_REACHABLE; - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, - SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); - /* now was it the primary? if so restore */ - if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { - (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); - } - } - /* - * JRS 5/14/07 - If CMT PF is on and the destination - * is in PF state, set the destination to active - * state and set the cwnd to one or two MTU's based - * on whether PF1 or PF2 is being used. - * - * Should we stop any running T3 timer here? - */ - if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - net->dest_state &= ~SCTP_ADDR_PF; - net->cwnd = net->mtu * asoc->sctp_cmt_pf; - SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", - net, net->cwnd); - /* - * Since the cwnd value is explicitly set, - * skip the code that updates the cwnd - * value. - */ - goto skip_cwnd_update; - } - } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* - * if (sctp_cmt_on_off == 1 && - * net->fast_retran_loss_recovery && - * net->will_exit_fast_recovery == 0) { @@@ Do something } - * else if (sctp_cmt_on_off == 0 && + * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery + * && net->will_exit_fast_recovery == 0) { @@@ Do something + * } else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif @@ -900,14 +1781,14 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, * If we are in loss recovery we skip any cwnd * update */ - goto skip_cwnd_update; + return; } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || - ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { + ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { /* If the cumulative ack moved we can proceed */ if (net->cwnd <= net->ssthresh) { /* We are in slow start */ @@ -945,23 +1826,6 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, SCTP_CWND_LOG_NO_CUMACK); } } -skip_cwnd_update: - /* - * NOW, according to Karn's rule do we need to restore the - * RTO timer back? Check our net_ack2. If not set then we - * have a ambiguity.. i.e. all data ack'd was sent to more - * than one place. - */ - if (net->net_ack2) { - /* restore any doubled timers */ - net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; - if (net->RTO < stcb->asoc.minrto) { - net->RTO = stcb->asoc.minrto; - } - if (net->RTO > stcb->asoc.maxrto) { - net->RTO = stcb->asoc.maxrto; - } - } } } @@ -981,19 +1845,19 @@ static int use_bandwidth_switch = 1; static inline int between(uint32_t seq1, uint32_t seq2, uint32_t seq3) { - return seq3 - seq2 >= seq1 - seq2; + return (seq3 - seq2 >= seq1 - seq2); } static inline uint32_t htcp_cong_time(struct htcp *ca) { - return sctp_get_tick_count() - ca->last_cong; + return (sctp_get_tick_count() - ca->last_cong); } static inline uint32_t htcp_ccount(struct htcp *ca) { - return htcp_cong_time(ca) / ca->minRTT; + return (htcp_cong_time(ca) / ca->minRTT); } static inline void @@ -1010,39 +1874,39 @@ htcp_reset(struct htcp *ca) static uint32_t htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net) { - net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong; - net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT; - net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB; - return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu); + net->cc_mod.htcp_ca.last_cong = net->cc_mod.htcp_ca.undo_last_cong; + net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.undo_maxRTT; + net->cc_mod.htcp_ca.old_maxB = net->cc_mod.htcp_ca.undo_old_maxB; + return (max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->cc_mod.htcp_ca.beta) * net->mtu)); } #endif static inline void -measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net) +measure_rtt(struct sctp_nets *net) { - uint32_t srtt = net->lastsa >> 3; + uint32_t srtt = net->lastsa >> SCTP_RTT_SHIFT; /* keep track of minimum RTT seen so far, minRTT is zero at first */ - if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT) - net->htcp_ca.minRTT = srtt; + if (net->cc_mod.htcp_ca.minRTT > srtt || !net->cc_mod.htcp_ca.minRTT) + net->cc_mod.htcp_ca.minRTT = srtt; /* max RTT */ - if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) { - if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT) - net->htcp_ca.maxRTT = net->htcp_ca.minRTT; - if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20)) - net->htcp_ca.maxRTT = srtt; + if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->cc_mod.htcp_ca) > 3) { + if (net->cc_mod.htcp_ca.maxRTT < net->cc_mod.htcp_ca.minRTT) + net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.minRTT; + if (net->cc_mod.htcp_ca.maxRTT < srtt && srtt <= net->cc_mod.htcp_ca.maxRTT + MSEC_TO_TICKS(20)) + net->cc_mod.htcp_ca.maxRTT = srtt; } } static void -measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net) +measure_achieved_throughput(struct sctp_nets *net) { uint32_t now = sctp_get_tick_count(); if (net->fast_retran_ip == 0) - net->htcp_ca.bytes_acked = net->net_ack; + net->cc_mod.htcp_ca.bytes_acked = net->net_ack; if (!use_bandwidth_switch) return; @@ -1050,29 +1914,28 @@ measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net) /* achieved throughput calculations */ /* JRS - not 100% sure of this statement */ if (net->fast_retran_ip == 1) { - net->htcp_ca.bytecount = 0; - net->htcp_ca.lasttime = now; + net->cc_mod.htcp_ca.bytecount = 0; + net->cc_mod.htcp_ca.lasttime = now; return; } - net->htcp_ca.bytecount += net->net_ack; - - if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu) - && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT - && net->htcp_ca.minRTT > 0) { - uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime); + net->cc_mod.htcp_ca.bytecount += net->net_ack; + if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) && + (now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) && + (net->cc_mod.htcp_ca.minRTT > 0)) { + uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime); - if (htcp_ccount(&net->htcp_ca) <= 3) { + if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) { /* just after backoff */ - net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi; + net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi = cur_Bi; } else { - net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4; - if (net->htcp_ca.Bi > net->htcp_ca.maxB) - net->htcp_ca.maxB = net->htcp_ca.Bi; - if (net->htcp_ca.minB > net->htcp_ca.maxB) - net->htcp_ca.minB = net->htcp_ca.maxB; + net->cc_mod.htcp_ca.Bi = (3 * net->cc_mod.htcp_ca.Bi + cur_Bi) / 4; + if (net->cc_mod.htcp_ca.Bi > net->cc_mod.htcp_ca.maxB) + net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi; + if (net->cc_mod.htcp_ca.minB > net->cc_mod.htcp_ca.maxB) + net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB; } - net->htcp_ca.bytecount = 0; - net->htcp_ca.lasttime = now; + net->cc_mod.htcp_ca.bytecount = 0; + net->cc_mod.htcp_ca.lasttime = now; } } @@ -1137,27 +2000,27 @@ htcp_alpha_update(struct htcp *ca) * were getting just too full now). */ static void -htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net) +htcp_param_update(struct sctp_nets *net) { - uint32_t minRTT = net->htcp_ca.minRTT; - uint32_t maxRTT = net->htcp_ca.maxRTT; + uint32_t minRTT = net->cc_mod.htcp_ca.minRTT; + uint32_t maxRTT = net->cc_mod.htcp_ca.maxRTT; - htcp_beta_update(&net->htcp_ca, minRTT, maxRTT); - htcp_alpha_update(&net->htcp_ca); + htcp_beta_update(&net->cc_mod.htcp_ca, minRTT, maxRTT); + htcp_alpha_update(&net->cc_mod.htcp_ca); /* * add slowly fading memory for maxRTT to accommodate routing * changes etc */ if (minRTT > 0 && maxRTT > minRTT) - net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100; + net->cc_mod.htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100; } static uint32_t -htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net) +htcp_recalc_ssthresh(struct sctp_nets *net) { - htcp_param_update(stcb, net); - return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu); + htcp_param_update(net); + return (max(((net->cwnd / net->mtu * net->cc_mod.htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu)); } static void @@ -1191,21 +2054,21 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) } } } else { - measure_rtt(stcb, net); + measure_rtt(net); /* * In dangerous area, increase slowly. In theory this is * net->cwnd += alpha / net->cwnd */ /* What is snd_cwnd_cnt?? */ - if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) { + if (((net->partial_bytes_acked / net->mtu * net->cc_mod.htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) { /*- * Does SCTP have a cwnd clamp? * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS). */ net->cwnd += net->mtu; net->partial_bytes_acked = 0; - htcp_alpha_update(&net->htcp_ca); + htcp_alpha_update(&net->cc_mod.htcp_ca); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_CA); @@ -1218,7 +2081,7 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) } } - net->htcp_ca.bytes_acked = net->mtu; + net->cc_mod.htcp_ca.bytes_acked = net->mtu; } } @@ -1227,22 +2090,22 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) static uint32_t htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net) { - return net->ssthresh; + return (net->ssthresh); } #endif static void -htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net) +htcp_init(struct sctp_nets *net) { - memset(&net->htcp_ca, 0, sizeof(struct htcp)); - net->htcp_ca.alpha = ALPHA_BASE; - net->htcp_ca.beta = BETA_MIN; - net->htcp_ca.bytes_acked = net->mtu; - net->htcp_ca.last_cong = sctp_get_tick_count(); + memset(&net->cc_mod.htcp_ca, 0, sizeof(struct htcp)); + net->cc_mod.htcp_ca.alpha = ALPHA_BASE; + net->cc_mod.htcp_ca.beta = BETA_MIN; + net->cc_mod.htcp_ca.bytes_acked = net->mtu; + net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count(); } -void +static void sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) { /* @@ -1251,17 +2114,17 @@ sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) */ net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); net->ssthresh = stcb->asoc.peers_rwnd; - htcp_init(stcb, net); + htcp_init(net); if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); } } -void +static void sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, struct sctp_association *asoc, - int accum_moved, int reneged_all, int will_exit) + int accum_moved, int reneged_all SCTP_UNUSED, int will_exit) { struct sctp_nets *net; @@ -1275,49 +2138,12 @@ sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { - if (compare_with_wrap(asoc->last_acked_seq, - net->fast_recovery_tsn, MAX_TSN) || - (asoc->last_acked_seq == net->fast_recovery_tsn) || - compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || - (net->pseudo_cumack == net->fast_recovery_tsn)) { + if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) || + SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - /* - * So, first of all do we need to have a Early FR - * timer running? - */ - if ((!TAILQ_EMPTY(&asoc->sent_queue) && - (net->ref_count > 1) && - (net->flight_size < net->cwnd)) || - (reneged_all)) { - /* - * yes, so in this case stop it if its - * running, and then restart it. Reneging - * all is a special case where we want to - * run the Early FR timer and then force the - * last few unacked to be sent, causing us - * to illicit a sack with gaps to force out - * the others. - */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck2); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); - } - SCTP_STAT_INCR(sctps_earlyfrstrid); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); - } else { - /* No, stop it if its running */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck3); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); - } - } - } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { @@ -1325,56 +2151,14 @@ sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, } continue; } - if (net->net_ack2 > 0) { - /* - * Karn's rule applies to clearing error count, this - * is optional. - */ - net->error_count = 0; - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == - SCTP_ADDR_NOT_REACHABLE) { - /* addr came good */ - net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; - net->dest_state |= SCTP_ADDR_REACHABLE; - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, - SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); - /* now was it the primary? if so restore */ - if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { - (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); - } - } - /* - * JRS 5/14/07 - If CMT PF is on and the destination - * is in PF state, set the destination to active - * state and set the cwnd to one or two MTU's based - * on whether PF1 or PF2 is being used. - * - * Should we stop any running T3 timer here? - */ - if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - net->dest_state &= ~SCTP_ADDR_PF; - net->cwnd = net->mtu * asoc->sctp_cmt_pf; - SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", - net, net->cwnd); - /* - * Since the cwnd value is explicitly set, - * skip the code that updates the cwnd - * value. - */ - goto skip_cwnd_update; - } - } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* - * if (sctp_cmt_on_off == 1 && - * net->fast_retran_loss_recovery && - * net->will_exit_fast_recovery == 0) { @@@ Do something } - * else if (sctp_cmt_on_off == 0 && + * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery + * && net->will_exit_fast_recovery == 0) { @@@ Do something + * } else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif @@ -1386,55 +2170,38 @@ sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, * If we are in loss recovery we skip any cwnd * update */ - goto skip_cwnd_update; + return; } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || - ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { + ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { htcp_cong_avoid(stcb, net); - measure_achieved_throughput(stcb, net); + measure_achieved_throughput(net); } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_NO_CUMACK); } } -skip_cwnd_update: - /* - * NOW, according to Karn's rule do we need to restore the - * RTO timer back? Check our net_ack2. If not set then we - * have a ambiguity.. i.e. all data ack'd was sent to more - * than one place. - */ - if (net->net_ack2) { - /* restore any doubled timers */ - net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; - if (net->RTO < stcb->asoc.minrto) { - net->RTO = stcb->asoc.minrto; - } - if (net->RTO > stcb->asoc.maxrto) { - net->RTO = stcb->asoc.maxrto; - } - } } } -void +static void sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; /* - * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && + * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || - (asoc->sctp_cmt_on_off == 1)) { + (asoc->sctp_cmt_on_off > 0)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* @@ -1447,8 +2214,8 @@ sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, int old_cwnd = net->cwnd; /* JRS - reset as if state were changed */ - htcp_reset(&net->htcp_ca); - net->ssthresh = htcp_recalc_ssthresh(stcb, net); + htcp_reset(&net->cc_mod.htcp_ca); + net->ssthresh = htcp_recalc_ssthresh(net); net->cwnd = net->ssthresh; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), @@ -1479,13 +2246,6 @@ sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } - /* - * Disable Nonce Sum Checking and store the - * resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, @@ -1501,15 +2261,15 @@ sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, } } -void +static void sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd = net->cwnd; /* JRS - reset as if the state were being changed to timeout */ - htcp_reset(&net->htcp_ca); - net->ssthresh = htcp_recalc_ssthresh(stcb, net); + htcp_reset(&net->cc_mod.htcp_ca); + net->ssthresh = htcp_recalc_ssthresh(net); net->cwnd = net->mtu; net->partial_bytes_acked = 0; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { @@ -1517,49 +2277,76 @@ sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, } } -void -sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, struct sctp_nets *net) -{ - int old_cwnd; - - old_cwnd = net->cwnd; - - sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); - net->htcp_ca.last_cong = sctp_get_tick_count(); - /* - * make a small adjustment to cwnd and force to CA. - */ - if (net->cwnd > net->mtu) - /* drop down one MTU after sending */ - net->cwnd -= net->mtu; - if (net->cwnd < net->ssthresh) - /* still in SS move to CA */ - net->ssthresh = net->cwnd - 1; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); - } -} - -void +static void sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, - struct sctp_nets *net) + struct sctp_nets *net, int in_window, int num_pkt_lost SCTP_UNUSED) { int old_cwnd; old_cwnd = net->cwnd; /* JRS - reset hctp as if state changed */ - htcp_reset(&net->htcp_ca); - SCTP_STAT_INCR(sctps_ecnereducedcwnd); - net->ssthresh = htcp_recalc_ssthresh(stcb, net); - if (net->ssthresh < net->mtu) { - net->ssthresh = net->mtu; - /* here back off the timer as well, to slow us down */ - net->RTO <<= 1; - } - net->cwnd = net->ssthresh; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + if (in_window == 0) { + htcp_reset(&net->cc_mod.htcp_ca); + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + net->ssthresh = htcp_recalc_ssthresh(net); + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + /* here back off the timer as well, to slow us down */ + net->RTO <<= 1; + } + net->cwnd = net->ssthresh; + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } } } + +struct sctp_cc_functions sctp_cc_functions[] = { + { + .sctp_set_initial_cc_param = sctp_set_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack, + .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, + .sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + }, + { + .sctp_set_initial_cc_param = sctp_set_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack, + .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, + .sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + }, + { + .sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack, + .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, + .sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + }, + { + .sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack, + .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, + .sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + .sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted, + .sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged, + .sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins, + .sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack, + .sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option, + .sctp_rtt_calculated = sctp_rtt_rtcc_calculated + } +}; |