diff options
Diffstat (limited to 'freebsd/sys/netinet/tcp_timer.c')
-rw-r--r-- | freebsd/sys/netinet/tcp_timer.c | 197 |
1 files changed, 134 insertions, 63 deletions
diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c index 422e5122..c50af2bb 100644 --- a/freebsd/sys/netinet/tcp_timer.c +++ b/freebsd/sys/netinet/tcp_timer.c @@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/tcp_log_buf.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> +#include <netinet/tcp_seq.h> #include <netinet/cc/cc.h> #ifdef INET6 #include <netinet6/tcp6_var.h> @@ -141,7 +142,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, /* max idle probes */ int tcp_maxpersistidle; -static int tcp_rexmit_drop_options = 0; +int tcp_rexmit_drop_options = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW, &tcp_rexmit_drop_options, 0, "Drop TCP options from 3rd and later retransmitted SYN"); @@ -176,18 +177,13 @@ static int per_cpu_timers = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, &per_cpu_timers , 0, "run tcp timers on all cpus"); -#if 0 -#define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \ - ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) -#endif - /* * Map the given inp to a CPU id. * * This queries RSS if it's compiled in, else it defaults to the current * CPU ID. */ -static inline int +inline int inp_to_cpuid(struct inpcb *inp) { u_int cpuid; @@ -245,7 +241,7 @@ int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; -static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ +int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ /* * TCP timer processing. @@ -280,55 +276,10 @@ tcp_timer_delack(void *xtp) CURVNET_RESTORE(); } -/* - * When a timer wants to remove a TCB it must - * hold the INP_INFO_RLOCK(). The timer function - * should only have grabbed the INP_WLOCK() when - * it entered. To safely switch to holding both the - * INP_INFO_RLOCK() and the INP_WLOCK() we must first - * grab a reference on the inp, which will hold the inp - * so that it can't be removed. We then unlock the INP_WLOCK(), - * and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK() - * we proceed again to get the INP_WLOCK() (this preserves proper - * lock order). After acquiring the INP_WLOCK we must check if someone - * else deleted the pcb i.e. the inp_flags check. - * If so we return 1 otherwise we return 0. - * - * No matter what the tcp_inpinfo_lock_add() function - * returns the caller must afterwards call tcp_inpinfo_lock_del() - * to drop the locks and reference properly. - */ - -int -tcp_inpinfo_lock_add(struct inpcb *inp) -{ - in_pcbref(inp); - INP_WUNLOCK(inp); - INP_INFO_RLOCK(&V_tcbinfo); - INP_WLOCK(inp); - if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { - return(1); - } - return(0); - -} - void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp) { - INP_INFO_RUNLOCK(&V_tcbinfo); - if (inp && (tp == NULL)) { - /* - * If tcp_close/drop() gets called and tp - * returns NULL, then the function dropped - * the inp lock, we hold a reference keeping - * this around, so we must re-aquire the - * INP_WLOCK() in order to proceed with - * our dropping the inp reference. - */ - INP_WLOCK(inp); - } - if (inp && in_pcbrele_wlocked(inp) == 0) + if (inp && tp != NULL) INP_WUNLOCK(inp); } @@ -337,6 +288,7 @@ tcp_timer_2msl(void *xtp) { struct tcpcb *tp = xtp; struct inpcb *inp; + struct epoch_tracker et; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; @@ -383,11 +335,13 @@ tcp_timer_2msl(void *xtp) tp->t_inpcb && tp->t_inpcb->inp_socket && (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { TCPSTAT_INC(tcps_finwait2_drops); - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_close(tp); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } else { @@ -395,15 +349,17 @@ tcp_timer_2msl(void *xtp) callout_reset(&tp->t_timers->tt_2msl, TP_KEEPINTVL(tp), tcp_timer_2msl, tp); } else { - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_close(tp); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } - } + } #ifdef TCPDEBUG if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) @@ -424,6 +380,7 @@ tcp_timer_keep(void *xtp) struct tcpcb *tp = xtp; struct tcptemp *t_template; struct inpcb *inp; + struct epoch_tracker et; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; @@ -517,11 +474,11 @@ tcp_timer_keep(void *xtp) dropit: TCPSTAT_INC(tcps_keepdrops); - - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_drop(tp, ETIMEDOUT); #ifdef TCPDEBUG @@ -530,8 +487,9 @@ dropit: PRU_SLOWTIMO); #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); -out: + out: CURVNET_RESTORE(); } @@ -540,6 +498,7 @@ tcp_timer_persist(void *xtp) { struct tcpcb *tp = xtp; struct inpcb *inp; + struct epoch_tracker et; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; @@ -579,11 +538,13 @@ tcp_timer_persist(void *xtp) (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { TCPSTAT_INC(tcps_persistdrop); - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_drop(tp, ETIMEDOUT); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } @@ -594,11 +555,13 @@ tcp_timer_persist(void *xtp) if (tp->t_state > TCPS_CLOSE_WAIT && (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { TCPSTAT_INC(tcps_persistdrop); - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_drop(tp, ETIMEDOUT); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } @@ -624,6 +587,7 @@ tcp_timer_rexmt(void * xtp) CURVNET_SET(tp->t_vnet); int rexmt; struct inpcb *inp; + struct epoch_tracker et; #ifdef TCPDEBUG int ostate; @@ -660,11 +624,13 @@ tcp_timer_rexmt(void * xtp) if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; TCPSTAT_INC(tcps_timeoutdrop); - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_drop(tp, ETIMEDOUT); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } @@ -950,6 +916,111 @@ tcp_timer_active(struct tcpcb *tp, uint32_t timer_type) return callout_active(t_callout); } +/* + * Stop the timer from running, and apply a flag + * against the timer_flags that will force the + * timer never to run. The flag is needed to assure + * a race does not leave it running and cause + * the timer to possibly restart itself (keep and persist + * especially do this). + */ +int +tcp_timer_suspend(struct tcpcb *tp, uint32_t timer_type) +{ + struct callout *t_callout; + uint32_t t_flags; + + switch (timer_type) { + case TT_DELACK: + t_flags = TT_DELACK_SUS; + t_callout = &tp->t_timers->tt_delack; + break; + case TT_REXMT: + t_flags = TT_REXMT_SUS; + t_callout = &tp->t_timers->tt_rexmt; + break; + case TT_PERSIST: + t_flags = TT_PERSIST_SUS; + t_callout = &tp->t_timers->tt_persist; + break; + case TT_KEEP: + t_flags = TT_KEEP_SUS; + t_callout = &tp->t_timers->tt_keep; + break; + case TT_2MSL: + t_flags = TT_2MSL_SUS; + t_callout = &tp->t_timers->tt_2msl; + break; + default: + panic("tp:%p bad timer_type 0x%x", tp, timer_type); + } + tp->t_timers->tt_flags |= t_flags; + return (callout_stop(t_callout)); +} + +void +tcp_timers_unsuspend(struct tcpcb *tp, uint32_t timer_type) +{ + switch (timer_type) { + case TT_DELACK: + if (tp->t_timers->tt_flags & TT_DELACK_SUS) { + tp->t_timers->tt_flags &= ~TT_DELACK_SUS; + if (tp->t_flags & TF_DELACK) { + /* Delayed ack timer should be up activate a timer */ + tp->t_flags &= ~TF_DELACK; + tcp_timer_activate(tp, TT_DELACK, + tcp_delacktime); + } + } + break; + case TT_REXMT: + if (tp->t_timers->tt_flags & TT_REXMT_SUS) { + tp->t_timers->tt_flags &= ~TT_REXMT_SUS; + if (SEQ_GT(tp->snd_max, tp->snd_una) && + (tcp_timer_active((tp), TT_PERSIST) == 0) && + tp->snd_wnd) { + /* We have outstanding data activate a timer */ + tcp_timer_activate(tp, TT_REXMT, + tp->t_rxtcur); + } + } + break; + case TT_PERSIST: + if (tp->t_timers->tt_flags & TT_PERSIST_SUS) { + tp->t_timers->tt_flags &= ~TT_PERSIST_SUS; + if (tp->snd_wnd == 0) { + /* Activate the persists timer */ + tp->t_rxtshift = 0; + tcp_setpersist(tp); + } + } + break; + case TT_KEEP: + if (tp->t_timers->tt_flags & TT_KEEP_SUS) { + tp->t_timers->tt_flags &= ~TT_KEEP_SUS; + tcp_timer_activate(tp, TT_KEEP, + TCPS_HAVEESTABLISHED(tp->t_state) ? + TP_KEEPIDLE(tp) : TP_KEEPINIT(tp)); + } + break; + case TT_2MSL: + if (tp->t_timers->tt_flags &= TT_2MSL_SUS) { + tp->t_timers->tt_flags &= ~TT_2MSL_SUS; + if ((tp->t_state == TCPS_FIN_WAIT_2) && + ((tp->t_inpcb->inp_socket == NULL) || + (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE))) { + /* Star the 2MSL timer */ + tcp_timer_activate(tp, TT_2MSL, + (tcp_fast_finwait2_recycle) ? + tcp_finwait2_timeout : TP_MAXIDLE(tp)); + } + } + break; + default: + panic("tp:%p bad timer_type 0x%x", tp, timer_type); + } +} + void tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type) { |