diff options
Diffstat (limited to 'freebsd/sys/netinet/tcp_subr.c')
-rw-r--r-- | freebsd/sys/netinet/tcp_subr.c | 165 |
1 files changed, 102 insertions, 63 deletions
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c index 787213b0..4852ffaf 100644 --- a/freebsd/sys/netinet/tcp_subr.c +++ b/freebsd/sys/netinet/tcp_subr.c @@ -216,13 +216,13 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs"); -static VNET_DEFINE(int, icmp_may_rst) = 1; +VNET_DEFINE_STATIC(int, icmp_may_rst) = 1; #define V_icmp_may_rst VNET(icmp_may_rst) SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_may_rst), 0, "Certain ICMP unreachable messages may abort connections in SYN_SENT"); -static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0; +VNET_DEFINE_STATIC(int, tcp_isn_reseed_interval) = 0; #define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval) SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_isn_reseed_interval), 0, @@ -239,6 +239,10 @@ VNET_DEFINE(uma_zone_t, sack_hole_zone); VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]); #endif +#define TS_OFFSET_SECRET_LENGTH 32 +VNET_DEFINE_STATIC(u_char, ts_offset_secret[TS_OFFSET_SECRET_LENGTH]); +#define V_ts_offset_secret VNET(ts_offset_secret) + static int tcp_default_fb_init(struct tcpcb *tp); static void tcp_default_fb_fini(struct tcpcb *tp, int tcb_is_purged); static int tcp_default_handoff_ok(struct tcpcb *tp); @@ -701,7 +705,7 @@ struct tcpcb_mem { #endif }; -static VNET_DEFINE(uma_zone_t, tcpcb_zone); +VNET_DEFINE_STATIC(uma_zone_t, tcpcb_zone); #define V_tcpcb_zone VNET(tcpcb_zone) MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); @@ -949,11 +953,10 @@ deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce, rw_wunlock(&tcp_function_lock); VNET_LIST_RLOCK(); - /* XXX handle */ VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); INP_INFO_WLOCK(&V_tcbinfo); - LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { + CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { INP_WLOCK(inp); if (inp->inp_flags & INP_TIMEWAIT) { INP_WUNLOCK(inp); @@ -1099,6 +1102,7 @@ tcp_init(void) /* Initialize the TCP logging data. */ tcp_log_init(); #endif + arc4rand(&V_ts_offset_secret, sizeof(V_ts_offset_secret), 0); if (tcp_soreceive_stream) { #ifdef INET @@ -1629,7 +1633,7 @@ tcp_newtcpcb(struct inpcb *inp) tp->t_vnet = inp->inp_vnet; #endif tp->t_timers = &tm->tt; - /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ + TAILQ_INIT(&tp->t_segq); tp->t_maxseg = #ifdef INET6 isipv6 ? V_tcp_v6mssdflt : @@ -1723,7 +1727,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo) * therefore don't enter the loop below until the connection * list has stabilised. */ - LIST_FOREACH(inp, &V_tcb, inp_list) { + CK_LIST_FOREACH(inp, &V_tcb, inp_list) { INP_WLOCK(inp); /* Important to skip tcptw structs. */ if (!(inp->inp_flags & INP_TIMEWAIT) && @@ -1737,11 +1741,18 @@ tcp_ccalgounload(struct cc_algo *unload_algo) */ if (CC_ALGO(tp) == unload_algo) { tmpalgo = CC_ALGO(tp); - /* NewReno does not require any init. */ - CC_ALGO(tp) = &newreno_cc_algo; - /* XXX defer to epoch_call */ if (tmpalgo->cb_destroy != NULL) tmpalgo->cb_destroy(tp->ccv); + CC_DATA(tp) = NULL; + /* + * NewReno may allocate memory on + * demand for certain stateful + * configuration as needed, but is + * coded to never fail on memory + * allocation failure so it is a safe + * fallback. + */ + CC_ALGO(tp) = &newreno_cc_algo; } } INP_WUNLOCK(inp); @@ -1893,6 +1904,7 @@ tcp_discardcb(struct tcpcb *tp) /* Allow the CC algorithm to clean up after itself. */ if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(tp->ccv); + CC_DATA(tp) = NULL; #ifdef TCP_HHOOK khelp_destroy_osd(tp->osd); @@ -1922,10 +1934,11 @@ tcp_timer_discard(void *ptp) { struct inpcb *inp; struct tcpcb *tp; + struct epoch_tracker et; tp = (struct tcpcb *)ptp; CURVNET_SET(tp->t_vnet); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); @@ -1945,13 +1958,13 @@ tcp_timer_discard(void *ptp) tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); return; } } INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); } @@ -2024,10 +2037,12 @@ tcp_drain(void) * useful. */ INP_INFO_WLOCK(&V_tcbinfo); - LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { - if (inpb->inp_flags & INP_TIMEWAIT) - continue; + CK_LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { INP_WLOCK(inpb); + if (inpb->inp_flags & INP_TIMEWAIT) { + INP_WUNLOCK(inpb); + continue; + } if ((tcpb = intotcpcb(inpb)) != NULL) { tcp_reass_flush(tcpb); tcp_clean_sackreport(tcpb); @@ -2110,10 +2125,10 @@ static int tcp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, m, n, pcb_count; - struct in_pcblist *il; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; + struct epoch_tracker et; /* * The process of preparing the TCB list is too time-consuming and @@ -2157,12 +2172,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) if (error) return (error); - il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS); - inp_list = il->il_inp_list; + inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); INP_INFO_WLOCK(&V_tcbinfo); - for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; - inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { + for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; + inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt) { /* @@ -2201,10 +2215,14 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) } else INP_RUNLOCK(inp); } - - il->il_count = n; - il->il_pcbinfo = &V_tcbinfo; - epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); + for (i = 0; i < n; i++) { + inp = inp_list[i]; + INP_RLOCK(inp); + if (!in_pcbrele_rlocked(inp)) + INP_RUNLOCK(inp); + } + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); if (!error) { /* @@ -2221,6 +2239,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) INP_LIST_RUNLOCK(&V_tcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } + free(inp_list, M_TEMP); return (error); } @@ -2342,6 +2361,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct icmp *icp; struct in_conninfo inc; + struct epoch_tracker et; tcp_seq icmp_tcp_seq; int mtu; @@ -2373,7 +2393,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { @@ -2438,7 +2458,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) out: if (inp != NULL) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); } #endif /* INET */ @@ -2456,6 +2476,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; struct in_conninfo inc; + struct epoch_tracker et; struct tcp_ports { uint16_t th_sport; uint16_t th_dport; @@ -2517,7 +2538,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) } bzero(&t_ports, sizeof(struct tcp_ports)); m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport, &ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { @@ -2589,10 +2610,45 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) out: if (inp != NULL) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); } #endif /* INET6 */ +static uint32_t +tcp_keyed_hash(struct in_conninfo *inc, u_char *key, u_int len) +{ + MD5_CTX ctx; + uint32_t hash[4]; + + MD5Init(&ctx); + MD5Update(&ctx, &inc->inc_fport, sizeof(uint16_t)); + MD5Update(&ctx, &inc->inc_lport, sizeof(uint16_t)); + switch (inc->inc_flags & INC_ISIPV6) { +#ifdef INET + case 0: + MD5Update(&ctx, &inc->inc_faddr, sizeof(struct in_addr)); + MD5Update(&ctx, &inc->inc_laddr, sizeof(struct in_addr)); + break; +#endif +#ifdef INET6 + case INC_ISIPV6: + MD5Update(&ctx, &inc->inc6_faddr, sizeof(struct in6_addr)); + MD5Update(&ctx, &inc->inc6_laddr, sizeof(struct in6_addr)); + break; +#endif + } + MD5Update(&ctx, key, len); + MD5Final((unsigned char *)hash, &ctx); + + return (hash[0]); +} + +uint32_t +tcp_new_ts_offset(struct in_conninfo *inc) +{ + return (tcp_keyed_hash(inc, V_ts_offset_secret, + sizeof(V_ts_offset_secret))); +} /* * Following is where TCP initial sequence number generation occurs. @@ -2634,19 +2690,20 @@ out: * as reseeding should not be necessary. * * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, - * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In + * isn_offset_old, and isn_ctx is performed using the ISN lock. In * general, this means holding an exclusive (write) lock. */ #define ISN_BYTES_PER_SECOND 1048576 #define ISN_STATIC_INCREMENT 4096 #define ISN_RANDOM_INCREMENT (4096 - 1) +#define ISN_SECRET_LENGTH 32 -static VNET_DEFINE(u_char, isn_secret[32]); -static VNET_DEFINE(int, isn_last); -static VNET_DEFINE(int, isn_last_reseed); -static VNET_DEFINE(u_int32_t, isn_offset); -static VNET_DEFINE(u_int32_t, isn_offset_old); +VNET_DEFINE_STATIC(u_char, isn_secret[ISN_SECRET_LENGTH]); +VNET_DEFINE_STATIC(int, isn_last); +VNET_DEFINE_STATIC(int, isn_last_reseed); +VNET_DEFINE_STATIC(u_int32_t, isn_offset); +VNET_DEFINE_STATIC(u_int32_t, isn_offset_old); #define V_isn_secret VNET(isn_secret) #define V_isn_last VNET(isn_last) @@ -2655,45 +2712,23 @@ static VNET_DEFINE(u_int32_t, isn_offset_old); #define V_isn_offset_old VNET(isn_offset_old) tcp_seq -tcp_new_isn(struct tcpcb *tp) +tcp_new_isn(struct in_conninfo *inc) { - MD5_CTX isn_ctx; - u_int32_t md5_buffer[4]; tcp_seq new_isn; u_int32_t projected_offset; - INP_WLOCK_ASSERT(tp->t_inpcb); - ISN_LOCK(); /* Seed if this is the first use, reseed if requested. */ if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) && (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz) < (u_int)ticks))) { - read_random(&V_isn_secret, sizeof(V_isn_secret)); + arc4rand(&V_isn_secret, sizeof(V_isn_secret), 0); V_isn_last_reseed = ticks; } /* Compute the md5 hash and return the ISN. */ - MD5Init(&isn_ctx); - MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); - MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); -#ifdef INET6 - if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { - MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, - sizeof(struct in6_addr)); - MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr, - sizeof(struct in6_addr)); - } else -#endif - { - MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr, - sizeof(struct in_addr)); - MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr, - sizeof(struct in_addr)); - } - MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret)); - MD5Final((u_char *) &md5_buffer, &isn_ctx); - new_isn = (tcp_seq) md5_buffer[0]; + new_isn = (tcp_seq)tcp_keyed_hash(inc, V_isn_secret, + sizeof(V_isn_secret)); V_isn_offset += ISN_STATIC_INCREMENT + (arc4random() & ISN_RANDOM_INCREMENT); if (ticks != V_isn_last) { @@ -2840,6 +2875,9 @@ tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer")); + if (inc->inc_flags & INC_IPV6MINMTU) + return (IPV6_MMTU); + if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid); if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0, @@ -2928,6 +2966,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS) struct tcpcb *tp; struct tcptw *tw; struct sockaddr_in *fin, *lin; + struct epoch_tracker et; #ifdef INET6 struct sockaddr_in6 *fin6, *lin6; #endif @@ -2987,7 +3026,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS) default: return (EINVAL); } - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: @@ -3026,7 +3065,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS) INP_WUNLOCK(inp); } else error = ESRCH; - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (error); } |