From af5333e0a02b2295304d4e029b15ee15a4fe2b3a Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Mon, 4 Nov 2013 11:33:00 +0100 Subject: Update to FreeBSD 8.4 --- freebsd/sys/netinet/cc.h | 167 + freebsd/sys/netinet/cc/cc.c | 332 ++ freebsd/sys/netinet/cc/cc_module.h | 70 + freebsd/sys/netinet/cc/cc_newreno.c | 239 ++ freebsd/sys/netinet/icmp6.h | 21 +- freebsd/sys/netinet/icmp_var.h | 3 +- freebsd/sys/netinet/if_atm.c | 6 + freebsd/sys/netinet/if_ether.c | 100 +- freebsd/sys/netinet/igmp.c | 59 +- freebsd/sys/netinet/in.c | 189 +- freebsd/sys/netinet/in.h | 1 + freebsd/sys/netinet/in_mcast.c | 16 +- freebsd/sys/netinet/in_pcb.c | 227 +- freebsd/sys/netinet/in_pcb.h | 5 +- freebsd/sys/netinet/in_proto.c | 16 +- freebsd/sys/netinet/in_rmx.c | 24 +- freebsd/sys/netinet/in_var.h | 8 +- freebsd/sys/netinet/ip.h | 25 + freebsd/sys/netinet/ip_carp.c | 85 +- freebsd/sys/netinet/ip_divert.c | 4 +- freebsd/sys/netinet/ip_fw.h | 56 +- freebsd/sys/netinet/ip_icmp.c | 60 +- freebsd/sys/netinet/ip_input.c | 20 +- freebsd/sys/netinet/ip_ipsec.c | 2 +- freebsd/sys/netinet/ip_mroute.c | 4 +- freebsd/sys/netinet/ip_output.c | 12 +- freebsd/sys/netinet/ip_var.h | 3 + freebsd/sys/netinet/ipfw/ip_dn_glue.c | 6 +- freebsd/sys/netinet/ipfw/ip_dn_io.c | 101 +- freebsd/sys/netinet/ipfw/ip_dn_private.h | 4 + freebsd/sys/netinet/ipfw/ip_dummynet.c | 79 +- freebsd/sys/netinet/ipfw/ip_fw2.c | 310 +- freebsd/sys/netinet/ipfw/ip_fw_log.c | 19 +- freebsd/sys/netinet/ipfw/ip_fw_nat.c | 209 +- freebsd/sys/netinet/ipfw/ip_fw_pfil.c | 14 +- freebsd/sys/netinet/ipfw/ip_fw_private.h | 25 +- freebsd/sys/netinet/ipfw/ip_fw_sockopt.c | 129 +- freebsd/sys/netinet/ipfw/ip_fw_table.c | 595 +++- freebsd/sys/netinet/libalias/alias.h | 6 + freebsd/sys/netinet/libalias/alias_db.c | 33 +- freebsd/sys/netinet/libalias/alias_ftp.c | 124 +- freebsd/sys/netinet/libalias/alias_local.h | 7 + freebsd/sys/netinet/libalias/alias_mod.h | 3 + freebsd/sys/netinet/libalias/alias_proxy.c | 2 +- freebsd/sys/netinet/libalias/alias_sctp.c | 2 +- freebsd/sys/netinet/raw_ip.c | 14 +- freebsd/sys/netinet/sctp.h | 167 +- freebsd/sys/netinet/sctp_asconf.c | 1198 ++++--- freebsd/sys/netinet/sctp_asconf.h | 12 +- freebsd/sys/netinet/sctp_auth.c | 51 +- freebsd/sys/netinet/sctp_auth.h | 18 +- freebsd/sys/netinet/sctp_bsd_addr.c | 146 +- freebsd/sys/netinet/sctp_bsd_addr.h | 13 +- freebsd/sys/netinet/sctp_cc_functions.c | 1829 +++++++--- freebsd/sys/netinet/sctp_constants.h | 171 +- freebsd/sys/netinet/sctp_crc32.c | 9 +- freebsd/sys/netinet/sctp_crc32.h | 12 +- freebsd/sys/netinet/sctp_dtrace_declare.h | 80 + freebsd/sys/netinet/sctp_dtrace_define.h | 233 ++ freebsd/sys/netinet/sctp_header.h | 53 +- freebsd/sys/netinet/sctp_indata.c | 1645 ++++----- freebsd/sys/netinet/sctp_indata.h | 36 +- freebsd/sys/netinet/sctp_input.c | 2241 ++++++------ freebsd/sys/netinet/sctp_input.h | 26 +- freebsd/sys/netinet/sctp_lock_bsd.h | 55 +- freebsd/sys/netinet/sctp_os.h | 12 +- freebsd/sys/netinet/sctp_os_bsd.h | 48 +- freebsd/sys/netinet/sctp_output.c | 5347 ++++++++++++++-------------- freebsd/sys/netinet/sctp_output.h | 96 +- freebsd/sys/netinet/sctp_pcb.c | 2245 ++++++------ freebsd/sys/netinet/sctp_pcb.h | 52 +- freebsd/sys/netinet/sctp_peeloff.c | 127 +- freebsd/sys/netinet/sctp_peeloff.h | 23 +- freebsd/sys/netinet/sctp_structs.h | 279 +- freebsd/sys/netinet/sctp_sysctl.c | 364 +- freebsd/sys/netinet/sctp_sysctl.h | 184 +- freebsd/sys/netinet/sctp_timer.c | 584 +-- freebsd/sys/netinet/sctp_timer.h | 17 +- freebsd/sys/netinet/sctp_uio.h | 378 +- freebsd/sys/netinet/sctp_usrreq.c | 3579 +++++++++++++------ freebsd/sys/netinet/sctp_var.h | 59 +- freebsd/sys/netinet/sctputil.c | 2214 ++++++------ freebsd/sys/netinet/sctputil.h | 70 +- freebsd/sys/netinet/tcp.h | 26 +- freebsd/sys/netinet/tcp_hostcache.c | 27 +- freebsd/sys/netinet/tcp_input.c | 701 ++-- freebsd/sys/netinet/tcp_lro.c | 62 +- freebsd/sys/netinet/tcp_lro.h | 63 +- freebsd/sys/netinet/tcp_offload.h | 2 +- freebsd/sys/netinet/tcp_output.c | 123 +- freebsd/sys/netinet/tcp_reass.c | 51 +- freebsd/sys/netinet/tcp_sack.c | 3 +- freebsd/sys/netinet/tcp_seq.h | 31 +- freebsd/sys/netinet/tcp_subr.c | 217 +- freebsd/sys/netinet/tcp_syncache.c | 29 +- freebsd/sys/netinet/tcp_timer.c | 110 +- freebsd/sys/netinet/tcp_timewait.c | 9 +- freebsd/sys/netinet/tcp_usrreq.c | 73 +- freebsd/sys/netinet/tcp_var.h | 77 +- freebsd/sys/netinet/udp.h | 4 +- freebsd/sys/netinet/udp_usrreq.c | 41 +- freebsd/sys/netinet/udp_var.h | 2 + 102 files changed, 17178 insertions(+), 11582 deletions(-) create mode 100644 freebsd/sys/netinet/cc.h create mode 100644 freebsd/sys/netinet/cc/cc.c create mode 100644 freebsd/sys/netinet/cc/cc_module.h create mode 100644 freebsd/sys/netinet/cc/cc_newreno.c create mode 100644 freebsd/sys/netinet/sctp_dtrace_declare.h create mode 100644 freebsd/sys/netinet/sctp_dtrace_define.h (limited to 'freebsd/sys/netinet') diff --git a/freebsd/sys/netinet/cc.h b/freebsd/sys/netinet/cc.h new file mode 100644 index 00000000..14b4a9de --- /dev/null +++ b/freebsd/sys/netinet/cc.h @@ -0,0 +1,167 @@ +/*- + * Copyright (c) 2007-2008 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart and + * James Healy, made possible in part by a grant from the Cisco University + * Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This software was first released in 2007 by James Healy and Lawrence Stewart + * whilst working on the NewTCP research project at Swinburne University of + * Technology's Centre for Advanced Internet Architectures, Melbourne, + * Australia, which was made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * More details are available at: + * http://caia.swin.edu.au/urp/newtcp/ + */ + +#ifndef _NETINET_CC_H_ +#define _NETINET_CC_H_ + +/* XXX: TCP_CA_NAME_MAX define lives in tcp.h for compat reasons. */ +#include + +/* Global CC vars. */ +extern STAILQ_HEAD(cc_head, cc_algo) cc_list; +extern const int tcprexmtthresh; +extern struct cc_algo newreno_cc_algo; + +/* Per-netstack bits. */ +VNET_DECLARE(struct cc_algo *, default_cc_ptr); +#define V_default_cc_ptr VNET(default_cc_ptr) + +/* Define the new net.inet.tcp.cc sysctl tree. */ +SYSCTL_DECL(_net_inet_tcp_cc); + +/* CC housekeeping functions. */ +int cc_register_algo(struct cc_algo *add_cc); +int cc_deregister_algo(struct cc_algo *remove_cc); + +/* + * Wrapper around transport structs that contain same-named congestion + * control variables. Allows algos to be shared amongst multiple CC aware + * transprots. + */ +struct cc_var { + void *cc_data; /* Per-connection private CC algorithm data. */ + int bytes_this_ack; /* # bytes acked by the current ACK. */ + tcp_seq curack; /* Most recent ACK. */ + uint32_t flags; /* Flags for cc_var (see below) */ + int type; /* Indicates which ptr is valid in ccvc. */ + union ccv_container { + struct tcpcb *tcp; + struct sctp_nets *sctp; + } ccvc; +}; + +/* cc_var flags. */ +#define CCF_ABC_SENTAWND 0x0001 /* ABC counted cwnd worth of bytes? */ +#define CCF_CWND_LIMITED 0x0002 /* Are we currently cwnd limited? */ + +/* ACK types passed to the ack_received() hook. */ +#define CC_ACK 0x0001 /* Regular in sequence ACK. */ +#define CC_DUPACK 0x0002 /* Duplicate ACK. */ +#define CC_PARTIALACK 0x0004 /* Not yet. */ +#define CC_SACK 0x0008 /* Not yet. */ + +/* + * Congestion signal types passed to the cong_signal() hook. The highest order 8 + * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own + * congestion signal types. + */ +#define CC_ECN 0x00000001 /* ECN marked packet received. */ +#define CC_RTO 0x00000002 /* RTO fired. */ +#define CC_RTO_ERR 0x00000004 /* RTO fired in error. */ +#define CC_NDUPACK 0x00000008 /* Threshold of dupack's reached. */ + +#define CC_SIGPRIVMASK 0xFF000000 /* Mask to check if sig is private. */ + +/* + * Structure to hold data and function pointers that together represent a + * congestion control algorithm. + */ +struct cc_algo { + char name[TCP_CA_NAME_MAX]; + + /* Init global module state on kldload. */ + int (*mod_init)(void); + + /* Cleanup global module state on kldunload. */ + int (*mod_destroy)(void); + + /* Init CC state for a new control block. */ + int (*cb_init)(struct cc_var *ccv); + + /* Cleanup CC state for a terminating control block. */ + void (*cb_destroy)(struct cc_var *ccv); + + /* Init variables for a newly established connection. */ + void (*conn_init)(struct cc_var *ccv); + + /* Called on receipt of an ack. */ + void (*ack_received)(struct cc_var *ccv, uint16_t type); + + /* Called on detection of a congestion signal. */ + void (*cong_signal)(struct cc_var *ccv, uint32_t type); + + /* Called after exiting congestion recovery. */ + void (*post_recovery)(struct cc_var *ccv); + + /* Called when data transfer resumes after an idle period. */ + void (*after_idle)(struct cc_var *ccv); + + STAILQ_ENTRY (cc_algo) entries; +}; + +/* Macro to obtain the CC algo's struct ptr. */ +#define CC_ALGO(tp) ((tp)->cc_algo) + +/* Macro to obtain the CC algo's data ptr. */ +#define CC_DATA(tp) ((tp)->ccv->cc_data) + +/* Macro to obtain the system default CC algo's struct ptr. */ +#define CC_DEFAULT() V_default_cc_ptr + +extern struct rwlock cc_list_lock; +#define CC_LIST_LOCK_INIT() rw_init(&cc_list_lock, "cc_list") +#define CC_LIST_LOCK_DESTROY() rw_destroy(&cc_list_lock) +#define CC_LIST_RLOCK() rw_rlock(&cc_list_lock) +#define CC_LIST_RUNLOCK() rw_runlock(&cc_list_lock) +#define CC_LIST_WLOCK() rw_wlock(&cc_list_lock) +#define CC_LIST_WUNLOCK() rw_wunlock(&cc_list_lock) +#define CC_LIST_LOCK_ASSERT() rw_assert(&cc_list_lock, RA_LOCKED) + +#endif /* _NETINET_CC_H_ */ diff --git a/freebsd/sys/netinet/cc/cc.c b/freebsd/sys/netinet/cc/cc.c new file mode 100644 index 00000000..a5a971e6 --- /dev/null +++ b/freebsd/sys/netinet/cc/cc.c @@ -0,0 +1,332 @@ +#include + +/*- + * Copyright (c) 2007-2008 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart and + * James Healy, made possible in part by a grant from the Cisco University + * Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This software was first released in 2007 by James Healy and Lawrence Stewart + * whilst working on the NewTCP research project at Swinburne University of + * Technology's Centre for Advanced Internet Architectures, Melbourne, + * Australia, which was made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * More details are available at: + * http://caia.swin.edu.au/urp/newtcp/ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +/* + * List of available cc algorithms on the current system. First element + * is used as the system default CC algorithm. + */ +struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); + +/* Protects the cc_list TAILQ. */ +struct rwlock cc_list_lock; + +VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo; + +/* + * Sysctl handler to show and change the default CC algorithm. + */ +static int +cc_default_algo(SYSCTL_HANDLER_ARGS) +{ + char default_cc[TCP_CA_NAME_MAX]; + struct cc_algo *funcs; + int err, found; + + err = found = 0; + + if (req->newptr == NULL) { + /* Just print the current default. */ + CC_LIST_RLOCK(); + strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX); + CC_LIST_RUNLOCK(); + err = sysctl_handle_string(oidp, default_cc, 1, req); + } else { + /* Find algo with specified name and set it to default. */ + CC_LIST_RLOCK(); + STAILQ_FOREACH(funcs, &cc_list, entries) { + if (strncmp((char *)req->newptr, funcs->name, + TCP_CA_NAME_MAX) == 0) { + found = 1; + V_default_cc_ptr = funcs; + } + } + CC_LIST_RUNLOCK(); + + if (!found) + err = ESRCH; + } + + return (err); +} + +#ifndef __rtems__ +/* + * Sysctl handler to display the list of available CC algorithms. + */ +static int +cc_list_available(SYSCTL_HANDLER_ARGS) +{ + struct cc_algo *algo; + struct sbuf *s; + int err, first, nalgos; + + err = nalgos = 0; + first = 1; + + CC_LIST_RLOCK(); + STAILQ_FOREACH(algo, &cc_list, entries) { + nalgos++; + } + CC_LIST_RUNLOCK(); + + s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN); + + if (s == NULL) + return (ENOMEM); + + /* + * It is theoretically possible for the CC list to have grown in size + * since the call to sbuf_new() and therefore for the sbuf to be too + * small. If this were to happen (incredibly unlikely), the sbuf will + * reach an overflow condition, sbuf_printf() will return an error and + * the sysctl will fail gracefully. + */ + CC_LIST_RLOCK(); + STAILQ_FOREACH(algo, &cc_list, entries) { + err = sbuf_printf(s, first ? "%s" : ", %s", algo->name); + if (err) { + /* Sbuf overflow condition. */ + err = EOVERFLOW; + break; + } + first = 0; + } + CC_LIST_RUNLOCK(); + + if (!err) { + sbuf_finish(s); + err = sysctl_handle_string(oidp, sbuf_data(s), 1, req); + } + + sbuf_delete(s); + return (err); +} +#endif /* __rtems__ */ + +/* + * Reset the default CC algo to NewReno for any netstack which is using the algo + * that is about to go away as its default. + */ +static void +cc_checkreset_default(struct cc_algo *remove_cc) +{ + VNET_ITERATOR_DECL(vnet_iter); + + CC_LIST_LOCK_ASSERT(); + + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + if (strncmp(CC_DEFAULT()->name, remove_cc->name, + TCP_CA_NAME_MAX) == 0) + V_default_cc_ptr = &newreno_cc_algo; + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); +} + +/* + * Initialise CC subsystem on system boot. + */ +static void +cc_init(void) +{ + CC_LIST_LOCK_INIT(); + STAILQ_INIT(&cc_list); +} + +/* + * Returns non-zero on success, 0 on failure. + */ +int +cc_deregister_algo(struct cc_algo *remove_cc) +{ + struct cc_algo *funcs, *tmpfuncs; + int err; + + err = ENOENT; + + /* Never allow newreno to be deregistered. */ + if (&newreno_cc_algo == remove_cc) + return (EPERM); + + /* Remove algo from cc_list so that new connections can't use it. */ + CC_LIST_WLOCK(); + STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) { + if (funcs == remove_cc) { + cc_checkreset_default(remove_cc); + STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries); + err = 0; + break; + } + } + CC_LIST_WUNLOCK(); + + if (!err) + /* + * XXXLAS: + * - We may need to handle non-zero return values in future. + * - If we add CC framework support for protocols other than + * TCP, we may want a more generic way to handle this step. + */ + tcp_ccalgounload(remove_cc); + + return (err); +} + +/* + * Returns 0 on success, non-zero on failure. + */ +int +cc_register_algo(struct cc_algo *add_cc) +{ + struct cc_algo *funcs; + int err; + + err = 0; + + /* + * Iterate over list of registered CC algorithms and make sure + * we're not trying to add a duplicate. + */ + CC_LIST_WLOCK(); + STAILQ_FOREACH(funcs, &cc_list, entries) { + if (funcs == add_cc || strncmp(funcs->name, add_cc->name, + TCP_CA_NAME_MAX) == 0) + err = EEXIST; + } + + if (!err) + STAILQ_INSERT_TAIL(&cc_list, add_cc, entries); + + CC_LIST_WUNLOCK(); + + return (err); +} + +/* + * Handles kld related events. Returns 0 on success, non-zero on failure. + */ +int +cc_modevent(module_t mod, int event_type, void *data) +{ + struct cc_algo *algo; + int err; + + err = 0; + algo = (struct cc_algo *)data; + + switch(event_type) { + case MOD_LOAD: + if (algo->mod_init != NULL) + err = algo->mod_init(); + if (!err) + err = cc_register_algo(algo); + break; + + case MOD_QUIESCE: + case MOD_SHUTDOWN: + case MOD_UNLOAD: + err = cc_deregister_algo(algo); + if (!err && algo->mod_destroy != NULL) + algo->mod_destroy(); + if (err == ENOENT) + err = 0; + break; + + default: + err = EINVAL; + break; + } + + return (err); +} + +SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); + +/* Declare sysctl tree and populate it. */ +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL, + "congestion control related settings"); + +SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW, + NULL, 0, cc_default_algo, "A", "default congestion control algorithm"); + +#ifndef __rtems__ +SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD, + NULL, 0, cc_list_available, "A", + "list available congestion control algorithms"); +#endif /* __rtems__ */ diff --git a/freebsd/sys/netinet/cc/cc_module.h b/freebsd/sys/netinet/cc/cc_module.h new file mode 100644 index 00000000..6da10df1 --- /dev/null +++ b/freebsd/sys/netinet/cc/cc_module.h @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 2009-2010 Lawrence Stewart + * All rights reserved. + * + * This software was developed by Lawrence Stewart while studying at the Centre + * for Advanced Internet Architectures, Swinburne University of Technology, made + * possible in part by a grant from the Cisco University Research Program Fund + * at Community Foundation Silicon Valley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This software was first released in 2009 by Lawrence Stewart as part of the + * NewTCP research project at Swinburne University of Technology's Centre for + * Advanced Internet Architectures, Melbourne, Australia, which was made + * possible in part by a grant from the Cisco University Research Program Fund + * at Community Foundation Silicon Valley. More details are available at: + * http://caia.swin.edu.au/urp/newtcp/ + */ + +#ifndef _NETINET_CC_MODULE_H_ +#define _NETINET_CC_MODULE_H_ + +/* + * Allows a CC algorithm to manipulate a commonly named CC variable regardless + * of the transport protocol and associated C struct. + * XXXLAS: Out of action until the work to support SCTP is done. + * +#define CCV(ccv, what) \ +(*( \ + (ccv)->type == IPPROTO_TCP ? &(ccv)->ccvc.tcp->what : \ + &(ccv)->ccvc.sctp->what \ +)) + */ +#define CCV(ccv, what) (ccv)->ccvc.tcp->what + +#define DECLARE_CC_MODULE(ccname, ccalgo) \ + static moduledata_t cc_##ccname = { \ + .name = #ccname, \ + .evhand = cc_modevent, \ + .priv = ccalgo \ + }; \ + DECLARE_MODULE(ccname, cc_##ccname, \ + SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY) + +int cc_modevent(module_t mod, int type, void *data); + +#endif /* _NETINET_CC_MODULE_H_ */ diff --git a/freebsd/sys/netinet/cc/cc_newreno.c b/freebsd/sys/netinet/cc/cc_newreno.c new file mode 100644 index 00000000..e40f6d83 --- /dev/null +++ b/freebsd/sys/netinet/cc/cc_newreno.c @@ -0,0 +1,239 @@ +#include + +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. + * Copyright (c) 2007-2008,2010 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart, James + * Healy and David Hayes, made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This software was first released in 2007 by James Healy and Lawrence Stewart + * whilst working on the NewTCP research project at Swinburne University of + * Technology's Centre for Advanced Internet Architectures, Melbourne, + * Australia, which was made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * More details are available at: + * http://caia.swin.edu.au/urp/newtcp/ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +static void newreno_ack_received(struct cc_var *ccv, uint16_t type); +static void newreno_after_idle(struct cc_var *ccv); +static void newreno_cong_signal(struct cc_var *ccv, uint32_t type); +static void newreno_post_recovery(struct cc_var *ccv); + +struct cc_algo newreno_cc_algo = { + .name = "newreno", + .ack_received = newreno_ack_received, + .after_idle = newreno_after_idle, + .cong_signal = newreno_cong_signal, + .post_recovery = newreno_post_recovery, +}; + +static void +newreno_ack_received(struct cc_var *ccv, uint16_t type) +{ + if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && + (ccv->flags & CCF_CWND_LIMITED)) { + u_int cw = CCV(ccv, snd_cwnd); + u_int incr = CCV(ccv, t_maxseg); + + /* + * Regular in-order ACK, open the congestion window. + * Method depends on which congestion control state we're + * in (slow start or cong avoid) and if ABC (RFC 3465) is + * enabled. + * + * slow start: cwnd <= ssthresh + * cong avoid: cwnd > ssthresh + * + * slow start and ABC (RFC 3465): + * Grow cwnd exponentially by the amount of data + * ACKed capping the max increment per ACK to + * (abc_l_var * maxseg) bytes. + * + * slow start without ABC (RFC 5681): + * Grow cwnd exponentially by maxseg per ACK. + * + * cong avoid and ABC (RFC 3465): + * Grow cwnd linearly by maxseg per RTT for each + * cwnd worth of ACKed data. + * + * cong avoid without ABC (RFC 5681): + * Grow cwnd linearly by approximately maxseg per RTT using + * maxseg^2 / cwnd per ACK as the increment. + * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to + * avoid capping cwnd. + */ + if (cw > CCV(ccv, snd_ssthresh)) { + if (V_tcp_do_rfc3465) { + if (ccv->flags & CCF_ABC_SENTAWND) + ccv->flags &= ~CCF_ABC_SENTAWND; + else + incr = 0; + } else + incr = max((incr * incr / cw), 1); + } else if (V_tcp_do_rfc3465) { + /* + * In slow-start with ABC enabled and no RTO in sight? + * (Must not use abc_l_var > 1 if slow starting after + * an RTO. On RTO, snd_nxt = snd_una, so the + * snd_nxt == snd_max check is sufficient to + * handle this). + * + * XXXLAS: Find a way to signal SS after RTO that + * doesn't rely on tcpcb vars. + */ + if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) + incr = min(ccv->bytes_this_ack, + V_tcp_abc_l_var * CCV(ccv, t_maxseg)); + else + incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg)); + } + /* ABC is on by default, so incr equals 0 frequently. */ + if (incr > 0) + CCV(ccv, snd_cwnd) = min(cw + incr, + TCP_MAXWIN << CCV(ccv, snd_scale)); + } +} + +static void +newreno_after_idle(struct cc_var *ccv) +{ + int rw; + + /* + * If we've been idle for more than one retransmit timeout the old + * congestion window is no longer current and we have to reduce it to + * the restart window before we can transmit again. + * + * The restart window is the initial window or the last CWND, whichever + * is smaller. + * + * This is done to prevent us from flooding the path with a full CWND at + * wirespeed, overloading router and switch buffers along the way. + * + * See RFC5681 Section 4.1. "Restarting Idle Connections". + */ + if (V_tcp_do_rfc3390) + rw = min(4 * CCV(ccv, t_maxseg), + max(2 * CCV(ccv, t_maxseg), 4380)); + else + rw = CCV(ccv, t_maxseg) * 2; + + CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd)); +} + +/* + * Perform any necessary tasks before we enter congestion recovery. + */ +static void +newreno_cong_signal(struct cc_var *ccv, uint32_t type) +{ + u_int win; + + /* Catch algos which mistakenly leak private signal types. */ + KASSERT((type & CC_SIGPRIVMASK) == 0, + ("%s: congestion signal type 0x%08x is private\n", __func__, type)); + + win = max(CCV(ccv, snd_cwnd) / 2 / CCV(ccv, t_maxseg), 2) * + CCV(ccv, t_maxseg); + + switch (type) { + case CC_NDUPACK: + if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { + if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) + CCV(ccv, snd_ssthresh) = win; + ENTER_RECOVERY(CCV(ccv, t_flags)); + } + break; + case CC_ECN: + if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { + CCV(ccv, snd_ssthresh) = win; + CCV(ccv, snd_cwnd) = win; + ENTER_CONGRECOVERY(CCV(ccv, t_flags)); + } + break; + } +} + +/* + * Perform any necessary tasks before we exit congestion recovery. + */ +static void +newreno_post_recovery(struct cc_var *ccv) +{ + if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { + /* + * Fast recovery will conclude after returning from this + * function. Window inflation should have left us with + * approximately snd_ssthresh outstanding data. But in case we + * would be inclined to send a burst, better to do it via the + * slow start mechanism. + * + * XXXLAS: Find a way to do this without needing curack + */ + if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh), + CCV(ccv, snd_max))) + CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) - + ccv->curack + CCV(ccv, t_maxseg); + else + CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); + } +} + + +DECLARE_CC_MODULE(newreno, &newreno_cc_algo); diff --git a/freebsd/sys/netinet/icmp6.h b/freebsd/sys/netinet/icmp6.h index 5faae7c1..a6e68864 100644 --- a/freebsd/sys/netinet/icmp6.h +++ b/freebsd/sys/netinet/icmp6.h @@ -297,8 +297,9 @@ struct nd_opt_hdr { /* Neighbor discovery option header */ #define ND_OPT_PREFIX_INFORMATION 3 #define ND_OPT_REDIRECTED_HEADER 4 #define ND_OPT_MTU 5 - -#define ND_OPT_ROUTE_INFO 200 /* draft-ietf-ipngwg-router-preference, not officially assigned yet */ +#define ND_OPT_ROUTE_INFO 24 /* RFC 4191 */ +#define ND_OPT_RDNSS 25 /* RFC 6106 */ +#define ND_OPT_DNSSL 31 /* RFC 6106 */ struct nd_opt_prefix_info { /* prefix information */ u_int8_t nd_opt_pi_type; @@ -338,6 +339,22 @@ struct nd_opt_route_info { /* route info */ /* prefix follows */ } __packed; +struct nd_opt_rdnss { /* RDNSS option (RFC 6106) */ + u_int8_t nd_opt_rdnss_type; + u_int8_t nd_opt_rdnss_len; + u_int16_t nd_opt_rdnss_reserved; + u_int32_t nd_opt_rdnss_lifetime; + /* followed by list of recursive DNS servers */ +} __packed; + +struct nd_opt_dnssl { /* DNSSL option (RFC 6106) */ + u_int8_t nd_opt_dnssl_type; + u_int8_t nd_opt_dnssl_len; + u_int16_t nd_opt_dnssl_reserved; + u_int32_t nd_opt_dnssl_lifetime; + /* followed by list of DNS search domains */ +} __packed; + /* * icmp6 namelookup */ diff --git a/freebsd/sys/netinet/icmp_var.h b/freebsd/sys/netinet/icmp_var.h index 30da6a26..d939cc2e 100644 --- a/freebsd/sys/netinet/icmp_var.h +++ b/freebsd/sys/netinet/icmp_var.h @@ -102,7 +102,8 @@ extern int badport_bandlim(int); #define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ #define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ #define BANDLIM_ICMP6_UNREACH 5 -#define BANDLIM_MAX 5 +#define BANDLIM_SCTP_OOTB 6 +#define BANDLIM_MAX 6 #endif #endif diff --git a/freebsd/sys/netinet/if_atm.c b/freebsd/sys/netinet/if_atm.c index 1c51718d..97b2525c 100644 --- a/freebsd/sys/netinet/if_atm.c +++ b/freebsd/sys/netinet/if_atm.c @@ -231,7 +231,9 @@ atm_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) npcb->npcb_flags |= NPCB_IP; npcb->ipaddr.s_addr = sin->sin_addr.s_addr; /* XXX: move npcb to llinfo when ATM ARP is ready */ +#ifdef __notyet_restored__ rt->rt_llinfo = (caddr_t) npcb; +#endif rt->rt_flags |= RTF_LLINFO; #endif /* @@ -257,7 +259,9 @@ failed: #ifdef NATM if (npcb) { npcb_free(npcb, NPCB_DESTROY); +#ifdef __notyet_restored__ rt->rt_llinfo = NULL; +#endif rt->rt_flags &= ~RTF_LLINFO; } NATM_UNLOCK(); @@ -275,9 +279,11 @@ failed: */ if (rt->rt_flags & RTF_LLINFO) { NATM_LOCK(); +#ifdef __notyet_restored__ npcb_free((struct natmpcb *)rt->rt_llinfo, NPCB_DESTROY); rt->rt_llinfo = NULL; +#endif rt->rt_flags &= ~RTF_LLINFO; NATM_UNLOCK(); } diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c index e7a20c2c..4cd4f8e4 100644 --- a/freebsd/sys/netinet/if_ether.c +++ b/freebsd/sys/netinet/if_ether.c @@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#if defined(INET) || defined(INET6) +#if defined(INET) #include #endif @@ -91,13 +91,16 @@ VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for static VNET_DEFINE(int, arp_proxyall) = 0; static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for * 20 seconds */ -static VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ +VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ + +static VNET_DEFINE(int, arp_maxhold) = 1; #define V_arpt_keep VNET(arpt_keep) #define V_arpt_down VNET(arpt_down) #define V_arp_maxtries VNET(arp_maxtries) #define V_arp_proxyall VNET(arp_proxyall) #define V_arpstat VNET(arpstat) +#define V_arp_maxhold VNET(arp_maxhold) SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, @@ -111,9 +114,15 @@ SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, &VNET_NAME(arp_proxyall), 0, "Enable proxy ARP for all suitable requests"); +SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW, + &VNET_NAME(arpt_down), 0, + "Incomplete ARP entry lifetime in seconds"); SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW, &VNET_NAME(arpstat), arpstat, "ARP statistics (struct arpstat, net/if_arp.h)"); +SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW, + &VNET_NAME(arp_maxhold), 0, + "Number of packets to hold per ARP entry"); static void arp_init(void); void arprequest(struct ifnet *, @@ -162,6 +171,7 @@ arptimer(void *arg) { struct ifnet *ifp; struct llentry *lle; + int pkts_dropped; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); lle = (struct llentry *)arg; @@ -176,18 +186,19 @@ arptimer(void *arg) callout_active(&lle->la_timer)) { callout_stop(&lle->la_timer); LLE_REMREF(lle); - (void) llentry_free(lle); + pkts_dropped = llentry_free(lle); + ARPSTAT_ADD(dropped, pkts_dropped); ARPSTAT_INC(timeouts); - } + } else { #ifdef DIAGNOSTIC - else { struct sockaddr *l3addr = L3_ADDR(lle); log(LOG_INFO, "arptimer issue: %p, IPv4 address: \"%s\"\n", lle, inet_ntoa( ((const struct sockaddr_in *)l3addr)->sin_addr)); - } #endif + LLE_WUNLOCK(lle); + } } IF_AFDATA_UNLOCK(ifp); CURVNET_RESTORE(); @@ -275,6 +286,8 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, { struct llentry *la = 0; u_int flags = 0; + struct mbuf *curr = NULL; + struct mbuf *next = NULL; int error, renew; *lle = NULL; @@ -350,15 +363,28 @@ retry: } /* * There is an arptab entry, but no ethernet address - * response yet. Replace the held mbuf with this - * latest one. + * response yet. Add the mbuf to the list, dropping + * the oldest packet if we have exceeded the system + * setting. */ if (m != NULL) { + if (la->la_numheld >= V_arp_maxhold) { + if (la->la_hold != NULL) { + next = la->la_hold->m_nextpkt; + m_freem(la->la_hold); + la->la_hold = next; + la->la_numheld--; + ARPSTAT_INC(dropped); + } + } if (la->la_hold != NULL) { - m_freem(la->la_hold); - ARPSTAT_INC(dropped); - } - la->la_hold = m; + curr = la->la_hold; + while (curr->m_nextpkt != NULL) + curr = curr->m_nextpkt; + curr->m_nextpkt = m; + } else + la->la_hold = m; + la->la_numheld++; if (renew == 0 && (flags & LLE_EXCLUSIVE)) { flags &= ~LLE_EXCLUSIVE; LLE_DOWNGRADE(la); @@ -485,7 +511,6 @@ in_arpinput(struct mbuf *m) struct rtentry *rt; struct ifaddr *ifa; struct in_ifaddr *ia; - struct mbuf *hold; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; u_int8_t *enaddr = NULL; @@ -580,15 +605,15 @@ in_arpinput(struct mbuf *m) * No match, use the first inet address on the receive interface * as a dummy address for the rest of the function. */ - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET) { ia = ifatoia(ifa); ifa_ref(ifa); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto match; } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); /* * If bridging, fall back to using any inet address. @@ -657,11 +682,13 @@ match: bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) { if (la->la_flags & LLE_STATIC) { LLE_WUNLOCK(la); - log(LOG_ERR, - "arp: %*D attempts to modify permanent " - "entry for %s on %s\n", - ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - inet_ntoa(isaddr), ifp->if_xname); + if (log_arp_permanent_modify) + log(LOG_ERR, + "arp: %*D attempts to modify " + "permanent entry for %s on %s\n", + ifp->if_addrlen, + (u_char *)ar_sha(ah), ":", + inet_ntoa(isaddr), ifp->if_xname); goto reply; } if (log_arp_movements) { @@ -698,15 +725,29 @@ match: } la->la_asked = 0; la->la_preempt = V_arp_maxtries; - hold = la->la_hold; - if (hold != NULL) { + /* + * The packets are all freed within the call to the output + * routine. + * + * NB: The lock MUST be released before the call to the + * output routine. + */ + if (la->la_hold != NULL) { + struct mbuf *m_hold, *m_hold_next; + + m_hold = la->la_hold; la->la_hold = NULL; + la->la_numheld = 0; memcpy(&sa, L3_ADDR(la), sizeof(sa)); - } - LLE_WUNLOCK(la); - if (hold != NULL) - (*ifp->if_output)(ifp, hold, &sa, NULL); - } + LLE_WUNLOCK(la); + for (; m_hold != NULL; m_hold = m_hold_next) { + m_hold_next = m_hold->m_nextpkt; + m_hold->m_nextpkt = NULL; + (*ifp->if_output)(ifp, m_hold, &sa, NULL); + } + } else + LLE_WUNLOCK(la); + } /* end of FIB loop */ reply: if (op != ARPOP_REQUEST) goto drop; @@ -758,7 +799,7 @@ reply: /* * Also check that the node which sent the ARP packet - * is on the the interface we expect it to be on. This + * is on the interface we expect it to be on. This * avoids ARP chaos if an interface is connected to the * wrong network. */ @@ -804,6 +845,7 @@ reply: ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */ m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln); m->m_pkthdr.len = m->m_len; + m->m_pkthdr.rcvif = NULL; sa.sa_family = AF_ARP; sa.sa_len = 2; (*ifp->if_output)(ifp, m, &sa, NULL); diff --git a/freebsd/sys/netinet/igmp.c b/freebsd/sys/netinet/igmp.c index c41b8f71..138a1504 100644 --- a/freebsd/sys/netinet/igmp.c +++ b/freebsd/sys/netinet/igmp.c @@ -619,7 +619,7 @@ igmp_ifdetach(struct ifnet *ifp) igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; if (igi->igi_version == IGMP_VERSION_3) { - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) @@ -635,7 +635,7 @@ igmp_ifdetach(struct ifnet *ifp) } inm_clear_recorded(inm); } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); /* * Free the in_multi reference(s) for this IGMP lifecycle. */ @@ -752,7 +752,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, * for the interface on which the query arrived, * except those which are already running. */ - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) @@ -780,7 +780,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, break; } } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); out_locked: IGMP_UNLOCK(); @@ -853,7 +853,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, */ CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)", ifp, ifp->if_xname); - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) @@ -861,7 +861,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, inm = (struct in_multi *)ifma->ifma_protospec; igmp_v2_update_group(inm, timer); } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); } else { /* * Group-specific IGMPv2 query, we need only @@ -1643,7 +1643,7 @@ igmp_fasttimo_vnet(void) struct ifqueue qrq; /* Query response packets */ struct ifnet *ifp; struct igmp_ifinfo *igi; - struct ifmultiaddr *ifma, *tifma; + struct ifmultiaddr *ifma; struct in_multi *inm; int loop, uri_fasthz; @@ -1709,9 +1709,8 @@ igmp_fasttimo_vnet(void) IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS); } - IF_ADDR_LOCK(ifp); - TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, - tifma) { + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; @@ -1728,7 +1727,7 @@ igmp_fasttimo_vnet(void) break; } } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); if (igi->igi_version == IGMP_VERSION_3) { struct in_multi *tinm; @@ -2005,7 +2004,7 @@ igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi) { struct ifmultiaddr *ifma; struct ifnet *ifp; - struct in_multi *inm; + struct in_multi *inm, *tinm; CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__, igi->igi_ifp, igi->igi_ifp->if_xname); @@ -2025,7 +2024,7 @@ igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi) * for all memberships scoped to this link. */ ifp = igi->igi_ifp; - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) @@ -2051,14 +2050,8 @@ igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi) * transition to REPORTING to ensure the host leave * message is sent upstream to the old querier -- * transition to NOT would lose the leave and race. - * - * SMPNG: Must drop and re-acquire IF_ADDR_LOCK - * around inm_release_locked(), as it is not - * a recursive mutex. */ - IF_ADDR_UNLOCK(ifp); - inm_release_locked(inm); - IF_ADDR_LOCK(ifp); + SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele); /* FALLTHROUGH */ case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: @@ -2076,7 +2069,11 @@ igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi) inm->inm_timer = 0; _IF_DRAIN(&inm->inm_scq); } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); + SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) { + SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele); + inm_release_locked(inm); + } } /* @@ -2289,13 +2286,11 @@ igmp_change_state(struct in_multi *inm) */ KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); ifp = inm->inm_ifma->ifma_ifp; - if (ifp != NULL) { - /* - * Sanity check that netinet's notion of ifp is the - * same as net's. - */ - KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); - } + /* + * Sanity check that netinet's notion of ifp is the + * same as net's. + */ + KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); IGMP_LOCK(); @@ -3322,7 +3317,7 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq) static void igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi) { - struct ifmultiaddr *ifma, *tifma; + struct ifmultiaddr *ifma; struct ifnet *ifp; struct in_multi *inm; int retval, loop; @@ -3335,8 +3330,8 @@ igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi) ifp = igi->igi_ifp; - IF_ADDR_LOCK(ifp); - TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, tifma) { + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; @@ -3366,7 +3361,7 @@ igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi) break; } } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0; igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop); diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c index b9231e13..9d84e82b 100644 --- a/freebsd/sys/netinet/in.c +++ b/freebsd/sys/netinet/in.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -71,7 +72,7 @@ static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); static int in_addprefix(struct in_ifaddr *, int); -static int in_scrubprefix(struct in_ifaddr *); +static int in_scrubprefix(struct in_ifaddr *, u_int); static void in_socktrim(struct sockaddr_in *); static int in_ifinit(struct ifnet *, struct in_ifaddr *, struct sockaddr_in *, int); @@ -91,6 +92,9 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW, VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcbinfo VNET(ripcbinfo) +VNET_DECLARE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ +#define V_arpstat VNET(arpstat) + /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). If subnetsarelocal @@ -340,7 +344,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); if (ia == NULL) { - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { iap = ifatoia(ifa); if (iap->ia_addr.sin_family == AF_INET) { @@ -354,7 +358,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, } if (ia != NULL) ifa_ref(&ia->ia_ifa); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); } if (ia == NULL) iaIsFirst = 1; @@ -418,9 +422,9 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, ia->ia_ifp = ifp; ifa_ref(ifa); /* if_addrhead */ - IF_ADDR_LOCK(ifp); + IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_WUNLOCK(ifp); ifa_ref(ifa); /* in_ifaddrhead */ IN_IFADDR_WLOCK(); TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link); @@ -546,7 +550,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * is the same as before, then the call is * un-necessarily executed here. */ - in_ifscrub(ifp, ia); + in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_sockmask = ifra->ifra_mask; ia->ia_sockmask.sin_family = AF_INET; ia->ia_subnetmask = @@ -555,7 +559,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, } if ((ifp->if_flags & IFF_POINTOPOINT) && (ifra->ifra_dstaddr.sin_family == AF_INET)) { - in_ifscrub(ifp, ia); + in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_dstaddr = ifra->ifra_dstaddr; maskIsNew = 1; /* We lie; but the effect's the same */ } @@ -583,7 +587,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, /* * in_ifscrub kills the interface route. */ - in_ifscrub(ifp, ia); + in_ifscrub(ifp, ia, LLE_STATIC); /* * in_ifadown gets rid of all the rest of @@ -600,7 +604,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, panic("in_control: unsupported ioctl"); } - IF_ADDR_LOCK(ifp); + IF_ADDR_WLOCK(ifp); /* Re-check that ia is still part of the list. */ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa == &ia->ia_ifa) @@ -612,12 +616,12 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * try it again for the next loop as there is no other exit * path between here and out. */ - IF_ADDR_UNLOCK(ifp); + IF_ADDR_WUNLOCK(ifp); error = EADDRNOTAVAIL; goto out; } TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); @@ -718,7 +722,7 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, if (iflr->flags & IFLR_PREFIX) return (EINVAL); - /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */ + /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); @@ -767,8 +771,9 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, } } + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != AF_INET6) + if (ifa->ifa_addr->sa_family != AF_INET) continue; if (match.s_addr == 0) break; @@ -777,6 +782,9 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, if (candidate.s_addr == match.s_addr) break; } + if (ifa != NULL) + ifa_ref(ifa); + IF_ADDR_RUNLOCK(ifp); if (ifa == NULL) return (EADDRNOTAVAIL); ia = (struct in_ifaddr *)ifa; @@ -795,12 +803,13 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, in_mask2len(&ia->ia_sockmask.sin_addr); iflr->flags = 0; /*XXX*/ + ifa_free(ifa); return (0); } else { struct in_aliasreq ifra; - /* fill in_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ + /* fill in_aliasreq and do ioctl(SIOCDIFADDR) */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); @@ -813,6 +822,7 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, } bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr, ia->ia_sockmask.sin_len); + ifa_free(ifa); return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra, ifp, td)); @@ -827,10 +837,10 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, * Delete any existing route for an interface. */ void -in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia) +in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags) { - in_scrubprefix(ia); + in_scrubprefix(ia, flags); } /* @@ -885,7 +895,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, splx(s); if (scrub) { ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr; - in_ifscrub(ifp, ia); + in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; } if (IN_CLASSA(i)) @@ -949,7 +959,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, bzero(&ia_ro, sizeof(ia_ro)); *((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr; - rtalloc_ign_fib(&ia_ro, 0, 0); + rtalloc_ign_fib(&ia_ro, 0, RT_DEFAULT_FIB); if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) && (ia_ro.ro_rt->rt_ifp == V_loif)) { RT_LOCK(ia_ro.ro_rt); @@ -1035,7 +1045,7 @@ in_addprefix(struct in_ifaddr *target, int flags) IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { - p = ia->ia_addr.sin_addr; + p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; @@ -1093,7 +1103,7 @@ extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr); * otherwise. */ static int -in_scrubprefix(struct in_ifaddr *target) +in_scrubprefix(struct in_ifaddr *target, u_int flags) { struct in_ifaddr *ia; struct in_addr prefix, mask, p; @@ -1124,17 +1134,22 @@ in_scrubprefix(struct in_ifaddr *target) RT_LOCK(ia_ro.ro_rt); if (ia_ro.ro_rt->rt_refcnt <= 1) freeit = 1; - else + else if (flags & LLE_STATIC) { RT_REMREF(ia_ro.ro_rt); + target->ia_flags &= ~IFA_RTSELF; + } RTFREE_LOCKED(ia_ro.ro_rt); } - if (freeit) + if (freeit && (flags & LLE_STATIC)) { error = ifa_del_loopback_route((struct ifaddr *)target, (struct sockaddr *)&target->ia_addr); - if (error == 0) - target->ia_flags &= ~IFA_RTSELF; - /* remove arp cache */ - arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr); + if (error == 0) + target->ia_flags &= ~IFA_RTSELF; + } + if ((flags & LLE_STATIC) && + !(target->ia_ifp->if_flags & IFF_NOARP)) + /* remove arp cache */ + arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr); } if (rtinitflags(target)) @@ -1159,7 +1174,8 @@ in_scrubprefix(struct in_ifaddr *target) p.s_addr &= ia->ia_sockmask.sin_addr.s_addr; } - if (prefix.s_addr != p.s_addr) + if ((prefix.s_addr != p.s_addr) || + !(ia->ia_ifp->if_flags & IFF_UP)) continue; /* @@ -1172,17 +1188,24 @@ in_scrubprefix(struct in_ifaddr *target) * doesn't support such action. */ if ((ia->ia_flags & IFA_ROUTE) == 0 - && (ia->ia_ifp->if_type != IFT_CARP) - ) { + && (ia->ia_ifp->if_type != IFT_CARP)) { + ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); - rtinit(&(target->ia_ifa), (int)RTM_DELETE, + error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); - target->ia_flags &= ~IFA_ROUTE; - + if (error == 0) + target->ia_flags &= ~IFA_ROUTE; + else + log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n", + error); error = rtinit(&ia->ia_ifa, (int)RTM_ADD, rtinitflags(ia) | RTF_UP); if (error == 0) ia->ia_flags |= IFA_ROUTE; + else + log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n", + error); + ifa_free(&ia->ia_ifa); return (error); } } @@ -1199,15 +1222,18 @@ in_scrubprefix(struct in_ifaddr *target) mask0.sin_len = sizeof(mask0); mask0.sin_family = AF_INET; mask0.sin_addr.s_addr = target->ia_subnetmask; - lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0, - (struct sockaddr *)&mask0); + lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0, + (struct sockaddr *)&mask0, flags); /* * As no-one seem to have this prefix, we can remove the route. */ - rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); - target->ia_flags &= ~IFA_ROUTE; - return (0); + error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); + if (error == 0) + target->ia_flags &= ~IFA_ROUTE; + else + log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error); + return (error); } #undef rtinitflags @@ -1286,7 +1312,7 @@ in_purgemaddrs(struct ifnet *ifp) * We need to do this as IF_ADDR_LOCK() may be re-acquired * by code further down. */ - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) @@ -1298,7 +1324,7 @@ in_purgemaddrs(struct ifnet *ifp) inm = (struct in_multi *)ifma->ifma_protospec; LIST_INSERT_HEAD(&purgeinms, inm, inm_link); } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) { LIST_REMOVE(inm, inm_link); @@ -1359,25 +1385,33 @@ in_lltable_free(struct lltable *llt, struct llentry *lle) static void in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, - const struct sockaddr *mask) + const struct sockaddr *mask, + u_int flags) { const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix; const struct sockaddr_in *msk = (const struct sockaddr_in *)mask; struct llentry *lle, *next; register int i; + size_t pkts_dropped; for (i=0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { - if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle), - pfx, msk)) { + /* + * (flags & LLE_STATIC) means deleting all entries + * including static ARP entries + */ + if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle), + pfx, msk) && + ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { int canceled; canceled = callout_drain(&lle->la_timer); LLE_WLOCK(lle); if (canceled) LLE_REMREF(lle); - llentry_free(lle); + pkts_dropped = llentry_free(lle); + ARPSTAT_ADD(dropped, pkts_dropped); } } } @@ -1394,19 +1428,68 @@ in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr /* XXX rtalloc1 should take a const param */ rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0); - if (rt == NULL || (!(flags & LLE_PUB) && - ((rt->rt_flags & RTF_GATEWAY) || - (rt->rt_ifp != ifp)))) { + + if (rt == NULL) + return (EINVAL); + + /* + * If the gateway for an existing host route matches the target L3 + * address, which is a special route inserted by some implementation + * such as MANET, and the interface is of the correct type, then + * allow for ARP to proceed. + */ + if (rt->rt_flags & RTF_GATEWAY) { + if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp || + rt->rt_ifp->if_type != IFT_ETHER || + (rt->rt_ifp->if_flags & + (IFF_NOARP | IFF_STATICARP)) != 0 || + memcmp(rt->rt_gateway->sa_data, l3addr->sa_data, + sizeof(in_addr_t)) != 0) { + RTFREE_LOCKED(rt); + return (EINVAL); + } + } + + /* + * Make sure that at least the destination address is covered + * by the route. This is for handling the case where 2 or more + * interfaces have the same prefix. An incoming packet arrives + * on one interface and the corresponding outgoing packet leaves + * another interface. + */ + if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) { + const char *sa, *mask, *addr, *lim; + int len; + + mask = (const char *)rt_mask(rt); + /* + * Just being extra cautious to avoid some custom + * code getting into trouble. + */ + if (mask == NULL) { + RTFREE_LOCKED(rt); + return (EINVAL); + } + + sa = (const char *)rt_key(rt); + addr = (const char *)l3addr; + len = ((const struct sockaddr_in *)l3addr)->sin_len; + lim = addr + len; + + for ( ; addr < lim; sa++, mask++, addr++) { + if ((*sa ^ *addr) & *mask) { #ifdef DIAGNOSTIC - log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", - inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); + log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", + inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); #endif - if (rt != NULL) - RTFREE_LOCKED(rt); - return (EINVAL); + RTFREE_LOCKED(rt); + return (EINVAL); + } + } } + RTFREE_LOCKED(rt); - return 0; + return (0); } /* @@ -1576,10 +1659,8 @@ in_domifattach(struct ifnet *ifp) llt = lltable_init(ifp, AF_INET); if (llt != NULL) { - llt->llt_new = in_lltable_new; llt->llt_free = in_lltable_free; llt->llt_prefix_free = in_lltable_prefix_free; - llt->llt_rtcheck = in_lltable_rtcheck; llt->llt_lookup = in_lltable_lookup; llt->llt_dump = in_lltable_dump; } diff --git a/freebsd/sys/netinet/in.h b/freebsd/sys/netinet/in.h index c0bc26ec..16df5f24 100644 --- a/freebsd/sys/netinet/in.h +++ b/freebsd/sys/netinet/in.h @@ -481,6 +481,7 @@ __END_DECLS #define IP_RECVTTL 65 /* bool; receive IP TTL w/dgram */ #define IP_MINTTL 66 /* minimum TTL for packet or drop */ #define IP_DONTFRAG 67 /* don't fragment packet */ +#define IP_RECVTOS 68 /* bool; receive IP TOS w/dgram */ /* IPv4 Source Filter Multicast API [RFC3678] */ #define IP_ADD_SOURCE_MEMBERSHIP 70 /* join a source-specific group */ diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c index d6a316a5..b5fd6572 100644 --- a/freebsd/sys/netinet/in_mcast.c +++ b/freebsd/sys/netinet/in_mcast.c @@ -423,7 +423,7 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *group, return (error); /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ - IF_ADDR_LOCK(ifp); + IF_ADDR_WLOCK(ifp); /* * If something other than netinet is occupying the link-layer @@ -447,11 +447,11 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *group, #endif ++inm->inm_refcount; *pinm = inm; - IF_ADDR_UNLOCK(ifp); + IF_ADDR_WUNLOCK(ifp); return (0); } - IF_ADDR_LOCK_ASSERT(ifp); + IF_ADDR_WLOCK_ASSERT(ifp); /* * A new in_multi record is needed; allocate and initialize it. @@ -463,7 +463,7 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *group, inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); if (inm == NULL) { if_delmulti_ifma(ifma); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_WUNLOCK(ifp); return (ENOMEM); } inm->inm_addr = *group; @@ -486,7 +486,7 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *group, *pinm = inm; - IF_ADDR_UNLOCK(ifp); + IF_ADDR_WUNLOCK(ifp); return (0); } @@ -1615,6 +1615,8 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) * has asked for, but we always tell userland how big the * buffer really needs to be. */ + if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) + msfr.msfr_nsrcs = in_mcast_maxsocksrc; tss = NULL; if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, @@ -2777,7 +2779,7 @@ sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) IN_MULTI_LOCK(); - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) @@ -2810,7 +2812,7 @@ sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) break; } } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); IN_MULTI_UNLOCK(); diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c index d56eb7ae..c8c1c3b0 100644 --- a/freebsd/sys/netinet/in_pcb.c +++ b/freebsd/sys/netinet/in_pcb.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -79,6 +80,7 @@ __FBSDID("$FreeBSD$"); #ifdef INET6 #include #include +#include #endif /* INET6 */ @@ -277,6 +279,124 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) return (0); } +#if defined(INET) || defined(INET6) +int +in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, + struct ucred *cred, int wild) +{ + struct inpcbinfo *pcbinfo; + struct inpcb *tmpinp; + unsigned short *lastport; + int count, dorandom, error; + u_short aux, first, last, lport; +#ifdef INET + struct in_addr laddr; +#endif + + pcbinfo = inp->inp_pcbinfo; + + /* + * Because no actual state changes occur here, a global write lock on + * the pcbinfo isn't required. + */ + INP_INFO_LOCK_ASSERT(pcbinfo); + INP_LOCK_ASSERT(inp); + + if (inp->inp_flags & INP_HIGHPORT) { + first = V_ipport_hifirstauto; /* sysctl */ + last = V_ipport_hilastauto; + lastport = &pcbinfo->ipi_lasthi; + } else if (inp->inp_flags & INP_LOWPORT) { + error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); + if (error) + return (error); + first = V_ipport_lowfirstauto; /* 1023 */ + last = V_ipport_lowlastauto; /* 600 */ + lastport = &pcbinfo->ipi_lastlow; + } else { + first = V_ipport_firstauto; /* sysctl */ + last = V_ipport_lastauto; + lastport = &pcbinfo->ipi_lastport; + } + /* + * For UDP, use random port allocation as long as the user + * allows it. For TCP (and as of yet unknown) connections, + * use random port allocation only if the user allows it AND + * ipport_tick() allows it. + */ + if (V_ipport_randomized && + (!V_ipport_stoprandom || pcbinfo == &V_udbinfo)) + dorandom = 1; + else + dorandom = 0; + /* + * It makes no sense to do random port allocation if + * we have the only port available. + */ + if (first == last) + dorandom = 0; + /* Make sure to not include UDP packets in the count. */ + if (pcbinfo != &V_udbinfo) + V_ipport_tcpallocs++; + /* + * Instead of having two loops further down counting up or down + * make sure that first is always <= last and go with only one + * code path implementing all logic. + */ + if (first > last) { + aux = first; + first = last; + last = aux; + } + +#ifdef INET + /* Make the compiler happy. */ + laddr.s_addr = 0; + if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) { + KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p", + __func__, inp)); + laddr = *laddrp; + } +#endif + lport = *lportp; + + if (dorandom) + *lastport = first + (arc4random() % (last - first)); + + count = last - first; + + do { + if (count-- < 0) /* completely used? */ + return (EADDRNOTAVAIL); + ++*lastport; + if (*lastport < first || *lastport > last) + *lastport = first; + lport = htons(*lastport); + +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0) + tmpinp = in6_pcblookup_local(pcbinfo, + &inp->in6p_laddr, lport, wild, cred); +#endif +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + tmpinp = in_pcblookup_local(pcbinfo, laddr, + lport, wild, cred); +#endif + } while (tmpinp != NULL); + +#ifdef INET + if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) + laddrp->s_addr = laddr.s_addr; +#endif + *lportp = lport; + + return (0); +} +#endif /* INET || INET6 */ + /* * Set up a bind operation on a PCB, performing port allocation * as required, but do not actually modify the PCB. Callers can @@ -291,14 +411,12 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, u_short *lportp, struct ucred *cred) { struct socket *so = inp->inp_socket; - unsigned short *lastport; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct in_addr laddr; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error; - int dorandom; /* * Because no actual state changes occur here, a global write lock on @@ -427,72 +545,10 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, if (*lportp != 0) lport = *lportp; if (lport == 0) { - u_short first, last, aux; - int count; - - if (inp->inp_flags & INP_HIGHPORT) { - first = V_ipport_hifirstauto; /* sysctl */ - last = V_ipport_hilastauto; - lastport = &pcbinfo->ipi_lasthi; - } else if (inp->inp_flags & INP_LOWPORT) { - error = priv_check_cred(cred, - PRIV_NETINET_RESERVEDPORT, 0); - if (error) - return error; - first = V_ipport_lowfirstauto; /* 1023 */ - last = V_ipport_lowlastauto; /* 600 */ - lastport = &pcbinfo->ipi_lastlow; - } else { - first = V_ipport_firstauto; /* sysctl */ - last = V_ipport_lastauto; - lastport = &pcbinfo->ipi_lastport; - } - /* - * For UDP, use random port allocation as long as the user - * allows it. For TCP (and as of yet unknown) connections, - * use random port allocation only if the user allows it AND - * ipport_tick() allows it. - */ - if (V_ipport_randomized && - (!V_ipport_stoprandom || pcbinfo == &V_udbinfo)) - dorandom = 1; - else - dorandom = 0; - /* - * It makes no sense to do random port allocation if - * we have the only port available. - */ - if (first == last) - dorandom = 0; - /* Make sure to not include UDP packets in the count. */ - if (pcbinfo != &V_udbinfo) - V_ipport_tcpallocs++; - /* - * Instead of having two loops further down counting up or down - * make sure that first is always <= last and go with only one - * code path implementing all logic. - */ - if (first > last) { - aux = first; - first = last; - last = aux; - } - - if (dorandom) - *lastport = first + - (arc4random() % (last - first)); - - count = last - first; + error = in_pcb_lport(inp, &laddr, &lport, cred, wild); + if (error != 0) + return (error); - do { - if (count-- < 0) /* completely used? */ - return (EADDRNOTAVAIL); - ++*lastport; - if (*lastport < first || *lastport > last) - *lastport = first; - lport = htons(*lastport); - } while (in_pcblookup_local(pcbinfo, laddr, - lport, wild, cred)); } *laddrp = laddr.s_addr; *lportp = lport; @@ -615,7 +671,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, ifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); ia = NULL; - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; @@ -629,10 +685,10 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto done; } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); @@ -674,7 +730,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, */ ia = NULL; ifp = sro.ro_rt->rt_ifp; - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) @@ -687,10 +743,10 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto done; } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); @@ -738,7 +794,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, ifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); ia = NULL; - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; @@ -753,10 +809,10 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto done; } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); } /* 3. As a last resort return the 'default' jail address. */ @@ -858,17 +914,20 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; IN_IFADDR_RLOCK(); - TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) - if (ia->ia_ifp == ifp) + TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { + if ((ia->ia_ifp == ifp) && + (cred == NULL || + prison_check_ip4(cred, + &ia->ia_addr.sin_addr) == 0)) break; - if (ia == NULL) { - IN_IFADDR_RUNLOCK(); + } + if (ia == NULL) error = EADDRNOTAVAIL; - } else { + else { laddr = ia->ia_addr.sin_addr; - IN_IFADDR_RUNLOCK(); error = 0; } + IN_IFADDR_RUNLOCK(); } } if (error) @@ -1813,6 +1872,10 @@ db_print_inpflags(int inp_flags) db_printf("%sINP_DONTFRAG", comma ? ", " : ""); comma = 1; } + if (inp_flags & INP_RECVTOS) { + db_printf("%sINP_RECVTOS", comma ? ", " : ""); + comma = 1; + } if (inp_flags & IN6P_IPV6_V6ONLY) { db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : ""); comma = 1; diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h index 6e3a24be..9f602ce2 100644 --- a/freebsd/sys/netinet/in_pcb.h +++ b/freebsd/sys/netinet/in_pcb.h @@ -413,6 +413,7 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_DONTFRAG 0x00000800 /* don't fragment packet */ #define INP_BINDANY 0x00001000 /* allow bind to any address */ #define INP_INHASHLIST 0x00002000 /* in_pcbinshash() has been called */ +#define INP_RECVTOS 0x00004000 /* receive incoming IP TOS */ #define IN6P_IPV6_V6ONLY 0x00008000 /* restrict AF_INET6 socket for v6 */ #define IN6P_PKTINFO 0x00010000 /* receive IP6 dst and I/F */ #define IN6P_HOPLIMIT 0x00020000 /* receive hoplimit */ @@ -432,7 +433,7 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define IN6P_MTU 0x80000000 /* receive path MTU */ #define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ - INP_RECVIF|INP_RECVTTL|\ + INP_RECVIF|INP_RECVTTL|INP_RECVTOS|\ IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\ IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\ IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ @@ -486,6 +487,8 @@ extern struct callout ipport_tick_callout; void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); int in_pcballoc(struct socket *, struct inpcbinfo *); int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); +int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *, + struct ucred *, int); int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, struct ucred *); int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); diff --git a/freebsd/sys/netinet/in_proto.c b/freebsd/sys/netinet/in_proto.c index db25a54d..8b184cb9 100644 --- a/freebsd/sys/netinet/in_proto.c +++ b/freebsd/sys/netinet/in_proto.c @@ -95,6 +95,8 @@ static struct pr_usrreqs nousrreqs; #include #endif +FEATURE(inet, "Internet Protocol version 4"); + extern struct domain inetdomain; /* Spacer for loadable protocols. */ @@ -150,7 +152,7 @@ struct protosw inetsw[] = { }, #ifdef SCTP { - .pr_type = SOCK_DGRAM, + .pr_type = SOCK_SEQPACKET, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, @@ -164,18 +166,6 @@ struct protosw inetsw[] = { .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs }, -{ - .pr_type = SOCK_SEQPACKET, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_SCTP, - .pr_flags = PR_WANTRCVD, - .pr_input = sctp_input, - .pr_ctlinput = sctp_ctlinput, - .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, - .pr_usrreqs = &sctp_usrreqs -}, - { .pr_type = SOCK_STREAM, .pr_domain = &inetdomain, diff --git a/freebsd/sys/netinet/in_rmx.c b/freebsd/sys/netinet/in_rmx.c index dda56194..447605ec 100644 --- a/freebsd/sys/netinet/in_rmx.c +++ b/freebsd/sys/netinet/in_rmx.c @@ -29,19 +29,6 @@ * SUCH DAMAGE. */ -/* - * This code does two things necessary for the enhanced TCP metrics to - * function in a useful manner: - * 1) It marks all non-host routes as `cloning', thus ensuring that - * every actual reference to such a route actually gets turned - * into a reference to a host route to the specific destination - * requested. - * 2) When such routes lose all their references, it arranges for them - * to be deleted in some random collection of circumstances, so that - * a large quantity of stale routing data is not kept in kernel memory - * indefinitely. See in_rtqtimo() below for the exact mechanism. - */ - #include __FBSDID("$FreeBSD$"); @@ -60,6 +47,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include extern int in_inithead(void **head, int off); @@ -342,6 +331,13 @@ in_rtqdrain(void) VNET_LIST_RUNLOCK_NOSLEEP(); } +void +in_setmatchfunc(struct radix_node_head *rnh, int val) +{ + + rnh->rnh_matchaddr = (val != 0) ? rn_match : in_matroute; +} + static int _in_rt_was_here; /* * Initialize our routing tree. @@ -367,7 +363,7 @@ in_inithead(void **head, int off) rnh = *head; rnh->rnh_addaddr = in_addroute; - rnh->rnh_matchaddr = in_matroute; + in_setmatchfunc(rnh, V_drop_redirect); rnh->rnh_close = in_clsroute; if (_in_rt_was_here == 0 ) { callout_init(&V_rtq_timer, CALLOUT_MPSAFE); diff --git a/freebsd/sys/netinet/in_var.h b/freebsd/sys/netinet/in_var.h index cd1d9043..c04d45b9 100644 --- a/freebsd/sys/netinet/in_var.h +++ b/freebsd/sys/netinet/in_var.h @@ -396,9 +396,9 @@ inm_lookup(struct ifnet *ifp, const struct in_addr ina) struct in_multi *inm; IN_MULTI_LOCK_ASSERT(); - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); inm = inm_lookup_locked(ifp, ina); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); return (inm); } @@ -423,6 +423,7 @@ inm_acquire_locked(struct in_multi *inm) struct rtentry; struct route; struct ip_moptions; +struct radix_node_head; int imo_multi_filter(const struct ip_moptions *, const struct ifnet *, const struct sockaddr *, const struct sockaddr *); @@ -447,7 +448,7 @@ int in_control(struct socket *, u_long, caddr_t, struct ifnet *, void in_rtqdrain(void); void ip_input(struct mbuf *); int in_ifadown(struct ifaddr *ifa, int); -void in_ifscrub(struct ifnet *, struct in_ifaddr *); +void in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int); struct mbuf *ip_fastforward(struct mbuf *); void *in_domifattach(struct ifnet *); void in_domifdetach(struct ifnet *, void *); @@ -461,6 +462,7 @@ void in_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int in_rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); +void in_setmatchfunc(struct radix_node_head *, int); #if 0 int in_rt_getifa(struct rt_addrinfo *, u_int fibnum); diff --git a/freebsd/sys/netinet/ip.h b/freebsd/sys/netinet/ip.h index daee533e..6c9482f9 100644 --- a/freebsd/sys/netinet/ip.h +++ b/freebsd/sys/netinet/ip.h @@ -91,6 +91,31 @@ struct ip { #define IPTOS_PREC_PRIORITY 0x20 #define IPTOS_PREC_ROUTINE 0x00 +/* + * Definitions for DiffServ Codepoints as per RFC2474 + */ +#define IPTOS_DSCP_CS0 0x00 +#define IPTOS_DSCP_CS1 0x20 +#define IPTOS_DSCP_AF11 0x28 +#define IPTOS_DSCP_AF12 0x30 +#define IPTOS_DSCP_AF13 0x38 +#define IPTOS_DSCP_CS2 0x40 +#define IPTOS_DSCP_AF21 0x48 +#define IPTOS_DSCP_AF22 0x50 +#define IPTOS_DSCP_AF23 0x58 +#define IPTOS_DSCP_CS3 0x60 +#define IPTOS_DSCP_AF31 0x68 +#define IPTOS_DSCP_AF32 0x70 +#define IPTOS_DSCP_AF33 0x78 +#define IPTOS_DSCP_CS4 0x80 +#define IPTOS_DSCP_AF41 0x88 +#define IPTOS_DSCP_AF42 0x90 +#define IPTOS_DSCP_AF43 0x98 +#define IPTOS_DSCP_CS5 0xa0 +#define IPTOS_DSCP_EF 0xb8 +#define IPTOS_DSCP_CS6 0xc0 +#define IPTOS_DSCP_CS7 0xe0 + /* * ECN (Explicit Congestion Notification) codepoints in RFC3168 mapped to the * lower 2 bits of the TOS field. diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c index 7b2a7e18..3cfcf707 100644 --- a/freebsd/sys/netinet/ip_carp.c +++ b/freebsd/sys/netinet/ip_carp.c @@ -222,7 +222,7 @@ static void carp_set_state(struct carp_softc *, int); static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; -static void carp_multicast_cleanup(struct carp_softc *); +static void carp_multicast_cleanup(struct carp_softc *, int dofree); static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); static void carp_carpdev_state_locked(struct carp_if *); @@ -231,7 +231,7 @@ static void carp_sc_state_locked(struct carp_softc *); static void carp_send_na(struct carp_softc *); static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); -static void carp_multicast6_cleanup(struct carp_softc *); +static void carp_multicast6_cleanup(struct carp_softc *, int dofree); #endif static LIST_HEAD(, carp_softc) carpif_list; @@ -283,7 +283,7 @@ carp_hmac_prepare(struct carp_softc *sc) found = 0; last = cur; cur.s_addr = 0xffffffff; - IF_ADDR_LOCK(SC2IFP(sc)); + IF_ADDR_RLOCK(SC2IFP(sc)); TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; if (ifa->ifa_addr->sa_family == AF_INET && @@ -293,7 +293,7 @@ carp_hmac_prepare(struct carp_softc *sc) found++; } } - IF_ADDR_UNLOCK(SC2IFP(sc)); + IF_ADDR_RUNLOCK(SC2IFP(sc)); if (found) SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); } while (found); @@ -304,7 +304,7 @@ carp_hmac_prepare(struct carp_softc *sc) found = 0; last6 = cur6; memset(&cur6, 0xff, sizeof(cur6)); - IF_ADDR_LOCK(SC2IFP(sc)); + IF_ADDR_RLOCK(SC2IFP(sc)); TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { in6 = ifatoia6(ifa)->ia_addr.sin6_addr; if (IN6_IS_SCOPE_EMBED(&in6)) @@ -316,7 +316,7 @@ carp_hmac_prepare(struct carp_softc *sc) found++; } } - IF_ADDR_UNLOCK(SC2IFP(sc)); + IF_ADDR_RUNLOCK(SC2IFP(sc)); if (found) SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); } while (found); @@ -470,9 +470,11 @@ carp_clone_destroy(struct ifnet *ifp) /* * This function can be called on CARP interface destroy path, * and in case of the removal of the underlying interface as - * well. We differentiate these two cases. In the latter case - * we do not cleanup our multicast memberships, since they - * are already freed. Also, in the latter case we do not + * well. We differentiate these two cases: in case of destruction + * of the underlying interface, we do not cleanup our multicast + * memberships, since they are already freed. But we purge pointers + * to multicast structures, since they are no longer valid, to + * avoid panic in future calls to carpdetach(). Also, we do not * release the lock on return, because the function will be * called once more, for another CARP instance on the same * interface. @@ -497,10 +499,9 @@ carpdetach(struct carp_softc *sc, int unlock) carp_set_state(sc, INIT); SC2IFP(sc)->if_flags &= ~IFF_UP; carp_setrun(sc, 0); - if (unlock) - carp_multicast_cleanup(sc); + carp_multicast_cleanup(sc, unlock); #ifdef INET6 - carp_multicast6_cleanup(sc); + carp_multicast6_cleanup(sc, unlock); #endif if (sc->sc_carpdev != NULL) { @@ -1141,7 +1142,7 @@ carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) (SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) || (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { - IF_ADDR_LOCK(SC2IFP(vh)); + IF_ADDR_RLOCK(SC2IFP(vh)); TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family == AF_INET && @@ -1149,7 +1150,7 @@ carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) ifatoia(ifa)->ia_addr.sin_addr.s_addr) count++; } - IF_ADDR_UNLOCK(SC2IFP(vh)); + IF_ADDR_RUNLOCK(SC2IFP(vh)); } } return (count); @@ -1189,7 +1190,7 @@ carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia, TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) { - IF_ADDR_LOCK(SC2IFP(vh)); + IF_ADDR_RLOCK(SC2IFP(vh)); TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family == @@ -1200,11 +1201,11 @@ carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia, if (vh->sc_state == MASTER) { *enaddr = IF_LLADDR(vh->sc_ifp); - IF_ADDR_UNLOCK(SC2IFP(vh)); + IF_ADDR_RUNLOCK(SC2IFP(vh)); CARP_UNLOCK(cif); return (1); } else { - IF_ADDR_UNLOCK(SC2IFP(vh)); + IF_ADDR_RUNLOCK(SC2IFP(vh)); CARP_UNLOCK(cif); return (0); } @@ -1212,7 +1213,7 @@ carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia, count++; } } - IF_ADDR_UNLOCK(SC2IFP(vh)); + IF_ADDR_RUNLOCK(SC2IFP(vh)); } } } else { @@ -1242,7 +1243,7 @@ carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) cif = ifp->if_carp; CARP_LOCK(cif); TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { - IF_ADDR_LOCK(SC2IFP(vh)); + IF_ADDR_RLOCK(SC2IFP(vh)); TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { if (IN6_ARE_ADDR_EQUAL(taddr, &ifatoia6(ifa)->ia_addr.sin6_addr) && @@ -1250,12 +1251,12 @@ carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && vh->sc_state == MASTER) { ifa_ref(ifa); - IF_ADDR_UNLOCK(SC2IFP(vh)); + IF_ADDR_RUNLOCK(SC2IFP(vh)); CARP_UNLOCK(cif); return (ifa); } } - IF_ADDR_UNLOCK(SC2IFP(vh)); + IF_ADDR_RUNLOCK(SC2IFP(vh)); } CARP_UNLOCK(cif); @@ -1273,7 +1274,7 @@ carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) cif = ifp->if_carp; CARP_LOCK(cif); TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { - IF_ADDR_LOCK(SC2IFP(sc)); + IF_ADDR_RLOCK(SC2IFP(sc)); TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { if (IN6_ARE_ADDR_EQUAL(taddr, &ifatoia6(ifa)->ia_addr.sin6_addr) && @@ -1284,7 +1285,7 @@ carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) { /* better a bit than nothing */ - IF_ADDR_UNLOCK(SC2IFP(sc)); + IF_ADDR_RUNLOCK(SC2IFP(sc)); CARP_UNLOCK(cif); return (IF_LLADDR(sc->sc_ifp)); } @@ -1292,12 +1293,12 @@ carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) sizeof(struct ifnet *)); m_tag_prepend(m, mtag); - IF_ADDR_UNLOCK(SC2IFP(sc)); + IF_ADDR_RUNLOCK(SC2IFP(sc)); CARP_UNLOCK(cif); return (IF_LLADDR(sc->sc_ifp)); } } - IF_ADDR_UNLOCK(SC2IFP(sc)); + IF_ADDR_RUNLOCK(SC2IFP(sc)); } CARP_UNLOCK(cif); @@ -1394,22 +1395,10 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) switch (sc->sc_state) { case INIT: - if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { - carp_send_ad_locked(sc); - carp_send_arp(sc); -#ifdef INET6 - carp_send_na(sc); -#endif /* INET6 */ - CARP_LOG("%s: INIT -> MASTER (preempting)\n", - SC2IFP(sc)->if_xname); - carp_set_state(sc, MASTER); - carp_setroute(sc, RTM_ADD); - } else { - CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); - carp_set_state(sc, BACKUP); - carp_setroute(sc, RTM_DELETE); - carp_setrun(sc, 0); - } + CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); + carp_set_state(sc, BACKUP); + carp_setroute(sc, RTM_DELETE); + carp_setrun(sc, 0); break; case BACKUP: callout_stop(&sc->sc_ad_tmo); @@ -1448,7 +1437,7 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) } static void -carp_multicast_cleanup(struct carp_softc *sc) +carp_multicast_cleanup(struct carp_softc *sc, int dofree) { struct ip_moptions *imo = &sc->sc_imo; u_int16_t n = imo->imo_num_memberships; @@ -1456,7 +1445,8 @@ carp_multicast_cleanup(struct carp_softc *sc) /* Clean up our own multicast memberships */ while (n-- > 0) { if (imo->imo_membership[n] != NULL) { - in_delmulti(imo->imo_membership[n]); + if (dofree) + in_delmulti(imo->imo_membership[n]); imo->imo_membership[n] = NULL; } } @@ -1468,14 +1458,15 @@ carp_multicast_cleanup(struct carp_softc *sc) #ifdef INET6 static void -carp_multicast6_cleanup(struct carp_softc *sc) +carp_multicast6_cleanup(struct carp_softc *sc, int dofree) { struct ip6_moptions *im6o = &sc->sc_im6o; u_int16_t n = im6o->im6o_num_memberships; while (n-- > 0) { if (im6o->im6o_membership[n] != NULL) { - in6_mc_leave(im6o->im6o_membership[n], NULL); + if (dofree) + in6_mc_leave(im6o->im6o_membership[n], NULL); im6o->im6o_membership[n] = NULL; } } @@ -1835,7 +1826,7 @@ carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) cleanup: if (!sc->sc_naddrs6) - carp_multicast6_cleanup(sc); + carp_multicast6_cleanup(sc, 1); ifa_free(&ia->ia_ifa); return (error); } @@ -1853,7 +1844,7 @@ carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) SC2IFP(sc)->if_flags &= ~IFF_UP; SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; sc->sc_vhid = -1; - carp_multicast6_cleanup(sc); + carp_multicast6_cleanup(sc, 1); TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); if (!--cif->vhif_nvrs) { CARP_LOCK_DESTROY(cif); diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c index 857df606..d630dd12 100644 --- a/freebsd/sys/netinet/ip_divert.c +++ b/freebsd/sys/netinet/ip_divert.c @@ -710,8 +710,8 @@ div_pcblist(SYSCTL_HANDLER_ARGS) #ifdef SYSCTL_NODE SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0, "IPDIVERT"); -SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0, - div_pcblist, "S,xinpcb", "List of active divert sockets"); +SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, + NULL, 0, div_pcblist, "S,xinpcb", "List of active divert sockets"); #endif struct pr_usrreqs div_usrreqs = { diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h index cf5d8d03..69311a79 100644 --- a/freebsd/sys/netinet/ip_fw.h +++ b/freebsd/sys/netinet/ip_fw.h @@ -37,10 +37,10 @@ #define IPFW_DEFAULT_RULE 65535 /* - * The number of ipfw tables. The maximum allowed table number is the - * (IPFW_TABLES_MAX - 1). + * Default number of ipfw tables. */ -#define IPFW_TABLES_MAX 128 +#define IPFW_TABLES_MAX 65535 +#define IPFW_TABLES_DEFAULT 128 /* * Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit @@ -56,6 +56,25 @@ #define IPFW_ARG_MAX 65534 #define IP_FW_TABLEARG 65535 /* XXX should use 0 */ +/* + * Number of entries in the call stack of the call/return commands. + * Call stack currently is an uint16_t array with rule numbers. + */ +#define IPFW_CALLSTACK_SIZE 16 + +/* IP_FW3 header/opcodes */ +typedef struct _ip_fw3_opheader { + uint16_t opcode; /* Operation opcode */ + uint16_t reserved[3]; /* Align to 64-bit boundary */ +} ip_fw3_opheader; + + +/* IPFW extented tables support */ +#define IP_FW_TABLE_XADD 86 /* add entry */ +#define IP_FW_TABLE_XDEL 87 /* delete entry */ +#define IP_FW_TABLE_XGETSIZE 88 /* get table size */ +#define IP_FW_TABLE_XLIST 89 /* list table contents */ + /* * The kernel representation of ipfw rules is made of a list of * 'instructions' (for all practical purposes equivalent to BPF @@ -193,6 +212,8 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_SETFIB, /* arg1=FIB number */ O_FIB, /* arg1=FIB desired fib number */ + O_CALLRETURN, /* arg1=called rule number */ + O_LAST_OPCODE /* not an opcode! */ }; @@ -380,8 +401,6 @@ struct cfg_redir { }; #endif -#define NAT_BUF_LEN 1024 - #ifdef IPFW_INTERNAL /* Nat configuration data struct. */ struct cfg_nat { @@ -562,6 +581,11 @@ struct _ipfw_dyn_rule { /* * These are used for lookup tables. */ + +#define IPFW_TABLE_CIDR 1 /* Table for holding IPv4/IPv6 prefixes */ +#define IPFW_TABLE_INTERFACE 2 /* Table for holding interface names */ +#define IPFW_TABLE_MAXTYPE 2 /* Maximum valid number */ + typedef struct _ipfw_table_entry { in_addr_t addr; /* network address */ u_int32_t value; /* value */ @@ -569,6 +593,19 @@ typedef struct _ipfw_table_entry { u_int8_t masklen; /* mask length */ } ipfw_table_entry; +typedef struct _ipfw_table_xentry { + uint16_t len; /* Total entry length */ + uint8_t type; /* entry type */ + uint8_t masklen; /* mask length */ + uint16_t tbl; /* table number */ + uint32_t value; /* value */ + union { + /* Longest field needs to be aligned by 4-byte boundary */ + struct in6_addr addr6; /* IPv6 address */ + char iface[IF_NAMESIZE]; /* interface name */ + } k; +} ipfw_table_xentry; + typedef struct _ipfw_table { u_int32_t size; /* size of entries in bytes */ u_int32_t cnt; /* # of entries */ @@ -576,4 +613,13 @@ typedef struct _ipfw_table { ipfw_table_entry ent[0]; /* entries */ } ipfw_table; +typedef struct _ipfw_xtable { + ip_fw3_opheader opheader; /* eXtended tables are controlled via IP_FW3 */ + uint32_t size; /* size of entries in bytes */ + uint32_t cnt; /* # of entries */ + uint16_t tbl; /* table number */ + uint8_t type; /* table type */ + ipfw_table_xentry xent[0]; /* entries */ +} ipfw_xtable; + #endif /* _IPFW2_H */ diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c index d8d34aa4..1b6ec7d9 100644 --- a/freebsd/sys/netinet/ip_icmp.c +++ b/freebsd/sys/netinet/ip_icmp.c @@ -94,11 +94,7 @@ SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW, &VNET_NAME(icmpmaskfake), 0, "Fake reply to ICMP Address Mask Request packets."); -static VNET_DEFINE(int, drop_redirect) = 0; -#define V_drop_redirect VNET(drop_redirect) -SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, - &VNET_NAME(drop_redirect), 0, - "Ignore ICMP redirects"); +VNET_DEFINE(int, drop_redirect) = 0; static VNET_DEFINE(int, log_redirect) = 0; #define V_log_redirect VNET(log_redirect) @@ -155,6 +151,39 @@ static void icmp_send(struct mbuf *, struct mbuf *); extern struct protosw inetsw[]; +static int +sysctl_net_icmp_drop_redir(SYSCTL_HANDLER_ARGS) +{ + int error, new; + int i; + struct radix_node_head *rnh; + + new = V_drop_redirect; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + new = (new != 0) ? 1 : 0; + + if (new == V_drop_redirect) + return (0); + + for (i = 0; i < rt_numfibs; i++) { + if ((rnh = rt_tables_get_rnh(i, AF_INET)) == NULL) + continue; + RADIX_NODE_HEAD_LOCK(rnh); + in_setmatchfunc(rnh, new); + RADIX_NODE_HEAD_UNLOCK(rnh); + } + + V_drop_redirect = new; + } + + return (error); +} + +SYSCTL_VNET_PROC(_net_inet_icmp, OID_AUTO, drop_redirect, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, + sysctl_net_icmp_drop_redir, "I", "Ignore ICMP redirects"); + /* * Kernel module interface for updating icmpstat. The argument is an index * into icmpstat treated as an array of u_long. While this encodes the @@ -699,7 +728,7 @@ icmp_reflect(struct mbuf *m) */ ifp = m->m_pkthdr.rcvif; if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; @@ -707,11 +736,11 @@ icmp_reflect(struct mbuf *m) if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == t.s_addr) { t = IA_SIN(ia)->sin_addr; - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto match; } } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); } /* * If the packet was transiting through us, use the address of @@ -720,16 +749,16 @@ icmp_reflect(struct mbuf *m) * criteria apply. */ if (V_icmp_rfi && ifp != NULL) { - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); t = IA_SIN(ia)->sin_addr; - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto match; } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); } /* * If the incoming packet was not addressed directly to us, use @@ -738,16 +767,16 @@ icmp_reflect(struct mbuf *m) * with normal source selection. */ if (V_reply_src[0] != '\0' && (ifp = ifunit(V_reply_src))) { - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); t = IA_SIN(ia)->sin_addr; - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto match; } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); } /* * If the packet was transiting through us, use the address of @@ -960,7 +989,8 @@ badport_bandlim(int which) { "icmp tstamp response" }, { "closed port RST response" }, { "open port RST response" }, - { "icmp6 unreach response" } + { "icmp6 unreach response" }, + { "sctp ootb response" } }; /* diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c index 1b7f4319..9d174b3b 100644 --- a/freebsd/sys/netinet/ip_input.c +++ b/freebsd/sys/netinet/ip_input.c @@ -512,7 +512,7 @@ tooshort: } #ifdef IPSEC /* - * Bypass packet filtering for packets from a tunnel (gif). + * Bypass packet filtering for packets previously handled by IPsec. */ if (ip_ipsec_filtertunnel(m)) goto passin; @@ -635,7 +635,7 @@ passin: * into the stack for SIMPLEX interfaces handled by ether_output(). */ if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { - IF_ADDR_LOCK(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; @@ -643,23 +643,23 @@ passin: if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == ip->ip_dst.s_addr) { ifa_ref(ifa); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto ours; } if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) { ifa_ref(ifa); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto ours; } #ifdef BOOTP_COMPAT if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { ifa_ref(ifa); - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); goto ours; } #endif } - IF_ADDR_UNLOCK(ifp); + IF_ADDR_RUNLOCK(ifp); ia = NULL; } /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ @@ -1033,7 +1033,7 @@ found: * segment. If it provides all of our data, drop us, otherwise * stick new segment in the proper place. * - * If some of the data is dropped from the the preceding + * If some of the data is dropped from the preceding * segment, then it's checksum is invalidated. */ if (p) { @@ -1713,6 +1713,12 @@ makedummy: if (*mp) mp = &(*mp)->m_next; } + if (inp->inp_flags & INP_RECVTOS) { + *mp = sbcreatecontrol((caddr_t) &ip->ip_tos, + sizeof(u_char), IP_RECVTOS, IPPROTO_IP); + if (*mp) + mp = &(*mp)->m_next; + } } /* diff --git a/freebsd/sys/netinet/ip_ipsec.c b/freebsd/sys/netinet/ip_ipsec.c index f83b2d93..917c2c79 100644 --- a/freebsd/sys/netinet/ip_ipsec.c +++ b/freebsd/sys/netinet/ip_ipsec.c @@ -97,7 +97,7 @@ ip_ipsec_filtertunnel(struct mbuf *m) #if defined(IPSEC) /* - * Bypass packet filtering for packets from a tunnel. + * Bypass packet filtering for packets previously handled by IPsec. */ if (!V_ip4_ipsec_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c index 022f2451..1e9cd57a 100644 --- a/freebsd/sys/netinet/ip_mroute.c +++ b/freebsd/sys/netinet/ip_mroute.c @@ -2824,7 +2824,7 @@ vnet_mroute_init(const void *unused __unused) callout_init(&V_bw_meter_ch, CALLOUT_MPSAFE); } -VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_mroute_init, +VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init, NULL); static void @@ -2949,4 +2949,4 @@ static moduledata_t ip_mroutemod = { 0 }; -DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c index c01e0d32..7aedcb50 100644 --- a/freebsd/sys/netinet/ip_output.c +++ b/freebsd/sys/netinet/ip_output.c @@ -95,7 +95,7 @@ __FBSDID("$FreeBSD$"); VNET_DEFINE(u_short, ip_id); #ifdef MBUF_STRESS_TEST -int mbuf_frag_size = 0; +static int mbuf_frag_size = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); #endif @@ -199,8 +199,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, hlen = ip->ip_hl << 2; } - dst = (struct sockaddr_in *)&ro->ro_dst; again: + dst = (struct sockaddr_in *)&ro->ro_dst; /* * If there is a cached route, * check that it is to the same destination @@ -958,6 +958,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_FAITH: case IP_ONESBCAST: case IP_DONTFRAG: + case IP_RECVTOS: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) @@ -1021,6 +1022,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_BINDANY: OPTSET(INP_BINDANY); break; + case IP_RECVTOS: + OPTSET(INP_RECVTOS); + break; } break; #undef OPTSET @@ -1130,6 +1134,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_ONESBCAST: case IP_DONTFRAG: case IP_BINDANY: + case IP_RECVTOS: switch (sopt->sopt_name) { case IP_TOS: @@ -1188,6 +1193,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_BINDANY: optval = OPTBIT(INP_BINDANY); break; + case IP_RECVTOS: + optval = OPTBIT(INP_RECVTOS); + break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; diff --git a/freebsd/sys/netinet/ip_var.h b/freebsd/sys/netinet/ip_var.h index 222b7efe..d196fd04 100644 --- a/freebsd/sys/netinet/ip_var.h +++ b/freebsd/sys/netinet/ip_var.h @@ -187,6 +187,7 @@ VNET_DECLARE(struct socket *, ip_mrouter); /* multicast routing daemon */ extern int (*legal_vif_num)(int); extern u_long (*ip_mcast_src)(int); VNET_DECLARE(int, rsvp_on); +VNET_DECLARE(int, drop_redirect); extern struct pr_usrreqs rip_usrreqs; #define V_ipstat VNET(ipstat) @@ -199,6 +200,7 @@ extern struct pr_usrreqs rip_usrreqs; #define V_ip_rsvpd VNET(ip_rsvpd) #define V_ip_mrouter VNET(ip_mrouter) #define V_rsvp_on VNET(rsvp_on) +#define V_drop_redirect VNET(drop_redirect) void inp_freemoptions(struct ip_moptions *); int inp_getmoptions(struct inpcb *, struct sockopt *); @@ -287,6 +289,7 @@ enum { }; #define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */ #define MTAG_IPFW_RULE 1262273568 /* rule reference */ +#define MTAG_IPFW_CALL 1308397630 /* call stack */ struct ip_fw_args; typedef int (*ip_fw_chk_ptr_t)(struct ip_fw_args *args); diff --git a/freebsd/sys/netinet/ipfw/ip_dn_glue.c b/freebsd/sys/netinet/ipfw/ip_dn_glue.c index 8c8c3ef8..2b109771 100644 --- a/freebsd/sys/netinet/ipfw/ip_dn_glue.c +++ b/freebsd/sys/netinet/ipfw/ip_dn_glue.c @@ -626,7 +626,7 @@ dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq) /* These 4 field are the same in pipe7 and pipe8 */ pipe7->next.sle_next = (struct dn_pipe7 *)DN_IS_PIPE; pipe7->bandwidth = l->bandwidth; - pipe7->delay = l->delay; + pipe7->delay = l->delay * 1000 / hz; pipe7->pipe_nr = l->link_nr - DN_MAX_ID; if (!is7) { @@ -809,7 +809,7 @@ ip_dummynet_compat(struct sockopt *sopt) if (error) break; error = dn_compat_del(v); - free(v, M_DUMMYNET); + free(v, M_TEMP); break; case IP_DUMMYNET_CONFIGURE: @@ -818,7 +818,7 @@ ip_dummynet_compat(struct sockopt *sopt) if (error) break; error = dn_compat_configure(v); - free(v, M_DUMMYNET); + free(v, M_TEMP); break; case IP_DUMMYNET_GET: { diff --git a/freebsd/sys/netinet/ipfw/ip_dn_io.c b/freebsd/sys/netinet/ipfw/ip_dn_io.c index 72f0c354..ccb91c2e 100644 --- a/freebsd/sys/netinet/ipfw/ip_dn_io.c +++ b/freebsd/sys/netinet/ipfw/ip_dn_io.c @@ -47,8 +47,11 @@ __FBSDID("$FreeBSD$"); #include #include #include + #include /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ #include +#include + #include #include /* ip_len, ip_off */ #include /* ip_output(), IP_FORWARDING */ @@ -71,6 +74,7 @@ __FBSDID("$FreeBSD$"); */ struct dn_parms dn_cfg; +//VNET_DEFINE(struct dn_parms, _base_dn_cfg); static long tick_last; /* Last tick duration (usec). */ static long tick_delta; /* Last vs standard tick diff (usec). */ @@ -102,31 +106,78 @@ SYSCTL_DECL(_net_inet); SYSCTL_DECL(_net_inet_ip); SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); +/* wrapper to pass dn_cfg fields to SYSCTL_* */ +//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x)) +#define DC(x) (&(dn_cfg.x)) /* parameters */ -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, - CTLFLAG_RW, &dn_cfg.hash_size, 0, "Default hash table size"); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, - CTLFLAG_RW, &dn_cfg.slot_limit, 0, - "Upper limit in slots for pipe queue."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit, - CTLFLAG_RW, &dn_cfg.byte_limit, 0, - "Upper limit in bytes for pipe queue."); + +static int +sysctl_hash_size(SYSCTL_HANDLER_ARGS) +{ + int error, value; + + value = dn_cfg.hash_size; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (value < 16 || value > 65536) + return (EINVAL); + dn_cfg.hash_size = value; + return (0); +} + +SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size, + CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size, + "I", "Default hash table size"); + +static int +sysctl_limits(SYSCTL_HANDLER_ARGS) +{ + int error; + long value; + + if (arg2 != 0) + value = dn_cfg.slot_limit; + else + value = dn_cfg.byte_limit; + error = sysctl_handle_long(oidp, &value, 0, req); + + if (error != 0 || req->newptr == NULL) + return (error); + if (arg2 != 0) { + if (value < 1) + return (EINVAL); + dn_cfg.slot_limit = value; + } else { + if (value < 1500) + return (EINVAL); + dn_cfg.byte_limit = value; + } + return (0); +} + +SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, + CTLTYPE_LONG | CTLFLAG_RW, 0, 1, sysctl_limits, + "L", "Upper limit in slots for pipe queue."); +SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit, + CTLTYPE_LONG | CTLFLAG_RW, 0, 0, sysctl_limits, + "L", "Upper limit in bytes for pipe queue."); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast, - CTLFLAG_RW, &dn_cfg.io_fast, 0, "Enable fast dummynet io."); + CTLFLAG_RW, DC(io_fast), 0, "Enable fast dummynet io."); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, - CTLFLAG_RW, &dn_cfg.debug, 0, "Dummynet debug level"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, - CTLFLAG_RW, &dn_cfg.expire, 0, "Expire empty queues/pipes"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle, - CTLFLAG_RD, &dn_cfg.expire_cycle, 0, "Expire cycle for queues/pipes"); + CTLFLAG_RW, DC(debug), 0, "Dummynet debug level"); +SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire, + CTLFLAG_RW, DC(expire), 0, "Expire empty queues/pipes"); +SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle, + CTLFLAG_RD, DC(expire_cycle), 0, "Expire cycle for queues/pipes"); /* RED parameters */ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, - CTLFLAG_RD, &dn_cfg.red_lookup_depth, 0, "Depth of RED lookup table"); + CTLFLAG_RD, DC(red_lookup_depth), 0, "Depth of RED lookup table"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, - CTLFLAG_RD, &dn_cfg.red_avg_pkt_size, 0, "RED Medium packet size"); + CTLFLAG_RD, DC(red_avg_pkt_size), 0, "RED Medium packet size"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, - CTLFLAG_RD, &dn_cfg.red_max_pkt_size, 0, "RED Max packet size"); + CTLFLAG_RD, DC(red_max_pkt_size), 0, "RED Max packet size"); /* time adjustment */ SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta, @@ -144,13 +195,13 @@ SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, /* statistics */ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count, - CTLFLAG_RD, &dn_cfg.schk_count, 0, "Number of schedulers"); + CTLFLAG_RD, DC(schk_count), 0, "Number of schedulers"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count, - CTLFLAG_RD, &dn_cfg.si_count, 0, "Number of scheduler instances"); + CTLFLAG_RD, DC(si_count), 0, "Number of scheduler instances"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count, - CTLFLAG_RD, &dn_cfg.fsk_count, 0, "Number of flowsets"); + CTLFLAG_RD, DC(fsk_count), 0, "Number of flowsets"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count, - CTLFLAG_RD, &dn_cfg.queue_count, 0, "Number of queues"); + CTLFLAG_RD, DC(queue_count), 0, "Number of queues"); SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, CTLFLAG_RD, &io_pkt, 0, "Number of packets passed to dummynet."); @@ -160,7 +211,7 @@ SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, CTLFLAG_RD, &io_pkt_drop, 0, "Number of packets dropped by dummynet."); - +#undef DC SYSEND #endif @@ -463,7 +514,7 @@ serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now) (m->m_pkthdr.len * 8 + extra_bits(m, s)); si->credit -= len_scaled; /* Move packet in the delay line */ - dn_tag_get(m)->output_time += s->link.delay ; + dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay ; mq_append(&si->dline.mq, m); } /* @@ -498,6 +549,8 @@ dummynet_task(void *context, int pending) struct timeval t; struct mq q = { NULL, NULL }; /* queue to accumulate results */ + CURVNET_SET((struct vnet *)context); + DN_BH_WLOCK(); /* Update number of lost(coalesced) ticks. */ @@ -562,6 +615,7 @@ dummynet_task(void *context, int pending) dn_reschedule(); if (q.head != NULL) dummynet_send(q.head); + CURVNET_RESTORE(); } /* @@ -612,7 +666,6 @@ dummynet_send(struct mbuf *m) break; case DIR_OUT | PROTO_IPV6: - SET_HOST_IPLEN(mtod(m, struct ip *)); ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); break; #endif diff --git a/freebsd/sys/netinet/ipfw/ip_dn_private.h b/freebsd/sys/netinet/ipfw/ip_dn_private.h index 270f1881..7b0088d9 100644 --- a/freebsd/sys/netinet/ipfw/ip_dn_private.h +++ b/freebsd/sys/netinet/ipfw/ip_dn_private.h @@ -150,6 +150,8 @@ struct dn_parms { uint32_t expire; uint32_t expire_cycle; /* tick count */ + int init_done; + /* if the upper half is busy doing something long, * can set the busy flag and we will enqueue packets in * a queue for later processing. @@ -354,6 +356,8 @@ enum { }; extern struct dn_parms dn_cfg; +//VNET_DECLARE(struct dn_parms, _base_dn_cfg); +//#define dn_cfg VNET(_base_dn_cfg) int dummynet_io(struct mbuf **, int , struct ip_fw_args *); void dummynet_task(void *context, int pending); diff --git a/freebsd/sys/netinet/ipfw/ip_dummynet.c b/freebsd/sys/netinet/ipfw/ip_dummynet.c index 5d51c30b..89e5f4db 100644 --- a/freebsd/sys/netinet/ipfw/ip_dummynet.c +++ b/freebsd/sys/netinet/ipfw/ip_dummynet.c @@ -99,7 +99,7 @@ find_sched_type(int type, char *name) struct dn_alg *d; SLIST_FOREACH(d, &dn_cfg.schedlist, next) { - if (d->type == type || (name && !strcmp(d->name, name))) + if (d->type == type || (name && !strcasecmp(d->name, name))) return d; } return NULL; /* not found */ @@ -110,6 +110,10 @@ ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) { int oldv = *v; const char *op = NULL; + if (dflt < lo) + dflt = lo; + if (dflt > hi) + dflt = hi; if (oldv < lo) { *v = dflt; op = "Bump"; @@ -749,9 +753,10 @@ schk_delete_cb(void *obj, void *arg) #endif fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0); /* no more flowset pointing to us now */ - if (s->sch.flags & DN_HAVE_MASK) + if (s->sch.flags & DN_HAVE_MASK) { dn_ht_scan(s->siht, si_destroy, NULL); - else if (s->siht) + dn_ht_free(s->siht, 0); + } else if (s->siht) si_destroy(s->siht, NULL); if (s->profile) { free(s->profile, M_DUMMYNET); @@ -804,6 +809,7 @@ copy_obj(char **start, char *end, void *_o, const char *msg, int i) /* Adjust burst parameter for link */ struct dn_link *l = (struct dn_link *)*start; l->burst = div64(l->burst, 8 * hz); + l->delay = l->delay * 1000 / hz; } else if (o->type == DN_SCH) { /* Set id->id to the number of instances */ struct dn_schk *s = _o; @@ -1040,7 +1046,7 @@ config_red(struct dn_fsk *fs) fs->w_q = fs->fs.w_q; fs->max_p = fs->fs.max_p; - D("called"); + ND("called"); /* Doing stuff that was in userland */ i = fs->sched->link.bandwidth; s = (i <= 0) ? 0 : @@ -1104,7 +1110,7 @@ config_red(struct dn_fsk *fs) if (dn_cfg.red_max_pkt_size < 1) dn_cfg.red_max_pkt_size = 1500; fs->max_pkt_size = dn_cfg.red_max_pkt_size; - D("exit"); + ND("exit"); return 0; } @@ -1834,9 +1840,7 @@ dummynet_get(struct sockopt *sopt, void **compat) #endif if (l > sizeof(r)) { /* request larger than default, allocate buffer */ - cmd = malloc(l, M_DUMMYNET, M_WAIT); - if (cmd == NULL) - return ENOMEM; //XXX + cmd = malloc(l, M_DUMMYNET, M_WAITOK); error = sooptcopyin(sopt, cmd, l, l); sopt->sopt_valsize = sopt_valsize; if (error) @@ -1892,10 +1896,6 @@ dummynet_get(struct sockopt *sopt, void **compat) have = need; start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO); - if (start == NULL) { - error = ENOMEM; - goto done; - } } if (start == NULL) { @@ -2114,14 +2114,10 @@ ip_dn_ctl(struct sockopt *sopt) static void ip_dn_init(void) { - static int init_done = 0; - - if (init_done) + if (dn_cfg.init_done) return; - init_done = 1; - if (bootverbose) - printf("DUMMYNET with IPv6 initialized (100131)\n"); - + printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet); + dn_cfg.init_done = 1; /* Set defaults here. MSVC does not accept initializers, * and this is also useful for vimages */ @@ -2136,7 +2132,7 @@ ip_dn_init(void) dn_cfg.red_max_pkt_size = 1500; /* default max packet size */ /* hash tables */ - dn_cfg.max_hash_size = 1024; /* max in the hash tables */ + dn_cfg.max_hash_size = 65536; /* max in the hash tables */ dn_cfg.hash_size = 64; /* default hash size */ /* create hash tables for schedulers and flowsets. @@ -2158,10 +2154,8 @@ ip_dn_init(void) SLIST_INIT(&dn_cfg.schedlist); DN_LOCK_INIT(); - ip_dn_ctl_ptr = ip_dn_ctl; - ip_dn_io_ptr = dummynet_io; - TASK_INIT(&dn_task, 0, dummynet_task, NULL); + TASK_INIT(&dn_task, 0, dummynet_task, curvnet); dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, taskqueue_thread_enqueue, &dn_tq); taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); @@ -2175,13 +2169,16 @@ ip_dn_init(void) #ifdef KLD_MODULE static void -ip_dn_destroy(void) +ip_dn_destroy(int last) { callout_drain(&dn_timeout); DN_BH_WLOCK(); - ip_dn_ctl_ptr = NULL; - ip_dn_io_ptr = NULL; + if (last) { + ND("removing last instance\n"); + ip_dn_ctl_ptr = NULL; + ip_dn_io_ptr = NULL; + } dummynet_flush(); DN_BH_WUNLOCK(); @@ -2206,13 +2203,15 @@ dummynet_modevent(module_t mod, int type, void *data) return EEXIST ; } ip_dn_init(); + ip_dn_ctl_ptr = ip_dn_ctl; + ip_dn_io_ptr = dummynet_io; return 0; } else if (type == MOD_UNLOAD) { #if !defined(KLD_MODULE) printf("dummynet statically compiled, cannot unload\n"); return EINVAL ; #else - ip_dn_destroy(); + ip_dn_destroy(1 /* last */); return 0; #endif } else @@ -2256,13 +2255,13 @@ unload_dn_sched(struct dn_alg *s) struct dn_alg *tmp, *r; int err = EINVAL; - D("called for %s", s->name); + ND("called for %s", s->name); DN_BH_WLOCK(); SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) { if (strcmp(s->name, r->name) != 0) continue; - D("ref_count = %d", r->ref_count); + ND("ref_count = %d", r->ref_count); err = (r->ref_count != 0) ? EBUSY : 0; if (err == 0) SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next); @@ -2290,8 +2289,24 @@ static moduledata_t dummynet_mod = { "dummynet", dummynet_modevent, NULL }; -DECLARE_MODULE(dummynet, dummynet_mod, - SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY-1); +#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN +#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ +DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); -MODULE_VERSION(dummynet, 1); +MODULE_VERSION(dummynet, 3); + +/* + * Starting up. Done in order after dummynet_modevent() has been called. + * VNET_SYSINIT is also called for each existing vnet and each new vnet. + */ +//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL); + +/* + * Shutdown handlers up shop. These are done in REVERSE ORDER, but still + * after dummynet_modevent() has been called. Not called on reboot. + * VNET_SYSUNINIT is also called for each exiting vnet as it exits. + * or when the module is unloaded. + */ +//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL); + /* end of file */ diff --git a/freebsd/sys/netinet/ipfw/ip_fw2.c b/freebsd/sys/netinet/ipfw/ip_fw2.c index 4328c50e..44781127 100644 --- a/freebsd/sys/netinet/ipfw/ip_fw2.c +++ b/freebsd/sys/netinet/ipfw/ip_fw2.c @@ -32,15 +32,12 @@ __FBSDID("$FreeBSD$"); * The FreeBSD IP packet firewall, main file */ -#if !defined(KLD_MODULE) #include #include -#include #include #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ -#endif #include #include @@ -108,6 +105,9 @@ static VNET_DEFINE(int, ipfw_vnet_ready) = 0; static VNET_DEFINE(int, fw_deny_unknown_exthdrs); #define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) +static VNET_DEFINE(int, fw_permit_single_frag6) = 1; +#define V_fw_permit_single_frag6 VNET(fw_permit_single_frag6) + #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT static int default_to_accept = 1; #else @@ -116,6 +116,10 @@ static int default_to_accept; VNET_DEFINE(int, autoinc_step); +VNET_DEFINE(unsigned int, fw_tables_max); +/* Use 128 tables by default */ +static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT; + /* * Each rule belongs to one of 32 different sets (0..31). * The variable set_disable contains one bit per set. @@ -145,7 +149,7 @@ ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; #ifdef SYSCTL_NODE uint32_t dummy_def = IPFW_DEFAULT_RULE; -uint32_t dummy_tables_max = IPFW_TABLES_MAX; +static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS); SYSBEGIN(f3) @@ -165,13 +169,14 @@ SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, &dummy_def, 0, "The default/max possible rule number."); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD, - &dummy_tables_max, 0, - "The maximum number of tables."); +SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_max, + CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU", + "Maximum number of tables"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, &default_to_accept, 0, "Make the default rule accept all packets."); TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept); +TUNABLE_INT("net.inet.ip.fw.tables_max", &default_fw_tables); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, "Number of static rules"); @@ -182,6 +187,9 @@ SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0, "Deny packets with unknown IPv6 Extension Headers"); +SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6, + CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0, + "Permit single packet IPv6 fragments"); #endif /* INET6 */ SYSEND @@ -338,12 +346,15 @@ tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) } static int -iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) +iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg) { if (ifp == NULL) /* no iface with this packet, match fails */ return 0; /* Check by name or by IP address */ if (cmd->name[0] != '\0') { /* match by name */ + if (cmd->name[0] == '\1') /* use tablearg to match */ + return ipfw_lookup_table_extended(chain, cmd->p.glob, + ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) @@ -493,7 +504,7 @@ search_ip6_addr_net (struct in6_addr * ip6_addr) } static int -verify_path6(struct in6_addr *src, struct ifnet *ifp) +verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) { struct route_in6 ro; struct sockaddr_in6 *dst; @@ -504,9 +515,8 @@ verify_path6(struct in6_addr *src, struct ifnet *ifp) dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(*dst); dst->sin6_addr = *src; - /* XXX MRT 0 for ipv6 at this time */ - rtalloc_ign((struct route *)&ro, 0); + in6_rtalloc_ign(&ro, 0, fib); if (ro.ro_rt == NULL) return 0; @@ -851,13 +861,14 @@ ipfw_chk(struct ip_fw_args *args) * we have a fragment at this offset of an IPv4 packet. * offset == 0 means that (if this is an IPv4 packet) * this is the first or only fragment. - * For IPv6 offset == 0 means there is no Fragment Header. - * If offset != 0 for IPv6 always use correct mask to - * get the correct offset because we add IP6F_MORE_FRAG - * to be able to dectect the first fragment which would - * otherwise have offset = 0. + * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header + * or there is a single packet fragement (fragement header added + * without needed). We will treat a single packet fragment as if + * there was no fragment header (or log/block depending on the + * V_fw_permit_single_frag6 sysctl setting). */ u_short offset = 0; + u_short ip6f_mf = 0; /* * Local copies of addresses. They are only valid if we have @@ -923,9 +934,10 @@ ipfw_chk(struct ip_fw_args *args) * pointer might become stale after other pullups (but we never use it * this way). */ -#define PULLUP_TO(_len, p, T) \ +#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) +#define PULLUP_LEN(_len, p, T) \ do { \ - int x = (_len) + sizeof(T); \ + int x = (_len) + T; \ if ((m)->m_len < x) { \ args->m = m = m_pullup(m, x); \ if (m == NULL) \ @@ -950,7 +962,7 @@ do { \ proto = ip6->ip6_nxt; /* Search extension headers to find upper layer protocols */ - while (ulp == NULL) { + while (ulp == NULL && offset == 0) { switch (proto) { case IPPROTO_ICMPV6: PULLUP_TO(hlen, ulp, struct icmp6_hdr); @@ -995,9 +1007,11 @@ do { \ ext_hd |= EXT_RTHDR2; break; default: - printf("IPFW2: IPV6 - Unknown Routing " - "Header type(%d)\n", - ((struct ip6_rthdr *)ulp)->ip6r_type); + if (V_fw_verbose) + printf("IPFW2: IPV6 - Unknown " + "Routing Header type(%d)\n", + ((struct ip6_rthdr *) + ulp)->ip6r_type); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; @@ -1015,13 +1029,13 @@ do { \ proto = ((struct ip6_frag *)ulp)->ip6f_nxt; offset = ((struct ip6_frag *)ulp)->ip6f_offlg & IP6F_OFF_MASK; - /* Add IP6F_MORE_FRAG for offset of first - * fragment to be != 0. */ - offset |= ((struct ip6_frag *)ulp)->ip6f_offlg & + ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg & IP6F_MORE_FRAG; - if (offset == 0) { - printf("IPFW2: IPV6 - Invalid Fragment " - "Header\n"); + if (V_fw_permit_single_frag6 == 0 && + offset == 0 && ip6f_mf == 0) { + if (V_fw_verbose) + printf("IPFW2: IPV6 - Invalid " + "Fragment Header\n"); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; @@ -1092,8 +1106,10 @@ do { \ break; default: - printf("IPFW2: IPV6 - Unknown Extension " - "Header(%d), ext_hd=%x\n", proto, ext_hd); + if (V_fw_verbose) + printf("IPFW2: IPV6 - Unknown " + "Extension Header(%d), ext_hd=%x\n", + proto, ext_hd); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); PULLUP_TO(hlen, ulp, struct ip6_ext); @@ -1133,6 +1149,12 @@ do { \ args->f_id._flags = TCP(ulp)->th_flags; break; + case IPPROTO_SCTP: + PULLUP_TO(hlen, ulp, struct sctphdr); + src_port = SCTP(ulp)->src_port; + dst_port = SCTP(ulp)->dest_port; + break; + case IPPROTO_UDP: PULLUP_TO(hlen, ulp, struct udphdr); dst_port = UDP(ulp)->uh_dport; @@ -1282,16 +1304,18 @@ do { \ case O_RECV: match = iface_match(m->m_pkthdr.rcvif, - (ipfw_insn_if *)cmd); + (ipfw_insn_if *)cmd, chain, &tablearg); break; case O_XMIT: - match = iface_match(oif, (ipfw_insn_if *)cmd); + match = iface_match(oif, (ipfw_insn_if *)cmd, + chain, &tablearg); break; case O_VIA: match = iface_match(oif ? oif : - m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); + m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, + chain, &tablearg); break; case O_MACADDR2: @@ -1429,6 +1453,17 @@ do { \ ((ipfw_insn_u32 *)cmd)->d[0] == v; else tablearg = v; + } else if (is_ipv6) { + uint32_t v = 0; + void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ? + &args->f_id.dst_ip6: &args->f_id.src_ip6; + match = ipfw_lookup_table_extended(chain, + cmd->arg1, pkey, &v, + IPFW_TABLE_CIDR); + if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) + match = ((ipfw_insn_u32 *)cmd)->d[0] == v; + if (match) + tablearg = v; } break; @@ -1612,6 +1647,7 @@ do { \ break; case O_TCPOPTS: + PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2)); match = (proto == IPPROTO_TCP && offset == 0 && tcpopts_match(TCP(ulp), cmd)); break; @@ -1668,7 +1704,7 @@ do { \ case O_LOG: ipfw_log(f, hlen, args, m, - oif, offset, tablearg, ip); + oif, offset | ip6f_mf, tablearg, ip); match = 1; break; @@ -1684,7 +1720,7 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), - m->m_pkthdr.rcvif) : + m->m_pkthdr.rcvif, args->f_id.fib) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, args->f_id.fib))); @@ -1696,7 +1732,7 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), - NULL) : + NULL, args->f_id.fib) : #endif verify_path(src_ip, NULL, args->f_id.fib))); break; @@ -1714,7 +1750,8 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6( &(args->f_id.src_ip6), - m->m_pkthdr.rcvif) : + m->m_pkthdr.rcvif, + args->f_id.fib) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, @@ -1805,10 +1842,13 @@ do { \ if (mtag != NULL) m_tag_delete(m, mtag); match = 0; - } else if (mtag == NULL) { - if ((mtag = m_tag_alloc(MTAG_IPFW, - tag, 0, M_NOWAIT)) != NULL) - m_tag_prepend(m, mtag); + } else { + if (mtag == NULL) { + mtag = m_tag_alloc( MTAG_IPFW, + tag, 0, M_NOWAIT); + if (mtag != NULL) + m_tag_prepend(m, mtag); + } match = 1; } break; @@ -2040,6 +2080,123 @@ do { \ continue; break; /* not reached */ + case O_CALLRETURN: { + /* + * Implementation of `subroutine' call/return, + * in the stack carried in an mbuf tag. This + * is different from `skipto' in that any call + * address is possible (`skipto' must prevent + * backward jumps to avoid endless loops). + * We have `return' action when F_NOT flag is + * present. The `m_tag_id' field is used as + * stack pointer. + */ + struct m_tag *mtag; + uint16_t jmpto, *stack; + +#define IS_CALL ((cmd->len & F_NOT) == 0) +#define IS_RETURN ((cmd->len & F_NOT) != 0) + /* + * Hand-rolled version of m_tag_locate() with + * wildcard `type'. + * If not already tagged, allocate new tag. + */ + mtag = m_tag_first(m); + while (mtag != NULL) { + if (mtag->m_tag_cookie == + MTAG_IPFW_CALL) + break; + mtag = m_tag_next(m, mtag); + } + if (mtag == NULL && IS_CALL) { + mtag = m_tag_alloc(MTAG_IPFW_CALL, 0, + IPFW_CALLSTACK_SIZE * + sizeof(uint16_t), M_NOWAIT); + if (mtag != NULL) + m_tag_prepend(m, mtag); + } + + /* + * On error both `call' and `return' just + * continue with next rule. + */ + if (IS_RETURN && (mtag == NULL || + mtag->m_tag_id == 0)) { + l = 0; /* exit inner loop */ + break; + } + if (IS_CALL && (mtag == NULL || + mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) { + printf("ipfw: call stack error, " + "go to next rule\n"); + l = 0; /* exit inner loop */ + break; + } + + f->pcnt++; /* update stats */ + f->bcnt += pktlen; + f->timestamp = time_uptime; + stack = (uint16_t *)(mtag + 1); + + /* + * The `call' action may use cached f_pos + * (in f->next_rule), whose version is written + * in f->next_rule. + * The `return' action, however, doesn't have + * fixed jump address in cmd->arg1 and can't use + * cache. + */ + if (IS_CALL) { + stack[mtag->m_tag_id] = f->rulenum; + mtag->m_tag_id++; + if (cmd->arg1 != IP_FW_TABLEARG && + (uintptr_t)f->x_next == chain->id) { + f_pos = (uintptr_t)f->next_rule; + } else { + jmpto = (cmd->arg1 == + IP_FW_TABLEARG) ? tablearg: + cmd->arg1; + f_pos = ipfw_find_rule(chain, + jmpto, 0); + /* update the cache */ + if (cmd->arg1 != + IP_FW_TABLEARG) { + f->next_rule = + (void *)(uintptr_t) + f_pos; + f->x_next = + (void *)(uintptr_t) + chain->id; + } + } + } else { /* `return' action */ + mtag->m_tag_id--; + jmpto = stack[mtag->m_tag_id] + 1; + f_pos = ipfw_find_rule(chain, jmpto, 0); + } + + /* + * Skip disabled rules, and re-enter + * the inner loop with the correct + * f_pos, f, l and cmd. + * Also clear cmdlen and skip_or + */ + for (; f_pos < chain->n_rules - 1 && + (V_set_disable & + (1 << chain->map[f_pos]->set)); f_pos++) + ; + /* Re-enter the inner loop at the dest rule. */ + f = chain->map[f_pos]; + l = f->cmd_len; + cmd = f->cmd; + cmdlen = 0; + skip_or = 0; + continue; + break; /* NOTREACHED */ + } +#undef IS_CALL +#undef IS_RETURN + case O_REJECT: /* * Drop the packet and send a reject notice @@ -2079,7 +2236,8 @@ do { \ case O_FORWARD_IP: if (args->eh) /* not valid on layer2 pkts */ break; - if (!q || dyn_dir == MATCH_FORWARD) { + if (q == NULL || q->rule != f || + dyn_dir == MATCH_FORWARD) { struct sockaddr_in *sa; sa = &(((ipfw_insn_sa *)cmd)->sa); if (sa->sin_addr.s_addr == INADDR_ANY) { @@ -2110,14 +2268,21 @@ do { \ done = 1; /* exit outer loop */ break; - case O_SETFIB: + case O_SETFIB: { + uint32_t fib; + f->pcnt++; /* update stats */ f->bcnt += pktlen; f->timestamp = time_uptime; - M_SETFIB(m, cmd->arg1); - args->f_id.fib = cmd->arg1; + fib = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg: + cmd->arg1; + if (fib >= rt_numfibs) + fib = 0; + M_SETFIB(m, fib); + args->f_id.fib = fib; l = 0; /* exit inner loop */ break; + } case O_NAT: if (!IPFW_NAT_LOADED) { @@ -2127,6 +2292,13 @@ do { \ int nat_id; set_match(args, f_pos, chain); + /* Check if this is 'global' nat rule */ + if (cmd->arg1 == 0) { + retval = ipfw_nat_ptr(args, NULL, m); + l = 0; + done = 1; + break; + } t = ((ipfw_insn_nat *)cmd)->nat; if (t == NULL) { nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? @@ -2209,6 +2381,7 @@ do { \ } } /* end of inner loop, scan opcodes */ +#undef PULLUP_LEN if (done) break; @@ -2240,6 +2413,26 @@ pullup_failed: return (IP_FW_DENY); } +/* + * Set maximum number of tables that can be used in given VNET ipfw instance. + */ +#ifdef SYSCTL_NODE +static int +sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int ntables; + + ntables = V_fw_tables_max; + + error = sysctl_handle_int(oidp, &ntables, 0, req); + /* Read operation or some error */ + if ((error != 0) || (req->newptr == NULL)) + return (error); + + return (ipfw_resize_tables(&V_layer3_chain, ntables)); +} +#endif /* * Module and VNET glue */ @@ -2296,6 +2489,10 @@ ipfw_init(void) printf("limited to %d packets/entry by default\n", V_verbose_limit); + /* Check user-supplied table count for validness */ + if (default_fw_tables > IPFW_TABLES_MAX) + default_fw_tables = IPFW_TABLES_MAX; + ipfw_log_bpf(1); /* init */ return (error); } @@ -2341,19 +2538,18 @@ vnet_ipfw_init(const void *unused) /* insert the default rule and create the initial map */ chain->n_rules = 1; chain->static_len = sizeof(struct ip_fw); - chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO); + chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO); if (chain->map) - rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO); - if (rule == NULL) { - if (chain->map) - free(chain->map, M_IPFW); - printf("ipfw2: ENOSPC initializing default rule " - "(support disabled)\n"); - return (ENOSPC); - } + rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO); + + /* Set initial number of tables */ + V_fw_tables_max = default_fw_tables; error = ipfw_init_tables(chain); if (error) { - panic("init_tables"); /* XXX Marko fix this ! */ + printf("ipfw2: setting up tables failed\n"); + free(chain->map, M_IPFW); + free(rule, M_IPFW); + return (ENOSPC); } /* fill and insert the default rule */ @@ -2416,12 +2612,12 @@ vnet_ipfw_uninit(const void *unused) IPFW_UH_WLOCK(chain); IPFW_WLOCK(chain); + ipfw_dyn_uninit(0); /* run the callout_drain */ IPFW_WUNLOCK(chain); - IPFW_WLOCK(chain); - ipfw_dyn_uninit(0); /* run the callout_drain */ ipfw_destroy_tables(chain); reap = NULL; + IPFW_WLOCK(chain); for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; rule->x_next = reap; diff --git a/freebsd/sys/netinet/ipfw/ip_fw_log.c b/freebsd/sys/netinet/ipfw/ip_fw_log.c index b529b9e1..773c698e 100644 --- a/freebsd/sys/netinet/ipfw/ip_fw_log.c +++ b/freebsd/sys/netinet/ipfw/ip_fw_log.c @@ -32,17 +32,12 @@ __FBSDID("$FreeBSD$"); * Logging support for ipfw */ -#if !defined(KLD_MODULE) #include -#include -#include #include #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ -#endif #include -#include #include #include @@ -306,6 +301,13 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, case O_REASS: action = "Reass"; break; + case O_CALLRETURN: + if (cmd->len & F_NOT) + action = "Return"; + else + snprintf(SNPARGS(action2, 0), "Call %d", + cmd->arg1); + break; default: action = "UNKNOWN"; break; @@ -328,10 +330,14 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, #ifdef INET6 struct ip6_hdr *ip6 = NULL; struct icmp6_hdr *icmp6; + u_short ip6f_mf; #endif src[0] = '\0'; dst[0] = '\0'; #ifdef INET6 + ip6f_mf = offset & IP6F_MORE_FRAG; + offset &= IP6F_OFF_MASK; + if (IS_IP6_FLOW_ID(&(args->f_id))) { char ip6buf[INET6_ADDRSTRLEN]; snprintf(src, sizeof(src), "[%s]", @@ -413,8 +419,7 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, " (frag %08x:%d@%d%s)", args->f_id.extra, ntohs(ip6->ip6_plen) - hlen, - ntohs(offset & IP6F_OFF_MASK) << 3, - (offset & IP6F_MORE_FRAG) ? "+" : ""); + ntohs(offset) << 3, ip6f_mf ? "+" : ""); } else #endif { diff --git a/freebsd/sys/netinet/ipfw/ip_fw_nat.c b/freebsd/sys/netinet/ipfw/ip_fw_nat.c index dd47dedc..294b185e 100644 --- a/freebsd/sys/netinet/ipfw/ip_fw_nat.c +++ b/freebsd/sys/netinet/ipfw/ip_fw_nat.c @@ -140,7 +140,7 @@ del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) } } -static int +static void add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) { struct cfg_redir *r, *ser_r; @@ -201,7 +201,6 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) /* And finally hook this redir entry. */ LIST_INSERT_HEAD(&ptr->redir_chain, r, _next); } - return (1); } static int @@ -210,7 +209,8 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) struct mbuf *mcl; struct ip *ip; /* XXX - libalias duct tape */ - int ldt, retval; + int ldt, retval, found; + struct ip_fw_chain *chain; char *c; ldt = 0; @@ -259,23 +259,65 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) ldt = 1; c = mtod(mcl, char *); - if (args->oif == NULL) - retval = LibAliasIn(t->lib, c, - mcl->m_len + M_TRAILINGSPACE(mcl)); - else - retval = LibAliasOut(t->lib, c, - mcl->m_len + M_TRAILINGSPACE(mcl)); - if (retval == PKT_ALIAS_RESPOND) { - m->m_flags |= M_SKIP_FIREWALL; - retval = PKT_ALIAS_OK; + + /* Check if this is 'global' instance */ + if (t == NULL) { + if (args->oif == NULL) { + /* Wrong direction, skip processing */ + args->m = mcl; + return (IP_FW_NAT); + } + + found = 0; + chain = &V_layer3_chain; + IPFW_RLOCK(chain); + /* Check every nat entry... */ + LIST_FOREACH(t, &chain->nat, _next) { + if ((t->mode & PKT_ALIAS_SKIP_GLOBAL) != 0) + continue; + retval = LibAliasOutTry(t->lib, c, + mcl->m_len + M_TRAILINGSPACE(mcl), 0); + if (retval == PKT_ALIAS_OK) { + /* Nat instance recognises state */ + found = 1; + break; + } + } + IPFW_RUNLOCK(chain); + if (found != 1) { + /* No instance found, return ignore */ + args->m = mcl; + return (IP_FW_NAT); + } + } else { + if (args->oif == NULL) + retval = LibAliasIn(t->lib, c, + mcl->m_len + M_TRAILINGSPACE(mcl)); + else + retval = LibAliasOut(t->lib, c, + mcl->m_len + M_TRAILINGSPACE(mcl)); } - if (retval != PKT_ALIAS_OK && - retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { + + /* + * We drop packet when: + * 1. libalias returns PKT_ALIAS_ERROR; + * 2. For incoming packets: + * a) for unresolved fragments; + * b) libalias returns PKT_ALIAS_IGNORED and + * PKT_ALIAS_DENY_INCOMING flag is set. + */ + if (retval == PKT_ALIAS_ERROR || + (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT || + (retval == PKT_ALIAS_IGNORED && + (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) { /* XXX - should i add some logging? */ m_free(mcl); args->m = NULL; return (IP_FW_DENY); } + + if (retval == PKT_ALIAS_RESPOND) + mcl->m_flags |= M_SKIP_FIREWALL; mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); /* @@ -346,59 +388,57 @@ lookup_nat(struct nat_list *l, int nat_id) static int ipfw_nat_cfg(struct sockopt *sopt) { - struct cfg_nat *ptr, *ser_n; + struct cfg_nat *cfg, *ptr; char *buf; struct ip_fw_chain *chain = &V_layer3_chain; + size_t len; + int gencnt, error = 0; + + len = sopt->sopt_valsize; + buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO); + if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0) + goto out; + + cfg = (struct cfg_nat *)buf; + if (cfg->id < 0) { + error = EINVAL; + goto out; + } - buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO); - sooptcopyin(sopt, buf, NAT_BUF_LEN, sizeof(struct cfg_nat)); - ser_n = (struct cfg_nat *)buf; - - /* check valid parameter ser_n->id > 0 ? */ /* * Find/create nat rule. */ IPFW_WLOCK(chain); - ptr = lookup_nat(&chain->nat, ser_n->id); + gencnt = chain->gencnt; + ptr = lookup_nat(&chain->nat, cfg->id); if (ptr == NULL) { + IPFW_WUNLOCK(chain); /* New rule: allocate and init new instance. */ - ptr = malloc(sizeof(struct cfg_nat), - M_IPFW, M_NOWAIT | M_ZERO); - if (ptr == NULL) { - IPFW_WUNLOCK(chain); - free(buf, M_IPFW); - return (ENOSPC); - } + ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO); ptr->lib = LibAliasInit(NULL); - if (ptr->lib == NULL) { - IPFW_WUNLOCK(chain); - free(ptr, M_IPFW); - free(buf, M_IPFW); - return (EINVAL); - } LIST_INIT(&ptr->redir_chain); } else { - /* Entry already present: temporarly unhook it. */ + /* Entry already present: temporarily unhook it. */ LIST_REMOVE(ptr, _next); - flush_nat_ptrs(chain, ser_n->id); + flush_nat_ptrs(chain, cfg->id); + IPFW_WUNLOCK(chain); } - IPFW_WUNLOCK(chain); /* * Basic nat configuration. */ - ptr->id = ser_n->id; + ptr->id = cfg->id; /* * XXX - what if this rule doesn't nat any ip and just * redirect? * do we set aliasaddress to 0.0.0.0? */ - ptr->ip = ser_n->ip; - ptr->redir_cnt = ser_n->redir_cnt; - ptr->mode = ser_n->mode; - LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode); + ptr->ip = cfg->ip; + ptr->redir_cnt = cfg->redir_cnt; + ptr->mode = cfg->mode; + LibAliasSetMode(ptr->lib, cfg->mode, cfg->mode); LibAliasSetAddress(ptr->lib, ptr->ip); - memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE); + memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE); /* * Redir and LSNAT configuration. @@ -407,11 +447,19 @@ ipfw_nat_cfg(struct sockopt *sopt) del_redir_spool_cfg(ptr, &ptr->redir_chain); /* Add new entries. */ add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr); - free(buf, M_IPFW); + IPFW_WLOCK(chain); + /* Extra check to avoid race with another ipfw_nat_cfg() */ + if (gencnt != chain->gencnt && + ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL)) + LIST_REMOVE(cfg, _next); LIST_INSERT_HEAD(&chain->nat, ptr, _next); + chain->gencnt++; IPFW_WUNLOCK(chain); - return (0); + +out: + free(buf, M_TEMP); + return (error); } static int @@ -441,52 +489,61 @@ ipfw_nat_del(struct sockopt *sopt) static int ipfw_nat_get_cfg(struct sockopt *sopt) { - uint8_t *data; + struct ip_fw_chain *chain = &V_layer3_chain; struct cfg_nat *n; struct cfg_redir *r; struct cfg_spool *s; - int nat_cnt, off; - struct ip_fw_chain *chain; - int err = ENOSPC; + char *data; + int gencnt, nat_cnt, len, error; - chain = &V_layer3_chain; nat_cnt = 0; - off = sizeof(nat_cnt); + len = sizeof(nat_cnt); - data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO); IPFW_RLOCK(chain); - /* Serialize all the data. */ +retry: + gencnt = chain->gencnt; + /* Estimate memory amount */ LIST_FOREACH(n, &chain->nat, _next) { nat_cnt++; - if (off + SOF_NAT >= NAT_BUF_LEN) - goto nospace; - bcopy(n, &data[off], SOF_NAT); - off += SOF_NAT; + len += sizeof(struct cfg_nat); + LIST_FOREACH(r, &n->redir_chain, _next) { + len += sizeof(struct cfg_redir); + LIST_FOREACH(s, &r->spool_chain, _next) + len += sizeof(struct cfg_spool); + } + } + IPFW_RUNLOCK(chain); + + data = malloc(len, M_TEMP, M_WAITOK | M_ZERO); + bcopy(&nat_cnt, data, sizeof(nat_cnt)); + + nat_cnt = 0; + len = sizeof(nat_cnt); + + IPFW_RLOCK(chain); + if (gencnt != chain->gencnt) { + free(data, M_TEMP); + goto retry; + } + /* Serialize all the data. */ + LIST_FOREACH(n, &chain->nat, _next) { + bcopy(n, &data[len], sizeof(struct cfg_nat)); + len += sizeof(struct cfg_nat); LIST_FOREACH(r, &n->redir_chain, _next) { - if (off + SOF_REDIR >= NAT_BUF_LEN) - goto nospace; - bcopy(r, &data[off], SOF_REDIR); - off += SOF_REDIR; + bcopy(r, &data[len], sizeof(struct cfg_redir)); + len += sizeof(struct cfg_redir); LIST_FOREACH(s, &r->spool_chain, _next) { - if (off + SOF_SPOOL >= NAT_BUF_LEN) - goto nospace; - bcopy(s, &data[off], SOF_SPOOL); - off += SOF_SPOOL; + bcopy(s, &data[len], sizeof(struct cfg_spool)); + len += sizeof(struct cfg_spool); } } } - err = 0; /* all good */ -nospace: IPFW_RUNLOCK(chain); - if (err == 0) { - bcopy(&nat_cnt, data, sizeof(nat_cnt)); - sooptcopyout(sopt, data, NAT_BUF_LEN); - } else { - printf("serialized data buffer not big enough:" - "please increase NAT_BUF_LEN\n"); - } - free(data, M_IPFW); - return (err); + + error = sooptcopyout(sopt, data, len); + free(data, M_TEMP); + + return (error); } static int diff --git a/freebsd/sys/netinet/ipfw/ip_fw_pfil.c b/freebsd/sys/netinet/ipfw/ip_fw_pfil.c index b32a08b7..ebd10c5b 100644 --- a/freebsd/sys/netinet/ipfw/ip_fw_pfil.c +++ b/freebsd/sys/netinet/ipfw/ip_fw_pfil.c @@ -29,15 +29,12 @@ #include __FBSDID("$FreeBSD$"); -#if !defined(KLD_MODULE) #include -#include #include +#include #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ -#endif /* KLD_MODULE */ -#include #include #include @@ -129,8 +126,9 @@ again: args.rule = *((struct ipfw_rule_ref *)(tag+1)); m_tag_delete(*m0, tag); if (args.rule.info & IPFW_ONEPASS) { - SET_HOST_IPLEN(mtod(*m0, struct ip *)); - return 0; + if (mtod(*m0, struct ip *)->ip_v == 4) + SET_HOST_IPLEN(mtod(*m0, struct ip *)); + return (0); } } @@ -151,7 +149,7 @@ again: /* next_hop may be set by ipfw_chk */ if (args.next_hop == NULL) break; /* pass */ -#ifndef IPFIREWALL_FORWARD +#if !defined(IPFIREWALL_FORWARD) || (!defined(INET6) && !defined(INET)) ret = EACCES; #else { @@ -179,7 +177,7 @@ again: if (in_localip(args.next_hop->sin_addr)) (*m0)->m_flags |= M_FASTFWD_OURS; } -#endif +#endif /* IPFIREWALL_FORWARD */ break; case IP_FW_DENY: diff --git a/freebsd/sys/netinet/ipfw/ip_fw_private.h b/freebsd/sys/netinet/ipfw/ip_fw_private.h index c29ae0ad..b91c154a 100644 --- a/freebsd/sys/netinet/ipfw/ip_fw_private.h +++ b/freebsd/sys/netinet/ipfw/ip_fw_private.h @@ -90,7 +90,7 @@ struct ip_fw_args { /* * On return, it points to the matching rule. * On entry, rule.slot > 0 means the info is valid and - * contains the the starting rule for an ipfw search. + * contains the starting rule for an ipfw search. * If chain_id == chain->id && slot >0 then jump to that slot. * Otherwise, we locate the first rule >= rulenum:rule_id */ @@ -208,6 +208,9 @@ VNET_DECLARE(u_int32_t, set_disable); VNET_DECLARE(int, autoinc_step); #define V_autoinc_step VNET(autoinc_step) +VNET_DECLARE(unsigned int, fw_tables_max); +#define V_fw_tables_max VNET(fw_tables_max) + struct ip_fw_chain { struct ip_fw *rules; /* list of rules */ struct ip_fw *reap; /* list of rules to reap */ @@ -216,7 +219,9 @@ struct ip_fw_chain { int static_len; /* total len of static rules */ struct ip_fw **map; /* array of rule ptrs to ease lookup */ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ - struct radix_node_head *tables[IPFW_TABLES_MAX]; + struct radix_node_head **tables; /* IPv4 tables */ + struct radix_node_head **xtables; /* extended tables */ + uint8_t *tabletype; /* Array of table types */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; spinlock_t uh_lock; @@ -225,6 +230,7 @@ struct ip_fw_chain { struct rwlock uh_lock; /* lock for upper half */ #endif uint32_t id; /* ruleset id */ + uint32_t gencnt; /* generation count */ }; struct sockopt; /* used by tcp_var.h */ @@ -271,16 +277,21 @@ int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct radix_node; int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val); +int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, + uint32_t *val, int type); int ipfw_init_tables(struct ip_fw_chain *ch); void ipfw_destroy_tables(struct ip_fw_chain *ch); int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl); -int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen, uint32_t value); -int ipfw_dump_table_entry(struct radix_node *rn, void *arg); -int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen); +int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, + uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value); +int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, + uint8_t plen, uint8_t mlen, uint8_t type); int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); +int ipfw_dump_table_entry(struct radix_node *rn, void *arg); int ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl); +int ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); +int ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl); +int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables); /* In ip_fw_nat.c -- XXX to be moved to ip_var.h */ diff --git a/freebsd/sys/netinet/ipfw/ip_fw_sockopt.c b/freebsd/sys/netinet/ipfw/ip_fw_sockopt.c index b9635416..abfc24a0 100644 --- a/freebsd/sys/netinet/ipfw/ip_fw_sockopt.c +++ b/freebsd/sys/netinet/ipfw/ip_fw_sockopt.c @@ -35,17 +35,12 @@ __FBSDID("$FreeBSD$"); * the upper half of the ipfw code. */ -#if !defined(KLD_MODULE) #include -#include -#include #include #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ -#endif #include -#include #include #include @@ -351,12 +346,13 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg) } if (n == 0) { - /* A flush request (arg == 0) on empty ruleset - * returns with no error. On the contrary, + /* A flush request (arg == 0 or cmd == 1) on empty + * ruleset returns with no error. On the contrary, * if there is no match on a specific request, * we return EINVAL. */ - error = (arg == 0) ? 0 : EINVAL; + if (arg != 0 && cmd != 1) + error = EINVAL; break; } @@ -607,7 +603,8 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_SETFIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; - if (cmd->arg1 >= rt_numfibs) { + if ((cmd->arg1 != IP_FW_TABLEARG) && + (cmd->arg1 >= rt_numfibs)) { printf("ipfw: invalid fib number %d\n", cmd->arg1); return EINVAL; @@ -672,7 +669,6 @@ check_ipfw_struct(struct ip_fw *rule, int size) cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; break; - case O_MACADDR2: if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) goto bad_size; @@ -751,6 +747,7 @@ check_ipfw_struct(struct ip_fw *rule, int size) #endif case O_SKIPTO: case O_REASS: + case O_CALLRETURN: check_size: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; @@ -933,6 +930,7 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) } +#define IP_FW3_OPLENGTH(x) ((x)->sopt_valsize - sizeof(ip_fw3_opheader)) /** * {set|get}sockopt parser. */ @@ -941,10 +939,13 @@ ipfw_ctl(struct sockopt *sopt) { #define RULE_MAXSIZE (256*sizeof(u_int32_t)) int error; - size_t size; + size_t size, len, valsize; struct ip_fw *buf, *rule; struct ip_fw_chain *chain; u_int32_t rulenum[2]; + uint32_t opt; + char xbuf[128]; + ip_fw3_opheader *op3 = NULL; error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); if (error) @@ -964,7 +965,21 @@ ipfw_ctl(struct sockopt *sopt) chain = &V_layer3_chain; error = 0; - switch (sopt->sopt_name) { + /* Save original valsize before it is altered via sooptcopyin() */ + valsize = sopt->sopt_valsize; + if ((opt = sopt->sopt_name) == IP_FW3) { + /* + * Copy not less than sizeof(ip_fw3_opheader). + * We hope any IP_FW3 command will fit into 128-byte buffer. + */ + if ((error = sooptcopyin(sopt, xbuf, sizeof(xbuf), + sizeof(ip_fw3_opheader))) != 0) + return (error); + op3 = (ip_fw3_opheader *)xbuf; + opt = op3->opcode; + } + + switch (opt) { case IP_FW_GET: /* * pass up a copy of the current rules. Static rules @@ -1103,7 +1118,8 @@ ipfw_ctl(struct sockopt *sopt) if (error) break; error = ipfw_add_table_entry(chain, ent.tbl, - ent.addr, ent.masklen, ent.value); + &ent.addr, sizeof(ent.addr), ent.masklen, + IPFW_TABLE_CIDR, ent.value); } break; @@ -1116,7 +1132,34 @@ ipfw_ctl(struct sockopt *sopt) if (error) break; error = ipfw_del_table_entry(chain, ent.tbl, - ent.addr, ent.masklen); + &ent.addr, sizeof(ent.addr), ent.masklen, IPFW_TABLE_CIDR); + } + break; + + case IP_FW_TABLE_XADD: /* IP_FW3 */ + case IP_FW_TABLE_XDEL: /* IP_FW3 */ + { + ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1); + + /* Check minimum header size */ + if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) { + error = EINVAL; + break; + } + + /* Check if len field is valid */ + if (xent->len > sizeof(ipfw_table_xentry)) { + error = EINVAL; + break; + } + + len = xent->len - offsetof(ipfw_table_xentry, k); + + error = (opt == IP_FW_TABLE_XADD) ? + ipfw_add_table_entry(chain, xent->tbl, &xent->k, + len, xent->masklen, xent->type, xent->value) : + ipfw_del_table_entry(chain, xent->tbl, &xent->k, + len, xent->masklen, xent->type); } break; @@ -1128,9 +1171,7 @@ ipfw_ctl(struct sockopt *sopt) sizeof(tbl), sizeof(tbl)); if (error) break; - IPFW_WLOCK(chain); error = ipfw_flush_table(chain, tbl); - IPFW_WUNLOCK(chain); } break; @@ -1179,6 +1220,62 @@ ipfw_ctl(struct sockopt *sopt) } break; + case IP_FW_TABLE_XGETSIZE: /* IP_FW3 */ + { + uint32_t *tbl; + + if (IP_FW3_OPLENGTH(sopt) < sizeof(uint32_t)) { + error = EINVAL; + break; + } + + tbl = (uint32_t *)(op3 + 1); + + IPFW_RLOCK(chain); + error = ipfw_count_xtable(chain, *tbl, tbl); + IPFW_RUNLOCK(chain); + if (error) + break; + error = sooptcopyout(sopt, op3, sopt->sopt_valsize); + } + break; + + case IP_FW_TABLE_XLIST: /* IP_FW3 */ + { + ipfw_xtable *tbl; + + if ((size = valsize) < sizeof(ipfw_xtable)) { + error = EINVAL; + break; + } + + tbl = malloc(size, M_TEMP, M_ZERO | M_WAITOK); + memcpy(tbl, op3, sizeof(ipfw_xtable)); + + /* Get maximum number of entries we can store */ + tbl->size = (size - sizeof(ipfw_xtable)) / + sizeof(ipfw_table_xentry); + IPFW_RLOCK(chain); + error = ipfw_dump_xtable(chain, tbl); + IPFW_RUNLOCK(chain); + if (error) { + free(tbl, M_TEMP); + break; + } + + /* Revert size field back to bytes */ + tbl->size = tbl->size * sizeof(ipfw_table_xentry) + + sizeof(ipfw_table); + /* + * Since we call sooptcopyin() with small buffer, sopt_valsize is + * decreased to reflect supplied buffer size. Set it back to original value + */ + sopt->sopt_valsize = valsize; + error = sooptcopyout(sopt, tbl, size); + free(tbl, M_TEMP); + } + break; + /*--- NAT operations are protected by the IPFW_LOCK ---*/ case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) diff --git a/freebsd/sys/netinet/ipfw/ip_fw_table.c b/freebsd/sys/netinet/ipfw/ip_fw_table.c index a399a7a0..1725b8a5 100644 --- a/freebsd/sys/netinet/ipfw/ip_fw_table.c +++ b/freebsd/sys/netinet/ipfw/ip_fw_table.c @@ -41,17 +41,12 @@ __FBSDID("$FreeBSD$"); * from userland, because operations are typically fast. */ -#if !defined(KLD_MODULE) #include -#include -#include #include #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ -#endif #include -#include #include #include @@ -83,6 +78,29 @@ struct table_entry { u_int32_t value; }; +struct xaddr_iface { + uint8_t if_len; /* length of this struct */ + uint8_t pad[7]; /* Align name */ + char ifname[IF_NAMESIZE]; /* Interface name */ +}; + +struct table_xentry { + struct radix_node rn[2]; + union { +#ifdef INET6 + struct sockaddr_in6 addr6; +#endif + struct xaddr_iface iface; + } a; + union { +#ifdef INET6 + struct sockaddr_in6 mask6; +#endif + struct xaddr_iface ifmask; + } m; + u_int32_t value; +}; + /* * The radix code expects addr and mask to be array of bytes, * with the first byte being the length of the array. rn_inithead @@ -94,57 +112,281 @@ struct table_entry { */ #define KEY_LEN(v) *((uint8_t *)&(v)) #define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr)) +/* + * Do not require radix to compare more than actual IPv4/IPv6 address + */ +#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) +#define KEY_LEN_INET6 (offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr)) +#define KEY_LEN_IFACE (offsetof(struct xaddr_iface, ifname)) + +#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) +#define OFF_LEN_INET6 (8 * offsetof(struct sockaddr_in6, sin6_addr)) +#define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname)) + + +static inline void +ipv6_writemask(struct in6_addr *addr6, uint8_t mask) +{ + uint32_t *cp; + + for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) + *cp++ = 0xFFFFFFFF; + *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); +} int -ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen, uint32_t value) +ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, + uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value) { - struct radix_node_head *rnh; + struct radix_node_head *rnh, **rnh_ptr; struct table_entry *ent; + struct table_xentry *xent; struct radix_node *rn; + in_addr_t addr; + int offset; + void *ent_ptr; + struct sockaddr *addr_ptr, *mask_ptr; + char c; - if (tbl >= IPFW_TABLES_MAX) + if (tbl >= V_fw_tables_max) return (EINVAL); - rnh = ch->tables[tbl]; - ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO); - if (ent == NULL) - return (ENOMEM); - ent->value = value; - KEY_LEN(ent->addr) = KEY_LEN(ent->mask) = 8; - ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); - ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; + + switch (type) { + case IPFW_TABLE_CIDR: + if (plen == sizeof(in_addr_t)) { +#ifdef INET + /* IPv4 case */ + if (mlen > 32) + return (EINVAL); + ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); + ent->value = value; + /* Set 'total' structure length */ + KEY_LEN(ent->addr) = KEY_LEN_INET; + KEY_LEN(ent->mask) = KEY_LEN_INET; + /* Set offset of IPv4 address in bits */ + offset = OFF_LEN_INET; + ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); + addr = *((in_addr_t *)paddr); + ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; + /* Set pointers */ + rnh_ptr = &ch->tables[tbl]; + ent_ptr = ent; + addr_ptr = (struct sockaddr *)&ent->addr; + mask_ptr = (struct sockaddr *)&ent->mask; +#endif +#ifdef INET6 + } else if (plen == sizeof(struct in6_addr)) { + /* IPv6 case */ + if (mlen > 128) + return (EINVAL); + xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); + xent->value = value; + /* Set 'total' structure length */ + KEY_LEN(xent->a.addr6) = KEY_LEN_INET6; + KEY_LEN(xent->m.mask6) = KEY_LEN_INET6; + /* Set offset of IPv6 address in bits */ + offset = OFF_LEN_INET6; + ipv6_writemask(&xent->m.mask6.sin6_addr, mlen); + memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr)); + APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr); + /* Set pointers */ + rnh_ptr = &ch->xtables[tbl]; + ent_ptr = xent; + addr_ptr = (struct sockaddr *)&xent->a.addr6; + mask_ptr = (struct sockaddr *)&xent->m.mask6; +#endif + } else { + /* Unknown CIDR type */ + return (EINVAL); + } + break; + + case IPFW_TABLE_INTERFACE: + /* Check if string is terminated */ + c = ((char *)paddr)[IF_NAMESIZE - 1]; + ((char *)paddr)[IF_NAMESIZE - 1] = '\0'; + if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0')) + return (EINVAL); + + /* Include last \0 into comparison */ + mlen++; + + xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); + xent->value = value; + /* Set 'total' structure length */ + KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen; + KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen; + /* Set offset of interface name in bits */ + offset = OFF_LEN_IFACE; + memcpy(xent->a.iface.ifname, paddr, mlen); + /* Assume direct match */ + /* TODO: Add interface pattern matching */ +#if 0 + memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE); + mask_ptr = (struct sockaddr *)&xent->m.ifmask; +#endif + /* Set pointers */ + rnh_ptr = &ch->xtables[tbl]; + ent_ptr = xent; + addr_ptr = (struct sockaddr *)&xent->a.iface; + mask_ptr = NULL; + break; + + default: + return (EINVAL); + } + IPFW_WLOCK(ch); - rn = rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent); - if (rn == NULL) { + + /* Check if tabletype is valid */ + if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) { IPFW_WUNLOCK(ch); - free(ent, M_IPFW_TBL); - return (EEXIST); + free(ent_ptr, M_IPFW_TBL); + return (EINVAL); + } + + /* Check if radix tree exists */ + if ((rnh = *rnh_ptr) == NULL) { + IPFW_WUNLOCK(ch); + /* Create radix for a new table */ + if (!rn_inithead((void **)&rnh, offset)) { + free(ent_ptr, M_IPFW_TBL); + return (ENOMEM); + } + + IPFW_WLOCK(ch); + if (*rnh_ptr != NULL) { + /* Tree is already attached by other thread */ + rn_detachhead((void **)&rnh); + rnh = *rnh_ptr; + /* Check table type another time */ + if (ch->tabletype[tbl] != type) { + IPFW_WUNLOCK(ch); + free(ent_ptr, M_IPFW_TBL); + return (EINVAL); + } + } else { + *rnh_ptr = rnh; + /* + * Set table type. It can be set already + * (if we have IPv6-only table) but setting + * it another time does not hurt + */ + ch->tabletype[tbl] = type; + } } + + rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr); IPFW_WUNLOCK(ch); + + if (rn == NULL) { + free(ent_ptr, M_IPFW_TBL); + return (EEXIST); + } return (0); } int -ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen) +ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, + uint8_t plen, uint8_t mlen, uint8_t type) { - struct radix_node_head *rnh; + struct radix_node_head *rnh, **rnh_ptr; struct table_entry *ent; + in_addr_t addr; struct sockaddr_in sa, mask; + struct sockaddr *sa_ptr, *mask_ptr; + char c; - if (tbl >= IPFW_TABLES_MAX) + if (tbl >= V_fw_tables_max) return (EINVAL); - rnh = ch->tables[tbl]; - KEY_LEN(sa) = KEY_LEN(mask) = 8; - mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); - sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; + + switch (type) { + case IPFW_TABLE_CIDR: + if (plen == sizeof(in_addr_t)) { + /* Set 'total' structure length */ + KEY_LEN(sa) = KEY_LEN_INET; + KEY_LEN(mask) = KEY_LEN_INET; + mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); + addr = *((in_addr_t *)paddr); + sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; + rnh_ptr = &ch->tables[tbl]; + sa_ptr = (struct sockaddr *)&sa; + mask_ptr = (struct sockaddr *)&mask; +#ifdef INET6 + } else if (plen == sizeof(struct in6_addr)) { + /* IPv6 case */ + if (mlen > 128) + return (EINVAL); + struct sockaddr_in6 sa6, mask6; + memset(&sa6, 0, sizeof(struct sockaddr_in6)); + memset(&mask6, 0, sizeof(struct sockaddr_in6)); + /* Set 'total' structure length */ + KEY_LEN(sa6) = KEY_LEN_INET6; + KEY_LEN(mask6) = KEY_LEN_INET6; + ipv6_writemask(&mask6.sin6_addr, mlen); + memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr)); + APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr); + rnh_ptr = &ch->xtables[tbl]; + sa_ptr = (struct sockaddr *)&sa6; + mask_ptr = (struct sockaddr *)&mask6; +#endif + } else { + /* Unknown CIDR type */ + return (EINVAL); + } + break; + + case IPFW_TABLE_INTERFACE: + /* Check if string is terminated */ + c = ((char *)paddr)[IF_NAMESIZE - 1]; + ((char *)paddr)[IF_NAMESIZE - 1] = '\0'; + if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0')) + return (EINVAL); + + struct xaddr_iface ifname, ifmask; + memset(&ifname, 0, sizeof(ifname)); + + /* Include last \0 into comparison */ + mlen++; + + /* Set 'total' structure length */ + KEY_LEN(ifname) = KEY_LEN_IFACE + mlen; + KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen; + /* Assume direct match */ + /* FIXME: Add interface pattern matching */ +#if 0 + memset(ifmask.ifname, 0xFF, IF_NAMESIZE); + mask_ptr = (struct sockaddr *)&ifmask; +#endif + mask_ptr = NULL; + memcpy(ifname.ifname, paddr, mlen); + /* Set pointers */ + rnh_ptr = &ch->xtables[tbl]; + sa_ptr = (struct sockaddr *)&ifname; + + break; + + default: + return (EINVAL); + } + IPFW_WLOCK(ch); - ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh); - if (ent == NULL) { + if ((rnh = *rnh_ptr) == NULL) { IPFW_WUNLOCK(ch); return (ESRCH); } + + if (ch->tabletype[tbl] != type) { + IPFW_WUNLOCK(ch); + return (EINVAL); + } + + ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh); IPFW_WUNLOCK(ch); + + if (ent == NULL) + return (ESRCH); + free(ent, M_IPFW_TBL); return (0); } @@ -165,15 +407,38 @@ flush_table_entry(struct radix_node *rn, void *arg) int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl) { - struct radix_node_head *rnh; - - IPFW_WLOCK_ASSERT(ch); + struct radix_node_head *rnh, *xrnh; - if (tbl >= IPFW_TABLES_MAX) + if (tbl >= V_fw_tables_max) return (EINVAL); - rnh = ch->tables[tbl]; - KASSERT(rnh != NULL, ("NULL IPFW table")); - rnh->rnh_walktree(rnh, flush_table_entry, rnh); + + /* + * We free both (IPv4 and extended) radix trees and + * clear table type here to permit table to be reused + * for different type without module reload + */ + + IPFW_WLOCK(ch); + /* Set IPv4 table pointer to zero */ + if ((rnh = ch->tables[tbl]) != NULL) + ch->tables[tbl] = NULL; + /* Set extended table pointer to zero */ + if ((xrnh = ch->xtables[tbl]) != NULL) + ch->xtables[tbl] = NULL; + /* Zero table type */ + ch->tabletype[tbl] = 0; + IPFW_WUNLOCK(ch); + + if (rnh != NULL) { + rnh->rnh_walktree(rnh, flush_table_entry, rnh); + rn_detachhead((void **)&rnh); + } + + if (xrnh != NULL) { + xrnh->rnh_walktree(xrnh, flush_table_entry, xrnh); + rn_detachhead((void **)&xrnh); + } + return (0); } @@ -181,31 +446,86 @@ void ipfw_destroy_tables(struct ip_fw_chain *ch) { uint16_t tbl; - struct radix_node_head *rnh; - IPFW_WLOCK_ASSERT(ch); - - for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) { + /* Flush all tables */ + for (tbl = 0; tbl < V_fw_tables_max; tbl++) ipfw_flush_table(ch, tbl); - rnh = ch->tables[tbl]; - rn_detachhead((void **)&rnh); - } + + /* Free pointers itself */ + free(ch->tables, M_IPFW); + free(ch->xtables, M_IPFW); + free(ch->tabletype, M_IPFW); } int ipfw_init_tables(struct ip_fw_chain *ch) -{ - int i; - uint16_t j; +{ + /* Allocate pointers */ + ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO); + ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO); + ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO); + return (0); +} - for (i = 0; i < IPFW_TABLES_MAX; i++) { - if (!rn_inithead((void **)&ch->tables[i], KEY_OFS)) { - for (j = 0; j < i; j++) { - (void) ipfw_flush_table(ch, j); +int +ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) +{ + struct radix_node_head **tables, **xtables, *rnh; + struct radix_node_head **tables_old, **xtables_old; + uint8_t *tabletype, *tabletype_old; + unsigned int ntables_old, tbl; + + /* Check new value for validity */ + if (ntables > IPFW_TABLES_MAX) + ntables = IPFW_TABLES_MAX; + + /* Allocate new pointers */ + tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO); + xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO); + tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO); + + IPFW_WLOCK(ch); + + tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables; + + /* Copy old table pointers */ + memcpy(tables, ch->tables, sizeof(void *) * tbl); + memcpy(xtables, ch->xtables, sizeof(void *) * tbl); + memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl); + + /* Change pointers and number of tables */ + tables_old = ch->tables; + xtables_old = ch->xtables; + tabletype_old = ch->tabletype; + ch->tables = tables; + ch->xtables = xtables; + ch->tabletype = tabletype; + + ntables_old = V_fw_tables_max; + V_fw_tables_max = ntables; + + IPFW_WUNLOCK(ch); + + /* Check if we need to destroy radix trees */ + if (ntables < ntables_old) { + for (tbl = ntables; tbl < ntables_old; tbl++) { + if ((rnh = tables_old[tbl]) != NULL) { + rnh->rnh_walktree(rnh, flush_table_entry, rnh); + rn_detachhead((void **)&rnh); + } + + if ((rnh = xtables_old[tbl]) != NULL) { + rnh->rnh_walktree(rnh, flush_table_entry, rnh); + rn_detachhead((void **)&rnh); } - return (ENOMEM); } } + + /* Free old pointers */ + free(tables_old, M_IPFW); + free(xtables_old, M_IPFW); + free(tabletype_old, M_IPFW); + return (0); } @@ -217,10 +537,11 @@ ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, struct table_entry *ent; struct sockaddr_in sa; - if (tbl >= IPFW_TABLES_MAX) + if (tbl >= V_fw_tables_max) + return (0); + if ((rnh = ch->tables[tbl]) == NULL) return (0); - rnh = ch->tables[tbl]; - KEY_LEN(sa) = 8; + KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = addr; ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh)); if (ent != NULL) { @@ -230,6 +551,46 @@ ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, return (0); } +int +ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, + uint32_t *val, int type) +{ + struct radix_node_head *rnh; + struct table_xentry *xent; + struct sockaddr_in6 sa6; + struct xaddr_iface iface; + + if (tbl >= V_fw_tables_max) + return (0); + if ((rnh = ch->xtables[tbl]) == NULL) + return (0); + + switch (type) { + case IPFW_TABLE_CIDR: + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr)); + xent = (struct table_xentry *)(rnh->rnh_lookup(&sa6, NULL, rnh)); + break; + + case IPFW_TABLE_INTERFACE: + KEY_LEN(iface) = KEY_LEN_IFACE + + strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1; + /* Assume direct match */ + /* FIXME: Add interface pattern matching */ + xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh)); + break; + + default: + return (0); + } + + if (xent != NULL) { + *val = xent->value; + return (1); + } + return (0); +} + static int count_table_entry(struct radix_node *rn, void *arg) { @@ -244,10 +605,11 @@ ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) { struct radix_node_head *rnh; - if (tbl >= IPFW_TABLES_MAX) + if (tbl >= V_fw_tables_max) return (EINVAL); - rnh = ch->tables[tbl]; *cnt = 0; + if ((rnh = ch->tables[tbl]) == NULL) + return (0); rnh->rnh_walktree(rnh, count_table_entry, cnt); return (0); } @@ -278,11 +640,124 @@ ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) { struct radix_node_head *rnh; - if (tbl->tbl >= IPFW_TABLES_MAX) + if (tbl->tbl >= V_fw_tables_max) return (EINVAL); - rnh = ch->tables[tbl->tbl]; tbl->cnt = 0; + if ((rnh = ch->tables[tbl->tbl]) == NULL) + return (0); rnh->rnh_walktree(rnh, dump_table_entry, tbl); return (0); } + +static int +count_table_xentry(struct radix_node *rn, void *arg) +{ + uint32_t * const cnt = arg; + + (*cnt) += sizeof(ipfw_table_xentry); + return (0); +} + +int +ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) +{ + struct radix_node_head *rnh; + + if (tbl >= V_fw_tables_max) + return (EINVAL); + *cnt = 0; + if ((rnh = ch->tables[tbl]) != NULL) + rnh->rnh_walktree(rnh, count_table_xentry, cnt); + if ((rnh = ch->xtables[tbl]) != NULL) + rnh->rnh_walktree(rnh, count_table_xentry, cnt); + /* Return zero if table is empty */ + if (*cnt > 0) + (*cnt) += sizeof(ipfw_xtable); + return (0); +} + + +static int +dump_table_xentry_base(struct radix_node *rn, void *arg) +{ + struct table_entry * const n = (struct table_entry *)rn; + ipfw_xtable * const tbl = arg; + ipfw_table_xentry *xent; + + /* Out of memory, returning */ + if (tbl->cnt == tbl->size) + return (1); + xent = &tbl->xent[tbl->cnt]; + xent->len = sizeof(ipfw_table_xentry); + xent->tbl = tbl->tbl; + if (in_nullhost(n->mask.sin_addr)) + xent->masklen = 0; + else + xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); + /* Save IPv4 address as deprecated IPv6 compatible */ + xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr; + xent->value = n->value; + tbl->cnt++; + return (0); +} + +static int +dump_table_xentry_extended(struct radix_node *rn, void *arg) +{ + struct table_xentry * const n = (struct table_xentry *)rn; + ipfw_xtable * const tbl = arg; + ipfw_table_xentry *xent; +#ifdef INET6 + int i; + uint32_t *v; +#endif + /* Out of memory, returning */ + if (tbl->cnt == tbl->size) + return (1); + xent = &tbl->xent[tbl->cnt]; + xent->len = sizeof(ipfw_table_xentry); + xent->tbl = tbl->tbl; + + switch (tbl->type) { +#ifdef INET6 + case IPFW_TABLE_CIDR: + /* Count IPv6 mask */ + v = (uint32_t *)&n->m.mask6.sin6_addr; + for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++) + xent->masklen += bitcount32(*v); + memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr)); + break; +#endif + case IPFW_TABLE_INTERFACE: + /* Assume exact mask */ + xent->masklen = 8 * IF_NAMESIZE; + memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE); + break; + + default: + /* unknown, skip entry */ + return (0); + } + + xent->value = n->value; + tbl->cnt++; + return (0); +} + +int +ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl) +{ + struct radix_node_head *rnh; + + if (tbl->tbl >= V_fw_tables_max) + return (EINVAL); + tbl->cnt = 0; + tbl->type = ch->tabletype[tbl->tbl]; + if ((rnh = ch->tables[tbl->tbl]) != NULL) + rnh->rnh_walktree(rnh, dump_table_xentry_base, tbl); + if ((rnh = ch->xtables[tbl->tbl]) != NULL) + rnh->rnh_walktree(rnh, dump_table_xentry_extended, tbl); + return (0); +} + /* end of file */ diff --git a/freebsd/sys/netinet/libalias/alias.h b/freebsd/sys/netinet/libalias/alias.h index 2aed8293..b2615c90 100644 --- a/freebsd/sys/netinet/libalias/alias.h +++ b/freebsd/sys/netinet/libalias/alias.h @@ -220,6 +220,12 @@ struct mbuf *m_megapullup(struct mbuf *, int); */ #define PKT_ALIAS_REVERSE 0x80 +/* + * If PKT_ALIAS_SKIP_GLOBAL is set, nat instance is not checked for matching + * states in 'ipfw nat global' rule. + */ +#define PKT_ALIAS_SKIP_GLOBAL 0x200 + /* Function return codes. */ #define PKT_ALIAS_ERROR -1 #define PKT_ALIAS_OK 1 diff --git a/freebsd/sys/netinet/libalias/alias_db.c b/freebsd/sys/netinet/libalias/alias_db.c index 35b1846b..60a8e06f 100644 --- a/freebsd/sys/netinet/libalias/alias_db.c +++ b/freebsd/sys/netinet/libalias/alias_db.c @@ -553,10 +553,6 @@ static void IncrementalCleanup(struct libalias *); static void DeleteLink(struct alias_link *); -static struct alias_link * -AddLink(struct libalias *, struct in_addr, struct in_addr, struct in_addr, - u_short, u_short, int, int); - static struct alias_link * ReLink(struct alias_link *, struct in_addr, struct in_addr, struct in_addr, @@ -574,9 +570,6 @@ static struct alias_link * #define ALIAS_PORT_MASK_EVEN 0x07ffe #define GET_NEW_PORT_MAX_ATTEMPTS 20 -#define GET_ALIAS_PORT -1 -#define GET_ALIAS_ID GET_ALIAS_PORT - #define FIND_EVEN_ALIAS_BASE 1 /* GetNewPort() allocates port numbers. Note that if a port number @@ -939,17 +932,12 @@ DeleteLink(struct alias_link *lnk) } -static struct alias_link * -AddLink(struct libalias *la, struct in_addr src_addr, - struct in_addr dst_addr, - struct in_addr alias_addr, - u_short src_port, - u_short dst_port, - int alias_port_param, /* if less than zero, alias */ - int link_type) -{ /* port will be automatically *//* chosen. - * If greater than */ - u_int start_point; /* zero, equal to alias port */ +struct alias_link * +AddLink(struct libalias *la, struct in_addr src_addr, struct in_addr dst_addr, + struct in_addr alias_addr, u_short src_port, u_short dst_port, + int alias_port_param, int link_type) +{ + u_int start_point; struct alias_link *lnk; LIBALIAS_LOCK_ASSERT(la); @@ -2494,9 +2482,18 @@ LibAliasInit(struct libalias *la) #endif if (la == NULL) { +#ifdef _KERNEL +#ifndef __rtems__ +#undef malloc /* XXX: ugly */ + la = malloc(sizeof *la, M_ALIAS, M_WAITOK | M_ZERO); +#else /* __rtems__ */ + la = _bsd_malloc(sizeof *la, M_ALIAS, M_WAITOK | M_ZERO); +#endif /* __rtems__ */ +#else la = calloc(sizeof *la, 1); if (la == NULL) return (la); +#endif #ifndef _KERNEL /* kernel cleans up on module unload */ if (LIST_EMPTY(&instancehead)) diff --git a/freebsd/sys/netinet/libalias/alias_ftp.c b/freebsd/sys/netinet/libalias/alias_ftp.c index c8a09413..43411d41 100644 --- a/freebsd/sys/netinet/libalias/alias_ftp.c +++ b/freebsd/sys/netinet/libalias/alias_ftp.c @@ -102,38 +102,68 @@ __FBSDID("$FreeBSD$"); #define FTP_CONTROL_PORT_NUMBER 21 static void -AliasHandleFtpOut(struct libalias *, struct ip *, struct alias_link *, - int maxpacketsize); +AliasHandleFtpOut(struct libalias *, struct ip *, struct alias_link *, + int maxpacketsize); +static void +AliasHandleFtpIn(struct libalias *, struct ip *, struct alias_link *); -static int -fingerprint(struct libalias *la, struct alias_data *ah) +static int +fingerprint_out(struct libalias *la, struct alias_data *ah) { - if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL || - ah->maxpktsize == 0) + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL || + ah->maxpktsize == 0) return (-1); - if (ntohs(*ah->dport) == FTP_CONTROL_PORT_NUMBER - || ntohs(*ah->sport) == FTP_CONTROL_PORT_NUMBER) + if (ntohs(*ah->dport) == FTP_CONTROL_PORT_NUMBER || + ntohs(*ah->sport) == FTP_CONTROL_PORT_NUMBER) return (0); return (-1); } -static int -protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah) +static int +fingerprint_in(struct libalias *la, struct alias_data *ah) +{ + + if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL) + return (-1); + if (ntohs(*ah->dport) == FTP_CONTROL_PORT_NUMBER || + ntohs(*ah->sport) == FTP_CONTROL_PORT_NUMBER) + return (0); + return (-1); +} + +static int +protohandler_out(struct libalias *la, struct ip *pip, struct alias_data *ah) { - + AliasHandleFtpOut(la, pip, ah->lnk, ah->maxpktsize); return (0); } + +static int +protohandler_in(struct libalias *la, struct ip *pip, struct alias_data *ah) +{ + + AliasHandleFtpIn(la, pip, ah->lnk); + return (0); +} + struct proto_handler handlers[] = { - { - .pri = 80, - .dir = OUT, - .proto = TCP, - .fingerprint = &fingerprint, - .protohandler = &protohandler - }, + { + .pri = 80, + .dir = OUT, + .proto = TCP, + .fingerprint = &fingerprint_out, + .protohandler = &protohandler_out + }, + { + .pri = 80, + .dir = IN, + .proto = TCP, + .fingerprint = &fingerprint_in, + .protohandler = &protohandler_in + }, { EOH } }; @@ -258,6 +288,57 @@ AliasHandleFtpOut( } } +static void +AliasHandleFtpIn(struct libalias *la, + struct ip *pip, /* IP packet to examine/patch */ + struct alias_link *lnk) /* The link to go through (aliased port) */ +{ + int hlen, tlen, dlen, pflags; + char *sptr; + struct tcphdr *tc; + + /* Calculate data length of TCP packet */ + tc = (struct tcphdr *)ip_next(pip); + hlen = (pip->ip_hl + tc->th_off) << 2; + tlen = ntohs(pip->ip_len); + dlen = tlen - hlen; + + /* Place string pointer and beginning of data */ + sptr = (char *)pip; + sptr += hlen; + + /* + * Check that data length is not too long and previous message was + * properly terminated with CRLF. + */ + pflags = GetProtocolFlags(lnk); + if (dlen <= MAX_MESSAGE_SIZE && (pflags & WAIT_CRLF) == 0 && + ntohs(tc->th_dport) == FTP_CONTROL_PORT_NUMBER && + (ParseFtpPortCommand(la, sptr, dlen) != 0 || + ParseFtpEprtCommand(la, sptr, dlen) != 0)) { + /* + * Alias active mode client requesting data from server + * behind NAT. We need to alias server->client connection + * to external address client is connecting to. + */ + AddLink(la, GetOriginalAddress(lnk), la->true_addr, + GetAliasAddress(lnk), htons(FTP_CONTROL_PORT_NUMBER - 1), + htons(la->true_port), GET_ALIAS_PORT, IPPROTO_TCP); + } + /* Track the msgs which are CRLF term'd for PORT/PASV FW breach */ + if (dlen) { + sptr = (char *)pip; /* start over at beginning */ + tlen = ntohs(pip->ip_len); /* recalc tlen, pkt may + * have grown. + */ + if (sptr[tlen - 2] == '\r' && sptr[tlen - 1] == '\n') + pflags &= ~WAIT_CRLF; + else + pflags |= WAIT_CRLF; + SetProtocolFlags(lnk, pflags); + } +} + static int ParseFtpPortCommand(struct libalias *la, char *sptr, int dlen) { @@ -578,9 +659,10 @@ NewFtpMessage(struct libalias *la, struct ip *pip, if (la->true_port < IPPORT_RESERVED) return; -/* Establish link to address and port found in FTP control message. */ - ftp_lnk = FindUdpTcpOut(la, la->true_addr, GetDestAddress(lnk), - htons(la->true_port), 0, IPPROTO_TCP, 1); + /* Establish link to address and port found in FTP control message. */ + ftp_lnk = AddLink(la, la->true_addr, GetDestAddress(lnk), + GetAliasAddress(lnk), htons(la->true_port), 0, GET_ALIAS_PORT, + IPPROTO_TCP); if (ftp_lnk != NULL) { int slen, hlen, tlen, dlen; diff --git a/freebsd/sys/netinet/libalias/alias_local.h b/freebsd/sys/netinet/libalias/alias_local.h index 68839da3..307442cd 100644 --- a/freebsd/sys/netinet/libalias/alias_local.h +++ b/freebsd/sys/netinet/libalias/alias_local.h @@ -67,6 +67,9 @@ #define LINK_TABLE_OUT_SIZE 4001 #define LINK_TABLE_IN_SIZE 4001 +#define GET_ALIAS_PORT -1 +#define GET_ALIAS_ID GET_ALIAS_PORT + struct proxy_entry; struct libalias { @@ -249,6 +252,10 @@ DifferentialChecksum(u_short * _cksum, void * _new, void * _old, int _n); /* Internal data access */ struct alias_link * +AddLink(struct libalias *la, struct in_addr src_addr, struct in_addr dst_addr, + struct in_addr alias_addr, u_short src_port, u_short dst_port, + int alias_param, int link_type); +struct alias_link * FindIcmpIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, u_short _id_alias, int _create); struct alias_link * diff --git a/freebsd/sys/netinet/libalias/alias_mod.h b/freebsd/sys/netinet/libalias/alias_mod.h index 995f61ac..e9dfc724 100644 --- a/freebsd/sys/netinet/libalias/alias_mod.h +++ b/freebsd/sys/netinet/libalias/alias_mod.h @@ -44,6 +44,9 @@ MALLOC_DECLARE(M_ALIAS); #define calloc(x, n) malloc(x*n) #define free(x) free(x, M_ALIAS) #else /* __rtems__ */ +#undef malloc +#undef calloc +#undef free #define malloc(x) _bsd_malloc(x, M_ALIAS, M_NOWAIT|M_ZERO) #define calloc(x, n) malloc(x*n) #define free(x) _bsd_free(x, M_ALIAS) diff --git a/freebsd/sys/netinet/libalias/alias_proxy.c b/freebsd/sys/netinet/libalias/alias_proxy.c index cbd4b99b..89eb2324 100644 --- a/freebsd/sys/netinet/libalias/alias_proxy.c +++ b/freebsd/sys/netinet/libalias/alias_proxy.c @@ -212,13 +212,13 @@ RuleAdd(struct libalias *la, struct proxy_entry *entry) LIBALIAS_LOCK_ASSERT(la); + entry->la = la; if (la->proxyList == NULL) { la->proxyList = entry; entry->last = NULL; entry->next = NULL; return; } - entry->la = la; rule_index = entry->rule_index; ptr = la->proxyList; diff --git a/freebsd/sys/netinet/libalias/alias_sctp.c b/freebsd/sys/netinet/libalias/alias_sctp.c index fed592f0..ac871fc9 100644 --- a/freebsd/sys/netinet/libalias/alias_sctp.c +++ b/freebsd/sys/netinet/libalias/alias_sctp.c @@ -1931,7 +1931,7 @@ UP_process(struct libalias *la, int direction, struct sctp_nat_assoc *assoc, str * @brief Process SCTP message while association is in the process of closing * * This function waits for a SHUT-COMP to close the association. Depending on - * the the setting of sysctl_holddown_timer it may not remove the association + * the setting of sysctl_holddown_timer it may not remove the association * immediately, but leave it up until SN_X_T(la). Only SHUT-COMP, SHUT-ACK, and * ABORT packets are permitted in this state. All other packets are dropped. * diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c index 408b529b..d70d8323 100644 --- a/freebsd/sys/netinet/raw_ip.c +++ b/freebsd/sys/netinet/raw_ip.c @@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -718,7 +719,7 @@ rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) /* * in_ifscrub kills the interface route. */ - in_ifscrub(ia->ia_ifp, ia); + in_ifscrub(ia->ia_ifp, ia, 0); /* * in_ifadown gets rid of all the rest of the * routes. This is not quite the right thing @@ -753,10 +754,18 @@ rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; + err = ifa_del_loopback_route((struct ifaddr *)ia, sa); + if (err == 0) + ia->ia_flags &= ~IFA_RTSELF; + err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; + err = ifa_add_loopback_route((struct ifaddr *)ia, sa); + if (err == 0) + ia->ia_flags |= IFA_RTSELF; + ifa_free(&ia->ia_ifa); break; } @@ -1096,7 +1105,8 @@ rip_pcblist(SYSCTL_HANDLER_ARGS) return (error); } -SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, +SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, + CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); struct pr_usrreqs rip_usrreqs = { diff --git a/freebsd/sys/netinet/sctp.h b/freebsd/sys/netinet/sctp.h index 09e2e465..03cf86a3 100644 --- a/freebsd/sys/netinet/sctp.h +++ b/freebsd/sys/netinet/sctp.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -27,7 +29,6 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp.h,v 1.18 2005/03/06 16:04:16 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); @@ -35,6 +36,7 @@ __FBSDID("$FreeBSD$"); #ifndef _NETINET_SCTP_H_ #define _NETINET_SCTP_H_ + #include @@ -89,7 +91,7 @@ struct sctp_paramhdr { #define SCTP_PEER_ADDR_PARAMS 0x0000000a #define SCTP_DEFAULT_SEND_PARAM 0x0000000b /* ancillary data/notification interest options */ -#define SCTP_EVENTS 0x0000000c +#define SCTP_EVENTS 0x0000000c /* deprecated */ /* Without this applied we will give V4 and V6 addresses on a V6 socket */ #define SCTP_I_WANT_MAPPED_V4_ADDR 0x0000000d #define SCTP_MAXSEG 0x0000000e @@ -112,6 +114,13 @@ struct sctp_paramhdr { #define SCTP_EXPLICIT_EOR 0x0000001b #define SCTP_REUSE_PORT 0x0000001c /* rw */ #define SCTP_AUTH_DEACTIVATE_KEY 0x0000001d +#define SCTP_EVENT 0x0000001e +#define SCTP_RECVRCVINFO 0x0000001f +#define SCTP_RECVNXTINFO 0x00000020 +#define SCTP_DEFAULT_SNDINFO 0x00000021 +#define SCTP_DEFAULT_PRINFO 0x00000022 +#define SCTP_PEER_ADDR_THLDS 0x00000023 +#define SCTP_REMOTE_UDP_ENCAPS_PORT 0x00000024 /* * read-only options @@ -146,8 +155,22 @@ struct sctp_paramhdr { * field. */ -/* these should probably go into sockets API */ -#define SCTP_RESET_STREAMS 0x00001004 /* wo */ +#define SCTP_ENABLE_STREAM_RESET 0x00000900 /* struct + * sctp_assoc_value */ +#define SCTP_RESET_STREAMS 0x00000901 /* struct + * sctp_reset_streams */ +#define SCTP_RESET_ASSOC 0x00000902 /* sctp_assoc_t */ +#define SCTP_ADD_STREAMS 0x00000903 /* struct + * sctp_add_streams */ + +/* For enable stream reset */ +#define SCTP_ENABLE_RESET_STREAM_REQ 0x00000001 +#define SCTP_ENABLE_RESET_ASSOC_REQ 0x00000002 +#define SCTP_ENABLE_CHANGE_ASSOC_REQ 0x00000004 +#define SCTP_ENABLE_VALUE_MASK 0x00000007 +/* For reset streams */ +#define SCTP_STREAM_RESET_INCOMING 0x00000001 +#define SCTP_STREAM_RESET_OUTGOING 0x00000002 /* here on down are more implementation specific */ @@ -158,7 +181,11 @@ struct sctp_paramhdr { #define SCTP_CMT_USE_DAC 0x00001201 /* JRS - Pluggable Congestion Control Socket option */ #define SCTP_PLUGGABLE_CC 0x00001202 - +/* RS - Pluggable Stream Scheduling Socket option */ +#define SCTP_PLUGGABLE_SS 0x00001203 +#define SCTP_SS_VALUE 0x00001204 +#define SCTP_CC_OPTION 0x00001205 /* Options for CC + * modules */ /* read only */ #define SCTP_GET_SNDBUF_USE 0x00001101 #define SCTP_GET_STAT_LOG 0x00001103 @@ -238,8 +265,6 @@ struct sctp_paramhdr { #define SCTP_PEELOFF 0x0000800a /* the real worker for sctp_getaddrlen() */ #define SCTP_GET_ADDR_LEN 0x0000800b -/* temporary workaround for Apple listen() issue, no args used */ -#define SCTP_LISTEN_FIX 0x0000800c /* Debug things that need to be purged */ #define SCTP_SET_INITIAL_DBG_SEQ 0x00009f00 @@ -252,6 +277,35 @@ struct sctp_paramhdr { #define SCTP_CC_HSTCP 0x00000001 /* HTCP Congestion Control */ #define SCTP_CC_HTCP 0x00000002 +/* RTCC Congestion Control - RFC2581 plus */ +#define SCTP_CC_RTCC 0x00000003 + +#define SCTP_CC_OPT_RTCC_SETMODE 0x00002000 +#define SCTP_CC_OPT_USE_DCCC_ECN 0x00002001 +#define SCTP_CC_OPT_STEADY_STEP 0x00002002 + +#define SCTP_CMT_OFF 0 +#define SCTP_CMT_BASE 1 +#define SCTP_CMT_RPV1 2 +#define SCTP_CMT_RPV2 3 +#define SCTP_CMT_MPTCP 4 +#define SCTP_CMT_MAX SCTP_CMT_MPTCP + +/* RS - Supported stream scheduling modules for pluggable + * stream scheduling + */ +/* Default simple round-robin */ +#define SCTP_SS_DEFAULT 0x00000000 +/* Real round-robin */ +#define SCTP_SS_ROUND_ROBIN 0x00000001 +/* Real round-robin per packet */ +#define SCTP_SS_ROUND_ROBIN_PACKET 0x00000002 +/* Priority */ +#define SCTP_SS_PRIORITY 0x00000003 +/* Fair Bandwidth */ +#define SCTP_SS_FAIR_BANDWITH 0x00000004 +/* First-come, first-serve */ +#define SCTP_SS_FIRST_COME 0x00000005 /* fragment interleave constants @@ -398,6 +452,10 @@ struct sctp_error_unrecognized_chunk { #define SCTP_BADCRC 0x02 #define SCTP_PACKET_TRUNCATED 0x04 +/* Flag for ECN -CWR */ +#define SCTP_CWR_REDUCE_OVERRIDE 0x01 +#define SCTP_CWR_IN_SAME_WINDOW 0x02 + #define SCTP_SAT_NETWORK_MIN 400 /* min ms for RTT to set satellite * time */ #define SCTP_SAT_NETWORK_BURST_INCR 2 /* how many times to multiply maxburst @@ -442,55 +500,64 @@ struct sctp_error_unrecognized_chunk { #define SCTP_PCB_FLAGS_SOCKET_GONE 0x10000000 #define SCTP_PCB_FLAGS_SOCKET_ALLGONE 0x20000000 #define SCTP_PCB_FLAGS_SOCKET_CANT_READ 0x40000000 + /* flags to copy to new PCB */ #define SCTP_PCB_COPY_FLAGS (SCTP_PCB_FLAGS_BOUNDALL|\ SCTP_PCB_FLAGS_WAKEINPUT|\ SCTP_PCB_FLAGS_BOUND_V6) - /* * PCB Features (in sctp_features bitmask) */ -#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002 -#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004 -#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008 -#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010 -#define SCTP_PCB_FLAGS_DO_ASCONF 0x00000020 -#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x00000040 -#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x00000080 +#define SCTP_PCB_FLAGS_DO_NOT_PMTUD 0x00000001 +#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002 /* deprecated */ +#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004 +#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008 +#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010 +#define SCTP_PCB_FLAGS_DO_ASCONF 0x00000020 +#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x00000040 +#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x00000080 /* socket options */ -#define SCTP_PCB_FLAGS_NODELAY 0x00000100 -#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200 -#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400 -#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800 -#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000 -#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000 -#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x00004000 -#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x00008000 -#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x00010000 -#define SCTP_PCB_FLAGS_PDAPIEVNT 0x00020000 -#define SCTP_PCB_FLAGS_AUTHEVNT 0x00040000 -#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x00080000 -#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x00100000 -#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x00400000 -#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x00800000 -#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x01000000 -#define SCTP_PCB_FLAGS_PORTREUSE 0x02000000 -#define SCTP_PCB_FLAGS_DRYEVNT 0x04000000 +#define SCTP_PCB_FLAGS_NODELAY 0x00000100 +#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200 +#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400 /* deprecated */ +#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800 +#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000 +#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000 +#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x00004000 /* deprecated */ +#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x00008000 +#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x00010000 +#define SCTP_PCB_FLAGS_PDAPIEVNT 0x00020000 +#define SCTP_PCB_FLAGS_AUTHEVNT 0x00040000 +#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x00080000 +#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x00100000 +#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x00400000 +#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x00800000 +#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x01000000 +#define SCTP_PCB_FLAGS_PORTREUSE 0x02000000 +#define SCTP_PCB_FLAGS_DRYEVNT 0x04000000 +#define SCTP_PCB_FLAGS_RECVRCVINFO 0x08000000 +#define SCTP_PCB_FLAGS_RECVNXTINFO 0x10000000 +#define SCTP_PCB_FLAGS_ASSOC_RESETEVNT 0x20000000 +#define SCTP_PCB_FLAGS_STREAM_CHANGEEVNT 0x40000000 +#define SCTP_PCB_FLAGS_RECVNSENDFAILEVNT 0x80000000 + /*- * mobility_features parameters (by micchie).Note * these features are applied against the * sctp_mobility_features flags.. not the sctp_features * flags. */ -#define SCTP_MOBILITY_BASE 0x00000001 -#define SCTP_MOBILITY_FASTHANDOFF 0x00000002 -#define SCTP_MOBILITY_PRIM_DELETED 0x00000004 +#define SCTP_MOBILITY_BASE 0x00000001 +#define SCTP_MOBILITY_FASTHANDOFF 0x00000002 +#define SCTP_MOBILITY_PRIM_DELETED 0x00000004 #define SCTP_SMALLEST_PMTU 512 /* smallest pmtu allowed when disabling PMTU * discovery */ +#undef SCTP_PACKED + #include /* This dictates the size of the packet @@ -517,7 +584,6 @@ struct sctp_error_unrecognized_chunk { #define SCTP_BLK_LOGGING_ENABLE 0x00000001 #define SCTP_CWND_MONITOR_ENABLE 0x00000002 #define SCTP_CWND_LOGGING_ENABLE 0x00000004 -#define SCTP_EARLYFR_LOGGING_ENABLE 0x00000010 #define SCTP_FLIGHT_LOGGING_ENABLE 0x00000020 #define SCTP_FR_LOGGING_ENABLE 0x00000040 #define SCTP_LOCK_LOGGING_ENABLE 0x00000080 @@ -525,25 +591,22 @@ struct sctp_error_unrecognized_chunk { #define SCTP_MBCNT_LOGGING_ENABLE 0x00000200 #define SCTP_MBUF_LOGGING_ENABLE 0x00000400 #define SCTP_NAGLE_LOGGING_ENABLE 0x00000800 -#define SCTP_RECV_RWND_LOGGING_ENABLE 0x00001000 +#define SCTP_RECV_RWND_LOGGING_ENABLE 0x00001000 #define SCTP_RTTVAR_LOGGING_ENABLE 0x00002000 #define SCTP_SACK_LOGGING_ENABLE 0x00004000 -#define SCTP_SACK_RWND_LOGGING_ENABLE 0x00008000 +#define SCTP_SACK_RWND_LOGGING_ENABLE 0x00008000 #define SCTP_SB_LOGGING_ENABLE 0x00010000 #define SCTP_STR_LOGGING_ENABLE 0x00020000 #define SCTP_WAKE_LOGGING_ENABLE 0x00040000 #define SCTP_LOG_MAXBURST_ENABLE 0x00080000 #define SCTP_LOG_RWND_ENABLE 0x00100000 -#define SCTP_LOG_SACK_ARRIVALS_ENABLE 0x00200000 -#define SCTP_LTRACE_CHUNK_ENABLE 0x00400000 -#define SCTP_LTRACE_ERROR_ENABLE 0x00800000 -#define SCTP_LAST_PACKET_TRACING 0x01000000 -#define SCTP_THRESHOLD_LOGGING 0x02000000 -#define SCTP_LOG_AT_SEND_2_SCTP 0x04000000 -#define SCTP_LOG_AT_SEND_2_OUTQ 0x08000000 -#define SCTP_LOG_TRY_ADVANCE 0x10000000 - - -#undef SCTP_PACKED +#define SCTP_LOG_SACK_ARRIVALS_ENABLE 0x00200000 +#define SCTP_LTRACE_CHUNK_ENABLE 0x00400000 +#define SCTP_LTRACE_ERROR_ENABLE 0x00800000 +#define SCTP_LAST_PACKET_TRACING 0x01000000 +#define SCTP_THRESHOLD_LOGGING 0x02000000 +#define SCTP_LOG_AT_SEND_2_SCTP 0x04000000 +#define SCTP_LOG_AT_SEND_2_OUTQ 0x08000000 +#define SCTP_LOG_TRY_ADVANCE 0x10000000 #endif /* !_NETINET_SCTP_H_ */ diff --git a/freebsd/sys/netinet/sctp_asconf.c b/freebsd/sys/netinet/sctp_asconf.c index 3c24b75d..3e51ed72 100644 --- a/freebsd/sys/netinet/sctp_asconf.c +++ b/freebsd/sys/netinet/sctp_asconf.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,10 +32,9 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_asconf.c,v 1.24 2005/03/06 16:04:16 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); + #include #include #include @@ -49,53 +50,10 @@ __FBSDID("$FreeBSD$"); * SCTP_DEBUG_ASCONF1: protocol info, general info and errors * SCTP_DEBUG_ASCONF2: detailed info */ -#ifdef SCTP_DEBUG -#endif /* SCTP_DEBUG */ - - -static void -sctp_asconf_get_source_ip(struct mbuf *m, struct sockaddr *sa) -{ - struct ip *iph; - struct sockaddr_in *sin; - -#ifdef INET6 - struct sockaddr_in6 *sin6; - -#endif - - iph = mtod(m, struct ip *); - if (iph->ip_v == IPVERSION) { - /* IPv4 source */ - sin = (struct sockaddr_in *)sa; - bzero(sin, sizeof(*sin)); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(struct sockaddr_in); - sin->sin_port = 0; - sin->sin_addr.s_addr = iph->ip_src.s_addr; - return; - } -#ifdef INET6 - else if (iph->ip_v == (IPV6_VERSION >> 4)) { - /* IPv6 source */ - struct ip6_hdr *ip6; - sin6 = (struct sockaddr_in6 *)sa; - bzero(sin6, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); - sin6->sin6_port = 0; - ip6 = mtod(m, struct ip6_hdr *); - sin6->sin6_addr = ip6->ip6_src; - return; - } -#endif /* INET6 */ - else - return; -} /* - * draft-ietf-tsvwg-addip-sctp + * RFC 5061 * * An ASCONF parameter queue exists per asoc which holds the pending address * operations. Lists are updated upon receipt of ASCONF-ACK. @@ -128,7 +86,7 @@ sctp_asconf_success_response(uint32_t id) if (m_reply == NULL) { SCTPDBG(SCTP_DEBUG_ASCONF1, "asconf_success_response: couldn't get mbuf!\n"); - return NULL; + return (NULL); } aph = mtod(m_reply, struct sctp_asconf_paramhdr *); aph->correlation_id = id; @@ -137,7 +95,7 @@ sctp_asconf_success_response(uint32_t id) SCTP_BUF_LEN(m_reply) = aph->ph.param_length; aph->ph.param_length = htons(aph->ph.param_length); - return m_reply; + return (m_reply); } static struct mbuf * @@ -156,7 +114,7 @@ sctp_asconf_error_response(uint32_t id, uint16_t cause, uint8_t * error_tlv, if (m_reply == NULL) { SCTPDBG(SCTP_DEBUG_ASCONF1, "asconf_error_response: couldn't get mbuf!\n"); - return NULL; + return (NULL); } aph = mtod(m_reply, struct sctp_asconf_paramhdr *); error = (struct sctp_error_cause *)(aph + 1); @@ -173,7 +131,7 @@ sctp_asconf_error_response(uint32_t id, uint16_t cause, uint8_t * error_tlv, "asconf_error_response: tlv_length (%xh) too big\n", tlv_length); sctp_m_freem(m_reply); /* discard */ - return NULL; + return (NULL); } if (error_tlv != NULL) { tlv = (uint8_t *) (error + 1); @@ -183,59 +141,70 @@ sctp_asconf_error_response(uint32_t id, uint16_t cause, uint8_t * error_tlv, error->length = htons(error->length); aph->ph.param_length = htons(aph->ph.param_length); - return m_reply; + return (m_reply); } static struct mbuf * -sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, - struct sctp_tcb *stcb, int response_required) +sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *aph, + struct sctp_tcb *stcb, int send_hb, int response_required) { + struct sctp_nets *net; struct mbuf *m_reply = NULL; - struct sockaddr_storage sa_source, sa_store; - struct sctp_ipv4addr_param *v4addr; + struct sockaddr_storage sa_store; + struct sctp_paramhdr *ph; uint16_t param_type, param_length, aparam_length; struct sockaddr *sa; - struct sockaddr_in *sin; int zero_address = 0; + int bad_address = 0; + +#ifdef INET + struct sockaddr_in *sin; + struct sctp_ipv4addr_param *v4addr; +#endif #ifdef INET6 struct sockaddr_in6 *sin6; struct sctp_ipv6addr_param *v6addr; -#endif /* INET6 */ +#endif aparam_length = ntohs(aph->ph.param_length); - v4addr = (struct sctp_ipv4addr_param *)(aph + 1); -#ifdef INET6 - v6addr = (struct sctp_ipv6addr_param *)(aph + 1); -#endif /* INET6 */ - param_type = ntohs(v4addr->ph.param_type); - param_length = ntohs(v4addr->ph.param_length); + ph = (struct sctp_paramhdr *)(aph + 1); + param_type = ntohs(ph->param_type); + param_length = ntohs(ph->param_length); sa = (struct sockaddr *)&sa_store; switch (param_type) { +#ifdef INET case SCTP_IPV4_ADDRESS: if (param_length != sizeof(struct sctp_ipv4addr_param)) { /* invalid param size */ - return NULL; + return (NULL); } + v4addr = (struct sctp_ipv4addr_param *)ph; sin = (struct sockaddr_in *)&sa_store; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); sin->sin_port = stcb->rport; sin->sin_addr.s_addr = v4addr->addr; + if ((sin->sin_addr.s_addr == INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { + bad_address = 1; + } if (sin->sin_addr.s_addr == INADDR_ANY) zero_address = 1; SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: adding "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); break; - case SCTP_IPV6_ADDRESS: +#endif #ifdef INET6 + case SCTP_IPV6_ADDRESS: if (param_length != sizeof(struct sctp_ipv6addr_param)) { /* invalid param size */ - return NULL; + return (NULL); } + v6addr = (struct sctp_ipv6addr_param *)ph; sin6 = (struct sockaddr_in6 *)&sa_store; bzero(sin6, sizeof(*sin6)); sin6->sin6_family = AF_INET6; @@ -243,38 +212,35 @@ sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, sin6->sin6_port = stcb->rport; memcpy((caddr_t)&sin6->sin6_addr, v6addr->addr, sizeof(struct in6_addr)); + if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { + bad_address = 1; + } if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) zero_address = 1; SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: adding "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); -#else - /* IPv6 not enabled! */ - /* FIX ME: currently sends back an invalid param error */ - m_reply = sctp_asconf_error_response(aph->correlation_id, - SCTP_CAUSE_INVALID_PARAM, (uint8_t *) aph, aparam_length); - SCTPDBG(SCTP_DEBUG_ASCONF1, - "process_asconf_add_ip: v6 disabled- skipping "); - SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); - return m_reply; -#endif break; +#endif default: m_reply = sctp_asconf_error_response(aph->correlation_id, - SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph, + SCTP_CAUSE_INVALID_PARAM, (uint8_t *) aph, aparam_length); - return m_reply; + return (m_reply); } /* end switch */ /* if 0.0.0.0/::0, add the source address instead */ if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) { - sa = (struct sockaddr *)&sa_source; - sctp_asconf_get_source_ip(m, sa); + sa = src; SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: using source addr "); - SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); + SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src); } /* add the address */ - if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, + if (bad_address) { + m_reply = sctp_asconf_error_response(aph->correlation_id, + SCTP_CAUSE_INVALID_PARAM, (uint8_t *) aph, + aparam_length); + } else if (sctp_add_remote_addr(stcb, sa, &net, SCTP_DONOT_SETSCOPE, SCTP_ADDR_DYNAMIC_ADDED) != 0) { SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: error adding address\n"); @@ -288,13 +254,14 @@ sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, m_reply = sctp_asconf_success_response(aph->correlation_id); } - sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, - NULL, SCTP_FROM_SCTP_ASCONF + SCTP_LOC_1); + sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb, net); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, - stcb, NULL); + stcb, net); + if (send_hb) { + sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED); + } } - - return m_reply; + return (m_reply); } static int @@ -306,7 +273,7 @@ sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src) src_net = sctp_findnet(stcb, src); if (src_net == NULL) { /* not found */ - return -1; + return (-1); } /* delete all destination addresses except the source */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { @@ -322,46 +289,47 @@ sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src) (struct sockaddr *)&net->ro._l_addr, SCTP_SO_NOT_LOCKED); } } - return 0; + return (0); } static struct mbuf * -sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, +sctp_process_asconf_delete_ip(struct sockaddr *src, + struct sctp_asconf_paramhdr *aph, struct sctp_tcb *stcb, int response_required) { struct mbuf *m_reply = NULL; - struct sockaddr_storage sa_source, sa_store; - struct sctp_ipv4addr_param *v4addr; + struct sockaddr_storage sa_store; + struct sctp_paramhdr *ph; uint16_t param_type, param_length, aparam_length; struct sockaddr *sa; - struct sockaddr_in *sin; int zero_address = 0; int result; +#ifdef INET + struct sockaddr_in *sin; + struct sctp_ipv4addr_param *v4addr; + +#endif #ifdef INET6 struct sockaddr_in6 *sin6; struct sctp_ipv6addr_param *v6addr; -#endif /* INET6 */ - - /* get the source IP address for src and 0.0.0.0/::0 delete checks */ - sctp_asconf_get_source_ip(m, (struct sockaddr *)&sa_source); +#endif aparam_length = ntohs(aph->ph.param_length); - v4addr = (struct sctp_ipv4addr_param *)(aph + 1); -#ifdef INET6 - v6addr = (struct sctp_ipv6addr_param *)(aph + 1); -#endif /* INET6 */ - param_type = ntohs(v4addr->ph.param_type); - param_length = ntohs(v4addr->ph.param_length); + ph = (struct sctp_paramhdr *)(aph + 1); + param_type = ntohs(ph->param_type); + param_length = ntohs(ph->param_length); sa = (struct sockaddr *)&sa_store; switch (param_type) { +#ifdef INET case SCTP_IPV4_ADDRESS: if (param_length != sizeof(struct sctp_ipv4addr_param)) { /* invalid param size */ - return NULL; + return (NULL); } + v4addr = (struct sctp_ipv4addr_param *)ph; sin = (struct sockaddr_in *)&sa_store; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; @@ -374,12 +342,14 @@ sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, "process_asconf_delete_ip: deleting "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); break; +#endif +#ifdef INET6 case SCTP_IPV6_ADDRESS: if (param_length != sizeof(struct sctp_ipv6addr_param)) { /* invalid param size */ - return NULL; + return (NULL); } -#ifdef INET6 + v6addr = (struct sctp_ipv6addr_param *)ph; sin6 = (struct sockaddr_in6 *)&sa_store; bzero(sin6, sizeof(*sin6)); sin6->sin6_family = AF_INET6; @@ -392,35 +362,27 @@ sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: deleting "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); -#else - /* IPv6 not enabled! No "action" needed; just ack it */ - SCTPDBG(SCTP_DEBUG_ASCONF1, - "process_asconf_delete_ip: v6 disabled- ignoring: "); - SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); - /* just respond with a "success" ASCONF-ACK */ - return NULL; -#endif break; +#endif default: m_reply = sctp_asconf_error_response(aph->correlation_id, SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph, aparam_length); - return m_reply; + return (m_reply); } /* make sure the source address is not being deleted */ - if (sctp_cmpaddr(sa, (struct sockaddr *)&sa_source)) { + if (sctp_cmpaddr(sa, src)) { /* trying to delete the source address! */ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete source addr\n"); m_reply = sctp_asconf_error_response(aph->correlation_id, SCTP_CAUSE_DELETING_SRC_ADDR, (uint8_t *) aph, aparam_length); - return m_reply; + return (m_reply); } /* if deleting 0.0.0.0/::0, delete all addresses except src addr */ if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) { - result = sctp_asconf_del_remote_addrs_except(stcb, - (struct sockaddr *)&sa_source); + result = sctp_asconf_del_remote_addrs_except(stcb, src); if (result) { /* src address did not exist? */ @@ -434,7 +396,7 @@ sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, m_reply = sctp_asconf_success_response(aph->correlation_id); } - return m_reply; + return (m_reply); } /* delete the address */ result = sctp_del_remote_addr(stcb, sa); @@ -456,43 +418,46 @@ sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, /* notify upper layer */ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_DELETE_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED); } - return m_reply; + return (m_reply); } static struct mbuf * -sctp_process_asconf_set_primary(struct mbuf *m, +sctp_process_asconf_set_primary(struct sockaddr *src, struct sctp_asconf_paramhdr *aph, struct sctp_tcb *stcb, int response_required) { struct mbuf *m_reply = NULL; - struct sockaddr_storage sa_source, sa_store; - struct sctp_ipv4addr_param *v4addr; + struct sockaddr_storage sa_store; + struct sctp_paramhdr *ph; uint16_t param_type, param_length, aparam_length; struct sockaddr *sa; - struct sockaddr_in *sin; int zero_address = 0; +#ifdef INET + struct sockaddr_in *sin; + struct sctp_ipv4addr_param *v4addr; + +#endif #ifdef INET6 struct sockaddr_in6 *sin6; struct sctp_ipv6addr_param *v6addr; -#endif /* INET6 */ +#endif aparam_length = ntohs(aph->ph.param_length); - v4addr = (struct sctp_ipv4addr_param *)(aph + 1); -#ifdef INET6 - v6addr = (struct sctp_ipv6addr_param *)(aph + 1); -#endif /* INET6 */ - param_type = ntohs(v4addr->ph.param_type); - param_length = ntohs(v4addr->ph.param_length); + ph = (struct sctp_paramhdr *)(aph + 1); + param_type = ntohs(ph->param_type); + param_length = ntohs(ph->param_length); sa = (struct sockaddr *)&sa_store; switch (param_type) { +#ifdef INET case SCTP_IPV4_ADDRESS: if (param_length != sizeof(struct sctp_ipv4addr_param)) { /* invalid param size */ - return NULL; + return (NULL); } + v4addr = (struct sctp_ipv4addr_param *)ph; sin = (struct sockaddr_in *)&sa_store; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; @@ -503,12 +468,14 @@ sctp_process_asconf_set_primary(struct mbuf *m, SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); break; +#endif +#ifdef INET6 case SCTP_IPV6_ADDRESS: if (param_length != sizeof(struct sctp_ipv6addr_param)) { /* invalid param size */ - return NULL; + return (NULL); } -#ifdef INET6 + v6addr = (struct sctp_ipv6addr_param *)ph; sin6 = (struct sockaddr_in6 *)&sa_store; bzero(sin6, sizeof(*sin6)); sin6->sin6_family = AF_INET6; @@ -519,29 +486,21 @@ sctp_process_asconf_set_primary(struct mbuf *m, zero_address = 1; SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); -#else - /* IPv6 not enabled! No "action" needed; just ack it */ - SCTPDBG(SCTP_DEBUG_ASCONF1, - "process_asconf_set_primary: v6 disabled- ignoring: "); - SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); - /* just respond with a "success" ASCONF-ACK */ - return NULL; -#endif break; +#endif default: m_reply = sctp_asconf_error_response(aph->correlation_id, SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph, aparam_length); - return m_reply; + return (m_reply); } /* if 0.0.0.0/::0, use the source address instead */ if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) { - sa = (struct sockaddr *)&sa_source; - sctp_asconf_get_source_ip(m, sa); + sa = src; SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: using source addr "); - SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); + SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src); } /* set the primary address */ if (sctp_set_primary_addr(stcb, sa, NULL) == 0) { @@ -549,7 +508,12 @@ sctp_process_asconf_set_primary(struct mbuf *m, "process_asconf_set_primary: primary address set\n"); /* notify upper layer */ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_SET_PRIMARY, stcb, 0, sa, SCTP_SO_NOT_LOCKED); - + if ((stcb->asoc.primary_destination->dest_state & SCTP_ADDR_REACHABLE) && + (!(stcb->asoc.primary_destination->dest_state & SCTP_ADDR_PF)) && + (stcb->asoc.alternate)) { + sctp_free_remote_addr(stcb->asoc.alternate); + stcb->asoc.alternate = NULL; + } if (response_required) { m_reply = sctp_asconf_success_response(aph->correlation_id); } @@ -599,7 +563,7 @@ sctp_process_asconf_set_primary(struct mbuf *m, aparam_length); } - return m_reply; + return (m_reply); } /* @@ -608,6 +572,7 @@ sctp_process_asconf_set_primary(struct mbuf *m, */ void sctp_handle_asconf(struct mbuf *m, unsigned int offset, + struct sockaddr *src, struct sctp_asconf_chunk *cp, struct sctp_tcb *stcb, int first) { @@ -617,7 +582,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset, struct sctp_asconf_ack_chunk *ack_cp; struct sctp_asconf_paramhdr *aph, *ack_aph; struct sctp_ipv6addr_param *p_addr; - unsigned int asconf_limit; + unsigned int asconf_limit, cnt; int error = 0; /* did an error occur? */ /* asconf param buffer */ @@ -634,8 +599,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset, asoc = &stcb->asoc; serial_num = ntohl(cp->serial_number); - if (compare_with_wrap(asoc->asconf_seq_in, serial_num, MAX_SEQ) || - serial_num == asoc->asconf_seq_in) { + if (SCTP_TSN_GE(asoc->asconf_seq_in, serial_num)) { /* got a duplicate ASCONF */ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: got duplicate serial number = %xh\n", @@ -656,21 +620,18 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset, if (first) { /* delete old cache */ - SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: Now processing firstASCONF. Try to delte old cache\n"); + SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: Now processing first ASCONF. Try to delete old cache\n"); - ack = TAILQ_FIRST(&stcb->asoc.asconf_ack_sent); - while (ack != NULL) { - ack_next = TAILQ_NEXT(ack, next); + TAILQ_FOREACH_SAFE(ack, &asoc->asconf_ack_sent, next, ack_next) { if (ack->serial_number == serial_num) break; SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: delete old(%u) < first(%u)\n", ack->serial_number, serial_num); - TAILQ_REMOVE(&stcb->asoc.asconf_ack_sent, ack, next); + TAILQ_REMOVE(&asoc->asconf_ack_sent, ack, next); if (ack->data != NULL) { sctp_m_freem(ack->data); } SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asconf_ack), ack); - ack = ack_next; } } m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0, @@ -716,6 +677,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset, goto send_reply; } /* process through all parameters */ + cnt = 0; while (aph != NULL) { unsigned int param_length, param_type; @@ -747,12 +709,13 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset, switch (param_type) { case SCTP_ADD_IP_ADDRESS: asoc->peer_supports_asconf = 1; - m_result = sctp_process_asconf_add_ip(m, aph, stcb, - error); + m_result = sctp_process_asconf_add_ip(src, aph, stcb, + (cnt < SCTP_BASE_SYSCTL(sctp_hb_maxburst)), error); + cnt++; break; case SCTP_DEL_IP_ADDRESS: asoc->peer_supports_asconf = 1; - m_result = sctp_process_asconf_delete_ip(m, aph, stcb, + m_result = sctp_process_asconf_delete_ip(src, aph, stcb, error); break; case SCTP_ERROR_CAUSE_IND: @@ -760,7 +723,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset, break; case SCTP_SET_PRIM_ADDR: asoc->peer_supports_asconf = 1; - m_result = sctp_process_asconf_set_primary(m, aph, + m_result = sctp_process_asconf_set_primary(src, aph, stcb, error); break; case SCTP_NAT_VTAGS: @@ -829,10 +792,8 @@ send_reply: ack->last_sent_to = NULL; ack->data = m_ack; ack->len = 0; - n = m_ack; - while (n) { + for (n = m_ack; n != NULL; n = SCTP_BUF_NEXT(n)) { ack->len += SCTP_BUF_LEN(n); - n = SCTP_BUF_NEXT(n); } TAILQ_INSERT_TAIL(&stcb->asoc.asconf_ack_sent, ack, next); @@ -842,67 +803,16 @@ send_reply: * this could happen if the source address was just newly * added */ - struct ip *iph; - struct sctphdr *sh; - struct sockaddr_storage from_store; - struct sockaddr *from = (struct sockaddr *)&from_store; - SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: looking up net for IP source address\n"); - /* pullup already done, IP options already stripped */ - iph = mtod(m, struct ip *); - sh = (struct sctphdr *)((caddr_t)iph + sizeof(*iph)); - switch (iph->ip_v) { - case IPVERSION: - { - struct sockaddr_in *from4; - - from4 = (struct sockaddr_in *)&from_store; - bzero(from4, sizeof(*from4)); - from4->sin_family = AF_INET; - from4->sin_len = sizeof(struct sockaddr_in); - from4->sin_addr.s_addr = iph->ip_src.s_addr; - from4->sin_port = sh->src_port; - break; - } -#ifdef INET6 - case IPV6_VERSION >> 4: - { - struct ip6_hdr *ip6; - struct sockaddr_in6 *from6; - - ip6 = mtod(m, struct ip6_hdr *); - from6 = (struct sockaddr_in6 *)&from_store; - bzero(from6, sizeof(*from6)); - from6->sin6_family = AF_INET6; - from6->sin6_len = sizeof(struct sockaddr_in6); - from6->sin6_addr = ip6->ip6_src; - from6->sin6_port = sh->src_port; - /* - * Get the scopes in properly to the sin6 - * addr's - */ - /* we probably don't need these operations */ - (void)sa6_recoverscope(from6); - sa6_embedscope(from6, - MODULE_GLOBAL(ip6_use_defzone)); - - break; - } -#endif - default: - /* unknown address type */ - from = NULL; - } - if (from != NULL) { - SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: "); - SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, from); - /* look up the from address */ - stcb->asoc.last_control_chunk_from = sctp_findnet(stcb, from); + SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: "); + SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src); + /* look up the from address */ + stcb->asoc.last_control_chunk_from = sctp_findnet(stcb, src); #ifdef SCTP_DEBUG - if (stcb->asoc.last_control_chunk_from == NULL) - SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: IP source address not found?!\n"); -#endif + if (stcb->asoc.last_control_chunk_from == NULL) { + SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: IP source address not found?!\n"); } +#endif } } @@ -912,28 +822,36 @@ send_reply: static uint32_t sctp_asconf_addr_match(struct sctp_asconf_addr *aa, struct sockaddr *sa) { + switch (sa->sa_family) { #ifdef INET6 - if (sa->sa_family == AF_INET6) { - /* IPv6 sa address */ - /* XXX scopeid */ - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; - - if ((aa->ap.addrp.ph.param_type == SCTP_IPV6_ADDRESS) && - (memcmp(&aa->ap.addrp.addr, &sin6->sin6_addr, - sizeof(struct in6_addr)) == 0)) { - return (1); + case AF_INET6: + { + /* XXX scopeid */ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + + if ((aa->ap.addrp.ph.param_type == SCTP_IPV6_ADDRESS) && + (memcmp(&aa->ap.addrp.addr, &sin6->sin6_addr, + sizeof(struct in6_addr)) == 0)) { + return (1); + } + break; } - } else -#endif /* INET6 */ - if (sa->sa_family == AF_INET) { - /* IPv4 sa address */ - struct sockaddr_in *sin = (struct sockaddr_in *)sa; +#endif +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; - if ((aa->ap.addrp.ph.param_type == SCTP_IPV4_ADDRESS) && - (memcmp(&aa->ap.addrp.addr, &sin->sin_addr, - sizeof(struct in_addr)) == 0)) { - return (1); + if ((aa->ap.addrp.ph.param_type == SCTP_IPV4_ADDRESS) && + (memcmp(&aa->ap.addrp.addr, &sin->sin_addr, + sizeof(struct in_addr)) == 0)) { + return (1); + } + break; } +#endif + default: + break; } return (0); } @@ -942,43 +860,48 @@ sctp_asconf_addr_match(struct sctp_asconf_addr *aa, struct sockaddr *sa) * does the address match? returns 0 if not, 1 if so */ static uint32_t -sctp_addr_match( - struct sctp_ipv6addr_param *v6addr, - struct sockaddr *sa) +sctp_addr_match(struct sctp_paramhdr *ph, struct sockaddr *sa) { uint16_t param_type, param_length; - struct sctp_ipv4addr_param *v4addr = (struct sctp_ipv4addr_param *)v6addr; + param_type = ntohs(ph->param_type); + param_length = ntohs(ph->param_length); + switch (sa->sa_family) { #ifdef INET6 - if (sa->sa_family == AF_INET6) { - /* IPv6 sa address */ - /* XXX scopeid */ - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; - - param_type = ntohs(v6addr->ph.param_type); - param_length = ntohs(v6addr->ph.param_length); - - if ((param_type == SCTP_IPV6_ADDRESS) && - param_length == sizeof(struct sctp_ipv6addr_param) && - (memcmp(&v6addr->addr, &sin6->sin6_addr, - sizeof(struct in6_addr)) == 0)) { - return (1); + case AF_INET6: + { + /* XXX scopeid */ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + struct sctp_ipv6addr_param *v6addr; + + v6addr = (struct sctp_ipv6addr_param *)ph; + if ((param_type == SCTP_IPV6_ADDRESS) && + param_length == sizeof(struct sctp_ipv6addr_param) && + (memcmp(&v6addr->addr, &sin6->sin6_addr, + sizeof(struct in6_addr)) == 0)) { + return (1); + } + break; } - } #endif - if (sa->sa_family == AF_INET) { - /* IPv4 sa address */ - struct sockaddr_in *sin = (struct sockaddr_in *)sa; - - param_type = ntohs(v4addr->ph.param_type); - param_length = ntohs(v4addr->ph.param_length); - - if ((param_type == SCTP_IPV4_ADDRESS) && - param_length == sizeof(struct sctp_ipv4addr_param) && - (memcmp(&v4addr->addr, &sin->sin_addr, - sizeof(struct in_addr)) == 0)) { - return (1); +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + struct sctp_ipv4addr_param *v4addr; + + v4addr = (struct sctp_ipv4addr_param *)ph; + if ((param_type == SCTP_IPV4_ADDRESS) && + param_length == sizeof(struct sctp_ipv4addr_param) && + (memcmp(&v4addr->addr, &sin->sin_addr, + sizeof(struct in_addr)) == 0)) { + return (1); + } + break; } +#endif + default: + break; } return (0); } @@ -1183,17 +1106,25 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa) continue; changed = 0; - if (net->ro._l_addr.sa.sa_family == AF_INET) { - if (sctp_v4src_match_nexthop(newifa, (sctp_route_t *) & net->ro)) + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: + if (sctp_v4src_match_nexthop(newifa, (sctp_route_t *) & net->ro)) { changed = 1; - } + } + break; +#endif #ifdef INET6 - if (net->ro._l_addr.sa.sa_family == AF_INET6) { + case AF_INET6: if (sctp_v6src_match_nexthop( - &newifa->address.sin6, (sctp_route_t *) & net->ro)) + &newifa->address.sin6, (sctp_route_t *) & net->ro)) { changed = 1; - } + } + break; #endif + default: + break; + } /* * if the newly added address does not relate routing * information, we skip. @@ -1220,8 +1151,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa) * flag: 1=success, 0=failure. */ static void -sctp_asconf_addr_mgmt_ack(struct sctp_tcb *stcb, struct sctp_ifa *addr, - uint16_t type, uint32_t flag) +sctp_asconf_addr_mgmt_ack(struct sctp_tcb *stcb, struct sctp_ifa *addr, uint32_t flag) { /* * do the necessary asoc list work- if we get a failure indication, @@ -1266,9 +1196,7 @@ sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa, struct sockaddr *sa; /* make sure the request isn't already in the queue */ - for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL; - aa = aa_next) { - aa_next = TAILQ_NEXT(aa, next); + TAILQ_FOREACH_SAFE(aa, &stcb->asoc.asconf_queue, next, aa_next) { /* address match? */ if (sctp_asconf_addr_match(aa, &ifa->address.sa) == 0) continue; @@ -1322,31 +1250,40 @@ sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa, aa->ifa = ifa; atomic_add_int(&ifa->refcount, 1); /* correlation_id filled in during send routine later... */ - if (ifa->address.sa.sa_family == AF_INET6) { - /* IPv6 address */ - struct sockaddr_in6 *sin6; + switch (ifa->address.sa.sa_family) { +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)&ifa->address.sa; - sa = (struct sockaddr *)sin6; - aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS; - aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param)); - aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + - sizeof(struct sctp_ipv6addr_param); - memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr, - sizeof(struct in6_addr)); - } else if (ifa->address.sa.sa_family == AF_INET) { - /* IPv4 address */ - struct sockaddr_in *sin; + sin6 = (struct sockaddr_in6 *)&ifa->address.sa; + sa = (struct sockaddr *)sin6; + aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS; + aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param)); + aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + + sizeof(struct sctp_ipv6addr_param); + memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr, + sizeof(struct in6_addr)); + break; + } +#endif +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin; - sin = (struct sockaddr_in *)&ifa->address.sa; - sa = (struct sockaddr *)sin; - aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS; - aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param)); - aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + - sizeof(struct sctp_ipv4addr_param); - memcpy(&aa->ap.addrp.addr, &sin->sin_addr, - sizeof(struct in_addr)); - } else { + sin = (struct sockaddr_in *)&ifa->address.sa; + sa = (struct sockaddr *)sin; + aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS; + aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param)); + aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + + sizeof(struct sctp_ipv4addr_param); + memcpy(&aa->ap.addrp.addr, &sin->sin_addr, + sizeof(struct in_addr)); + break; + } +#endif + default: /* invalid family! */ SCTP_FREE(aa, SCTP_M_ASC_ADDR); sctp_free_ifa(ifa); @@ -1356,7 +1293,7 @@ sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa, TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next); #ifdef SCTP_DEBUG - if (SCTP_BASE_SYSCTL(sctp_debug_on) && SCTP_DEBUG_ASCONF2) { + if (SCTP_BASE_SYSCTL(sctp_debug_on) & SCTP_DEBUG_ASCONF2) { if (type == SCTP_ADD_IP_ADDRESS) { SCTP_PRINTF("asconf_queue_mgmt: inserted asconf ADD_IP_ADDRESS: "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa); @@ -1482,9 +1419,7 @@ sctp_asconf_queue_sa_delete(struct sctp_tcb *stcb, struct sockaddr *sa) return (-1); } /* make sure the request isn't already in the queue */ - for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL; - aa = aa_next) { - aa_next = TAILQ_NEXT(aa, next); + TAILQ_FOREACH_SAFE(aa, &stcb->asoc.asconf_queue, next, aa_next) { /* address match? */ if (sctp_asconf_addr_match(aa, sa) == 0) continue; @@ -1530,26 +1465,37 @@ sctp_asconf_queue_sa_delete(struct sctp_tcb *stcb, struct sockaddr *sa) if (ifa) atomic_add_int(&ifa->refcount, 1); /* correlation_id filled in during send routine later... */ - if (sa->sa_family == AF_INET6) { - /* IPv6 address */ - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *)sa; - aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS; - aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param)); - aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv6addr_param); - memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr, - sizeof(struct in6_addr)); - } else if (sa->sa_family == AF_INET) { - /* IPv4 address */ - struct sockaddr_in *sin = (struct sockaddr_in *)sa; + switch (sa->sa_family) { +#ifdef INET6 + case AF_INET6: + { + /* IPv6 address */ + struct sockaddr_in6 *sin6; - aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS; - aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param)); - aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv4addr_param); - memcpy(&aa->ap.addrp.addr, &sin->sin_addr, - sizeof(struct in_addr)); - } else { + sin6 = (struct sockaddr_in6 *)sa; + aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS; + aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param)); + aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv6addr_param); + memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr, + sizeof(struct in6_addr)); + break; + } +#endif +#ifdef INET + case AF_INET: + { + /* IPv4 address */ + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + + aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS; + aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param)); + aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv4addr_param); + memcpy(&aa->ap.addrp.addr, &sin->sin_addr, + sizeof(struct in_addr)); + break; + } +#endif + default: /* invalid family! */ SCTP_FREE(aa, SCTP_M_ASC_ADDR); if (ifa) @@ -1655,7 +1601,7 @@ sctp_asconf_process_param_ack(struct sctp_tcb *stcb, case SCTP_ADD_IP_ADDRESS: SCTPDBG(SCTP_DEBUG_ASCONF1, "process_param_ack: added IP address\n"); - sctp_asconf_addr_mgmt_ack(stcb, aparam->ifa, param_type, flag); + sctp_asconf_addr_mgmt_ack(stcb, aparam->ifa, flag); break; case SCTP_DEL_IP_ADDRESS: SCTPDBG(SCTP_DEBUG_ASCONF1, @@ -1733,8 +1679,7 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset, */ if (serial_num == (asoc->asconf_seq_out + 1)) { SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got unexpected next serial number! Aborting asoc!\n"); - sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_CAUSE_ILLEGAL_ASCONF_ACK, NULL, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, SCTP_SO_NOT_LOCKED); *abort_no_unlock = 1; return; } @@ -1838,9 +1783,7 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset, */ if (last_error_id == 0) last_error_id--;/* set to "max" value */ - for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL; - aa = aa_next) { - aa_next = TAILQ_NEXT(aa, next); + TAILQ_FOREACH_SAFE(aa, &stcb->asoc.asconf_queue, next, aa_next) { if (aa->sent == 1) { /* * implicitly successful or failed if correlation_id @@ -1921,8 +1864,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, { int status; - - if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0 && + if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0 || sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { /* subset bound, no ASCONF allowed case, so ignore */ return; @@ -1933,10 +1875,19 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, */ /* first, make sure it's a good address family */ - if (ifa->address.sa.sa_family != AF_INET6 && - ifa->address.sa.sa_family != AF_INET) { + switch (ifa->address.sa.sa_family) { +#ifdef INET6 + case AF_INET6: + break; +#endif +#ifdef INET + case AF_INET: + break; +#endif + default: return; } +#ifdef INET6 /* make sure we're "allowed" to add this type of addr */ if (ifa->address.sa.sa_family == AF_INET6) { /* invalid if we're not a v6 endpoint */ @@ -1947,6 +1898,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, return; } } +#endif /* put this address on the "pending/do not use yet" list */ sctp_add_local_addr_restricted(stcb, ifa); /* @@ -1966,7 +1918,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, return; } if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { - if (stcb->asoc.local_scope == 0) { + if (stcb->asoc.scope.local_scope == 0) { return; } /* is it the right link local scope? */ @@ -1974,13 +1926,14 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, return; } } - if (stcb->asoc.site_scope == 0 && + if (stcb->asoc.scope.site_scope == 0 && IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { return; } break; } #endif +#ifdef INET case AF_INET: { struct sockaddr_in *sin; @@ -1997,12 +1950,13 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, /* we skip unspecifed addresses */ return; } - if (stcb->asoc.ipv4_local_scope == 0 && + if (stcb->asoc.scope.ipv4_local_scope == 0 && IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { return; } break; } +#endif default: /* else, not AF_INET or AF_INET6, so skip */ return; @@ -2026,8 +1980,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, stcb->asoc.primary_destination); #else - sctp_send_asconf(stcb, stcb->asoc.primary_destination, - addr_locked); + sctp_send_asconf(stcb, NULL, addr_locked); #endif } } @@ -2036,7 +1989,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int -sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val) +sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val SCTP_UNUSED) { struct sctp_asconf_iterator *asc; struct sctp_ifa *ifa; @@ -2046,42 +1999,45 @@ sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val) asc = (struct sctp_asconf_iterator *)ptr; LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) { ifa = l->ifa; - if (ifa->address.sa.sa_family == AF_INET6) { + switch (ifa->address.sa.sa_family) { +#ifdef INET6 + case AF_INET6: /* invalid if we're not a v6 endpoint */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { cnt_invalid++; if (asc->cnt == cnt_invalid) return (1); - else - continue; } - } else if (ifa->address.sa.sa_family == AF_INET) { - /* invalid if we are a v6 only endpoint */ - struct in6pcb *inp6; + break; +#endif +#ifdef INET + case AF_INET: + { + /* invalid if we are a v6 only endpoint */ + struct in6pcb *inp6; - inp6 = (struct in6pcb *)&inp->ip_inp.inp; - if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && - SCTP_IPV6_V6ONLY(inp6)) { - cnt_invalid++; - if (asc->cnt == cnt_invalid) - return (1); - else - continue; + inp6 = (struct in6pcb *)&inp->ip_inp.inp; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && + SCTP_IPV6_V6ONLY(inp6)) { + cnt_invalid++; + if (asc->cnt == cnt_invalid) + return (1); + } + break; } - } else { +#endif + default: /* invalid address family */ cnt_invalid++; if (asc->cnt == cnt_invalid) return (1); - else - continue; } } return (0); } static int -sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val) +sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val SCTP_UNUSED) { struct sctp_ifa *ifa; struct sctp_asconf_iterator *asc; @@ -2100,14 +2056,11 @@ sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val) } } } else if (l->action == SCTP_DEL_IP_ADDRESS) { - laddr = LIST_FIRST(&inp->sctp_addr_list); - while (laddr) { - nladdr = LIST_NEXT(laddr, sctp_nxt_addr); + LIST_FOREACH_SAFE(laddr, &inp->sctp_addr_list, sctp_nxt_addr, nladdr) { /* remove only after all guys are done */ if (laddr->ifa == ifa) { sctp_del_local_addr_ep(inp, ifa); } - laddr = nladdr; } } } @@ -2116,7 +2069,7 @@ sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val) void sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - void *ptr, uint32_t val) + void *ptr, uint32_t val SCTP_UNUSED) { struct sctp_asconf_iterator *asc; struct sctp_ifa *ifa; @@ -2155,7 +2108,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, continue; } if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { - if (stcb->asoc.local_scope == 0) { + if (stcb->asoc.scope.local_scope == 0) { continue; } /* is it the right link local scope? */ @@ -2166,6 +2119,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, break; } #endif +#ifdef INET case AF_INET: { /* invalid if we are a v6 only endpoint */ @@ -2183,7 +2137,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, /* we skip unspecifed addresses */ continue; } - if (stcb->asoc.ipv4_local_scope == 0 && + if (stcb->asoc.scope.ipv4_local_scope == 0 && IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { continue; } @@ -2197,6 +2151,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, } break; } +#endif default: /* invalid address family */ cnt_invalid++; @@ -2245,13 +2200,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, } } else { /* Need to check scopes for this guy */ - if (sctp_is_address_in_scope(ifa, - stcb->asoc.ipv4_addr_legal, - stcb->asoc.ipv6_addr_legal, - stcb->asoc.loopback_scope, - stcb->asoc.ipv4_local_scope, - stcb->asoc.local_scope, - stcb->asoc.site_scope, 0) == 0) { + if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) { continue; } } @@ -2277,22 +2226,19 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, * If we have queued params in the open state, send out an ASCONF. */ if (num_queued > 0) { - sctp_send_asconf(stcb, stcb->asoc.primary_destination, - SCTP_ADDR_NOT_LOCKED); + sctp_send_asconf(stcb, NULL, SCTP_ADDR_NOT_LOCKED); } } void -sctp_asconf_iterator_end(void *ptr, uint32_t val) +sctp_asconf_iterator_end(void *ptr, uint32_t val SCTP_UNUSED) { struct sctp_asconf_iterator *asc; struct sctp_ifa *ifa; - struct sctp_laddr *l, *l_next; + struct sctp_laddr *l, *nl; asc = (struct sctp_asconf_iterator *)ptr; - l = LIST_FIRST(&asc->list_of_work); - while (l != NULL) { - l_next = LIST_NEXT(l, sctp_nxt_addr); + LIST_FOREACH_SAFE(l, &asc->list_of_work, sctp_nxt_addr, nl) { ifa = l->ifa; if (l->action == SCTP_ADD_IP_ADDRESS) { /* Clear the defer use flag */ @@ -2301,7 +2247,6 @@ sctp_asconf_iterator_end(void *ptr, uint32_t val) sctp_free_ifa(ifa); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), l); SCTP_DECR_LADDR_COUNT(); - l = l_next; } SCTP_FREE(asc, SCTP_M_ASC_IT); } @@ -2328,7 +2273,7 @@ sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa) /* set primary queuing succeeded */ SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address_sa: queued on tcb=%p, ", - stcb); + (void *)stcb); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) { #ifdef SCTP_TIMER_BASED_ASCONF @@ -2336,13 +2281,12 @@ sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa) stcb->sctp_ep, stcb, stcb->asoc.primary_destination); #else - sctp_send_asconf(stcb, stcb->asoc.primary_destination, - SCTP_ADDR_NOT_LOCKED); + sctp_send_asconf(stcb, NULL, SCTP_ADDR_NOT_LOCKED); #endif } } else { SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address_sa: failed to add to queue on tcb=%p, ", - stcb); + (void *)stcb); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa); return (-1); } @@ -2365,7 +2309,7 @@ sctp_set_primary_ip_address(struct sctp_ifa *ifa) SCTP_SET_PRIM_ADDR)) { /* set primary queuing succeeded */ SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address: queued on stcb=%p, ", - stcb); + (void *)stcb); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &ifa->address.sa); if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) { #ifdef SCTP_TIMER_BASED_ASCONF @@ -2373,8 +2317,7 @@ sctp_set_primary_ip_address(struct sctp_ifa *ifa) stcb->sctp_ep, stcb, stcb->asoc.primary_destination); #else - sctp_send_asconf(stcb, stcb->asoc.primary_destination, - SCTP_ADDR_NOT_LOCKED); + sctp_send_asconf(stcb, NULL, SCTP_ADDR_NOT_LOCKED); #endif } } @@ -2390,17 +2333,13 @@ sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa) struct sctp_asconf_chunk *acp; struct sctp_asconf_paramhdr *aph; uint8_t aparam_buf[SCTP_PARAM_BUFFER_SIZE]; - struct sctp_ipv6addr_param *p_addr; + struct sctp_paramhdr *ph; int add_cnt, del_cnt; uint16_t last_param_type; add_cnt = del_cnt = 0; last_param_type = 0; - for (chk = TAILQ_FIRST(&stcb->asoc.asconf_send_queue); chk != NULL; - chk = nchk) { - /* get next chk */ - nchk = TAILQ_NEXT(chk, sctp_next); - + TAILQ_FOREACH_SAFE(chk, &stcb->asoc.asconf_send_queue, sctp_next, nchk) { if (chk->data == NULL) { SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: No mbuf data?\n"); continue; @@ -2409,12 +2348,12 @@ sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa) acp = mtod(chk->data, struct sctp_asconf_chunk *); offset += sizeof(struct sctp_asconf_chunk); asconf_limit = ntohs(acp->ch.chunk_length); - p_addr = (struct sctp_ipv6addr_param *)sctp_m_getptr(chk->data, offset, sizeof(struct sctp_paramhdr), aparam_buf); - if (p_addr == NULL) { + ph = (struct sctp_paramhdr *)sctp_m_getptr(chk->data, offset, sizeof(struct sctp_paramhdr), aparam_buf); + if (ph == NULL) { SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: couldn't get lookup addr!\n"); continue; } - offset += ntohs(p_addr->ph.param_length); + offset += ntohs(ph->param_length); aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(chk->data, offset, sizeof(struct sctp_asconf_paramhdr), aparam_buf); if (aph == NULL) { @@ -2443,8 +2382,8 @@ sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa) SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: couldn't get entire param\n"); break; } - p_addr = (struct sctp_ipv6addr_param *)(aph + 1); - if (sctp_addr_match(p_addr, &sctp_ifa->address.sa) != 0) { + ph = (struct sctp_paramhdr *)(aph + 1); + if (sctp_addr_match(ph, &sctp_ifa->address.sa) != 0) { switch (param_type) { case SCTP_ADD_IP_ADDRESS: add_cnt++; @@ -2473,9 +2412,9 @@ sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa) */ if (add_cnt > del_cnt || (add_cnt == del_cnt && last_param_type == SCTP_ADD_IP_ADDRESS)) { - return 1; + return (1); } - return 0; + return (0); } static struct sockaddr * @@ -2494,58 +2433,78 @@ sctp_find_valid_localaddr(struct sctp_tcb *stcb, int addr_locked) return (NULL); } LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { - if (stcb->asoc.loopback_scope == 0 && + if (stcb->asoc.scope.loopback_scope == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* Skip if loopback_scope not set */ continue; } LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { - if (sctp_ifa->address.sa.sa_family == AF_INET && - stcb->asoc.ipv4_addr_legal) { - struct sockaddr_in *sin; + switch (sctp_ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: + if (stcb->asoc.scope.ipv4_addr_legal) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)&sctp_ifa->address.sa; + if (sin->sin_addr.s_addr == 0) { + /* skip unspecifed addresses */ + continue; + } + if (stcb->asoc.scope.ipv4_local_scope == 0 && + IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) + continue; - sin = (struct sockaddr_in *)&sctp_ifa->address.sa; - if (sin->sin_addr.s_addr == 0) { - /* skip unspecifed addresses */ - continue; + if (sctp_is_addr_restricted(stcb, sctp_ifa) && + (!sctp_is_addr_pending(stcb, sctp_ifa))) + continue; + /* + * found a valid local v4 address to + * use + */ + if (addr_locked == SCTP_ADDR_NOT_LOCKED) + SCTP_IPI_ADDR_RUNLOCK(); + return (&sctp_ifa->address.sa); } - if (stcb->asoc.ipv4_local_scope == 0 && - IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) - continue; + break; +#endif +#ifdef INET6 + case AF_INET6: + if (stcb->asoc.scope.ipv6_addr_legal) { + struct sockaddr_in6 *sin6; - if (sctp_is_addr_restricted(stcb, sctp_ifa) && - (!sctp_is_addr_pending(stcb, sctp_ifa))) - continue; - /* found a valid local v4 address to use */ - if (addr_locked == SCTP_ADDR_NOT_LOCKED) - SCTP_IPI_ADDR_RUNLOCK(); - return (&sctp_ifa->address.sa); - } else if (sctp_ifa->address.sa.sa_family == AF_INET6 && - stcb->asoc.ipv6_addr_legal) { - struct sockaddr_in6 *sin6; + if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { + continue; + } + sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + /* + * we skip unspecifed + * addresses + */ + continue; + } + if (stcb->asoc.scope.local_scope == 0 && + IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) + continue; + if (stcb->asoc.scope.site_scope == 0 && + IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) + continue; - if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { - continue; - } - sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa; - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { - /* we skip unspecifed addresses */ - continue; + if (sctp_is_addr_restricted(stcb, sctp_ifa) && + (!sctp_is_addr_pending(stcb, sctp_ifa))) + continue; + /* + * found a valid local v6 address to + * use + */ + if (addr_locked == SCTP_ADDR_NOT_LOCKED) + SCTP_IPI_ADDR_RUNLOCK(); + return (&sctp_ifa->address.sa); } - if (stcb->asoc.local_scope == 0 && - IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) - continue; - if (stcb->asoc.site_scope == 0 && - IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) - continue; - - if (sctp_is_addr_restricted(stcb, sctp_ifa) && - (!sctp_is_addr_pending(stcb, sctp_ifa))) - continue; - /* found a valid local v6 address to use */ - if (addr_locked == SCTP_ADDR_NOT_LOCKED) - SCTP_IPI_ADDR_RUNLOCK(); - return (&sctp_ifa->address.sa); + break; +#endif + default: + break; } } } @@ -2717,7 +2676,9 @@ sctp_compose_asconf(struct sctp_tcb *stcb, int *retlen, int addr_locked) lookup = (struct sctp_ipv6addr_param *)lookup_ptr; if (found_addr != NULL) { - if (found_addr->sa_family == AF_INET6) { + switch (found_addr->sa_family) { +#ifdef INET6 + case AF_INET6: /* copy IPv6 address */ lookup->ph.param_type = htons(SCTP_IPV6_ADDRESS); @@ -2725,7 +2686,10 @@ sctp_compose_asconf(struct sctp_tcb *stcb, int *retlen, int addr_locked) addr_size = sizeof(struct in6_addr); addr_ptr = (caddr_t)&((struct sockaddr_in6 *) found_addr)->sin6_addr; - } else { + break; +#endif +#ifdef INET + case AF_INET: /* copy IPv4 address */ lookup->ph.param_type = htons(SCTP_IPV4_ADDRESS); @@ -2733,21 +2697,26 @@ sctp_compose_asconf(struct sctp_tcb *stcb, int *retlen, int addr_locked) addr_size = sizeof(struct in_addr); addr_ptr = (caddr_t)&((struct sockaddr_in *) found_addr)->sin_addr; + break; +#endif + default: + p_size = 0; + addr_size = 0; + addr_ptr = NULL; + break; } lookup->ph.param_length = htons(SCTP_SIZE32(p_size)); memcpy(lookup->addr, addr_ptr, addr_size); SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(p_size); - lookup_used = 1; } else { /* uh oh... don't have any address?? */ SCTPDBG(SCTP_DEBUG_ASCONF1, "compose_asconf: no lookup addr!\n"); - /* for now, we send a IPv4 address of 0.0.0.0 */ + /* XXX for now, we send a IPv4 address of 0.0.0.0 */ lookup->ph.param_type = htons(SCTP_IPV4_ADDRESS); lookup->ph.param_length = htons(SCTP_SIZE32(sizeof(struct sctp_ipv4addr_param))); bzero(lookup->addr, sizeof(struct in_addr)); SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(sizeof(struct sctp_ipv4addr_param)); - lookup_used = 1; } } /* chain it all together */ @@ -2773,9 +2742,17 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m, struct sctp_paramhdr tmp_param, *ph; uint16_t plen, ptype; struct sctp_ifa *sctp_ifa; - struct sctp_ipv6addr_param addr_store; + +#ifdef INET6 + struct sctp_ipv6addr_param addr6_store; struct sockaddr_in6 sin6; + +#endif +#ifdef INET + struct sctp_ipv4addr_param addr4_store; struct sockaddr_in sin; + +#endif struct sockaddr *sa; uint32_t vrf_id; @@ -2790,51 +2767,67 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m, return; } /* init the addresses */ +#ifdef INET6 bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); sin6.sin6_port = stcb->rport; +#endif +#ifdef INET bzero(&sin, sizeof(sin)); - sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); sin.sin_port = stcb->rport; +#endif /* go through the addresses in the init-ack */ - ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, - sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param); + ph = (struct sctp_paramhdr *) + sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr), + (uint8_t *) & tmp_param); while (ph != NULL) { ptype = ntohs(ph->param_type); plen = ntohs(ph->param_length); - if (ptype == SCTP_IPV6_ADDRESS) { - struct sctp_ipv6addr_param *a6p; - - /* get the entire IPv6 address param */ - a6p = (struct sctp_ipv6addr_param *) - sctp_m_getptr(m, offset, - sizeof(struct sctp_ipv6addr_param), - (uint8_t *) & addr_store); - if (plen != sizeof(struct sctp_ipv6addr_param) || - a6p == NULL) { - return; + switch (ptype) { +#ifdef INET6 + case SCTP_IPV6_ADDRESS: + { + struct sctp_ipv6addr_param *a6p; + + /* get the entire IPv6 address param */ + a6p = (struct sctp_ipv6addr_param *) + sctp_m_getptr(m, offset, + sizeof(struct sctp_ipv6addr_param), + (uint8_t *) & addr6_store); + if (plen != sizeof(struct sctp_ipv6addr_param) || + a6p == NULL) { + return; + } + memcpy(&sin6.sin6_addr, a6p->addr, + sizeof(struct in6_addr)); + sa = (struct sockaddr *)&sin6; + break; } - memcpy(&sin6.sin6_addr, a6p->addr, - sizeof(struct in6_addr)); - sa = (struct sockaddr *)&sin6; - } else if (ptype == SCTP_IPV4_ADDRESS) { - struct sctp_ipv4addr_param *a4p; - - /* get the entire IPv4 address param */ - a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, offset, - sizeof(struct sctp_ipv4addr_param), - (uint8_t *) & addr_store); - if (plen != sizeof(struct sctp_ipv4addr_param) || - a4p == NULL) { - return; +#endif +#ifdef INET + case SCTP_IPV4_ADDRESS: + { + struct sctp_ipv4addr_param *a4p; + + /* get the entire IPv4 address param */ + a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, offset, + sizeof(struct sctp_ipv4addr_param), + (uint8_t *) & addr4_store); + if (plen != sizeof(struct sctp_ipv4addr_param) || + a4p == NULL) { + return; + } + sin.sin_addr.s_addr = a4p->addr; + sa = (struct sockaddr *)&sin; + break; } - sin.sin_addr.s_addr = a4p->addr; - sa = (struct sockaddr *)&sin; - } else { +#endif + default: goto next_addr; } @@ -2868,8 +2861,7 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m, stcb->sctp_ep, stcb, stcb->asoc.primary_destination); #else - sctp_send_asconf(stcb, stcb->asoc.primary_destination, - SCTP_ADDR_NOT_LOCKED); + sctp_send_asconf(stcb, NULL, SCTP_ADDR_NOT_LOCKED); #endif } } @@ -2899,28 +2891,37 @@ next_addr: * 1 if found, 0 if not */ static uint32_t -sctp_addr_in_initack(struct sctp_tcb *stcb, struct mbuf *m, uint32_t offset, - uint32_t length, struct sockaddr *sa) +sctp_addr_in_initack(struct mbuf *m, uint32_t offset, uint32_t length, struct sockaddr *sa) { struct sctp_paramhdr tmp_param, *ph; uint16_t plen, ptype; - struct sctp_ipv6addr_param addr_store; + +#ifdef INET struct sockaddr_in *sin; struct sctp_ipv4addr_param *a4p; + struct sctp_ipv6addr_param addr4_store; +#endif #ifdef INET6 struct sockaddr_in6 *sin6; struct sctp_ipv6addr_param *a6p; + struct sctp_ipv6addr_param addr6_store; struct sockaddr_in6 sin6_tmp; -#endif /* INET6 */ +#endif - if ( + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + break; +#endif #ifdef INET6 - (sa->sa_family != AF_INET6) && -#endif /* INET6 */ - (sa->sa_family != AF_INET)) + case AF_INET6: + break; +#endif + default: return (0); + } SCTPDBG(SCTP_DEBUG_ASCONF2, "find_initack_addr: starting search for "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa); @@ -2938,55 +2939,68 @@ sctp_addr_in_initack(struct sctp_tcb *stcb, struct mbuf *m, uint32_t offset, while (ph != NULL) { ptype = ntohs(ph->param_type); plen = ntohs(ph->param_length); + switch (ptype) { #ifdef INET6 - if (ptype == SCTP_IPV6_ADDRESS && sa->sa_family == AF_INET6) { - /* get the entire IPv6 address param */ - a6p = (struct sctp_ipv6addr_param *) - sctp_m_getptr(m, offset, - sizeof(struct sctp_ipv6addr_param), - (uint8_t *) & addr_store); - if (plen != sizeof(struct sctp_ipv6addr_param) || - (ph == NULL) || - (a6p == NULL)) { - return (0); - } - sin6 = (struct sockaddr_in6 *)sa; - if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) { - /* create a copy and clear scope */ - memcpy(&sin6_tmp, sin6, - sizeof(struct sockaddr_in6)); - sin6 = &sin6_tmp; - in6_clearscope(&sin6->sin6_addr); - } - if (memcmp(&sin6->sin6_addr, a6p->addr, - sizeof(struct in6_addr)) == 0) { - /* found it */ - return (1); + case SCTP_IPV6_ADDRESS: + if (sa->sa_family == AF_INET6) { + /* get the entire IPv6 address param */ + if (plen != sizeof(struct sctp_ipv6addr_param)) { + break; + } + /* get the entire IPv6 address param */ + a6p = (struct sctp_ipv6addr_param *) + sctp_m_getptr(m, offset, + sizeof(struct sctp_ipv6addr_param), + (uint8_t *) & addr6_store); + if (a6p == NULL) { + return (0); + } + sin6 = (struct sockaddr_in6 *)sa; + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) { + /* create a copy and clear scope */ + memcpy(&sin6_tmp, sin6, + sizeof(struct sockaddr_in6)); + sin6 = &sin6_tmp; + in6_clearscope(&sin6->sin6_addr); + } + if (memcmp(&sin6->sin6_addr, a6p->addr, + sizeof(struct in6_addr)) == 0) { + /* found it */ + return (1); + } } - } else + break; #endif /* INET6 */ - - if (ptype == SCTP_IPV4_ADDRESS && - sa->sa_family == AF_INET) { - /* get the entire IPv4 address param */ - a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, - offset, sizeof(struct sctp_ipv4addr_param), - (uint8_t *) & addr_store); - if (plen != sizeof(struct sctp_ipv4addr_param) || - (ph == NULL) || - (a4p == NULL)) { - return (0); - } - sin = (struct sockaddr_in *)sa; - if (sin->sin_addr.s_addr == a4p->addr) { - /* found it */ - return (1); +#ifdef INET + case SCTP_IPV4_ADDRESS: + if (sa->sa_family == AF_INET) { + if (plen != sizeof(struct sctp_ipv4addr_param)) { + break; + } + /* get the entire IPv4 address param */ + a4p = (struct sctp_ipv4addr_param *) + sctp_m_getptr(m, offset, + sizeof(struct sctp_ipv4addr_param), + (uint8_t *) & addr4_store); + if (a4p == NULL) { + return (0); + } + sin = (struct sockaddr_in *)sa; + if (sin->sin_addr.s_addr == a4p->addr) { + /* found it */ + return (1); + } } + break; +#endif + default: + break; } /* get next parameter */ offset += SCTP_SIZE32(plen); - if (offset + sizeof(struct sctp_paramhdr) > length) + if (offset + sizeof(struct sctp_paramhdr) > length) { return (0); + } ph = (struct sctp_paramhdr *) sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param); @@ -3023,8 +3037,7 @@ sctp_check_address_list_ep(struct sctp_tcb *stcb, struct mbuf *m, int offset, continue; } /* check to see if in the init-ack */ - if (!sctp_addr_in_initack(stcb, m, offset, length, - &laddr->ifa->address.sa)) { + if (!sctp_addr_in_initack(m, offset, length, &laddr->ifa->address.sa)) { /* try to add it */ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb, laddr->ifa, SCTP_ADD_IP_ADDRESS, SCTP_ADDR_NOT_LOCKED); @@ -3047,6 +3060,15 @@ sctp_check_address_list_all(struct sctp_tcb *stcb, struct mbuf *m, int offset, struct sctp_ifa *sctp_ifa; uint32_t vrf_id; +#ifdef INET + struct sockaddr_in *sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6; + +#endif + if (stcb) { vrf_id = stcb->asoc.vrf_id; } else { @@ -3070,9 +3092,35 @@ sctp_check_address_list_all(struct sctp_tcb *stcb, struct mbuf *m, int offset, if (sctp_cmpaddr(&sctp_ifa->address.sa, init_addr)) { continue; } + switch (sctp_ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: + sin = (struct sockaddr_in *)&sctp_ifa->address.sin; + if ((ipv4_scope == 0) && + (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { + /* private address not in scope */ + continue; + } + break; +#endif +#ifdef INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sin6; + if ((local_scope == 0) && + (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) { + continue; + } + if ((site_scope == 0) && + (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { + continue; + } + break; +#endif + default: + break; + } /* check to see if in the init-ack */ - if (!sctp_addr_in_initack(stcb, m, offset, length, - &sctp_ifa->address.sa)) { + if (!sctp_addr_in_initack(m, offset, length, &sctp_ifa->address.sa)) { /* try to add it */ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb, sctp_ifa, SCTP_ADD_IP_ADDRESS, @@ -3124,6 +3172,7 @@ sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa, uint32_t type, uint32_t vrf_id, struct sctp_ifa *sctp_ifap) { struct sctp_ifa *ifa; + struct sctp_laddr *laddr, *nladdr; if (sa->sa_len == 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EINVAL); @@ -3144,8 +3193,6 @@ sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa, if (type == SCTP_ADD_IP_ADDRESS) { sctp_add_local_addr_ep(inp, ifa, type); } else if (type == SCTP_DEL_IP_ADDRESS) { - struct sctp_laddr *laddr; - if (inp->laddr_count < 2) { /* can't delete the last local address */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EINVAL); @@ -3159,11 +3206,19 @@ sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa, } } } - if (!LIST_EMPTY(&inp->sctp_asoc_list)) { + if (LIST_EMPTY(&inp->sctp_asoc_list)) { /* * There is no need to start the iterator if the inp * has no associations. */ + if (type == SCTP_DEL_IP_ADDRESS) { + LIST_FOREACH_SAFE(laddr, &inp->sctp_addr_list, sctp_nxt_addr, nladdr) { + if (laddr->ifa == ifa) { + sctp_del_local_addr_ep(inp, ifa); + } + } + } + } else { struct sctp_asconf_iterator *asc; struct sctp_laddr *wi; @@ -3211,8 +3266,11 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb, struct sctp_asconf_addr *aa; struct sctp_ifa *sctp_ifap; struct sctp_asconf_tag_param *vtag; + +#ifdef INET struct sockaddr_in *to; +#endif #ifdef INET6 struct sockaddr_in6 *to6; @@ -3260,24 +3318,28 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb, memset(aa, 0, sizeof(struct sctp_asconf_addr)); /* fill in asconf address parameter fields */ /* ADD(0.0.0.0) */ - if (net->ro._l_addr.sa.sa_family == AF_INET) { + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: aa->ap.aph.ph.param_type = SCTP_ADD_IP_ADDRESS; aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_addrv4_param); aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS; aa->ap.addrp.ph.param_length = sizeof(struct sctp_ipv4addr_param); /* No need to add an address, we are using 0.0.0.0 */ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next); - } + break; +#endif #ifdef INET6 - else if (net->ro._l_addr.sa.sa_family == AF_INET6) { + case AF_INET6: aa->ap.aph.ph.param_type = SCTP_ADD_IP_ADDRESS; aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_addr_param); aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS; aa->ap.addrp.ph.param_length = sizeof(struct sctp_ipv6addr_param); /* No need to add an address, we are using 0.0.0.0 */ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next); + break; +#endif } -#endif /* INET6 */ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa), SCTP_M_ASC_ADDR); if (aa == NULL) { @@ -3289,24 +3351,28 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb, memset(aa, 0, sizeof(struct sctp_asconf_addr)); /* fill in asconf address parameter fields */ /* ADD(0.0.0.0) */ - if (net->ro._l_addr.sa.sa_family == AF_INET) { + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: aa->ap.aph.ph.param_type = SCTP_ADD_IP_ADDRESS; aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_addrv4_param); aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS; aa->ap.addrp.ph.param_length = sizeof(struct sctp_ipv4addr_param); /* No need to add an address, we are using 0.0.0.0 */ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next); - } + break; +#endif #ifdef INET6 - else if (net->ro._l_addr.sa.sa_family == AF_INET6) { + case AF_INET6: aa->ap.aph.ph.param_type = SCTP_DEL_IP_ADDRESS; aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_addr_param); aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS; aa->ap.addrp.ph.param_length = sizeof(struct sctp_ipv6addr_param); /* No need to add an address, we are using 0.0.0.0 */ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next); + break; +#endif } -#endif /* INET6 */ /* Now we must hunt the addresses and add all global addresses */ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { struct sctp_vrf *vrf = NULL; @@ -3321,18 +3387,20 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb, SCTP_IPI_ADDR_RLOCK(); LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) { LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) { - if (sctp_ifap->address.sa.sa_family == AF_INET) { + switch (sctp_ifap->address.sa.sa_family) { +#ifdef INET + case AF_INET: to = &sctp_ifap->address.sin; - if (IN4_ISPRIVATE_ADDRESS(&to->sin_addr)) { continue; } if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) { continue; } - } + break; +#endif #ifdef INET6 - else if (sctp_ifap->address.sa.sa_family == AF_INET6) { + case AF_INET6: to6 = &sctp_ifap->address.sin6; if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) { continue; @@ -3340,8 +3408,11 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb, if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) { continue; } - } + break; #endif + default: + continue; + } sctp_asconf_queue_mgmt(stcb, sctp_ifap, SCTP_ADD_IP_ADDRESS); } } @@ -3367,18 +3438,20 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb, continue; } sctp_ifap = laddr->ifa; - if (sctp_ifap->address.sa.sa_family == AF_INET) { + switch (sctp_ifap->address.sa.sa_family) { +#ifdef INET + case AF_INET: to = &sctp_ifap->address.sin; - if (IN4_ISPRIVATE_ADDRESS(&to->sin_addr)) { continue; } if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) { continue; } - } + break; +#endif #ifdef INET6 - else if (sctp_ifap->address.sa.sa_family == AF_INET6) { + case AF_INET6: to6 = &sctp_ifap->address.sin6; if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) { continue; @@ -3386,12 +3459,15 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb, if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) { continue; } - } + break; #endif + default: + continue; + } sctp_asconf_queue_mgmt(stcb, sctp_ifap, SCTP_ADD_IP_ADDRESS); } } skip_rest: /* Now we must send the asconf into the queue */ - sctp_send_asconf(stcb, net, 0); + sctp_send_asconf(stcb, net, SCTP_ADDR_NOT_LOCKED); } diff --git a/freebsd/sys/netinet/sctp_asconf.h b/freebsd/sys/netinet/sctp_asconf.h index bf3d5665..183c99b4 100644 --- a/freebsd/sys/netinet/sctp_asconf.h +++ b/freebsd/sys/netinet/sctp_asconf.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,8 +30,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_asconf.h,v 1.8 2005/03/06 16:04:16 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -46,8 +46,8 @@ extern void sctp_asconf_cleanup(struct sctp_tcb *, struct sctp_nets *); extern struct mbuf *sctp_compose_asconf(struct sctp_tcb *, int *, int); extern void -sctp_handle_asconf(struct mbuf *, unsigned int, struct sctp_asconf_chunk *, - struct sctp_tcb *, int i); +sctp_handle_asconf(struct mbuf *, unsigned int, struct sockaddr *, + struct sctp_asconf_chunk *, struct sctp_tcb *, int); extern void sctp_handle_asconf_ack(struct mbuf *, int, struct sctp_asconf_ack_chunk *, diff --git a/freebsd/sys/netinet/sctp_auth.c b/freebsd/sys/netinet/sctp_auth.c index cf4ebaa9..088ec8c2 100644 --- a/freebsd/sys/netinet/sctp_auth.c +++ b/freebsd/sys/netinet/sctp_auth.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -284,16 +286,16 @@ sctp_print_key(sctp_key_t * key, const char *str) uint32_t i; if (key == NULL) { - printf("%s: [Null key]\n", str); + SCTP_PRINTF("%s: [Null key]\n", str); return; } - printf("%s: len %u, ", str, key->keylen); + SCTP_PRINTF("%s: len %u, ", str, key->keylen); if (key->keylen) { for (i = 0; i < key->keylen; i++) - printf("%02x", key->key[i]); - printf("\n"); + SCTP_PRINTF("%02x", key->key[i]); + SCTP_PRINTF("\n"); } else { - printf("[Null key]\n"); + SCTP_PRINTF("[Null key]\n"); } } @@ -303,16 +305,16 @@ sctp_show_key(sctp_key_t * key, const char *str) uint32_t i; if (key == NULL) { - printf("%s: [Null key]\n", str); + SCTP_PRINTF("%s: [Null key]\n", str); return; } - printf("%s: len %u, ", str, key->keylen); + SCTP_PRINTF("%s: len %u, ", str, key->keylen); if (key->keylen) { for (i = 0; i < key->keylen; i++) - printf("%02x", key->key[i]); - printf("\n"); + SCTP_PRINTF("%02x", key->key[i]); + SCTP_PRINTF("\n"); } else { - printf("[Null key]\n"); + SCTP_PRINTF("[Null key]\n"); } } @@ -469,7 +471,6 @@ sctp_compute_hashkey(sctp_key_t * key1, sctp_key_t * key2, sctp_key_t * shared) } if (sctp_get_keylen(key2)) { bcopy(key2->key, key_ptr, key2->keylen); - key_ptr += key2->keylen; } } else { /* key is shared + key2 + key1 */ @@ -483,7 +484,6 @@ sctp_compute_hashkey(sctp_key_t * key1, sctp_key_t * key2, sctp_key_t * shared) } if (sctp_get_keylen(key1)) { bcopy(key1->key, key_ptr, key1->keylen); - key_ptr += key1->keylen; } } return (new_key); @@ -593,12 +593,16 @@ sctp_auth_key_acquire(struct sctp_tcb *stcb, uint16_t key_id) atomic_add_int(&skey->refcount, 1); SCTPDBG(SCTP_DEBUG_AUTH2, "%s: stcb %p key %u refcount acquire to %d\n", - __FUNCTION__, stcb, key_id, skey->refcount); + __FUNCTION__, (void *)stcb, key_id, skey->refcount); } } void -sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t key_id) +sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t key_id, int so_locked +#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) + SCTP_UNUSED +#endif +) { sctp_sharedkey_t *skey; @@ -610,16 +614,16 @@ sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t key_id) sctp_free_sharedkey(skey); SCTPDBG(SCTP_DEBUG_AUTH2, "%s: stcb %p key %u refcount release to %d\n", - __FUNCTION__, stcb, key_id, skey->refcount); + __FUNCTION__, (void *)stcb, key_id, skey->refcount); /* see if a notification should be generated */ if ((skey->refcount <= 1) && (skey->deactivated)) { /* notify ULP that key is no longer used */ sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb, - key_id, 0, SCTP_SO_LOCKED); + key_id, 0, so_locked); SCTPDBG(SCTP_DEBUG_AUTH2, "%s: stcb %p key %u no longer used, %d\n", - __FUNCTION__, stcb, key_id, skey->refcount); + __FUNCTION__, (void *)stcb, key_id, skey->refcount); } } } @@ -1799,7 +1803,7 @@ sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth, * shared_key_id, (void * *)stcb->asoc.authinfo.recv_keyid); */ - sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY, + sctp_notify_authentication(stcb, SCTP_AUTH_NEW_KEY, shared_key_id, stcb->asoc.authinfo.recv_keyid, SCTP_SO_NOT_LOCKED); /* compute a new recv assoc key and cache it */ @@ -1862,7 +1866,7 @@ sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication, /* If the socket is gone we are out of here */ return; } - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTHEVNT)) + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_AUTHEVNT)) /* event not enabled */ return; @@ -1887,7 +1891,7 @@ sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication, /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, m_notify); + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ sctp_m_freem(m_notify); @@ -1949,9 +1953,6 @@ sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit) case SCTP_ASCONF_ACK: peer_supports_asconf = 1; break; - case SCTP_AUTHENTICATION: - peer_supports_auth = 1; - break; default: /* one we don't care about */ break; diff --git a/freebsd/sys/netinet/sctp_auth.h b/freebsd/sys/netinet/sctp_auth.h index 36535357..a7ded047 100644 --- a/freebsd/sys/netinet/sctp_auth.h +++ b/freebsd/sys/netinet/sctp_auth.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -31,8 +33,8 @@ #include __FBSDID("$FreeBSD$"); -#ifndef __SCTP_AUTH_HH__ -#define __SCTP_AUTH_HH__ +#ifndef _NETINET_SCTP_AUTH_H_ +#define _NETINET_SCTP_AUTH_H_ /* digest lengths */ @@ -87,7 +89,7 @@ typedef struct sctp_hmaclist { } sctp_hmaclist_t; /* authentication info */ -typedef struct sctp_authinfo { +typedef struct sctp_authinformation { sctp_key_t *random; /* local random key (concatenated) */ uint32_t random_len; /* local random number length for param */ sctp_key_t *peer_random;/* peer's random key (concatenated) */ @@ -96,7 +98,7 @@ typedef struct sctp_authinfo { uint16_t active_keyid; /* active send keyid */ uint16_t assoc_keyid; /* current send keyid (cached) */ uint16_t recv_keyid; /* last recv keyid (cached) */ -} sctp_authinfo_t; +} sctp_authinfo_t; @@ -154,7 +156,9 @@ sctp_copy_skeylist(const struct sctp_keyhead *src, /* ref counts on shared keys, by key id */ extern void sctp_auth_key_acquire(struct sctp_tcb *stcb, uint16_t keyid); -extern void sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t keyid); +extern void +sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t keyid, + int so_locked); /* hmac list handling */ diff --git a/freebsd/sys/netinet/sctp_bsd_addr.c b/freebsd/sys/netinet/sctp_bsd_addr.c index 8612adc9..49549d15 100644 --- a/freebsd/sys/netinet/sctp_bsd_addr.c +++ b/freebsd/sys/netinet/sctp_bsd_addr.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,8 +32,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -70,25 +70,10 @@ MALLOC_DEFINE(SCTP_M_TIMW, "sctp_timw", "sctp time block"); MALLOC_DEFINE(SCTP_M_MVRF, "sctp_mvrf", "sctp mvrf pcb list"); MALLOC_DEFINE(SCTP_M_ITER, "sctp_iter", "sctp iterator control"); MALLOC_DEFINE(SCTP_M_SOCKOPT, "sctp_socko", "sctp socket option"); +MALLOC_DEFINE(SCTP_M_MCORE, "sctp_mcore", "sctp mcore queue"); /* Global NON-VNET structure that controls the iterator */ struct iterator_control sctp_it_ctl; -static int __sctp_thread_based_iterator_started = 0; - - -static void -sctp_cleanup_itqueue(void) -{ - struct sctp_iterator *it; - - while ((it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead)) != NULL) { - if (it->function_atend != NULL) { - (*it->function_atend) (it->pointer, it->val); - } - TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); - SCTP_FREE(it, SCTP_M_ITER); - } -} void @@ -98,20 +83,14 @@ sctp_wakeup_iterator(void) } static void -sctp_iterator_thread(void *v) +sctp_iterator_thread(void *v SCTP_UNUSED) { SCTP_IPI_ITERATOR_WQ_LOCK(); - while (1) { + /* In FreeBSD this thread never terminates. */ + for (;;) { msleep(&sctp_it_ctl.iterator_running, &sctp_it_ctl.ipi_iterator_wq_mtx, 0, "waiting_for_work", 0); - if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_MUST_EXIT) { - SCTP_IPI_ITERATOR_WQ_DESTROY(); - SCTP_ITERATOR_LOCK_DESTROY(); - sctp_cleanup_itqueue(); - __sctp_thread_based_iterator_started = 0; - kthread_exit(); - } sctp_iterator_worker(); } } @@ -119,21 +98,21 @@ sctp_iterator_thread(void *v) void sctp_startup_iterator(void) { - if (__sctp_thread_based_iterator_started) { + static int called = 0; + int ret; + + if (called) { /* You only get one */ return; } /* init the iterator head */ - __sctp_thread_based_iterator_started = 1; + called = 1; sctp_it_ctl.iterator_running = 0; sctp_it_ctl.iterator_flags = 0; sctp_it_ctl.cur_it = NULL; SCTP_ITERATOR_LOCK_INIT(); SCTP_IPI_ITERATOR_WQ_INIT(); TAILQ_INIT(&sctp_it_ctl.iteratorhead); - - int ret; - ret = kproc_create(sctp_iterator_thread, (void *)NULL, &sctp_it_ctl.thread_proc, @@ -175,12 +154,12 @@ sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa) static uint32_t -sctp_is_desired_interface_type(struct ifaddr *ifa) +sctp_is_desired_interface_type(struct ifnet *ifn) { int result; /* check the interface type to see if it's one we care about */ - switch (ifa->ifa_ifp->if_type) { + switch (ifn->if_type) { case IFT_ETHER: case IFT_ISO88023: case IFT_ISO88024: @@ -201,9 +180,11 @@ sctp_is_desired_interface_type(struct ifaddr *ifa) case IFT_SLIP: case IFT_GIF: case IFT_L2VLAN: + case IFT_STF: case IFT_IP: case IFT_IPOVERCDLC: case IFT_IPOVERCLAW: + case IFT_PROPVIRTUAL: /* NetGraph Virtual too */ case IFT_VIRTUALIPADDRESS: result = 1; break; @@ -227,40 +208,59 @@ sctp_init_ifns_for_vrf(int vrfid) */ struct ifnet *ifn; struct ifaddr *ifa; - struct in6_ifaddr *ifa6; struct sctp_ifa *sctp_ifa; uint32_t ifa_flags; +#ifdef INET6 + struct in6_ifaddr *ifa6; + +#endif + IFNET_RLOCK(); TAILQ_FOREACH(ifn, &MODULE_GLOBAL(ifnet), if_list) { - IF_ADDR_LOCK(ifn); + if (sctp_is_desired_interface_type(ifn) == 0) { + /* non desired type */ + continue; + } + IF_ADDR_RLOCK(ifn); TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { if (ifa->ifa_addr == NULL) { continue; } - if ((ifa->ifa_addr->sa_family != AF_INET) && (ifa->ifa_addr->sa_family != AF_INET6)) { - /* non inet/inet6 skip */ - continue; - } - if (ifa->ifa_addr->sa_family == AF_INET6) { - if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) { - /* skip unspecifed addresses */ + switch (ifa->ifa_addr->sa_family) { +#ifdef INET + case AF_INET: + if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) { continue; } - } else { - if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) { + break; +#endif +#ifdef INET6 + case AF_INET6: + if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) { + /* skip unspecifed addresses */ continue; } - } - if (sctp_is_desired_interface_type(ifa) == 0) { - /* non desired type */ + break; +#endif + default: continue; } - if (ifa->ifa_addr->sa_family == AF_INET6) { + switch (ifa->ifa_addr->sa_family) { +#ifdef INET + case AF_INET: + ifa_flags = 0; + break; +#endif +#ifdef INET6 + case AF_INET6: ifa6 = (struct in6_ifaddr *)ifa; ifa_flags = ifa6->ia6_flags; - } else { + break; +#endif + default: ifa_flags = 0; + break; } sctp_ifa = sctp_add_addr_to_vrf(vrfid, (void *)ifn, @@ -275,7 +275,7 @@ sctp_init_ifns_for_vrf(int vrfid) sctp_ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE; } } - IF_ADDR_UNLOCK(ifn); + IF_ADDR_RUNLOCK(ifn); } IFNET_RUNLOCK(); } @@ -319,37 +319,41 @@ sctp_addr_change(struct ifaddr *ifa, int cmd) if (ifa->ifa_addr == NULL) { return; } - if ((ifa->ifa_addr->sa_family != AF_INET) && (ifa->ifa_addr->sa_family != AF_INET6)) { - /* non inet/inet6 skip */ + if (sctp_is_desired_interface_type(ifa->ifa_ifp) == 0) { + /* non desired type */ return; } - if (ifa->ifa_addr->sa_family == AF_INET6) { + switch (ifa->ifa_addr->sa_family) { +#ifdef INET + case AF_INET: + if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) { + return; + } + break; +#endif +#ifdef INET6 + case AF_INET6: ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags; if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) { /* skip unspecifed addresses */ return; } - } else { - if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) { - return; - } - } - - if (sctp_is_desired_interface_type(ifa) == 0) { - /* non desired type */ + break; +#endif + default: + /* non inet/inet6 skip */ return; } if (cmd == RTM_ADD) { (void)sctp_add_addr_to_vrf(SCTP_DEFAULT_VRFID, (void *)ifa->ifa_ifp, - ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type, - ifa->ifa_ifp->if_xname, + ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type, ifa->ifa_ifp->if_xname, (void *)ifa, ifa->ifa_addr, ifa_flags, 1); } else { sctp_del_addr_from_vrf(SCTP_DEFAULT_VRFID, ifa->ifa_addr, ifa->ifa_ifp->if_index, - ifa->ifa_ifp->if_xname - ); + ifa->ifa_ifp->if_xname); + /* * We don't bump refcount here so when it completes the * final delete will happen. @@ -418,11 +422,12 @@ sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header, #ifdef SCTP_PACKET_LOGGING void -sctp_packet_log(struct mbuf *m, int length) +sctp_packet_log(struct mbuf *m) { int *lenat, thisone; void *copyto; uint32_t *tick_tock; + int length; int total_len; int grabbed_lock = 0; int value, newval, thisend, thisbegin; @@ -432,6 +437,7 @@ sctp_packet_log(struct mbuf *m, int length) * (value) -ticks of log (ticks) o -ip packet o -as logged - * where this started (thisbegin) x <--end points here */ + length = SCTP_HEADER_LEN(m); total_len = SCTP_SIZE32((length + (4 * sizeof(int)))); /* Log a packet to the buffer. */ if (total_len > SCTP_PACKET_LOG_SIZE) { @@ -477,7 +483,7 @@ again_locked: } /* Sanity check */ if (thisend >= SCTP_PACKET_LOG_SIZE) { - printf("Insanity stops a log thisbegin:%d thisend:%d writers:%d lock:%d end:%d\n", + SCTP_PRINTF("Insanity stops a log thisbegin:%d thisend:%d writers:%d lock:%d end:%d\n", thisbegin, thisend, SCTP_BASE_VAR(packet_log_writers), diff --git a/freebsd/sys/netinet/sctp_bsd_addr.h b/freebsd/sys/netinet/sctp_bsd_addr.h index ae2fa525..24660ca5 100644 --- a/freebsd/sys/netinet/sctp_bsd_addr.h +++ b/freebsd/sys/netinet/sctp_bsd_addr.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -31,8 +33,9 @@ #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_bsd_addr_h__ -#define __sctp_bsd_addr_h__ +#ifndef _NETINET_SCTP_BSD_ADDR_H_ +#define _NETINET_SCTP_BSD_ADDR_H_ + #include #if defined(_KERNEL) || defined(__Userspace__) @@ -50,7 +53,7 @@ void sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa); #ifdef SCTP_PACKET_LOGGING -void sctp_packet_log(struct mbuf *m, int length); +void sctp_packet_log(struct mbuf *m); int sctp_copy_out_packet_log(uint8_t * target, int length); #endif diff --git a/freebsd/sys/netinet/sctp_cc_functions.c b/freebsd/sys/netinet/sctp_cc_functions.c index e0f8beae..36818feb 100644 --- a/freebsd/sys/netinet/sctp_cc_functions.c +++ b/freebsd/sys/netinet/sctp_cc_functions.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,6 +32,9 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ +#include +__FBSDID("$FreeBSD$"); + #include #include #include @@ -43,47 +48,82 @@ #include #include #include -#include -#include -__FBSDID("$FreeBSD$"); +#include + +#define SHIFT_MPTCP_MULTI_N 40 +#define SHIFT_MPTCP_MULTI_Z 16 +#define SHIFT_MPTCP_MULTI 8 -void +static void sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *assoc; uint32_t cwnd_in_mtu; assoc = &stcb->asoc; - /* - * We take the minimum of the burst limit and the initial congestion - * window. The initial congestion window is at least two times the - * MTU. - */ cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd); - if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst)) - cwnd_in_mtu = assoc->max_burst; - net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; + if (cwnd_in_mtu == 0) { + /* Using 0 means that the value of RFC 4960 is used. */ + net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + } else { + /* + * We take the minimum of the burst limit and the initial + * congestion window. + */ + if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst)) + cwnd_in_mtu = assoc->max_burst; + net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; + } + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) { + /* In case of resource pooling initialize appropriately */ + net->cwnd /= assoc->numnets; + if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) { + net->cwnd = net->mtu - sizeof(struct sctphdr); + } + } net->ssthresh = assoc->peers_rwnd; - + SDT_PROBE(sctp, cwnd, net, init, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + 0, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); } } -void +static void sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; - + uint32_t t_ssthresh, t_cwnd; + uint64_t t_ucwnd_sbw; + + /* MT FIXME: Don't compute this over and over again */ + t_ssthresh = 0; + t_cwnd = 0; + t_ucwnd_sbw = 0; + if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) || + (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) { + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + t_ssthresh += net->ssthresh; + t_cwnd += net->cwnd; + if (net->lastsa > 0) { + t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) net->lastsa; + } + } + if (t_ucwnd_sbw == 0) { + t_ucwnd_sbw = 1; + } + } /*- - * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && + * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || - (asoc->sctp_cmt_on_off == 1)) { + (asoc->sctp_cmt_on_off > 0)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* @@ -95,11 +135,54 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_tmit_chunk *lchk; int old_cwnd = net->cwnd; - net->ssthresh = net->cwnd / 2; - if (net->ssthresh < (net->mtu * 2)) { - net->ssthresh = 2 * net->mtu; + if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) || + (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) { + if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) { + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + + } + if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) { + uint32_t srtt; + + srtt = net->lastsa; + /* + * lastsa>>3; we don't need + * to devide ... + */ + if (srtt == 0) { + srtt = 1; + } + /* + * Short Version => Equal to + * Contel Version MBe + */ + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * + t_ucwnd_sbw)); + /* INCREASE FACTOR */ ; + } + if ((net->cwnd > t_cwnd / 2) && + (net->ssthresh < net->cwnd - t_cwnd / 2)) { + net->ssthresh = net->cwnd - t_cwnd / 2; + } + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + } + } else { + net->ssthresh = net->cwnd / 2; + if (net->ssthresh < (net->mtu * 2)) { + net->ssthresh = 2 * net->mtu; + } } net->cwnd = net->ssthresh; + SDT_PROBE(sctp, cwnd, net, fr, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR); @@ -129,13 +212,6 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } - /* - * Disable Nonce Sum Checking and store the - * resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, @@ -151,13 +227,493 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, } } -void -sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, +/* Defines for instantaneous bw decisions */ +#define SCTP_INST_LOOSING 1 /* Loosing to other flows */ +#define SCTP_INST_NEUTRAL 2 /* Neutral, no indication */ +#define SCTP_INST_GAINING 3 /* Gaining, step down possible */ + + +static int +cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, + uint64_t rtt_offset, uint64_t vtag, uint8_t inst_ind) +{ + uint64_t oth, probepoint; + + probepoint = (((uint64_t) net->cwnd) << 32); + if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) { + /* + * rtt increased we don't update bw.. so we don't update the + * rtt either. + */ + /* Probe point 5 */ + probepoint |= ((5 << 16) | 1); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) { + if (net->cc_mod.rtcc.last_step_state == 5) + net->cc_mod.rtcc.step_cnt++; + else + net->cc_mod.rtcc.step_cnt = 1; + net->cc_mod.rtcc.last_step_state = 5; + if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) || + ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) && + ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) { + /* Try a step down */ + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + if (net->cwnd > (4 * net->mtu)) { + net->cwnd -= net->mtu; + net->cc_mod.rtcc.vol_reduce++; + } else { + net->cc_mod.rtcc.step_cnt = 0; + } + } + } + return (1); + } + if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) { + /* + * rtt decreased, there could be more room. we update both + * the bw and the rtt here to lock this in as a good step + * down. + */ + /* Probe point 6 */ + probepoint |= ((6 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + if ((net->cc_mod.rtcc.last_step_state == 5) && + (net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step)) { + /* Step down worked */ + net->cc_mod.rtcc.step_cnt = 0; + return (1); + } else { + net->cc_mod.rtcc.last_step_state = 6; + net->cc_mod.rtcc.step_cnt = 0; + } + } + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + if (inst_ind == SCTP_INST_GAINING) + return (1); + else if (inst_ind == SCTP_INST_NEUTRAL) + return (1); + else + return (0); + } + /* + * Ok bw and rtt remained the same .. no update to any + */ + /* Probe point 7 */ + probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) { + if (net->cc_mod.rtcc.last_step_state == 5) + net->cc_mod.rtcc.step_cnt++; + else + net->cc_mod.rtcc.step_cnt = 1; + net->cc_mod.rtcc.last_step_state = 5; + if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) || + ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) && + ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) { + /* Try a step down */ + if (net->cwnd > (4 * net->mtu)) { + net->cwnd -= net->mtu; + net->cc_mod.rtcc.vol_reduce++; + return (1); + } else { + net->cc_mod.rtcc.step_cnt = 0; + } + } + } + if (inst_ind == SCTP_INST_GAINING) + return (1); + else if (inst_ind == SCTP_INST_NEUTRAL) + return (1); + else + return ((int)net->cc_mod.rtcc.ret_from_eq); +} + +static int +cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t rtt_offset, + uint64_t vtag, uint8_t inst_ind) +{ + uint64_t oth, probepoint; + + /* Bandwidth decreased. */ + probepoint = (((uint64_t) net->cwnd) << 32); + if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) { + /* rtt increased */ + /* Did we add more */ + if ((net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) && + (inst_ind != SCTP_INST_LOOSING)) { + /* We caused it maybe.. back off? */ + /* PROBE POINT 1 */ + probepoint |= ((1 << 16) | 1); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.ret_from_eq) { + /* + * Switch over to CA if we are less + * aggressive + */ + net->ssthresh = net->cwnd - 1; + net->partial_bytes_acked = 0; + } + return (1); + } + /* Probe point 2 */ + probepoint |= ((2 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + /* Someone else - fight for more? */ + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + /* + * Did we voluntarily give up some? if so take one + * back please + */ + if ((net->cc_mod.rtcc.vol_reduce) && + (inst_ind != SCTP_INST_GAINING)) { + net->cwnd += net->mtu; + net->cc_mod.rtcc.vol_reduce--; + } + net->cc_mod.rtcc.last_step_state = 2; + net->cc_mod.rtcc.step_cnt = 0; + } + goto out_decision; + } else if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) { + /* bw & rtt decreased */ + /* Probe point 3 */ + probepoint |= ((3 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + if ((net->cc_mod.rtcc.vol_reduce) && + (inst_ind != SCTP_INST_GAINING)) { + net->cwnd += net->mtu; + net->cc_mod.rtcc.vol_reduce--; + } + net->cc_mod.rtcc.last_step_state = 3; + net->cc_mod.rtcc.step_cnt = 0; + } + goto out_decision; + } + /* The bw decreased but rtt stayed the same */ + /* Probe point 4 */ + probepoint |= ((4 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + if ((net->cc_mod.rtcc.vol_reduce) && + (inst_ind != SCTP_INST_GAINING)) { + net->cwnd += net->mtu; + net->cc_mod.rtcc.vol_reduce--; + } + net->cc_mod.rtcc.last_step_state = 4; + net->cc_mod.rtcc.step_cnt = 0; + } +out_decision: + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + if (inst_ind == SCTP_INST_GAINING) { + return (1); + } else { + return (0); + } +} + +static int +cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t vtag) +{ + uint64_t oth, probepoint; + + /* + * BW increased, so update and return 0, since all actions in our + * table say to do the normal CC update. Note that we pay no + * attention to the inst_ind since our overall sum is increasing. + */ + /* PROBE POINT 0 */ + probepoint = (((uint64_t) net->cwnd) << 32); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + if (net->cc_mod.rtcc.steady_step) { + oth = net->cc_mod.rtcc.vol_reduce; + oth <<= 16; + oth |= net->cc_mod.rtcc.step_cnt; + oth <<= 16; + oth |= net->cc_mod.rtcc.last_step_state; + SDT_PROBE(sctp, cwnd, net, rttstep, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + oth, + probepoint); + net->cc_mod.rtcc.last_step_state = 0; + net->cc_mod.rtcc.step_cnt = 0; + net->cc_mod.rtcc.vol_reduce = 0; + } + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + return (0); +} + +/* RTCC Algoritm to limit growth of cwnd, return + * true if you want to NOT allow cwnd growth + */ +static int +cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw) +{ + uint64_t bw_offset, rtt_offset; + uint64_t probepoint, rtt, vtag; + uint64_t bytes_for_this_rtt, inst_bw; + uint64_t div, inst_off; + int bw_shift; + uint8_t inst_ind; + int ret; + + /*- + * Here we need to see if we want + * to limit cwnd growth due to increase + * in overall rtt but no increase in bw. + * We use the following table to figure + * out what we should do. When we return + * 0, cc update goes on as planned. If we + * return 1, then no cc update happens and cwnd + * stays where it is at. + * ---------------------------------- + * BW | RTT | Action + * ********************************* + * INC | INC | return 0 + * ---------------------------------- + * INC | SAME | return 0 + * ---------------------------------- + * INC | DECR | return 0 + * ---------------------------------- + * SAME | INC | return 1 + * ---------------------------------- + * SAME | SAME | return 1 + * ---------------------------------- + * SAME | DECR | return 0 + * ---------------------------------- + * DECR | INC | return 0 or 1 based on if we caused. + * ---------------------------------- + * DECR | SAME | return 0 + * ---------------------------------- + * DECR | DECR | return 0 + * ---------------------------------- + * + * We are a bit fuzz on what an increase or + * decrease is. For BW it is the same if + * it did not change within 1/64th. For + * RTT it stayed the same if it did not + * change within 1/32nd + */ + bw_shift = SCTP_BASE_SYSCTL(sctp_rttvar_bw); + rtt = stcb->asoc.my_vtag; + vtag = (rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport); + probepoint = (((uint64_t) net->cwnd) << 32); + rtt = net->rtt; + if (net->cc_mod.rtcc.rtt_set_this_sack) { + net->cc_mod.rtcc.rtt_set_this_sack = 0; + bytes_for_this_rtt = net->cc_mod.rtcc.bw_bytes - net->cc_mod.rtcc.bw_bytes_at_last_rttc; + net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes; + if (net->rtt) { + div = net->rtt / 1000; + if (div) { + inst_bw = bytes_for_this_rtt / div; + inst_off = inst_bw >> bw_shift; + if (inst_bw > nbw) + inst_ind = SCTP_INST_GAINING; + else if ((inst_bw + inst_off) < nbw) + inst_ind = SCTP_INST_LOOSING; + else + inst_ind = SCTP_INST_NEUTRAL; + probepoint |= ((0xb << 16) | inst_ind); + } else { + inst_ind = net->cc_mod.rtcc.last_inst_ind; + inst_bw = bytes_for_this_rtt / (uint64_t) (net->rtt); + /* Can't determine do not change */ + probepoint |= ((0xc << 16) | inst_ind); + } + } else { + inst_ind = net->cc_mod.rtcc.last_inst_ind; + inst_bw = bytes_for_this_rtt; + /* Can't determine do not change */ + probepoint |= ((0xd << 16) | inst_ind); + } + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((nbw << 32) | inst_bw), + ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt), + net->flight_size, + probepoint); + } else { + /* No rtt measurement, use last one */ + inst_ind = net->cc_mod.rtcc.last_inst_ind; + } + bw_offset = net->cc_mod.rtcc.lbw >> bw_shift; + if (nbw > net->cc_mod.rtcc.lbw + bw_offset) { + ret = cc_bw_increase(stcb, net, nbw, vtag); + goto out; + } + rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt); + if (nbw < net->cc_mod.rtcc.lbw - bw_offset) { + ret = cc_bw_decrease(stcb, net, nbw, rtt_offset, vtag, inst_ind); + goto out; + } + /* + * If we reach here then we are in a situation where the bw stayed + * the same. + */ + ret = cc_bw_same(stcb, net, nbw, rtt_offset, vtag, inst_ind); +out: + net->cc_mod.rtcc.last_inst_ind = inst_ind; + return (ret); +} + +static void +sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb, struct sctp_association *asoc, - int accum_moved, int reneged_all, int will_exit) + int accum_moved, int reneged_all SCTP_UNUSED, int will_exit, int use_rtcc) { struct sctp_nets *net; - + int old_cwnd; + uint32_t t_ssthresh, t_cwnd, incr; + uint64_t t_ucwnd_sbw; + uint64_t t_path_mptcp; + uint64_t mptcp_like_alpha; + uint32_t srtt; + uint64_t max_path; + + /* MT FIXME: Don't compute this over and over again */ + t_ssthresh = 0; + t_cwnd = 0; + t_ucwnd_sbw = 0; + t_path_mptcp = 0; + mptcp_like_alpha = 1; + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) { + max_path = 0; + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + t_ssthresh += net->ssthresh; + t_cwnd += net->cwnd; + /* lastsa>>3; we don't need to devide ... */ + srtt = net->lastsa; + if (srtt > 0) { + uint64_t tmp; + + t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) srtt; + t_path_mptcp += (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_Z) / + (((uint64_t) net->mtu) * (uint64_t) srtt); + tmp = (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_N) / + ((uint64_t) net->mtu * (uint64_t) (srtt * srtt)); + if (tmp > max_path) { + max_path = tmp; + } + } + } + if (t_path_mptcp > 0) { + mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp); + } else { + mptcp_like_alpha = 1; + } + } + if (t_ssthresh == 0) { + t_ssthresh = 1; + } + if (t_ucwnd_sbw == 0) { + t_ucwnd_sbw = 1; + } /******************************/ /* update cwnd and Early FR */ /******************************/ @@ -168,49 +724,12 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { - if (compare_with_wrap(asoc->last_acked_seq, - net->fast_recovery_tsn, MAX_TSN) || - (asoc->last_acked_seq == net->fast_recovery_tsn) || - compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || - (net->pseudo_cumack == net->fast_recovery_tsn)) { + if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) || + SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - /* - * So, first of all do we need to have a Early FR - * timer running? - */ - if ((!TAILQ_EMPTY(&asoc->sent_queue) && - (net->ref_count > 1) && - (net->flight_size < net->cwnd)) || - (reneged_all)) { - /* - * yes, so in this case stop it if its - * running, and then restart it. Reneging - * all is a special case where we want to - * run the Early FR timer and then force the - * last few unacked to be sent, causing us - * to illicit a sack with gaps to force out - * the others. - */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck2); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); - } - SCTP_STAT_INCR(sctps_earlyfrstrid); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); - } else { - /* No, stop it if its running */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck3); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); - } - } - } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { @@ -218,56 +737,14 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, } continue; } - if (net->net_ack2 > 0) { - /* - * Karn's rule applies to clearing error count, this - * is optional. - */ - net->error_count = 0; - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == - SCTP_ADDR_NOT_REACHABLE) { - /* addr came good */ - net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; - net->dest_state |= SCTP_ADDR_REACHABLE; - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, - SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); - /* now was it the primary? if so restore */ - if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { - (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); - } - } - /* - * JRS 5/14/07 - If CMT PF is on and the destination - * is in PF state, set the destination to active - * state and set the cwnd to one or two MTU's based - * on whether PF1 or PF2 is being used. - * - * Should we stop any running T3 timer here? - */ - if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - net->dest_state &= ~SCTP_ADDR_PF; - net->cwnd = net->mtu * asoc->sctp_cmt_pf; - SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", - net, net->cwnd); - /* - * Since the cwnd value is explicitly set, - * skip the code that updates the cwnd - * value. - */ - goto skip_cwnd_update; - } - } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* - * if (sctp_cmt_on_off == 1 && - * net->fast_retran_loss_recovery && - * net->will_exit_fast_recovery == 0) { @@@ Do something } - * else if (sctp_cmt_on_off == 0 && + * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery + * && net->will_exit_fast_recovery == 0) { @@@ Do something + * } else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif @@ -279,31 +756,143 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, * If we are in loss recovery we skip any cwnd * update */ - goto skip_cwnd_update; + return; + } + /* + * Did any measurements go on for this network? + */ + if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) { + uint64_t nbw; + + /* + * At this point our bw_bytes has been updated by + * incoming sack information. + * + * But our bw may not yet be set. + * + */ + if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) { + nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000); + } else { + nbw = net->cc_mod.rtcc.bw_bytes; + } + if (net->cc_mod.rtcc.lbw) { + if (cc_bw_limit(stcb, net, nbw)) { + /* Hold here, no update */ + continue; + } + } else { + uint64_t vtag, probepoint; + + probepoint = (((uint64_t) net->cwnd) << 32); + probepoint |= ((0xa << 16) | 0); + vtag = (net->rtt << 32) | + (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + nbw, + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + if (net->cc_mod.rtcc.rtt_set_this_sack) { + net->cc_mod.rtcc.rtt_set_this_sack = 0; + net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes; + } + } } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || - ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { + ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { /* If the cumulative ack moved we can proceed */ if (net->cwnd <= net->ssthresh) { /* We are in slow start */ if (net->flight_size + net->net_ack >= net->cwnd) { - if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) { - net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)); - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->mtu, - SCTP_CWND_LOG_FROM_SS); + uint32_t limit; + + old_cwnd = net->cwnd; + switch (asoc->sctp_cmt_on_off) { + case SCTP_CMT_RPV1: + limit = (uint32_t) (((uint64_t) net->mtu * + (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + incr = (uint32_t) (((uint64_t) net->net_ack * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + if (incr > limit) { + incr = limit; + } + if (incr == 0) { + incr = 1; } - } else { - net->cwnd += net->net_ack; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->net_ack, - SCTP_CWND_LOG_FROM_SS); + break; + case SCTP_CMT_RPV2: + /* + * lastsa>>3; we don't need + * to divide ... + */ + srtt = net->lastsa; + if (srtt == 0) { + srtt = 1; } + limit = (uint32_t) (((uint64_t) net->mtu * + (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * t_ucwnd_sbw)); + /* INCREASE FACTOR */ + incr = (uint32_t) (((uint64_t) net->net_ack * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * t_ucwnd_sbw)); + /* INCREASE FACTOR */ + if (incr > limit) { + incr = limit; + } + if (incr == 0) { + incr = 1; + } + break; + case SCTP_CMT_MPTCP: + limit = (uint32_t) (((uint64_t) net->mtu * + mptcp_like_alpha * + (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >> + SHIFT_MPTCP_MULTI); + incr = (uint32_t) (((uint64_t) net->net_ack * + mptcp_like_alpha) >> + SHIFT_MPTCP_MULTI); + if (incr > limit) { + incr = limit; + } + if (incr > net->net_ack) { + incr = net->net_ack; + } + if (incr > net->mtu) { + incr = net->mtu; + } + break; + default: + incr = net->net_ack; + if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) { + incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable); + } + break; } + net->cwnd += incr; + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, incr, + SCTP_CWND_LOG_FROM_SS); + } + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, @@ -320,7 +909,52 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, if ((net->flight_size + net->net_ack >= net->cwnd) && (net->partial_bytes_acked >= net->cwnd)) { net->partial_bytes_acked -= net->cwnd; - net->cwnd += net->mtu; + old_cwnd = net->cwnd; + switch (asoc->sctp_cmt_on_off) { + case SCTP_CMT_RPV1: + incr = (uint32_t) (((uint64_t) net->mtu * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + if (incr == 0) { + incr = 1; + } + break; + case SCTP_CMT_RPV2: + /* + * lastsa>>3; we don't need + * to divide ... + */ + srtt = net->lastsa; + if (srtt == 0) { + srtt = 1; + } + incr = (uint32_t) ((uint64_t) net->mtu * + (uint64_t) net->cwnd / + ((uint64_t) srtt * + t_ucwnd_sbw)); + /* INCREASE FACTOR */ + if (incr == 0) { + incr = 1; + } + break; + case SCTP_CMT_MPTCP: + incr = (uint32_t) ((mptcp_like_alpha * + (uint64_t) net->cwnd) >> + SHIFT_MPTCP_MULTI); + if (incr > net->mtu) { + incr = net->mtu; + } + break; + default: + incr = net->mtu; + break; + } + net->cwnd += incr; + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_CA); @@ -338,69 +972,172 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, SCTP_CWND_LOG_NO_CUMACK); } } -skip_cwnd_update: - /* - * NOW, according to Karn's rule do we need to restore the - * RTO timer back? Check our net_ack2. If not set then we - * have a ambiguity.. i.e. all data ack'd was sent to more - * than one place. - */ - if (net->net_ack2) { - /* restore any doubled timers */ - net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; - if (net->RTO < stcb->asoc.minrto) { - net->RTO = stcb->asoc.minrto; - } - if (net->RTO > stcb->asoc.maxrto) { - net->RTO = stcb->asoc.maxrto; - } - } } } -void +static void +sctp_cwnd_update_exit_pf_common(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + int old_cwnd; + + old_cwnd = net->cwnd; + net->cwnd = net->mtu; + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + old_cwnd, net->cwnd); + SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", + (void *)net, net->cwnd); +} + + +static void sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd = net->cwnd; + uint32_t t_ssthresh, t_cwnd; + uint64_t t_ucwnd_sbw; + + /* MT FIXME: Don't compute this over and over again */ + t_ssthresh = 0; + t_cwnd = 0; + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) { + struct sctp_nets *lnet; + uint32_t srtt; + + t_ucwnd_sbw = 0; + TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) { + t_ssthresh += lnet->ssthresh; + t_cwnd += lnet->cwnd; + srtt = lnet->lastsa; + /* lastsa>>3; we don't need to divide ... */ + if (srtt > 0) { + t_ucwnd_sbw += (uint64_t) lnet->cwnd / (uint64_t) srtt; + } + } + if (t_ssthresh < 1) { + t_ssthresh = 1; + } + if (t_ucwnd_sbw < 1) { + t_ucwnd_sbw = 1; + } + if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) { + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + } else { + uint64_t cc_delta; - net->ssthresh = max(net->cwnd / 2, 4 * net->mtu); + srtt = net->lastsa; + /* lastsa>>3; we don't need to divide ... */ + if (srtt == 0) { + srtt = 1; + } + cc_delta = t_ucwnd_sbw * (uint64_t) srtt / 2; + if (cc_delta < t_cwnd) { + net->ssthresh = (uint32_t) ((uint64_t) t_cwnd - cc_delta); + } else { + net->ssthresh = net->mtu; + } + } + if ((net->cwnd > t_cwnd / 2) && + (net->ssthresh < net->cwnd - t_cwnd / 2)) { + net->ssthresh = net->cwnd - t_cwnd / 2; + } + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + } + } else { + net->ssthresh = max(net->cwnd / 2, 4 * net->mtu); + } net->cwnd = net->mtu; net->partial_bytes_acked = 0; - + SDT_PROBE(sctp, cwnd, net, to, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); } } -void -sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net) +static void +sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost, int use_rtcc) { int old_cwnd = net->cwnd; - SCTP_STAT_INCR(sctps_ecnereducedcwnd); - net->ssthresh = net->cwnd / 2; - if (net->ssthresh < net->mtu) { - net->ssthresh = net->mtu; - /* here back off the timer as well, to slow us down */ - net->RTO <<= 1; - } - net->cwnd = net->ssthresh; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) { + /* Data center Congestion Control */ + if (in_window == 0) { + /* + * Go to CA with the cwnd at the point we sent the + * TSN that was marked with a CE. + */ + if (net->ecn_prev_cwnd < net->cwnd) { + /* Restore to prev cwnd */ + net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost); + } else { + /* Just cut in 1/2 */ + net->cwnd /= 2; + } + /* Drop to CA */ + net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } + } else { + /* + * Further tuning down required over the drastic + * orginal cut + */ + net->ssthresh -= (net->mtu * num_pkt_lost); + net->cwnd -= (net->mtu * num_pkt_lost); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } + } + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + } else { + if (in_window == 0) { + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + net->ssthresh = net->cwnd / 2; + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + /* + * here back off the timer as well, to slow + * us down + */ + net->RTO <<= 1; + } + net->cwnd = net->ssthresh; + SDT_PROBE(sctp, cwnd, net, ecn, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } + } } + } -void +static void sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, uint32_t * bottle_bw, uint32_t * on_queue) { uint32_t bw_avail; - int rtt, incr; + int rtt; + unsigned int incr; int old_cwnd = net->cwnd; - /* need real RTT for this calc */ - rtt = ((net->lastsa >> 2) + net->lastsv) >> 1; + /* need real RTT in msd for this calc */ + rtt = net->rtt / 1000; /* get bottle neck bw */ *bottle_bw = ntohl(cp->bottle_bw); /* and whats on queue */ @@ -480,8 +1217,11 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, * Take 1/4 of the space left or max burst up .. whichever * is less. */ - incr = min((bw_avail - *on_queue) >> 2, - stcb->asoc.max_burst * net->mtu); + incr = (bw_avail - *on_queue) >> 2; + if ((stcb->asoc.max_burst > 0) && + (stcb->asoc.max_burst * net->mtu < incr)) { + incr = stcb->asoc.max_burst * net->mtu; + } net->cwnd += incr; } if (net->cwnd > bw_avail) { @@ -494,6 +1234,11 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, } if (net->cwnd - old_cwnd != 0) { /* log only changes */ + SDT_PROBE(sctp, cwnd, net, pd, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); @@ -501,7 +1246,7 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, } } -void +static void sctp_cwnd_update_after_output(struct sctp_tcb *stcb, struct sctp_nets *net, int burst_limit) { @@ -509,34 +1254,258 @@ sctp_cwnd_update_after_output(struct sctp_tcb *stcb, if (net->ssthresh < net->cwnd) net->ssthresh = net->cwnd; - net->cwnd = (net->flight_size + (burst_limit * net->mtu)); + if (burst_limit) { + net->cwnd = (net->flight_size + (burst_limit * net->mtu)); + SDT_PROBE(sctp, cwnd, net, bl, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); + } + } +} - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); +static void +sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + /* Passing a zero argument in last disables the rtcc algoritm */ + sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0); +} + +static void +sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost) +{ + /* Passing a zero argument in last disables the rtcc algoritm */ + sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0); +} + +/* Here starts the RTCCVAR type CC invented by RRS which + * is a slight mod to RFC2581. We reuse a common routine or + * two since these algoritms are so close and need to + * remain the same. + */ +static void +sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost) +{ + sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 1); +} + + +static +void +sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net, + struct sctp_tmit_chunk *tp1) +{ + net->cc_mod.rtcc.bw_bytes += tp1->send_size; +} + +static void +sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb SCTP_UNUSED, + struct sctp_nets *net) +{ + if (net->cc_mod.rtcc.tls_needs_set > 0) { + /* We had a bw measurment going on */ + struct timeval ltls; + + SCTP_GETPTIME_TIMEVAL(<ls); + timevalsub(<ls, &net->cc_mod.rtcc.tls); + net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec; } } -void -sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, struct sctp_nets *net) +static void +sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb, + struct sctp_nets *net) { - int old_cwnd = net->cwnd; + uint64_t vtag, probepoint; + + if (net->cc_mod.rtcc.lbw) { + /* Clear the old bw.. we went to 0 in-flight */ + vtag = (net->rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + probepoint = (((uint64_t) net->cwnd) << 32); + /* Probe point 8 */ + probepoint |= ((8 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | 0), + ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), + net->flight_size, + probepoint); + net->cc_mod.rtcc.lbw_rtt = 0; + net->cc_mod.rtcc.cwnd_at_bw_set = 0; + net->cc_mod.rtcc.lbw = 0; + net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0; + net->cc_mod.rtcc.vol_reduce = 0; + net->cc_mod.rtcc.bw_tot_time = 0; + net->cc_mod.rtcc.bw_bytes = 0; + net->cc_mod.rtcc.tls_needs_set = 0; + if (net->cc_mod.rtcc.steady_step) { + net->cc_mod.rtcc.vol_reduce = 0; + net->cc_mod.rtcc.step_cnt = 0; + net->cc_mod.rtcc.last_step_state = 0; + } + if (net->cc_mod.rtcc.ret_from_eq) { + /* less aggressive one - reset cwnd too */ + uint32_t cwnd_in_mtu, cwnd; - sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); - /* - * make a small adjustment to cwnd and force to CA. - */ - if (net->cwnd > net->mtu) - /* drop down one MTU after sending */ - net->cwnd -= net->mtu; - if (net->cwnd < net->ssthresh) - /* still in SS move to CA */ - net->ssthresh = net->cwnd - 1; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); + cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd); + if (cwnd_in_mtu == 0) { + /* + * Using 0 means that the value of RFC 4960 + * is used. + */ + cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + } else { + /* + * We take the minimum of the burst limit + * and the initial congestion window. + */ + if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst)) + cwnd_in_mtu = stcb->asoc.max_burst; + cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; + } + if (net->cwnd > cwnd) { + /* + * Only set if we are not a timeout (i.e. + * down to 1 mtu) + */ + net->cwnd = cwnd; + } + } + } +} + +static void +sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + uint64_t vtag, probepoint; + + sctp_set_initial_cc_param(stcb, net); + stcb->asoc.use_precise_time = 1; + probepoint = (((uint64_t) net->cwnd) << 32); + probepoint |= ((9 << 16) | 0); + vtag = (net->rtt << 32) | + (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + 0, + 0, + 0, + probepoint); + net->cc_mod.rtcc.lbw_rtt = 0; + net->cc_mod.rtcc.cwnd_at_bw_set = 0; + net->cc_mod.rtcc.vol_reduce = 0; + net->cc_mod.rtcc.lbw = 0; + net->cc_mod.rtcc.vol_reduce = 0; + net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0; + net->cc_mod.rtcc.bw_tot_time = 0; + net->cc_mod.rtcc.bw_bytes = 0; + net->cc_mod.rtcc.tls_needs_set = 0; + net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret); + net->cc_mod.rtcc.steady_step = SCTP_BASE_SYSCTL(sctp_steady_step); + net->cc_mod.rtcc.use_dccc_ecn = SCTP_BASE_SYSCTL(sctp_use_dccc_ecn); + net->cc_mod.rtcc.step_cnt = 0; + net->cc_mod.rtcc.last_step_state = 0; + + +} + +static int +sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget, + struct sctp_cc_option *cc_opt) +{ + struct sctp_nets *net; + + if (setorget == 1) { + /* a set */ + if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) { + if ((cc_opt->aid_value.assoc_value != 0) && + (cc_opt->aid_value.assoc_value != 1)) { + return (EINVAL); + } + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value; + } + } else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) { + if ((cc_opt->aid_value.assoc_value != 0) && + (cc_opt->aid_value.assoc_value != 1)) { + return (EINVAL); + } + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value; + } + } else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) { + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->cc_mod.rtcc.steady_step = cc_opt->aid_value.assoc_value; + } + } else { + return (EINVAL); + } + } else { + /* a get */ + if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) { + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + return (EFAULT); + } + cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq; + } else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) { + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + return (EFAULT); + } + cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn; + } else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) { + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + return (EFAULT); + } + cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.steady_step; + } else { + return (EINVAL); + } + } + return (0); +} + +static void +sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb SCTP_UNUSED, + struct sctp_nets *net) +{ + if (net->cc_mod.rtcc.tls_needs_set == 0) { + SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls); + net->cc_mod.rtcc.tls_needs_set = 2; } } +static void +sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + /* Passing a one argument at the last enables the rtcc algoritm */ + sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 1); +} + +static void +sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb SCTP_UNUSED, + struct sctp_nets *net, + struct timeval *now SCTP_UNUSED) +{ + net->cc_mod.rtcc.rtt_set_this_sack = 1; +} + +/* Here starts Sally Floyds HS-TCP */ + struct sctp_hs_raise_drop { int32_t cwnd; int32_t increase; @@ -628,9 +1597,7 @@ sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net) cur_val = net->cwnd >> 10; indx = SCTP_HS_TABLE_SIZE - 1; -#ifdef SCTP_DEBUG - printf("HS CC CAlled.\n"); -#endif + if (cur_val < sctp_cwnd_adjust[0].cwnd) { /* normal mode */ if (net->net_ack > net->mtu) { @@ -700,19 +1667,19 @@ sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net) } } -void +static void sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; /* - * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && + * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || - (asoc->sctp_cmt_on_off == 1)) { + (asoc->sctp_cmt_on_off > 0)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* @@ -750,13 +1717,6 @@ sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } - /* - * Disable Nonce Sum Checking and store the - * resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, @@ -772,10 +1732,10 @@ sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, } } -void +static void sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, struct sctp_association *asoc, - int accum_moved, int reneged_all, int will_exit) + int accum_moved, int reneged_all SCTP_UNUSED, int will_exit) { struct sctp_nets *net; @@ -789,49 +1749,12 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { - if (compare_with_wrap(asoc->last_acked_seq, - net->fast_recovery_tsn, MAX_TSN) || - (asoc->last_acked_seq == net->fast_recovery_tsn) || - compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || - (net->pseudo_cumack == net->fast_recovery_tsn)) { + if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) || + SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - /* - * So, first of all do we need to have a Early FR - * timer running? - */ - if ((!TAILQ_EMPTY(&asoc->sent_queue) && - (net->ref_count > 1) && - (net->flight_size < net->cwnd)) || - (reneged_all)) { - /* - * yes, so in this case stop it if its - * running, and then restart it. Reneging - * all is a special case where we want to - * run the Early FR timer and then force the - * last few unacked to be sent, causing us - * to illicit a sack with gaps to force out - * the others. - */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck2); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); - } - SCTP_STAT_INCR(sctps_earlyfrstrid); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); - } else { - /* No, stop it if its running */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck3); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); - } - } - } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { @@ -839,56 +1762,14 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, } continue; } - if (net->net_ack2 > 0) { - /* - * Karn's rule applies to clearing error count, this - * is optional. - */ - net->error_count = 0; - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == - SCTP_ADDR_NOT_REACHABLE) { - /* addr came good */ - net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; - net->dest_state |= SCTP_ADDR_REACHABLE; - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, - SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); - /* now was it the primary? if so restore */ - if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { - (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); - } - } - /* - * JRS 5/14/07 - If CMT PF is on and the destination - * is in PF state, set the destination to active - * state and set the cwnd to one or two MTU's based - * on whether PF1 or PF2 is being used. - * - * Should we stop any running T3 timer here? - */ - if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - net->dest_state &= ~SCTP_ADDR_PF; - net->cwnd = net->mtu * asoc->sctp_cmt_pf; - SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", - net, net->cwnd); - /* - * Since the cwnd value is explicitly set, - * skip the code that updates the cwnd - * value. - */ - goto skip_cwnd_update; - } - } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* - * if (sctp_cmt_on_off == 1 && - * net->fast_retran_loss_recovery && - * net->will_exit_fast_recovery == 0) { @@@ Do something } - * else if (sctp_cmt_on_off == 0 && + * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery + * && net->will_exit_fast_recovery == 0) { @@@ Do something + * } else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif @@ -900,14 +1781,14 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, * If we are in loss recovery we skip any cwnd * update */ - goto skip_cwnd_update; + return; } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || - ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { + ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { /* If the cumulative ack moved we can proceed */ if (net->cwnd <= net->ssthresh) { /* We are in slow start */ @@ -945,23 +1826,6 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, SCTP_CWND_LOG_NO_CUMACK); } } -skip_cwnd_update: - /* - * NOW, according to Karn's rule do we need to restore the - * RTO timer back? Check our net_ack2. If not set then we - * have a ambiguity.. i.e. all data ack'd was sent to more - * than one place. - */ - if (net->net_ack2) { - /* restore any doubled timers */ - net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; - if (net->RTO < stcb->asoc.minrto) { - net->RTO = stcb->asoc.minrto; - } - if (net->RTO > stcb->asoc.maxrto) { - net->RTO = stcb->asoc.maxrto; - } - } } } @@ -981,19 +1845,19 @@ static int use_bandwidth_switch = 1; static inline int between(uint32_t seq1, uint32_t seq2, uint32_t seq3) { - return seq3 - seq2 >= seq1 - seq2; + return (seq3 - seq2 >= seq1 - seq2); } static inline uint32_t htcp_cong_time(struct htcp *ca) { - return sctp_get_tick_count() - ca->last_cong; + return (sctp_get_tick_count() - ca->last_cong); } static inline uint32_t htcp_ccount(struct htcp *ca) { - return htcp_cong_time(ca) / ca->minRTT; + return (htcp_cong_time(ca) / ca->minRTT); } static inline void @@ -1010,39 +1874,39 @@ htcp_reset(struct htcp *ca) static uint32_t htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net) { - net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong; - net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT; - net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB; - return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu); + net->cc_mod.htcp_ca.last_cong = net->cc_mod.htcp_ca.undo_last_cong; + net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.undo_maxRTT; + net->cc_mod.htcp_ca.old_maxB = net->cc_mod.htcp_ca.undo_old_maxB; + return (max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->cc_mod.htcp_ca.beta) * net->mtu)); } #endif static inline void -measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net) +measure_rtt(struct sctp_nets *net) { - uint32_t srtt = net->lastsa >> 3; + uint32_t srtt = net->lastsa >> SCTP_RTT_SHIFT; /* keep track of minimum RTT seen so far, minRTT is zero at first */ - if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT) - net->htcp_ca.minRTT = srtt; + if (net->cc_mod.htcp_ca.minRTT > srtt || !net->cc_mod.htcp_ca.minRTT) + net->cc_mod.htcp_ca.minRTT = srtt; /* max RTT */ - if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) { - if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT) - net->htcp_ca.maxRTT = net->htcp_ca.minRTT; - if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20)) - net->htcp_ca.maxRTT = srtt; + if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->cc_mod.htcp_ca) > 3) { + if (net->cc_mod.htcp_ca.maxRTT < net->cc_mod.htcp_ca.minRTT) + net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.minRTT; + if (net->cc_mod.htcp_ca.maxRTT < srtt && srtt <= net->cc_mod.htcp_ca.maxRTT + MSEC_TO_TICKS(20)) + net->cc_mod.htcp_ca.maxRTT = srtt; } } static void -measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net) +measure_achieved_throughput(struct sctp_nets *net) { uint32_t now = sctp_get_tick_count(); if (net->fast_retran_ip == 0) - net->htcp_ca.bytes_acked = net->net_ack; + net->cc_mod.htcp_ca.bytes_acked = net->net_ack; if (!use_bandwidth_switch) return; @@ -1050,29 +1914,28 @@ measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net) /* achieved throughput calculations */ /* JRS - not 100% sure of this statement */ if (net->fast_retran_ip == 1) { - net->htcp_ca.bytecount = 0; - net->htcp_ca.lasttime = now; + net->cc_mod.htcp_ca.bytecount = 0; + net->cc_mod.htcp_ca.lasttime = now; return; } - net->htcp_ca.bytecount += net->net_ack; - - if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu) - && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT - && net->htcp_ca.minRTT > 0) { - uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime); + net->cc_mod.htcp_ca.bytecount += net->net_ack; + if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) && + (now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) && + (net->cc_mod.htcp_ca.minRTT > 0)) { + uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime); - if (htcp_ccount(&net->htcp_ca) <= 3) { + if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) { /* just after backoff */ - net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi; + net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi = cur_Bi; } else { - net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4; - if (net->htcp_ca.Bi > net->htcp_ca.maxB) - net->htcp_ca.maxB = net->htcp_ca.Bi; - if (net->htcp_ca.minB > net->htcp_ca.maxB) - net->htcp_ca.minB = net->htcp_ca.maxB; + net->cc_mod.htcp_ca.Bi = (3 * net->cc_mod.htcp_ca.Bi + cur_Bi) / 4; + if (net->cc_mod.htcp_ca.Bi > net->cc_mod.htcp_ca.maxB) + net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi; + if (net->cc_mod.htcp_ca.minB > net->cc_mod.htcp_ca.maxB) + net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB; } - net->htcp_ca.bytecount = 0; - net->htcp_ca.lasttime = now; + net->cc_mod.htcp_ca.bytecount = 0; + net->cc_mod.htcp_ca.lasttime = now; } } @@ -1137,27 +2000,27 @@ htcp_alpha_update(struct htcp *ca) * were getting just too full now). */ static void -htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net) +htcp_param_update(struct sctp_nets *net) { - uint32_t minRTT = net->htcp_ca.minRTT; - uint32_t maxRTT = net->htcp_ca.maxRTT; + uint32_t minRTT = net->cc_mod.htcp_ca.minRTT; + uint32_t maxRTT = net->cc_mod.htcp_ca.maxRTT; - htcp_beta_update(&net->htcp_ca, minRTT, maxRTT); - htcp_alpha_update(&net->htcp_ca); + htcp_beta_update(&net->cc_mod.htcp_ca, minRTT, maxRTT); + htcp_alpha_update(&net->cc_mod.htcp_ca); /* * add slowly fading memory for maxRTT to accommodate routing * changes etc */ if (minRTT > 0 && maxRTT > minRTT) - net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100; + net->cc_mod.htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100; } static uint32_t -htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net) +htcp_recalc_ssthresh(struct sctp_nets *net) { - htcp_param_update(stcb, net); - return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu); + htcp_param_update(net); + return (max(((net->cwnd / net->mtu * net->cc_mod.htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu)); } static void @@ -1191,21 +2054,21 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) } } } else { - measure_rtt(stcb, net); + measure_rtt(net); /* * In dangerous area, increase slowly. In theory this is * net->cwnd += alpha / net->cwnd */ /* What is snd_cwnd_cnt?? */ - if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) { + if (((net->partial_bytes_acked / net->mtu * net->cc_mod.htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) { /*- * Does SCTP have a cwnd clamp? * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS). */ net->cwnd += net->mtu; net->partial_bytes_acked = 0; - htcp_alpha_update(&net->htcp_ca); + htcp_alpha_update(&net->cc_mod.htcp_ca); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_CA); @@ -1218,7 +2081,7 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) } } - net->htcp_ca.bytes_acked = net->mtu; + net->cc_mod.htcp_ca.bytes_acked = net->mtu; } } @@ -1227,22 +2090,22 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) static uint32_t htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net) { - return net->ssthresh; + return (net->ssthresh); } #endif static void -htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net) +htcp_init(struct sctp_nets *net) { - memset(&net->htcp_ca, 0, sizeof(struct htcp)); - net->htcp_ca.alpha = ALPHA_BASE; - net->htcp_ca.beta = BETA_MIN; - net->htcp_ca.bytes_acked = net->mtu; - net->htcp_ca.last_cong = sctp_get_tick_count(); + memset(&net->cc_mod.htcp_ca, 0, sizeof(struct htcp)); + net->cc_mod.htcp_ca.alpha = ALPHA_BASE; + net->cc_mod.htcp_ca.beta = BETA_MIN; + net->cc_mod.htcp_ca.bytes_acked = net->mtu; + net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count(); } -void +static void sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) { /* @@ -1251,17 +2114,17 @@ sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) */ net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); net->ssthresh = stcb->asoc.peers_rwnd; - htcp_init(stcb, net); + htcp_init(net); if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); } } -void +static void sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, struct sctp_association *asoc, - int accum_moved, int reneged_all, int will_exit) + int accum_moved, int reneged_all SCTP_UNUSED, int will_exit) { struct sctp_nets *net; @@ -1275,49 +2138,12 @@ sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { - if (compare_with_wrap(asoc->last_acked_seq, - net->fast_recovery_tsn, MAX_TSN) || - (asoc->last_acked_seq == net->fast_recovery_tsn) || - compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || - (net->pseudo_cumack == net->fast_recovery_tsn)) { + if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) || + SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - /* - * So, first of all do we need to have a Early FR - * timer running? - */ - if ((!TAILQ_EMPTY(&asoc->sent_queue) && - (net->ref_count > 1) && - (net->flight_size < net->cwnd)) || - (reneged_all)) { - /* - * yes, so in this case stop it if its - * running, and then restart it. Reneging - * all is a special case where we want to - * run the Early FR timer and then force the - * last few unacked to be sent, causing us - * to illicit a sack with gaps to force out - * the others. - */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck2); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); - } - SCTP_STAT_INCR(sctps_earlyfrstrid); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); - } else { - /* No, stop it if its running */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck3); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); - } - } - } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { @@ -1325,56 +2151,14 @@ sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, } continue; } - if (net->net_ack2 > 0) { - /* - * Karn's rule applies to clearing error count, this - * is optional. - */ - net->error_count = 0; - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == - SCTP_ADDR_NOT_REACHABLE) { - /* addr came good */ - net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; - net->dest_state |= SCTP_ADDR_REACHABLE; - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, - SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); - /* now was it the primary? if so restore */ - if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { - (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); - } - } - /* - * JRS 5/14/07 - If CMT PF is on and the destination - * is in PF state, set the destination to active - * state and set the cwnd to one or two MTU's based - * on whether PF1 or PF2 is being used. - * - * Should we stop any running T3 timer here? - */ - if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - net->dest_state &= ~SCTP_ADDR_PF; - net->cwnd = net->mtu * asoc->sctp_cmt_pf; - SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", - net, net->cwnd); - /* - * Since the cwnd value is explicitly set, - * skip the code that updates the cwnd - * value. - */ - goto skip_cwnd_update; - } - } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* - * if (sctp_cmt_on_off == 1 && - * net->fast_retran_loss_recovery && - * net->will_exit_fast_recovery == 0) { @@@ Do something } - * else if (sctp_cmt_on_off == 0 && + * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery + * && net->will_exit_fast_recovery == 0) { @@@ Do something + * } else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif @@ -1386,55 +2170,38 @@ sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, * If we are in loss recovery we skip any cwnd * update */ - goto skip_cwnd_update; + return; } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || - ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { + ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { htcp_cong_avoid(stcb, net); - measure_achieved_throughput(stcb, net); + measure_achieved_throughput(net); } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_NO_CUMACK); } } -skip_cwnd_update: - /* - * NOW, according to Karn's rule do we need to restore the - * RTO timer back? Check our net_ack2. If not set then we - * have a ambiguity.. i.e. all data ack'd was sent to more - * than one place. - */ - if (net->net_ack2) { - /* restore any doubled timers */ - net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; - if (net->RTO < stcb->asoc.minrto) { - net->RTO = stcb->asoc.minrto; - } - if (net->RTO > stcb->asoc.maxrto) { - net->RTO = stcb->asoc.maxrto; - } - } } } -void +static void sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; /* - * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && + * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || - (asoc->sctp_cmt_on_off == 1)) { + (asoc->sctp_cmt_on_off > 0)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* @@ -1447,8 +2214,8 @@ sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, int old_cwnd = net->cwnd; /* JRS - reset as if state were changed */ - htcp_reset(&net->htcp_ca); - net->ssthresh = htcp_recalc_ssthresh(stcb, net); + htcp_reset(&net->cc_mod.htcp_ca); + net->ssthresh = htcp_recalc_ssthresh(net); net->cwnd = net->ssthresh; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), @@ -1479,13 +2246,6 @@ sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } - /* - * Disable Nonce Sum Checking and store the - * resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, @@ -1501,15 +2261,15 @@ sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, } } -void +static void sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd = net->cwnd; /* JRS - reset as if the state were being changed to timeout */ - htcp_reset(&net->htcp_ca); - net->ssthresh = htcp_recalc_ssthresh(stcb, net); + htcp_reset(&net->cc_mod.htcp_ca); + net->ssthresh = htcp_recalc_ssthresh(net); net->cwnd = net->mtu; net->partial_bytes_acked = 0; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { @@ -1517,49 +2277,76 @@ sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, } } -void -sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, struct sctp_nets *net) -{ - int old_cwnd; - - old_cwnd = net->cwnd; - - sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); - net->htcp_ca.last_cong = sctp_get_tick_count(); - /* - * make a small adjustment to cwnd and force to CA. - */ - if (net->cwnd > net->mtu) - /* drop down one MTU after sending */ - net->cwnd -= net->mtu; - if (net->cwnd < net->ssthresh) - /* still in SS move to CA */ - net->ssthresh = net->cwnd - 1; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); - } -} - -void +static void sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, - struct sctp_nets *net) + struct sctp_nets *net, int in_window, int num_pkt_lost SCTP_UNUSED) { int old_cwnd; old_cwnd = net->cwnd; /* JRS - reset hctp as if state changed */ - htcp_reset(&net->htcp_ca); - SCTP_STAT_INCR(sctps_ecnereducedcwnd); - net->ssthresh = htcp_recalc_ssthresh(stcb, net); - if (net->ssthresh < net->mtu) { - net->ssthresh = net->mtu; - /* here back off the timer as well, to slow us down */ - net->RTO <<= 1; - } - net->cwnd = net->ssthresh; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + if (in_window == 0) { + htcp_reset(&net->cc_mod.htcp_ca); + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + net->ssthresh = htcp_recalc_ssthresh(net); + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + /* here back off the timer as well, to slow us down */ + net->RTO <<= 1; + } + net->cwnd = net->ssthresh; + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } } } + +struct sctp_cc_functions sctp_cc_functions[] = { + { + .sctp_set_initial_cc_param = sctp_set_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack, + .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, + .sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + }, + { + .sctp_set_initial_cc_param = sctp_set_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack, + .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, + .sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + }, + { + .sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack, + .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, + .sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + }, + { + .sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack, + .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, + .sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + .sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted, + .sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged, + .sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins, + .sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack, + .sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option, + .sctp_rtt_calculated = sctp_rtt_rtcc_calculated + } +}; diff --git a/freebsd/sys/netinet/sctp_constants.h b/freebsd/sys/netinet/sctp_constants.h index 212a2f9e..18057009 100644 --- a/freebsd/sys/netinet/sctp_constants.h +++ b/freebsd/sys/netinet/sctp_constants.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,13 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_constants.h,v 1.17 2005/03/06 16:04:17 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_constants_h__ -#define __sctp_constants_h__ +#ifndef _NETINET_SCTP_CONSTANTS_H_ +#define _NETINET_SCTP_CONSTANTS_H_ /* IANA assigned port number for SCTP over UDP encapsulation */ /* For freebsd we cannot bind the port at @@ -91,6 +91,8 @@ __FBSDID("$FreeBSD$"); #define SCTP_KTRHEAD_NAME "sctp_iterator" #define SCTP_KTHREAD_PAGES 0 +#define SCTP_MCORE_NAME "sctp_core_worker" + /* If you support Multi-VRF how big to * make the initial array of VRF's to. @@ -322,7 +324,6 @@ __FBSDID("$FreeBSD$"); #define SCTP_VERSION_NUMBER 0x3 #define MAX_TSN 0xffffffff -#define MAX_SEQ 0xffff /* how many executions every N tick's */ #define SCTP_ITERATOR_MAX_AT_ONCE 20 @@ -344,8 +345,18 @@ __FBSDID("$FreeBSD$"); */ #define SCTP_NO_FR_UNLESS_SEGMENT_SMALLER 1 -/* default max I can burst out after a fast retransmit */ +/* default max I can burst out after a fast retransmit, 0 disables it */ #define SCTP_DEF_MAX_BURST 4 +#define SCTP_DEF_HBMAX_BURST 4 +#define SCTP_DEF_FRMAX_BURST 4 + +/* RTO calculation flag to say if it + * is safe to determine local lan or not. + */ +#define SCTP_RTT_FROM_NON_DATA 0 +#define SCTP_RTT_FROM_DATA 1 + + /* IP hdr (20/40) + 12+2+2 (enet) + sctp common 12 */ #define SCTP_FIRST_MBUF_RESV 68 /* Packet transmit states in the sent field */ @@ -359,6 +370,7 @@ __FBSDID("$FreeBSD$"); #define SCTP_DATAGRAM_ACKED 10010 #define SCTP_DATAGRAM_MARKED 20010 #define SCTP_FORWARD_TSN_SKIP 30010 +#define SCTP_DATAGRAM_NR_ACKED 40010 /* chunk output send from locations */ #define SCTP_OUTPUT_FROM_USR_SEND 0 @@ -378,10 +390,12 @@ __FBSDID("$FreeBSD$"); #define SCTP_OUTPUT_FROM_COOKIE_ACK 14 #define SCTP_OUTPUT_FROM_DRAIN 15 #define SCTP_OUTPUT_FROM_CLOSING 16 +#define SCTP_OUTPUT_FROM_SOCKOPT 17 + /* SCTP chunk types are moved sctp.h for application (NAT, FW) use */ /* align to 32-bit sizes */ -#define SCTP_SIZE32(x) ((((x)+3) >> 2) << 2) +#define SCTP_SIZE32(x) ((((x) + 3) >> 2) << 2) #define IS_SCTP_CONTROL(a) ((a)->chunk_type != SCTP_DATA) #define IS_SCTP_DATA(a) ((a)->chunk_type == SCTP_DATA) @@ -403,7 +417,8 @@ __FBSDID("$FreeBSD$"); #define SCTP_STR_RESET_IN_REQUEST 0x000e #define SCTP_STR_RESET_TSN_REQUEST 0x000f #define SCTP_STR_RESET_RESPONSE 0x0010 -#define SCTP_STR_RESET_ADD_STREAMS 0x0011 +#define SCTP_STR_RESET_ADD_OUT_STREAMS 0x0011 +#define SCTP_STR_RESET_ADD_IN_STREAMS 0x0012 #define SCTP_MAX_RESET_PARAMS 2 #define SCTP_STREAM_RESET_TSN_DELTA 0x1000 @@ -412,8 +427,7 @@ __FBSDID("$FreeBSD$"); /*************0x8000 series*************/ #define SCTP_ECN_CAPABLE 0x8000 -/* ECN Nonce: draft-ladha-sctp-ecn-nonce */ -#define SCTP_ECN_NONCE_SUPPORTED 0x8001 + /* draft-ietf-tsvwg-auth-xxx */ #define SCTP_RANDOM 0x8002 #define SCTP_CHUNK_LIST 0x8003 @@ -445,18 +459,6 @@ __FBSDID("$FreeBSD$"); #define SCTP_HAS_NAT_SUPPORT 0xc007 #define SCTP_NAT_VTAGS 0xc008 -/* Notification error codes */ -#define SCTP_NOTIFY_DATAGRAM_UNSENT 0x0001 -#define SCTP_NOTIFY_DATAGRAM_SENT 0x0002 -#define SCTP_FAILED_THRESHOLD 0x0004 -#define SCTP_HEARTBEAT_SUCCESS 0x0008 -#define SCTP_RESPONSE_TO_USER_REQ 0x0010 -#define SCTP_INTERNAL_ERROR 0x0020 -#define SCTP_SHUTDOWN_GUARD_EXPIRES 0x0040 -#define SCTP_RECEIVED_SACK 0x0080 -#define SCTP_PEER_FAULTY 0x0100 -#define SCTP_ICMP_REFUSED 0x0200 - /* bits for TOS field */ #define SCTP_ECT0_BIT 0x02 #define SCTP_ECT1_BIT 0x01 @@ -496,19 +498,15 @@ __FBSDID("$FreeBSD$"); /* SCTP reachability state for each address */ #define SCTP_ADDR_REACHABLE 0x001 -#define SCTP_ADDR_NOT_REACHABLE 0x002 +#define SCTP_ADDR_NO_PMTUD 0x002 #define SCTP_ADDR_NOHB 0x004 #define SCTP_ADDR_BEING_DELETED 0x008 #define SCTP_ADDR_NOT_IN_ASSOC 0x010 -#define SCTP_ADDR_WAS_PRIMARY 0x020 -#define SCTP_ADDR_SWITCH_PRIMARY 0x040 #define SCTP_ADDR_OUT_OF_SCOPE 0x080 -#define SCTP_ADDR_DOUBLE_SWITCH 0x100 #define SCTP_ADDR_UNCONFIRMED 0x200 #define SCTP_ADDR_REQ_PRIMARY 0x400 /* JRS 5/13/07 - Added potentially failed state for CMT PF */ -#define SCTP_ADDR_PF 0x800 -#define SCTP_REACHABLE_MASK 0x203 +#define SCTP_ADDR_PF 0x800 /* bound address types (e.g. valid address types to allow) */ #define SCTP_BOUND_V6 0x01 @@ -568,14 +566,13 @@ __FBSDID("$FreeBSD$"); #define SCTP_TIMER_TYPE_EVENTWAKE 13 #define SCTP_TIMER_TYPE_STRRESET 14 #define SCTP_TIMER_TYPE_INPKILL 15 -#define SCTP_TIMER_TYPE_EARLYFR 17 -#define SCTP_TIMER_TYPE_ASOCKILL 18 -#define SCTP_TIMER_TYPE_ADDR_WQ 19 -#define SCTP_TIMER_TYPE_ZERO_COPY 20 -#define SCTP_TIMER_TYPE_ZCOPY_SENDQ 21 -#define SCTP_TIMER_TYPE_PRIM_DELETED 22 +#define SCTP_TIMER_TYPE_ASOCKILL 16 +#define SCTP_TIMER_TYPE_ADDR_WQ 17 +#define SCTP_TIMER_TYPE_ZERO_COPY 18 +#define SCTP_TIMER_TYPE_ZCOPY_SENDQ 19 +#define SCTP_TIMER_TYPE_PRIM_DELETED 20 /* add new timers here - and increment LAST */ -#define SCTP_TIMER_TYPE_LAST 23 +#define SCTP_TIMER_TYPE_LAST 21 #define SCTP_IS_TIMER_TYPE_VALID(t) (((t) > SCTP_TIMER_TYPE_NONE) && \ ((t) < SCTP_TIMER_TYPE_LAST)) @@ -644,17 +641,17 @@ __FBSDID("$FreeBSD$"); #define SCTP_DEFAULT_SECRET_LIFE_SEC 3600 #define SCTP_RTO_UPPER_BOUND (60000) /* 60 sec in ms */ -#define SCTP_RTO_UPPER_BOUND_SEC 60 /* for the init timer */ -#define SCTP_RTO_LOWER_BOUND (1000) /* 1 sec in ms */ +#define SCTP_RTO_LOWER_BOUND (1000) /* 1 sec is ms */ #define SCTP_RTO_INITIAL (3000) /* 3 sec in ms */ #define SCTP_INP_KILL_TIMEOUT 20/* number of ms to retry kill of inpcb */ #define SCTP_ASOC_KILL_TIMEOUT 10 /* number of ms to retry kill of inpcb */ -#define SCTP_DEF_MAX_INIT 8 -#define SCTP_DEF_MAX_SEND 10 -#define SCTP_DEF_MAX_PATH_RTX 5 +#define SCTP_DEF_MAX_INIT 8 +#define SCTP_DEF_MAX_SEND 10 +#define SCTP_DEF_MAX_PATH_RTX 5 +#define SCTP_DEF_PATH_PF_THRESHOLD SCTP_DEF_MAX_PATH_RTX #define SCTP_DEF_PMTU_RAISE_SEC 600 /* 10 min between raise attempts */ @@ -669,7 +666,7 @@ __FBSDID("$FreeBSD$"); /* Send window update (incr * this > hiwat). Should be a power of 2 */ #define SCTP_MINIMAL_RWND (4096) /* minimal rwnd */ -#define SCTP_ADDRMAX 24 +#define SCTP_ADDRMAX 16 /* SCTP DEBUG Switch parameters */ #define SCTP_DEBUG_TIMER1 0x00000001 @@ -745,35 +742,29 @@ __FBSDID("$FreeBSD$"); #define SCTP_NOTIFY_ASSOC_DOWN 2 #define SCTP_NOTIFY_INTERFACE_DOWN 3 #define SCTP_NOTIFY_INTERFACE_UP 4 -#define SCTP_NOTIFY_DG_FAIL 5 -#define SCTP_NOTIFY_STRDATA_ERR 6 -#define SCTP_NOTIFY_ASSOC_ABORTED 7 -#define SCTP_NOTIFY_PEER_OPENED_STREAM 8 -#define SCTP_NOTIFY_STREAM_OPENED_OK 9 +#define SCTP_NOTIFY_SENT_DG_FAIL 5 +#define SCTP_NOTIFY_UNSENT_DG_FAIL 6 +#define SCTP_NOTIFY_SPECIAL_SP_FAIL 7 +#define SCTP_NOTIFY_ASSOC_LOC_ABORTED 8 +#define SCTP_NOTIFY_ASSOC_REM_ABORTED 9 #define SCTP_NOTIFY_ASSOC_RESTART 10 -#define SCTP_NOTIFY_HB_RESP 11 -#define SCTP_NOTIFY_ASCONF_SUCCESS 12 -#define SCTP_NOTIFY_ASCONF_FAILED 13 -#define SCTP_NOTIFY_PEER_SHUTDOWN 14 -#define SCTP_NOTIFY_ASCONF_ADD_IP 15 -#define SCTP_NOTIFY_ASCONF_DELETE_IP 16 -#define SCTP_NOTIFY_ASCONF_SET_PRIMARY 17 -#define SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION 18 -#define SCTP_NOTIFY_INTERFACE_CONFIRMED 20 -#define SCTP_NOTIFY_STR_RESET_RECV 21 -#define SCTP_NOTIFY_STR_RESET_SEND 22 -#define SCTP_NOTIFY_STR_RESET_FAILED_OUT 23 -#define SCTP_NOTIFY_STR_RESET_FAILED_IN 24 -#define SCTP_NOTIFY_AUTH_NEW_KEY 25 -#define SCTP_NOTIFY_AUTH_FREE_KEY 26 -#define SCTP_NOTIFY_SPECIAL_SP_FAIL 27 -#define SCTP_NOTIFY_NO_PEER_AUTH 28 -#define SCTP_NOTIFY_SENDER_DRY 29 -#define SCTP_NOTIFY_STR_RESET_ADD_OK 30 -#define SCTP_NOTIFY_STR_RESET_ADD_FAIL 31 -#define SCTP_NOTIFY_STR_RESET_INSTREAM_ADD_OK 32 -#define SCTP_NOTIFY_MAX 32 - +#define SCTP_NOTIFY_PEER_SHUTDOWN 11 +#define SCTP_NOTIFY_ASCONF_ADD_IP 12 +#define SCTP_NOTIFY_ASCONF_DELETE_IP 13 +#define SCTP_NOTIFY_ASCONF_SET_PRIMARY 14 +#define SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION 15 +#define SCTP_NOTIFY_INTERFACE_CONFIRMED 16 +#define SCTP_NOTIFY_STR_RESET_RECV 17 +#define SCTP_NOTIFY_STR_RESET_SEND 18 +#define SCTP_NOTIFY_STR_RESET_FAILED_OUT 19 +#define SCTP_NOTIFY_STR_RESET_FAILED_IN 20 +#define SCTP_NOTIFY_STR_RESET_DENIED_OUT 21 +#define SCTP_NOTIFY_STR_RESET_DENIED_IN 22 +#define SCTP_NOTIFY_AUTH_NEW_KEY 23 +#define SCTP_NOTIFY_AUTH_FREE_KEY 24 +#define SCTP_NOTIFY_NO_PEER_AUTH 25 +#define SCTP_NOTIFY_SENDER_DRY 26 +#define SCTP_NOTIFY_REMOTE_ERROR 27 /* This is the value for messages that are NOT completely * copied down where we will start to split the message. @@ -906,10 +897,13 @@ __FBSDID("$FreeBSD$"); #define SCTP_MAX_DATA_BUNDLING 256 /* modular comparison */ -/* True if a > b (mod = M) */ -#define compare_with_wrap(a, b, M) (((a > b) && ((a - b) < ((M >> 1) + 1))) || \ - ((b > a) && ((b - a) > ((M >> 1) + 1)))) - +/* See RFC 1982 for details. */ +#define SCTP_SSN_GT(a, b) (((a < b) && ((uint16_t)(b - a) > (1U<<15))) || \ + ((a > b) && ((uint16_t)(a - b) < (1U<<15)))) +#define SCTP_SSN_GE(a, b) (SCTP_SSN_GT(a, b) || (a == b)) +#define SCTP_TSN_GT(a, b) (((a < b) && ((uint32_t)(b - a) > (1U<<31))) || \ + ((a > b) && ((uint32_t)(a - b) < (1U<<31)))) +#define SCTP_TSN_GE(a, b) (SCTP_TSN_GT(a, b) || (a == b)) /* Mapping array manipulation routines */ #define SCTP_IS_TSN_PRESENT(arry, gap) ((arry[(gap >> 3)] >> (gap & 0x07)) & 0x01) @@ -921,7 +915,7 @@ __FBSDID("$FreeBSD$"); } else { \ gap = (MAX_TSN - mapping_tsn) + tsn + 1; \ } \ - } while(0) + } while (0) #define SCTP_RETRAN_DONE -1 @@ -948,6 +942,17 @@ __FBSDID("$FreeBSD$"); */ #define SCTP_TIME_WAIT 60 +/* How many micro seconds is the cutoff from + * local lan type rtt's + */ + /* + * We allow 900us for the rtt. + */ +#define SCTP_LOCAL_LAN_RTT 900 +#define SCTP_LAN_UNKNOWN 0 +#define SCTP_LAN_LOCAL 1 +#define SCTP_LAN_INTERNET 2 + #define SCTP_SEND_BUFFER_SPLITTING 0x00000001 #define SCTP_RECV_BUFFER_SPLITTING 0x00000002 @@ -994,19 +999,9 @@ __FBSDID("$FreeBSD$"); #if defined(_KERNEL) - -#define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x)) -#define SCTP_GETPTIME_TIMEVAL(x) (microuptime(x)) +#define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x)) +#define SCTP_GETPTIME_TIMEVAL(x) (microuptime(x)) #endif -/*#if defined(__FreeBSD__) || defined(__APPLE__)*/ -/*#define SCTP_GETTIME_TIMEVAL(x) { \*/ -/* (x)->tv_sec = ticks / 1000; \*/ -/* (x)->tv_usec = (ticks % 1000) * 1000; \*/ -/*}*/ - -/*#else*/ -/*#define SCTP_GETTIME_TIMEVAL(x) (microtime(x))*/ -/*#endif __FreeBSD__ */ #if defined(_KERNEL) || defined(__Userspace__) #define sctp_sowwakeup(inp, so) \ diff --git a/freebsd/sys/netinet/sctp_crc32.c b/freebsd/sys/netinet/sctp_crc32.c index 3d07f505..bf6da201 100644 --- a/freebsd/sys/netinet/sctp_crc32.c +++ b/freebsd/sys/netinet/sctp_crc32.c @@ -2,6 +2,8 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,9 +32,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_crc32.c,v 1.12 2005/03/06 16:04:17 itojun Exp $ */ - - #include __FBSDID("$FreeBSD$"); @@ -124,7 +123,9 @@ void sctp_delayed_cksum(struct mbuf *m, uint32_t offset) { #if defined(SCTP_WITH_NO_CSUM) +#ifdef INVARIANTS panic("sctp_delayed_cksum() called when using no SCTP CRC."); +#endif #else uint32_t checksum; @@ -134,7 +135,7 @@ sctp_delayed_cksum(struct mbuf *m, uint32_t offset) offset += offsetof(struct sctphdr, checksum); if (offset + sizeof(uint32_t) > (uint32_t) (m->m_len)) { - printf("sctp_delayed_cksum(): m->len: %d, off: %d.\n", + SCTP_PRINTF("sctp_delayed_cksum(): m->len: %d, off: %d.\n", (uint32_t) m->m_len, offset); /* * XXX this shouldn't happen, but if it does, the correct diff --git a/freebsd/sys/netinet/sctp_crc32.h b/freebsd/sys/netinet/sctp_crc32.h index eb7a1bc7..3f98be41 100644 --- a/freebsd/sys/netinet/sctp_crc32.h +++ b/freebsd/sys/netinet/sctp_crc32.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,13 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_crc32.h,v 1.5 2004/08/17 04:06:16 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __crc32c_h__ -#define __crc32c_h__ +#ifndef _NETINET_SCTP_CRC32_H_ +#define _NETINET_SCTP_CRC32_H_ #if defined(_KERNEL) #if !defined(SCTP_WITH_NO_CSUM) diff --git a/freebsd/sys/netinet/sctp_dtrace_declare.h b/freebsd/sys/netinet/sctp_dtrace_declare.h new file mode 100644 index 00000000..f6fe48bd --- /dev/null +++ b/freebsd/sys/netinet/sctp_dtrace_declare.h @@ -0,0 +1,80 @@ +/*- + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#ifndef _NETINET_SCTP_DTRACE_DECLARE_H_ +#define _NETINET_SCTP_DTRACE_DECLARE_H_ + +#include +#include +#include + +/* Declare the SCTP provider */ +SDT_PROVIDER_DECLARE(sctp); + +/* The probes we have so far: */ + +/* One to track a net's cwnd */ +/* initial */ +SDT_PROBE_DECLARE(sctp, cwnd, net, init); +/* update at a ack -- increase */ +SDT_PROBE_DECLARE(sctp, cwnd, net, ack); +/* update at a fast retransmit -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, fr); +/* update at a time-out -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, to); +/* update at a burst-limit -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, bl); +/* update at a ECN -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, ecn); +/* update at a Packet-Drop -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, pd); +/* Rttvar probe declaration */ +SDT_PROBE_DECLARE(sctp, cwnd, net, rttvar); +SDT_PROBE_DECLARE(sctp, cwnd, net, rttstep); + +/* One to track an associations rwnd */ +SDT_PROBE_DECLARE(sctp, rwnd, assoc, val); + +/* One to track a net's flight size */ +SDT_PROBE_DECLARE(sctp, flightsize, net, val); + +/* One to track an associations flight size */ +SDT_PROBE_DECLARE(sctp, flightsize, assoc, val); + + + + + + +#endif diff --git a/freebsd/sys/netinet/sctp_dtrace_define.h b/freebsd/sys/netinet/sctp_dtrace_define.h new file mode 100644 index 00000000..1eb28f65 --- /dev/null +++ b/freebsd/sys/netinet/sctp_dtrace_define.h @@ -0,0 +1,233 @@ +/*- + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#ifndef _NETINET_SCTP_DTRACE_DEFINE_H_ +#define _NETINET_SCTP_DTRACE_DEFINE_H_ + +#include +#include +#include + +SDT_PROVIDER_DEFINE(sctp); + +/********************************************************/ +/* Cwnd probe - tracks changes in the congestion window on a netp */ +/********************************************************/ +/* Initial */ +SDT_PROBE_DEFINE(sctp, cwnd, net, init, init); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 4, "int"); + + +/* ACK-INCREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, ack, ack); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 4, "int"); + + +/* ACK-INCREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, rttvar, rttvar); +/* The Vtag << 32 | localport << 16 | remoteport */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 0, "uint64_t"); +/* obw | nbw */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 1, "uint64_t"); +/* bwrtt | newrtt */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 2, "uint64_t"); +/* flight */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 3, "uint64_t"); +/* (cwnd << 32) | point << 16 | retval(0/1) */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 4, "uint64_t"); + + +SDT_PROBE_DEFINE(sctp, cwnd, net, rttstep, rttstep); +/* The Vtag << 32 | localport << 16 | remoteport */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 0, "uint64_t"); +/* obw | nbw */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 1, "uint64_t"); +/* bwrtt | nrtt */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 2, "uint64_t"); +/* cwnd_saved | stepcnt << 16 | oldstep */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 3, "uint64_t"); +/* (cwnd << 32) | point << 16 | retval(0/1) */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 4, "uint64_t"); + + +/* FastRetransmit-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, fr, fr); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 4, "int"); + + +/* TimeOut-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, to, to); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 4, "int"); + + +/* BurstLimit-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, bl, bl); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 4, "int"); + + +/* ECN-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, ecn, ecn); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 4, "int"); + + +/* PacketDrop-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, pd, pd); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 4, "int"); + + + +/********************************************************/ +/* Rwnd probe - tracks changes in the receiver window for an assoc */ +/********************************************************/ +SDT_PROBE_DEFINE(sctp, rwnd, assoc, val, val); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 1, "uint32_t"); +/* The up/down amount */ +SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 2, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 3, "int"); + +/********************************************************/ +/* flight probe - tracks changes in the flight size on a net or assoc */ +/********************************************************/ +SDT_PROBE_DEFINE(sctp, flightsize, net, val, val); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 2, "uintptr_t"); +/* The up/down amount */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 4, "int"); +/********************************************************/ +/* The total flight version */ +/********************************************************/ +SDT_PROBE_DEFINE(sctp, flightsize, assoc, val, val); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 1, "uint32_t"); +/* The up/down amount */ +SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 2, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 3, "int"); + +#endif diff --git a/freebsd/sys/netinet/sctp_header.h b/freebsd/sys/netinet/sctp_header.h index 980845dc..261d9eb6 100644 --- a/freebsd/sys/netinet/sctp_header.h +++ b/freebsd/sys/netinet/sctp_header.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,13 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_header.h,v 1.14 2005/03/06 16:04:17 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_header_h__ -#define __sctp_header_h__ +#ifndef _NETINET_SCTP_HEADER_H_ +#define _NETINET_SCTP_HEADER_H_ #include #include @@ -79,8 +79,7 @@ struct sctp_host_name_param { /* supported address type */ struct sctp_supported_addr_param { struct sctp_paramhdr ph;/* type=SCTP_SUPPORTED_ADDRTYPE */ - uint16_t addr_type[SCTP_ARRAY_MIN_LEN]; /* array of supported address - * types */ + uint16_t addr_type[2]; /* array of supported address types */ } SCTP_PACKED; /* ECN parameter */ @@ -96,9 +95,10 @@ struct sctp_heartbeat_info_param { uint32_t time_value_2; uint32_t random_value1; uint32_t random_value2; - uint16_t user_req; uint8_t addr_family; uint8_t addr_len; + /* make sure that this structure is 4 byte aligned */ + uint8_t padding[2]; char address[SCTP_ADDRMAX]; } SCTP_PACKED; @@ -142,12 +142,6 @@ struct sctp_supported_chunk_types_param { } SCTP_PACKED; -/* ECN Nonce: draft-ladha-sctp-ecn-nonce */ -struct sctp_ecn_nonce_supported_param { - struct sctp_paramhdr ph;/* type = 0x8001 len = 4 */ -} SCTP_PACKED; - - /* * Structures for DATA chunks */ @@ -360,9 +354,15 @@ struct sctp_cookie_ack_chunk { } SCTP_PACKED; /* Explicit Congestion Notification Echo (ECNE) */ +struct old_sctp_ecne_chunk { + struct sctp_chunkhdr ch; + uint32_t tsn; +} SCTP_PACKED; + struct sctp_ecne_chunk { struct sctp_chunkhdr ch; uint32_t tsn; + uint32_t num_pkts_since_cwr; } SCTP_PACKED; /* Congestion Window Reduced (CWR) */ @@ -497,28 +497,19 @@ struct sctp_stream_reset_add_strm { uint16_t reserved; } SCTP_PACKED; -#define SCTP_STREAM_RESET_NOTHING 0x00000000 /* Nothing for me to do */ -#define SCTP_STREAM_RESET_PERFORMED 0x00000001 /* Did it */ -#define SCTP_STREAM_RESET_DENIED 0x00000002 /* refused to do it */ -#define SCTP_STREAM_RESET_ERROR_STR 0x00000003 /* bad Stream no */ -#define SCTP_STREAM_RESET_TRY_LATER 0x00000004 /* collision, try again */ -#define SCTP_STREAM_RESET_BAD_SEQNO 0x00000005 /* bad str-reset seq no */ +#define SCTP_STREAM_RESET_RESULT_NOTHING_TO_DO 0x00000000 /* XXX: unused */ +#define SCTP_STREAM_RESET_RESULT_PERFORMED 0x00000001 +#define SCTP_STREAM_RESET_RESULT_DENIED 0x00000002 +#define SCTP_STREAM_RESET_RESULT_ERR__WRONG_SSN 0x00000003 /* XXX: unused */ +#define SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS 0x00000004 +#define SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO 0x00000005 +#define SCTP_STREAM_RESET_RESULT_IN_PROGRESS 0x00000006 /* XXX: unused */ /* * convience structures, note that if you are making a request for specific * streams then the request will need to be an overlay structure. */ -struct sctp_stream_reset_out_req { - struct sctp_chunkhdr ch; - struct sctp_stream_reset_out_request sr_req; -} SCTP_PACKED; - -struct sctp_stream_reset_in_req { - struct sctp_chunkhdr ch; - struct sctp_stream_reset_in_request sr_req; -} SCTP_PACKED; - struct sctp_stream_reset_tsn_req { struct sctp_chunkhdr ch; struct sctp_stream_reset_tsn_request sr_req; diff --git a/freebsd/sys/netinet/sctp_indata.c b/freebsd/sys/netinet/sctp_indata.c index 082e43b8..c82e570f 100644 --- a/freebsd/sys/netinet/sctp_indata.c +++ b/freebsd/sys/netinet/sctp_indata.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,8 +32,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_indata.c,v 1.36 2005/03/06 16:04:17 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -137,7 +137,7 @@ sctp_build_readq_entry(struct sctp_tcb *stcb, read_queue_e->sinfo_ssn = stream_seq; read_queue_e->sinfo_flags = (flags << 8); read_queue_e->sinfo_ppid = ppid; - read_queue_e->sinfo_context = stcb->asoc.context; + read_queue_e->sinfo_context = context; read_queue_e->sinfo_timetolive = 0; read_queue_e->sinfo_tsn = tsn; read_queue_e->sinfo_cumtsn = tsn; @@ -201,95 +201,114 @@ failed_build: struct mbuf * -sctp_build_ctl_nchunk(struct sctp_inpcb *inp, - struct sctp_sndrcvinfo *sinfo) +sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo) { + struct sctp_extrcvinfo *seinfo; struct sctp_sndrcvinfo *outinfo; + struct sctp_rcvinfo *rcvinfo; + struct sctp_nxtinfo *nxtinfo; struct cmsghdr *cmh; struct mbuf *ret; int len; - int use_extended = 0; + int use_extended; + int provide_nxt; - if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { - /* user does not want the sndrcv ctl */ + if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT) && + sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO) && + sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) { + /* user does not want any ancillary data */ return (NULL); } - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { - use_extended = 1; - len = CMSG_LEN(sizeof(struct sctp_extrcvinfo)); + len = 0; + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) { + len += CMSG_SPACE(sizeof(struct sctp_rcvinfo)); + } + seinfo = (struct sctp_extrcvinfo *)sinfo; + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO) && + (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) { + provide_nxt = 1; + len += CMSG_SPACE(sizeof(struct sctp_rcvinfo)); } else { - len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); + provide_nxt = 0; + } + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { + use_extended = 1; + len += CMSG_SPACE(sizeof(struct sctp_extrcvinfo)); + } else { + use_extended = 0; + len += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo)); + } + } else { + use_extended = 0; } - - ret = sctp_get_mbuf_for_msg(len, - 0, M_DONTWAIT, 1, MT_DATA); - + ret = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA); if (ret == NULL) { /* No space */ return (ret); } - /* We need a CMSG header followed by the struct */ + SCTP_BUF_LEN(ret) = 0; + + /* We need a CMSG header followed by the struct */ cmh = mtod(ret, struct cmsghdr *); - outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh); - cmh->cmsg_level = IPPROTO_SCTP; - if (use_extended) { - cmh->cmsg_type = SCTP_EXTRCV; - cmh->cmsg_len = len; - memcpy(outinfo, sinfo, len); - } else { - cmh->cmsg_type = SCTP_SNDRCV; - cmh->cmsg_len = len; - *outinfo = *sinfo; + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) { + cmh->cmsg_level = IPPROTO_SCTP; + cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_rcvinfo)); + cmh->cmsg_type = SCTP_RCVINFO; + rcvinfo = (struct sctp_rcvinfo *)CMSG_DATA(cmh); + rcvinfo->rcv_sid = sinfo->sinfo_stream; + rcvinfo->rcv_ssn = sinfo->sinfo_ssn; + rcvinfo->rcv_flags = sinfo->sinfo_flags; + rcvinfo->rcv_ppid = sinfo->sinfo_ppid; + rcvinfo->rcv_tsn = sinfo->sinfo_tsn; + rcvinfo->rcv_cumtsn = sinfo->sinfo_cumtsn; + rcvinfo->rcv_context = sinfo->sinfo_context; + rcvinfo->rcv_assoc_id = sinfo->sinfo_assoc_id; + cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_rcvinfo))); + SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_rcvinfo)); + } + if (provide_nxt) { + cmh->cmsg_level = IPPROTO_SCTP; + cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_nxtinfo)); + cmh->cmsg_type = SCTP_NXTINFO; + nxtinfo = (struct sctp_nxtinfo *)CMSG_DATA(cmh); + nxtinfo->nxt_sid = seinfo->sreinfo_next_stream; + nxtinfo->nxt_flags = 0; + if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) { + nxtinfo->nxt_flags |= SCTP_UNORDERED; + } + if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) { + nxtinfo->nxt_flags |= SCTP_NOTIFICATION; + } + if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) { + nxtinfo->nxt_flags |= SCTP_COMPLETE; + } + nxtinfo->nxt_ppid = seinfo->sreinfo_next_ppid; + nxtinfo->nxt_length = seinfo->sreinfo_next_length; + nxtinfo->nxt_assoc_id = seinfo->sreinfo_next_aid; + cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_nxtinfo))); + SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_nxtinfo)); + } + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { + cmh->cmsg_level = IPPROTO_SCTP; + outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh); + if (use_extended) { + cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_extrcvinfo)); + cmh->cmsg_type = SCTP_EXTRCV; + memcpy(outinfo, sinfo, sizeof(struct sctp_extrcvinfo)); + SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_extrcvinfo)); + } else { + cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); + cmh->cmsg_type = SCTP_SNDRCV; + *outinfo = *sinfo; + SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo)); + } } - SCTP_BUF_LEN(ret) = cmh->cmsg_len; return (ret); } -char * -sctp_build_ctl_cchunk(struct sctp_inpcb *inp, - int *control_len, - struct sctp_sndrcvinfo *sinfo) -{ - struct sctp_sndrcvinfo *outinfo; - struct cmsghdr *cmh; - char *buf; - int len; - int use_extended = 0; - - if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { - /* user does not want the sndrcv ctl */ - return (NULL); - } - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { - use_extended = 1; - len = CMSG_LEN(sizeof(struct sctp_extrcvinfo)); - } else { - len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); - } - SCTP_MALLOC(buf, char *, len, SCTP_M_CMSG); - if (buf == NULL) { - /* No space */ - return (buf); - } - /* We need a CMSG header followed by the struct */ - cmh = (struct cmsghdr *)buf; - outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh); - cmh->cmsg_level = IPPROTO_SCTP; - if (use_extended) { - cmh->cmsg_type = SCTP_EXTRCV; - cmh->cmsg_len = len; - memcpy(outinfo, sinfo, len); - } else { - cmh->cmsg_type = SCTP_SNDRCV; - cmh->cmsg_len = len; - *outinfo = *sinfo; - } - *control_len = len; - return (buf); -} - static void sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn) { @@ -300,7 +319,7 @@ sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn) return; } cumackp1 = asoc->cumulative_tsn + 1; - if (compare_with_wrap(cumackp1, tsn, MAX_TSN)) { + if (SCTP_TSN_GT(cumackp1, tsn)) { /* * this tsn is behind the cum ack and thus we don't need to * worry about it being moved from one to the other. @@ -309,7 +328,7 @@ sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn) } SCTP_CALC_TSN_TO_GAP(gap, tsn, asoc->mapping_array_base_tsn); if (!SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) { - printf("gap:%x tsn:%x\n", gap, tsn); + SCTP_PRINTF("gap:%x tsn:%x\n", gap, tsn); sctp_print_mapping_array(asoc); #ifdef INVARIANTS panic("Things are really messed up now!!"); @@ -317,13 +336,12 @@ sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn) } SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap); - if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } if (tsn == asoc->highest_tsn_inside_map) { /* We must back down to see what the new highest is */ - for (i = tsn - 1; (compare_with_wrap(i, asoc->mapping_array_base_tsn, MAX_TSN) || - (i == asoc->mapping_array_base_tsn)); i--) { + for (i = tsn - 1; SCTP_TSN_GE(i, asoc->mapping_array_base_tsn); i--) { SCTP_CALC_TSN_TO_GAP(gap, i, asoc->mapping_array_base_tsn); if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) { asoc->highest_tsn_inside_map = i; @@ -346,13 +364,12 @@ sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn) static void sctp_service_reassembly(struct sctp_tcb *stcb, struct sctp_association *asoc) { - struct sctp_tmit_chunk *chk; + struct sctp_tmit_chunk *chk, *nchk; uint16_t nxt_todel; uint16_t stream_no; int end = 0; int cntDel; - - struct sctp_queued_to_read *control, *ctl, *ctlat; + struct sctp_queued_to_read *control, *ctl, *nctl; if (stcb == NULL) return; @@ -364,8 +381,7 @@ sctp_service_reassembly(struct sctp_tcb *stcb, struct sctp_association *asoc) /* socket above is long gone or going.. */ abandon: asoc->fragmented_delivery_inprogress = 0; - chk = TAILQ_FIRST(&asoc->reasmqueue); - while (chk) { + TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) { TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); asoc->size_on_reasm_queue -= chk->send_size; sctp_ucount_decr(asoc->cnt_on_reasm_queue); @@ -378,18 +394,13 @@ abandon: chk->data = NULL; } /* Now free the address and data */ - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->reasmqueue); } return; } SCTP_TCB_LOCK_ASSERT(stcb); - do { - chk = TAILQ_FIRST(&asoc->reasmqueue); - if (chk == NULL) { - return; - } + TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) { if (chk->rec.data.TSN_seq != (asoc->tsn_last_delivered + 1)) { /* Can't deliver more :< */ return; @@ -487,7 +498,7 @@ abandon: sctp_ucount_decr(asoc->cnt_on_reasm_queue); /* free up the chk */ chk->data = NULL; - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); if (asoc->fragmented_delivery_inprogress == 0) { /* @@ -498,33 +509,26 @@ abandon: strm = &asoc->strmin[stream_no]; nxt_todel = strm->last_sequence_delivered + 1; - ctl = TAILQ_FIRST(&strm->inqueue); - if (ctl && (nxt_todel == ctl->sinfo_ssn)) { - while (ctl != NULL) { - /* Deliver more if we can. */ - if (nxt_todel == ctl->sinfo_ssn) { - ctlat = TAILQ_NEXT(ctl, next); - TAILQ_REMOVE(&strm->inqueue, ctl, next); - asoc->size_on_all_streams -= ctl->length; - sctp_ucount_decr(asoc->cnt_on_all_streams); - strm->last_sequence_delivered++; - sctp_mark_non_revokable(asoc, ctl->sinfo_tsn); - sctp_add_to_readq(stcb->sctp_ep, stcb, - ctl, - &stcb->sctp_socket->so_rcv, 1, - SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); - ctl = ctlat; - } else { - break; - } - nxt_todel = strm->last_sequence_delivered + 1; + TAILQ_FOREACH_SAFE(ctl, &strm->inqueue, next, nctl) { + /* Deliver more if we can. */ + if (nxt_todel == ctl->sinfo_ssn) { + TAILQ_REMOVE(&strm->inqueue, ctl, next); + asoc->size_on_all_streams -= ctl->length; + sctp_ucount_decr(asoc->cnt_on_all_streams); + strm->last_sequence_delivered++; + sctp_mark_non_revokable(asoc, ctl->sinfo_tsn); + sctp_add_to_readq(stcb->sctp_ep, stcb, + ctl, + &stcb->sctp_socket->so_rcv, 1, + SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); + } else { + break; } + nxt_todel = strm->last_sequence_delivered + 1; } break; } - /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->reasmqueue); - } while (chk); + } } /* @@ -574,9 +578,7 @@ sctp_queue_data_to_stream(struct sctp_tcb *stcb, struct sctp_association *asoc, (uint32_t) control->sinfo_stream, (uint32_t) strm->last_sequence_delivered, (uint32_t) nxt_todel); - if (compare_with_wrap(strm->last_sequence_delivered, - control->sinfo_ssn, MAX_SEQ) || - (strm->last_sequence_delivered == control->sinfo_ssn)) { + if (SCTP_SSN_GE(strm->last_sequence_delivered, control->sinfo_ssn)) { /* The incoming sseq is behind where we last delivered? */ SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort association\n", control->sinfo_ssn, strm->last_sequence_delivered); @@ -605,9 +607,7 @@ protocol_error: *ippp = ((control->sinfo_stream << 16) | control->sinfo_ssn); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1; - sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; @@ -628,12 +628,10 @@ protocol_error: control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); - control = TAILQ_FIRST(&strm->inqueue); - while (control != NULL) { + TAILQ_FOREACH_SAFE(control, &strm->inqueue, next, at) { /* all delivered */ nxt_todel = strm->last_sequence_delivered + 1; if (nxt_todel == control->sinfo_ssn) { - at = TAILQ_NEXT(control, next); TAILQ_REMOVE(&strm->inqueue, control, next); asoc->size_on_all_streams -= control->length; sctp_ucount_decr(asoc->cnt_on_all_streams); @@ -654,7 +652,6 @@ protocol_error: &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); - control = at; continue; } break; @@ -665,9 +662,7 @@ protocol_error: * Ok, we did not deliver this guy, find the correct place * to put it on the queue. */ - if ((compare_with_wrap(asoc->cumulative_tsn, - control->sinfo_tsn, MAX_TSN)) || - (control->sinfo_tsn == asoc->cumulative_tsn)) { + if (SCTP_TSN_GE(asoc->cumulative_tsn, control->sinfo_tsn)) { goto protocol_error; } if (TAILQ_EMPTY(&strm->inqueue)) { @@ -678,8 +673,7 @@ protocol_error: TAILQ_INSERT_HEAD(&strm->inqueue, control, next); } else { TAILQ_FOREACH(at, &strm->inqueue, next) { - if (compare_with_wrap(at->sinfo_ssn, - control->sinfo_ssn, MAX_SEQ)) { + if (SCTP_SSN_GT(at->sinfo_ssn, control->sinfo_ssn)) { /* * one in queue is bigger than the * new one, insert before this one @@ -758,7 +752,7 @@ sctp_is_all_msg_on_reasm(struct sctp_association *asoc, uint32_t * t_size) return (0); } tsn = chk->rec.data.TSN_seq; - while (chk) { + TAILQ_FOREACH(chk, &asoc->reasmqueue, sctp_next) { if (tsn != chk->rec.data.TSN_seq) { return (0); } @@ -767,7 +761,6 @@ sctp_is_all_msg_on_reasm(struct sctp_association *asoc, uint32_t * t_size) return (1); } tsn++; - chk = TAILQ_NEXT(chk, sctp_next); } return (0); } @@ -850,8 +843,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_tmit_chunk *chk, int *abort_flag) { struct mbuf *oper; - uint32_t cum_ackp1, last_tsn, prev_tsn, post_tsn; - u_char last_flags; + uint32_t cum_ackp1, prev_tsn, post_tsn; struct sctp_tmit_chunk *at, *prev, *next; prev = next = NULL; @@ -898,8 +890,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2; - sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; } else if (asoc->fragmented_delivery_inprogress && (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) { @@ -930,8 +921,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3; - sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; } else if (asoc->fragmented_delivery_inprogress) { /* @@ -967,8 +957,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_4; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; } else if ((asoc->fragment_flags & SCTP_DATA_UNORDERED) != SCTP_DATA_UNORDERED && @@ -1001,8 +990,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_5; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; } } @@ -1011,8 +999,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, } /* Find its place */ TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) { - if (compare_with_wrap(at->rec.data.TSN_seq, - chk->rec.data.TSN_seq, MAX_TSN)) { + if (SCTP_TSN_GT(at->rec.data.TSN_seq, chk->rec.data.TSN_seq)) { /* * one in queue is bigger than the new one, insert * before this one @@ -1036,11 +1023,9 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, sctp_m_freem(chk->data); chk->data = NULL; } - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } else { - last_flags = at->rec.data.rcv_flags; - last_tsn = at->rec.data.TSN_seq; prev = at; if (TAILQ_NEXT(at, sctp_next) == NULL) { /* @@ -1099,8 +1084,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_6; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } @@ -1136,9 +1120,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } @@ -1175,9 +1157,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } @@ -1211,9 +1191,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } @@ -1256,9 +1234,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } @@ -1298,9 +1274,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } @@ -1337,9 +1311,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } @@ -1376,9 +1348,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } @@ -1403,8 +1373,7 @@ sctp_does_tsn_belong_to_reasm(struct sctp_association *asoc, uint32_t tsn_est; TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) { - if (compare_with_wrap(TSN_seq, - at->rec.data.TSN_seq, MAX_TSN)) { + if (SCTP_TSN_GT(TSN_seq, at->rec.data.TSN_seq)) { /* is it one bigger? */ tsn_est = at->rec.data.TSN_seq + 1; if (tsn_est == TSN_seq) { @@ -1462,7 +1431,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_tmit_chunk *chk; uint32_t tsn, gap; struct mbuf *dmbuf; - int indx, the_len; + int the_len; int need_reasm_check = 0; uint16_t strmno, strmseq; struct mbuf *oper; @@ -1487,8 +1456,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, return (0); } SCTP_LTRACE_CHK(stcb->sctp_ep, stcb, ch->ch.chunk_type, tsn); - if (compare_with_wrap(asoc->cumulative_tsn, tsn, MAX_TSN) || - asoc->cumulative_tsn == tsn) { + if (SCTP_TSN_GE(asoc->cumulative_tsn, tsn)) { /* It is a duplicate */ SCTP_STAT_INCR(sctps_recvdupdata); if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) { @@ -1512,7 +1480,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, return (0); } } - if (compare_with_wrap(tsn, *high_tsn, MAX_TSN)) { + if (SCTP_TSN_GT(tsn, *high_tsn)) { *high_tsn = tsn; } /* See if we have received this one already */ @@ -1542,7 +1510,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, struct mbuf *op_err; op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC); - sctp_abort_an_association(stcb->sctp_ep, stcb, 0, op_err, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } @@ -1563,7 +1531,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, */ if (stcb->sctp_socket->so_rcv.sb_cc) { /* some to read, wake-up */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(stcb->sctp_ep); @@ -1579,13 +1547,13 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, } #endif sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } /* now is it in the mapping array of what we have accepted? */ - if (compare_with_wrap(tsn, asoc->highest_tsn_inside_map, MAX_TSN) && - compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_map) && + SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { /* Nope not in the valid range dump it */ sctp_set_rwnd(stcb, asoc); if ((asoc->cnt_on_all_streams + @@ -1595,7 +1563,6 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, } else { SCTP_STAT_INCR(sctps_datadroprwnd); } - indx = *break_flag; *break_flag = 1; return (0); } @@ -1630,7 +1597,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, SCTP_STAT_INCR(sctps_badsid); SCTP_TCB_LOCK_ASSERT(stcb); SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); - if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } if (tsn == (asoc->cumulative_tsn + 1)) { @@ -1666,9 +1633,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, if ((chunk_flags & SCTP_DATA_FIRST_FRAG) && (TAILQ_EMPTY(&asoc->resetHead)) && (chunk_flags & SCTP_DATA_UNORDERED) == 0 && - (compare_with_wrap(asoc->strmin[strmno].last_sequence_delivered, - strmseq, MAX_SEQ) || - asoc->strmin[strmno].last_sequence_delivered == strmseq)) { + SCTP_SSN_GE(asoc->strmin[strmno].last_sequence_delivered, strmseq)) { /* The incoming sseq is behind where we last delivered? */ SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ:%d delivered:%d from peer, Abort!\n", strmseq, asoc->strmin[strmno].last_sequence_delivered); @@ -1692,8 +1657,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14; - sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } @@ -1711,12 +1675,10 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = dmbuf; - while (mat) { + for (mat = dmbuf; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -1737,10 +1699,8 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, struct mbuf *lat; l_len = 0; - lat = dmbuf; - while (lat) { + for (lat = dmbuf; lat; lat = SCTP_BUF_NEXT(lat)) { l_len += SCTP_BUF_LEN(lat); - lat = SCTP_BUF_NEXT(lat); } } if (l_len > the_len) { @@ -1779,7 +1739,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, goto failed_express_del; } SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); - if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } sctp_add_to_readq(stcb->sctp_ep, stcb, @@ -1817,15 +1777,10 @@ failed_express_del: if (tsn == (control->sinfo_tsn + 1)) { /* Yep, we can add it on */ int end = 0; - uint32_t cumack; if (chunk_flags & SCTP_DATA_LAST_FRAG) { end = 1; } - cumack = asoc->cumulative_tsn; - if ((cumack + 1) == tsn) - cumack = tsn; - if (sctp_append_to_readq(stcb->sctp_ep, stcb, control, dmbuf, end, tsn, &stcb->sctp_socket->so_rcv)) { @@ -1833,7 +1788,7 @@ failed_express_del: goto failed_pdapi_express_del; } SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); - if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } SCTP_STAT_INCR(sctps_recvexpressm); @@ -1867,12 +1822,12 @@ failed_pdapi_express_del: control = NULL; if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) { SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); - if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } } else { SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap); - if (compare_with_wrap(tsn, asoc->highest_tsn_inside_map, MAX_TSN)) { + if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_map)) { asoc->highest_tsn_inside_map = tsn; } } @@ -1965,9 +1920,7 @@ failed_pdapi_express_del: *ippp = ((strmno << 16) | strmseq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15; - sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } else { @@ -2003,9 +1956,7 @@ failed_pdapi_express_del: *ippp = ((strmno << 16) | strmseq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } @@ -2050,9 +2001,7 @@ failed_pdapi_express_del: *ippp = ((strmno << 16) | strmseq); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17; - sctp_abort_an_association(stcb->sctp_ep, - stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); - + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } @@ -2082,8 +2031,7 @@ failed_pdapi_express_del: * singletons I must worry about. */ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) && - ((compare_with_wrap(tsn, liste->tsn, MAX_TSN))) - ) { + SCTP_TSN_GT(tsn, liste->tsn)) { /* * yep its past where we need to reset... go * ahead and queue it. @@ -2092,14 +2040,13 @@ failed_pdapi_express_del: /* first one on */ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next); } else { - struct sctp_queued_to_read *ctlOn; + struct sctp_queued_to_read *ctlOn, + *nctlOn; unsigned char inserted = 0; - ctlOn = TAILQ_FIRST(&asoc->pending_reply_queue); - while (ctlOn) { - if (compare_with_wrap(control->sinfo_tsn, - ctlOn->sinfo_tsn, MAX_TSN)) { - ctlOn = TAILQ_NEXT(ctlOn, next); + TAILQ_FOREACH_SAFE(ctlOn, &asoc->pending_reply_queue, next, nctlOn) { + if (SCTP_TSN_GT(control->sinfo_tsn, ctlOn->sinfo_tsn)) { + continue; } else { /* found it */ TAILQ_INSERT_BEFORE(ctlOn, control, next); @@ -2159,36 +2106,34 @@ finish_express_del: } /* check the special flag for stream resets */ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) && - ((compare_with_wrap(asoc->cumulative_tsn, liste->tsn, MAX_TSN)) || - (asoc->cumulative_tsn == liste->tsn)) - ) { + SCTP_TSN_GE(asoc->cumulative_tsn, liste->tsn)) { /* * we have finished working through the backlogged TSN's now * time to reset streams. 1: call reset function. 2: free * pending_reply space 3: distribute any chunks in * pending_reply_queue. */ - struct sctp_queued_to_read *ctl; + struct sctp_queued_to_read *ctl, *nctl; - sctp_reset_in_stream(stcb, liste->number_entries, liste->req.list_of_streams); + sctp_reset_in_stream(stcb, liste->number_entries, liste->list_of_streams); TAILQ_REMOVE(&asoc->resetHead, liste, next_resp); SCTP_FREE(liste, SCTP_M_STRESET); /* sa_ignore FREED_MEMORY */ liste = TAILQ_FIRST(&asoc->resetHead); - ctl = TAILQ_FIRST(&asoc->pending_reply_queue); - if (ctl && (liste == NULL)) { + if (TAILQ_EMPTY(&asoc->resetHead)) { /* All can be removed */ - while (ctl) { + TAILQ_FOREACH_SAFE(ctl, &asoc->pending_reply_queue, next, nctl) { TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next); sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag); if (*abort_flag) { return (0); } - ctl = TAILQ_FIRST(&asoc->pending_reply_queue); } - } else if (ctl) { - /* more than one in queue */ - while (!compare_with_wrap(ctl->sinfo_tsn, liste->tsn, MAX_TSN)) { + } else { + TAILQ_FOREACH_SAFE(ctl, &asoc->pending_reply_queue, next, nctl) { + if (SCTP_TSN_GT(ctl->sinfo_tsn, liste->tsn)) { + break; + } /* * if ctl->sinfo_tsn is <= liste->tsn we can * process it which is the NOT of @@ -2199,7 +2144,6 @@ finish_express_del: if (*abort_flag) { return (0); } - ctl = TAILQ_FIRST(&asoc->pending_reply_queue); } } /* @@ -2274,7 +2218,6 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) uint32_t old_cumack, old_base, old_highest, highest_tsn; asoc = &stcb->asoc; - at = 0; old_cumack = asoc->cumulative_tsn; old_base = asoc->mapping_array_base_tsn; @@ -2296,8 +2239,8 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) } asoc->cumulative_tsn = asoc->mapping_array_base_tsn + (at - 1); - if (compare_with_wrap(asoc->cumulative_tsn, asoc->highest_tsn_inside_map, MAX_TSN) && - compare_with_wrap(asoc->cumulative_tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->cumulative_tsn, asoc->highest_tsn_inside_map) && + SCTP_TSN_GT(asoc->cumulative_tsn, asoc->highest_tsn_inside_nr_map)) { #ifdef INVARIANTS panic("huh, cumack 0x%x greater than high-tsn 0x%x in map", asoc->cumulative_tsn, asoc->highest_tsn_inside_map); @@ -2312,9 +2255,7 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) asoc->highest_tsn_inside_nr_map = asoc->cumulative_tsn; #endif } - if (compare_with_wrap(asoc->highest_tsn_inside_nr_map, - asoc->highest_tsn_inside_map, - MAX_TSN)) { + if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->highest_tsn_inside_map)) { highest_tsn = asoc->highest_tsn_inside_nr_map; } else { highest_tsn = asoc->highest_tsn_inside_map; @@ -2339,7 +2280,7 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) #ifdef INVARIANTS for (i = 0; i < asoc->mapping_array_size; i++) { if ((asoc->mapping_array[i]) || (asoc->nr_mapping_array[i])) { - printf("Error Mapping array's not clean at clear\n"); + SCTP_PRINTF("Error Mapping array's not clean at clear\n"); sctp_print_mapping_array(asoc); } } @@ -2361,7 +2302,7 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) #ifdef INVARIANTS panic("impossible slide"); #else - printf("impossible slide lgap:%x slide_end:%x slide_from:%x? at:%d\n", + SCTP_PRINTF("impossible slide lgap:%x slide_end:%x slide_from:%x? at:%d\n", lgap, slide_end, slide_from, at); return; #endif @@ -2370,7 +2311,7 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) #ifdef INVARIANTS panic("would overrun buffer"); #else - printf("Gak, would have overrun map end:%d slide_end:%d\n", + SCTP_PRINTF("Gak, would have overrun map end:%d slide_end:%d\n", asoc->mapping_array_size, slide_end); slide_end = asoc->mapping_array_size; #endif @@ -2424,17 +2365,14 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) } } - void -sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap, int *abort_flag) +sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap) { struct sctp_association *asoc; uint32_t highest_tsn; asoc = &stcb->asoc; - if (compare_with_wrap(asoc->highest_tsn_inside_nr_map, - asoc->highest_tsn_inside_map, - MAX_TSN)) { + if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->highest_tsn_inside_map)) { highest_tsn = asoc->highest_tsn_inside_nr_map; } else { highest_tsn = asoc->highest_tsn_inside_map; @@ -2454,13 +2392,14 @@ sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap, int *abort_flag) sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INDATA + SCTP_LOC_18); } - sctp_send_shutdown(stcb, stcb->asoc.primary_destination); - sctp_send_sack(stcb); + sctp_send_shutdown(stcb, + ((stcb->asoc.alternate) ? stcb->asoc.alternate : stcb->asoc.primary_destination)); + sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); } else { int is_a_gap; /* is there a gap now ? */ - is_a_gap = compare_with_wrap(highest_tsn, stcb->asoc.cumulative_tsn, MAX_TSN); + is_a_gap = SCTP_TSN_GT(highest_tsn, stcb->asoc.cumulative_tsn); /* * CMT DAC algorithm: increase number of packets received @@ -2478,7 +2417,7 @@ sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap, int *abort_flag) (stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) /* hit limit of pkts */ ) { - if ((stcb->asoc.sctp_cmt_on_off == 1) && + if ((stcb->asoc.sctp_cmt_on_off > 0) && (SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) && (stcb->asoc.send_sack == 0) && (stcb->asoc.numduptsns == 0) && @@ -2505,7 +2444,7 @@ sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap, int *abort_flag) * there are gaps or duplicates. */ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); - sctp_send_sack(stcb); + sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); } } else { if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { @@ -2579,8 +2518,11 @@ doit_again: int sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, - struct sctphdr *sh, struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct sctp_nets *net, uint32_t * high_tsn) + struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, struct sctp_inpcb *inp, + struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t * high_tsn, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) { struct sctp_data_chunk *ch, chunk_buf; struct sctp_association *asoc; @@ -2597,12 +2539,12 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, m = *mm; SCTP_TCB_LOCK_ASSERT(stcb); asoc = &stcb->asoc; - if (compare_with_wrap(asoc->highest_tsn_inside_nr_map, asoc->highest_tsn_inside_map, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->highest_tsn_inside_map)) { highest_tsn = asoc->highest_tsn_inside_nr_map; } else { highest_tsn = asoc->highest_tsn_inside_map; } - was_a_gap = compare_with_wrap(highest_tsn, stcb->asoc.cumulative_tsn, MAX_TSN); + was_a_gap = SCTP_TSN_GT(highest_tsn, stcb->asoc.cumulative_tsn); /* * setup where we got the last DATA packet from for any SACK that * may need to go out. Don't bump the net. This is done ONLY when a @@ -2657,7 +2599,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, if (length - *offset < chk_length) { /* all done, mutulated chunk */ stop_proc = 1; - break; + continue; } if (ch->ch.chunk_type == SCTP_DATA) { if ((size_t)chk_length < sizeof(struct sctp_data_chunk) + 1) { @@ -2687,8 +2629,10 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19; - sctp_abort_association(inp, stcb, m, iphlen, sh, - op_err, 0, net->port); + sctp_abort_association(inp, stcb, m, iphlen, + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, port); return (2); } #ifdef SCTP_AUDITING_ENABLED @@ -2713,7 +2657,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, * drop rep space left. */ stop_proc = 1; - break; + continue; } } else { /* not a data chunk in the data region */ @@ -2721,7 +2665,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, case SCTP_INITIATION: case SCTP_INITIATION_ACK: case SCTP_SELECTIVE_ACK: - case SCTP_NR_SELECTIVE_ACK: /* EY */ + case SCTP_NR_SELECTIVE_ACK: case SCTP_HEARTBEAT_REQUEST: case SCTP_HEARTBEAT_ACK: case SCTP_ABORT_ASSOCIATION: @@ -2752,7 +2696,12 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, struct mbuf *op_err; op_err = sctp_generate_invmanparam(SCTP_CAUSE_PROTOCOL_VIOLATION); - sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, 0, net->port); + sctp_abort_association(inp, stcb, + m, iphlen, + src, dst, + sh, op_err, + use_mflowid, mflowid, + vrf_id, port); return (2); } break; @@ -2779,11 +2728,13 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, phd->param_length = htons(chk_length + sizeof(*phd)); SCTP_BUF_LEN(merr) = sizeof(*phd); - SCTP_BUF_NEXT(merr) = SCTP_M_COPYM(m, *offset, - SCTP_SIZE32(chk_length), - M_DONTWAIT); + SCTP_BUF_NEXT(merr) = SCTP_M_COPYM(m, *offset, chk_length, M_DONTWAIT); if (SCTP_BUF_NEXT(merr)) { - sctp_queue_op_err(stcb, merr); + if (sctp_pad_lastmbuf(SCTP_BUF_NEXT(merr), SCTP_SIZE32(chk_length) - chk_length, NULL)) { + sctp_m_freem(merr); + } else { + sctp_queue_op_err(stcb, merr); + } } else { sctp_m_freem(merr); } @@ -2795,7 +2746,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, } /* else skip this bad chunk and * continue... */ break; - }; /* switch of chunk type */ + } /* switch of chunk type */ } *offset += SCTP_SIZE32(chk_length); if ((*offset >= length) || stop_proc) { @@ -2808,15 +2759,14 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, if (ch == NULL) { *offset = length; stop_proc = 1; - break; - + continue; } - } /* while */ + } if (break_flag) { /* * we need to report rwnd overrun drops. */ - sctp_send_packet_dropped(stcb, net, *mm, iphlen, 0); + sctp_send_packet_dropped(stcb, net, *mm, length, iphlen, 0); } if (num_chunks) { /* @@ -2843,10 +2793,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, stcb->asoc.send_sack = 1; } /* Start a sack timer or QUEUE a SACK for sending */ - sctp_sack_check(stcb, was_a_gap, &abort_flag); - if (abort_flag) - return (2); - + sctp_sack_check(stcb, was_a_gap); return (0); } @@ -2856,7 +2803,7 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1 int *num_frs, uint32_t * biggest_newly_acked_tsn, uint32_t * this_sack_lowest_newack, - int *ecn_seg_sums) + int *rto_ok) { struct sctp_tmit_chunk *tp1; unsigned int theTSN; @@ -2897,11 +2844,6 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1 * must be held until * cum-ack passes */ - /*- - * ECN Nonce: Add the nonce - * value to the sender's - * nonce sum - */ if (tp1->sent < SCTP_DATAGRAM_RESEND) { /*- * If it is less than RESEND, it is @@ -2910,8 +2852,8 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1 * via previous Gap Ack Blocks... * i.e. ACKED or RESEND. */ - if (compare_with_wrap(tp1->rec.data.TSN_seq, - *biggest_newly_acked_tsn, MAX_TSN)) { + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, + *biggest_newly_acked_tsn)) { *biggest_newly_acked_tsn = tp1->rec.data.TSN_seq; } /*- @@ -2924,9 +2866,8 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1 if (tp1->rec.data.chunk_was_revoked == 0) tp1->whoTo->saw_newack = 1; - if (compare_with_wrap(tp1->rec.data.TSN_seq, - tp1->whoTo->this_sack_highest_newack, - MAX_TSN)) { + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, + tp1->whoTo->this_sack_highest_newack)) { tp1->whoTo->this_sack_highest_newack = tp1->rec.data.TSN_seq; } @@ -2984,6 +2925,10 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1 tp1->rec.data.TSN_seq); } sctp_flight_size_decrease(tp1); + if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { + (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, + tp1); + } sctp_total_flight_decrease(stcb, tp1); tp1->whoTo->net_ack += tp1->send_size; @@ -2997,22 +2942,26 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1 * update RTO too ? */ if (tp1->do_rtt) { - tp1->whoTo->RTO = - sctp_calculate_rto(stcb, - &stcb->asoc, - tp1->whoTo, - &tp1->sent_rcv_time, - sctp_align_safe_nocopy); + if (*rto_ok) { + tp1->whoTo->RTO = + sctp_calculate_rto(stcb, + &stcb->asoc, + tp1->whoTo, + &tp1->sent_rcv_time, + sctp_align_safe_nocopy, + SCTP_RTT_FROM_DATA); + *rto_ok = 0; + } + if (tp1->whoTo->rto_needed == 0) { + tp1->whoTo->rto_needed = 1; + } tp1->do_rtt = 0; } } } if (tp1->sent <= SCTP_DATAGRAM_RESEND) { - (*ecn_seg_sums) += tp1->rec.data.ect_nonce; - (*ecn_seg_sums) &= SCTP_SACK_NONCE_SUM; - if (compare_with_wrap(tp1->rec.data.TSN_seq, - stcb->asoc.this_sack_highest_gap, - MAX_TSN)) { + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, + stcb->asoc.this_sack_highest_gap)) { stcb->asoc.this_sack_highest_gap = tp1->rec.data.TSN_seq; } @@ -3028,16 +2977,26 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1 * All chunks NOT UNSENT fall through here and are marked * (leave PR-SCTP ones that are to skip alone though) */ - if (tp1->sent != SCTP_FORWARD_TSN_SKIP) + if ((tp1->sent != SCTP_FORWARD_TSN_SKIP) && + (tp1->sent != SCTP_DATAGRAM_NR_ACKED)) { tp1->sent = SCTP_DATAGRAM_MARKED; - + } if (tp1->rec.data.chunk_was_revoked) { /* deflate the cwnd */ tp1->whoTo->cwnd -= tp1->book_size; tp1->rec.data.chunk_was_revoked = 0; } /* NR Sack code here */ - if (nr_sacking) { + if (nr_sacking && + (tp1->sent != SCTP_DATAGRAM_NR_ACKED)) { + if (stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) { + stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues--; +#ifdef INVARIANTS + } else { + panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number); +#endif + } + tp1->sent = SCTP_DATAGRAM_NR_ACKED; if (tp1->data) { /* * sa_ignore @@ -3052,10 +3011,9 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1 } break; } /* if (tp1->TSN_seq == theTSN) */ - if (compare_with_wrap(tp1->rec.data.TSN_seq, theTSN, - MAX_TSN)) + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, theTSN)) { break; - + } tp1 = TAILQ_NEXT(tp1, sctp_next); if ((tp1 == NULL) && (circled == 0)) { circled++; @@ -3077,7 +3035,7 @@ static int sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct sctp_association *asoc, uint32_t last_tsn, uint32_t * biggest_tsn_acked, uint32_t * biggest_newly_acked_tsn, uint32_t * this_sack_lowest_newack, - int num_seg, int num_nr_seg, int *ecn_seg_sums) + int num_seg, int num_nr_seg, int *rto_ok) { struct sctp_gap_ack_block *frag, block; struct sctp_tmit_chunk *tp1; @@ -3113,7 +3071,7 @@ sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct /* This gap report is not in order, so restart. */ tp1 = TAILQ_FIRST(&asoc->sent_queue); } - if (compare_with_wrap((last_tsn + frag_end), *biggest_tsn_acked, MAX_TSN)) { + if (SCTP_TSN_GT((last_tsn + frag_end), *biggest_tsn_acked)) { *biggest_tsn_acked = last_tsn + frag_end; } if (i < num_seg) { @@ -3123,7 +3081,7 @@ sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct } if (sctp_process_segment_range(stcb, &tp1, last_tsn, frag_strt, frag_end, non_revocable, &num_frs, biggest_newly_acked_tsn, - this_sack_lowest_newack, ecn_seg_sums)) { + this_sack_lowest_newack, rto_ok)) { chunk_freed = 1; } prev_frag_end = frag_end; @@ -3143,23 +3101,18 @@ sctp_check_for_revoked(struct sctp_tcb *stcb, uint32_t biggest_tsn_acked) { struct sctp_tmit_chunk *tp1; - int tot_revoked = 0; - tp1 = TAILQ_FIRST(&asoc->sent_queue); - while (tp1) { - if (compare_with_wrap(tp1->rec.data.TSN_seq, cumack, - MAX_TSN)) { + TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, cumack)) { /* * ok this guy is either ACK or MARKED. If it is * ACKED it has been previously acked but not this * time i.e. revoked. If it is MARKED it was ACK'ed * again. */ - if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_acked, - MAX_TSN)) + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, biggest_tsn_acked)) { break; - - + } if (tp1->sent == SCTP_DATAGRAM_ACKED) { /* it has been revoked */ tp1->sent = SCTP_DATAGRAM_SENT; @@ -3182,7 +3135,6 @@ sctp_check_for_revoked(struct sctp_tcb *stcb, * artificial inflation of the flight_size. */ tp1->whoTo->cwnd += tp1->book_size; - tot_revoked++; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(asoc->last_acked_seq, cumack, @@ -3198,24 +3150,6 @@ sctp_check_for_revoked(struct sctp_tcb *stcb, } if (tp1->sent == SCTP_DATAGRAM_UNSENT) break; - tp1 = TAILQ_NEXT(tp1, sctp_next); - } - if (tot_revoked > 0) { - /* - * Setup the ecn nonce re-sync point. We do this since once - * data is revoked we begin to retransmit things, which do - * NOT have the ECN bits set. This means we are now out of - * sync and must wait until we get back in sync with the - * peer to check ECN bits. - */ - tp1 = TAILQ_FIRST(&asoc->send_queue); - if (tp1 == NULL) { - asoc->nonce_resync_tsn = asoc->sending_seq; - } else { - asoc->nonce_resync_tsn = tp1->rec.data.TSN_seq; - } - asoc->nonce_wait_for_ecne = 0; - asoc->nonce_sum_check = 0; } } @@ -3244,7 +3178,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, } /* CMT DAC algo: finding out if SACK is a mixed SACK */ - if ((asoc->sctp_cmt_on_off == 1) && + if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (net->saw_newack) @@ -3254,12 +3188,10 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, if (stcb->asoc.peer_supports_prsctp) { (void)SCTP_GETTIME_TIMEVAL(&now); } - tp1 = TAILQ_FIRST(&asoc->sent_queue); - while (tp1) { + TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { strike_flag = 0; if (tp1->no_fr_allowed) { /* this one had a timeout or something */ - tp1 = TAILQ_NEXT(tp1, sctp_next); continue; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { @@ -3269,8 +3201,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, tp1->sent, SCTP_FR_LOG_CHECK_STRIKE); } - if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_acked, - MAX_TSN) || + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, biggest_tsn_acked) || tp1->sent == SCTP_DATAGRAM_UNSENT) { /* done */ break; @@ -3281,17 +3212,14 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) { /* Yes so drop it */ if (tp1->data != NULL) { - (void)sctp_release_pr_sctp_chunk(stcb, tp1, - (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT), + (void)sctp_release_pr_sctp_chunk(stcb, tp1, 1, SCTP_SO_NOT_LOCKED); } - tp1 = TAILQ_NEXT(tp1, sctp_next); continue; } } } - if (compare_with_wrap(tp1->rec.data.TSN_seq, - asoc->this_sack_highest_gap, MAX_TSN)) { + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, asoc->this_sack_highest_gap)) { /* we are beyond the tsn in the sack */ break; } @@ -3302,7 +3230,6 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, /* Continue strikin FWD-TSN chunks */ tp1->rec.data.fwd_tsn_cnt++; } - tp1 = TAILQ_NEXT(tp1, sctp_next); continue; } /* @@ -3315,10 +3242,9 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, * CMT, no data sent to this dest can be marked for * FR using this SACK. */ - tp1 = TAILQ_NEXT(tp1, sctp_next); continue; - } else if (tp1->whoTo && compare_with_wrap(tp1->rec.data.TSN_seq, - tp1->whoTo->this_sack_highest_newack, MAX_TSN)) { + } else if (tp1->whoTo && SCTP_TSN_GT(tp1->rec.data.TSN_seq, + tp1->whoTo->this_sack_highest_newack)) { /* * CMT: New acks were receieved for data sent to * this dest. But no new acks were seen for data @@ -3327,7 +3253,6 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, * this SACK. This step covers part of the DAC algo * and the HTNA algo as well. */ - tp1 = TAILQ_NEXT(tp1, sctp_next); continue; } /* @@ -3355,7 +3280,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, if (tp1->sent < SCTP_DATAGRAM_RESEND) { tp1->sent++; } - if ((asoc->sctp_cmt_on_off == 1) && + if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { /* * CMT DAC algorithm: If SACK flag is set to @@ -3370,7 +3295,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, * received after this missing TSN. */ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) && - compare_with_wrap(this_sack_lowest_newack, tp1->rec.data.TSN_seq, MAX_TSN)) { + SCTP_TSN_GT(this_sack_lowest_newack, tp1->rec.data.TSN_seq)) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(16 + num_dests_sacked, tp1->rec.data.TSN_seq, @@ -3402,9 +3327,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, #endif ) { - if ((compare_with_wrap(biggest_tsn_newly_acked, - tp1->rec.data.fast_retran_tsn, MAX_TSN)) || - (biggest_tsn_newly_acked == + if (SCTP_TSN_GE(biggest_tsn_newly_acked, tp1->rec.data.fast_retran_tsn)) { /* * Strike the TSN, since this ack is @@ -3421,7 +3344,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, tp1->sent++; } strike_flag = 1; - if ((asoc->sctp_cmt_on_off == 1) && + if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { /* * CMT DAC algorithm: If @@ -3444,8 +3367,8 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, */ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) && - compare_with_wrap(this_sack_lowest_newack, - tp1->rec.data.TSN_seq, MAX_TSN)) { + SCTP_TSN_GT(this_sack_lowest_newack, + tp1->rec.data.TSN_seq)) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(32 + num_dests_sacked, tp1->rec.data.TSN_seq, @@ -3463,8 +3386,8 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, * JRI: TODO: remove code for HTNA algo. CMT's SFR * algo covers HTNA. */ - } else if (compare_with_wrap(tp1->rec.data.TSN_seq, - biggest_tsn_newly_acked, MAX_TSN)) { + } else if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, + biggest_tsn_newly_acked)) { /* * We don't strike these: This is the HTNA * algorithm i.e. we don't strike If our TSN is @@ -3482,7 +3405,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, if (tp1->sent < SCTP_DATAGRAM_RESEND) { tp1->sent++; } - if ((asoc->sctp_cmt_on_off == 1) && + if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { /* * CMT DAC algorithm: If SACK flag is set to @@ -3497,7 +3420,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, * received after this missing TSN. */ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) && - compare_with_wrap(this_sack_lowest_newack, tp1->rec.data.TSN_seq, MAX_TSN)) { + SCTP_TSN_GT(this_sack_lowest_newack, tp1->rec.data.TSN_seq)) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(48 + num_dests_sacked, tp1->rec.data.TSN_seq, @@ -3522,6 +3445,10 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, if (tp1->whoTo) { tp1->whoTo->net_ack++; sctp_flight_size_decrease(tp1); + if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { + (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, + tp1); + } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) { sctp_log_rwnd(SCTP_INCREASE_PEER_RWND, @@ -3542,17 +3469,18 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, if (tp1->snd_count > tp1->rec.data.timetodrop.tv_sec) { /* Yes, so drop it */ if (tp1->data != NULL) { - (void)sctp_release_pr_sctp_chunk(stcb, tp1, - (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT), + (void)sctp_release_pr_sctp_chunk(stcb, tp1, 1, SCTP_SO_NOT_LOCKED); } /* Make sure to flag we had a FR */ tp1->whoTo->net_ack++; - tp1 = TAILQ_NEXT(tp1, sctp_next); continue; } } - /* printf("OK, we are now ready to FR this guy\n"); */ + /* + * SCTP_PRINTF("OK, we are now ready to FR this + * guy\n"); + */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(tp1->rec.data.TSN_seq, tp1->snd_count, 0, SCTP_FR_MARKED); @@ -3562,7 +3490,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, SCTP_STAT_INCR(sctps_sendmultfastretrans); } sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); - if (asoc->sctp_cmt_on_off == 1) { + if (asoc->sctp_cmt_on_off > 0) { /* * CMT: Using RTX_SSTHRESH policy for CMT. * If CMT is being used, then pick dest with @@ -3620,7 +3548,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, tot_retrans++; /* mark the sending seq for possible subsequent FR's */ /* - * printf("Marking TSN for FR new value %x\n", + * SCTP_PRINTF("Marking TSN for FR new value %x\n", * (uint32_t)tpi->rec.data.TSN_seq); */ if (TAILQ_EMPTY(&asoc->send_queue)) { @@ -3652,6 +3580,10 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, * this guy had a RTO calculation pending on * it, cancel it */ + if ((tp1->whoTo != NULL) && + (tp1->whoTo->rto_needed == 0)) { + tp1->whoTo->rto_needed = 1; + } tp1->do_rtt = 0; } if (alt != tp1->whoTo) { @@ -3662,18 +3594,6 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, atomic_add_int(&alt->ref_count, 1); } } - tp1 = TAILQ_NEXT(tp1, sctp_next); - } /* while (tp1) */ - - if (tot_retrans > 0) { - /* - * Setup the ecn nonce re-sync point. We do this since once - * we go to FR something we introduce a Karn's rule scenario - * and won't know the totals for the ECN bits. - */ - asoc->nonce_resync_tsn = sending_seq; - asoc->nonce_wait_for_ecne = 0; - asoc->nonce_sum_check = 0; } } @@ -3688,15 +3608,16 @@ sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb, if (asoc->peer_supports_prsctp == 0) { return (NULL); } - tp1 = TAILQ_FIRST(&asoc->sent_queue); - while (tp1) { + TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) { if (tp1->sent != SCTP_FORWARD_TSN_SKIP && - tp1->sent != SCTP_DATAGRAM_RESEND) { + tp1->sent != SCTP_DATAGRAM_RESEND && + tp1->sent != SCTP_DATAGRAM_NR_ACKED) { /* no chance to advance, out of here */ break; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) { - if (tp1->sent == SCTP_FORWARD_TSN_SKIP) { + if ((tp1->sent == SCTP_FORWARD_TSN_SKIP) || + (tp1->sent == SCTP_DATAGRAM_NR_ACKED)) { sctp_misc_ints(SCTP_FWD_TSN_CHECK, asoc->advanced_peer_ack_point, tp1->rec.data.TSN_seq, 0, 0); @@ -3713,7 +3634,6 @@ sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb, (void)SCTP_GETTIME_TIMEVAL(&now); now_filled = 1; } - tp2 = TAILQ_NEXT(tp1, sctp_next); /* * now we got a chunk which is marked for another * retransmission to a PR-stream but has run out its chances @@ -3730,8 +3650,7 @@ sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb, /* Yes so drop it */ if (tp1->data) { (void)sctp_release_pr_sctp_chunk(stcb, tp1, - (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT), - SCTP_SO_NOT_LOCKED); + 1, SCTP_SO_NOT_LOCKED); } } else { /* @@ -3746,12 +3665,10 @@ sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb, * the chunk, advance our peer ack point and we can check * the next chunk. */ - if (tp1->sent == SCTP_FORWARD_TSN_SKIP) { + if ((tp1->sent == SCTP_FORWARD_TSN_SKIP) || + (tp1->sent == SCTP_DATAGRAM_NR_ACKED)) { /* advance PeerAckPoint goes forward */ - if (compare_with_wrap(tp1->rec.data.TSN_seq, - asoc->advanced_peer_ack_point, - MAX_TSN)) { - + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, asoc->advanced_peer_ack_point)) { asoc->advanced_peer_ack_point = tp1->rec.data.TSN_seq; a_adv = tp1; } else if (tp1->rec.data.TSN_seq == asoc->advanced_peer_ack_point) { @@ -3765,11 +3682,6 @@ sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb, */ break; } - /* - * If we hit here we just dumped tp1, move to next tsn on - * sent queue. - */ - tp1 = tp2; } return (a_adv); } @@ -3790,11 +3702,10 @@ sctp_fs_audit(struct sctp_association *asoc) TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) { if (chk->sent < SCTP_DATAGRAM_RESEND) { - printf("Chk TSN:%u size:%d inflight cnt:%d\n", + SCTP_PRINTF("Chk TSN:%u size:%d inflight cnt:%d\n", chk->rec.data.TSN_seq, chk->send_size, - chk->snd_count - ); + chk->snd_count); inflight++; } else if (chk->sent == SCTP_DATAGRAM_RESEND) { resend++; @@ -3811,7 +3722,7 @@ sctp_fs_audit(struct sctp_association *asoc) #ifdef INVARIANTS panic("Flight size-express incorrect? \n"); #else - printf("asoc->total_flight:%d cnt:%d\n", + SCTP_PRINTF("asoc->total_flight:%d cnt:%d\n", entry_flight, entry_cnt); SCTP_PRINTF("Flight size-express incorrect F:%d I:%d R:%d Ab:%d ACK:%d\n", @@ -3826,7 +3737,6 @@ sctp_fs_audit(struct sctp_association *asoc) static void sctp_window_probe_recovery(struct sctp_tcb *stcb, struct sctp_association *asoc, - struct sctp_nets *net, struct sctp_tmit_chunk *tp1) { tp1->window_probe = 0; @@ -3840,6 +3750,10 @@ sctp_window_probe_recovery(struct sctp_tcb *stcb, return; } /* First setup this by shrinking flight */ + if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { + (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, + tp1); + } sctp_flight_size_decrease(tp1); sctp_total_flight_decrease(stcb, tp1); /* Now mark for resend */ @@ -3857,7 +3771,7 @@ sctp_window_probe_recovery(struct sctp_tcb *stcb, void sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, - uint32_t rwnd, int nonce_sum_flag, int *abort_now) + uint32_t rwnd, int *abort_now, int ecne_seen) { struct sctp_nets *net; struct sctp_association *asoc; @@ -3866,6 +3780,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, int win_probe_recovery = 0; int win_probe_recovered = 0; int j, done_once = 0; + int rto_ok = 1; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_SACK_ARRIVALS_ENABLE) { sctp_misc_ints(SCTP_SACK_LOG_EXPRESS, cumack, @@ -3881,7 +3796,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, #endif asoc = &stcb->asoc; old_rwnd = asoc->peers_rwnd; - if (compare_with_wrap(asoc->last_acked_seq, cumack, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->last_acked_seq, cumack)) { /* old ack */ return; } else if (asoc->last_acked_seq == cumack) { @@ -3899,6 +3814,10 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, } /* First setup for CC stuff */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + if (SCTP_TSN_GT(cumack, net->cwr_window_tsn)) { + /* Drag along the window_tsn for cwr's */ + net->cwr_window_tsn = cumack; + } net->prev_cwnd = net->cwnd; net->net_ack = 0; net->net_ack2 = 0; @@ -3909,6 +3828,9 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, */ net->new_pseudo_cumack = 0; net->will_exit_fast_recovery = 0; + if (stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) { + (*stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) (stcb, net); + } } if (SCTP_BASE_SYSCTL(sctp_strict_sacks)) { uint32_t send_s; @@ -3920,8 +3842,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, } else { send_s = asoc->sending_seq; } - if ((cumack == send_s) || - compare_with_wrap(cumack, send_s, MAX_TSN)) { + if (SCTP_TSN_GE(cumack, send_s)) { #ifndef INVARIANTS struct mbuf *oper; @@ -3947,7 +3868,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25; - sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); return; #endif } @@ -3961,22 +3882,13 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, __LINE__); } stcb->asoc.overall_error_count = 0; - if (compare_with_wrap(cumack, asoc->last_acked_seq, MAX_TSN)) { + if (SCTP_TSN_GT(cumack, asoc->last_acked_seq)) { /* process the new consecutive TSN first */ - tp1 = TAILQ_FIRST(&asoc->sent_queue); - while (tp1) { - tp2 = TAILQ_NEXT(tp1, sctp_next); - if (compare_with_wrap(cumack, tp1->rec.data.TSN_seq, - MAX_TSN) || - cumack == tp1->rec.data.TSN_seq) { + TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) { + if (SCTP_TSN_GE(cumack, tp1->rec.data.TSN_seq)) { if (tp1->sent == SCTP_DATAGRAM_UNSENT) { - printf("Warning, an unsent is now acked?\n"); + SCTP_PRINTF("Warning, an unsent is now acked?\n"); } - /* - * ECN Nonce: Add the nonce to the sender's - * nonce sum - */ - asoc->nonce_sum_expect_base += tp1->rec.data.ect_nonce; if (tp1->sent < SCTP_DATAGRAM_ACKED) { /* * If it is less than ACKED, it is @@ -3992,6 +3904,10 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, tp1->rec.data.TSN_seq); } sctp_flight_size_decrease(tp1); + if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { + (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, + tp1); + } /* sa_ignore NO_NULL_CHK */ sctp_total_flight_decrease(stcb, tp1); } @@ -4006,15 +3922,23 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, /* update RTO too? */ if (tp1->do_rtt) { - tp1->whoTo->RTO = - /* - * sa_ignore - * NO_NULL_CHK - */ - sctp_calculate_rto(stcb, - asoc, tp1->whoTo, - &tp1->sent_rcv_time, - sctp_align_safe_nocopy); + if (rto_ok) { + tp1->whoTo->RTO = + /* + * sa_ignore + * NO_NULL_CH + * K + */ + sctp_calculate_rto(stcb, + asoc, tp1->whoTo, + &tp1->sent_rcv_time, + sctp_align_safe_nocopy, + SCTP_RTT_FROM_DATA); + rto_ok = 0; + } + if (tp1->whoTo->rto_needed == 0) { + tp1->whoTo->rto_needed = 1; + } tp1->do_rtt = 0; } } @@ -4047,12 +3971,21 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, tp1->whoTo->cwnd -= tp1->book_size; tp1->rec.data.chunk_was_revoked = 0; } - tp1->sent = SCTP_DATAGRAM_ACKED; + if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) { + if (asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) { + asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues--; +#ifdef INVARIANTS + } else { + panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number); +#endif + } + } TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next); if (tp1->data) { /* sa_ignore NO_NULL_CHK */ sctp_free_bufspace(stcb, asoc, tp1, 1); sctp_m_freem(tp1->data); + tp1->data = NULL; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(asoc->last_acked_seq, @@ -4062,10 +3995,8 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, 0, SCTP_LOG_FREE_SENT); } - tp1->data = NULL; asoc->sent_queue_cnt--; - sctp_free_a_chunk(stcb, tp1); - tp1 = tp2; + sctp_free_a_chunk(stcb, tp1, SCTP_SO_NOT_LOCKED); } else { break; } @@ -4074,16 +4005,16 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, } /* sa_ignore NO_NULL_CHK */ if (stcb->sctp_socket) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif SOCKBUF_LOCK(&stcb->sctp_socket->so_snd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) { /* sa_ignore NO_NULL_CHK */ - sctp_wakeup_log(stcb, cumack, 1, SCTP_WAKESND_FROM_SACK); + sctp_wakeup_log(stcb, 1, SCTP_WAKESND_FROM_SACK); } -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -4097,19 +4028,60 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, } #endif sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) { - sctp_wakeup_log(stcb, cumack, 1, SCTP_NOWAKE_FROM_SACK); + sctp_wakeup_log(stcb, 1, SCTP_NOWAKE_FROM_SACK); } } /* JRS - Use the congestion control given in the CC module */ - if (asoc->last_acked_seq != cumack) + if ((asoc->last_acked_seq != cumack) && (ecne_seen == 0)) { + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + if (net->net_ack2 > 0) { + /* + * Karn's rule applies to clearing error + * count, this is optional. + */ + net->error_count = 0; + if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { + /* addr came good */ + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, + 0, (void *)net, SCTP_SO_NOT_LOCKED); + } + if (net == stcb->asoc.primary_destination) { + if (stcb->asoc.alternate) { + /* + * release the alternate, + * primary is good + */ + sctp_free_remote_addr(stcb->asoc.alternate); + stcb->asoc.alternate = NULL; + } + } + if (net->dest_state & SCTP_ADDR_PF) { + net->dest_state &= ~SCTP_ADDR_PF; + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); + asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net); + /* Done with this net */ + net->net_ack = 0; + } + /* restore any doubled timers */ + net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv; + if (net->RTO < stcb->asoc.minrto) { + net->RTO = stcb->asoc.minrto; + } + if (net->RTO > stcb->asoc.maxrto) { + net->RTO = stcb->asoc.maxrto; + } + } + } asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, 1, 0, 0); - + } asoc->last_acked_seq = cumack; if (TAILQ_EMPTY(&asoc->sent_queue)) { @@ -4121,54 +4093,6 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, asoc->total_flight = 0; asoc->total_flight_count = 0; } - /* ECN Nonce updates */ - if (asoc->ecn_nonce_allowed) { - if (asoc->nonce_sum_check) { - if (nonce_sum_flag != ((asoc->nonce_sum_expect_base) & SCTP_SACK_NONCE_SUM)) { - if (asoc->nonce_wait_for_ecne == 0) { - struct sctp_tmit_chunk *lchk; - - lchk = TAILQ_FIRST(&asoc->send_queue); - asoc->nonce_wait_for_ecne = 1; - if (lchk) { - asoc->nonce_wait_tsn = lchk->rec.data.TSN_seq; - } else { - asoc->nonce_wait_tsn = asoc->sending_seq; - } - } else { - if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_wait_tsn, MAX_TSN) || - (asoc->last_acked_seq == asoc->nonce_wait_tsn)) { - /* - * Misbehaving peer. We need - * to react to this guy - */ - asoc->ecn_allowed = 0; - asoc->ecn_nonce_allowed = 0; - } - } - } - } else { - /* See if Resynchronization Possible */ - if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_resync_tsn, MAX_TSN)) { - asoc->nonce_sum_check = 1; - /* - * Now we must calculate what the base is. - * We do this based on two things, we know - * the total's for all the segments - * gap-acked in the SACK (none). We also - * know the SACK's nonce sum, its in - * nonce_sum_flag. So we can build a truth - * table to back-calculate the new value of - * asoc->nonce_sum_expect_base: - * - * SACK-flag-Value Seg-Sums Base 0 0 0 - * 1 0 1 0 1 1 1 - * 1 0 - */ - asoc->nonce_sum_expect_base = (0 ^ nonce_sum_flag) & SCTP_SACK_NONCE_SUM; - } - } - } /* RWND update */ asoc->peers_rwnd = sctp_sbspace_sub(rwnd, (uint32_t) (asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)))); @@ -4195,7 +4119,7 @@ again: TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (tp1->window_probe) { /* move back to data send queue */ - sctp_window_probe_recovery(stcb, asoc, net, tp1); + sctp_window_probe_recovery(stcb, asoc, tp1); break; } } @@ -4228,13 +4152,6 @@ again: stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_22); } - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck4); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_23); - } - } } } if ((j == 0) && @@ -4306,23 +4223,21 @@ again: abort_out_now: *abort_now = 1; /* XXX */ - oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), + oper = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA); if (oper) { struct sctp_paramhdr *ph; - uint32_t *ippp; - SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) + - sizeof(uint32_t); + SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr); ph = mtod(oper, struct sctp_paramhdr *); ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(oper)); - ippp = (uint32_t *) (ph + 1); - *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_24); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24; - sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); } else { + struct sctp_nets *netp; + if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); @@ -4330,26 +4245,36 @@ again: SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); sctp_stop_timers_for_shutdown(stcb); - sctp_send_shutdown(stcb, - stcb->asoc.primary_destination); + if (asoc->alternate) { + netp = asoc->alternate; + } else { + netp = asoc->primary_destination; + } + sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, - stcb->sctp_ep, stcb, asoc->primary_destination); + stcb->sctp_ep, stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, - stcb->sctp_ep, stcb, asoc->primary_destination); + stcb->sctp_ep, stcb, netp); } } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) && (asoc->stream_queue_cnt == 0)) { + struct sctp_nets *netp; + if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) { goto abort_out_now; } SCTP_STAT_DECR_GAUGE32(sctps_currestab); SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); - sctp_send_shutdown_ack(stcb, - stcb->asoc.primary_destination); sctp_stop_timers_for_shutdown(stcb); + if (asoc->alternate) { + netp = asoc->alternate; + } else { + netp = asoc->primary_destination; + } + sctp_send_shutdown_ack(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, - stcb->sctp_ep, stcb, asoc->primary_destination); + stcb->sctp_ep, stcb, netp); } } /*********************************************/ @@ -4357,7 +4282,7 @@ again: /* (section 4.2) */ /*********************************************/ /* C1. update advancedPeerAckPoint */ - if (compare_with_wrap(cumack, asoc->advanced_peer_ack_point, MAX_TSN)) { + if (SCTP_TSN_GT(cumack, asoc->advanced_peer_ack_point)) { asoc->advanced_peer_ack_point = cumack; } /* PR-Sctp issues need to be addressed too */ @@ -4368,22 +4293,12 @@ again: old_adv_peer_ack_point = asoc->advanced_peer_ack_point; lchk = sctp_try_advance_peer_ack_point(stcb, asoc); /* C3. See if we need to send a Fwd-TSN */ - if (compare_with_wrap(asoc->advanced_peer_ack_point, cumack, - MAX_TSN)) { + if (SCTP_TSN_GT(asoc->advanced_peer_ack_point, cumack)) { /* - * ISSUE with ECN, see FWD-TSN processing for notes - * on issues that will occur when the ECN NONCE - * stuff is put into SCTP for cross checking. + * ISSUE with ECN, see FWD-TSN processing. */ - if (compare_with_wrap(asoc->advanced_peer_ack_point, old_adv_peer_ack_point, - MAX_TSN)) { + if (SCTP_TSN_GT(asoc->advanced_peer_ack_point, old_adv_peer_ack_point)) { send_forward_tsn(stcb, asoc); - /* - * ECN Nonce: Disable Nonce Sum check when - * FWD TSN is sent and store resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->advanced_peer_ack_point; } else if (lchk) { /* try to FR fwd-tsn's that get lost too */ if (lchk->rec.data.fwd_tsn_cnt >= 3) { @@ -4408,15 +4323,14 @@ again: void sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, - struct sctp_tcb *stcb, struct sctp_nets *net_from, + struct sctp_tcb *stcb, uint16_t num_seg, uint16_t num_nr_seg, uint16_t num_dup, int *abort_now, uint8_t flags, - uint32_t cum_ack, uint32_t rwnd) + uint32_t cum_ack, uint32_t rwnd, int ecne_seen) { struct sctp_association *asoc; struct sctp_tmit_chunk *tp1, *tp2; uint32_t last_tsn, biggest_tsn_acked, biggest_tsn_newly_acked, this_sack_lowest_newack; - uint32_t sav_cum_ack; uint16_t wake_him = 0; uint32_t send_s = 0; long j; @@ -4426,8 +4340,8 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, int win_probe_recovery = 0; int win_probe_recovered = 0; struct sctp_nets *net = NULL; - int nonce_sum_flag, ecn_seg_sums = 0; int done_once; + int rto_ok = 1; uint8_t reneged_all = 0; uint8_t cmt_dac_flag; @@ -4455,10 +4369,8 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, SCTP_TCB_LOCK_ASSERT(stcb); /* CMT DAC algo */ this_sack_lowest_newack = 0; - j = 0; SCTP_STAT_INCR(sctps_slowpath_sack); last_tsn = cum_ack; - nonce_sum_flag = flags & SCTP_SACK_NONCE_SUM; cmt_dac_flag = flags & SCTP_SACK_CMT_DAC; #ifdef SCTP_ASOCLOG_OF_TSNS stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cum_ack; @@ -4491,7 +4403,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, num_dup, SCTP_LOG_NEW_SACK); } - if ((num_dup) && (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_FR_LOGGING_ENABLE | SCTP_EARLYFR_LOGGING_ENABLE))) { + if ((num_dup) && (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE)) { uint16_t i; uint32_t *dupdata, dblock; @@ -4514,19 +4426,18 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, tp1 = NULL; send_s = asoc->sending_seq; } - if (cum_ack == send_s || - compare_with_wrap(cum_ack, send_s, MAX_TSN)) { + if (SCTP_TSN_GE(cum_ack, send_s)) { struct mbuf *oper; /* * no way, we have not even sent this TSN out yet. * Peer is hopelessly messed up with us. */ - printf("NEW cum_ack:%x send_s:%x is smaller or equal\n", + SCTP_PRINTF("NEW cum_ack:%x send_s:%x is smaller or equal\n", cum_ack, send_s); if (tp1) { - printf("Got send_s from tsn:%x + 1 of tp1:%p\n", - tp1->rec.data.TSN_seq, tp1); + SCTP_PRINTF("Got send_s from tsn:%x + 1 of tp1:%p\n", + tp1->rec.data.TSN_seq, (void *)tp1); } hopeless_peer: *abort_now = 1; @@ -4546,19 +4457,17 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25; - sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); return; } } /**********************/ /* 1) check the range */ /**********************/ - if (compare_with_wrap(asoc->last_acked_seq, last_tsn, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->last_acked_seq, last_tsn)) { /* acking something behind */ return; } - sav_cum_ack = asoc->last_acked_seq; - /* update the Rwnd of the peer */ if (TAILQ_EMPTY(&asoc->sent_queue) && TAILQ_EMPTY(&asoc->send_queue) && @@ -4580,13 +4489,6 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, TAILQ_FOREACH(net, &asoc->nets, sctp_next) { sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_26); - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck1); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_26); - } - } net->partial_bytes_acked = 0; net->flight_size = 0; } @@ -4602,6 +4504,10 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, * destination address basis. */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + if (SCTP_TSN_GT(cum_ack, net->cwr_window_tsn)) { + /* Drag along the window_tsn for cwr's */ + net->cwr_window_tsn = cum_ack; + } net->prev_cwnd = net->cwnd; net->net_ack = 0; net->net_ack2 = 0; @@ -4612,19 +4518,14 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, */ net->new_pseudo_cumack = 0; net->will_exit_fast_recovery = 0; + if (stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) { + (*stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) (stcb, net); + } } /* process the new consecutive TSN first */ - tp1 = TAILQ_FIRST(&asoc->sent_queue); - while (tp1) { - if (compare_with_wrap(last_tsn, tp1->rec.data.TSN_seq, - MAX_TSN) || - last_tsn == tp1->rec.data.TSN_seq) { + TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { + if (SCTP_TSN_GE(last_tsn, tp1->rec.data.TSN_seq)) { if (tp1->sent != SCTP_DATAGRAM_UNSENT) { - /* - * ECN Nonce: Add the nonce to the sender's - * nonce sum - */ - asoc->nonce_sum_expect_base += tp1->rec.data.ect_nonce; accum_moved = 1; if (tp1->sent < SCTP_DATAGRAM_ACKED) { /* @@ -4656,6 +4557,10 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, } sctp_flight_size_decrease(tp1); sctp_total_flight_decrease(stcb, tp1); + if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { + (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, + tp1); + } } tp1->whoTo->net_ack += tp1->send_size; @@ -4673,11 +4578,18 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, /* update RTO too? */ if (tp1->do_rtt) { - tp1->whoTo->RTO = - sctp_calculate_rto(stcb, - asoc, tp1->whoTo, - &tp1->sent_rcv_time, - sctp_align_safe_nocopy); + if (rto_ok) { + tp1->whoTo->RTO = + sctp_calculate_rto(stcb, + asoc, tp1->whoTo, + &tp1->sent_rcv_time, + sctp_align_safe_nocopy, + SCTP_RTT_FROM_DATA); + rto_ok = 0; + } + if (tp1->whoTo->rto_needed == 0) { + tp1->whoTo->rto_needed = 1; + } tp1->do_rtt = 0; } } @@ -4722,12 +4634,13 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, tp1->whoTo->cwnd -= tp1->book_size; tp1->rec.data.chunk_was_revoked = 0; } - tp1->sent = SCTP_DATAGRAM_ACKED; + if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) { + tp1->sent = SCTP_DATAGRAM_ACKED; + } } } else { break; } - tp1 = TAILQ_NEXT(tp1, sctp_next); } biggest_tsn_newly_acked = biggest_tsn_acked = last_tsn; /* always set this up to cum-ack */ @@ -4753,7 +4666,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, */ if (sctp_handle_segments(m, &offset_seg, stcb, asoc, last_tsn, &biggest_tsn_acked, &biggest_tsn_newly_acked, &this_sack_lowest_newack, - num_seg, num_nr_seg, &ecn_seg_sums)) { + num_seg, num_nr_seg, &rto_ok)) { wake_him++; } if (SCTP_BASE_SYSCTL(sctp_strict_sacks)) { @@ -4761,16 +4674,13 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, * validate the biggest_tsn_acked in the gap acks if * strict adherence is wanted. */ - if ((biggest_tsn_acked == send_s) || - (compare_with_wrap(biggest_tsn_acked, send_s, MAX_TSN))) { + if (SCTP_TSN_GE(biggest_tsn_acked, send_s)) { /* * peer is either confused or we are under * attack. We must abort. */ - printf("Hopeless peer! biggest_tsn_acked:%x largest seq:%x\n", - biggest_tsn_acked, - send_s); - + SCTP_PRINTF("Hopeless peer! biggest_tsn_acked:%x largest seq:%x\n", + biggest_tsn_acked, send_s); goto hopeless_peer; } } @@ -4778,7 +4688,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, /*******************************************/ /* cancel ALL T3-send timer if accum moved */ /*******************************************/ - if (asoc->sctp_cmt_on_off == 1) { + if (asoc->sctp_cmt_on_off > 0) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (net->new_pseudo_cumack) sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, @@ -4799,39 +4709,30 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, /********************************************/ asoc->last_acked_seq = cum_ack; - tp1 = TAILQ_FIRST(&asoc->sent_queue); - if (tp1 == NULL) - goto done_with_it; - do { - if (compare_with_wrap(tp1->rec.data.TSN_seq, cum_ack, - MAX_TSN)) { + TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) { + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, cum_ack)) { break; } - if (tp1->sent == SCTP_DATAGRAM_UNSENT) { - /* no more sent on list */ - printf("Warning, tp1->sent == %d and its now acked?\n", - tp1->sent); + if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) { + if (asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) { + asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues--; +#ifdef INVARIANTS + } else { + panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number); +#endif + } } - tp2 = TAILQ_NEXT(tp1, sctp_next); TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next); if (tp1->pr_sctp_on) { if (asoc->pr_sctp_cnt != 0) asoc->pr_sctp_cnt--; } - if (TAILQ_EMPTY(&asoc->sent_queue) && - (asoc->total_flight > 0)) { -#ifdef INVARIANTS - panic("Warning flight size is postive and should be 0"); -#else - SCTP_PRINTF("Warning flight size incorrect should be 0 is %d\n", - asoc->total_flight); -#endif - asoc->total_flight = 0; - } + asoc->sent_queue_cnt--; if (tp1->data) { /* sa_ignore NO_NULL_CHK */ sctp_free_bufspace(stcb, asoc, tp1, 1); sctp_m_freem(tp1->data); + tp1->data = NULL; if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(tp1->flags)) { asoc->sent_queue_cnt_removeable--; } @@ -4844,25 +4745,29 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, 0, SCTP_LOG_FREE_SENT); } - tp1->data = NULL; - asoc->sent_queue_cnt--; - sctp_free_a_chunk(stcb, tp1); + sctp_free_a_chunk(stcb, tp1, SCTP_SO_NOT_LOCKED); wake_him++; - tp1 = tp2; - } while (tp1 != NULL); - -done_with_it: + } + if (TAILQ_EMPTY(&asoc->sent_queue) && (asoc->total_flight > 0)) { +#ifdef INVARIANTS + panic("Warning flight size is postive and should be 0"); +#else + SCTP_PRINTF("Warning flight size incorrect should be 0 is %d\n", + asoc->total_flight); +#endif + asoc->total_flight = 0; + } /* sa_ignore NO_NULL_CHK */ if ((wake_him) && (stcb->sctp_socket)) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif SOCKBUF_LOCK(&stcb->sctp_socket->so_snd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) { - sctp_wakeup_log(stcb, cum_ack, wake_him, SCTP_WAKESND_FROM_SACK); + sctp_wakeup_log(stcb, wake_him, SCTP_WAKESND_FROM_SACK); } -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -4876,19 +4781,17 @@ done_with_it: } #endif sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) { - sctp_wakeup_log(stcb, cum_ack, wake_him, SCTP_NOWAKE_FROM_SACK); + sctp_wakeup_log(stcb, wake_him, SCTP_NOWAKE_FROM_SACK); } } if (asoc->fast_retran_loss_recovery && accum_moved) { - if (compare_with_wrap(asoc->last_acked_seq, - asoc->fast_recovery_tsn, MAX_TSN) || - asoc->last_acked_seq == asoc->fast_recovery_tsn) { + if (SCTP_TSN_GE(asoc->last_acked_seq, asoc->fast_recovery_tsn)) { /* Setup so we will exit RFC2582 fast recovery */ will_exit_fast_recovery = 1; } @@ -4909,37 +4812,33 @@ done_with_it: } else if (asoc->saw_sack_with_frags) { int cnt_revoked = 0; - tp1 = TAILQ_FIRST(&asoc->sent_queue); - if (tp1 != NULL) { - /* Peer revoked all dg's marked or acked */ - TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { - if (tp1->sent == SCTP_DATAGRAM_ACKED) { - tp1->sent = SCTP_DATAGRAM_SENT; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { - sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE, - tp1->whoTo->flight_size, - tp1->book_size, - (uintptr_t) tp1->whoTo, - tp1->rec.data.TSN_seq); - } - sctp_flight_size_increase(tp1); - sctp_total_flight_increase(stcb, tp1); - tp1->rec.data.chunk_was_revoked = 1; - /* - * To ensure that this increase in - * flightsize, which is artificial, - * does not throttle the sender, we - * also increase the cwnd - * artificially. - */ - tp1->whoTo->cwnd += tp1->book_size; - cnt_revoked++; + /* Peer revoked all dg's marked or acked */ + TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { + if (tp1->sent == SCTP_DATAGRAM_ACKED) { + tp1->sent = SCTP_DATAGRAM_SENT; + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { + sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE, + tp1->whoTo->flight_size, + tp1->book_size, + (uintptr_t) tp1->whoTo, + tp1->rec.data.TSN_seq); } - } - if (cnt_revoked) { - reneged_all = 1; + sctp_flight_size_increase(tp1); + sctp_total_flight_increase(stcb, tp1); + tp1->rec.data.chunk_was_revoked = 1; + /* + * To ensure that this increase in + * flightsize, which is artificial, does not + * throttle the sender, we also increase the + * cwnd artificially. + */ + tp1->whoTo->cwnd += tp1->book_size; + cnt_revoked++; } } + if (cnt_revoked) { + reneged_all = 1; + } asoc->saw_sack_with_frags = 0; } if (num_nr_seg > 0) @@ -4948,19 +4847,54 @@ done_with_it: asoc->saw_sack_with_nr_frags = 0; /* JRS - Use the congestion control given in the CC module */ - asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery); - + if (ecne_seen == 0) { + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + if (net->net_ack2 > 0) { + /* + * Karn's rule applies to clearing error + * count, this is optional. + */ + net->error_count = 0; + if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { + /* addr came good */ + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, + 0, (void *)net, SCTP_SO_NOT_LOCKED); + } + if (net == stcb->asoc.primary_destination) { + if (stcb->asoc.alternate) { + /* + * release the alternate, + * primary is good + */ + sctp_free_remote_addr(stcb->asoc.alternate); + stcb->asoc.alternate = NULL; + } + } + if (net->dest_state & SCTP_ADDR_PF) { + net->dest_state &= ~SCTP_ADDR_PF; + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); + asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net); + /* Done with this net */ + net->net_ack = 0; + } + /* restore any doubled timers */ + net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv; + if (net->RTO < stcb->asoc.minrto) { + net->RTO = stcb->asoc.minrto; + } + if (net->RTO > stcb->asoc.maxrto) { + net->RTO = stcb->asoc.maxrto; + } + } + } + asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery); + } if (TAILQ_EMPTY(&asoc->sent_queue)) { /* nothing left in-flight */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { /* stop all timers */ - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck4); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_29); - } - } sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30); net->flight_size = 0; @@ -5017,24 +4951,22 @@ done_with_it: abort_out_now: *abort_now = 1; /* XXX */ - oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), + oper = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA); if (oper) { struct sctp_paramhdr *ph; - uint32_t *ippp; - SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) + - sizeof(uint32_t); + SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr); ph = mtod(oper, struct sctp_paramhdr *); ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(oper)); - ippp = (uint32_t *) (ph + 1); - *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_31); } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_31; - sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); return; } else { + struct sctp_nets *netp; + if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); @@ -5042,27 +4974,37 @@ done_with_it: SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); sctp_stop_timers_for_shutdown(stcb); - sctp_send_shutdown(stcb, - stcb->asoc.primary_destination); + if (asoc->alternate) { + netp = asoc->alternate; + } else { + netp = asoc->primary_destination; + } + sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, - stcb->sctp_ep, stcb, asoc->primary_destination); + stcb->sctp_ep, stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, - stcb->sctp_ep, stcb, asoc->primary_destination); + stcb->sctp_ep, stcb, netp); } return; } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) && (asoc->stream_queue_cnt == 0)) { + struct sctp_nets *netp; + if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) { goto abort_out_now; } SCTP_STAT_DECR_GAUGE32(sctps_currestab); SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); - sctp_send_shutdown_ack(stcb, - stcb->asoc.primary_destination); sctp_stop_timers_for_shutdown(stcb); + if (asoc->alternate) { + netp = asoc->alternate; + } else { + netp = asoc->primary_destination; + } + sctp_send_shutdown_ack(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, - stcb->sctp_ep, stcb, asoc->primary_destination); + stcb->sctp_ep, stcb, netp); return; } } @@ -5079,7 +5021,7 @@ done_with_it: * to be done. Setting this_sack_lowest_newack to the cum_ack will * automatically ensure that. */ - if ((asoc->sctp_cmt_on_off == 1) && + if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac) && (cmt_dac_flag == 0)) { this_sack_lowest_newack = cum_ack; @@ -5091,70 +5033,13 @@ done_with_it: /* JRS - Use the congestion control given in the CC module */ asoc->cc_functions.sctp_cwnd_update_after_fr(stcb, asoc); - /****************************************************************** - * Here we do the stuff with ECN Nonce checking. - * We basically check to see if the nonce sum flag was incorrect - * or if resynchronization needs to be done. Also if we catch a - * misbehaving receiver we give him the kick. - ******************************************************************/ - - if (asoc->ecn_nonce_allowed) { - if (asoc->nonce_sum_check) { - if (nonce_sum_flag != ((asoc->nonce_sum_expect_base + ecn_seg_sums) & SCTP_SACK_NONCE_SUM)) { - if (asoc->nonce_wait_for_ecne == 0) { - struct sctp_tmit_chunk *lchk; - - lchk = TAILQ_FIRST(&asoc->send_queue); - asoc->nonce_wait_for_ecne = 1; - if (lchk) { - asoc->nonce_wait_tsn = lchk->rec.data.TSN_seq; - } else { - asoc->nonce_wait_tsn = asoc->sending_seq; - } - } else { - if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_wait_tsn, MAX_TSN) || - (asoc->last_acked_seq == asoc->nonce_wait_tsn)) { - /* - * Misbehaving peer. We need - * to react to this guy - */ - asoc->ecn_allowed = 0; - asoc->ecn_nonce_allowed = 0; - } - } - } - } else { - /* See if Resynchronization Possible */ - if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_resync_tsn, MAX_TSN)) { - asoc->nonce_sum_check = 1; - /* - * now we must calculate what the base is. - * We do this based on two things, we know - * the total's for all the segments - * gap-acked in the SACK, its stored in - * ecn_seg_sums. We also know the SACK's - * nonce sum, its in nonce_sum_flag. So we - * can build a truth table to back-calculate - * the new value of - * asoc->nonce_sum_expect_base: - * - * SACK-flag-Value Seg-Sums Base 0 0 0 - * 1 0 1 0 1 1 1 - * 1 0 - */ - asoc->nonce_sum_expect_base = (ecn_seg_sums ^ nonce_sum_flag) & SCTP_SACK_NONCE_SUM; - } - } - } /* Now are we exiting loss recovery ? */ if (will_exit_fast_recovery) { /* Ok, we must exit fast recovery */ asoc->fast_retran_loss_recovery = 0; } if ((asoc->sat_t3_loss_recovery) && - ((compare_with_wrap(asoc->last_acked_seq, asoc->sat_t3_recovery_tsn, - MAX_TSN) || - (asoc->last_acked_seq == asoc->sat_t3_recovery_tsn)))) { + SCTP_TSN_GE(asoc->last_acked_seq, asoc->sat_t3_recovery_tsn)) { /* end satellite t3 loss recovery */ asoc->sat_t3_loss_recovery = 0; } @@ -5200,7 +5085,7 @@ again: */ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (tp1->window_probe) { - sctp_window_probe_recovery(stcb, asoc, net, tp1); + sctp_window_probe_recovery(stcb, asoc, tp1); break; } } @@ -5230,13 +5115,6 @@ again: stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_22); } - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck4); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_23); - } - } } } if ((j == 0) && @@ -5272,7 +5150,7 @@ again: /* (section 4.2) */ /*********************************************/ /* C1. update advancedPeerAckPoint */ - if (compare_with_wrap(cum_ack, asoc->advanced_peer_ack_point, MAX_TSN)) { + if (SCTP_TSN_GT(cum_ack, asoc->advanced_peer_ack_point)) { asoc->advanced_peer_ack_point = cum_ack; } /* C2. try to further move advancedPeerAckPoint ahead */ @@ -5283,28 +5161,17 @@ again: old_adv_peer_ack_point = asoc->advanced_peer_ack_point; lchk = sctp_try_advance_peer_ack_point(stcb, asoc); /* C3. See if we need to send a Fwd-TSN */ - if (compare_with_wrap(asoc->advanced_peer_ack_point, cum_ack, - MAX_TSN)) { + if (SCTP_TSN_GT(asoc->advanced_peer_ack_point, cum_ack)) { /* - * ISSUE with ECN, see FWD-TSN processing for notes - * on issues that will occur when the ECN NONCE - * stuff is put into SCTP for cross checking. + * ISSUE with ECN, see FWD-TSN processing. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) { sctp_misc_ints(SCTP_FWD_TSN_CHECK, 0xee, cum_ack, asoc->advanced_peer_ack_point, old_adv_peer_ack_point); } - if (compare_with_wrap(asoc->advanced_peer_ack_point, old_adv_peer_ack_point, - MAX_TSN)) { - + if (SCTP_TSN_GT(asoc->advanced_peer_ack_point, old_adv_peer_ack_point)) { send_forward_tsn(stcb, asoc); - /* - * ECN Nonce: Disable Nonce Sum check when - * FWD TSN is sent and store resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->advanced_peer_ack_point; } else if (lchk) { /* try to FR fwd-tsn's that get lost too */ if (lchk->rec.data.fwd_tsn_cnt >= 3) { @@ -5328,8 +5195,7 @@ again: } void -sctp_update_acked(struct sctp_tcb *stcb, struct sctp_shutdown_chunk *cp, - struct sctp_nets *netp, int *abort_flag) +sctp_update_acked(struct sctp_tcb *stcb, struct sctp_shutdown_chunk *cp, int *abort_flag) { /* Copy cum-ack */ uint32_t cum_ack, a_rwnd; @@ -5339,7 +5205,7 @@ sctp_update_acked(struct sctp_tcb *stcb, struct sctp_shutdown_chunk *cp, a_rwnd = stcb->asoc.peers_rwnd + stcb->asoc.total_flight; /* Now call the express sack handling */ - sctp_express_handle_sack(stcb, cum_ack, a_rwnd, 0, abort_flag); + sctp_express_handle_sack(stcb, cum_ack, a_rwnd, abort_flag, 0); } static void @@ -5356,11 +5222,8 @@ sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb, * First deliver anything prior to and including the stream no that * came in */ - ctl = TAILQ_FIRST(&strmin->inqueue); - while (ctl) { - nctl = TAILQ_NEXT(ctl, next); - if (compare_with_wrap(tt, ctl->sinfo_ssn, MAX_SEQ) || - (tt == ctl->sinfo_ssn)) { + TAILQ_FOREACH_SAFE(ctl, &strmin->inqueue, next, nctl) { + if (SCTP_SSN_GE(tt, ctl->sinfo_ssn)) { /* this is deliverable now */ TAILQ_REMOVE(&strmin->inqueue, ctl, next); /* subtract pending on streams */ @@ -5377,16 +5240,13 @@ sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb, /* no more delivery now. */ break; } - ctl = nctl; } /* * now we must deliver things in queue the normal way if any are * now ready. */ tt = strmin->last_sequence_delivered + 1; - ctl = TAILQ_FIRST(&strmin->inqueue); - while (ctl) { - nctl = TAILQ_NEXT(ctl, next); + TAILQ_FOREACH_SAFE(ctl, &strmin->inqueue, next, nctl) { if (tt == ctl->sinfo_ssn) { /* this is deliverable now */ TAILQ_REMOVE(&strmin->inqueue, ctl, next); @@ -5406,7 +5266,6 @@ sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb, } else { break; } - ctl = nctl; } } @@ -5415,87 +5274,69 @@ sctp_flush_reassm_for_str_seq(struct sctp_tcb *stcb, struct sctp_association *asoc, uint16_t stream, uint16_t seq) { - struct sctp_tmit_chunk *chk, *at; + struct sctp_tmit_chunk *chk, *nchk; - if (!TAILQ_EMPTY(&asoc->reasmqueue)) { - /* For each one on here see if we need to toss it */ + /* For each one on here see if we need to toss it */ + /* + * For now large messages held on the reasmqueue that are complete + * will be tossed too. We could in theory do more work to spin + * through and stop after dumping one msg aka seeing the start of a + * new msg at the head, and call the delivery function... to see if + * it can be delivered... But for now we just dump everything on the + * queue. + */ + TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) { /* - * For now large messages held on the reasmqueue that are - * complete will be tossed too. We could in theory do more - * work to spin through and stop after dumping one msg aka - * seeing the start of a new msg at the head, and call the - * delivery function... to see if it can be delivered... But - * for now we just dump everything on the queue. + * Do not toss it if on a different stream or marked for + * unordered delivery in which case the stream sequence + * number has no meaning. */ - chk = TAILQ_FIRST(&asoc->reasmqueue); - while (chk) { - at = TAILQ_NEXT(chk, sctp_next); - /* - * Do not toss it if on a different stream or marked - * for unordered delivery in which case the stream - * sequence number has no meaning. - */ - if ((chk->rec.data.stream_number != stream) || - ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == SCTP_DATA_UNORDERED)) { - chk = at; - continue; + if ((chk->rec.data.stream_number != stream) || + ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == SCTP_DATA_UNORDERED)) { + continue; + } + if (chk->rec.data.stream_seq == seq) { + /* It needs to be tossed */ + TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); + if (SCTP_TSN_GT(chk->rec.data.TSN_seq, asoc->tsn_last_delivered)) { + asoc->tsn_last_delivered = chk->rec.data.TSN_seq; + asoc->str_of_pdapi = chk->rec.data.stream_number; + asoc->ssn_of_pdapi = chk->rec.data.stream_seq; + asoc->fragment_flags = chk->rec.data.rcv_flags; } - if (chk->rec.data.stream_seq == seq) { - /* It needs to be tossed */ - TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); - if (compare_with_wrap(chk->rec.data.TSN_seq, - asoc->tsn_last_delivered, MAX_TSN)) { - asoc->tsn_last_delivered = - chk->rec.data.TSN_seq; - asoc->str_of_pdapi = - chk->rec.data.stream_number; - asoc->ssn_of_pdapi = - chk->rec.data.stream_seq; - asoc->fragment_flags = - chk->rec.data.rcv_flags; - } - asoc->size_on_reasm_queue -= chk->send_size; - sctp_ucount_decr(asoc->cnt_on_reasm_queue); - - /* Clear up any stream problem */ - if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) != - SCTP_DATA_UNORDERED && - (compare_with_wrap(chk->rec.data.stream_seq, - asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered, - MAX_SEQ))) { - /* - * We must dump forward this streams - * sequence number if the chunk is - * not unordered that is being - * skipped. There is a chance that - * if the peer does not include the - * last fragment in its FWD-TSN we - * WILL have a problem here since - * you would have a partial chunk in - * queue that may not be - * deliverable. Also if a Partial - * delivery API as started the user - * may get a partial chunk. The next - * read returning a new chunk... - * really ugly but I see no way - * around it! Maybe a notify?? - */ - asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered = - chk->rec.data.stream_seq; - } - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } - sctp_free_a_chunk(stcb, chk); - } else if (compare_with_wrap(chk->rec.data.stream_seq, seq, MAX_SEQ)) { + asoc->size_on_reasm_queue -= chk->send_size; + sctp_ucount_decr(asoc->cnt_on_reasm_queue); + + /* Clear up any stream problem */ + if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) != SCTP_DATA_UNORDERED && + SCTP_SSN_GT(chk->rec.data.stream_seq, asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered)) { /* - * If the stream_seq is > than the purging - * one, we are done + * We must dump forward this streams + * sequence number if the chunk is not + * unordered that is being skipped. There is + * a chance that if the peer does not + * include the last fragment in its FWD-TSN + * we WILL have a problem here since you + * would have a partial chunk in queue that + * may not be deliverable. Also if a Partial + * delivery API as started the user may get + * a partial chunk. The next read returning + * a new chunk... really ugly but I see no + * way around it! Maybe a notify?? */ - break; + asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered = chk->rec.data.stream_seq; } - chk = at; + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; + } + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); + } else if (SCTP_SSN_GT(chk->rec.data.stream_seq, seq)) { + /* + * If the stream_seq is > than the purging one, we + * are done + */ + break; } } } @@ -5506,17 +5347,6 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb, struct sctp_forward_tsn_chunk *fwd, int *abort_flag, struct mbuf *m, int offset) { - /* - * ISSUES that MUST be fixed for ECN! When we are the sender of the - * forward TSN, when the SACK comes back that acknowledges the - * FWD-TSN we must reset the NONCE sum to match correctly. This will - * get quite tricky since we may have sent more data interveneing - * and must carefully account for what the SACK says on the nonce - * and any gaps that are reported. This work will NOT be done here, - * but I note it here since it is really related to PR-SCTP and - * FWD-TSN's - */ - /* The pr-sctp fwd tsn */ /* * here we will perform all the data receiver side steps for @@ -5531,13 +5361,12 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb, */ struct sctp_association *asoc; uint32_t new_cum_tsn, gap; - unsigned int i, fwd_sz, cumack_set_flag, m_size; + unsigned int i, fwd_sz, m_size; uint32_t str_seq; struct sctp_stream_in *strm; - struct sctp_tmit_chunk *chk, *at; + struct sctp_tmit_chunk *chk, *nchk; struct sctp_queued_to_read *ctl, *sv; - cumack_set_flag = 0; asoc = &stcb->asoc; if ((fwd_sz = ntohs(fwd->ch.chunk_length)) < sizeof(struct sctp_forward_tsn_chunk)) { SCTPDBG(SCTP_DEBUG_INDATA1, @@ -5550,8 +5379,7 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb, /*************************************************************/ new_cum_tsn = ntohl(fwd->new_cumulative_tsn); - if (compare_with_wrap(asoc->cumulative_tsn, new_cum_tsn, MAX_TSN) || - asoc->cumulative_tsn == new_cum_tsn) { + if (SCTP_TSN_GE(asoc->cumulative_tsn, new_cum_tsn)) { /* Already got there ... */ return; } @@ -5589,8 +5417,7 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb, *ippp = new_cum_tsn; } stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33; - sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); return; } SCTP_STAT_INCR(sctps_fwdtsn_map_over); @@ -5605,14 +5432,13 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(0, 3, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); } - asoc->last_echo_tsn = asoc->highest_tsn_inside_map; } else { SCTP_TCB_LOCK_ASSERT(stcb); for (i = 0; i <= gap; i++) { if (!SCTP_IS_TSN_PRESENT(asoc->mapping_array, i) && !SCTP_IS_TSN_PRESENT(asoc->nr_mapping_array, i)) { SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, i); - if (compare_with_wrap(asoc->mapping_array_base_tsn + i, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->mapping_array_base_tsn + i, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = asoc->mapping_array_base_tsn + i; } } @@ -5628,77 +5454,58 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb, if (asoc->fragmented_delivery_inprogress) { sctp_service_reassembly(stcb, asoc); } - if (!TAILQ_EMPTY(&asoc->reasmqueue)) { - /* For each one on here see if we need to toss it */ - /* - * For now large messages held on the reasmqueue that are - * complete will be tossed too. We could in theory do more - * work to spin through and stop after dumping one msg aka - * seeing the start of a new msg at the head, and call the - * delivery function... to see if it can be delivered... But - * for now we just dump everything on the queue. - */ - chk = TAILQ_FIRST(&asoc->reasmqueue); - while (chk) { - at = TAILQ_NEXT(chk, sctp_next); - if ((compare_with_wrap(new_cum_tsn, - chk->rec.data.TSN_seq, MAX_TSN)) || - (new_cum_tsn == chk->rec.data.TSN_seq)) { - /* It needs to be tossed */ - TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); - if (compare_with_wrap(chk->rec.data.TSN_seq, - asoc->tsn_last_delivered, MAX_TSN)) { - asoc->tsn_last_delivered = - chk->rec.data.TSN_seq; - asoc->str_of_pdapi = - chk->rec.data.stream_number; - asoc->ssn_of_pdapi = - chk->rec.data.stream_seq; - asoc->fragment_flags = - chk->rec.data.rcv_flags; - } - asoc->size_on_reasm_queue -= chk->send_size; - sctp_ucount_decr(asoc->cnt_on_reasm_queue); - - /* Clear up any stream problem */ - if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) != - SCTP_DATA_UNORDERED && - (compare_with_wrap(chk->rec.data.stream_seq, - asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered, - MAX_SEQ))) { - /* - * We must dump forward this streams - * sequence number if the chunk is - * not unordered that is being - * skipped. There is a chance that - * if the peer does not include the - * last fragment in its FWD-TSN we - * WILL have a problem here since - * you would have a partial chunk in - * queue that may not be - * deliverable. Also if a Partial - * delivery API as started the user - * may get a partial chunk. The next - * read returning a new chunk... - * really ugly but I see no way - * around it! Maybe a notify?? - */ - asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered = - chk->rec.data.stream_seq; - } - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } - sctp_free_a_chunk(stcb, chk); - } else { + /* For each one on here see if we need to toss it */ + /* + * For now large messages held on the reasmqueue that are complete + * will be tossed too. We could in theory do more work to spin + * through and stop after dumping one msg aka seeing the start of a + * new msg at the head, and call the delivery function... to see if + * it can be delivered... But for now we just dump everything on the + * queue. + */ + TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) { + if (SCTP_TSN_GE(new_cum_tsn, chk->rec.data.TSN_seq)) { + /* It needs to be tossed */ + TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); + if (SCTP_TSN_GT(chk->rec.data.TSN_seq, asoc->tsn_last_delivered)) { + asoc->tsn_last_delivered = chk->rec.data.TSN_seq; + asoc->str_of_pdapi = chk->rec.data.stream_number; + asoc->ssn_of_pdapi = chk->rec.data.stream_seq; + asoc->fragment_flags = chk->rec.data.rcv_flags; + } + asoc->size_on_reasm_queue -= chk->send_size; + sctp_ucount_decr(asoc->cnt_on_reasm_queue); + + /* Clear up any stream problem */ + if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) != SCTP_DATA_UNORDERED && + SCTP_SSN_GT(chk->rec.data.stream_seq, asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered)) { /* - * Ok we have gone beyond the end of the - * fwd-tsn's mark. + * We must dump forward this streams + * sequence number if the chunk is not + * unordered that is being skipped. There is + * a chance that if the peer does not + * include the last fragment in its FWD-TSN + * we WILL have a problem here since you + * would have a partial chunk in queue that + * may not be deliverable. Also if a Partial + * delivery API as started the user may get + * a partial chunk. The next read returning + * a new chunk... really ugly but I see no + * way around it! Maybe a notify?? */ - break; + asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered = chk->rec.data.stream_seq; + } + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; } - chk = at; + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); + } else { + /* + * Ok we have gone beyond the end of the fwd-tsn's + * mark. + */ + break; } } /*******************************************************/ @@ -5769,17 +5576,15 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb, stcb->asoc.control_pdapi = sv; break; } else if ((ctl->sinfo_stream == stseq->stream) && - (compare_with_wrap(ctl->sinfo_ssn, stseq->sequence, MAX_SEQ))) { + SCTP_SSN_GT(ctl->sinfo_ssn, stseq->sequence)) { /* We are past our victim SSN */ break; } } strm = &asoc->strmin[stseq->stream]; - if (compare_with_wrap(stseq->sequence, - strm->last_sequence_delivered, MAX_SEQ)) { + if (SCTP_SSN_GT(stseq->sequence, strm->last_sequence_delivered)) { /* Update the sequence number */ - strm->last_sequence_delivered = - stseq->sequence; + strm->last_sequence_delivered = stseq->sequence; } /* now kick the stream the new way */ /* sa_ignore NO_NULL_CHK */ diff --git a/freebsd/sys/netinet/sctp_indata.h b/freebsd/sys/netinet/sctp_indata.h index 79978a5c..5eaa1f4b 100644 --- a/freebsd/sys/netinet/sctp_indata.h +++ b/freebsd/sys/netinet/sctp_indata.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,13 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_indata.h,v 1.9 2005/03/06 16:04:17 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_indata_h__ -#define __sctp_indata_h__ +#ifndef _NETINET_SCTP_INDATA_H_ +#define _NETINET_SCTP_INDATA_H_ #if defined(_KERNEL) || defined(__Userspace__) @@ -81,11 +81,6 @@ struct mbuf * sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo); -char * -sctp_build_ctl_cchunk(struct sctp_inpcb *inp, - int *control_len, - struct sctp_sndrcvinfo *sinfo); - void sctp_set_rwnd(struct sctp_tcb *, struct sctp_association *); uint32_t @@ -93,14 +88,14 @@ sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc); void sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, - uint32_t rwnd, int nonce_sum_flag, int *abort_now); + uint32_t rwnd, int *abort_now, int ecne_seen); void sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, - struct sctp_tcb *stcb, struct sctp_nets *net_from, + struct sctp_tcb *stcb, uint16_t num_seg, uint16_t num_nr_seg, uint16_t num_dup, int *abort_now, uint8_t flags, - uint32_t cum_ack, uint32_t rwnd); + uint32_t cum_ack, uint32_t rwnd, int ecne_seen); /* draft-ietf-tsvwg-usctp */ void @@ -113,17 +108,20 @@ struct sctp_tmit_chunk * void sctp_service_queues(struct sctp_tcb *, struct sctp_association *); void -sctp_update_acked(struct sctp_tcb *, struct sctp_shutdown_chunk *, - struct sctp_nets *, int *); + sctp_update_acked(struct sctp_tcb *, struct sctp_shutdown_chunk *, int *); int -sctp_process_data(struct mbuf **, int, int *, int, struct sctphdr *, +sctp_process_data(struct mbuf **, int, int *, int, + struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *, struct sctp_inpcb *, struct sctp_tcb *, - struct sctp_nets *, uint32_t *); + struct sctp_nets *, uint32_t *, + uint8_t, uint32_t, + uint32_t, uint16_t); void sctp_slide_mapping_arrays(struct sctp_tcb *stcb); -void sctp_sack_check(struct sctp_tcb *, int, int *); +void sctp_sack_check(struct sctp_tcb *, int); #endif #endif diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c index 965bec86..817a95f7 100644 --- a/freebsd/sys/netinet/sctp_input.c +++ b/freebsd/sys/netinet/sctp_input.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,8 +32,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_input.c,v 1.27 2005/03/06 16:04:17 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include @@ -81,61 +82,38 @@ sctp_stop_all_cookie_timers(struct sctp_tcb *stcb) /* INIT handler */ static void -sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, - struct sctp_init_chunk *cp, struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id, uint16_t port) +sctp_handle_init(struct mbuf *m, int iphlen, int offset, + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, + struct sctp_init_chunk *cp, struct sctp_inpcb *inp, + struct sctp_tcb *stcb, int *abort_no_unlock, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) { struct sctp_init *init; struct mbuf *op_err; - uint32_t init_limit; SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init: handling INIT tcb:%p\n", - stcb); + (void *)stcb); if (stcb == NULL) { SCTP_INP_RLOCK(inp); - if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { - goto outnow; - } - } - op_err = NULL; - init = &cp->init; - /* First are we accepting? */ - if ((inp->sctp_socket->so_qlimit == 0) && (stcb == NULL)) { - SCTPDBG(SCTP_DEBUG_INPUT2, - "sctp_handle_init: Abort, so_qlimit:%d\n", - inp->sctp_socket->so_qlimit); - /* - * FIX ME ?? What about TCP model and we have a - * match/restart case? Actually no fix is needed. the lookup - * will always find the existing assoc so stcb would not be - * NULL. It may be questionable to do this since we COULD - * just send back the INIT-ACK and hope that the app did - * accept()'s by the time the COOKIE was sent. But there is - * a price to pay for COOKIE generation and I don't want to - * pay it on the chance that the app will actually do some - * accepts(). The App just looses and should NOT be in this - * state :-) - */ - sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, - vrf_id, port); - if (stcb) - *abort_no_unlock = 1; - goto outnow; } + /* validate length */ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) { - /* Invalid length */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, + sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, + use_mflowid, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } /* validate parameters */ + init = &cp->init; if (init->initiate_tag == 0) { /* protocol error... send abort */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, + sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, + use_mflowid, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; @@ -144,7 +122,8 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) { /* invalid parameter... send abort */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, + sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, + use_mflowid, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; @@ -153,7 +132,8 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, if (init->num_inbound_streams == 0) { /* protocol error... send abort */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, + sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, + use_mflowid, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; @@ -162,25 +142,64 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, if (init->num_outbound_streams == 0) { /* protocol error... send abort */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, + sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, + use_mflowid, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } - init_limit = offset + ntohs(cp->ch.chunk_length); if (sctp_validate_init_auth_params(m, offset + sizeof(*cp), - init_limit)) { + offset + ntohs(cp->ch.chunk_length))) { /* auth parameter(s) error... send abort */ - sctp_abort_association(inp, stcb, m, iphlen, sh, NULL, vrf_id, port); + sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, NULL, + use_mflowid, mflowid, + vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } - /* send an INIT-ACK w/cookie */ - SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n"); - sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, sh, cp, vrf_id, port, - ((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED)); + /* + * We are only accepting if we have a socket with positive + * so_qlimit. + */ + if ((stcb == NULL) && + ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || + (inp->sctp_socket == NULL) || + (inp->sctp_socket->so_qlimit == 0))) { + /* + * FIX ME ?? What about TCP model and we have a + * match/restart case? Actually no fix is needed. the lookup + * will always find the existing assoc so stcb would not be + * NULL. It may be questionable to do this since we COULD + * just send back the INIT-ACK and hope that the app did + * accept()'s by the time the COOKIE was sent. But there is + * a price to pay for COOKIE generation and I don't want to + * pay it on the chance that the app will actually do some + * accepts(). The App just looses and should NOT be in this + * state :-) + */ + if (SCTP_BASE_SYSCTL(sctp_blackhole) == 0) { + sctp_send_abort(m, iphlen, src, dst, sh, 0, NULL, + use_mflowid, mflowid, + vrf_id, port); + } + goto outnow; + } + if ((stcb != NULL) && + (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT)) { + SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending SHUTDOWN-ACK\n"); + sctp_send_shutdown_ack(stcb, NULL); + sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED); + } else { + SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n"); + sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, src, dst, + sh, cp, + use_mflowid, mflowid, + vrf_id, port, + ((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED)); + } outnow: if (stcb == NULL) { SCTP_INP_RUNLOCK(inp); @@ -192,11 +211,15 @@ outnow: */ int -sctp_is_there_unsent_data(struct sctp_tcb *stcb) +sctp_is_there_unsent_data(struct sctp_tcb *stcb, int so_locked +#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) + SCTP_UNUSED +#endif +) { int unsent_data = 0; + unsigned int i; struct sctp_stream_queue_pending *sp; - struct sctp_stream_out *strq; struct sctp_association *asoc; /* @@ -207,12 +230,11 @@ sctp_is_there_unsent_data(struct sctp_tcb *stcb) */ asoc = &stcb->asoc; SCTP_TCB_SEND_LOCK(stcb); - if (!TAILQ_EMPTY(&asoc->out_wheel)) { + if (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) { /* Check to see if some data queued */ - TAILQ_FOREACH(strq, &asoc->out_wheel, next_spoke) { - is_there_another: + for (i = 0; i < stcb->asoc.streamoutcnt; i++) { /* sa_ignore FREED_MEMORY */ - sp = TAILQ_FIRST(&strq->outqueue); + sp = TAILQ_FIRST(&stcb->asoc.strmout[i].outqueue); if (sp == NULL) { continue; } @@ -233,7 +255,7 @@ sctp_is_there_unsent_data(struct sctp_tcb *stcb) sp->put_last_out); } atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1); - TAILQ_REMOVE(&strq->outqueue, sp, next); + TAILQ_REMOVE(&stcb->asoc.strmout[i].outqueue, sp, next); if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; @@ -242,11 +264,10 @@ sctp_is_there_unsent_data(struct sctp_tcb *stcb) sctp_m_freem(sp->data); sp->data = NULL; } - sctp_free_a_strmoq(stcb, sp); - goto is_there_another; + sctp_free_a_strmoq(stcb, sp, so_locked); } else { unsent_data++; - continue; + break; } } } @@ -255,8 +276,7 @@ sctp_is_there_unsent_data(struct sctp_tcb *stcb) } static int -sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb, - struct sctp_nets *net) +sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb) { struct sctp_init *init; struct sctp_association *asoc; @@ -268,11 +288,13 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb, /* save off parameters */ asoc->peer_vtag = ntohl(init->initiate_tag); asoc->peers_rwnd = ntohl(init->a_rwnd); + /* init tsn's */ + asoc->highest_tsn_inside_map = asoc->asconf_seq_in = ntohl(init->initial_tsn) - 1; + if (!TAILQ_EMPTY(&asoc->nets)) { /* update any ssthresh's that may have a default */ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) { lnet->ssthresh = asoc->peers_rwnd; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_INITIALIZATION); } @@ -282,43 +304,43 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb, if (asoc->pre_open_streams > ntohs(init->num_inbound_streams)) { unsigned int newcnt; struct sctp_stream_out *outs; - struct sctp_stream_queue_pending *sp; - struct sctp_tmit_chunk *chk, *chk_next; + struct sctp_stream_queue_pending *sp, *nsp; + struct sctp_tmit_chunk *chk, *nchk; /* abandon the upper streams */ newcnt = ntohs(init->num_inbound_streams); - if (!TAILQ_EMPTY(&asoc->send_queue)) { - chk = TAILQ_FIRST(&asoc->send_queue); - while (chk) { - chk_next = TAILQ_NEXT(chk, sctp_next); - if (chk->rec.data.stream_number >= newcnt) { - TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); - asoc->send_queue_cnt--; - if (chk->data != NULL) { - sctp_free_bufspace(stcb, asoc, chk, 1); - sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, - SCTP_NOTIFY_DATAGRAM_UNSENT, chk, SCTP_SO_NOT_LOCKED); - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } + TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { + if (chk->rec.data.stream_number >= newcnt) { + TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); + asoc->send_queue_cnt--; + if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) { + asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--; +#ifdef INVARIANTS + } else { + panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number); +#endif + } + if (chk->data != NULL) { + sctp_free_bufspace(stcb, asoc, chk, 1); + sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, + 0, chk, SCTP_SO_NOT_LOCKED); + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; } - sctp_free_a_chunk(stcb, chk); - /* sa_ignore FREED_MEMORY */ } - chk = chk_next; + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); + /* sa_ignore FREED_MEMORY */ } } if (asoc->strmout) { for (i = newcnt; i < asoc->pre_open_streams; i++) { outs = &asoc->strmout[i]; - sp = TAILQ_FIRST(&outs->outqueue); - while (sp) { + TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) { TAILQ_REMOVE(&outs->outqueue, sp, next); asoc->stream_queue_cnt--; sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, - stcb, SCTP_NOTIFY_DATAGRAM_UNSENT, - sp, SCTP_SO_NOT_LOCKED); + stcb, 0, sp, SCTP_SO_NOT_LOCKED); if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; @@ -328,9 +350,8 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb, sp->net = NULL; } /* Free the chunk */ - sctp_free_a_strmoq(stcb, sp); + sctp_free_a_strmoq(stcb, sp, SCTP_SO_NOT_LOCKED); /* sa_ignore FREED_MEMORY */ - sp = TAILQ_FIRST(&outs->outqueue); } } } @@ -339,8 +360,7 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb, } SCTP_TCB_SEND_UNLOCK(stcb); asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams; - /* init tsn's */ - asoc->highest_tsn_inside_map = asoc->asconf_seq_in = ntohl(init->initial_tsn) - 1; + /* EY - nr_sack: initialize highest tsn in nr_mapping_array */ asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { @@ -351,24 +371,22 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb, asoc->mapping_array_base_tsn = ntohl(init->initial_tsn); asoc->tsn_last_delivered = asoc->cumulative_tsn = asoc->asconf_seq_in; - asoc->last_echo_tsn = asoc->asconf_seq_in; + asoc->advanced_peer_ack_point = asoc->last_acked_seq; /* open the requested streams */ if (asoc->strmin != NULL) { /* Free the old ones */ - struct sctp_queued_to_read *ctl; + struct sctp_queued_to_read *ctl, *nctl; for (i = 0; i < asoc->streamincnt; i++) { - ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue); - while (ctl) { + TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[i].inqueue, next, nctl) { TAILQ_REMOVE(&asoc->strmin[i].inqueue, ctl, next); sctp_free_remote_addr(ctl->whoFrom); ctl->whoFrom = NULL; sctp_m_freem(ctl->data); ctl->data = NULL; sctp_free_a_readq(stcb, ctl); - ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue); } } SCTP_FREE(asoc->strmin, SCTP_M_STRMI); @@ -413,8 +431,11 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb, */ static int sctp_process_init_ack(struct mbuf *m, int iphlen, int offset, - struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb, - struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id) + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, + struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb, + struct sctp_nets *net, int *abort_no_unlock, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id) { struct sctp_association *asoc; struct mbuf *op_err; @@ -431,40 +452,40 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset, &abort_flag, (struct sctp_chunkhdr *)cp, &nat_friendly); if (abort_flag) { /* Send an abort and notify peer */ - sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_CAUSE_PROTOCOL_VIOLATION, op_err, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_no_unlock = 1; return (-1); } asoc = &stcb->asoc; asoc->peer_supports_nat = (uint8_t) nat_friendly; /* process the peer's parameters in the INIT-ACK */ - retval = sctp_process_init((struct sctp_init_chunk *)cp, stcb, net); + retval = sctp_process_init((struct sctp_init_chunk *)cp, stcb); if (retval < 0) { return (retval); } initack_limit = offset + ntohs(cp->ch.chunk_length); /* load all addresses */ - if ((retval = sctp_load_addresses_from_init(stcb, m, iphlen, - (offset + sizeof(struct sctp_init_chunk)), initack_limit, sh, - NULL))) { + if ((retval = sctp_load_addresses_from_init(stcb, m, + (offset + sizeof(struct sctp_init_chunk)), initack_limit, + src, dst, NULL))) { /* Huh, we should abort */ SCTPDBG(SCTP_DEBUG_INPUT1, "Load addresses from INIT causes an abort %d\n", retval); - sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh, - NULL, 0, net->port); + sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, + src, dst, sh, NULL, + use_mflowid, mflowid, + vrf_id, net->port); *abort_no_unlock = 1; return (-1); } /* if the peer doesn't support asconf, flush the asconf queue */ if (asoc->peer_supports_asconf == 0) { - struct sctp_asconf_addr *aparam; + struct sctp_asconf_addr *param, *nparam; - while (!TAILQ_EMPTY(&asoc->asconf_queue)) { - /* sa_ignore FREED_MEMORY */ - aparam = TAILQ_FIRST(&asoc->asconf_queue); - TAILQ_REMOVE(&asoc->asconf_queue, aparam, next); - SCTP_FREE(aparam, SCTP_M_ASC_ADDR); + TAILQ_FOREACH_SAFE(param, &asoc->asconf_queue, next, nparam) { + TAILQ_REMOVE(&asoc->asconf_queue, param, next); + SCTP_FREE(param, SCTP_M_ASC_ADDR); } } stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs, @@ -495,7 +516,8 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset, asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4); /* calculate the RTO */ - net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy); + net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy, + SCTP_RTT_FROM_NON_DATA); retval = sctp_send_cookie_echo(m, offset, stcb, net); if (retval < 0) { @@ -527,7 +549,9 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset, mp->resv = 0; } sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, - sh, op_err, 0, net->port); + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, net->port); *abort_no_unlock = 1; } return (retval); @@ -540,38 +564,58 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net) { struct sockaddr_storage store; - struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; struct sctp_nets *r_net, *f_net; struct timeval tv; int req_prim = 0; + uint16_t old_error_counter; + +#ifdef INET + struct sockaddr_in *sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6; + +#endif if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) { /* Invalid length */ return; } - sin = (struct sockaddr_in *)&store; - sin6 = (struct sockaddr_in6 *)&store; - memset(&store, 0, sizeof(store)); - if (cp->heartbeat.hb_info.addr_family == AF_INET && - cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) { - sin->sin_family = cp->heartbeat.hb_info.addr_family; - sin->sin_len = cp->heartbeat.hb_info.addr_len; - sin->sin_port = stcb->rport; - memcpy(&sin->sin_addr, cp->heartbeat.hb_info.address, - sizeof(sin->sin_addr)); - } else if (cp->heartbeat.hb_info.addr_family == AF_INET6 && - cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) { - sin6->sin6_family = cp->heartbeat.hb_info.addr_family; - sin6->sin6_len = cp->heartbeat.hb_info.addr_len; - sin6->sin6_port = stcb->rport; - memcpy(&sin6->sin6_addr, cp->heartbeat.hb_info.address, - sizeof(sin6->sin6_addr)); - } else { + switch (cp->heartbeat.hb_info.addr_family) { +#ifdef INET + case AF_INET: + if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) { + sin = (struct sockaddr_in *)&store; + sin->sin_family = cp->heartbeat.hb_info.addr_family; + sin->sin_len = cp->heartbeat.hb_info.addr_len; + sin->sin_port = stcb->rport; + memcpy(&sin->sin_addr, cp->heartbeat.hb_info.address, + sizeof(sin->sin_addr)); + } else { + return; + } + break; +#endif +#ifdef INET6 + case AF_INET6: + if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) { + sin6 = (struct sockaddr_in6 *)&store; + sin6->sin6_family = cp->heartbeat.hb_info.addr_family; + sin6->sin6_len = cp->heartbeat.hb_info.addr_len; + sin6->sin6_port = stcb->rport; + memcpy(&sin6->sin6_addr, cp->heartbeat.hb_info.address, + sizeof(sin6->sin6_addr)); + } else { + return; + } + break; +#endif + default: return; } - r_net = sctp_findnet(stcb, (struct sockaddr *)sin); + r_net = sctp_findnet(stcb, (struct sockaddr *)&store); if (r_net == NULL) { SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n"); return; @@ -586,7 +630,6 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp, r_net->dest_state &= ~SCTP_ADDR_UNCONFIRMED; if (r_net->dest_state & SCTP_ADDR_REQ_PRIMARY) { stcb->asoc.primary_destination = r_net; - r_net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY; r_net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY; f_net = TAILQ_FIRST(&stcb->asoc.nets); if (f_net != r_net) { @@ -603,43 +646,37 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp, } sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED); + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net); } + old_error_counter = r_net->error_count; r_net->error_count = 0; r_net->hb_responded = 1; tv.tv_sec = cp->heartbeat.hb_info.time_value_1; tv.tv_usec = cp->heartbeat.hb_info.time_value_2; - if (r_net->dest_state & SCTP_ADDR_NOT_REACHABLE) { - r_net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; + /* Now lets do a RTO with this */ + r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv, sctp_align_safe_nocopy, + SCTP_RTT_FROM_NON_DATA); + if (!(r_net->dest_state & SCTP_ADDR_REACHABLE)) { r_net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, - SCTP_HEARTBEAT_SUCCESS, (void *)r_net, SCTP_SO_NOT_LOCKED); - /* now was it the primary? if so restore */ - if (r_net->dest_state & SCTP_ADDR_WAS_PRIMARY) { - (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, r_net); - } + 0, (void *)r_net, SCTP_SO_NOT_LOCKED); } - /* - * JRS 5/14/07 - If CMT PF is on and the destination is in PF state, - * set the destination to active state and set the cwnd to one or - * two MTU's based on whether PF1 or PF2 is being used. If a T3 - * timer is running, for the destination, stop the timer because a - * PF-heartbeat was received. - */ - if ((stcb->asoc.sctp_cmt_on_off == 1) && - (stcb->asoc.sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, - stcb, net, - SCTP_FROM_SCTP_INPUT + SCTP_LOC_5); - } - net->dest_state &= ~SCTP_ADDR_PF; - net->cwnd = net->mtu * stcb->asoc.sctp_cmt_pf; - SCTPDBG(SCTP_DEBUG_INPUT1, "Destination %p moved from PF to reachable with cwnd %d.\n", - net, net->cwnd); + if (r_net->dest_state & SCTP_ADDR_PF) { + r_net->dest_state &= ~SCTP_ADDR_PF; + stcb->asoc.cc_functions.sctp_cwnd_update_exit_pf(stcb, net); + } + if (old_error_counter > 0) { + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net); + } + if (r_net == stcb->asoc.primary_destination) { + if (stcb->asoc.alternate) { + /* release the alternate, primary is good */ + sctp_free_remote_addr(stcb->asoc.alternate); + stcb->asoc.alternate = NULL; + } } - /* Now lets do a RTO with this */ - r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv, sctp_align_safe_nocopy); /* Mobility adaptation */ if (req_prim) { if ((sctp_is_mobility_feature_on(stcb->sctp_ep, @@ -730,61 +767,51 @@ sctp_handle_nat_missing_state(struct sctp_tcb *stcb, static void -sctp_handle_abort(struct sctp_abort_chunk *cp, +sctp_handle_abort(struct sctp_abort_chunk *abort, struct sctp_tcb *stcb, struct sctp_nets *net) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif uint16_t len; + uint16_t error; SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: handling ABORT\n"); if (stcb == NULL) return; - len = ntohs(cp->ch.chunk_length); + len = ntohs(abort->ch.chunk_length); if (len > sizeof(struct sctp_chunkhdr)) { /* * Need to check the cause codes for our two magic nat * aborts which don't kill the assoc necessarily. */ - struct sctp_abort_chunk *cpnext; struct sctp_missing_nat_state *natc; - uint16_t cause; - cpnext = cp; - cpnext++; - natc = (struct sctp_missing_nat_state *)cpnext; - cause = ntohs(natc->cause); - if (cause == SCTP_CAUSE_NAT_COLLIDING_STATE) { + natc = (struct sctp_missing_nat_state *)(abort + 1); + error = ntohs(natc->cause); + if (error == SCTP_CAUSE_NAT_COLLIDING_STATE) { SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n", - cp->ch.chunk_flags); + abort->ch.chunk_flags); if (sctp_handle_nat_colliding_state(stcb)) { return; } - } else if (cause == SCTP_CAUSE_NAT_MISSING_STATE) { + } else if (error == SCTP_CAUSE_NAT_MISSING_STATE) { SCTPDBG(SCTP_DEBUG_INPUT2, "Received missing state abort flags:%x\n", - cp->ch.chunk_flags); + abort->ch.chunk_flags); if (sctp_handle_nat_missing_state(stcb, net)) { return; } } + } else { + error = 0; } /* stop any receive timers */ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6); /* notify user of the abort and clean up... */ - sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED); + sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED); /* free the tcb */ -#if defined(SCTP_PANIC_ON_ABORT) - printf("stcb:%p state:%d rport:%d net:%p\n", - stcb, stcb->asoc.state, stcb->rport, net); - if (!(stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) { - panic("Received an ABORT"); - } else { - printf("No panic its in state %x closed\n", stcb->asoc.state); - } -#endif SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { @@ -793,7 +820,7 @@ sctp_handle_abort(struct sctp_abort_chunk *cp, #ifdef SCTP_ASOCLOG_OF_TSNS sctp_print_out_track_log(stcb); #endif -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -804,12 +831,41 @@ sctp_handle_abort(struct sctp_abort_chunk *cp, stcb->asoc.state |= SCTP_STATE_WAS_ABORTED; (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n"); } +static void +sctp_start_net_timers(struct sctp_tcb *stcb) +{ + uint32_t cnt_hb_sent; + struct sctp_nets *net; + + cnt_hb_sent = 0; + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + /* + * For each network start: 1) A pmtu timer. 2) A HB timer 3) + * If the dest in unconfirmed send a hb as well if under + * max_hb_burst have been sent. + */ + sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb, net); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); + if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) && + (cnt_hb_sent < SCTP_BASE_SYSCTL(sctp_hb_maxburst))) { + sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED); + cnt_hb_sent++; + } + } + if (cnt_hb_sent) { + sctp_chunk_output(stcb->sctp_ep, stcb, + SCTP_OUTPUT_FROM_COOKIE_ACK, + SCTP_SO_NOT_LOCKED); + } +} + + static void sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_flag) @@ -817,7 +873,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, struct sctp_association *asoc; int some_on_streamwheel; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif @@ -835,7 +891,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, /* Shutdown NOT the expected size */ return; } else { - sctp_update_acked(stcb, cp, net, abort_flag); + sctp_update_acked(stcb, cp, abort_flag); if (*abort_flag) { return; } @@ -849,7 +905,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, asoc->control_pdapi->pdapi_aborted = 1; asoc->control_pdapi = NULL; SCTP_INP_READ_UNLOCK(stcb->sctp_ep); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -863,7 +919,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, } #endif sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } @@ -892,7 +948,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8); } /* Now is there unsent data on a stream somewhere? */ - some_on_streamwheel = sctp_is_there_unsent_data(stcb); + some_on_streamwheel = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED); if (!TAILQ_EMPTY(&asoc->send_queue) || !TAILQ_EMPTY(&asoc->sent_queue) || @@ -902,7 +958,6 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, } else { /* no outstanding data to send, so move on... */ /* send SHUTDOWN-ACK */ - sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination); /* move to SHUTDOWN-ACK-SENT state */ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { @@ -911,19 +966,20 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); sctp_stop_timers_for_shutdown(stcb); + sctp_send_shutdown_ack(stcb, net); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net); } } static void -sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp, +sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED, struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *asoc; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(stcb->sctp_ep); @@ -957,7 +1013,7 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp, asoc->control_pdapi->pdapi_aborted = 1; asoc->control_pdapi = NULL; SCTP_INP_READ_UNLOCK(stcb->sctp_ep); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); @@ -970,15 +1026,15 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp, } #endif sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } /* are the queues empty? */ if (!TAILQ_EMPTY(&asoc->send_queue) || !TAILQ_EMPTY(&asoc->sent_queue) || - !TAILQ_EMPTY(&asoc->out_wheel)) { - sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED); + !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) { + sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED); } /* stop the timer */ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9); @@ -986,16 +1042,15 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp, sctp_send_shutdown_complete(stcb, net, 0); /* notify upper layer protocol */ if (stcb->sctp_socket) { - sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { - /* Set the connected flag to disconnected */ - stcb->sctp_ep->sctp_socket->so_snd.sb_cc = 0; + stcb->sctp_socket->so_snd.sb_cc = 0; } + sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); } SCTP_STAT_INCR_COUNTER32(sctps_shutdown); /* free the TCB but first save off the ep */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); @@ -1004,7 +1059,7 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp, #endif (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_10); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } @@ -1059,11 +1114,6 @@ sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr) case SCTP_HAS_NAT_SUPPORT: stcb->asoc.peer_supports_nat = 0; break; - case SCTP_ECN_NONCE_SUPPORTED: - stcb->asoc.peer_supports_ecn_nonce = 0; - stcb->asoc.ecn_nonce_allowed = 0; - stcb->asoc.ecn_allowed = 0; - break; case SCTP_ADD_IP_ADDRESS: case SCTP_DEL_IP_ADDRESS: case SCTP_SET_PRIM_ADDR: @@ -1090,12 +1140,12 @@ sctp_handle_error(struct sctp_chunkhdr *ch, { int chklen; struct sctp_paramhdr *phdr; - uint16_t error_type; + uint16_t error, error_type; uint16_t error_len; struct sctp_association *asoc; int adjust; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif @@ -1105,6 +1155,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch, phdr = (struct sctp_paramhdr *)((caddr_t)ch + sizeof(struct sctp_chunkhdr)); chklen = ntohs(ch->chunk_length) - sizeof(struct sctp_chunkhdr); + error = 0; while ((size_t)chklen >= sizeof(struct sctp_paramhdr)) { /* Process an Error Cause */ error_type = ntohs(phdr->param_type); @@ -1115,6 +1166,10 @@ sctp_handle_error(struct sctp_chunkhdr *ch, chklen, error_len); return (0); } + if (error == 0) { + /* report the first error cause */ + error = error_type; + } switch (error_type) { case SCTP_CAUSE_INVALID_STREAM: case SCTP_CAUSE_MISSING_PARAM: @@ -1151,9 +1206,9 @@ sctp_handle_error(struct sctp_chunkhdr *ch, asoc->stale_cookie_count++; if (asoc->stale_cookie_count > asoc->max_init_times) { - sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED); + sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED); /* now free the asoc */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -1163,7 +1218,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch, #endif (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_11); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif return (-1); @@ -1231,13 +1286,17 @@ sctp_handle_error(struct sctp_chunkhdr *ch, chklen -= adjust; phdr = (struct sctp_paramhdr *)((caddr_t)phdr + adjust); } + sctp_ulp_notify(SCTP_NOTIFY_REMOTE_ERROR, stcb, error, ch, SCTP_SO_NOT_LOCKED); return (0); } static int sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset, - struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb, - struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id) + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, + struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb, + struct sctp_nets *net, int *abort_no_unlock, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id) { struct sctp_init_ack *init_ack; struct mbuf *op_err; @@ -1253,8 +1312,10 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset, if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) { /* Invalid length */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh, - op_err, 0, net->port); + sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, net->port); *abort_no_unlock = 1; return (-1); } @@ -1263,32 +1324,40 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset, if (init_ack->initiate_tag == 0) { /* protocol error... send an abort */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh, - op_err, 0, net->port); + sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, net->port); *abort_no_unlock = 1; return (-1); } if (ntohl(init_ack->a_rwnd) < SCTP_MIN_RWND) { /* protocol error... send an abort */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh, - op_err, 0, net->port); + sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, net->port); *abort_no_unlock = 1; return (-1); } if (init_ack->num_inbound_streams == 0) { /* protocol error... send an abort */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh, - op_err, 0, net->port); + sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, net->port); *abort_no_unlock = 1; return (-1); } if (init_ack->num_outbound_streams == 0) { /* protocol error... send an abort */ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); - sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh, - op_err, 0, net->port); + sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, net->port); *abort_no_unlock = 1; return (-1); } @@ -1310,8 +1379,10 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset, sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, stcb, 0, (void *)stcb->asoc.primary_destination, SCTP_SO_NOT_LOCKED); } - if (sctp_process_init_ack(m, iphlen, offset, sh, cp, stcb, - net, abort_no_unlock, vrf_id) < 0) { + if (sctp_process_init_ack(m, iphlen, offset, src, dst, sh, cp, stcb, + net, abort_no_unlock, + use_mflowid, mflowid, + vrf_id) < 0) { /* error in parsing parameters */ return (-1); } @@ -1362,10 +1433,12 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset, static struct sctp_tcb * sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len, struct sctp_inpcb *inp, struct sctp_nets **netp, struct sockaddr *init_src, int *notification, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, + uint8_t use_mflowid, uint32_t mflowid, uint32_t vrf_id, uint16_t port); @@ -1377,10 +1450,13 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, */ static struct sctp_tcb * sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len, struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp, - struct sockaddr *init_src, int *notification, sctp_assoc_t * sac_assoc_id, - uint32_t vrf_id, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, uint16_t port) + struct sockaddr *init_src, int *notification, + int auth_skipped, uint32_t auth_offset, uint32_t auth_len, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) { struct sctp_association *asoc; struct sctp_init_chunk *init_cp, init_buf; @@ -1388,7 +1464,6 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, struct sctp_nets *net; struct mbuf *op_err; struct sctp_paramhdr *ph; - int chk_length; int init_offset, initack_offset, i; int retval; int spec_flag = 0; @@ -1418,7 +1493,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, ph = mtod(op_err, struct sctp_paramhdr *); ph->param_type = htons(SCTP_CAUSE_COOKIE_IN_SHUTDOWN); ph->param_length = htons(sizeof(struct sctp_paramhdr)); - sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag, + sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err, + use_mflowid, mflowid, vrf_id, net->port); if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 2; @@ -1438,7 +1514,6 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, /* could not pull a INIT chunk in cookie */ return (NULL); } - chk_length = ntohs(init_cp->ch.chunk_length); if (init_cp->ch.chunk_type != SCTP_INITIATION) { return (NULL); } @@ -1446,7 +1521,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, * find and validate the INIT-ACK chunk in the cookie (my info) the * INIT-ACK follows the INIT chunk */ - initack_offset = init_offset + SCTP_SIZE32(chk_length); + initack_offset = init_offset + SCTP_SIZE32(ntohs(init_cp->ch.chunk_length)); initack_cp = (struct sctp_init_ack_chunk *) sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk), (uint8_t *) & initack_buf); @@ -1454,7 +1529,6 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, /* could not pull INIT-ACK chunk in cookie */ return (NULL); } - chk_length = ntohs(initack_cp->ch.chunk_length); if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) { return (NULL); } @@ -1495,7 +1569,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, * the right seq no's. */ /* First we must process the INIT !! */ - retval = sctp_process_init(init_cp, stcb, net); + retval = sctp_process_init(init_cp, stcb); if (retval < 0) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 3; @@ -1522,7 +1596,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && (inp->sctp_socket->so_qlimit == 0) ) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif @@ -1534,7 +1608,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, */ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -1547,7 +1621,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, } #endif soisconnected(stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } @@ -1559,7 +1633,9 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, */ net->hb_responded = 1; net->RTO = sctp_calculate_rto(stcb, asoc, net, - &cookie->time_entered, sctp_align_unsafe_makecopy); + &cookie->time_entered, + sctp_align_unsafe_makecopy, + SCTP_RTT_FROM_NON_DATA); if (stcb->asoc.sctp_autoclose_ticks && (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))) { @@ -1580,9 +1656,9 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, * somehow abort.. but we do have an existing asoc. This * really should not fail. */ - if (sctp_load_addresses_from_init(stcb, m, iphlen, + if (sctp_load_addresses_from_init(stcb, m, init_offset + sizeof(struct sctp_init_chunk), - initack_offset, sh, init_src)) { + initack_offset, src, dst, init_src)) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 4; return (NULL); @@ -1643,7 +1719,9 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, ph = mtod(op_err, struct sctp_paramhdr *); ph->param_type = htons(SCTP_CAUSE_NAT_COLLIDING_STATE); ph->param_length = htons(sizeof(struct sctp_paramhdr)); - sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id, port); + sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err, + use_mflowid, mflowid, + vrf_id, port); return (NULL); } if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) && @@ -1693,8 +1771,6 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd); asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams); - /* Note last_cwr_tsn? where is this used? */ - asoc->last_cwr_tsn = asoc->init_seq_number - 1; if (ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) { /* * Ok the peer probably discarded our data (if we @@ -1718,15 +1794,15 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, } /* process the INIT info (peer's info) */ - retval = sctp_process_init(init_cp, stcb, net); + retval = sctp_process_init(init_cp, stcb); if (retval < 0) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 9; return (NULL); } - if (sctp_load_addresses_from_init(stcb, m, iphlen, + if (sctp_load_addresses_from_init(stcb, m, init_offset + sizeof(struct sctp_init_chunk), - initack_offset, sh, init_src)) { + initack_offset, src, dst, init_src)) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 10; return (NULL); @@ -1738,13 +1814,13 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && (inp->sctp_socket->so_qlimit == 0)) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -1757,7 +1833,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, } #endif soisconnected(stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } @@ -1807,9 +1883,11 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, * cookie_new code since we are allowing a duplicate * association. I hope this works... */ - return (sctp_process_cookie_new(m, iphlen, offset, sh, cookie, cookie_len, + return (sctp_process_cookie_new(m, iphlen, offset, src, dst, + sh, cookie, cookie_len, inp, netp, init_src, notification, auth_skipped, auth_offset, auth_len, + use_mflowid, mflowid, vrf_id, port)); } /* @@ -1821,7 +1899,6 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15); sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16); - *sac_assoc_id = sctp_get_associd(stcb); /* notify upper layer */ *notification = SCTP_NOTIFY_ASSOC_RESTART; atomic_add_int(&stcb->asoc.refcnt, 1); @@ -1850,7 +1927,6 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number; asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1; - asoc->last_cwr_tsn = asoc->init_seq_number - 1; asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1; asoc->str_reset_seq_in = asoc->init_seq_number; @@ -1872,10 +1948,11 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, /* send up all the data */ SCTP_TCB_SEND_LOCK(stcb); - sctp_report_all_outbound(stcb, 1, SCTP_SO_NOT_LOCKED); + sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_NOT_LOCKED); for (i = 0; i < stcb->asoc.streamoutcnt; i++) { + stcb->asoc.strmout[i].chunks_on_queues = 0; stcb->asoc.strmout[i].stream_no = i; - stcb->asoc.strmout[i].next_sequence_sent = 0; + stcb->asoc.strmout[i].next_sequence_send = 0; stcb->asoc.strmout[i].last_msg_incomplete = 0; } /* process the INIT-ACK info (my info) */ @@ -1898,7 +1975,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, SCTP_INP_WUNLOCK(stcb->sctp_ep); SCTP_INP_INFO_WUNLOCK(); - retval = sctp_process_init(init_cp, stcb, net); + retval = sctp_process_init(init_cp, stcb); if (retval < 0) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 13; @@ -1911,9 +1988,9 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, */ net->hb_responded = 1; - if (sctp_load_addresses_from_init(stcb, m, iphlen, + if (sctp_load_addresses_from_init(stcb, m, init_offset + sizeof(struct sctp_init_chunk), - initack_offset, sh, init_src)) { + initack_offset, src, dst, init_src)) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 14; @@ -1942,12 +2019,14 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, * cookie-echo chunk length: length of the cookie chunk to: where the init * was from returns a new TCB */ -struct sctp_tcb * +static struct sctp_tcb * sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len, struct sctp_inpcb *inp, struct sctp_nets **netp, struct sockaddr *init_src, int *notification, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, + uint8_t use_mflowid, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { struct sctp_tcb *stcb; @@ -1955,17 +2034,21 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, struct sctp_init_ack_chunk *initack_cp, initack_buf; struct sockaddr_storage sa_store; struct sockaddr *initack_src = (struct sockaddr *)&sa_store; - struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; struct sctp_association *asoc; - int chk_length; int init_offset, initack_offset, initack_limit; int retval; int error = 0; - uint32_t old_tag; uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE]; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#ifdef INET + struct sockaddr_in *sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6; + +#endif +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(inp); @@ -1986,12 +2069,11 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, "process_cookie_new: could not pull INIT chunk hdr\n"); return (NULL); } - chk_length = ntohs(init_cp->ch.chunk_length); if (init_cp->ch.chunk_type != SCTP_INITIATION) { SCTPDBG(SCTP_DEBUG_INPUT1, "HUH? process_cookie_new: could not find INIT chunk!\n"); return (NULL); } - initack_offset = init_offset + SCTP_SIZE32(chk_length); + initack_offset = init_offset + SCTP_SIZE32(ntohs(init_cp->ch.chunk_length)); /* * find and validate the INIT-ACK chunk in the cookie (my info) the * INIT-ACK follows the INIT chunk @@ -2004,7 +2086,6 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: could not pull INIT-ACK chunk hdr\n"); return (NULL); } - chk_length = ntohs(initack_cp->ch.chunk_length); if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) { return (NULL); } @@ -2040,7 +2121,9 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC); sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen, - sh, op_err, vrf_id, port); + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, port); return (NULL); } /* get the correct sctp_nets */ @@ -2049,13 +2132,13 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, asoc = &stcb->asoc; /* get scope variables out of cookie */ - asoc->ipv4_local_scope = cookie->ipv4_scope; - asoc->site_scope = cookie->site_scope; - asoc->local_scope = cookie->local_scope; - asoc->loopback_scope = cookie->loopback_scope; + asoc->scope.ipv4_local_scope = cookie->ipv4_scope; + asoc->scope.site_scope = cookie->site_scope; + asoc->scope.local_scope = cookie->local_scope; + asoc->scope.loopback_scope = cookie->loopback_scope; - if ((asoc->ipv4_addr_legal != cookie->ipv4_addr_legal) || - (asoc->ipv6_addr_legal != cookie->ipv6_addr_legal)) { + if ((asoc->scope.ipv4_addr_legal != cookie->ipv4_addr_legal) || + (asoc->scope.ipv6_addr_legal != cookie->ipv6_addr_legal)) { struct mbuf *op_err; /* @@ -2066,29 +2149,29 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, atomic_add_int(&stcb->asoc.refcnt, 1); op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC); sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen, - sh, op_err, vrf_id, port); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, port); +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); return (NULL); } /* process the INIT-ACK info (my info) */ - old_tag = asoc->my_vtag; asoc->my_vtag = ntohl(initack_cp->init.initiate_tag); asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd); asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams); asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn); asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number; asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1; - asoc->last_cwr_tsn = asoc->init_seq_number - 1; asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1; asoc->str_reset_seq_in = asoc->init_seq_number; @@ -2096,35 +2179,35 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, /* process the INIT info (peer's info) */ if (netp) - retval = sctp_process_init(init_cp, stcb, *netp); + retval = sctp_process_init(init_cp, stcb); else retval = 0; if (retval < 0) { atomic_add_int(&stcb->asoc.refcnt, 1); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); return (NULL); } /* load all addresses */ - if (sctp_load_addresses_from_init(stcb, m, iphlen, - init_offset + sizeof(struct sctp_init_chunk), initack_offset, sh, - init_src)) { + if (sctp_load_addresses_from_init(stcb, m, + init_offset + sizeof(struct sctp_init_chunk), initack_offset, + src, dst, init_src)) { atomic_add_int(&stcb->asoc.refcnt, 1); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); @@ -2147,13 +2230,13 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, SCTPDBG(SCTP_DEBUG_AUTH1, "COOKIE-ECHO: AUTH failed\n"); atomic_add_int(&stcb->asoc.refcnt, 1); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); @@ -2185,14 +2268,19 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, /* warning, we re-use sin, sin6, sa_store here! */ /* pull in local_address (our "from" address) */ - if (cookie->laddr_type == SCTP_IPV4_ADDRESS) { + switch (cookie->laddr_type) { +#ifdef INET + case SCTP_IPV4_ADDRESS: /* source addr is IPv4 */ sin = (struct sockaddr_in *)initack_src; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); sin->sin_addr.s_addr = cookie->laddress[0]; - } else if (cookie->laddr_type == SCTP_IPV6_ADDRESS) { + break; +#endif +#ifdef INET6 + case SCTP_IPV6_ADDRESS: /* source addr is IPv6 */ sin6 = (struct sockaddr_in6 *)initack_src; memset(sin6, 0, sizeof(*sin6)); @@ -2201,15 +2289,17 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, sin6->sin6_scope_id = cookie->scope_id; memcpy(&sin6->sin6_addr, cookie->laddress, sizeof(sin6->sin6_addr)); - } else { + break; +#endif + default: atomic_add_int(&stcb->asoc.refcnt, 1); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); @@ -2230,7 +2320,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, * a bit of protection is worth having.. */ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); @@ -2242,7 +2332,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, } #endif soisconnected(stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && @@ -2266,7 +2356,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); if ((netp) && (*netp)) { (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp, - &cookie->time_entered, sctp_align_unsafe_makecopy); + &cookie->time_entered, sctp_align_unsafe_makecopy, + SCTP_RTT_FROM_NON_DATA); } /* respond with a COOKIE-ACK */ sctp_send_cookie_ack(stcb); @@ -2304,33 +2395,40 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, */ static struct mbuf * sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp, struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, - struct sctp_tcb **locked_tcb, uint32_t vrf_id, uint16_t port) + struct sctp_tcb **locked_tcb, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) { struct sctp_state_cookie *cookie; - struct sockaddr_in6 sin6; - struct sockaddr_in sin; struct sctp_tcb *l_stcb = *stcb; struct sctp_inpcb *l_inp; struct sockaddr *to; - sctp_assoc_t sac_restart_id; struct sctp_pcb *ep; struct mbuf *m_sig; uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE]; uint8_t *sig; uint8_t cookie_ok = 0; - unsigned int size_of_pkt, sig_offset, cookie_offset; + unsigned int sig_offset, cookie_offset; unsigned int cookie_len; struct timeval now; struct timeval time_expires; - struct sockaddr_storage dest_store; - struct sockaddr *localep_sa = (struct sockaddr *)&dest_store; - struct ip *iph; int notification = 0; struct sctp_nets *netl; int had_a_existing_tcb = 0; + int send_int_conf = 0; + +#ifdef INET + struct sockaddr_in sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; + +#endif SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_cookie: handling COOKIE-ECHO\n"); @@ -2338,45 +2436,6 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, if (inp_p == NULL) { return (NULL); } - /* First get the destination address setup too. */ - iph = mtod(m, struct ip *); - switch (iph->ip_v) { - case IPVERSION: - { - /* its IPv4 */ - struct sockaddr_in *lsin; - - lsin = (struct sockaddr_in *)(localep_sa); - memset(lsin, 0, sizeof(*lsin)); - lsin->sin_family = AF_INET; - lsin->sin_len = sizeof(*lsin); - lsin->sin_port = sh->dest_port; - lsin->sin_addr.s_addr = iph->ip_dst.s_addr; - size_of_pkt = SCTP_GET_IPV4_LENGTH(iph); - break; - } -#ifdef INET6 - case IPV6_VERSION >> 4: - { - /* its IPv6 */ - struct ip6_hdr *ip6; - struct sockaddr_in6 *lsin6; - - lsin6 = (struct sockaddr_in6 *)(localep_sa); - memset(lsin6, 0, sizeof(*lsin6)); - lsin6->sin6_family = AF_INET6; - lsin6->sin6_len = sizeof(struct sockaddr_in6); - ip6 = mtod(m, struct ip6_hdr *); - lsin6->sin6_port = sh->dest_port; - lsin6->sin6_addr = ip6->ip6_dst; - size_of_pkt = SCTP_GET_IPV6_LENGTH(ip6) + iphlen; - break; - } -#endif - default: - return (NULL); - } - cookie = &cp->cookie; cookie_offset = offset + sizeof(struct sctp_chunkhdr); cookie_len = ntohs(cp->ch.chunk_length); @@ -2393,11 +2452,10 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, */ return (NULL); } - if (cookie_len > size_of_pkt || - cookie_len < sizeof(struct sctp_cookie_echo_chunk) + + if (cookie_len < sizeof(struct sctp_cookie_echo_chunk) + sizeof(struct sctp_init_chunk) + sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) { - /* cookie too long! or too small */ + /* cookie too small */ return (NULL); } /* @@ -2405,11 +2463,6 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, * calculated in the sctp_hmac_m() call). */ sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE; - if (sig_offset > size_of_pkt) { - /* packet not correct size! */ - /* XXX this may already be accounted for earlier... */ - return (NULL); - } m_sig = m_split(m, sig_offset, M_DONTWAIT); if (m_sig == NULL) { /* out of memory or ?? */ @@ -2419,12 +2472,10 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = m_sig; - while (mat) { + for (mat = m_sig; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_SPLIT); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -2535,7 +2586,8 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, if (tim == 0) tim = now.tv_usec - cookie->time_entered.tv_usec; scm->time_usec = htonl(tim); - sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag, + sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err, + use_mflowid, mflowid, vrf_id, port); return (NULL); } @@ -2549,7 +2601,9 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, * up. */ to = NULL; - if (cookie->addr_type == SCTP_IPV6_ADDRESS) { + switch (cookie->addr_type) { +#ifdef INET6 + case SCTP_IPV6_ADDRESS: memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); @@ -2558,20 +2612,25 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, memcpy(&sin6.sin6_addr.s6_addr, cookie->address, sizeof(sin6.sin6_addr.s6_addr)); to = (struct sockaddr *)&sin6; - } else if (cookie->addr_type == SCTP_IPV4_ADDRESS) { + break; +#endif +#ifdef INET + case SCTP_IPV4_ADDRESS: memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = sh->src_port; sin.sin_addr.s_addr = cookie->address[0]; to = (struct sockaddr *)&sin; - } else { + break; +#endif + default: /* This should not happen */ return (NULL); } if ((*stcb == NULL) && to) { /* Yep, lets check */ - *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, localep_sa, NULL); + *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, dst, NULL); if (*stcb == NULL) { /* * We should have only got back the same inp. If we @@ -2603,7 +2662,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, SCTP_INP_INCR_REF((*stcb)->sctp_ep); if ((*stcb)->sctp_ep != l_inp) { SCTP_PRINTF("Huh? ep:%p diff then l_inp:%p?\n", - (*stcb)->sctp_ep, l_inp); + (void *)(*stcb)->sctp_ep, (void *)l_inp); } } } @@ -2614,21 +2673,33 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, cookie_len -= SCTP_SIGNATURE_SIZE; if (*stcb == NULL) { /* this is the "normal" case... get a new TCB */ - *stcb = sctp_process_cookie_new(m, iphlen, offset, sh, cookie, - cookie_len, *inp_p, netp, to, ¬ification, - auth_skipped, auth_offset, auth_len, vrf_id, port); + *stcb = sctp_process_cookie_new(m, iphlen, offset, src, dst, sh, + cookie, cookie_len, *inp_p, + netp, to, ¬ification, + auth_skipped, auth_offset, auth_len, + use_mflowid, mflowid, + vrf_id, port); } else { /* this is abnormal... cookie-echo on existing TCB */ had_a_existing_tcb = 1; - *stcb = sctp_process_cookie_existing(m, iphlen, offset, sh, + *stcb = sctp_process_cookie_existing(m, iphlen, offset, + src, dst, sh, cookie, cookie_len, *inp_p, *stcb, netp, to, - ¬ification, &sac_restart_id, vrf_id, auth_skipped, auth_offset, auth_len, port); + ¬ification, auth_skipped, auth_offset, auth_len, + use_mflowid, mflowid, + vrf_id, port); } if (*stcb == NULL) { /* still no TCB... must be bad cookie-echo */ return (NULL); } + if ((*netp != NULL) && (use_mflowid != 0)) { + (*netp)->flowid = mflowid; +#ifdef INVARIANTS + (*netp)->flowidset = 1; +#endif + } /* * Ok, we built an association so confirm the address we sent the * INIT-ACK to. @@ -2639,10 +2710,9 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, */ if (netl == NULL) { /* TSNH! Huh, why do I need to add this address here? */ - int ret; - - ret = sctp_add_remote_addr(*stcb, to, SCTP_DONOT_SETSCOPE, - SCTP_IN_COOKIE_PROC); + if (sctp_add_remote_addr(*stcb, to, NULL, SCTP_DONOT_SETSCOPE, SCTP_IN_COOKIE_PROC)) { + return (NULL); + } netl = sctp_findnet(*stcb, to); } if (netl) { @@ -2650,14 +2720,10 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, netl->dest_state &= ~SCTP_ADDR_UNCONFIRMED; (void)sctp_set_primary_addr((*stcb), (struct sockaddr *)NULL, netl); - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, - (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED); + send_int_conf = 1; } } - if (*stcb) { - sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, *inp_p, - *stcb, NULL); - } + sctp_start_net_timers(*stcb); if ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { if (!had_a_existing_tcb || (((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) { @@ -2674,21 +2740,27 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, * For a restart we will keep the same * socket, no need to do anything. I THINK!! */ - sctp_ulp_notify(notification, *stcb, 0, (void *)&sac_restart_id, SCTP_SO_NOT_LOCKED); + sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED); + if (send_int_conf) { + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, + (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED); + } return (m); } oso = (*inp_p)->sctp_socket; atomic_add_int(&(*stcb)->asoc.refcnt, 1); SCTP_TCB_UNLOCK((*stcb)); + CURVNET_SET(oso->so_vnet); so = sonewconn(oso, 0 ); + CURVNET_RESTORE(); SCTP_TCB_LOCK((*stcb)); atomic_subtract_int(&(*stcb)->asoc.refcnt, 1); if (so == NULL) { struct mbuf *op_err; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *pcb_so; #endif @@ -2696,8 +2768,10 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n"); op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC); sctp_abort_association(*inp_p, NULL, m, iphlen, - sh, op_err, vrf_id, port); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, port); +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) pcb_so = SCTP_INP_SO(*inp_p); atomic_add_int(&(*stcb)->asoc.refcnt, 1); SCTP_TCB_UNLOCK((*stcb)); @@ -2706,7 +2780,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, atomic_subtract_int(&(*stcb)->asoc.refcnt, 1); #endif (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(pcb_so, 1); #endif return (NULL); @@ -2729,8 +2803,10 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, inp->sctp_socket = so; inp->sctp_frag_point = (*inp_p)->sctp_frag_point; inp->sctp_cmt_on_off = (*inp_p)->sctp_cmt_on_off; + inp->sctp_ecn_enable = (*inp_p)->sctp_ecn_enable; inp->partial_delivery_point = (*inp_p)->partial_delivery_point; inp->sctp_context = (*inp_p)->sctp_context; + inp->local_strreset_support = (*inp_p)->local_strreset_support; inp->inp_starting_point_for_iterator = NULL; /* * copy in the authentication parameters from the @@ -2786,18 +2862,21 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, /* Switch over to the new guy */ *inp_p = inp; sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED); - + if (send_int_conf) { + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, + (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED); + } /* * Pull it from the incomplete queue and wake the * guy */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) atomic_add_int(&(*stcb)->asoc.refcnt, 1); SCTP_TCB_UNLOCK((*stcb)); SCTP_SOCKET_LOCK(so, 1); #endif soisconnected(so); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_LOCK((*stcb)); atomic_subtract_int(&(*stcb)->asoc.refcnt, 1); SCTP_SOCKET_UNLOCK(so, 1); @@ -2805,14 +2884,18 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, return (m); } } - if ((notification) && ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) { + if (notification) { sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED); } + if (send_int_conf) { + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, + (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED); + } return (m); } static void -sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp, +sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED, struct sctp_tcb *stcb, struct sctp_nets *net) { /* cp must not be used, others call this without a c-ack :-) */ @@ -2831,6 +2914,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp, /* state change only needed when I am in right state */ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n"); SCTP_SET_STATE(asoc, SCTP_STATE_OPEN); + sctp_start_net_timers(stcb); if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) { sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); @@ -2841,42 +2925,50 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp, SCTP_STAT_INCR_GAUGE32(sctps_currestab); if (asoc->overall_error_count == 0) { net->RTO = sctp_calculate_rto(stcb, asoc, net, - &asoc->time_entered, sctp_align_safe_nocopy); + &asoc->time_entered, sctp_align_safe_nocopy, + SCTP_RTT_FROM_NON_DATA); } (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered); sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); - if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { - SCTP_SOCKET_UNLOCK(so, 1); - return; - } #endif - soisconnected(stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + if ((stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) == 0) { + soisconnected(stcb->sctp_socket); + } +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } - sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, - stcb, net); /* * since we did not send a HB make sure we don't double * things */ net->hb_responded = 1; + if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { + /* + * We don't need to do the asconf thing, nor hb or + * autoclose if the socket is closed. + */ + goto closed_socket; + } + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, + stcb, net); + + if (stcb->asoc.sctp_autoclose_ticks && sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTOCLOSE)) { sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, @@ -2900,6 +2992,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp, #endif } } +closed_socket: /* Toss the cookie if I can */ sctp_toss_old_cookies(stcb, asoc); if (!TAILQ_EMPTY(&asoc->sent_queue)) { @@ -2907,10 +3000,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp, struct sctp_tmit_chunk *chk; chk = TAILQ_FIRST(&asoc->sent_queue); - if (chk) { - sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, - stcb, chk->whoTo); - } + sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, chk->whoTo); } } @@ -2920,83 +3010,149 @@ sctp_handle_ecn_echo(struct sctp_ecne_chunk *cp, { struct sctp_nets *net; struct sctp_tmit_chunk *lchk; - uint32_t tsn; + struct sctp_ecne_chunk bkup; + uint8_t override_bit; + uint32_t tsn, window_data_tsn; + int len; + unsigned int pkt_cnt; - if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_ecne_chunk)) { + len = ntohs(cp->ch.chunk_length); + if ((len != sizeof(struct sctp_ecne_chunk)) && + (len != sizeof(struct old_sctp_ecne_chunk))) { return; } + if (len == sizeof(struct old_sctp_ecne_chunk)) { + /* Its the old format */ + memcpy(&bkup, cp, sizeof(struct old_sctp_ecne_chunk)); + bkup.num_pkts_since_cwr = htonl(1); + cp = &bkup; + } SCTP_STAT_INCR(sctps_recvecne); tsn = ntohl(cp->tsn); - /* ECN Nonce stuff: need a resync and disable the nonce sum check */ - /* Also we make sure we disable the nonce_wait */ - lchk = TAILQ_FIRST(&stcb->asoc.send_queue); + pkt_cnt = ntohl(cp->num_pkts_since_cwr); + lchk = TAILQ_LAST(&stcb->asoc.send_queue, sctpchunk_listhead); if (lchk == NULL) { - stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq; + window_data_tsn = stcb->asoc.sending_seq - 1; } else { - stcb->asoc.nonce_resync_tsn = lchk->rec.data.TSN_seq; + window_data_tsn = lchk->rec.data.TSN_seq; } - stcb->asoc.nonce_wait_for_ecne = 0; - stcb->asoc.nonce_sum_check = 0; - /* Find where it was sent, if possible */ + /* Find where it was sent to if possible. */ net = NULL; - lchk = TAILQ_FIRST(&stcb->asoc.sent_queue); - while (lchk) { + TAILQ_FOREACH(lchk, &stcb->asoc.sent_queue, sctp_next) { if (lchk->rec.data.TSN_seq == tsn) { net = lchk->whoTo; + net->ecn_prev_cwnd = lchk->rec.data.cwnd_at_send; break; } - if (compare_with_wrap(lchk->rec.data.TSN_seq, tsn, MAX_SEQ)) + if (SCTP_TSN_GT(lchk->rec.data.TSN_seq, tsn)) { break; - lchk = TAILQ_NEXT(lchk, sctp_next); + } } - if (net == NULL) - /* default is we use the primary */ - net = stcb->asoc.primary_destination; - - if (compare_with_wrap(tsn, stcb->asoc.last_cwr_tsn, MAX_TSN)) { + if (net == NULL) { + /* + * What to do. A previous send of a CWR was possibly lost. + * See how old it is, we may have it marked on the actual + * net. + */ + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + if (tsn == net->last_cwr_tsn) { + /* Found him, send it off */ + break; + } + } + if (net == NULL) { + /* + * If we reach here, we need to send a special CWR + * that says hey, we did this a long time ago and + * you lost the response. + */ + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + /* TSNH */ + return; + } + override_bit = SCTP_CWR_REDUCE_OVERRIDE; + } else { + override_bit = 0; + } + } else { + override_bit = 0; + } + if (SCTP_TSN_GT(tsn, net->cwr_window_tsn) && + ((override_bit & SCTP_CWR_REDUCE_OVERRIDE) == 0)) { /* * JRS - Use the congestion control given in the pluggable * CC module */ - stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net); + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net, 0, pkt_cnt); /* - * we reduce once every RTT. So we will only lower cwnd at - * the next sending seq i.e. the resync_tsn. + * We reduce once every RTT. So we will only lower cwnd at + * the next sending seq i.e. the window_data_tsn */ - stcb->asoc.last_cwr_tsn = stcb->asoc.nonce_resync_tsn; + net->cwr_window_tsn = window_data_tsn; + net->ecn_ce_pkt_cnt += pkt_cnt; + net->lost_cnt = pkt_cnt; + net->last_cwr_tsn = tsn; + } else { + override_bit |= SCTP_CWR_IN_SAME_WINDOW; + if (SCTP_TSN_GT(tsn, net->last_cwr_tsn) && + ((override_bit & SCTP_CWR_REDUCE_OVERRIDE) == 0)) { + /* + * Another loss in the same window update how many + * marks/packets lost we have had. + */ + int cnt = 1; + + if (pkt_cnt > net->lost_cnt) { + /* Should be the case */ + cnt = (pkt_cnt - net->lost_cnt); + net->ecn_ce_pkt_cnt += cnt; + } + net->lost_cnt = pkt_cnt; + net->last_cwr_tsn = tsn; + /* + * Most CC functions will ignore this call, since we + * are in-window yet of the initial CE the peer saw. + */ + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net, 1, cnt); + } } /* * We always send a CWR this way if our previous one was lost our * peer will get an update, or if it is not time again to reduce we - * still get the cwr to the peer. + * still get the cwr to the peer. Note we set the override when we + * could not find the TSN on the chunk or the destination network. */ - sctp_send_cwr(stcb, net, tsn); + sctp_send_cwr(stcb, net, net->last_cwr_tsn, override_bit); } static void -sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb) +sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net) { /* * Here we get a CWR from the peer. We must look in the outqueue and - * make sure that we have a covered ECNE in teh control chunk part. + * make sure that we have a covered ECNE in the control chunk part. * If so remove it. */ struct sctp_tmit_chunk *chk; struct sctp_ecne_chunk *ecne; + int override; + uint32_t cwr_tsn; + cwr_tsn = ntohl(cp->tsn); + + override = cp->ch.chunk_flags & SCTP_CWR_REDUCE_OVERRIDE; TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) { if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) { continue; } - /* - * Look for and remove if it is the right TSN. Since there - * is only ONE ECNE on the control queue at any one time we - * don't need to worry about more than one! - */ + if ((override == 0) && (chk->whoTo != net)) { + /* Must be from the right src unless override is set */ + continue; + } ecne = mtod(chk->data, struct sctp_ecne_chunk *); - if (compare_with_wrap(ntohl(cp->tsn), ntohl(ecne->tsn), - MAX_TSN) || (cp->tsn == ecne->tsn)) { + if (SCTP_TSN_GE(cwr_tsn, ntohl(ecne->tsn))) { /* this covers this ECNE, we can remove it */ stcb->asoc.ecn_echo_cnt_onq--; TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk, @@ -3006,19 +3162,21 @@ sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb) chk->data = NULL; } stcb->asoc.ctrl_queue_cnt--; - sctp_free_a_chunk(stcb, chk); - break; + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); + if (override == 0) { + break; + } } } } static void -sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp, +sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSED, struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *asoc; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif @@ -3043,8 +3201,8 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp, /* are the queues empty? they should be */ if (!TAILQ_EMPTY(&asoc->send_queue) || !TAILQ_EMPTY(&asoc->sent_queue) || - !TAILQ_EMPTY(&asoc->out_wheel)) { - sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED); + !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) { + sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED); } } /* stop the timer */ @@ -3053,7 +3211,7 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp, /* free the TCB */ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_shutdown_complete: calls free-asoc\n"); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -3062,7 +3220,7 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp, atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif return; @@ -3080,19 +3238,16 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, struct sctp_tmit_chunk *tp1; tsn = ntohl(desc->tsn_ifany); - tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue); - while (tp1) { + TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) { if (tp1->rec.data.TSN_seq == tsn) { /* found it */ break; } - if (compare_with_wrap(tp1->rec.data.TSN_seq, tsn, - MAX_TSN)) { + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, tsn)) { /* not found */ tp1 = NULL; break; } - tp1 = TAILQ_NEXT(tp1, sctp_next); } if (tp1 == NULL) { /* @@ -3100,13 +3255,11 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, * attention to queue seq order. */ SCTP_STAT_INCR(sctps_pdrpdnfnd); - tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue); - while (tp1) { + TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) { if (tp1->rec.data.TSN_seq == tsn) { /* found it */ break; } - tp1 = TAILQ_NEXT(tp1, sctp_next); } } if (tp1 == NULL) { @@ -3142,17 +3295,15 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, } } } - /* - * We zero out the nonce so resync not - * needed - */ - tp1->rec.data.ect_nonce = 0; if (tp1->do_rtt) { /* * this guy had a RTO calculation * pending on it, cancel it */ + if (tp1->whoTo->rto_needed == 0) { + tp1->whoTo->rto_needed = 1; + } tp1->do_rtt = 0; } SCTP_STAT_INCR(sctps_pdrpmark); @@ -3251,7 +3402,7 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, case SCTP_SELECTIVE_ACK: case SCTP_NR_SELECTIVE_ACK: /* resend the sack */ - sctp_send_sack(stcb); + sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); break; case SCTP_HEARTBEAT_REQUEST: /* resend a demand HB */ @@ -3260,7 +3411,7 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, * Only retransmit if we KNOW we wont destroy the * tcb */ - (void)sctp_send_hb(stcb, 1, net); + sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED); } break; case SCTP_SHUTDOWN: @@ -3314,9 +3465,9 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, } void -sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries, uint16_t * list) +sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list) { - int i; + uint32_t i; uint16_t temp; /* @@ -3348,7 +3499,7 @@ sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * lis if (number_entries == 0) { for (i = 0; i < stcb->asoc.streamoutcnt; i++) { - stcb->asoc.strmout[i].next_sequence_sent = 0; + stcb->asoc.strmout[i].next_sequence_send = 0; } } else if (number_entries) { for (i = 0; i < number_entries; i++) { @@ -3359,7 +3510,7 @@ sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * lis /* no such stream */ continue; } - stcb->asoc.strmout[temp].next_sequence_sent = 0; + stcb->asoc.strmout[temp].next_sequence_send = 0; } } sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED); @@ -3370,7 +3521,7 @@ struct sctp_stream_reset_out_request * sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk) { struct sctp_association *asoc; - struct sctp_stream_reset_out_req *req; + struct sctp_chunkhdr *ch; struct sctp_stream_reset_out_request *r; struct sctp_tmit_chunk *chk; int len, clen; @@ -3393,8 +3544,8 @@ sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chu *bchk = chk; } clen = chk->send_size; - req = mtod(chk->data, struct sctp_stream_reset_out_req *); - r = &req->sr_req; + ch = mtod(chk->data, struct sctp_chunkhdr *); + r = (struct sctp_stream_reset_out_request *)(ch + 1); if (ntohl(r->request_seq) == seq) { /* found it */ return (r); @@ -3431,7 +3582,7 @@ sctp_clean_up_stream_reset(struct sctp_tcb *stcb) chk->data = NULL; } asoc->ctrl_queue_cnt--; - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); /* sa_ignore NO_NULL_CHK */ stcb->asoc.str_reset = NULL; } @@ -3464,9 +3615,11 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb, asoc->stream_reset_out_is_outstanding = 0; if (asoc->stream_reset_outstanding) asoc->stream_reset_outstanding--; - if (action == SCTP_STREAM_RESET_PERFORMED) { + if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) { /* do it */ sctp_reset_out_streams(stcb, number_entries, srparam->list_of_streams); + } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) { + sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED); } else { sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED); } @@ -3475,21 +3628,45 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb, number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t); if (asoc->stream_reset_outstanding) asoc->stream_reset_outstanding--; - if (action != SCTP_STREAM_RESET_PERFORMED) { - sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED); + if (action == SCTP_STREAM_RESET_RESULT_DENIED) { + sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_IN, stcb, + number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED); + } else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) { + sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb, + number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED); } - } else if (type == SCTP_STR_RESET_ADD_STREAMS) { + } else if (type == SCTP_STR_RESET_ADD_OUT_STREAMS) { /* Ok we now may have more streams */ + int num_stream; + + num_stream = stcb->asoc.strm_pending_add_size; + if (num_stream > (stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt)) { + /* TSNH */ + num_stream = stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt; + } + stcb->asoc.strm_pending_add_size = 0; if (asoc->stream_reset_outstanding) asoc->stream_reset_outstanding--; - if (action == SCTP_STREAM_RESET_PERFORMED) { + if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) { /* Put the new streams into effect */ - stcb->asoc.streamoutcnt = stcb->asoc.strm_realoutsize; - sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_ADD_OK, stcb, - (uint32_t) stcb->asoc.streamoutcnt, NULL, SCTP_SO_NOT_LOCKED); + stcb->asoc.streamoutcnt += num_stream; + sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0); + } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) { + sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, + SCTP_STREAM_CHANGE_DENIED); } else { - sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_ADD_FAIL, stcb, - (uint32_t) stcb->asoc.streamoutcnt, NULL, SCTP_SO_NOT_LOCKED); + sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, + SCTP_STREAM_CHANGE_FAILED); + } + } else if (type == SCTP_STR_RESET_ADD_IN_STREAMS) { + if (asoc->stream_reset_outstanding) + asoc->stream_reset_outstanding--; + if (action == SCTP_STREAM_RESET_RESULT_DENIED) { + sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, + SCTP_STREAM_CHANGE_DENIED); + } else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) { + sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, + SCTP_STREAM_CHANGE_FAILED); } } else if (type == SCTP_STR_RESET_TSN_REQUEST) { /** @@ -3505,7 +3682,7 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb, /* huh ? */ return (0); } - if (action == SCTP_STREAM_RESET_PERFORMED) { + if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) { resp = (struct sctp_stream_reset_response_tsn *)respin; asoc->stream_reset_outstanding--; fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk)); @@ -3531,7 +3708,13 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb, sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL); sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL); - + sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), 0); + } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) { + sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), + SCTP_ASSOC_RESET_DENIED); + } else { + sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), + SCTP_ASSOC_RESET_FAILED); } } /* get rid of the request and get the request flags */ @@ -3561,11 +3744,12 @@ sctp_handle_str_reset_request_in(struct sctp_tcb *stcb, seq = ntohl(req->request_seq); if (asoc->str_reset_seq_in == seq) { - if (trunc) { + asoc->last_reset_action[1] = asoc->last_reset_action[0]; + if (!(asoc->local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) { + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + } else if (trunc) { /* Can't do it, since they exceeded our buffer size */ - asoc->last_reset_action[1] = asoc->last_reset_action[0]; - asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED; - sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else if (stcb->asoc.stream_reset_out_is_outstanding == 0) { len = ntohs(req->ph.param_length); number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t)); @@ -3573,9 +3757,7 @@ sctp_handle_str_reset_request_in(struct sctp_tcb *stcb, temp = ntohs(req->list_of_streams[i]); req->list_of_streams[i] = temp; } - /* move the reset action back one */ - asoc->last_reset_action[1] = asoc->last_reset_action[0]; - asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED; + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; sctp_add_stream_reset_out(chk, number_entries, req->list_of_streams, asoc->str_reset_seq_out, seq, (asoc->sending_seq - 1)); @@ -3585,17 +3767,16 @@ sctp_handle_str_reset_request_in(struct sctp_tcb *stcb, stcb->asoc.stream_reset_outstanding++; } else { /* Can't do it, since we have sent one out */ - asoc->last_reset_action[1] = asoc->last_reset_action[0]; - asoc->last_reset_action[0] = SCTP_STREAM_RESET_TRY_LATER; - sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS; } + sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); asoc->str_reset_seq_in++; } else if (asoc->str_reset_seq_in - 1 == seq) { sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); } else if (asoc->str_reset_seq_in - 2 == seq) { sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]); } else { - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO); + sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } } @@ -3617,53 +3798,49 @@ sctp_handle_str_reset_request_tsn(struct sctp_tcb *stcb, seq = ntohl(req->request_seq); if (asoc->str_reset_seq_in == seq) { - fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk)); - fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN; - fwdtsn.ch.chunk_flags = 0; - fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1); - sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0); - if (abort_flag) { - return (1); - } - stcb->asoc.highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { - sctp_log_map(0, 10, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); - } - stcb->asoc.tsn_last_delivered = stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map; - stcb->asoc.mapping_array_base_tsn = stcb->asoc.highest_tsn_inside_map + 1; - memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size); - stcb->asoc.highest_tsn_inside_nr_map = stcb->asoc.highest_tsn_inside_map; - memset(stcb->asoc.nr_mapping_array, 0, stcb->asoc.mapping_array_size); - atomic_add_int(&stcb->asoc.sending_seq, 1); - /* save off historical data for retrans */ - stcb->asoc.last_sending_seq[1] = stcb->asoc.last_sending_seq[0]; - stcb->asoc.last_sending_seq[0] = stcb->asoc.sending_seq; - stcb->asoc.last_base_tsnsent[1] = stcb->asoc.last_base_tsnsent[0]; - stcb->asoc.last_base_tsnsent[0] = stcb->asoc.mapping_array_base_tsn; - - sctp_add_stream_reset_result_tsn(chk, - ntohl(req->request_seq), - SCTP_STREAM_RESET_PERFORMED, - stcb->asoc.sending_seq, - stcb->asoc.mapping_array_base_tsn); - sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL); - sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL); - stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0]; - stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED; - + asoc->last_reset_action[1] = stcb->asoc.last_reset_action[0]; + if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + } else { + fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk)); + fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN; + fwdtsn.ch.chunk_flags = 0; + fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1); + sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0); + if (abort_flag) { + return (1); + } + asoc->highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA; + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { + sctp_log_map(0, 10, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); + } + asoc->tsn_last_delivered = asoc->cumulative_tsn = asoc->highest_tsn_inside_map; + asoc->mapping_array_base_tsn = asoc->highest_tsn_inside_map + 1; + memset(asoc->mapping_array, 0, asoc->mapping_array_size); + asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map; + memset(asoc->nr_mapping_array, 0, asoc->mapping_array_size); + atomic_add_int(&asoc->sending_seq, 1); + /* save off historical data for retrans */ + asoc->last_sending_seq[1] = asoc->last_sending_seq[0]; + asoc->last_sending_seq[0] = asoc->sending_seq; + asoc->last_base_tsnsent[1] = asoc->last_base_tsnsent[0]; + asoc->last_base_tsnsent[0] = asoc->mapping_array_base_tsn; + sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL); + sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL); + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; + sctp_notify_stream_reset_tsn(stcb, asoc->sending_seq, (asoc->mapping_array_base_tsn + 1), 0); + } + sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0], + asoc->last_sending_seq[0], asoc->last_base_tsnsent[0]); asoc->str_reset_seq_in++; } else if (asoc->str_reset_seq_in - 1 == seq) { sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0], - stcb->asoc.last_sending_seq[0], - stcb->asoc.last_base_tsnsent[0] - ); + asoc->last_sending_seq[0], asoc->last_base_tsnsent[0]); } else if (asoc->str_reset_seq_in - 2 == seq) { sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[1], - stcb->asoc.last_sending_seq[1], - stcb->asoc.last_base_tsnsent[1] - ); + asoc->last_sending_seq[1], asoc->last_base_tsnsent[1]); } else { - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO); + sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } return (0); } @@ -3694,15 +3871,14 @@ sctp_handle_str_reset_request_out(struct sctp_tcb *stcb, /* move the reset action back one */ asoc->last_reset_action[1] = asoc->last_reset_action[0]; - if (trunc) { - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED); - asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED; - } else if ((tsn == asoc->cumulative_tsn) || - (compare_with_wrap(asoc->cumulative_tsn, tsn, MAX_TSN))) { + if (!(asoc->local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) { + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + } else if (trunc) { + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + } else if (SCTP_TSN_GE(asoc->cumulative_tsn, tsn)) { /* we can do it now */ sctp_reset_in_stream(stcb, number_entries, req->list_of_streams); - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED); - asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED; + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; } else { /* * we must queue it up and thus wait for the TSN's @@ -3716,18 +3892,17 @@ sctp_handle_str_reset_request_out(struct sctp_tcb *stcb, siz, SCTP_M_STRESET); if (liste == NULL) { /* gak out of memory */ - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED); - asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED; + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); return; } liste->tsn = tsn; liste->number_entries = number_entries; - memcpy(&liste->req, req, - (sizeof(struct sctp_stream_reset_out_request) + (number_entries * sizeof(uint16_t)))); + memcpy(&liste->list_of_streams, req->list_of_streams, number_entries * sizeof(uint16_t)); TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp); - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED); - asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED; + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; } + sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); asoc->str_reset_seq_in++; } else if ((asoc->str_reset_seq_in - 1) == seq) { /* @@ -3742,7 +3917,7 @@ sctp_handle_str_reset_request_out(struct sctp_tcb *stcb, */ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]); } else { - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO); + sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } } @@ -3754,10 +3929,10 @@ sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *ch * Peer is requesting to add more streams. If its within our * max-streams we will allow it. */ - uint16_t num_stream, i; + uint32_t num_stream, i; uint32_t seq; struct sctp_association *asoc = &stcb->asoc; - struct sctp_queued_to_read *ctl; + struct sctp_queued_to_read *ctl, *nctl; /* Get the number. */ seq = ntohl(str_add->request_seq); @@ -3765,12 +3940,14 @@ sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *ch /* Now what would be the new total? */ if (asoc->str_reset_seq_in == seq) { num_stream += stcb->asoc.streamincnt; - if (num_stream > stcb->asoc.max_inbound_streams) { + stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0]; + if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + } else if ((num_stream > stcb->asoc.max_inbound_streams) || + (num_stream > 0xffff)) { /* We must reject it they ask for to many */ denied: - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED); - stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0]; - stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_DENIED; + stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else { /* Ok, we can do that :-) */ struct sctp_stream_in *oldstrm; @@ -3791,8 +3968,7 @@ sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *ch stcb->asoc.strmin[i].last_sequence_delivered = oldstrm[i].last_sequence_delivered; stcb->asoc.strmin[i].delivery_started = oldstrm[i].delivery_started; /* now anything on those queues? */ - while (TAILQ_EMPTY(&oldstrm[i].inqueue) == 0) { - ctl = TAILQ_FIRST(&oldstrm[i].inqueue); + TAILQ_FOREACH_SAFE(ctl, &oldstrm[i].inqueue, next, nctl) { TAILQ_REMOVE(&oldstrm[i].inqueue, ctl, next); TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].inqueue, ctl, next); } @@ -3807,13 +3983,11 @@ sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *ch SCTP_FREE(oldstrm, SCTP_M_STRMI); /* update the size */ stcb->asoc.streamincnt = num_stream; - /* Send the ack */ - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED); - stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0]; - stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED; - sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_INSTREAM_ADD_OK, stcb, - (uint32_t) stcb->asoc.streamincnt, NULL, SCTP_SO_NOT_LOCKED); + stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; + sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0); } + sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); + asoc->str_reset_seq_in++; } else if ((asoc->str_reset_seq_in - 1) == seq) { /* * one seq back, just echo back last action since my @@ -3827,23 +4001,79 @@ sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *ch */ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]); } else { - sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO); + sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } } +static void +sctp_handle_str_reset_add_out_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk, + struct sctp_stream_reset_add_strm *str_add) +{ + /* + * Peer is requesting to add more streams. If its within our + * max-streams we will allow it. + */ + uint16_t num_stream; + uint32_t seq; + struct sctp_association *asoc = &stcb->asoc; + + /* Get the number. */ + seq = ntohl(str_add->request_seq); + num_stream = ntohs(str_add->number_of_streams); + /* Now what would be the new total? */ + if (asoc->str_reset_seq_in == seq) { + stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0]; + if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { + asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + } else if (stcb->asoc.stream_reset_outstanding) { + /* We must reject it we have something pending */ + stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS; + } else { + /* Ok, we can do that :-) */ + int mychk; + + mychk = stcb->asoc.streamoutcnt; + mychk += num_stream; + if (mychk < 0x10000) { + stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; + if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, 1, num_stream, 0, 1)) { + stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + } + } else { + stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; + } + } + sctp_add_stream_reset_result(chk, seq, stcb->asoc.last_reset_action[0]); + asoc->str_reset_seq_in++; + } else if ((asoc->str_reset_seq_in - 1) == seq) { + /* + * one seq back, just echo back last action since my + * response was lost. + */ + sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); + } else if ((asoc->str_reset_seq_in - 2) == seq) { + /* + * two seq back, just echo back last action since my + * response was lost. + */ + sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]); + } else { + sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); + } +} + #ifdef __GNUC__ __attribute__((noinline)) #endif static int sctp_handle_stream_reset(struct sctp_tcb *stcb, struct mbuf *m, int offset, - struct sctp_stream_reset_out_req *sr_req) + struct sctp_chunkhdr *ch_req) { int chk_length, param_len, ptype; struct sctp_paramhdr pstore; uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE]; - - uint32_t seq; + uint32_t seq = 0; int num_req = 0; int trunc = 0; struct sctp_tmit_chunk *chk; @@ -3853,7 +4083,7 @@ __attribute__((noinline)) int num_param = 0; /* now it may be a reset or a reset-response */ - chk_length = ntohs(sr_req->ch.chunk_length); + chk_length = ntohs(ch_req->chunk_length); /* setup for adding the response */ sctp_alloc_a_chunk(stcb, chk); @@ -3873,7 +4103,7 @@ strres_nochunk: sctp_m_freem(chk->data); chk->data = NULL; } - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return (ret_code); } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); @@ -3881,8 +4111,7 @@ strres_nochunk: /* setup chunk parameters */ chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; - chk->whoTo = stcb->asoc.primary_destination; - atomic_add_int(&chk->whoTo->ref_count, 1); + chk->whoTo = NULL; ch = mtod(chk->data, struct sctp_chunkhdr *); ch->chunk_type = SCTP_STREAM_RESET; @@ -3908,7 +4137,6 @@ strres_nochunk: } else { trunc = 0; } - if (num_param > SCTP_MAX_RESET_PARAMS) { /* hit the max of parameters already sorry.. */ break; @@ -3922,30 +4150,33 @@ strres_nochunk: seq = ntohl(req_out->response_seq); if (seq == stcb->asoc.str_reset_seq_out) { /* implicit ack */ - (void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_PERFORMED, NULL); + (void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_RESULT_PERFORMED, NULL); } } sctp_handle_str_reset_request_out(stcb, chk, req_out, trunc); - } else if (ptype == SCTP_STR_RESET_ADD_STREAMS) { + } else if (ptype == SCTP_STR_RESET_ADD_OUT_STREAMS) { struct sctp_stream_reset_add_strm *str_add; str_add = (struct sctp_stream_reset_add_strm *)ph; num_req++; sctp_handle_str_reset_add_strm(stcb, chk, str_add); + } else if (ptype == SCTP_STR_RESET_ADD_IN_STREAMS) { + struct sctp_stream_reset_add_strm *str_add; + + str_add = (struct sctp_stream_reset_add_strm *)ph; + num_req++; + sctp_handle_str_reset_add_out_strm(stcb, chk, str_add); } else if (ptype == SCTP_STR_RESET_IN_REQUEST) { struct sctp_stream_reset_in_request *req_in; num_req++; - req_in = (struct sctp_stream_reset_in_request *)ph; - sctp_handle_str_reset_request_in(stcb, chk, req_in, trunc); } else if (ptype == SCTP_STR_RESET_TSN_REQUEST) { struct sctp_stream_reset_tsn_request *req_tsn; num_req++; req_tsn = (struct sctp_stream_reset_tsn_request *)ph; - if (sctp_handle_str_reset_request_tsn(stcb, chk, req_tsn)) { ret_code = 1; goto strres_nochunk; @@ -4150,8 +4381,10 @@ __attribute__((noinline)) #endif static struct sctp_tcb * sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length, + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen, + uint8_t use_mflowid, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { struct sctp_association *asoc; @@ -4160,6 +4393,7 @@ __attribute__((noinline)) uint32_t chk_length; int ret; int abort_no_unlock = 0; + int ecne_seen = 0; /* * How big should this be, and should it be alloc'd? Lets try the @@ -4173,13 +4407,13 @@ __attribute__((noinline)) int auth_skipped = 0; int asconf_cnt = 0; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_process_control: iphlen=%u, offset=%u, length=%u stcb:%p\n", - iphlen, *offset, length, stcb); + iphlen, *offset, length, (void *)stcb); /* validate chunk header length... */ if (ntohs(ch->chunk_length) < sizeof(*ch)) { @@ -4263,8 +4497,10 @@ __attribute__((noinline)) asconf_len = ntohs(asconf_ch->chunk_length); if (asconf_len < sizeof(struct sctp_asconf_paramhdr)) break; - stcb = sctp_findassociation_ep_asconf(m, iphlen, - *offset, sh, &inp, netp, vrf_id); + stcb = sctp_findassociation_ep_asconf(m, + *offset, + dst, + sh, &inp, netp, vrf_id); if (stcb != NULL) break; asconf_offset += SCTP_SIZE32(asconf_len); @@ -4306,7 +4542,8 @@ __attribute__((noinline)) } if (stcb == NULL) { /* no association, so it's out of the blue... */ - sctp_handle_ootb(m, iphlen, *offset, sh, inp, NULL, + sctp_handle_ootb(m, iphlen, *offset, src, dst, sh, inp, + use_mflowid, mflowid, vrf_id, port); *offset = length; if (locked_tcb) { @@ -4343,8 +4580,10 @@ __attribute__((noinline)) if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } - sctp_handle_ootb(m, iphlen, *offset, sh, inp, - NULL, vrf_id, port); + sctp_handle_ootb(m, iphlen, *offset, src, dst, + sh, inp, + use_mflowid, mflowid, + vrf_id, port); return (NULL); } } else { @@ -4367,7 +4606,6 @@ __attribute__((noinline)) * process all control chunks... */ if (((ch->chunk_type == SCTP_SELECTIVE_ACK) || - /* EY */ (ch->chunk_type == SCTP_NR_SELECTIVE_ACK) || (ch->chunk_type == SCTP_HEARTBEAT_REQUEST)) && (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) { @@ -4481,55 +4719,36 @@ process_control_chunks: } switch (ch->chunk_type) { case SCTP_INITIATION: - /* must be first and only chunk */ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT\n"); - if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { - /* We are not interested anymore? */ - if ((stcb) && (stcb->asoc.total_output_queue_size)) { - /* - * collision case where we are - * sending to them too - */ - ; - } else { - if (locked_tcb) { - SCTP_TCB_UNLOCK(locked_tcb); - } - *offset = length; - return (NULL); - } - } - if ((chk_length > SCTP_LARGEST_INIT_ACCEPTED) || - (num_chunks > 1) || - (SCTP_BASE_SYSCTL(sctp_strict_init) && (length - *offset > (int)SCTP_SIZE32(chk_length)))) { + /* The INIT chunk must be the only chunk. */ + if ((num_chunks > 1) || + (length - *offset > (int)SCTP_SIZE32(chk_length))) { + sctp_abort_association(inp, stcb, m, iphlen, + src, dst, sh, NULL, + use_mflowid, mflowid, + vrf_id, port); *offset = length; - if (locked_tcb) { - SCTP_TCB_UNLOCK(locked_tcb); - } return (NULL); } - if ((stcb != NULL) && - (SCTP_GET_STATE(&stcb->asoc) == - SCTP_STATE_SHUTDOWN_ACK_SENT)) { - sctp_send_shutdown_ack(stcb, - stcb->asoc.primary_destination); + /* Honor our resource limit. */ + if (chk_length > SCTP_LARGEST_INIT_ACCEPTED) { + struct mbuf *op_err; + + op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC); + sctp_abort_association(inp, stcb, m, iphlen, + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, port); *offset = length; - sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED); - if (locked_tcb) { - SCTP_TCB_UNLOCK(locked_tcb); - } return (NULL); } - if (netp) { - sctp_handle_init(m, iphlen, *offset, sh, - (struct sctp_init_chunk *)ch, inp, - stcb, *netp, &abort_no_unlock, vrf_id, port); - } - if (abort_no_unlock) - return (NULL); - + sctp_handle_init(m, iphlen, *offset, src, dst, sh, + (struct sctp_init_chunk *)ch, inp, + stcb, &abort_no_unlock, + use_mflowid, mflowid, + vrf_id, port); *offset = length; - if (locked_tcb) { + if ((!abort_no_unlock) && (locked_tcb)) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); @@ -4537,7 +4756,6 @@ process_control_chunks: case SCTP_PAD_CHUNK: break; case SCTP_INITIATION_ACK: - /* must be first and only chunk */ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT-ACK\n"); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* We are not interested anymore */ @@ -4550,7 +4768,7 @@ process_control_chunks: } *offset = length; if (stcb) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -4559,15 +4777,16 @@ process_control_chunks: atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } return (NULL); } } + /* The INIT-ACK chunk must be the only chunk. */ if ((num_chunks > 1) || - (SCTP_BASE_SYSCTL(sctp_strict_init) && (length - *offset > (int)SCTP_SIZE32(chk_length)))) { + (length - *offset > (int)SCTP_SIZE32(chk_length))) { *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); @@ -4575,21 +4794,27 @@ process_control_chunks: return (NULL); } if ((netp) && (*netp)) { - ret = sctp_handle_init_ack(m, iphlen, *offset, sh, - (struct sctp_init_ack_chunk *)ch, stcb, *netp, &abort_no_unlock, vrf_id); + ret = sctp_handle_init_ack(m, iphlen, *offset, + src, dst, sh, + (struct sctp_init_ack_chunk *)ch, + stcb, *netp, + &abort_no_unlock, + use_mflowid, mflowid, + vrf_id); } else { ret = -1; } + *offset = length; + if (abort_no_unlock) { + return (NULL); + } /* * Special case, I must call the output routine to * get the cookie echoed */ - if (abort_no_unlock) - return (NULL); - - if ((stcb) && ret == 0) + if ((stcb != NULL) && (ret == 0)) { sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED); - *offset = length; + } if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } @@ -4603,7 +4828,6 @@ process_control_chunks: uint16_t num_seg, num_dup; uint8_t flags; int offset_seg, offset_dup; - int nonce_sum_flag; SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK\n"); SCTP_STAT_INCR(sctps_recvsacks); @@ -4625,7 +4849,6 @@ process_control_chunks: } sack = (struct sctp_sack_chunk *)ch; flags = ch->chunk_flags; - nonce_sum_flag = flags & SCTP_SACK_NONCE_SUM; cum_ack = ntohl(sack->sack.cum_tsn_ack); num_seg = ntohs(sack->sack.num_gap_ack_blks); num_dup = ntohs(sack->sack.num_dup_tsns); @@ -4643,8 +4866,7 @@ process_control_chunks: stcb->asoc.seen_a_sack_this_pkt = 1; if ((stcb->asoc.pr_sctp_cnt == 0) && (num_seg == 0) && - ((compare_with_wrap(cum_ack, stcb->asoc.last_acked_seq, MAX_TSN)) || - (cum_ack == stcb->asoc.last_acked_seq)) && + SCTP_TSN_GE(cum_ack, stcb->asoc.last_acked_seq) && (stcb->asoc.saw_sack_with_frags == 0) && (stcb->asoc.saw_sack_with_nr_frags == 0) && (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) @@ -4658,14 +4880,12 @@ process_control_chunks: * with no missing segments to go * this way too. */ - sctp_express_handle_sack(stcb, cum_ack, a_rwnd, nonce_sum_flag, - &abort_now); + sctp_express_handle_sack(stcb, cum_ack, a_rwnd, &abort_now, ecne_seen); } else { if (netp && *netp) - sctp_handle_sack(m, offset_seg, offset_dup, - stcb, *netp, + sctp_handle_sack(m, offset_seg, offset_dup, stcb, num_seg, 0, num_dup, &abort_now, flags, - cum_ack, a_rwnd); + cum_ack, a_rwnd, ecne_seen); } if (abort_now) { /* ABORT signal from sack processing */ @@ -4691,7 +4911,6 @@ process_control_chunks: uint16_t num_seg, num_nr_seg, num_dup; uint8_t flags; int offset_seg, offset_dup; - int nonce_sum_flag; SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_NR_SACK\n"); SCTP_STAT_INCR(sctps_recvsacks); @@ -4717,8 +4936,6 @@ process_control_chunks: } nr_sack = (struct sctp_nr_sack_chunk *)ch; flags = ch->chunk_flags; - nonce_sum_flag = flags & SCTP_SACK_NONCE_SUM; - cum_ack = ntohl(nr_sack->nr_sack.cum_tsn_ack); num_seg = ntohs(nr_sack->nr_sack.num_gap_ack_blks); num_nr_seg = ntohs(nr_sack->nr_sack.num_nr_gap_ack_blks); @@ -4737,8 +4954,7 @@ process_control_chunks: stcb->asoc.seen_a_sack_this_pkt = 1; if ((stcb->asoc.pr_sctp_cnt == 0) && (num_seg == 0) && (num_nr_seg == 0) && - ((compare_with_wrap(cum_ack, stcb->asoc.last_acked_seq, MAX_TSN)) || - (cum_ack == stcb->asoc.last_acked_seq)) && + SCTP_TSN_GE(cum_ack, stcb->asoc.last_acked_seq) && (stcb->asoc.saw_sack_with_frags == 0) && (stcb->asoc.saw_sack_with_nr_frags == 0) && (!TAILQ_EMPTY(&stcb->asoc.sent_queue))) { @@ -4751,14 +4967,13 @@ process_control_chunks: * missing segments to go this way * too. */ - sctp_express_handle_sack(stcb, cum_ack, a_rwnd, nonce_sum_flag, - &abort_now); + sctp_express_handle_sack(stcb, cum_ack, a_rwnd, + &abort_now, ecne_seen); } else { if (netp && *netp) - sctp_handle_sack(m, offset_seg, offset_dup, - stcb, *netp, + sctp_handle_sack(m, offset_seg, offset_dup, stcb, num_seg, num_nr_seg, num_dup, &abort_now, flags, - cum_ack, a_rwnd); + cum_ack, a_rwnd, ecne_seen); } if (abort_now) { /* ABORT signal from sack processing */ @@ -4817,7 +5032,7 @@ process_control_chunks: break; case SCTP_ABORT_ASSOCIATION: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ABORT, stcb %p\n", - stcb); + (void *)stcb); if ((stcb) && netp && *netp) sctp_handle_abort((struct sctp_abort_chunk *)ch, stcb, *netp); @@ -4826,7 +5041,7 @@ process_control_chunks: break; case SCTP_SHUTDOWN: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN, stcb %p\n", - stcb); + (void *)stcb); if ((stcb == NULL) || (chk_length != sizeof(struct sctp_shutdown_chunk))) { *offset = length; if (locked_tcb) { @@ -4846,7 +5061,7 @@ process_control_chunks: } break; case SCTP_SHUTDOWN_ACK: - SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", stcb); + SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", (void *)stcb); if ((stcb) && (netp) && (*netp)) sctp_handle_shutdown_ack((struct sctp_shutdown_ack_chunk *)ch, stcb, *netp); *offset = length; @@ -4856,14 +5071,13 @@ process_control_chunks: case SCTP_OPERATION_ERROR: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_OP-ERR\n"); if ((stcb) && netp && *netp && sctp_handle_error(ch, stcb, *netp) < 0) { - *offset = length; return (NULL); } break; case SCTP_COOKIE_ECHO: SCTPDBG(SCTP_DEBUG_INPUT3, - "SCTP_COOKIE-ECHO, stcb %p\n", stcb); + "SCTP_COOKIE-ECHO, stcb %p\n", (void *)stcb); if ((stcb) && (stcb->asoc.total_output_queue_size)) { ; } else { @@ -4888,23 +5102,13 @@ process_control_chunks: if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) { - struct mbuf *oper; - struct sctp_paramhdr *phdr; - - oper = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), - 0, M_DONTWAIT, 1, MT_DATA); - if (oper) { - SCTP_BUF_LEN(oper) = - sizeof(struct sctp_paramhdr); - phdr = mtod(oper, - struct sctp_paramhdr *); - phdr->param_type = - htons(SCTP_CAUSE_OUT_OF_RESC); - phdr->param_length = - htons(sizeof(struct sctp_paramhdr)); - } - sctp_abort_association(inp, stcb, m, - iphlen, sh, oper, vrf_id, port); + struct mbuf *op_err; + + op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC); + sctp_abort_association(inp, stcb, m, iphlen, + src, dst, sh, op_err, + use_mflowid, mflowid, + vrf_id, port); } *offset = length; return (NULL); @@ -4929,13 +5133,17 @@ process_control_chunks: if (netp) { ret_buf = sctp_handle_cookie_echo(m, iphlen, - *offset, sh, + *offset, + src, dst, + sh, (struct sctp_cookie_echo_chunk *)ch, &inp, &stcb, netp, auth_skipped, auth_offset, auth_len, &locked_tcb, + use_mflowid, + mflowid, vrf_id, port); } else { @@ -4950,7 +5158,6 @@ process_control_chunks: } SCTPDBG(SCTP_DEBUG_INPUT3, "GAK, null buffer\n"); - auth_skipped = 0; *offset = length; return (NULL); } @@ -4967,16 +5174,12 @@ process_control_chunks: struct sctp_tmit_chunk *chk; chk = TAILQ_FIRST(&stcb->asoc.sent_queue); - if (chk) { - sctp_timer_start(SCTP_TIMER_TYPE_SEND, - stcb->sctp_ep, stcb, - chk->whoTo); - } + sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, chk->whoTo); } } break; case SCTP_COOKIE_ACK: - SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", stcb); + SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", (void *)stcb); if ((stcb == NULL) || chk_length != sizeof(struct sctp_cookie_ack_chunk)) { if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); @@ -4988,7 +5191,7 @@ process_control_chunks: if ((stcb) && (stcb->asoc.total_output_queue_size)) { ; } else if (stcb) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -4997,7 +5200,7 @@ process_control_chunks: atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif *offset = length; @@ -5039,6 +5242,7 @@ process_control_chunks: stcb->asoc.overall_error_count = 0; sctp_handle_ecn_echo((struct sctp_ecne_chunk *)ch, stcb); + ecne_seen = 1; } break; case SCTP_ECN_CWR: @@ -5061,11 +5265,11 @@ process_control_chunks: __LINE__); } stcb->asoc.overall_error_count = 0; - sctp_handle_ecn_cwr((struct sctp_cwr_chunk *)ch, stcb); + sctp_handle_ecn_cwr((struct sctp_cwr_chunk *)ch, stcb, *netp); } break; case SCTP_SHUTDOWN_COMPLETE: - SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", stcb); + SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", (void *)stcb); /* must be first and only chunk */ if ((num_chunks > 1) || (length - *offset > (int)SCTP_SIZE32(chk_length))) { @@ -5094,7 +5298,7 @@ process_control_chunks: __LINE__); } stcb->asoc.overall_error_count = 0; - sctp_handle_asconf(m, *offset, + sctp_handle_asconf(m, *offset, src, (struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0); asconf_cnt++; } @@ -5150,7 +5354,7 @@ process_control_chunks: *fwd_tsn_seen = 1; if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* We are not interested anymore */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -5159,7 +5363,7 @@ process_control_chunks: atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif *offset = length; @@ -5193,23 +5397,6 @@ process_control_chunks: *offset = length; return (NULL); } - if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { - /* We are not interested anymore */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - so = SCTP_INP_SO(inp); - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - SCTP_SOCKET_LOCK(so, 1); - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); -#endif - (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - SCTP_SOCKET_UNLOCK(so, 1); -#endif - *offset = length; - return (NULL); - } if (stcb->asoc.peer_supports_strreset == 0) { /* * hmm, peer should have announced this, but @@ -5218,7 +5405,7 @@ process_control_chunks: */ stcb->asoc.peer_supports_strreset = 1; } - if (sctp_handle_stream_reset(stcb, m, *offset, (struct sctp_stream_reset_out_req *)ch)) { + if (sctp_handle_stream_reset(stcb, m, *offset, ch)) { /* stop processing */ *offset = length; return (NULL); @@ -5305,23 +5492,24 @@ process_control_chunks: phd->param_type = htons(SCTP_CAUSE_UNRECOG_CHUNK); phd->param_length = htons(chk_length + sizeof(*phd)); SCTP_BUF_LEN(mm) = sizeof(*phd); - SCTP_BUF_NEXT(mm) = SCTP_M_COPYM(m, *offset, SCTP_SIZE32(chk_length), - M_DONTWAIT); + SCTP_BUF_NEXT(mm) = SCTP_M_COPYM(m, *offset, chk_length, M_DONTWAIT); if (SCTP_BUF_NEXT(mm)) { + if (sctp_pad_lastmbuf(SCTP_BUF_NEXT(mm), SCTP_SIZE32(chk_length) - chk_length, NULL)) { + sctp_m_freem(mm); + } else { #ifdef SCTP_MBUF_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { - struct mbuf *mat; + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { + struct mbuf *mat; - mat = SCTP_BUF_NEXT(mm); - while (mat) { - if (SCTP_BUF_IS_EXTENDED(mat)) { - sctp_log_mb(mat, SCTP_MBUF_ICOPY); + for (mat = SCTP_BUF_NEXT(mm); mat; mat = SCTP_BUF_NEXT(mat)) { + if (SCTP_BUF_IS_EXTENDED(mat)) { + sctp_log_mb(mat, SCTP_MBUF_ICOPY); + } } - mat = SCTP_BUF_NEXT(mat); } - } #endif - sctp_queue_op_err(stcb, mm); + sctp_queue_op_err(stcb, mm); + } } else { sctp_m_freem(mm); } @@ -5361,69 +5549,6 @@ next_chunk: } -/* - * Process the ECN bits we have something set so we must look to see if it is - * ECN(0) or ECN(1) or CE - */ -static void -sctp_process_ecn_marked_a(struct sctp_tcb *stcb, struct sctp_nets *net, - uint8_t ecn_bits) -{ - if ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS) { - ; - } else if ((ecn_bits & SCTP_ECT1_BIT) == SCTP_ECT1_BIT) { - /* - * we only add to the nonce sum for ECT1, ECT0 does not - * change the NS bit (that we have yet to find a way to send - * it yet). - */ - - /* ECN Nonce stuff */ - stcb->asoc.receiver_nonce_sum++; - stcb->asoc.receiver_nonce_sum &= SCTP_SACK_NONCE_SUM; - - /* - * Drag up the last_echo point if cumack is larger since we - * don't want the point falling way behind by more than - * 2^^31 and then having it be incorrect. - */ - if (compare_with_wrap(stcb->asoc.cumulative_tsn, - stcb->asoc.last_echo_tsn, MAX_TSN)) { - stcb->asoc.last_echo_tsn = stcb->asoc.cumulative_tsn; - } - } else if ((ecn_bits & SCTP_ECT0_BIT) == SCTP_ECT0_BIT) { - /* - * Drag up the last_echo point if cumack is larger since we - * don't want the point falling way behind by more than - * 2^^31 and then having it be incorrect. - */ - if (compare_with_wrap(stcb->asoc.cumulative_tsn, - stcb->asoc.last_echo_tsn, MAX_TSN)) { - stcb->asoc.last_echo_tsn = stcb->asoc.cumulative_tsn; - } - } -} - -static void -sctp_process_ecn_marked_b(struct sctp_tcb *stcb, struct sctp_nets *net, - uint32_t high_tsn, uint8_t ecn_bits) -{ - if ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS) { - /* - * we possibly must notify the sender that a congestion - * window reduction is in order. We do this by adding a ECNE - * chunk to the output chunk queue. The incoming CWR will - * remove this chunk. - */ - if (compare_with_wrap(high_tsn, stcb->asoc.last_echo_tsn, - MAX_TSN)) { - /* Yep, we need to add a ECNE */ - sctp_send_ecn_echo(stcb, net, high_tsn); - stcb->asoc.last_echo_tsn = high_tsn; - } - } -} - #ifdef INVARIANTS #ifdef __GNUC__ __attribute__((noinline)) @@ -5452,34 +5577,152 @@ __attribute__((noinline)) * common input chunk processing (v4 and v6) */ void -sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, - int length, struct sctphdr *sh, struct sctp_chunkhdr *ch, - struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net, - uint8_t ecn_bits, uint32_t vrf_id, uint16_t port) +sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int length, + struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, struct sctp_chunkhdr *ch, +#if !defined(SCTP_WITH_NO_CSUM) + uint8_t compute_crc, +#endif + uint8_t ecn_bits, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) { - /* - * Control chunk processing - */ uint32_t high_tsn; int fwd_tsn_seen = 0, data_processed = 0; struct mbuf *m = *mm; - int abort_flag = 0; int un_sent; + int cnt_ctrl_ready = 0; + struct sctp_inpcb *inp = NULL, *inp_decr = NULL; + struct sctp_tcb *stcb = NULL; + struct sctp_nets *net = NULL; SCTP_STAT_INCR(sctps_recvdatagrams); #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xE0, 1); sctp_auditing(0, inp, stcb, net); #endif - +#if !defined(SCTP_WITH_NO_CSUM) + if (compute_crc != 0) { + uint32_t check, calc_check; + + check = sh->checksum; + sh->checksum = 0; + calc_check = sctp_calculate_cksum(m, iphlen); + sh->checksum = check; + if (calc_check != check) { + SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n", + calc_check, check, (void *)m, length, iphlen); + stcb = sctp_findassociation_addr(m, offset, src, dst, + sh, ch, &inp, &net, vrf_id); + if ((net != NULL) && (port != 0)) { + if (net->port == 0) { + sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr)); + } + net->port = port; + } + if ((net != NULL) && (use_mflowid != 0)) { + net->flowid = mflowid; +#ifdef INVARIANTS + net->flowidset = 1; +#endif + } + if ((inp != NULL) && (stcb != NULL)) { + sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1); + sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED); + } else if ((inp != NULL) && (stcb == NULL)) { + inp_decr = inp; + } + SCTP_STAT_INCR(sctps_badsum); + SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors); + goto out; + } + } +#endif + /* Destination port of 0 is illegal, based on RFC4960. */ + if (sh->dest_port == 0) { + SCTP_STAT_INCR(sctps_hdrops); + goto out; + } + stcb = sctp_findassociation_addr(m, offset, src, dst, + sh, ch, &inp, &net, vrf_id); + if ((net != NULL) && (port != 0)) { + if (net->port == 0) { + sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr)); + } + net->port = port; + } + if ((net != NULL) && (use_mflowid != 0)) { + net->flowid = mflowid; +#ifdef INVARIANTS + net->flowidset = 1; +#endif + } + if (inp == NULL) { + SCTP_STAT_INCR(sctps_noport); + if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0) { + goto out; + } + if (ch->chunk_type == SCTP_SHUTDOWN_ACK) { + sctp_send_shutdown_complete2(src, dst, sh, + use_mflowid, mflowid, + vrf_id, port); + goto out; + } + if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) { + goto out; + } + if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) { + if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) || + ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) && + (ch->chunk_type != SCTP_INIT))) { + sctp_send_abort(m, iphlen, src, dst, + sh, 0, NULL, + use_mflowid, mflowid, + vrf_id, port); + } + } + goto out; + } else if (stcb == NULL) { + inp_decr = inp; + } +#ifdef IPSEC + /*- + * I very much doubt any of the IPSEC stuff will work but I have no + * idea, so I will leave it in place. + */ + if (inp != NULL) { + switch (dst->sa_family) { +#ifdef INET + case AF_INET: + if (ipsec4_in_reject(m, &inp->ip_inp.inp)) { + MODULE_GLOBAL(ipsec4stat).in_polvio++; + SCTP_STAT_INCR(sctps_hdrops); + goto out; + } + break; +#endif +#ifdef INET6 + case AF_INET6: + if (ipsec6_in_reject(m, &inp->ip_inp.inp)) { + MODULE_GLOBAL(ipsec6stat).in_polvio++; + SCTP_STAT_INCR(sctps_hdrops); + goto out; + } + break; +#endif + default: + break; + } + } +#endif SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n", - m, iphlen, offset, length, stcb); + (void *)m, iphlen, offset, length, (void *)stcb); if (stcb) { /* always clear this before beginning a packet */ stcb->asoc.authenticated = 0; stcb->asoc.seen_a_sack_this_pkt = 0; SCTPDBG(SCTP_DEBUG_INPUT1, "stcb:%p state:%x\n", - stcb, stcb->asoc.state); + (void *)stcb, stcb->asoc.state); if ((stcb->asoc.state & SCTP_STATE_WAS_ABORTED) || (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) { @@ -5490,16 +5733,21 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, * NOT respond to any packet.. its OOTB. */ SCTP_TCB_UNLOCK(stcb); - sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL, + stcb = NULL; + sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, + use_mflowid, mflowid, vrf_id, port); - goto out_now; + goto out; } } if (IS_SCTP_CONTROL(ch)) { /* process the control portion of the SCTP packet */ /* sa_ignore NO_NULL_CHK */ - stcb = sctp_process_control(m, iphlen, &offset, length, sh, ch, - inp, stcb, &net, &fwd_tsn_seen, vrf_id, port); + stcb = sctp_process_control(m, iphlen, &offset, length, + src, dst, sh, ch, + inp, stcb, &net, &fwd_tsn_seen, + use_mflowid, mflowid, + vrf_id, port); if (stcb) { /* * This covers us if the cookie-echo was there and @@ -5508,7 +5756,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, inp = stcb->sctp_ep; if ((net) && (port)) { if (net->port == 0) { - sctp_pathmtu_adjustment(inp, stcb, net, net->mtu - sizeof(struct udphdr)); + sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr)); } net->port = port; } @@ -5529,20 +5777,19 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) { /* "silently" ignore */ SCTP_STAT_INCR(sctps_recvauthmissing); - SCTP_TCB_UNLOCK(stcb); - goto out_now; + goto out; } if (stcb == NULL) { /* out of the blue DATA chunk */ - sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL, + sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, + use_mflowid, mflowid, vrf_id, port); - goto out_now; + goto out; } if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) { /* v_tag mismatch! */ SCTP_STAT_INCR(sctps_badvtag); - SCTP_TCB_UNLOCK(stcb); - goto out_now; + goto out; } } @@ -5552,7 +5799,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, * packet while processing control, or we're done with this * packet (done or skip rest of data), so we drop it... */ - goto out_now; + goto out; } /* * DATA chunk processing @@ -5603,10 +5850,10 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, /* * We consider OOTB any data sent during asoc setup. */ - sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL, + sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, + use_mflowid, mflowid, vrf_id, port); - SCTP_TCB_UNLOCK(stcb); - goto out_now; + goto out; /* sa_ignore NOTREACHED */ break; case SCTP_STATE_EMPTY: /* should not happen */ @@ -5614,59 +5861,52 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, case SCTP_STATE_SHUTDOWN_RECEIVED: /* This is a peer error */ case SCTP_STATE_SHUTDOWN_ACK_SENT: default: - SCTP_TCB_UNLOCK(stcb); - goto out_now; + goto out; /* sa_ignore NOTREACHED */ break; case SCTP_STATE_OPEN: case SCTP_STATE_SHUTDOWN_SENT: break; } - /* take care of ECN, part 1. */ - if (stcb->asoc.ecn_allowed && - (ecn_bits & (SCTP_ECT0_BIT | SCTP_ECT1_BIT))) { - sctp_process_ecn_marked_a(stcb, net, ecn_bits); - } /* plow through the data chunks while length > offset */ - retval = sctp_process_data(mm, iphlen, &offset, length, sh, - inp, stcb, net, &high_tsn); + retval = sctp_process_data(mm, iphlen, &offset, length, + src, dst, sh, + inp, stcb, net, &high_tsn, + use_mflowid, mflowid, + vrf_id, port); if (retval == 2) { /* * The association aborted, NO UNLOCK needed since * the association is destroyed. */ - goto out_now; + stcb = NULL; + goto out; } data_processed = 1; - if (retval == 0) { - /* take care of ecn part 2. */ - if (stcb->asoc.ecn_allowed && - (ecn_bits & (SCTP_ECT0_BIT | SCTP_ECT1_BIT))) { - sctp_process_ecn_marked_b(stcb, net, high_tsn, - ecn_bits); - } - } /* * Anything important needs to have been m_copy'ed in * process_data */ } + /* take care of ecn */ + if ((data_processed == 1) && + (stcb->asoc.ecn_allowed == 1) && + ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS)) { + /* Yep, we need to add a ECNE */ + sctp_send_ecn_echo(stcb, net, high_tsn); + } if ((data_processed == 0) && (fwd_tsn_seen)) { int was_a_gap; uint32_t highest_tsn; - if (compare_with_wrap(stcb->asoc.highest_tsn_inside_nr_map, stcb->asoc.highest_tsn_inside_map, MAX_TSN)) { + if (SCTP_TSN_GT(stcb->asoc.highest_tsn_inside_nr_map, stcb->asoc.highest_tsn_inside_map)) { highest_tsn = stcb->asoc.highest_tsn_inside_nr_map; } else { highest_tsn = stcb->asoc.highest_tsn_inside_map; } - was_a_gap = compare_with_wrap(highest_tsn, stcb->asoc.cumulative_tsn, MAX_TSN); + was_a_gap = SCTP_TSN_GT(highest_tsn, stcb->asoc.cumulative_tsn); stcb->asoc.send_sack = 1; - sctp_sack_check(stcb, was_a_gap, &abort_flag); - if (abort_flag) { - /* Again, we aborted so NO UNLOCK needed */ - goto out_now; - } + sctp_sack_check(stcb, was_a_gap); } else if (fwd_tsn_seen) { stcb->asoc.send_sack = 1; } @@ -5682,8 +5922,10 @@ trigger_send: TAILQ_EMPTY(&stcb->asoc.control_send_queue), stcb->asoc.total_flight); un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight); - - if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue) || + if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) { + cnt_ctrl_ready = stcb->asoc.ctrl_queue_cnt - stcb->asoc.ecn_echo_cnt_onq; + } + if (cnt_ctrl_ready || ((un_sent) && (stcb->asoc.peers_rwnd > 0 || (stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) { @@ -5695,10 +5937,20 @@ trigger_send: sctp_audit_log(0xE0, 3); sctp_auditing(2, inp, stcb, net); #endif - SCTP_TCB_UNLOCK(stcb); -out_now: +out: + if (stcb != NULL) { + SCTP_TCB_UNLOCK(stcb); + } + if (inp_decr != NULL) { + /* reduce ref-count */ + SCTP_INP_WLOCK(inp_decr); + SCTP_INP_DECR_REF(inp_decr); + SCTP_INP_WUNLOCK(inp_decr); + } #ifdef INVARIANTS - sctp_validate_no_locks(inp); + if (inp != NULL) { + sctp_validate_no_locks(inp); + } #endif return; } @@ -5708,258 +5960,181 @@ static void sctp_print_mbuf_chain(struct mbuf *m) { for (; m; m = SCTP_BUF_NEXT(m)) { - printf("%p: m_len = %ld\n", m, SCTP_BUF_LEN(m)); + SCTP_PRINTF("%p: m_len = %ld\n", (void *)m, SCTP_BUF_LEN(m)); if (SCTP_BUF_IS_EXTENDED(m)) - printf("%p: extend_size = %d\n", m, SCTP_BUF_EXTEND_SIZE(m)); + SCTP_PRINTF("%p: extend_size = %d\n", (void *)m, SCTP_BUF_EXTEND_SIZE(m)); } } #endif +#ifdef INET void sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port) { -#ifdef SCTP_MBUF_LOGGING - struct mbuf *mat; - -#endif struct mbuf *m; int iphlen; uint32_t vrf_id = 0; uint8_t ecn_bits; + struct sockaddr_in src, dst; struct ip *ip; struct sctphdr *sh; - struct sctp_inpcb *inp = NULL; - struct sctp_nets *net; - struct sctp_tcb *stcb = NULL; struct sctp_chunkhdr *ch; - int refcount_up = 0; - int length, mlen, offset; + int length, offset; #if !defined(SCTP_WITH_NO_CSUM) - uint32_t check, calc_check; + uint8_t compute_crc; #endif + uint32_t mflowid; + uint8_t use_mflowid; + iphlen = off; if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) { SCTP_RELEASE_PKT(i_pak); return; } - mlen = SCTP_HEADER_LEN(i_pak); - iphlen = off; m = SCTP_HEADER_TO_CHAIN(i_pak); - - net = NULL; - SCTP_STAT_INCR(sctps_recvpackets); - SCTP_STAT_INCR_COUNTER64(sctps_inpackets); - - #ifdef SCTP_MBUF_LOGGING /* Log in any input mbufs */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { - mat = m; - while (mat) { + struct mbuf *mat; + + for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_INPUT); } - mat = SCTP_BUF_NEXT(mat); } } #endif -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(m, mlen); +#ifdef SCTP_PACKET_LOGGING + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) { + sctp_packet_log(m); + } #endif - /* - * Must take out the iphlen, since mlen expects this (only effect lb - * case) - */ - mlen -= iphlen; - - /* - * Get IP, SCTP, and first chunk header together in first mbuf. - */ - ip = mtod(m, struct ip *); - offset = iphlen + sizeof(*sh) + sizeof(*ch); + SCTPDBG(SCTP_DEBUG_CRCOFFLOAD, + "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n", + m->m_pkthdr.len, + if_name(m->m_pkthdr.rcvif), + m->m_pkthdr.csum_flags); + if (m->m_flags & M_FLOWID) { + mflowid = m->m_pkthdr.flowid; + use_mflowid = 1; + } else { + mflowid = 0; + use_mflowid = 0; + } + SCTP_STAT_INCR(sctps_recvpackets); + SCTP_STAT_INCR_COUNTER64(sctps_inpackets); + /* Get IP, SCTP, and first chunk header together in the first mbuf. */ + offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); if (SCTP_BUF_LEN(m) < offset) { - if ((m = m_pullup(m, offset)) == 0) { + if ((m = m_pullup(m, offset)) == NULL) { SCTP_STAT_INCR(sctps_hdrops); return; } - ip = mtod(m, struct ip *); } - /* validate mbuf chain length with IP payload length */ - if (mlen < (SCTP_GET_IPV4_LENGTH(ip) - iphlen)) { + ip = mtod(m, struct ip *); + sh = (struct sctphdr *)((caddr_t)ip + iphlen); + ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr)); + offset -= sizeof(struct sctp_chunkhdr); + memset(&src, 0, sizeof(struct sockaddr_in)); + src.sin_family = AF_INET; + src.sin_len = sizeof(struct sockaddr_in); + src.sin_port = sh->src_port; + src.sin_addr = ip->ip_src; + memset(&dst, 0, sizeof(struct sockaddr_in)); + dst.sin_family = AF_INET; + dst.sin_len = sizeof(struct sockaddr_in); + dst.sin_port = sh->dest_port; + dst.sin_addr = ip->ip_dst; + length = ip->ip_len + iphlen; + /* Validate mbuf chain length with IP payload length. */ + if (SCTP_HEADER_LEN(m) != length) { + SCTPDBG(SCTP_DEBUG_INPUT1, + "sctp_input() length:%d reported length:%d\n", length, SCTP_HEADER_LEN(m)); SCTP_STAT_INCR(sctps_hdrops); - goto bad; + goto out; } - sh = (struct sctphdr *)((caddr_t)ip + iphlen); - ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(*sh)); - SCTPDBG(SCTP_DEBUG_INPUT1, - "sctp_input() length:%d iphlen:%d\n", mlen, iphlen); - /* SCTP does not allow broadcasts or multicasts */ - if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { - goto bad; + if (IN_MULTICAST(ntohl(dst.sin_addr.s_addr))) { + goto out; } - if (SCTP_IS_IT_BROADCAST(ip->ip_dst, m)) { - /* - * We only look at broadcast if its a front state, All - * others we will not have a tcb for anyway. - */ - goto bad; + if (SCTP_IS_IT_BROADCAST(dst.sin_addr, m)) { + goto out; } - /* validate SCTP checksum */ - SCTPDBG(SCTP_DEBUG_CRCOFFLOAD, - "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n", - m->m_pkthdr.len, - if_name(m->m_pkthdr.rcvif), - m->m_pkthdr.csum_flags); + ecn_bits = ip->ip_tos; #if defined(SCTP_WITH_NO_CSUM) SCTP_STAT_INCR(sctps_recvnocrc); #else if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) { SCTP_STAT_INCR(sctps_recvhwcrc); - goto sctp_skip_csum_4; - } - check = sh->checksum; /* save incoming checksum */ - sh->checksum = 0; /* prepare for calc */ - calc_check = sctp_calculate_cksum(m, iphlen); - sh->checksum = check; - SCTP_STAT_INCR(sctps_recvswcrc); - if (calc_check != check) { - SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n", - calc_check, check, m, mlen, iphlen); - - stcb = sctp_findassociation_addr(m, iphlen, - offset - sizeof(*ch), - sh, ch, &inp, &net, - vrf_id); - if ((net) && (port)) { - if (net->port == 0) { - sctp_pathmtu_adjustment(inp, stcb, net, net->mtu - sizeof(struct udphdr)); - } - net->port = port; - } - if ((inp) && (stcb)) { - sctp_send_packet_dropped(stcb, net, m, iphlen, 1); - sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED); - } else if ((inp != NULL) && (stcb == NULL)) { - refcount_up = 1; - } - SCTP_STAT_INCR(sctps_badsum); - SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors); - goto bad; - } -sctp_skip_csum_4: -#endif - /* destination port of 0 is illegal, based on RFC2960. */ - if (sh->dest_port == 0) { - SCTP_STAT_INCR(sctps_hdrops); - goto bad; - } - /* - * Locate pcb and tcb for datagram sctp_findassociation_addr() wants - * IP/SCTP/first chunk header... - */ - stcb = sctp_findassociation_addr(m, iphlen, offset - sizeof(*ch), - sh, ch, &inp, &net, vrf_id); - if ((net) && (port)) { - if (net->port == 0) { - sctp_pathmtu_adjustment(inp, stcb, net, net->mtu - sizeof(struct udphdr)); - } - net->port = port; - } - /* inp's ref-count increased && stcb locked */ - if (inp == NULL) { - struct sctp_init_chunk *init_chk, chunk_buf; - - SCTP_STAT_INCR(sctps_noport); -#ifdef ICMP_BANDLIM - /* - * we use the bandwidth limiting to protect against sending - * too many ABORTS all at once. In this case these count the - * same as an ICMP message. - */ - if (badport_bandlim(0) < 0) - goto bad; -#endif /* ICMP_BANDLIM */ - SCTPDBG(SCTP_DEBUG_INPUT1, - "Sending a ABORT from packet entry!\n"); - if (ch->chunk_type == SCTP_INITIATION) { - /* - * we do a trick here to get the INIT tag, dig in - * and get the tag from the INIT and put it in the - * common header. - */ - init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m, - iphlen + sizeof(*sh), sizeof(*init_chk), - (uint8_t *) & chunk_buf); - if (init_chk != NULL) - sh->v_tag = init_chk->init.initiate_tag; - } - if (ch->chunk_type == SCTP_SHUTDOWN_ACK) { - sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id, port); - goto bad; - } - if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) { - goto bad; - } - if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) - sctp_send_abort(m, iphlen, sh, 0, NULL, vrf_id, port); - goto bad; - } else if (stcb == NULL) { - refcount_up = 1; - } -#ifdef IPSEC - /* - * I very much doubt any of the IPSEC stuff will work but I have no - * idea, so I will leave it in place. - */ - if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) { - MODULE_GLOBAL(ipsec4stat).in_polvio++; - SCTP_STAT_INCR(sctps_hdrops); - goto bad; - } -#endif /* IPSEC */ - - /* - * common chunk processing - */ - length = ip->ip_len + iphlen; - offset -= sizeof(struct sctp_chunkhdr); - - ecn_bits = ip->ip_tos; - - /* sa_ignore NO_NULL_CHK */ - sctp_common_input_processing(&m, iphlen, offset, length, sh, ch, - inp, stcb, net, ecn_bits, vrf_id, port); - /* inp's ref-count reduced && stcb unlocked */ - if (m) { - sctp_m_freem(m); - } - if ((inp) && (refcount_up)) { - /* reduce ref-count */ - SCTP_INP_DECR_REF(inp); - } - return; -bad: - if (stcb) { - SCTP_TCB_UNLOCK(stcb); - } - if ((inp) && (refcount_up)) { - /* reduce ref-count */ - SCTP_INP_DECR_REF(inp); + compute_crc = 0; + } else { + SCTP_STAT_INCR(sctps_recvswcrc); + compute_crc = 1; } +#endif + sctp_common_input_processing(&m, iphlen, offset, length, + (struct sockaddr *)&src, + (struct sockaddr *)&dst, + sh, ch, +#if !defined(SCTP_WITH_NO_CSUM) + compute_crc, +#endif + ecn_bits, + use_mflowid, mflowid, + vrf_id, port); +out: if (m) { sctp_m_freem(m); } return; } + +#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) +extern int *sctp_cpuarry; + +#endif + void -sctp_input(i_pak, off) - struct mbuf *i_pak; - int off; +sctp_input(struct mbuf *m, int off) { - sctp_input_with_port(i_pak, off, 0); +#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) + struct ip *ip; + struct sctphdr *sh; + int offset; + int cpu_to_use; + uint32_t flowid, tag; + + if (mp_ncpus > 1) { + if (m->m_flags & M_FLOWID) { + flowid = m->m_pkthdr.flowid; + } else { + /* + * No flow id built by lower layers fix it so we + * create one. + */ + offset = off + sizeof(struct sctphdr); + if (SCTP_BUF_LEN(m) < offset) { + if ((m = m_pullup(m, offset)) == NULL) { + SCTP_STAT_INCR(sctps_hdrops); + return; + } + } + ip = mtod(m, struct ip *); + sh = (struct sctphdr *)((caddr_t)ip + off); + tag = htonl(sh->v_tag); + flowid = tag ^ ntohs(sh->dest_port) ^ ntohs(sh->src_port); + m->m_pkthdr.flowid = flowid; + m->m_flags |= M_FLOWID; + } + cpu_to_use = sctp_cpuarry[flowid % mp_ncpus]; + sctp_queue_to_mcore(m, off, cpu_to_use); + return; + } +#endif + sctp_input_with_port(m, off, 0); } + +#endif diff --git a/freebsd/sys/netinet/sctp_input.h b/freebsd/sys/netinet/sctp_input.h index 39f64207..95208032 100644 --- a/freebsd/sys/netinet/sctp_input.h +++ b/freebsd/sys/netinet/sctp_input.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,30 +30,34 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_input.h,v 1.6 2005/03/06 16:04:17 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_input_h__ -#define __sctp_input_h__ +#ifndef _NETINET_SCTP_INPUT_H_ +#define _NETINET_SCTP_INPUT_H_ #if defined(_KERNEL) || defined(__Userspace__) void sctp_common_input_processing(struct mbuf **, int, int, int, - struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb *, - struct sctp_tcb *, struct sctp_nets *, uint8_t, uint32_t, uint16_t); + struct sockaddr *, struct sockaddr *, + struct sctphdr *, struct sctp_chunkhdr *, +#if !defined(SCTP_WITH_NO_CSUM) + uint8_t, +#endif + uint8_t, + uint8_t, uint32_t, + uint32_t, uint16_t); struct sctp_stream_reset_out_request * sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk); void -sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries, +sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list); -int sctp_is_there_unsent_data(struct sctp_tcb *stcb); +int sctp_is_there_unsent_data(struct sctp_tcb *stcb, int so_locked); #endif #endif diff --git a/freebsd/sys/netinet/sctp_lock_bsd.h b/freebsd/sys/netinet/sctp_lock_bsd.h index dd9c2ce5..35cdf5f8 100644 --- a/freebsd/sys/netinet/sctp_lock_bsd.h +++ b/freebsd/sys/netinet/sctp_lock_bsd.h @@ -1,7 +1,7 @@ -#ifndef __sctp_lock_bsd_h__ -#define __sctp_lock_bsd_h__ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,12 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ +#include +__FBSDID("$FreeBSD$"); + +#ifndef _NETINET_SCTP_LOCK_BSD_H_ +#define _NETINET_SCTP_LOCK_BSD_H_ + /* * General locking concepts: The goal of our locking is to of course provide * consistency and yet minimize overhead. We will attempt to use @@ -68,9 +74,6 @@ * SCTP_INP_INFO_RLOCK() and then when we want to add a new association to * the SCTP_BASE_INFO() list's we will do a SCTP_INP_INFO_WLOCK(). */ -#include -__FBSDID("$FreeBSD$"); - extern struct sctp_foo_stuff sctp_logoff[]; extern int sctp_logoff_stuff; @@ -97,6 +100,48 @@ extern int sctp_logoff_stuff; rw_rlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \ } while (0) +#define SCTP_MCORE_QLOCK_INIT(cpstr) do { \ + mtx_init(&(cpstr)->que_mtx, \ + "sctp-mcore_queue","queue_lock", \ + MTX_DEF|MTX_DUPOK); \ +} while (0) + +#define SCTP_MCORE_QLOCK(cpstr) do { \ + mtx_lock(&(cpstr)->que_mtx); \ +} while (0) + +#define SCTP_MCORE_QUNLOCK(cpstr) do { \ + mtx_unlock(&(cpstr)->que_mtx); \ +} while (0) + +#define SCTP_MCORE_QDESTROY(cpstr) do { \ + if(mtx_owned(&(cpstr)->core_mtx)) { \ + mtx_unlock(&(cpstr)->que_mtx); \ + } \ + mtx_destroy(&(cpstr)->que_mtx); \ +} while (0) + + +#define SCTP_MCORE_LOCK_INIT(cpstr) do { \ + mtx_init(&(cpstr)->core_mtx, \ + "sctp-cpulck","cpu_proc_lock", \ + MTX_DEF|MTX_DUPOK); \ +} while (0) + +#define SCTP_MCORE_LOCK(cpstr) do { \ + mtx_lock(&(cpstr)->core_mtx); \ +} while (0) + +#define SCTP_MCORE_UNLOCK(cpstr) do { \ + mtx_unlock(&(cpstr)->core_mtx); \ +} while (0) + +#define SCTP_MCORE_DESTROY(cpstr) do { \ + if(mtx_owned(&(cpstr)->core_mtx)) { \ + mtx_unlock(&(cpstr)->core_mtx); \ + } \ + mtx_destroy(&(cpstr)->core_mtx); \ +} while (0) #define SCTP_INP_INFO_WLOCK() do { \ rw_wlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \ diff --git a/freebsd/sys/netinet/sctp_os.h b/freebsd/sys/netinet/sctp_os.h index fb7c5360..dc6049b6 100644 --- a/freebsd/sys/netinet/sctp_os.h +++ b/freebsd/sys/netinet/sctp_os.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -27,10 +29,12 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ + #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_os_h__ -#define __sctp_os_h__ + +#ifndef _NETINET_SCTP_OS_H_ +#define _NETINET_SCTP_OS_H_ /* * General kernel memory allocation: diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h index 991daca2..ca4be706 100644 --- a/freebsd/sys/netinet/sctp_os_bsd.h +++ b/freebsd/sys/netinet/sctp_os_bsd.h @@ -1,5 +1,7 @@ /*- * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,10 +29,12 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ + #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_os_bsd_h__ -#define __sctp_os_bsd_h__ + +#ifndef _NETINET_SCTP_OS_BSD_H_ +#define _NETINET_SCTP_OS_BSD_H_ /* * includes */ @@ -123,6 +127,7 @@ MALLOC_DECLARE(SCTP_M_TIMW); MALLOC_DECLARE(SCTP_M_MVRF); MALLOC_DECLARE(SCTP_M_ITER); MALLOC_DECLARE(SCTP_M_SOCKOPT); +MALLOC_DECLARE(SCTP_M_MCORE); #if defined(SCTP_LOCAL_TRACE_BUF) @@ -154,12 +159,13 @@ MALLOC_DECLARE(SCTP_M_SOCKOPT); */ #define USER_ADDR_NULL (NULL) /* FIX ME: temp */ +#define SCTP_PRINTF(params...) printf(params) #if defined(SCTP_DEBUG) #define SCTPDBG(level, params...) \ { \ do { \ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \ - printf(params); \ + SCTP_PRINTF(params); \ } \ } while (0); \ } @@ -171,20 +177,10 @@ MALLOC_DECLARE(SCTP_M_SOCKOPT); } \ } while (0); \ } -#define SCTPDBG_PKT(level, iph, sh) \ -{ \ - do { \ - if (SCTP_BASE_SYSCTL(sctp_debug_on) & level) { \ - sctp_print_address_pkt(iph, sh); \ - } \ - } while (0); \ -} #else #define SCTPDBG(level, params...) #define SCTPDBG_ADDR(level, addr) -#define SCTPDBG_PKT(level, iph, sh) #endif -#define SCTP_PRINTF(params...) printf(params) #ifdef SCTP_LTRACE_CHUNKS #define SCTP_LTRACE_CHK(a, b, c, d) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_CHUNK_ENABLE) SCTP_CTR6(KTR_SUBSYS, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_CHUNK_PROC, 0, a, b, c, d) @@ -193,12 +189,14 @@ MALLOC_DECLARE(SCTP_M_SOCKOPT); #endif #ifdef SCTP_LTRACE_ERRORS -#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \ - printf("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \ - m, inp, stcb, net, file, __LINE__, err); -#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \ - printf("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \ - inp, stcb, net, file, __LINE__, err); +#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) \ + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \ + SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \ + m, inp, stcb, net, file, __LINE__, err); +#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) \ + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \ + SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \ + inp, stcb, net, file, __LINE__, err); #else #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) #define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) @@ -358,7 +356,7 @@ typedef struct callout sctp_os_timer_t; */ #define SCTP_HEADER_TO_CHAIN(m) (m) #define SCTP_DETACH_HEADER_FROM_CHAIN(m) -#define SCTP_HEADER_LEN(m) (m->m_pkthdr.len) +#define SCTP_HEADER_LEN(m) ((m)->m_pkthdr.len) #define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0 #define SCTP_RELEASE_HEADER(m) #define SCTP_RELEASE_PKT(m) sctp_m_freem(m) @@ -387,10 +385,6 @@ typedef struct callout sctp_os_timer_t; * its a NOP. */ -/* Macro's for getting length from V6/V4 header */ -#define SCTP_GET_IPV4_LENGTH(iph) (iph->ip_len) -#define SCTP_GET_IPV6_LENGTH(ip6) (ntohs(ip6->ip6_plen)) - /* get the v6 hop limit */ #define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL))); @@ -421,6 +415,12 @@ typedef struct callout sctp_os_timer_t; typedef struct route sctp_route_t; typedef struct rtentry sctp_rtentry_t; +/* + * XXX multi-FIB support was backed out in r179783 and it seems clear that the + * VRF support as currently in FreeBSD is not ready to support multi-FIB. + * It might be best to implement multi-FIB support for both v4 and v6 indepedent + * of VRFs and leave those to a real MPLS stack. + */ #define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL) /* Future zero copy wakeup/send function */ diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c index 648a87a2..30c6f3c0 100644 --- a/freebsd/sys/netinet/sctp_output.c +++ b/freebsd/sys/netinet/sctp_output.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,8 +32,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -1864,15 +1865,10 @@ struct sack_track sack_array[256] = { int sctp_is_address_in_scope(struct sctp_ifa *ifa, - int ipv4_addr_legal, - int ipv6_addr_legal, - int loopback_scope, - int ipv4_local_scope, - int local_scope, - int site_scope, + struct sctp_scoping *scope, int do_update) { - if ((loopback_scope == 0) && + if ((scope->loopback_scope == 0) && (ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) { /* * skip loopback if not in scope * @@ -1880,8 +1876,9 @@ sctp_is_address_in_scope(struct sctp_ifa *ifa, return (0); } switch (ifa->address.sa.sa_family) { +#ifdef INET case AF_INET: - if (ipv4_addr_legal) { + if (scope->ipv4_addr_legal) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)&ifa->address.sin; @@ -1889,7 +1886,7 @@ sctp_is_address_in_scope(struct sctp_ifa *ifa, /* not in scope , unspecified */ return (0); } - if ((ipv4_local_scope == 0) && + if ((scope->ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { /* private address not in scope */ return (0); @@ -1898,9 +1895,10 @@ sctp_is_address_in_scope(struct sctp_ifa *ifa, return (0); } break; +#endif #ifdef INET6 case AF_INET6: - if (ipv6_addr_legal) { + if (scope->ipv6_addr_legal) { struct sockaddr_in6 *sin6; /* @@ -1923,7 +1921,7 @@ sctp_is_address_in_scope(struct sctp_ifa *ifa, (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) { return (0); } - if ((site_scope == 0) && + if ((scope->site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { return (0); } @@ -1939,21 +1937,27 @@ sctp_is_address_in_scope(struct sctp_ifa *ifa, } static struct mbuf * -sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa) +sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t * len) { struct sctp_paramhdr *parmh; struct mbuf *mret; - int len; + uint16_t plen; - if (ifa->address.sa.sa_family == AF_INET) { - len = sizeof(struct sctp_ipv4addr_param); - } else if (ifa->address.sa.sa_family == AF_INET6) { - len = sizeof(struct sctp_ipv6addr_param); - } else { - /* unknown type */ + switch (ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: + plen = (uint16_t) sizeof(struct sctp_ipv4addr_param); + break; +#endif +#ifdef INET6 + case AF_INET6: + plen = (uint16_t) sizeof(struct sctp_ipv6addr_param); + break; +#endif + default: return (m); } - if (M_TRAILINGSPACE(m) >= len) { + if (M_TRAILINGSPACE(m) >= plen) { /* easy side we just drop it on the end */ parmh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m))); mret = m; @@ -1963,7 +1967,7 @@ sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa) while (SCTP_BUF_NEXT(mret) != NULL) { mret = SCTP_BUF_NEXT(mret); } - SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA); + SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(plen, 0, M_NOWAIT, 1, MT_DATA); if (SCTP_BUF_NEXT(mret) == NULL) { /* We are hosed, can't add more addresses */ return (m); @@ -1973,6 +1977,7 @@ sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa) } /* now add the parameter */ switch (ifa->address.sa.sa_family) { +#ifdef INET case AF_INET: { struct sctp_ipv4addr_param *ipv4p; @@ -1981,11 +1986,12 @@ sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa) sin = (struct sockaddr_in *)&ifa->address.sin; ipv4p = (struct sctp_ipv4addr_param *)parmh; parmh->param_type = htons(SCTP_IPV4_ADDRESS); - parmh->param_length = htons(len); + parmh->param_length = htons(plen); ipv4p->addr = sin->sin_addr.s_addr; - SCTP_BUF_LEN(mret) += len; + SCTP_BUF_LEN(mret) += plen; break; } +#endif #ifdef INET6 case AF_INET6: { @@ -1995,25 +2001,30 @@ sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa) sin6 = (struct sockaddr_in6 *)&ifa->address.sin6; ipv6p = (struct sctp_ipv6addr_param *)parmh; parmh->param_type = htons(SCTP_IPV6_ADDRESS); - parmh->param_length = htons(len); + parmh->param_length = htons(plen); memcpy(ipv6p->addr, &sin6->sin6_addr, sizeof(ipv6p->addr)); /* clear embedded scope in the address */ in6_clearscope((struct in6_addr *)ipv6p->addr); - SCTP_BUF_LEN(mret) += len; + SCTP_BUF_LEN(mret) += plen; break; } #endif default: return (m); } + if (len != NULL) { + *len += plen; + } return (mret); } struct mbuf * -sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_scoping *scope, - struct mbuf *m_at, int cnt_inits_to) +sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_tcb *stcb, + struct sctp_scoping *scope, + struct mbuf *m_at, int cnt_inits_to, + uint16_t * padding_len, uint16_t * chunk_len) { struct sctp_vrf *vrf = NULL; int cnt, limit_out = 0, total_count; @@ -2046,13 +2057,10 @@ sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_scoping *scope, continue; } LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) { - if (sctp_is_address_in_scope(sctp_ifap, - scope->ipv4_addr_legal, - scope->ipv6_addr_legal, - scope->loopback_scope, - scope->ipv4_local_scope, - scope->local_scope, - scope->site_scope, 1) == 0) { + if (sctp_is_addr_restricted(stcb, sctp_ifap)) { + continue; + } + if (sctp_is_address_in_scope(sctp_ifap, scope, 1) == 0) { continue; } cnt++; @@ -2078,16 +2086,22 @@ skip_count: continue; } LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) { + if (sctp_is_addr_restricted(stcb, sctp_ifap)) { + continue; + } if (sctp_is_address_in_scope(sctp_ifap, - scope->ipv4_addr_legal, - scope->ipv6_addr_legal, - scope->loopback_scope, - scope->ipv4_local_scope, - scope->local_scope, - scope->site_scope, 0) == 0) { + scope, 0) == 0) { continue; } - m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap); + if ((chunk_len != NULL) && + (padding_len != NULL) && + (*padding_len > 0)) { + memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len); + SCTP_BUF_LEN(m_at) += *padding_len; + *chunk_len += *padding_len; + *padding_len = 0; + } + m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap, chunk_len); if (limit_out) { cnt++; total_count++; @@ -2129,43 +2143,38 @@ skip_count: continue; } if (sctp_is_address_in_scope(laddr->ifa, - scope->ipv4_addr_legal, - scope->ipv6_addr_legal, - scope->loopback_scope, - scope->ipv4_local_scope, - scope->local_scope, - scope->site_scope, 1) == 0) { + scope, 1) == 0) { continue; } cnt++; } - if (cnt > SCTP_ADDRESS_LIMIT) { - limit_out = 1; - } /* * To get through a NAT we only list addresses if we have * more than one. That way if you just bind a single address * we let the source of the init dictate our address. */ if (cnt > 1) { + cnt = cnt_inits_to; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - cnt = 0; if (laddr->ifa == NULL) { continue; } - if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) + if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { continue; - + } if (sctp_is_address_in_scope(laddr->ifa, - scope->ipv4_addr_legal, - scope->ipv6_addr_legal, - scope->loopback_scope, - scope->ipv4_local_scope, - scope->local_scope, - scope->site_scope, 0) == 0) { + scope, 0) == 0) { continue; } - m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa); + if ((chunk_len != NULL) && + (padding_len != NULL) && + (*padding_len > 0)) { + memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len); + SCTP_BUF_LEN(m_at) += *padding_len; + *chunk_len += *padding_len; + *padding_len = 0; + } + m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa, chunk_len); cnt++; if (cnt >= SCTP_ADDRESS_LIMIT) { break; @@ -2193,26 +2202,26 @@ sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa, * means it is the same scope or higher scope then the destination. * L = loopback, P = private, G = global * ----------------------------------------- - * src | dest | result - * ---------------------------------------- - * L | L | yes - * ----------------------------------------- - * P | L | yes-v4 no-v6 - * ----------------------------------------- - * G | L | yes-v4 no-v6 - * ----------------------------------------- - * L | P | no - * ----------------------------------------- - * P | P | yes - * ----------------------------------------- - * G | P | no - * ----------------------------------------- - * L | G | no - * ----------------------------------------- - * P | G | no - * ----------------------------------------- - * G | G | yes - * ----------------------------------------- + * src | dest | result + * ---------------------------------------- + * L | L | yes + * ----------------------------------------- + * P | L | yes-v4 no-v6 + * ----------------------------------------- + * G | L | yes-v4 no-v6 + * ----------------------------------------- + * L | P | no + * ----------------------------------------- + * P | P | yes + * ----------------------------------------- + * G | P | no + * ----------------------------------------- + * L | G | no + * ----------------------------------------- + * P | G | no + * ----------------------------------------- + * G | G | yes + * ----------------------------------------- */ if (ifa->address.sa.sa_family != fam) { @@ -2225,6 +2234,7 @@ sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa, SCTPDBG(SCTP_DEBUG_OUTPUT2, "Is destination preferred:"); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ifa->address.sa); /* Ok the address may be ok */ +#ifdef INET6 if (fam == AF_INET6) { /* ok to use deprecated addresses? no lets not! */ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { @@ -2244,6 +2254,7 @@ sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa, } } } +#endif /* * Now that we know what is what, implement or table this could in * theory be done slicker (it used to be), but this is @@ -2283,35 +2294,50 @@ sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa, { uint8_t dest_is_global = 0; - /* + /** * Here we determine if its a acceptable address. A acceptable * address means it is the same scope or higher scope but we can * allow for NAT which means its ok to have a global dest and a * private src. * * L = loopback, P = private, G = global - * ----------------------------------------- src | dest | result - * ----------------------------------------- L | L | yes - * ----------------------------------------- P | L | - * yes-v4 no-v6 ----------------------------------------- G | - * L | yes ----------------------------------------- L | - * P | no ----------------------------------------- P | P - * | yes ----------------------------------------- G | P - * | yes - May not work ----------------------------------------- - * L | G | no ----------------------------------------- P - * | G | yes - May not work - * ----------------------------------------- G | G | yes + * ----------------------------------------- + * src | dest | result + * ----------------------------------------- + * L | L | yes + * ----------------------------------------- + * P | L | yes-v4 no-v6 + * ----------------------------------------- + * G | L | yes + * ----------------------------------------- + * L | P | no + * ----------------------------------------- + * P | P | yes + * ----------------------------------------- + * G | P | yes - May not work + * ----------------------------------------- + * L | G | no + * ----------------------------------------- + * P | G | yes - May not work + * ----------------------------------------- + * G | G | yes * ----------------------------------------- */ if (ifa->address.sa.sa_family != fam) { /* forget non matching family */ + SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa_fam:%d fam:%d\n", + ifa->address.sa.sa_family, fam); return (NULL); } /* Ok the address may be ok */ + SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, &ifa->address.sa); + SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst_is_loop:%d dest_is_priv:%d\n", + dest_is_loop, dest_is_priv); if ((dest_is_loop == 0) && (dest_is_priv == 0)) { dest_is_global = 1; } +#ifdef INET6 if (fam == AF_INET6) { /* ok to use deprecated addresses? */ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { @@ -2323,17 +2349,25 @@ sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa, return (NULL); } } +#endif /* * Now that we know what is what, implement our table. This could in * theory be done slicker (it used to be), but this is * straightforward and easier to validate :-) */ + SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa->src_is_loop:%d dest_is_priv:%d\n", + ifa->src_is_loop, + dest_is_priv); if ((ifa->src_is_loop == 1) && (dest_is_priv)) { return (NULL); } + SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa->src_is_loop:%d dest_is_glob:%d\n", + ifa->src_is_loop, + dest_is_global); if ((ifa->src_is_loop == 1) && (dest_is_global)) { return (NULL); } + SCTPDBG(SCTP_DEBUG_OUTPUT3, "address is acceptable\n"); /* its an acceptable address */ return (ifa); } @@ -2504,7 +2538,6 @@ once_again_too: static struct sctp_ifa * sctp_choose_boundspecific_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct sctp_nets *net, sctp_route_t * ro, uint32_t vrf_id, uint8_t dest_is_priv, @@ -2734,6 +2767,7 @@ sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn, } } #endif +#ifdef INET /* Avoid topologically incorrect IPv4 address */ if (stcb && fam == AF_INET && sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) { @@ -2741,14 +2775,9 @@ sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn, continue; } } +#endif if (stcb) { - if (sctp_is_address_in_scope(ifa, - stcb->asoc.ipv4_addr_legal, - stcb->asoc.ipv6_addr_legal, - stcb->asoc.loopback_scope, - stcb->asoc.ipv4_local_scope, - stcb->asoc.local_scope, - stcb->asoc.site_scope, 0) == 0) { + if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) { continue; } if (((non_asoc_addr_ok == 0) && @@ -2794,13 +2823,7 @@ sctp_count_num_preferred_boundall(struct sctp_ifn *ifn, continue; } if (stcb) { - if (sctp_is_address_in_scope(ifa, - stcb->asoc.ipv4_addr_legal, - stcb->asoc.ipv6_addr_legal, - stcb->asoc.loopback_scope, - stcb->asoc.ipv4_local_scope, - stcb->asoc.local_scope, - stcb->asoc.site_scope, 0) == 0) { + if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) { continue; } if (((non_asoc_addr_ok == 0) && @@ -2821,8 +2844,7 @@ sctp_count_num_preferred_boundall(struct sctp_ifn *ifn, } static struct sctp_ifa * -sctp_choose_boundall(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, +sctp_choose_boundall(struct sctp_tcb *stcb, struct sctp_nets *net, sctp_route_t * ro, uint32_t vrf_id, @@ -2838,6 +2860,11 @@ sctp_choose_boundall(struct sctp_inpcb *inp, uint32_t ifn_index; struct sctp_vrf *vrf; +#ifdef INET + int retried = 0; + +#endif + /*- * For boundall we can use any address in the association. * If non_asoc_addr_ok is set we can use any address (at least in @@ -2858,6 +2885,7 @@ sctp_choose_boundall(struct sctp_inpcb *inp, ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro); + SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn from route:%p ifn_index:%d\n", ifn, ifn_index); emit_ifn = looked_at = sctp_ifn = sctp_find_ifn(ifn, ifn_index); if (sctp_ifn == NULL) { /* ?? We don't have this guy ?? */ @@ -2941,7 +2969,7 @@ bound_all_plan_b: } SCTPDBG(SCTP_DEBUG_OUTPUT2, "num preferred:%d on interface:%p cur_addr_num:%d\n", - num_preferred, sctp_ifn, cur_addr_num); + num_preferred, (void *)sctp_ifn, cur_addr_num); /* * Ok we have num_eligible_addr set with how many we can @@ -2966,30 +2994,34 @@ bound_all_plan_b: } atomic_add_int(&sifa->refcount, 1); return (sifa); - } - +#ifdef INET +again_with_private_addresses_allowed: +#endif /* plan_c: do we have an acceptable address on the emit interface */ + sifa = NULL; SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan C: find acceptable on interface\n"); if (emit_ifn == NULL) { + SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jump to Plan D - no emit_ifn\n"); goto plan_d; } LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) { + SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifa:%p\n", (void *)sctp_ifa); if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && - (non_asoc_addr_ok == 0)) + (non_asoc_addr_ok == 0)) { + SCTPDBG(SCTP_DEBUG_OUTPUT2, "Defer\n"); continue; + } sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam); - if (sifa == NULL) + if (sifa == NULL) { + SCTPDBG(SCTP_DEBUG_OUTPUT2, "IFA not acceptable\n"); continue; + } if (stcb) { - if (sctp_is_address_in_scope(sifa, - stcb->asoc.ipv4_addr_legal, - stcb->asoc.ipv6_addr_legal, - stcb->asoc.loopback_scope, - stcb->asoc.ipv4_local_scope, - stcb->asoc.local_scope, - stcb->asoc.site_scope, 0) == 0) { + if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) { + SCTPDBG(SCTP_DEBUG_OUTPUT2, "NOT in scope\n"); + sifa = NULL; continue; } if (((non_asoc_addr_ok == 0) && @@ -3001,11 +3033,15 @@ bound_all_plan_b: * It is restricted for some reason.. * probably not yet added. */ + SCTPDBG(SCTP_DEBUG_OUTPUT2, "Its resticted\n"); + sifa = NULL; continue; } + } else { + SCTP_PRINTF("Stcb is null - no print\n"); } atomic_add_int(&sifa->refcount, 1); - return (sifa); + goto out; } plan_d: /* @@ -3014,16 +3050,12 @@ plan_d: * out and see if we can find an acceptable address somewhere * amongst all interfaces. */ - SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D\n"); + SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D looked_at is %p\n", (void *)looked_at); LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* wrong base scope */ continue; } - if ((sctp_ifn == looked_at) && looked_at) - /* already looked at this guy */ - continue; - LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) @@ -3034,13 +3066,8 @@ plan_d: if (sifa == NULL) continue; if (stcb) { - if (sctp_is_address_in_scope(sifa, - stcb->asoc.ipv4_addr_legal, - stcb->asoc.ipv6_addr_legal, - stcb->asoc.loopback_scope, - stcb->asoc.ipv4_local_scope, - stcb->asoc.local_scope, - stcb->asoc.site_scope, 0) == 0) { + if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) { + sifa = NULL; continue; } if (((non_asoc_addr_ok == 0) && @@ -3052,19 +3079,76 @@ plan_d: * It is restricted for some * reason.. probably not yet added. */ + sifa = NULL; continue; } } - atomic_add_int(&sifa->refcount, 1); - return (sifa); + goto out; } } - /* - * Ok we can find NO address to source from that is not on our - * restricted list and non_asoc_address is NOT ok, or it is on our - * restricted list. We can't source to it :-( - */ - return (NULL); +#ifdef INET + if ((retried == 0) && (stcb->asoc.scope.ipv4_local_scope == 0)) { + stcb->asoc.scope.ipv4_local_scope = 1; + retried = 1; + goto again_with_private_addresses_allowed; + } else if (retried == 1) { + stcb->asoc.scope.ipv4_local_scope = 0; + } +#endif +out: +#ifdef INET + if (sifa) { + if (retried == 1) { + LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { + if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { + /* wrong base scope */ + continue; + } + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { + struct sctp_ifa *tmp_sifa; + + if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && + (non_asoc_addr_ok == 0)) + continue; + tmp_sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, + dest_is_loop, + dest_is_priv, fam); + if (tmp_sifa == NULL) { + continue; + } + if (tmp_sifa == sifa) { + continue; + } + if (stcb) { + if (sctp_is_address_in_scope(tmp_sifa, + &stcb->asoc.scope, 0) == 0) { + continue; + } + if (((non_asoc_addr_ok == 0) && + (sctp_is_addr_restricted(stcb, tmp_sifa))) || + (non_asoc_addr_ok && + (sctp_is_addr_restricted(stcb, tmp_sifa)) && + (!sctp_is_addr_pending(stcb, tmp_sifa)))) { + /* + * It is restricted + * for some reason.. + * probably not yet + * added. + */ + continue; + } + } + if ((tmp_sifa->address.sin.sin_family == AF_INET) && + (IN4_ISPRIVATE_ADDRESS(&(tmp_sifa->address.sin.sin_addr)))) { + sctp_add_local_addr_restricted(stcb, tmp_sifa); + } + } + } + } + atomic_add_int(&sifa->refcount, 1); + } +#endif + return (sifa); } @@ -3077,17 +3161,20 @@ sctp_source_address_selection(struct sctp_inpcb *inp, struct sctp_nets *net, int non_asoc_addr_ok, uint32_t vrf_id) { + struct sctp_ifa *answer; + uint8_t dest_is_priv, dest_is_loop; + sa_family_t fam; + +#ifdef INET struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst; +#endif #ifdef INET6 struct sockaddr_in6 *to6 = (struct sockaddr_in6 *)&ro->ro_dst; #endif - struct sctp_ifa *answer; - uint8_t dest_is_priv, dest_is_loop; - sa_family_t fam; - /*- + /** * Rules: - Find the route if needed, cache if I can. - Look at * interface address in route, Is it in the bound list. If so we * have the best source. - If not we must rotate amongst the @@ -3158,10 +3245,11 @@ sctp_source_address_selection(struct sctp_inpcb *inp, if (ro->ro_rt == NULL) { return (NULL); } - fam = to->sin_family; + fam = ro->ro_dst.sa_family; dest_is_priv = dest_is_loop = 0; /* Setup our scopes for the destination */ switch (fam) { +#ifdef INET case AF_INET: /* Scope based on outbound address */ if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) { @@ -3174,6 +3262,7 @@ sctp_source_address_selection(struct sctp_inpcb *inp, dest_is_priv = 1; } break; +#endif #ifdef INET6 case AF_INET6: /* Scope based on outbound address */ @@ -3197,13 +3286,13 @@ sctp_source_address_selection(struct sctp_inpcb *inp, #endif } SCTPDBG(SCTP_DEBUG_OUTPUT2, "Select source addr for:"); - SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)to); + SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&ro->ro_dst); SCTP_IPI_ADDR_RLOCK(); if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* * Bound all case */ - answer = sctp_choose_boundall(inp, stcb, net, ro, vrf_id, + answer = sctp_choose_boundall(stcb, net, ro, vrf_id, dest_is_priv, dest_is_loop, non_asoc_addr_ok, fam); SCTP_IPI_ADDR_RUNLOCK(); @@ -3213,7 +3302,7 @@ sctp_source_address_selection(struct sctp_inpcb *inp, * Subset bound case */ if (stcb) { - answer = sctp_choose_boundspecific_stcb(inp, stcb, net, ro, + answer = sctp_choose_boundspecific_stcb(inp, stcb, ro, vrf_id, dest_is_priv, dest_is_loop, non_asoc_addr_ok, fam); @@ -3228,56 +3317,344 @@ sctp_source_address_selection(struct sctp_inpcb *inp, } static int -sctp_find_cmsg(int c_type, void *data, struct mbuf *control, int cpsize) +sctp_find_cmsg(int c_type, void *data, struct mbuf *control, size_t cpsize) { struct cmsghdr cmh; - int tlen, at; + int tlen, at, found; + struct sctp_sndinfo sndinfo; + struct sctp_prinfo prinfo; + struct sctp_authinfo authinfo; tlen = SCTP_BUF_LEN(control); at = 0; + found = 0; /* * Independent of how many mbufs, find the c_type inside the control * structure and copy out the data. */ while (at < tlen) { if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { - /* not enough room for one more we are done. */ - return (0); + /* There is not enough room for one more. */ + return (found); } m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); + if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) { + /* We dont't have a complete CMSG header. */ + return (found); + } if (((int)cmh.cmsg_len + at) > tlen) { - /* - * this is real messed up since there is not enough - * data here to cover the cmsg header. We are done. - */ - return (0); + /* We don't have the complete CMSG. */ + return (found); } if ((cmh.cmsg_level == IPPROTO_SCTP) && - (c_type == cmh.cmsg_type)) { - /* found the one we want, copy it out */ - at += CMSG_ALIGN(sizeof(struct cmsghdr)); - if ((int)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) { - /* - * space of cmsg_len after header not big - * enough - */ - return (0); + ((c_type == cmh.cmsg_type) || + ((c_type == SCTP_SNDRCV) && + ((cmh.cmsg_type == SCTP_SNDINFO) || + (cmh.cmsg_type == SCTP_PRINFO) || + (cmh.cmsg_type == SCTP_AUTHINFO))))) { + if (c_type == cmh.cmsg_type) { + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < cpsize) { + return (found); + } + /* It is exactly what we want. Copy it out. */ + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), cpsize, (caddr_t)data); + return (1); + } else { + struct sctp_sndrcvinfo *sndrcvinfo; + + sndrcvinfo = (struct sctp_sndrcvinfo *)data; + if (found == 0) { + if (cpsize < sizeof(struct sctp_sndrcvinfo)) { + return (found); + } + memset(sndrcvinfo, 0, sizeof(struct sctp_sndrcvinfo)); + } + switch (cmh.cmsg_type) { + case SCTP_SNDINFO: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_sndinfo)) { + return (found); + } + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo); + sndrcvinfo->sinfo_stream = sndinfo.snd_sid; + sndrcvinfo->sinfo_flags = sndinfo.snd_flags; + sndrcvinfo->sinfo_ppid = sndinfo.snd_ppid; + sndrcvinfo->sinfo_context = sndinfo.snd_context; + sndrcvinfo->sinfo_assoc_id = sndinfo.snd_assoc_id; + break; + case SCTP_PRINFO: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_prinfo)) { + return (found); + } + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_prinfo), (caddr_t)&prinfo); + sndrcvinfo->sinfo_timetolive = prinfo.pr_value; + sndrcvinfo->sinfo_flags |= prinfo.pr_policy; + break; + case SCTP_AUTHINFO: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_authinfo)) { + return (found); + } + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_authinfo), (caddr_t)&authinfo); + sndrcvinfo->sinfo_keynumber_valid = 1; + sndrcvinfo->sinfo_keynumber = authinfo.auth_keynumber; + break; + default: + return (found); + } + found = 1; } - m_copydata(control, at, cpsize, data); + } + at += CMSG_ALIGN(cmh.cmsg_len); + } + return (found); +} + +static int +sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *error) +{ + struct cmsghdr cmh; + int tlen, at; + struct sctp_initmsg initmsg; + +#ifdef INET + struct sockaddr_in sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; + +#endif + + tlen = SCTP_BUF_LEN(control); + at = 0; + while (at < tlen) { + if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { + /* There is not enough room for one more. */ + *error = EINVAL; return (1); - } else { - at += CMSG_ALIGN(cmh.cmsg_len); - if (cmh.cmsg_len == 0) { + } + m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); + if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) { + /* We dont't have a complete CMSG header. */ + *error = EINVAL; + return (1); + } + if (((int)cmh.cmsg_len + at) > tlen) { + /* We don't have the complete CMSG. */ + *error = EINVAL; + return (1); + } + if (cmh.cmsg_level == IPPROTO_SCTP) { + switch (cmh.cmsg_type) { + case SCTP_INIT: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_initmsg)) { + *error = EINVAL; + return (1); + } + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg); + if (initmsg.sinit_max_attempts) + stcb->asoc.max_init_times = initmsg.sinit_max_attempts; + if (initmsg.sinit_num_ostreams) + stcb->asoc.pre_open_streams = initmsg.sinit_num_ostreams; + if (initmsg.sinit_max_instreams) + stcb->asoc.max_inbound_streams = initmsg.sinit_max_instreams; + if (initmsg.sinit_max_init_timeo) + stcb->asoc.initial_init_rto_max = initmsg.sinit_max_init_timeo; + if (stcb->asoc.streamoutcnt < stcb->asoc.pre_open_streams) { + struct sctp_stream_out *tmp_str; + unsigned int i; + + /* Default is NOT correct */ + SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n", + stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams); + SCTP_TCB_UNLOCK(stcb); + SCTP_MALLOC(tmp_str, + struct sctp_stream_out *, + (stcb->asoc.pre_open_streams * sizeof(struct sctp_stream_out)), + SCTP_M_STRMO); + SCTP_TCB_LOCK(stcb); + if (tmp_str != NULL) { + SCTP_FREE(stcb->asoc.strmout, SCTP_M_STRMO); + stcb->asoc.strmout = tmp_str; + stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt = stcb->asoc.pre_open_streams; + } else { + stcb->asoc.pre_open_streams = stcb->asoc.streamoutcnt; + } + for (i = 0; i < stcb->asoc.streamoutcnt; i++) { + TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); + stcb->asoc.strmout[i].chunks_on_queues = 0; + stcb->asoc.strmout[i].next_sequence_send = 0; + stcb->asoc.strmout[i].stream_no = i; + stcb->asoc.strmout[i].last_msg_incomplete = 0; + stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL); + } + } + break; +#ifdef INET + case SCTP_DSTADDRV4: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) { + *error = EINVAL; + return (1); + } + memset(&sin, 0, sizeof(struct sockaddr_in)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_port = stcb->rport; + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr); + if ((sin.sin_addr.s_addr == INADDR_ANY) || + (sin.sin_addr.s_addr == INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { + *error = EINVAL; + return (1); + } + if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL, + SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { + *error = ENOBUFS; + return (1); + } + break; +#endif +#ifdef INET6 + case SCTP_DSTADDRV6: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) { + *error = EINVAL; + return (1); + } + memset(&sin6, 0, sizeof(struct sockaddr_in6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_port = stcb->rport; + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); + if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) { + *error = EINVAL; + return (1); + } +#ifdef INET + if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) { + in6_sin6_2_sin(&sin, &sin6); + if ((sin.sin_addr.s_addr == INADDR_ANY) || + (sin.sin_addr.s_addr == INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { + *error = EINVAL; + return (1); + } + if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL, + SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { + *error = ENOBUFS; + return (1); + } + } else +#endif + if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin6, NULL, + SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { + *error = ENOBUFS; + return (1); + } + break; +#endif + default: break; } } + at += CMSG_ALIGN(cmh.cmsg_len); } - /* not found */ return (0); } +static struct sctp_tcb * +sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, + in_port_t port, + struct mbuf *control, + struct sctp_nets **net_p, + int *error) +{ + struct cmsghdr cmh; + int tlen, at; + struct sctp_tcb *stcb; + struct sockaddr *addr; + +#ifdef INET + struct sockaddr_in sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; + +#endif + + tlen = SCTP_BUF_LEN(control); + at = 0; + while (at < tlen) { + if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { + /* There is not enough room for one more. */ + *error = EINVAL; + return (NULL); + } + m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); + if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) { + /* We dont't have a complete CMSG header. */ + *error = EINVAL; + return (NULL); + } + if (((int)cmh.cmsg_len + at) > tlen) { + /* We don't have the complete CMSG. */ + *error = EINVAL; + return (NULL); + } + if (cmh.cmsg_level == IPPROTO_SCTP) { + switch (cmh.cmsg_type) { +#ifdef INET + case SCTP_DSTADDRV4: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) { + *error = EINVAL; + return (NULL); + } + memset(&sin, 0, sizeof(struct sockaddr_in)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_port = port; + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr); + addr = (struct sockaddr *)&sin; + break; +#endif +#ifdef INET6 + case SCTP_DSTADDRV6: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) { + *error = EINVAL; + return (NULL); + } + memset(&sin6, 0, sizeof(struct sockaddr_in6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_port = port; + m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); +#ifdef INET + if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) { + in6_sin6_2_sin(&sin, &sin6); + addr = (struct sockaddr *)&sin; + } else +#endif + addr = (struct sockaddr *)&sin6; + break; +#endif + default: + addr = NULL; + break; + } + if (addr) { + stcb = sctp_findassociation_ep_addr(inp_p, addr, net_p, NULL, NULL); + if (stcb != NULL) { + return (stcb); + } + } + } + at += CMSG_ALIGN(cmh.cmsg_len); + } + return (NULL); +} + static struct mbuf * -sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset, +sctp_add_cookie(struct mbuf *init, int init_offset, struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t ** signature) { struct mbuf *copy_init, *copy_initack, *m_at, *sig, *mret; @@ -3303,12 +3680,10 @@ sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = copy_init; - while (mat) { + for (mat = copy_init; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -3323,12 +3698,10 @@ sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = copy_initack; - while (mat) { + for (mat = copy_initack; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -3345,7 +3718,6 @@ sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset, /* tack the INIT and then the INIT-ACK onto the chain */ cookie_sz = 0; - m_at = mret; for (m_at = mret; m_at; m_at = SCTP_BUF_NEXT(m_at)) { cookie_sz += SCTP_BUF_LEN(m_at); if (SCTP_BUF_NEXT(m_at) == NULL) { @@ -3353,7 +3725,6 @@ sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset, break; } } - for (m_at = copy_init; m_at; m_at = SCTP_BUF_NEXT(m_at)) { cookie_sz += SCTP_BUF_LEN(m_at); if (SCTP_BUF_NEXT(m_at) == NULL) { @@ -3361,7 +3732,6 @@ sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset, break; } } - for (m_at = copy_initack; m_at; m_at = SCTP_BUF_NEXT(m_at)) { cookie_sz += SCTP_BUF_LEN(m_at); if (SCTP_BUF_NEXT(m_at) == NULL) { @@ -3388,60 +3758,62 @@ sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset, static uint8_t -sctp_get_ect(struct sctp_tcb *stcb, - struct sctp_tmit_chunk *chk) +sctp_get_ect(struct sctp_tcb *stcb) { - uint8_t this_random; - - /* Huh? */ - if (SCTP_BASE_SYSCTL(sctp_ecn_enable) == 0) - return (0); - - if (SCTP_BASE_SYSCTL(sctp_ecn_nonce) == 0) - /* no nonce, always return ECT0 */ - return (SCTP_ECT0_BIT); - - if (stcb->asoc.peer_supports_ecn_nonce == 0) { - /* Peer does NOT support it, so we send a ECT0 only */ + if ((stcb != NULL) && (stcb->asoc.ecn_allowed == 1)) { return (SCTP_ECT0_BIT); + } else { + return (0); } - if (chk == NULL) - return (SCTP_ECT0_BIT); +} - if ((stcb->asoc.hb_random_idx > 3) || - ((stcb->asoc.hb_random_idx == 3) && - (stcb->asoc.hb_ect_randombit > 7))) { - uint32_t rndval; - -warp_drive_sa: - rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep); - memcpy(stcb->asoc.hb_random_values, &rndval, - sizeof(stcb->asoc.hb_random_values)); - this_random = stcb->asoc.hb_random_values[0]; - stcb->asoc.hb_random_idx = 0; - stcb->asoc.hb_ect_randombit = 0; - } else { - if (stcb->asoc.hb_ect_randombit > 7) { - stcb->asoc.hb_ect_randombit = 0; - stcb->asoc.hb_random_idx++; - if (stcb->asoc.hb_random_idx > 3) { - goto warp_drive_sa; +#if defined(INET) || defined(INET6) +static void +sctp_handle_no_route(struct sctp_tcb *stcb, + struct sctp_nets *net, + int so_locked) +{ + SCTPDBG(SCTP_DEBUG_OUTPUT1, "dropped packet - no valid source addr\n"); + + if (net) { + SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination was "); + SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1, &net->ro._l_addr.sa); + if (net->dest_state & SCTP_ADDR_CONFIRMED) { + if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) { + SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", (void *)net); + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, + stcb, 0, + (void *)net, + so_locked); + net->dest_state &= ~SCTP_ADDR_REACHABLE; + net->dest_state &= ~SCTP_ADDR_PF; + } + } + if (stcb) { + if (net == stcb->asoc.primary_destination) { + /* need a new primary */ + struct sctp_nets *alt; + + alt = sctp_find_alternate_net(stcb, net, 0); + if (alt != net) { + if (stcb->asoc.alternate) { + sctp_free_remote_addr(stcb->asoc.alternate); + } + stcb->asoc.alternate = alt; + atomic_add_int(&stcb->asoc.alternate->ref_count, 1); + if (net->ro._s_addr) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + } + net->src_addr_selected = 0; + } } } - this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx]; - } - if ((this_random >> stcb->asoc.hb_ect_randombit) & 0x01) { - if (chk != NULL) - /* ECN Nonce stuff */ - chk->rec.data.ect_nonce = SCTP_ECT1_BIT; - stcb->asoc.hb_ect_randombit++; - return (SCTP_ECT1_BIT); - } else { - stcb->asoc.hb_ect_randombit++; - return (SCTP_ECT0_BIT); } } +#endif + static int sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, struct sctp_tcb *stcb, /* may be NULL */ @@ -3453,41 +3825,49 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, uint16_t auth_keyid, int nofragment_flag, int ecn_ok, - struct sctp_tmit_chunk *chk, int out_of_asoc_ok, uint16_t src_port, uint16_t dest_port, uint32_t v_tag, uint16_t port, - int so_locked, + union sctp_sockstore *over_addr, + uint8_t use_mflowid, uint32_t mflowid, #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) - SCTP_UNUSED + int so_locked SCTP_UNUSED +#else + int so_locked #endif - union sctp_sockstore *over_addr ) /* nofragment_flag to tell if IP_DF should be set (IPv4 only) */ { - /* - * Given a mbuf chain (via SCTP_BUF_NEXT()) that holds a packet - * header WITH an SCTPHDR but no IP header, endpoint inp and sa - * structure: - fill in the HMAC digest of any AUTH chunk in the - * packet. - calculate and fill in the SCTP checksum. - prepend an - * IP address header. - if boundall use INADDR_ANY. - if - * boundspecific do source address selection. - set fragmentation - * option for ipV4. - On return from IP output, check/adjust mtu - * size of output interface and smallest_mtu size as well. + /** + * Given a mbuf chain (via SCTP_BUF_NEXT()) that holds a packet header + * WITH an SCTPHDR but no IP header, endpoint inp and sa structure: + * - fill in the HMAC digest of any AUTH chunk in the packet. + * - calculate and fill in the SCTP checksum. + * - prepend an IP address header. + * - if boundall use INADDR_ANY. + * - if boundspecific do source address selection. + * - set fragmentation option for ipV4. + * - On return from IP output, check/adjust mtu size of output + * interface and smallest_mtu size as well. */ /* Will need ifdefs around this */ - struct mbuf *o_pak; struct mbuf *newm; struct sctphdr *sctphdr; int packet_length; int ret; uint32_t vrf_id; + +#if defined(INET) || defined(INET6) + struct mbuf *o_pak; sctp_route_t *ro = NULL; struct udphdr *udp = NULL; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#endif + uint8_t tos_value; + +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so = NULL; #endif @@ -3507,645 +3887,649 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, if ((auth != NULL) && (stcb != NULL)) { sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid); } - if (to->sa_family == AF_INET) { - struct ip *ip = NULL; - sctp_route_t iproute; - uint8_t tos_value; - int len; - - len = sizeof(struct ip) + sizeof(struct sctphdr); - if (port) { - len += sizeof(struct udphdr); - } - newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA); - if (newm == NULL) { - sctp_m_freem(m); - SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); - return (ENOMEM); - } - SCTP_ALIGN_TO_END(newm, len); - SCTP_BUF_LEN(newm) = len; - SCTP_BUF_NEXT(newm) = m; - m = newm; - packet_length = sctp_calculate_len(m); - ip = mtod(m, struct ip *); - ip->ip_v = IPVERSION; - ip->ip_hl = (sizeof(struct ip) >> 2); - if (net) { - tos_value = net->tos_flowlabel & 0x000000ff; - } else { - tos_value = inp->ip_inp.inp.inp_ip_tos; - } - if ((nofragment_flag) && (port == 0)) { - ip->ip_off = IP_DF; - } else - ip->ip_off = 0; + if (net) { + tos_value = net->dscp; + } else if (stcb) { + tos_value = stcb->asoc.default_dscp; + } else { + tos_value = inp->sctp_ep.default_dscp; + } - /* FreeBSD has a function for ip_id's */ - ip->ip_id = ip_newid(); + switch (to->sa_family) { +#ifdef INET + case AF_INET: + { + struct ip *ip = NULL; + sctp_route_t iproute; + int len; - ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl; - ip->ip_len = packet_length; - if (stcb) { - if ((stcb->asoc.ecn_allowed) && ecn_ok) { - /* Enable ECN */ - ip->ip_tos = ((u_char)(tos_value & 0xfc) | sctp_get_ect(stcb, chk)); - } else { - /* No ECN */ - ip->ip_tos = (u_char)(tos_value & 0xfc); + len = sizeof(struct ip) + sizeof(struct sctphdr); + if (port) { + len += sizeof(struct udphdr); } - } else { - /* no association at all */ - ip->ip_tos = (tos_value & 0xfc); - } - if (port) { - ip->ip_p = IPPROTO_UDP; - } else { - ip->ip_p = IPPROTO_SCTP; - } - ip->ip_sum = 0; - if (net == NULL) { - ro = &iproute; - memset(&iproute, 0, sizeof(iproute)); - memcpy(&ro->ro_dst, to, to->sa_len); - } else { - ro = (sctp_route_t *) & net->ro; - } - /* Now the address selection part */ - ip->ip_dst.s_addr = ((struct sockaddr_in *)to)->sin_addr.s_addr; - - /* call the routine to select the src address */ - if (net && out_of_asoc_ok == 0) { - if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) { - sctp_free_ifa(net->ro._s_addr); - net->ro._s_addr = NULL; - net->src_addr_selected = 0; - if (ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; + newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA); + if (newm == NULL) { + sctp_m_freem(m); + SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); + return (ENOMEM); + } + SCTP_ALIGN_TO_END(newm, len); + SCTP_BUF_LEN(newm) = len; + SCTP_BUF_NEXT(newm) = m; + m = newm; + if (net != NULL) { +#ifdef INVARIANTS + if (net->flowidset == 0) { + panic("Flow ID not set"); + } +#endif + m->m_pkthdr.flowid = net->flowid; + m->m_flags |= M_FLOWID; + } else { + if (use_mflowid != 0) { + m->m_pkthdr.flowid = mflowid; + m->m_flags |= M_FLOWID; } } - if (net->src_addr_selected == 0) { - /* Cache the source address */ - net->ro._s_addr = sctp_source_address_selection(inp, stcb, - ro, net, 0, - vrf_id); - net->src_addr_selected = 1; + packet_length = sctp_calculate_len(m); + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = (sizeof(struct ip) >> 2); + if (tos_value == 0) { + /* + * This means especially, that it is not set + * at the SCTP layer. So use the value from + * the IP layer. + */ + tos_value = inp->ip_inp.inp.inp_ip_tos; } - if (net->ro._s_addr == NULL) { - /* No route to host */ - net->src_addr_selected = 0; - goto no_route; + tos_value &= 0xfc; + if (ecn_ok) { + tos_value |= sctp_get_ect(stcb); } - ip->ip_src = net->ro._s_addr->address.sin.sin_addr; - } else { - if (over_addr == NULL) { - struct sctp_ifa *_lsrc; - - _lsrc = sctp_source_address_selection(inp, stcb, ro, - net, - out_of_asoc_ok, - vrf_id); - if (_lsrc == NULL) { - goto no_route; - } - ip->ip_src = _lsrc->address.sin.sin_addr; - sctp_free_ifa(_lsrc); + if ((nofragment_flag) && (port == 0)) { + ip->ip_off = IP_DF; } else { - ip->ip_src = over_addr->sin.sin_addr; - SCTP_RTALLOC(ro, vrf_id); + ip->ip_off = 0; } - } - if (port) { - udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip)); - udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); - udp->uh_dport = port; - udp->uh_ulen = htons(packet_length - sizeof(struct ip)); - udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP)); - sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr)); - } else { - sctphdr = (struct sctphdr *)((caddr_t)ip + sizeof(struct ip)); - } + /* FreeBSD has a function for ip_id's */ + ip->ip_id = ip_newid(); - sctphdr->src_port = src_port; - sctphdr->dest_port = dest_port; - sctphdr->v_tag = v_tag; - sctphdr->checksum = 0; + ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl; + ip->ip_len = packet_length; + ip->ip_tos = tos_value; + if (port) { + ip->ip_p = IPPROTO_UDP; + } else { + ip->ip_p = IPPROTO_SCTP; + } + ip->ip_sum = 0; + if (net == NULL) { + ro = &iproute; + memset(&iproute, 0, sizeof(iproute)); + memcpy(&ro->ro_dst, to, to->sa_len); + } else { + ro = (sctp_route_t *) & net->ro; + } + /* Now the address selection part */ + ip->ip_dst.s_addr = ((struct sockaddr_in *)to)->sin_addr.s_addr; - /* - * If source address selection fails and we find no route - * then the ip_output should fail as well with a - * NO_ROUTE_TO_HOST type error. We probably should catch - * that somewhere and abort the association right away - * (assuming this is an INIT being sent). - */ - if ((ro->ro_rt == NULL)) { - /* - * src addr selection failed to find a route (or - * valid source addr), so we can't get there from - * here (yet)! - */ - no_route: - SCTPDBG(SCTP_DEBUG_OUTPUT1, - "%s: dropped packet - no valid source addr\n", - __FUNCTION__); - if (net) { - SCTPDBG(SCTP_DEBUG_OUTPUT1, - "Destination was "); - SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1, - &net->ro._l_addr.sa); - if (net->dest_state & SCTP_ADDR_CONFIRMED) { - if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) { - SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", net); - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, - stcb, - SCTP_FAILED_THRESHOLD, - (void *)net, - so_locked); - net->dest_state &= ~SCTP_ADDR_REACHABLE; - net->dest_state |= SCTP_ADDR_NOT_REACHABLE; - /* - * JRS 5/14/07 - If a - * destination is - * unreachable, the PF bit - * is turned off. This - * allows an unambiguous use - * of the PF bit for - * destinations that are - * reachable but potentially - * failed. If the - * destination is set to the - * unreachable state, also - * set the destination to - * the PF state. - */ - /* - * Add debug message here if - * destination is not in PF - * state. - */ - /* - * Stop any running T3 - * timers here? - */ - if ((stcb->asoc.sctp_cmt_on_off == 1) && - (stcb->asoc.sctp_cmt_pf > 0)) { - net->dest_state &= ~SCTP_ADDR_PF; - SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination %p moved from PF to unreachable.\n", - net); - } + /* call the routine to select the src address */ + if (net && out_of_asoc_ok == 0) { + if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + net->src_addr_selected = 0; + if (ro->ro_rt) { + RTFREE(ro->ro_rt); + ro->ro_rt = NULL; } } - if (stcb) { - if (net == stcb->asoc.primary_destination) { - /* need a new primary */ - struct sctp_nets *alt; - - alt = sctp_find_alternate_net(stcb, net, 0); - if (alt != net) { - if (sctp_set_primary_addr(stcb, - (struct sockaddr *)NULL, - alt) == 0) { - net->dest_state |= SCTP_ADDR_WAS_PRIMARY; - if (net->ro._s_addr) { - sctp_free_ifa(net->ro._s_addr); - net->ro._s_addr = NULL; - } - net->src_addr_selected = 0; - } - } + if (net->src_addr_selected == 0) { + /* Cache the source address */ + net->ro._s_addr = sctp_source_address_selection(inp, stcb, + ro, net, 0, + vrf_id); + net->src_addr_selected = 1; + } + if (net->ro._s_addr == NULL) { + /* No route to host */ + net->src_addr_selected = 0; + sctp_handle_no_route(stcb, net, so_locked); + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); + sctp_m_freem(m); + return (EHOSTUNREACH); + } + ip->ip_src = net->ro._s_addr->address.sin.sin_addr; + } else { + if (over_addr == NULL) { + struct sctp_ifa *_lsrc; + + _lsrc = sctp_source_address_selection(inp, stcb, ro, + net, + out_of_asoc_ok, + vrf_id); + if (_lsrc == NULL) { + sctp_handle_no_route(stcb, net, so_locked); + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); + sctp_m_freem(m); + return (EHOSTUNREACH); } + ip->ip_src = _lsrc->address.sin.sin_addr; + sctp_free_ifa(_lsrc); + } else { + ip->ip_src = over_addr->sin.sin_addr; + SCTP_RTALLOC(ro, vrf_id); } } - SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); - sctp_m_freem(m); - return (EHOSTUNREACH); - } - if (ro != &iproute) { - memcpy(&iproute, ro, sizeof(*ro)); - } - SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv4 output routine from low level src addr:%x\n", - (uint32_t) (ntohl(ip->ip_src.s_addr))); - SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n", - (uint32_t) (ntohl(ip->ip_dst.s_addr))); - SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n", - ro->ro_rt); + if (port) { + if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) { + sctp_handle_no_route(stcb, net, so_locked); + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); + sctp_m_freem(m); + return (EHOSTUNREACH); + } + udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip)); + udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); + udp->uh_dport = port; + udp->uh_ulen = htons(packet_length - sizeof(struct ip)); + if (V_udp_cksum) { + udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP)); + } else { + udp->uh_sum = 0; + } + sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr)); + } else { + sctphdr = (struct sctphdr *)((caddr_t)ip + sizeof(struct ip)); + } - if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { - /* failed to prepend data, give up */ - SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); - sctp_m_freem(m); - return (ENOMEM); - } -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(m, packet_length); -#endif - SCTP_ATTACH_CHAIN(o_pak, m, packet_length); - if (port) { + sctphdr->src_port = src_port; + sctphdr->dest_port = dest_port; + sctphdr->v_tag = v_tag; + sctphdr->checksum = 0; + + /* + * If source address selection fails and we find no + * route then the ip_output should fail as well with + * a NO_ROUTE_TO_HOST type error. We probably should + * catch that somewhere and abort the association + * right away (assuming this is an INIT being sent). + */ + if (ro->ro_rt == NULL) { + /* + * src addr selection failed to find a route + * (or valid source addr), so we can't get + * there from here (yet)! + */ + sctp_handle_no_route(stcb, net, so_locked); + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); + sctp_m_freem(m); + return (EHOSTUNREACH); + } + if (ro != &iproute) { + memcpy(&iproute, ro, sizeof(*ro)); + } + SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv4 output routine from low level src addr:%x\n", + (uint32_t) (ntohl(ip->ip_src.s_addr))); + SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n", + (uint32_t) (ntohl(ip->ip_dst.s_addr))); + SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n", + (void *)ro->ro_rt); + + if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { + /* failed to prepend data, give up */ + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); + sctp_m_freem(m); + return (ENOMEM); + } + SCTP_ATTACH_CHAIN(o_pak, m, packet_length); + if (port) { #if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); + SCTP_STAT_INCR(sctps_sendnocrc); #else - if (!(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) && - (stcb) && - (stcb->asoc.loopback_scope))) { sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip) + sizeof(struct udphdr)); SCTP_STAT_INCR(sctps_sendswcrc); +#endif + if (V_udp_cksum) { + SCTP_ENABLE_UDP_CSUM(o_pak); + } } else { +#if defined(SCTP_WITH_NO_CSUM) SCTP_STAT_INCR(sctps_sendnocrc); +#else + m->m_pkthdr.csum_flags = CSUM_SCTP; + m->m_pkthdr.csum_data = 0; + SCTP_STAT_INCR(sctps_sendhwcrc); +#endif } +#ifdef SCTP_PACKET_LOGGING + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) + sctp_packet_log(o_pak); #endif - SCTP_ENABLE_UDP_CSUM(o_pak); - } else { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - m->m_pkthdr.csum_flags = CSUM_SCTP; - m->m_pkthdr.csum_data = 0; - SCTP_STAT_INCR(sctps_sendhwcrc); + /* send it out. table id is taken from stcb */ +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { + so = SCTP_INP_SO(inp); + SCTP_SOCKET_UNLOCK(so, 0); + } #endif - } - /* send it out. table id is taken from stcb */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { - so = SCTP_INP_SO(inp); - SCTP_SOCKET_UNLOCK(so, 0); - } + SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id); +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { + atomic_add_int(&stcb->asoc.refcnt, 1); + SCTP_TCB_UNLOCK(stcb); + SCTP_SOCKET_LOCK(so, 0); + SCTP_TCB_LOCK(stcb); + atomic_subtract_int(&stcb->asoc.refcnt, 1); + } #endif - SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - SCTP_SOCKET_LOCK(so, 0); - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); + SCTP_STAT_INCR(sctps_sendpackets); + SCTP_STAT_INCR_COUNTER64(sctps_outpackets); + if (ret) + SCTP_STAT_INCR(sctps_senderrors); + + SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret); + if (net == NULL) { + /* free tempy routes */ + if (ro->ro_rt) { + RTFREE(ro->ro_rt); + ro->ro_rt = NULL; + } + } else { + /* + * PMTU check versus smallest asoc MTU goes + * here + */ + if ((ro->ro_rt != NULL) && + (net->ro._s_addr)) { + uint32_t mtu; + + mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt); + if (net->port) { + mtu -= sizeof(struct udphdr); + } + if (mtu && (stcb->asoc.smallest_mtu > mtu)) { + sctp_mtu_size_reset(inp, &stcb->asoc, mtu); + net->mtu = mtu; + } + } else if (ro->ro_rt == NULL) { + /* route was freed */ + if (net->ro._s_addr && + net->src_addr_selected) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + } + net->src_addr_selected = 0; + } + } + return (ret); } #endif - SCTP_STAT_INCR(sctps_sendpackets); - SCTP_STAT_INCR_COUNTER64(sctps_outpackets); - if (ret) - SCTP_STAT_INCR(sctps_senderrors); +#ifdef INET6 + case AF_INET6: + { + uint32_t flowlabel, flowinfo; + struct ip6_hdr *ip6h; + struct route_in6 ip6route; + struct ifnet *ifp; + struct sockaddr_in6 *sin6, tmp, *lsa6, lsa6_tmp; + int prev_scope = 0; + struct sockaddr_in6 lsa6_storage; + int error; + u_short prev_port = 0; + int len; - SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret); - if (net == NULL) { - /* free tempy routes */ - if (ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; + if (net) { + flowlabel = net->flowlabel; + } else if (stcb) { + flowlabel = stcb->asoc.default_flowlabel; + } else { + flowlabel = inp->sctp_ep.default_flowlabel; } - } else { - /* PMTU check versus smallest asoc MTU goes here */ - if ((ro->ro_rt != NULL) && - (net->ro._s_addr)) { - uint32_t mtu; - - mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt); - if (net->port) { - mtu -= sizeof(struct udphdr); - } - if (mtu && (stcb->asoc.smallest_mtu > mtu)) { - sctp_mtu_size_reset(inp, &stcb->asoc, mtu); - net->mtu = mtu; - } - } else if (ro->ro_rt == NULL) { - /* route was freed */ - if (net->ro._s_addr && - net->src_addr_selected) { - sctp_free_ifa(net->ro._s_addr); - net->ro._s_addr = NULL; + if (flowlabel == 0) { + /* + * This means especially, that it is not set + * at the SCTP layer. So use the value from + * the IP layer. + */ + flowlabel = ntohl(((struct in6pcb *)inp)->in6p_flowinfo); + } + flowlabel &= 0x000fffff; + len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr); + if (port) { + len += sizeof(struct udphdr); + } + newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA); + if (newm == NULL) { + sctp_m_freem(m); + SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); + return (ENOMEM); + } + SCTP_ALIGN_TO_END(newm, len); + SCTP_BUF_LEN(newm) = len; + SCTP_BUF_NEXT(newm) = m; + m = newm; + if (net != NULL) { +#ifdef INVARIANTS + if (net->flowidset == 0) { + panic("Flow ID not set"); + } +#endif + m->m_pkthdr.flowid = net->flowid; + m->m_flags |= M_FLOWID; + } else { + if (use_mflowid != 0) { + m->m_pkthdr.flowid = mflowid; + m->m_flags |= M_FLOWID; } - net->src_addr_selected = 0; } - } - return (ret); - } -#ifdef INET6 - else if (to->sa_family == AF_INET6) { - uint32_t flowlabel; - struct ip6_hdr *ip6h; - struct route_in6 ip6route; - struct ifnet *ifp; - u_char flowTop; - uint16_t flowBottom; - u_char tosBottom, tosTop; - struct sockaddr_in6 *sin6, tmp, *lsa6, lsa6_tmp; - int prev_scope = 0; - struct sockaddr_in6 lsa6_storage; - int error; - u_short prev_port = 0; - int len; - - if (net != NULL) { - flowlabel = net->tos_flowlabel; - } else { - flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo; - } + packet_length = sctp_calculate_len(m); - len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr); - if (port) { - len += sizeof(struct udphdr); - } - newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA); - if (newm == NULL) { - sctp_m_freem(m); - SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); - return (ENOMEM); - } - SCTP_ALIGN_TO_END(newm, len); - SCTP_BUF_LEN(newm) = len; - SCTP_BUF_NEXT(newm) = m; - m = newm; - packet_length = sctp_calculate_len(m); + ip6h = mtod(m, struct ip6_hdr *); + /* protect *sin6 from overwrite */ + sin6 = (struct sockaddr_in6 *)to; + tmp = *sin6; + sin6 = &tmp; - ip6h = mtod(m, struct ip6_hdr *); - /* - * We assume here that inp_flow is in host byte order within - * the TCB! - */ - flowBottom = flowlabel & 0x0000ffff; - flowTop = ((flowlabel & 0x000f0000) >> 16); - tosTop = (((flowlabel & 0xf0) >> 4) | IPV6_VERSION); - /* protect *sin6 from overwrite */ - sin6 = (struct sockaddr_in6 *)to; - tmp = *sin6; - sin6 = &tmp; - - /* KAME hack: embed scopeid */ - if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { - SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); - return (EINVAL); - } - if (net == NULL) { - memset(&ip6route, 0, sizeof(ip6route)); - ro = (sctp_route_t *) & ip6route; - memcpy(&ro->ro_dst, sin6, sin6->sin6_len); - } else { - ro = (sctp_route_t *) & net->ro; - } - if (stcb != NULL) { - if ((stcb->asoc.ecn_allowed) && ecn_ok) { - /* Enable ECN */ - tosBottom = (((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) | sctp_get_ect(stcb, chk)) << 4); + /* KAME hack: embed scopeid */ + if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); + return (EINVAL); + } + if (net == NULL) { + memset(&ip6route, 0, sizeof(ip6route)); + ro = (sctp_route_t *) & ip6route; + memcpy(&ro->ro_dst, sin6, sin6->sin6_len); } else { - /* No ECN */ - tosBottom = ((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) << 4); + ro = (sctp_route_t *) & net->ro; } - } else { - /* we could get no asoc if it is a O-O-T-B packet */ - tosBottom = ((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) << 4); - } - ip6h->ip6_flow = htonl(((tosTop << 24) | ((tosBottom | flowTop) << 16) | flowBottom)); - if (port) { - ip6h->ip6_nxt = IPPROTO_UDP; - } else { - ip6h->ip6_nxt = IPPROTO_SCTP; - } - ip6h->ip6_plen = (packet_length - sizeof(struct ip6_hdr)); - ip6h->ip6_dst = sin6->sin6_addr; + /* + * We assume here that inp_flow is in host byte + * order within the TCB! + */ + if (tos_value == 0) { + /* + * This means especially, that it is not set + * at the SCTP layer. So use the value from + * the IP layer. + */ + tos_value = (ntohl(((struct in6pcb *)inp)->in6p_flowinfo) >> 20) & 0xff; + } + tos_value &= 0xfc; + if (ecn_ok) { + tos_value |= sctp_get_ect(stcb); + } + flowinfo = 0x06; + flowinfo <<= 8; + flowinfo |= tos_value; + flowinfo <<= 20; + flowinfo |= flowlabel; + ip6h->ip6_flow = htonl(flowinfo); + if (port) { + ip6h->ip6_nxt = IPPROTO_UDP; + } else { + ip6h->ip6_nxt = IPPROTO_SCTP; + } + ip6h->ip6_plen = (packet_length - sizeof(struct ip6_hdr)); + ip6h->ip6_dst = sin6->sin6_addr; - /* - * Add SRC address selection here: we can only reuse to a - * limited degree the kame src-addr-sel, since we can try - * their selection but it may not be bound. - */ - bzero(&lsa6_tmp, sizeof(lsa6_tmp)); - lsa6_tmp.sin6_family = AF_INET6; - lsa6_tmp.sin6_len = sizeof(lsa6_tmp); - lsa6 = &lsa6_tmp; - if (net && out_of_asoc_ok == 0) { - if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) { - sctp_free_ifa(net->ro._s_addr); - net->ro._s_addr = NULL; - net->src_addr_selected = 0; - if (ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; + /* + * Add SRC address selection here: we can only reuse + * to a limited degree the kame src-addr-sel, since + * we can try their selection but it may not be + * bound. + */ + bzero(&lsa6_tmp, sizeof(lsa6_tmp)); + lsa6_tmp.sin6_family = AF_INET6; + lsa6_tmp.sin6_len = sizeof(lsa6_tmp); + lsa6 = &lsa6_tmp; + if (net && out_of_asoc_ok == 0) { + if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + net->src_addr_selected = 0; + if (ro->ro_rt) { + RTFREE(ro->ro_rt); + ro->ro_rt = NULL; + } } - } - if (net->src_addr_selected == 0) { - sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; + if (net->src_addr_selected == 0) { + sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; + /* KAME hack: embed scopeid */ + if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); + return (EINVAL); + } + /* Cache the source address */ + net->ro._s_addr = sctp_source_address_selection(inp, + stcb, + ro, + net, + 0, + vrf_id); + (void)sa6_recoverscope(sin6); + net->src_addr_selected = 1; + } + if (net->ro._s_addr == NULL) { + SCTPDBG(SCTP_DEBUG_OUTPUT3, "V6:No route to host\n"); + net->src_addr_selected = 0; + sctp_handle_no_route(stcb, net, so_locked); + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); + sctp_m_freem(m); + return (EHOSTUNREACH); + } + lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr; + } else { + sin6 = (struct sockaddr_in6 *)&ro->ro_dst; /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } - /* Cache the source address */ - net->ro._s_addr = sctp_source_address_selection(inp, - stcb, - ro, - net, - 0, - vrf_id); + if (over_addr == NULL) { + struct sctp_ifa *_lsrc; + + _lsrc = sctp_source_address_selection(inp, stcb, ro, + net, + out_of_asoc_ok, + vrf_id); + if (_lsrc == NULL) { + sctp_handle_no_route(stcb, net, so_locked); + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); + sctp_m_freem(m); + return (EHOSTUNREACH); + } + lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr; + sctp_free_ifa(_lsrc); + } else { + lsa6->sin6_addr = over_addr->sin6.sin6_addr; + SCTP_RTALLOC(ro, vrf_id); + } (void)sa6_recoverscope(sin6); - net->src_addr_selected = 1; } - if (net->ro._s_addr == NULL) { - SCTPDBG(SCTP_DEBUG_OUTPUT3, "V6:No route to host\n"); - net->src_addr_selected = 0; - goto no_route; + lsa6->sin6_port = inp->sctp_lport; + + if (ro->ro_rt == NULL) { + /* + * src addr selection failed to find a route + * (or valid source addr), so we can't get + * there from here! + */ + sctp_handle_no_route(stcb, net, so_locked); + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); + sctp_m_freem(m); + return (EHOSTUNREACH); } - lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr; - } else { - sin6 = (struct sockaddr_in6 *)&ro->ro_dst; - /* KAME hack: embed scopeid */ - if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { - SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); - return (EINVAL); + /* + * XXX: sa6 may not have a valid sin6_scope_id in + * the non-SCOPEDROUTING case. + */ + bzero(&lsa6_storage, sizeof(lsa6_storage)); + lsa6_storage.sin6_family = AF_INET6; + lsa6_storage.sin6_len = sizeof(lsa6_storage); + lsa6_storage.sin6_addr = lsa6->sin6_addr; + if ((error = sa6_recoverscope(&lsa6_storage)) != 0) { + SCTPDBG(SCTP_DEBUG_OUTPUT3, "recover scope fails error %d\n", error); + sctp_m_freem(m); + return (error); } - if (over_addr == NULL) { - struct sctp_ifa *_lsrc; - - _lsrc = sctp_source_address_selection(inp, stcb, ro, - net, - out_of_asoc_ok, - vrf_id); - if (_lsrc == NULL) { - goto no_route; + /* XXX */ + lsa6_storage.sin6_addr = lsa6->sin6_addr; + lsa6_storage.sin6_port = inp->sctp_lport; + lsa6 = &lsa6_storage; + ip6h->ip6_src = lsa6->sin6_addr; + + if (port) { + if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) { + sctp_handle_no_route(stcb, net, so_locked); + SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); + sctp_m_freem(m); + return (EHOSTUNREACH); } - lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr; - sctp_free_ifa(_lsrc); + udp = (struct udphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr)); + udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); + udp->uh_dport = port; + udp->uh_ulen = htons(packet_length - sizeof(struct ip6_hdr)); + udp->uh_sum = 0; + sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr)); } else { - lsa6->sin6_addr = over_addr->sin6.sin6_addr; - SCTP_RTALLOC(ro, vrf_id); + sctphdr = (struct sctphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr)); } - (void)sa6_recoverscope(sin6); - } - lsa6->sin6_port = inp->sctp_lport; - if (ro->ro_rt == NULL) { + sctphdr->src_port = src_port; + sctphdr->dest_port = dest_port; + sctphdr->v_tag = v_tag; + sctphdr->checksum = 0; + /* - * src addr selection failed to find a route (or - * valid source addr), so we can't get there from - * here! + * We set the hop limit now since there is a good + * chance that our ro pointer is now filled */ - goto no_route; - } - /* - * XXX: sa6 may not have a valid sin6_scope_id in the - * non-SCOPEDROUTING case. - */ - bzero(&lsa6_storage, sizeof(lsa6_storage)); - lsa6_storage.sin6_family = AF_INET6; - lsa6_storage.sin6_len = sizeof(lsa6_storage); - lsa6_storage.sin6_addr = lsa6->sin6_addr; - if ((error = sa6_recoverscope(&lsa6_storage)) != 0) { - SCTPDBG(SCTP_DEBUG_OUTPUT3, "recover scope fails error %d\n", error); - sctp_m_freem(m); - return (error); - } - /* XXX */ - lsa6_storage.sin6_addr = lsa6->sin6_addr; - lsa6_storage.sin6_port = inp->sctp_lport; - lsa6 = &lsa6_storage; - ip6h->ip6_src = lsa6->sin6_addr; - - if (port) { - udp = (struct udphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr)); - udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); - udp->uh_dport = port; - udp->uh_ulen = htons(packet_length - sizeof(struct ip6_hdr)); - udp->uh_sum = 0; - sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr)); - } else { - sctphdr = (struct sctphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr)); - } - - sctphdr->src_port = src_port; - sctphdr->dest_port = dest_port; - sctphdr->v_tag = v_tag; - sctphdr->checksum = 0; - - /* - * We set the hop limit now since there is a good chance - * that our ro pointer is now filled - */ - ip6h->ip6_hlim = SCTP_GET_HLIM(inp, ro); - ifp = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); + ip6h->ip6_hlim = SCTP_GET_HLIM(inp, ro); + ifp = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); #ifdef SCTP_DEBUG - /* Copy to be sure something bad is not happening */ - sin6->sin6_addr = ip6h->ip6_dst; - lsa6->sin6_addr = ip6h->ip6_src; + /* Copy to be sure something bad is not happening */ + sin6->sin6_addr = ip6h->ip6_dst; + lsa6->sin6_addr = ip6h->ip6_src; #endif - SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv6 output routine from low level\n"); - SCTPDBG(SCTP_DEBUG_OUTPUT3, "src: "); - SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)lsa6); - SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst: "); - SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)sin6); - if (net) { - sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; - /* preserve the port and scope for link local send */ - prev_scope = sin6->sin6_scope_id; - prev_port = sin6->sin6_port; - } - if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { - /* failed to prepend data, give up */ - sctp_m_freem(m); - SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); - return (ENOMEM); - } -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(m, packet_length); -#endif - SCTP_ATTACH_CHAIN(o_pak, m, packet_length); - if (port) { + SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv6 output routine from low level\n"); + SCTPDBG(SCTP_DEBUG_OUTPUT3, "src: "); + SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)lsa6); + SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst: "); + SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)sin6); + if (net) { + sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; + /* + * preserve the port and scope for link + * local send + */ + prev_scope = sin6->sin6_scope_id; + prev_port = sin6->sin6_port; + } + if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { + /* failed to prepend data, give up */ + sctp_m_freem(m); + SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); + return (ENOMEM); + } + SCTP_ATTACH_CHAIN(o_pak, m, packet_length); + if (port) { #if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); + SCTP_STAT_INCR(sctps_sendnocrc); #else - if (!(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) && - (stcb) && - (stcb->asoc.loopback_scope))) { sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr) + sizeof(struct udphdr)); SCTP_STAT_INCR(sctps_sendswcrc); - } else { - SCTP_STAT_INCR(sctps_sendnocrc); - } #endif - if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), packet_length - sizeof(struct ip6_hdr))) == 0) { - udp->uh_sum = 0xffff; - } - } else { + if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), packet_length - sizeof(struct ip6_hdr))) == 0) { + udp->uh_sum = 0xffff; + } + } else { #if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); + SCTP_STAT_INCR(sctps_sendnocrc); #else - m->m_pkthdr.csum_flags = CSUM_SCTP; - m->m_pkthdr.csum_data = 0; - SCTP_STAT_INCR(sctps_sendhwcrc); + sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr)); + SCTP_STAT_INCR(sctps_sendswcrc); #endif - } - /* send it out. table id is taken from stcb */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { - so = SCTP_INP_SO(inp); - SCTP_SOCKET_UNLOCK(so, 0); - } + } + /* send it out. table id is taken from stcb */ +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { + so = SCTP_INP_SO(inp); + SCTP_SOCKET_UNLOCK(so, 0); + } #endif - SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - SCTP_SOCKET_LOCK(so, 0); - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); - } +#ifdef SCTP_PACKET_LOGGING + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) + sctp_packet_log(o_pak); #endif - if (net) { - /* for link local this must be done */ - sin6->sin6_scope_id = prev_scope; - sin6->sin6_port = prev_port; - } - SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret); - SCTP_STAT_INCR(sctps_sendpackets); - SCTP_STAT_INCR_COUNTER64(sctps_outpackets); - if (ret) { - SCTP_STAT_INCR(sctps_senderrors); - } - if (net == NULL) { - /* Now if we had a temp route free it */ - if (ro->ro_rt) { - RTFREE(ro->ro_rt); + SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id); +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { + atomic_add_int(&stcb->asoc.refcnt, 1); + SCTP_TCB_UNLOCK(stcb); + SCTP_SOCKET_LOCK(so, 0); + SCTP_TCB_LOCK(stcb); + atomic_subtract_int(&stcb->asoc.refcnt, 1); } - } else { - /* PMTU check versus smallest asoc MTU goes here */ - if (ro->ro_rt == NULL) { - /* Route was freed */ - if (net->ro._s_addr && - net->src_addr_selected) { - sctp_free_ifa(net->ro._s_addr); - net->ro._s_addr = NULL; +#endif + if (net) { + /* for link local this must be done */ + sin6->sin6_scope_id = prev_scope; + sin6->sin6_port = prev_port; + } + SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret); + SCTP_STAT_INCR(sctps_sendpackets); + SCTP_STAT_INCR_COUNTER64(sctps_outpackets); + if (ret) { + SCTP_STAT_INCR(sctps_senderrors); + } + if (net == NULL) { + /* Now if we had a temp route free it */ + if (ro->ro_rt) { + RTFREE(ro->ro_rt); } - net->src_addr_selected = 0; - } - if ((ro->ro_rt != NULL) && - (net->ro._s_addr)) { - uint32_t mtu; - - mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt); - if (mtu && - (stcb->asoc.smallest_mtu > mtu)) { - sctp_mtu_size_reset(inp, &stcb->asoc, mtu); - net->mtu = mtu; - if (net->port) { - net->mtu -= sizeof(struct udphdr); + } else { + /* + * PMTU check versus smallest asoc MTU goes + * here + */ + if (ro->ro_rt == NULL) { + /* Route was freed */ + if (net->ro._s_addr && + net->src_addr_selected) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; } + net->src_addr_selected = 0; } - } else if (ifp) { - if (ND_IFINFO(ifp)->linkmtu && - (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) { - sctp_mtu_size_reset(inp, - &stcb->asoc, - ND_IFINFO(ifp)->linkmtu); + if ((ro->ro_rt != NULL) && + (net->ro._s_addr)) { + uint32_t mtu; + + mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt); + if (mtu && + (stcb->asoc.smallest_mtu > mtu)) { + sctp_mtu_size_reset(inp, &stcb->asoc, mtu); + net->mtu = mtu; + if (net->port) { + net->mtu -= sizeof(struct udphdr); + } + } + } else if (ifp) { + if (ND_IFINFO(ifp)->linkmtu && + (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) { + sctp_mtu_size_reset(inp, + &stcb->asoc, + ND_IFINFO(ifp)->linkmtu); + } } } + return (ret); } - return (ret); - } #endif - else { + default: SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n", ((struct sockaddr *)to)->sa_family); sctp_m_freem(m); @@ -4162,22 +4546,22 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked #endif ) { - struct mbuf *m, *m_at, *mp_last; + struct mbuf *m; struct sctp_nets *net; struct sctp_init_chunk *init; + +#if defined(INET) || defined(INET6) struct sctp_supported_addr_param *sup_addr; + +#endif struct sctp_adaptation_layer_indication *ali; - struct sctp_ecn_supported_param *ecn; - struct sctp_prsctp_supported_param *prsctp; - struct sctp_ecn_nonce_supported_param *ecn_nonce; struct sctp_supported_chunk_types_param *pr_supported; + struct sctp_paramhdr *ph; int cnt_inits_to = 0; - int padval, ret; - int num_ext; - int p_len; + int ret; + uint16_t num_ext, chunk_len, padding_len, parameter_len; /* INIT's always go to the primary (and usually ONLY address) */ - mp_last = NULL; net = stcb->asoc.primary_destination; if (net == NULL) { net = TAILQ_FIRST(&stcb->asoc.nets); @@ -4194,15 +4578,12 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked } SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n"); #ifdef INET6 - if (((struct sockaddr *)&(net->ro._l_addr))->sa_family == AF_INET6) { + if (net->ro._l_addr.sa.sa_family == AF_INET6) { /* * special hook, if we are sending to link local it will not * show up in our private address count. */ - struct sockaddr_in6 *sin6l; - - sin6l = &net->ro._l_addr.sin6; - if (IN6_IS_ADDR_LINKLOCAL(&sin6l->sin6_addr)) + if (IN6_IS_ADDR_LINKLOCAL(&net->ro._l_addr.sin6.sin6_addr)) cnt_inits_to = 1; } #endif @@ -4220,14 +4601,15 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n"); return; } - SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk); + chunk_len = (uint16_t) sizeof(struct sctp_init_chunk); + padding_len = 0; /* * assume peer supports asconf in order to be able to queue local * address changes while an INIT is in flight and before the assoc * is established. */ stcb->asoc.peer_supports_asconf = 1; - /* Now lets put the SCTP header in place */ + /* Now lets put the chunk header in place */ init = mtod(m, struct sctp_init_chunk *); /* now the chunk header */ init->ch.chunk_type = SCTP_INITIATION; @@ -4239,76 +4621,104 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked /* set up some of the credits. */ init->init.a_rwnd = htonl(max(inp->sctp_socket ? SCTP_SB_LIMIT_RCV(inp->sctp_socket) : 0, SCTP_MINIMAL_RWND)); - init->init.num_outbound_streams = htons(stcb->asoc.pre_open_streams); init->init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams); init->init.initial_tsn = htonl(stcb->asoc.init_seq_number); - /* now the address restriction */ - sup_addr = (struct sctp_supported_addr_param *)((caddr_t)init + - sizeof(*init)); - sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE); -#ifdef INET6 - /* we support 2 types: IPv6/IPv4 */ - sup_addr->ph.param_length = htons(sizeof(*sup_addr) + sizeof(uint16_t)); - sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS); - sup_addr->addr_type[1] = htons(SCTP_IPV6_ADDRESS); -#else - /* we support 1 type: IPv4 */ - sup_addr->ph.param_length = htons(sizeof(*sup_addr) + sizeof(uint8_t)); - sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS); - sup_addr->addr_type[1] = htons(0); /* this is the padding */ -#endif - SCTP_BUF_LEN(m) += sizeof(*sup_addr) + sizeof(uint16_t); - /* adaptation layer indication parameter */ - ali = (struct sctp_adaptation_layer_indication *)((caddr_t)sup_addr + sizeof(*sup_addr) + sizeof(uint16_t)); - ali->ph.param_type = htons(SCTP_ULP_ADAPTATION); - ali->ph.param_length = htons(sizeof(*ali)); - ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator); - SCTP_BUF_LEN(m) += sizeof(*ali); - ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali)); - if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) { - /* Add NAT friendly parameter */ - struct sctp_paramhdr *ph; + if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) { + uint8_t i; - ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m)); + parameter_len = (uint16_t) sizeof(struct sctp_paramhdr); + if (stcb->asoc.scope.ipv4_addr_legal) { + parameter_len += (uint16_t) sizeof(uint16_t); + } + if (stcb->asoc.scope.ipv6_addr_legal) { + parameter_len += (uint16_t) sizeof(uint16_t); + } + sup_addr = (struct sctp_supported_addr_param *)(mtod(m, caddr_t)+chunk_len); + sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE); + sup_addr->ph.param_length = htons(parameter_len); + i = 0; + if (stcb->asoc.scope.ipv4_addr_legal) { + sup_addr->addr_type[i++] = htons(SCTP_IPV4_ADDRESS); + } + if (stcb->asoc.scope.ipv6_addr_legal) { + sup_addr->addr_type[i++] = htons(SCTP_IPV6_ADDRESS); + } + padding_len = 4 - 2 * i; + chunk_len += parameter_len; + } + /* Adaptation layer indication parameter */ + if (inp->sctp_ep.adaptation_layer_indicator_provided) { + if (padding_len > 0) { + memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); + chunk_len += padding_len; + padding_len = 0; + } + parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication); + ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len); + ali->ph.param_type = htons(SCTP_ULP_ADAPTATION); + ali->ph.param_length = htons(parameter_len); + ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator); + chunk_len += parameter_len; + } + if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) { + /* Add NAT friendly parameter. */ + if (padding_len > 0) { + memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); + chunk_len += padding_len; + padding_len = 0; + } + parameter_len = (uint16_t) sizeof(struct sctp_paramhdr); + ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); ph->param_type = htons(SCTP_HAS_NAT_SUPPORT); - ph->param_length = htons(sizeof(struct sctp_paramhdr)); - SCTP_BUF_LEN(m) += sizeof(struct sctp_paramhdr); - ecn = (struct sctp_ecn_supported_param *)((caddr_t)ph + sizeof(*ph)); + ph->param_length = htons(parameter_len); + chunk_len += parameter_len; } /* now any cookie time extensions */ if (stcb->asoc.cookie_preserve_req) { struct sctp_cookie_perserve_param *cookie_preserve; - cookie_preserve = (struct sctp_cookie_perserve_param *)(ecn); + if (padding_len > 0) { + memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); + chunk_len += padding_len; + padding_len = 0; + } + parameter_len = (uint16_t) sizeof(struct sctp_cookie_perserve_param); + cookie_preserve = (struct sctp_cookie_perserve_param *)(mtod(m, caddr_t)+chunk_len); cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE); - cookie_preserve->ph.param_length = htons( - sizeof(*cookie_preserve)); + cookie_preserve->ph.param_length = htons(parameter_len); cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req); - SCTP_BUF_LEN(m) += sizeof(*cookie_preserve); - ecn = (struct sctp_ecn_supported_param *)( - (caddr_t)cookie_preserve + sizeof(*cookie_preserve)); stcb->asoc.cookie_preserve_req = 0; + chunk_len += parameter_len; } /* ECN parameter */ - if (SCTP_BASE_SYSCTL(sctp_ecn_enable) == 1) { - ecn->ph.param_type = htons(SCTP_ECN_CAPABLE); - ecn->ph.param_length = htons(sizeof(*ecn)); - SCTP_BUF_LEN(m) += sizeof(*ecn); - prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn + - sizeof(*ecn)); - } else { - prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn); - } - /* And now tell the peer we do pr-sctp */ - prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED); - prsctp->ph.param_length = htons(sizeof(*prsctp)); - SCTP_BUF_LEN(m) += sizeof(*prsctp); + if (stcb->asoc.ecn_allowed == 1) { + if (padding_len > 0) { + memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); + chunk_len += padding_len; + padding_len = 0; + } + parameter_len = (uint16_t) sizeof(struct sctp_paramhdr); + ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); + ph->param_type = htons(SCTP_ECN_CAPABLE); + ph->param_length = htons(parameter_len); + chunk_len += parameter_len; + } + /* And now tell the peer we do support PR-SCTP. */ + if (padding_len > 0) { + memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); + chunk_len += padding_len; + padding_len = 0; + } + parameter_len = (uint16_t) sizeof(struct sctp_paramhdr); + ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); + ph->param_type = htons(SCTP_PRSCTP_SUPPORTED); + ph->param_length = htons(parameter_len); + chunk_len += parameter_len; /* And now tell the peer we do all the extensions */ - pr_supported = (struct sctp_supported_chunk_types_param *) - ((caddr_t)prsctp + sizeof(*prsctp)); + pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len); pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT); num_ext = 0; pr_supported->chunk_types[num_ext++] = SCTP_ASCONF; @@ -4322,118 +4732,100 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked if (stcb->asoc.sctp_nr_sack_on_off == 1) { pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK; } - p_len = sizeof(*pr_supported) + num_ext; - pr_supported->ph.param_length = htons(p_len); - bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len); - SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len); + parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext; + pr_supported->ph.param_length = htons(parameter_len); + padding_len = SCTP_SIZE32(parameter_len) - parameter_len; + chunk_len += parameter_len; - - /* ECN nonce: And now tell the peer we support ECN nonce */ - if (SCTP_BASE_SYSCTL(sctp_ecn_nonce)) { - ecn_nonce = (struct sctp_ecn_nonce_supported_param *) - ((caddr_t)pr_supported + SCTP_SIZE32(p_len)); - ecn_nonce->ph.param_type = htons(SCTP_ECN_NONCE_SUPPORTED); - ecn_nonce->ph.param_length = htons(sizeof(*ecn_nonce)); - SCTP_BUF_LEN(m) += sizeof(*ecn_nonce); - } /* add authentication parameters */ if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) { - struct sctp_auth_random *randp; - struct sctp_auth_hmac_algo *hmacs; - struct sctp_auth_chunk_list *chunks; - /* attach RANDOM parameter, if available */ if (stcb->asoc.authinfo.random != NULL) { - randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m)); - p_len = sizeof(*randp) + stcb->asoc.authinfo.random_len; + struct sctp_auth_random *randp; + + if (padding_len > 0) { + memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); + chunk_len += padding_len; + padding_len = 0; + } + randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len); + parameter_len = (uint16_t) sizeof(struct sctp_auth_random) + stcb->asoc.authinfo.random_len; /* random key already contains the header */ - bcopy(stcb->asoc.authinfo.random->key, randp, p_len); - /* zero out any padding required */ - bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len); - SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len); + memcpy(randp, stcb->asoc.authinfo.random->key, parameter_len); + padding_len = SCTP_SIZE32(parameter_len) - parameter_len; + chunk_len += parameter_len; } /* add HMAC_ALGO parameter */ - hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m)); - p_len = sctp_serialize_hmaclist(stcb->asoc.local_hmacs, - (uint8_t *) hmacs->hmac_ids); - if (p_len > 0) { - p_len += sizeof(*hmacs); + if ((stcb->asoc.local_hmacs != NULL) && + (stcb->asoc.local_hmacs->num_algo > 0)) { + struct sctp_auth_hmac_algo *hmacs; + + if (padding_len > 0) { + memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); + chunk_len += padding_len; + padding_len = 0; + } + hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len); + parameter_len = (uint16_t) (sizeof(struct sctp_auth_hmac_algo) + + stcb->asoc.local_hmacs->num_algo * sizeof(uint16_t)); hmacs->ph.param_type = htons(SCTP_HMAC_LIST); - hmacs->ph.param_length = htons(p_len); - /* zero out any padding required */ - bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len); - SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len); + hmacs->ph.param_length = htons(parameter_len); + sctp_serialize_hmaclist(stcb->asoc.local_hmacs, (uint8_t *) hmacs->hmac_ids); + padding_len = SCTP_SIZE32(parameter_len) - parameter_len; + chunk_len += parameter_len; } /* add CHUNKS parameter */ - chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m)); - p_len = sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks, - chunks->chunk_types); - if (p_len > 0) { - p_len += sizeof(*chunks); + if (sctp_auth_get_chklist_size(stcb->asoc.local_auth_chunks) > 0) { + struct sctp_auth_chunk_list *chunks; + + if (padding_len > 0) { + memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); + chunk_len += padding_len; + padding_len = 0; + } + chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len); + parameter_len = (uint16_t) (sizeof(struct sctp_auth_chunk_list) + + sctp_auth_get_chklist_size(stcb->asoc.local_auth_chunks)); chunks->ph.param_type = htons(SCTP_CHUNK_LIST); - chunks->ph.param_length = htons(p_len); - /* zero out any padding required */ - bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len); - SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len); + chunks->ph.param_length = htons(parameter_len); + sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks, chunks->chunk_types); + padding_len = SCTP_SIZE32(parameter_len) - parameter_len; + chunk_len += parameter_len; } } - m_at = m; - /* now the addresses */ - { - struct sctp_scoping scp; - - /* - * To optimize this we could put the scoping stuff into a - * structure and remove the individual uint8's from the - * assoc structure. Then we could just sifa in the address - * within the stcb.. but for now this is a quick hack to get - * the address stuff teased apart. - */ - scp.ipv4_addr_legal = stcb->asoc.ipv4_addr_legal; - scp.ipv6_addr_legal = stcb->asoc.ipv6_addr_legal; - scp.loopback_scope = stcb->asoc.loopback_scope; - scp.ipv4_local_scope = stcb->asoc.ipv4_local_scope; - scp.local_scope = stcb->asoc.local_scope; - scp.site_scope = stcb->asoc.site_scope; - - m_at = sctp_add_addresses_to_i_ia(inp, &scp, m_at, cnt_inits_to); - } + SCTP_BUF_LEN(m) = chunk_len; - /* calulate the size and update pkt header and chunk header */ - p_len = 0; - for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) { - if (SCTP_BUF_NEXT(m_at) == NULL) - mp_last = m_at; - p_len += SCTP_BUF_LEN(m_at); - } - init->ch.chunk_length = htons(p_len); + /* now the addresses */ /* - * We sifa 0 here to NOT set IP_DF if its IPv4, we ignore the return - * here since the timer will drive a retranmission. + * To optimize this we could put the scoping stuff into a structure + * and remove the individual uint8's from the assoc structure. Then + * we could just sifa in the address within the stcb. But for now + * this is a quick hack to get the address stuff teased apart. */ + sctp_add_addresses_to_i_ia(inp, stcb, &stcb->asoc.scope, m, cnt_inits_to, &padding_len, &chunk_len); - /* I don't expect this to execute but we will be safe here */ - padval = p_len % 4; - if ((padval) && (mp_last)) { - /* - * The compiler worries that mp_last may not be set even - * though I think it is impossible :-> however we add - * mp_last here just in case. - */ - ret = sctp_add_pad_tombuf(mp_last, (4 - padval)); - if (ret) { - /* Houston we have a problem, no space */ + init->ch.chunk_length = htons(chunk_len); + if (padding_len > 0) { + struct mbuf *m_at, *mp_last; + + mp_last = NULL; + for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) { + if (SCTP_BUF_NEXT(m_at) == NULL) + mp_last = m_at; + } + if ((mp_last == NULL) || sctp_add_pad_tombuf(mp_last, padding_len)) { sctp_m_freem(m); return; } - p_len += padval; } SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - calls lowlevel_output\n"); ret = sctp_lowlevel_chunk_output(inp, stcb, net, (struct sockaddr *)&net->ro._l_addr, - m, 0, NULL, 0, 0, 0, NULL, 0, + m, 0, NULL, 0, 0, 0, 0, inp->sctp_lport, stcb->rport, htonl(0), - net->port, so_locked, NULL); + net->port, NULL, + 0, 0, + so_locked); SCTPDBG(SCTP_DEBUG_OUTPUT4, "lowlevel_output - %d\n", ret); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time); @@ -4557,11 +4949,10 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, case SCTP_HAS_NAT_SUPPORT: *nat_friendly = 1; /* fall through */ - case SCTP_ECN_NONCE_SUPPORTED: case SCTP_PRSCTP_SUPPORTED: if (padded_size != sizeof(struct sctp_paramhdr)) { - SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecnnonce/prsctp/nat support %d\n", plen); + SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error prsctp/nat support %d\n", plen); goto invalid_size; } at += padded_size; @@ -4648,7 +5039,6 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, return (NULL); } m_copyback(op_err, err_at, plen, (caddr_t)phdr); - err_at += plen; } return (op_err); break; @@ -4781,7 +5171,7 @@ invalid_size: static int sctp_are_there_new_addresses(struct sctp_association *asoc, - struct mbuf *in_initpkt, int iphlen, int offset) + struct mbuf *in_initpkt, int offset, struct sockaddr *src) { /* * Given a INIT packet, look through the packet to verify that there @@ -4790,75 +5180,56 @@ sctp_are_there_new_addresses(struct sctp_association *asoc, * must return (1) to drop the packet if we see a un-understood * parameter that tells us to drop the chunk. */ - struct sockaddr_in sin4, *sa4; - -#ifdef INET6 - struct sockaddr_in6 sin6, *sa6; - -#endif struct sockaddr *sa_touse; struct sockaddr *sa; struct sctp_paramhdr *phdr, params; - struct ip *iph; - -#ifdef INET6 - struct ip6_hdr *ip6h; - -#endif - struct mbuf *mat; uint16_t ptype, plen; - int err_at; uint8_t fnd; struct sctp_nets *net; - memset(&sin4, 0, sizeof(sin4)); +#ifdef INET + struct sockaddr_in sin4, *sa4; + +#endif #ifdef INET6 - memset(&sin6, 0, sizeof(sin6)); + struct sockaddr_in6 sin6, *sa6; + #endif + +#ifdef INET + memset(&sin4, 0, sizeof(sin4)); sin4.sin_family = AF_INET; sin4.sin_len = sizeof(sin4); +#endif #ifdef INET6 + memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); #endif - sa_touse = NULL; /* First what about the src address of the pkt ? */ - iph = mtod(in_initpkt, struct ip *); - switch (iph->ip_v) { - case IPVERSION: - /* source addr is IPv4 */ - sin4.sin_addr = iph->ip_src; - sa_touse = (struct sockaddr *)&sin4; - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - /* source addr is IPv6 */ - ip6h = mtod(in_initpkt, struct ip6_hdr *); - sin6.sin6_addr = ip6h->ip6_src; - sa_touse = (struct sockaddr *)&sin6; - break; -#endif - default: - return (1); - } - fnd = 0; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { sa = (struct sockaddr *)&net->ro._l_addr; - if (sa->sa_family == sa_touse->sa_family) { + if (sa->sa_family == src->sa_family) { +#ifdef INET if (sa->sa_family == AF_INET) { + struct sockaddr_in *src4; + sa4 = (struct sockaddr_in *)sa; - if (sa4->sin_addr.s_addr == - sin4.sin_addr.s_addr) { + src4 = (struct sockaddr_in *)src; + if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) { fnd = 1; break; } } +#endif #ifdef INET6 if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *src6; + sa6 = (struct sockaddr_in6 *)sa; - if (SCTP6_ARE_ADDR_EQUAL(sa6, - &sin6)) { + src6 = (struct sockaddr_in6 *)src; + if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) { fnd = 1; break; } @@ -4871,41 +5242,51 @@ sctp_are_there_new_addresses(struct sctp_association *asoc, return (1); } /* Ok so far lets munge through the rest of the packet */ - mat = in_initpkt; - err_at = 0; - sa_touse = NULL; offset += sizeof(struct sctp_init_chunk); - phdr = sctp_get_next_param(mat, offset, ¶ms, sizeof(params)); + phdr = sctp_get_next_param(in_initpkt, offset, ¶ms, sizeof(params)); while (phdr) { + sa_touse = NULL; ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); - if (ptype == SCTP_IPV4_ADDRESS) { - struct sctp_ipv4addr_param *p4, p4_buf; + switch (ptype) { +#ifdef INET + case SCTP_IPV4_ADDRESS: + { + struct sctp_ipv4addr_param *p4, p4_buf; - phdr = sctp_get_next_param(mat, offset, - (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf)); - if (plen != sizeof(struct sctp_ipv4addr_param) || - phdr == NULL) { - return (1); + phdr = sctp_get_next_param(in_initpkt, offset, + (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf)); + if (plen != sizeof(struct sctp_ipv4addr_param) || + phdr == NULL) { + return (1); + } + p4 = (struct sctp_ipv4addr_param *)phdr; + sin4.sin_addr.s_addr = p4->addr; + sa_touse = (struct sockaddr *)&sin4; + break; } - p4 = (struct sctp_ipv4addr_param *)phdr; - sin4.sin_addr.s_addr = p4->addr; - sa_touse = (struct sockaddr *)&sin4; - } else if (ptype == SCTP_IPV6_ADDRESS) { - struct sctp_ipv6addr_param *p6, p6_buf; +#endif +#ifdef INET6 + case SCTP_IPV6_ADDRESS: + { + struct sctp_ipv6addr_param *p6, p6_buf; - phdr = sctp_get_next_param(mat, offset, - (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf)); - if (plen != sizeof(struct sctp_ipv6addr_param) || - phdr == NULL) { - return (1); + phdr = sctp_get_next_param(in_initpkt, offset, + (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf)); + if (plen != sizeof(struct sctp_ipv6addr_param) || + phdr == NULL) { + return (1); + } + p6 = (struct sctp_ipv6addr_param *)phdr; + memcpy((caddr_t)&sin6.sin6_addr, p6->addr, + sizeof(p6->addr)); + sa_touse = (struct sockaddr *)&sin6; + break; } - p6 = (struct sctp_ipv6addr_param *)phdr; -#ifdef INET6 - memcpy((caddr_t)&sin6.sin6_addr, p6->addr, - sizeof(p6->addr)); #endif - sa_touse = (struct sockaddr *)&sin4; + default: + sa_touse = NULL; + break; } if (sa_touse) { /* ok, sa_touse points to one to check */ @@ -4915,6 +5296,7 @@ sctp_are_there_new_addresses(struct sctp_association *asoc, if (sa->sa_family != sa_touse->sa_family) { continue; } +#ifdef INET if (sa->sa_family == AF_INET) { sa4 = (struct sockaddr_in *)sa; if (sa4->sin_addr.s_addr == @@ -4923,6 +5305,7 @@ sctp_are_there_new_addresses(struct sctp_association *asoc, break; } } +#endif #ifdef INET6 if (sa->sa_family == AF_INET6) { sa6 = (struct sockaddr_in6 *)sa; @@ -4940,7 +5323,7 @@ sctp_are_there_new_addresses(struct sctp_association *asoc, } } offset += SCTP_SIZE32(plen); - phdr = sctp_get_next_param(mat, offset, ¶ms, sizeof(params)); + phdr = sctp_get_next_param(in_initpkt, offset, ¶ms, sizeof(params)); } return (0); } @@ -4953,8 +5336,11 @@ sctp_are_there_new_addresses(struct sctp_association *asoc, */ void sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct mbuf *init_pkt, int iphlen, int offset, struct sctphdr *sh, - struct sctp_init_chunk *init_chk, uint32_t vrf_id, uint16_t port, int hold_inp_lock) + struct mbuf *init_pkt, int iphlen, int offset, + struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, struct sctp_init_chunk *init_chk, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port, int hold_inp_lock) { struct sctp_association *asoc; struct mbuf *m, *m_at, *m_tmp, *m_cookie, *op_err, *mp_last; @@ -4962,19 +5348,19 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_adaptation_layer_indication *ali; struct sctp_ecn_supported_param *ecn; struct sctp_prsctp_supported_param *prsctp; - struct sctp_ecn_nonce_supported_param *ecn_nonce; struct sctp_supported_chunk_types_param *pr_supported; - union sctp_sockstore store, store1, *over_addr; - struct sockaddr_in *sin, *to_sin; + union sctp_sockstore *over_addr; -#ifdef INET6 - struct sockaddr_in6 *sin6, *to_sin6; +#ifdef INET + struct sockaddr_in *dst4 = (struct sockaddr_in *)dst; + struct sockaddr_in *src4 = (struct sockaddr_in *)src; + struct sockaddr_in *sin; #endif - struct ip *iph; - #ifdef INET6 - struct ip6_hdr *ip6; + struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst; + struct sockaddr_in6 *src6 = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *sin6; #endif struct sockaddr *to; @@ -4989,21 +5375,24 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int nat_friendly = 0; struct socket *so; - if (stcb) + if (stcb) { asoc = &stcb->asoc; - else + } else { asoc = NULL; + } mp_last = NULL; if ((asoc != NULL) && (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) && - (sctp_are_there_new_addresses(asoc, init_pkt, iphlen, offset))) { + (sctp_are_there_new_addresses(asoc, init_pkt, offset, src))) { /* new addresses, out of here in non-cookie-wait states */ /* * Send a ABORT, we don't add the new address error clause * though we even set the T bit and copy in the 0 tag.. this * looks no different than if no listener was present. */ - sctp_send_abort(init_pkt, iphlen, sh, 0, NULL, vrf_id, port); + sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, NULL, + use_mflowid, mflowid, + vrf_id, port); return; } abort_flag = 0; @@ -5012,8 +5401,10 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, &abort_flag, (struct sctp_chunkhdr *)init_chk, &nat_friendly); if (abort_flag) { do_a_abort: - sctp_send_abort(init_pkt, iphlen, sh, - init_chk->init.initiate_tag, op_err, vrf_id, port); + sctp_send_abort(init_pkt, iphlen, src, dst, sh, + init_chk->init.initiate_tag, op_err, + use_mflowid, mflowid, + vrf_id, port); return; } m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA); @@ -5025,6 +5416,14 @@ do_a_abort: } SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk); + /* + * We might not overwrite the identification[] completely and on + * some platforms time_entered will contain some padding. Therefore + * zero out the cookie to avoid putting uninitialized memory on the + * wire. + */ + memset(&stc, 0, sizeof(struct sctp_state_cookie)); + /* the time I built cookie */ (void)SCTP_GETTIME_TIMEVAL(&stc.time_entered); @@ -5052,79 +5451,35 @@ do_a_abort: */ stc.site_scope = stc.local_scope = stc.loopback_scope = 0; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - struct inpcb *in_inp; - - /* Its a V6 socket */ - in_inp = (struct inpcb *)inp; stc.ipv6_addr_legal = 1; - /* Now look at the binding flag to see if V4 will be legal */ - if (SCTP_IPV6_V6ONLY(in_inp) == 0) { - stc.ipv4_addr_legal = 1; - } else { - /* V4 addresses are NOT legal on the association */ + if (SCTP_IPV6_V6ONLY(inp)) { stc.ipv4_addr_legal = 0; + } else { + stc.ipv4_addr_legal = 1; } } else { - /* Its a V4 socket, no - V6 */ - stc.ipv4_addr_legal = 1; stc.ipv6_addr_legal = 0; + stc.ipv4_addr_legal = 1; } - #ifdef SCTP_DONT_DO_PRIVADDR_SCOPE stc.ipv4_scope = 1; #else stc.ipv4_scope = 0; #endif - /* now for scope setup */ - memset((caddr_t)&store, 0, sizeof(store)); - memset((caddr_t)&store1, 0, sizeof(store1)); - sin = &store.sin; - to_sin = &store1.sin; -#ifdef INET6 - sin6 = &store.sin6; - to_sin6 = &store1.sin6; -#endif - iph = mtod(init_pkt, struct ip *); - /* establish the to_addr's */ - switch (iph->ip_v) { - case IPVERSION: - to_sin->sin_port = sh->dest_port; - to_sin->sin_family = AF_INET; - to_sin->sin_len = sizeof(struct sockaddr_in); - to_sin->sin_addr = iph->ip_dst; - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - ip6 = mtod(init_pkt, struct ip6_hdr *); - to_sin6->sin6_addr = ip6->ip6_dst; - to_sin6->sin6_scope_id = 0; - to_sin6->sin6_port = sh->dest_port; - to_sin6->sin6_family = AF_INET6; - to_sin6->sin6_len = sizeof(struct sockaddr_in6); - break; -#endif - default: - goto do_a_abort; - break; - }; - if (net == NULL) { - to = (struct sockaddr *)&store; - switch (iph->ip_v) { - case IPVERSION: + to = src; + switch (dst->sa_family) { +#ifdef INET + case AF_INET: { - sin->sin_family = AF_INET; - sin->sin_len = sizeof(struct sockaddr_in); - sin->sin_port = sh->src_port; - sin->sin_addr = iph->ip_src; /* lookup address */ - stc.address[0] = sin->sin_addr.s_addr; + stc.address[0] = src4->sin_addr.s_addr; stc.address[1] = 0; stc.address[2] = 0; stc.address[3] = 0; stc.addr_type = SCTP_IPV4_ADDRESS; /* local from address */ - stc.laddress[0] = to_sin->sin_addr.s_addr; + stc.laddress[0] = dst4->sin_addr.s_addr; stc.laddress[1] = 0; stc.laddress[2] = 0; stc.laddress[3] = 0; @@ -5132,14 +5487,14 @@ do_a_abort: /* scope_id is only for v6 */ stc.scope_id = 0; #ifndef SCTP_DONT_DO_PRIVADDR_SCOPE - if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { + if (IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) { stc.ipv4_scope = 1; } #else stc.ipv4_scope = 1; #endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */ /* Must use the address in this case */ - if (sctp_is_address_on_local_host((struct sockaddr *)sin, vrf_id)) { + if (sctp_is_address_on_local_host(src, vrf_id)) { stc.loopback_scope = 1; stc.ipv4_scope = 1; stc.site_scope = 1; @@ -5147,33 +5502,19 @@ do_a_abort: } break; } +#endif #ifdef INET6 - case IPV6_VERSION >> 4: + case AF_INET6: { - ip6 = mtod(init_pkt, struct ip6_hdr *); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); - sin6->sin6_port = sh->src_port; - sin6->sin6_addr = ip6->ip6_src; - /* lookup address */ - memcpy(&stc.address, &sin6->sin6_addr, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; stc.addr_type = SCTP_IPV6_ADDRESS; - stc.scope_id = 0; - if (sctp_is_address_on_local_host((struct sockaddr *)sin6, vrf_id)) { - /* - * FIX ME: does this have scope from - * rcvif? - */ - (void)sa6_recoverscope(sin6); - stc.scope_id = sin6->sin6_scope_id; - sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)); + memcpy(&stc.address, &src6->sin6_addr, sizeof(struct in6_addr)); + stc.scope_id = in6_getscope(&src6->sin6_addr); + if (sctp_is_address_on_local_host(src, vrf_id)) { stc.loopback_scope = 1; stc.local_scope = 0; stc.site_scope = 1; stc.ipv4_scope = 1; - } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { + } else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr)) { /* * If the new destination is a * LINK_LOCAL we must have common @@ -5198,14 +5539,7 @@ do_a_abort: * pull out the scope_id from * incoming pkt */ - /* - * FIX ME: does this have scope from - * rcvif? - */ - (void)sa6_recoverscope(sin6); - stc.scope_id = sin6->sin6_scope_id; - sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)); - } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { + } else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr)) { /* * If the new destination is * SITE_LOCAL then we must have site @@ -5213,7 +5547,7 @@ do_a_abort: */ stc.site_scope = 1; } - memcpy(&stc.laddress, &to_sin6->sin6_addr, sizeof(struct in6_addr)); + memcpy(&stc.laddress, &dst6->sin6_addr, sizeof(struct in6_addr)); stc.laddr_type = SCTP_IPV6_ADDRESS; break; } @@ -5231,10 +5565,10 @@ do_a_abort: #endif - stc.loopback_scope = asoc->loopback_scope; - stc.ipv4_scope = asoc->ipv4_local_scope; - stc.site_scope = asoc->site_scope; - stc.local_scope = asoc->local_scope; + stc.loopback_scope = asoc->scope.loopback_scope; + stc.ipv4_scope = asoc->scope.ipv4_local_scope; + stc.site_scope = asoc->scope.site_scope; + stc.local_scope = asoc->scope.local_scope; #ifdef INET6 /* Why do we not consider IPv4 LL addresses? */ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) { @@ -5252,6 +5586,7 @@ do_a_abort: /* use the net pointer */ to = (struct sockaddr *)&net->ro._l_addr; switch (to->sa_family) { +#ifdef INET case AF_INET: sin = (struct sockaddr_in *)to; stc.address[0] = sin->sin_addr.s_addr; @@ -5278,17 +5613,21 @@ do_a_abort: stc.laddress[2] = 0; stc.laddress[3] = 0; stc.laddr_type = SCTP_IPV4_ADDRESS; + /* scope_id is only for v6 */ + stc.scope_id = 0; break; +#endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)to; memcpy(&stc.address, &sin6->sin6_addr, sizeof(struct in6_addr)); stc.addr_type = SCTP_IPV6_ADDRESS; + stc.scope_id = sin6->sin6_scope_id; if (net->src_addr_selected == 0) { /* * strange case here, the INIT should have - * did the selection. + * done the selection. */ net->ro._s_addr = sctp_source_address_selection(inp, stcb, (sctp_route_t *) & net->ro, @@ -5312,6 +5651,7 @@ do_a_abort: /* who are we */ memcpy(stc.identification, SCTP_VERSION_STRING, min(strlen(SCTP_VERSION_STRING), sizeof(stc.identification))); + memset(stc.reserved, 0, SCTP_RESERVE_SPACE); /* now the chunk header */ initack->ch.chunk_type = SCTP_INITIATION_ACK; initack->ch.chunk_flags = 0; @@ -5392,20 +5732,25 @@ do_a_abort: /* I can have what I want :> */ initack->init.num_outbound_streams = htons(i_want); } - /* tell him his limt. */ + /* tell him his limit. */ initack->init.num_inbound_streams = htons(inp->sctp_ep.max_open_streams_intome); /* adaptation layer indication parameter */ - ali = (struct sctp_adaptation_layer_indication *)((caddr_t)initack + sizeof(*initack)); - ali->ph.param_type = htons(SCTP_ULP_ADAPTATION); - ali->ph.param_length = htons(sizeof(*ali)); - ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator); - SCTP_BUF_LEN(m) += sizeof(*ali); - ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali)); + if (inp->sctp_ep.adaptation_layer_indicator_provided) { + ali = (struct sctp_adaptation_layer_indication *)((caddr_t)initack + sizeof(*initack)); + ali->ph.param_type = htons(SCTP_ULP_ADAPTATION); + ali->ph.param_length = htons(sizeof(*ali)); + ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator); + SCTP_BUF_LEN(m) += sizeof(*ali); + ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali)); + } else { + ecn = (struct sctp_ecn_supported_param *)((caddr_t)initack + sizeof(*initack)); + } /* ECN parameter */ - if (SCTP_BASE_SYSCTL(sctp_ecn_enable) == 1) { + if (((asoc != NULL) && (asoc->ecn_allowed == 1)) || + (inp->sctp_ecn_enable == 1)) { ecn->ph.param_type = htons(SCTP_ECN_CAPABLE); ecn->ph.param_length = htons(sizeof(*ecn)); SCTP_BUF_LEN(m) += sizeof(*ecn); @@ -5446,14 +5791,6 @@ do_a_abort: bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len); SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len); - /* ECN nonce: And now tell the peer we support ECN nonce */ - if (SCTP_BASE_SYSCTL(sctp_ecn_nonce)) { - ecn_nonce = (struct sctp_ecn_nonce_supported_param *) - ((caddr_t)pr_supported + SCTP_SIZE32(p_len)); - ecn_nonce->ph.param_type = htons(SCTP_ECN_NONCE_SUPPORTED); - ecn_nonce->ph.param_length = htons(sizeof(*ecn_nonce)); - SCTP_BUF_LEN(m) += sizeof(*ecn_nonce); - } /* add authentication parameters */ if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) { struct sctp_auth_random *randp; @@ -5515,7 +5852,7 @@ do_a_abort: scp.ipv4_local_scope = stc.ipv4_scope; scp.local_scope = stc.local_scope; scp.site_scope = stc.site_scope; - m_at = sctp_add_addresses_to_i_ia(inp, &scp, m_at, cnt_inits_to); + m_at = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_at, cnt_inits_to, NULL, NULL); } /* tack on the operational error if present */ @@ -5525,6 +5862,7 @@ do_a_abort: llen = 0; ol = op_err; + while (ol) { llen += SCTP_BUF_LEN(ol); ol = SCTP_BUF_NEXT(ol); @@ -5556,7 +5894,7 @@ do_a_abort: } /* Now we must build a cookie */ - m_cookie = sctp_add_cookie(inp, init_pkt, offset, m, 0, &stc, &signature); + m_cookie = sctp_add_cookie(init_pkt, offset, m, 0, &stc, &signature); if (m_cookie == NULL) { /* memory problem */ sctp_m_freem(m); @@ -5594,76 +5932,28 @@ do_a_abort: padval = p_len % 4; if ((padval) && (mp_last)) { /* see my previous comments on mp_last */ - int ret; - - ret = sctp_add_pad_tombuf(mp_last, (4 - padval)); - if (ret) { + if (sctp_add_pad_tombuf(mp_last, (4 - padval))) { /* Houston we have a problem, no space */ sctp_m_freem(m); return; } - p_len += padval; } if (stc.loopback_scope) { - over_addr = &store1; + over_addr = (union sctp_sockstore *)dst; } else { over_addr = NULL; } (void)sctp_lowlevel_chunk_output(inp, NULL, NULL, to, m, 0, NULL, 0, 0, - 0, NULL, 0, + 0, 0, inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag, - port, SCTP_SO_NOT_LOCKED, over_addr); + port, over_addr, + use_mflowid, mflowid, + SCTP_SO_NOT_LOCKED); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } -void -sctp_insert_on_wheel(struct sctp_tcb *stcb, - struct sctp_association *asoc, - struct sctp_stream_out *strq, int holds_lock) -{ - if (holds_lock == 0) { - SCTP_TCB_SEND_LOCK(stcb); - } - if ((strq->next_spoke.tqe_next == NULL) && - (strq->next_spoke.tqe_prev == NULL)) { - TAILQ_INSERT_TAIL(&asoc->out_wheel, strq, next_spoke); - } - if (holds_lock == 0) { - SCTP_TCB_SEND_UNLOCK(stcb); - } -} - -void -sctp_remove_from_wheel(struct sctp_tcb *stcb, - struct sctp_association *asoc, - struct sctp_stream_out *strq, - int holds_lock) -{ - /* take off and then setup so we know it is not on the wheel */ - if (holds_lock == 0) { - SCTP_TCB_SEND_LOCK(stcb); - } - if (TAILQ_EMPTY(&strq->outqueue)) { - if (asoc->last_out_stream == strq) { - asoc->last_out_stream = TAILQ_PREV(asoc->last_out_stream, sctpwheel_listhead, next_spoke); - if (asoc->last_out_stream == NULL) { - asoc->last_out_stream = TAILQ_LAST(&asoc->out_wheel, sctpwheel_listhead); - } - if (asoc->last_out_stream == strq) { - asoc->last_out_stream = NULL; - } - } - TAILQ_REMOVE(&asoc->out_wheel, strq, next_spoke); - strq->next_spoke.tqe_next = NULL; - strq->next_spoke.tqe_prev = NULL; - } - if (holds_lock == 0) { - SCTP_TCB_SEND_UNLOCK(stcb); - } -} - static void sctp_prune_prsctp(struct sctp_tcb *stcb, struct sctp_association *asoc, @@ -5702,14 +5992,14 @@ sctp_prune_prsctp(struct sctp_tcb *stcb, * if the mbuf is here */ int ret_spc; - int cause; + uint8_t sent; if (chk->sent > SCTP_DATAGRAM_UNSENT) - cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT; + sent = 1; else - cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT; + sent = 0; ret_spc = sctp_release_pr_sctp_chunk(stcb, chk, - cause, + sent, SCTP_SO_LOCKED); freed_spc += ret_spc; if (freed_spc >= dataout) { @@ -5720,9 +6010,7 @@ sctp_prune_prsctp(struct sctp_tcb *stcb, } /* if chunk has enabled */ } /* tailqforeach */ - chk = TAILQ_FIRST(&asoc->send_queue); - while (chk) { - nchk = TAILQ_NEXT(chk, sctp_next); + TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { /* Here we must move to the sent queue and mark */ if (PR_SCTP_BUF_ENABLED(chk->flags)) { if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) { @@ -5734,8 +6022,7 @@ sctp_prune_prsctp(struct sctp_tcb *stcb, int ret_spc; ret_spc = sctp_release_pr_sctp_chunk(stcb, chk, - SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT, - SCTP_SO_LOCKED); + 0, SCTP_SO_LOCKED); freed_spc += ret_spc; if (freed_spc >= dataout) { @@ -5744,8 +6031,7 @@ sctp_prune_prsctp(struct sctp_tcb *stcb, } /* end if chk->data */ } /* end if right class */ } /* end if chk pr-sctp */ - chk = nchk; - } /* end while (chk) */ + } /* tailqforeachsafe (chk) */ } /* if enabled in asoc */ } @@ -5850,7 +6136,7 @@ sctp_msg_append(struct sctp_tcb *stcb, struct mbuf *m, struct sctp_sndrcvinfo *srcv, int hold_stcb_lock) { - int error = 0, holds_lock; + int error = 0; struct mbuf *at; struct sctp_stream_queue_pending *sp = NULL; struct sctp_stream_out *strm; @@ -5859,7 +6145,6 @@ sctp_msg_append(struct sctp_tcb *stcb, * Given an mbuf chain, put it into the association send queue and * place it on the wheel */ - holds_lock = hold_stcb_lock; if (srcv->sinfo_stream >= stcb->asoc.streamoutcnt) { /* Invalid stream number */ SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); @@ -5893,7 +6178,6 @@ sctp_msg_append(struct sctp_tcb *stcb, sp->timetolive = srcv->sinfo_timetolive; sp->ppid = srcv->sinfo_ppid; sp->context = srcv->sinfo_context; - sp->strseq = 0; if (sp->sinfo_flags & SCTP_ADDR_OVER) { sp->net = net; atomic_add_int(&sp->net->ref_count, 1); @@ -5907,35 +6191,38 @@ sctp_msg_append(struct sctp_tcb *stcb, sp->some_taken = 0; sp->data = m; sp->tail_mbuf = NULL; - sp->length = 0; - at = m; sctp_set_prsctp_policy(sp); /* * We could in theory (for sendall) sifa the length in, but we would * still have to hunt through the chain since we need to setup the * tail_mbuf */ - while (at) { + sp->length = 0; + for (at = m; at; at = SCTP_BUF_NEXT(at)) { if (SCTP_BUF_NEXT(at) == NULL) sp->tail_mbuf = at; sp->length += SCTP_BUF_LEN(at); - at = SCTP_BUF_NEXT(at); } - SCTP_TCB_SEND_LOCK(stcb); + if (srcv->sinfo_keynumber_valid) { + sp->auth_keyid = srcv->sinfo_keynumber; + } else { + sp->auth_keyid = stcb->asoc.authinfo.active_keyid; + } + if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) { + sctp_auth_key_acquire(stcb, sp->auth_keyid); + sp->holds_key_ref = 1; + } + if (hold_stcb_lock == 0) { + SCTP_TCB_SEND_LOCK(stcb); + } sctp_snd_sb_alloc(stcb, sp->length); atomic_add_int(&stcb->asoc.stream_queue_cnt, 1); TAILQ_INSERT_TAIL(&strm->outqueue, sp, next); - if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) { - sp->strseq = strm->next_sequence_sent; - strm->next_sequence_sent++; - } - if ((strm->next_spoke.tqe_next == NULL) && - (strm->next_spoke.tqe_prev == NULL)) { - /* Not on wheel, insert */ - sctp_insert_on_wheel(stcb, &stcb->asoc, strm, 1); - } + stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, &stcb->asoc, strm, sp, 1); m = NULL; - SCTP_TCB_SEND_UNLOCK(stcb); + if (hold_stcb_lock == 0) { + SCTP_TCB_SEND_UNLOCK(stcb); + } out_now: if (m) { sctp_m_freem(m); @@ -6049,12 +6336,10 @@ error_out: if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = appendchain; - while (mat) { + for (mat = appendchain; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -6106,7 +6391,7 @@ error_out: } } -int +static int sctp_med_chunk_output(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_association *asoc, @@ -6121,7 +6406,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, static void sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, - uint32_t val) + uint32_t val SCTP_UNUSED) { struct sctp_copy_all *ca; struct mbuf *m; @@ -6129,6 +6414,7 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, int added_control = 0; int un_sent, do_chunk_output = 1; struct sctp_association *asoc; + struct sctp_nets *net; ca = (struct sctp_copy_all *)ptr; if (ca->m == NULL) { @@ -6149,12 +6435,10 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = m; - while (mat) { + for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -6162,6 +6446,11 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, m = NULL; } SCTP_TCB_LOCK_ASSERT(stcb); + if (stcb->asoc.alternate) { + net = stcb->asoc.alternate; + } else { + net = stcb->asoc.primary_destination; + } if (ca->sndrcv.sinfo_flags & SCTP_ABORT) { /* Abort this assoc with m as the user defined reason */ if (m) { @@ -6171,16 +6460,14 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, if (m) { ph = mtod(m, struct sctp_paramhdr *); ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); - ph->param_length = htons(ca->sndlen); + ph->param_length = htons(sizeof(struct sctp_paramhdr) + ca->sndlen); } /* * We add one here to keep the assoc from * dis-appearing on us. */ atomic_add_int(&stcb->asoc.refcnt, 1); - sctp_abort_an_association(inp, stcb, - SCTP_RESPONSE_TO_USER_REQ, - m, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(inp, stcb, m, SCTP_SO_NOT_LOCKED); /* * sctp_abort_an_association calls sctp_free_asoc() * free association will NOT free it since we @@ -6200,7 +6487,7 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, } } else { if (m) { - ret = sctp_msg_append(stcb, stcb->asoc.primary_destination, m, + ret = sctp_msg_append(stcb, net, m, &ca->sndrcv, 1); } asoc = &stcb->asoc; @@ -6208,7 +6495,7 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, /* shutdown this assoc */ int cnt; - cnt = sctp_is_there_unsent_data(stcb); + cnt = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED); if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && @@ -6227,14 +6514,15 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, * only send SHUTDOWN the first time * through */ - sctp_send_shutdown(stcb, stcb->asoc.primary_destination); if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); + sctp_stop_timers_for_shutdown(stcb); + sctp_send_shutdown(stcb, net); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, - asoc->primary_destination); + net); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); added_control = 1; @@ -6274,7 +6562,6 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, abort_anyway: atomic_add_int(&stcb->asoc.refcnt, 1); sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_RESPONSE_TO_USER_REQ, NULL, SCTP_SO_NOT_LOCKED); atomic_add_int(&stcb->asoc.refcnt, -1); goto no_chunk_output; @@ -6315,7 +6602,7 @@ no_chunk_output: } static void -sctp_sendall_completes(void *ptr, uint32_t val) +sctp_sendall_completes(void *ptr, uint32_t val SCTP_UNUSED) { struct sctp_copy_all *ca; @@ -6400,7 +6687,9 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m, memset(ca, 0, sizeof(struct sctp_copy_all)); ca->inp = inp; - memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo)); + if (srcv) { + memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo)); + } /* * take off the sendall flag, it would be bad if we failed to do * this :-0 @@ -6447,9 +6736,7 @@ sctp_toss_old_cookies(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_tmit_chunk *chk, *nchk; - chk = TAILQ_FIRST(&asoc->control_send_queue); - while (chk) { - nchk = TAILQ_NEXT(chk, sctp_next); + TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) { if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) { TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); if (chk->data) { @@ -6457,9 +6744,8 @@ sctp_toss_old_cookies(struct sctp_tcb *stcb, struct sctp_association *asoc) chk->data = NULL; } asoc->ctrl_queue_cnt--; - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } - chk = nchk; } } @@ -6467,19 +6753,16 @@ void sctp_toss_old_asconf(struct sctp_tcb *stcb) { struct sctp_association *asoc; - struct sctp_tmit_chunk *chk, *chk_tmp; + struct sctp_tmit_chunk *chk, *nchk; struct sctp_asconf_chunk *acp; asoc = &stcb->asoc; - for (chk = TAILQ_FIRST(&asoc->asconf_send_queue); chk != NULL; - chk = chk_tmp) { - /* get next chk */ - chk_tmp = TAILQ_NEXT(chk, sctp_next); + TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) { /* find SCTP_ASCONF chunk in queue */ if (chk->rec.chunk_id.id == SCTP_ASCONF) { if (chk->data) { acp = mtod(chk->data, struct sctp_asconf_chunk *); - if (compare_with_wrap(ntohl(acp->serial_number), stcb->asoc.asconf_seq_out_acked, MAX_SEQ)) { + if (SCTP_TSN_GT(ntohl(acp->serial_number), asoc->asconf_seq_out_acked)) { /* Not Acked yet */ break; } @@ -6490,7 +6773,7 @@ sctp_toss_old_asconf(struct sctp_tcb *stcb) chk->data = NULL; } asoc->ctrl_queue_cnt--; - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } } } @@ -6520,6 +6803,7 @@ sctp_clean_up_datalist(struct sctp_tcb *stcb, } /* record time */ data_list[i]->sent_rcv_time = net->last_sent_time; + data_list[i]->rec.data.cwnd_at_send = net->cwnd; data_list[i]->rec.data.fast_retran_tsn = data_list[i]->rec.data.TSN_seq; if (data_list[i]->whoTo == NULL) { data_list[i]->whoTo = net; @@ -6527,8 +6811,7 @@ sctp_clean_up_datalist(struct sctp_tcb *stcb, } /* on to the sent queue */ tp1 = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead); - if ((tp1) && (compare_with_wrap(tp1->rec.data.TSN_seq, - data_list[i]->rec.data.TSN_seq, MAX_TSN))) { + if ((tp1) && SCTP_TSN_GT(tp1->rec.data.TSN_seq, data_list[i]->rec.data.TSN_seq)) { struct sctp_tmit_chunk *tpp; /* need to move back */ @@ -6539,8 +6822,7 @@ sctp_clean_up_datalist(struct sctp_tcb *stcb, goto all_done; } tp1 = tpp; - if (compare_with_wrap(tp1->rec.data.TSN_seq, - data_list[i]->rec.data.TSN_seq, MAX_TSN)) { + if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, data_list[i]->rec.data.TSN_seq)) { goto back_up_more; } TAILQ_INSERT_AFTER(&asoc->sent_queue, tp1, data_list[i], sctp_next); @@ -6584,16 +6866,21 @@ all_done: asoc->peers_rwnd = 0; } } + if (asoc->cc_functions.sctp_cwnd_update_packet_transmitted) { + (*asoc->cc_functions.sctp_cwnd_update_packet_transmitted) (stcb, net); + } } static void -sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc) +sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc, int so_locked +#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) + SCTP_UNUSED +#endif +) { struct sctp_tmit_chunk *chk, *nchk; - for (chk = TAILQ_FIRST(&asoc->control_send_queue); - chk; chk = nchk) { - nchk = TAILQ_NEXT(chk, sctp_next); + TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) { if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) || (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK) || /* EY */ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) || @@ -6616,7 +6903,7 @@ sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc) asoc->ctrl_queue_cnt--; if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) asoc->fwd_tsn_cnt--; - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, so_locked); } else if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) { /* special handling, we must look into the param */ if (chk != asoc->str_reset) { @@ -6696,7 +6983,12 @@ sctp_move_to_outqueue(struct sctp_tcb *stcb, int *locked, int *giveup, int eeor_mode, - int *bail) + int *bail, + int so_locked +#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) + SCTP_UNUSED +#endif +) { /* Move from the stream to the send_queue keeping track of the total */ struct sctp_association *asoc; @@ -6758,6 +7050,7 @@ one_more_time: } atomic_subtract_int(&asoc->stream_queue_cnt, 1); TAILQ_REMOVE(&strq->outqueue, sp, next); + stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up); if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; @@ -6766,7 +7059,7 @@ one_more_time: sctp_m_freem(sp->data); sp->data = NULL; } - sctp_free_a_strmoq(stcb, sp); + sctp_free_a_strmoq(stcb, sp, so_locked); /* we can't be locked to it */ *locked = 0; stcb->asoc.locked_on_sending = NULL; @@ -6932,7 +7225,7 @@ dont_do_it: chk->last_mbuf = NULL; if (chk->data == NULL) { sp->some_taken = some_taken; - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, so_locked); *bail = 1; to_move = 0; goto out_of; @@ -6941,12 +7234,10 @@ dont_do_it: if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = chk->data; - while (mat) { + for (mat = chk->data; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -7036,7 +7327,7 @@ dont_do_it: atomic_add_int(&sp->length, to_move); chk->data = NULL; *bail = 1; - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, so_locked); to_move = 0; goto out_of; } else { @@ -7053,7 +7344,7 @@ dont_do_it: panic("prepend failes HELP?"); #else SCTP_PRINTF("prepend fails HELP?\n"); - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, so_locked); #endif *bail = 1; to_move = 0; @@ -7068,12 +7359,14 @@ dont_do_it: chk->asoc = &stcb->asoc; chk->pad_inplace = 0; chk->no_fr_allowed = 0; - chk->rec.data.stream_seq = sp->strseq; + chk->rec.data.stream_seq = strq->next_sequence_send; + if (rcv_flags & SCTP_DATA_LAST_FRAG) { + strq->next_sequence_send++; + } chk->rec.data.stream_number = sp->stream; chk->rec.data.payloadtype = sp->ppid; chk->rec.data.context = sp->context; chk->rec.data.doing_fast_retransmit = 0; - chk->rec.data.ect_nonce = 0; /* ECN Nonce */ chk->rec.data.timetodrop = sp->ts; chk->flags = sp->act_flags; @@ -7167,6 +7460,7 @@ dont_do_it: send_lock_up = 1; } TAILQ_REMOVE(&strq->outqueue, sp, next); + stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up); if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; @@ -7175,7 +7469,7 @@ dont_do_it: sctp_m_freem(sp->data); sp->data = NULL; } - sctp_free_a_strmoq(stcb, sp); + sctp_free_a_strmoq(stcb, sp, so_locked); /* we can't be locked to it */ *locked = 0; @@ -7185,57 +7479,48 @@ dont_do_it: *locked = 1; } asoc->chunks_on_out_queue++; + strq->chunks_on_queues++; TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next); asoc->send_queue_cnt++; out_of: if (send_lock_up) { SCTP_TCB_SEND_UNLOCK(stcb); - send_lock_up = 0; } return (to_move); } -static struct sctp_stream_out * -sctp_select_a_stream(struct sctp_tcb *stcb, struct sctp_association *asoc) -{ - struct sctp_stream_out *strq; - - /* Find the next stream to use */ - if (asoc->last_out_stream == NULL) { - strq = TAILQ_FIRST(&asoc->out_wheel); - } else { - strq = TAILQ_NEXT(asoc->last_out_stream, next_spoke); - if (strq == NULL) { - strq = TAILQ_FIRST(&asoc->out_wheel); - } - } - return (strq); -} - - static void sctp_fill_outqueue(struct sctp_tcb *stcb, - struct sctp_nets *net, int frag_point, int eeor_mode, int *quit_now) + struct sctp_nets *net, int frag_point, int eeor_mode, int *quit_now, int so_locked +#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) + SCTP_UNUSED +#endif +) { struct sctp_association *asoc; - struct sctp_stream_out *strq, *strqn; + struct sctp_stream_out *strq; int goal_mtu, moved_how_much, total_moved = 0, bail = 0; int locked, giveup; - struct sctp_stream_queue_pending *sp; SCTP_TCB_LOCK_ASSERT(stcb); asoc = &stcb->asoc; + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: + goal_mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; + break; +#endif #ifdef INET6 - if (net->ro._l_addr.sin6.sin6_family == AF_INET6) { + case AF_INET6: goal_mtu = net->mtu - SCTP_MIN_OVERHEAD; - } else { - /* ?? not sure what else to do */ - goal_mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; - } -#else - goal_mtu = net->mtu - SCTP_MIN_OVERHEAD; + break; #endif + default: + /* TSNH */ + goal_mtu = net->mtu; + break; + } /* Need an allowance for the data chunk header too */ goal_mtu -= sizeof(struct sctp_data_chunk); @@ -7246,46 +7531,16 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, strq = asoc->locked_on_sending; locked = 1; } else { - strq = sctp_select_a_stream(stcb, asoc); + strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc); locked = 0; } - strqn = strq; while ((goal_mtu > 0) && strq) { - sp = TAILQ_FIRST(&strq->outqueue); - if (sp == NULL) { - break; - } - /** - * Honor the users' choice if given. If not given, - * pull it only to the primary path in case of not using - * CMT. - */ - if (((sp->net != NULL) && - (sp->net != net)) || - ((sp->net == NULL) && - (asoc->sctp_cmt_on_off == 0) && - (asoc->primary_destination != net))) { - /* Do not pull to this network */ - if (locked) { - break; - } else { - strq = sctp_select_a_stream(stcb, asoc); - if (strq == NULL) - /* none left */ - break; - if (strqn == strq) { - /* I have circled */ - break; - } - continue; - } - } giveup = 0; bail = 0; moved_how_much = sctp_move_to_outqueue(stcb, strq, goal_mtu, frag_point, &locked, - &giveup, eeor_mode, &bail); + &giveup, eeor_mode, &bail, so_locked); if (moved_how_much) - asoc->last_out_stream = strq; + stcb->asoc.ss_functions.sctp_ss_scheduled(stcb, net, asoc, strq, moved_how_much); if (locked) { asoc->locked_on_sending = strq; @@ -7294,23 +7549,10 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, break; } else { asoc->locked_on_sending = NULL; - if (TAILQ_EMPTY(&strq->outqueue)) { - if (strq == strqn) { - /* Must move start to next one */ - strqn = TAILQ_NEXT(strq, next_spoke); - if (strqn == NULL) { - strqn = TAILQ_FIRST(&asoc->out_wheel); - if (strqn == NULL) { - break; - } - } - } - sctp_remove_from_wheel(stcb, asoc, strq, 0); - } if ((giveup) || bail) { break; } - strq = sctp_select_a_stream(stcb, asoc); + strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc); if (strq == NULL) { break; } @@ -7322,12 +7564,14 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, if (bail) *quit_now = 1; + stcb->asoc.ss_functions.sctp_ss_packet_done(stcb, net, asoc); + if (total_moved == 0) { if ((stcb->asoc.sctp_cmt_on_off == 0) && (net == stcb->asoc.primary_destination)) { /* ran dry for primary network net */ SCTP_STAT_INCR(sctps_primary_randry); - } else if (stcb->asoc.sctp_cmt_on_off == 1) { + } else if (stcb->asoc.sctp_cmt_on_off > 0) { /* ran dry with CMT on */ SCTP_STAT_INCR(sctps_cmt_randry); } @@ -7350,16 +7594,16 @@ void sctp_move_chunks_from_net(struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *asoc; - struct sctp_stream_out *outs; struct sctp_tmit_chunk *chk; struct sctp_stream_queue_pending *sp; + unsigned int i; if (net == NULL) { return; } asoc = &stcb->asoc; - TAILQ_FOREACH(outs, &asoc->out_wheel, next_spoke) { - TAILQ_FOREACH(sp, &outs->outqueue, next) { + for (i = 0; i < stcb->asoc.streamoutcnt; i++) { + TAILQ_FOREACH(sp, &stcb->asoc.strmout[i].outqueue, next) { if (sp->net == net) { sctp_free_remote_addr(sp->net); sp->net = NULL; @@ -7396,7 +7640,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, * fomulate and send the low level chunks. Making sure to combine * any control in the control chunk queue also. */ - struct sctp_nets *net, *start_at, *old_start_at = NULL; + struct sctp_nets *net, *start_at, *sack_goes_to = NULL, *old_start_at = NULL; struct mbuf *outchain, *endoutchain; struct sctp_tmit_chunk *chk, *nchk; @@ -7413,13 +7657,13 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, struct sctp_auth_chunk *auth = NULL; uint16_t auth_keyid; int override_ok = 1; + int skip_fill_up = 0; int data_auth_reqd = 0; /* * JRS 5/14/07 - Add flag for whether a heartbeat is sent to the * destination. */ - int pf_hbflag = 0; int quit_now = 0; *num_out = 0; @@ -7449,10 +7693,12 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, no_data_chunks = 0; /* Nothing to possible to send? */ - if (TAILQ_EMPTY(&asoc->control_send_queue) && + if ((TAILQ_EMPTY(&asoc->control_send_queue) || + (asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) && TAILQ_EMPTY(&asoc->asconf_send_queue) && TAILQ_EMPTY(&asoc->send_queue) && - TAILQ_EMPTY(&asoc->out_wheel)) { + stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) { +nothing_to_send: *reason_code = 9; return (0); } @@ -7464,12 +7710,43 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, no_data_chunks = 1; } } + if (stcb->asoc.ecn_echo_cnt_onq) { + /* Record where a sack goes, if any */ + if (no_data_chunks && + (asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) { + /* Nothing but ECNe to send - we don't do that */ + goto nothing_to_send; + } + TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { + if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) || + (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK)) { + sack_goes_to = chk->whoTo; + break; + } + } + } max_rwnd_per_dest = ((asoc->peers_rwnd + asoc->total_flight) / asoc->numnets); if (stcb->sctp_socket) max_send_per_dest = SCTP_SB_LIMIT_SND(stcb->sctp_socket) / asoc->numnets; else max_send_per_dest = 0; - if ((no_data_chunks == 0) && (!TAILQ_EMPTY(&asoc->out_wheel))) { + if (no_data_chunks == 0) { + /* How many non-directed chunks are there? */ + TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) { + if (chk->whoTo == NULL) { + /* + * We already have non-directed chunks on + * the queue, no need to do a fill-up. + */ + skip_fill_up = 1; + break; + } + } + + } + if ((no_data_chunks == 0) && + (skip_fill_up == 0) && + (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc))) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { /* * This for loop we are in takes in each net, if @@ -7483,23 +7760,19 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, * copy by reference (we hope). */ net->window_probe = 0; - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) || - (net->dest_state & SCTP_ADDR_UNCONFIRMED)) { + if ((net != stcb->asoc.alternate) && + ((net->dest_state & SCTP_ADDR_PF) || + (!(net->dest_state & SCTP_ADDR_REACHABLE)) || + (net->dest_state & SCTP_ADDR_UNCONFIRMED))) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 1, SCTP_CWND_LOG_FILL_OUTQ_CALLED); } continue; } - if ((asoc->sctp_cmt_on_off == 0) && - (asoc->primary_destination != net) && - (net->ref_count < 2)) { - /* nothing can be in queue for this guy */ - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { - sctp_log_cwnd(stcb, net, 2, - SCTP_CWND_LOG_FILL_OUTQ_CALLED); - } - continue; + if ((stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) && + (net->flight_size == 0)) { + (*stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) (stcb, net); } if (net->flight_size >= net->cwnd) { /* skip this network, no room - can't fill */ @@ -7512,7 +7785,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 4, SCTP_CWND_LOG_FILL_OUTQ_CALLED); } - sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now); + sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now, so_locked); if (quit_now) { /* memory alloc failure */ no_data_chunks = 1; @@ -7528,7 +7801,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, *reason_code = 8; return (0); } - if (asoc->sctp_cmt_on_off == 1) { + if (asoc->sctp_cmt_on_off > 0) { /* get the last start point */ start_at = asoc->last_net_cmt_send_started; if (start_at == NULL) { @@ -7544,6 +7817,16 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, } else { start_at = TAILQ_FIRST(&asoc->nets); } + TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { + if (chk->whoTo == NULL) { + if (asoc->alternate) { + chk->whoTo = asoc->alternate; + } else { + chk->whoTo = asoc->primary_destination; + } + atomic_add_int(&chk->whoTo->ref_count, 1); + } + } old_start_at = NULL; again_one_more_time: for (net = start_at; net != NULL; net = TAILQ_NEXT(net, sctp_next)) { @@ -7554,15 +7837,6 @@ again_one_more_time: break; } tsns_sent = 0xa; - if ((asoc->sctp_cmt_on_off == 0) && - (asoc->primary_destination != net) && - (net->ref_count < 2)) { - /* - * Ref-count of 1 so we cannot have data or control - * queued to this address. Skip it (non-CMT). - */ - continue; - } if (TAILQ_EMPTY(&asoc->control_send_queue) && TAILQ_EMPTY(&asoc->asconf_send_queue) && (net->flight_size >= net->cwnd)) { @@ -7572,7 +7846,7 @@ again_one_more_time: */ continue; } - ctl_cnt = bundle_at = 0; + bundle_at = 0; endoutchain = outchain = NULL; no_fragmentflg = 1; one_chunk = 0; @@ -7598,9 +7872,11 @@ again_one_more_time: } } switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) { +#ifdef INET case AF_INET: mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr)); break; +#endif #ifdef INET6 case AF_INET6: mtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)); @@ -7629,18 +7905,24 @@ again_one_more_time: /* ASCONF transmission */ /************************/ /* Now first lets go through the asconf queue */ - for (chk = TAILQ_FIRST(&asoc->asconf_send_queue); - chk; chk = nchk) { - nchk = TAILQ_NEXT(chk, sctp_next); + TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) { if (chk->rec.chunk_id.id != SCTP_ASCONF) { continue; } - if (chk->whoTo != net) { - /* - * No, not sent to the network we are - * looking at - */ - break; + if (chk->whoTo == NULL) { + if (asoc->alternate == NULL) { + if (asoc->primary_destination != net) { + break; + } + } else { + if (asoc->alternate != net) { + break; + } + } + } else { + if (chk->whoTo != net) { + break; + } } if (chk->data == NULL) { break; @@ -7724,6 +8006,10 @@ again_one_more_time: */ no_data_chunks = 1; chk->sent = SCTP_DATAGRAM_SENT; + if (chk->whoTo == NULL) { + chk->whoTo = net; + atomic_add_int(&net->ref_count, 1); + } chk->snd_count++; if (mtu == 0) { /* @@ -7742,10 +8028,12 @@ again_one_more_time: (struct sockaddr *)&net->ro._l_addr, outchain, auth_offset, auth, stcb->asoc.authinfo.active_keyid, - no_fragmentflg, 0, NULL, asconf, + no_fragmentflg, 0, asconf, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), - net->port, so_locked, NULL))) { + net->port, NULL, + 0, 0, + so_locked))) { if (error == ENOBUFS) { asoc->ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); @@ -7793,10 +8081,21 @@ again_one_more_time: if (!no_out_cnt) *num_out += ctl_cnt; /* recalc a clean slate and setup */ - if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - mtu = (net->mtu - SCTP_MIN_OVERHEAD); - } else { - mtu = (net->mtu - SCTP_MIN_V4_OVERHEAD); + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: + mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; + break; +#endif +#ifdef INET6 + case AF_INET6: + mtu = net->mtu - SCTP_MIN_OVERHEAD; + break; +#endif + default: + /* TSNH */ + mtu = net->mtu; + break; } to_out = 0; no_fragmentflg = 1; @@ -7807,16 +8106,45 @@ again_one_more_time: /* Control transmission */ /************************/ /* Now first lets go through the control queue */ - for (chk = TAILQ_FIRST(&asoc->control_send_queue); - chk; chk = nchk) { - nchk = TAILQ_NEXT(chk, sctp_next); - if (chk->whoTo != net) { + TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) { + if ((sack_goes_to) && + (chk->rec.chunk_id.id == SCTP_ECN_ECHO) && + (chk->whoTo != sack_goes_to)) { /* - * No, not sent to the network we are - * looking at + * if we have a sack in queue, and we are + * looking at an ecn echo that is NOT queued + * to where the sack is going.. */ - continue; + if (chk->whoTo == net) { + /* + * Don't transmit it to where its + * going (current net) + */ + continue; + } else if (sack_goes_to == net) { + /* + * But do transmit it to this + * address + */ + goto skip_net_check; + } + } + if (chk->whoTo == NULL) { + if (asoc->alternate == NULL) { + if (asoc->primary_destination != net) { + continue; + } + } else { + if (asoc->alternate != net) { + continue; + } + } + } else { + if (chk->whoTo != net) { + continue; + } } + skip_net_check: if (chk->data == NULL) { continue; } @@ -7903,15 +8231,8 @@ again_one_more_time: (chk->rec.chunk_id.id == SCTP_ECN_CWR) || (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) || (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) { - if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) { hbflag = 1; - /* - * JRS 5/14/07 - Set the - * flag to say a heartbeat - * is being sent. - */ - pf_hbflag = 1; } /* remove these chunks at the end */ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) || @@ -7934,8 +8255,26 @@ again_one_more_time: if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) { cookie = 1; no_out_cnt = 1; + } else if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) { + /* + * Increment ecne send count + * here this means we may be + * over-zealous in our + * counting if the send + * fails, but its the best + * place to do it (we used + * to do it in the queue of + * the chunk, but that did + * not tell how many times + * it was sent. + */ + SCTP_STAT_INCR(sctps_sendecne); } chk->sent = SCTP_DATAGRAM_SENT; + if (chk->whoTo == NULL) { + chk->whoTo = net; + atomic_add_int(&net->ref_count, 1); + } chk->snd_count++; } if (mtu == 0) { @@ -7963,10 +8302,12 @@ again_one_more_time: outchain, auth_offset, auth, stcb->asoc.authinfo.active_keyid, - no_fragmentflg, 0, NULL, asconf, + no_fragmentflg, 0, asconf, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), - net->port, so_locked, NULL))) { + net->port, NULL, + 0, 0, + so_locked))) { if (error == ENOBUFS) { asoc->ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); @@ -8019,10 +8360,21 @@ again_one_more_time: if (!no_out_cnt) *num_out += ctl_cnt; /* recalc a clean slate and setup */ - if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - mtu = (net->mtu - SCTP_MIN_OVERHEAD); - } else { - mtu = (net->mtu - SCTP_MIN_V4_OVERHEAD); + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: + mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; + break; +#endif +#ifdef INET6 + case AF_INET6: + mtu = net->mtu - SCTP_MIN_OVERHEAD; + break; +#endif + default: + /* TSNH */ + mtu = net->mtu; + break; } to_out = 0; no_fragmentflg = 1; @@ -8030,15 +8382,15 @@ again_one_more_time: } } /* JRI: if dest is in PF state, do not send data to it */ - if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && + if ((asoc->sctp_cmt_on_off > 0) && + (net != stcb->asoc.alternate) && (net->dest_state & SCTP_ADDR_PF)) { goto no_data_fill; } if (net->flight_size >= net->cwnd) { goto no_data_fill; } - if ((asoc->sctp_cmt_on_off == 1) && + if ((asoc->sctp_cmt_on_off > 0) && (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_RECV_BUFFER_SPLITTING) && (net->flight_size > max_rwnd_per_dest)) { goto no_data_fill; @@ -8049,7 +8401,7 @@ again_one_more_time: * net. For now, this is better than nothing and it disabled * by default... */ - if ((asoc->sctp_cmt_on_off == 1) && + if ((asoc->sctp_cmt_on_off > 0) && (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_SEND_BUFFER_SPLITTING) && (max_send_per_dest > 0) && (net->flight_size > max_send_per_dest)) { @@ -8071,12 +8423,14 @@ again_one_more_time: } /* now lets add any data within the MTU constraints */ switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) { +#ifdef INET case AF_INET: if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr))) omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr)); else omtu = 0; break; +#endif #ifdef INET6 case AF_INET6: if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr))) @@ -8093,7 +8447,7 @@ again_one_more_time: if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) && (skip_data_for_this_net == 0)) || (cookie)) { - for (chk = TAILQ_FIRST(&asoc->send_queue); chk; chk = nchk) { + TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { if (no_data_chunks) { /* let only control go out */ *reason_code = 1; @@ -8104,12 +8458,22 @@ again_one_more_time: *reason_code = 2; break; } - nchk = TAILQ_NEXT(chk, sctp_next); if ((chk->whoTo != NULL) && (chk->whoTo != net)) { /* Don't send the chunk on this net */ continue; } + if (asoc->sctp_cmt_on_off == 0) { + if ((asoc->alternate) && + (asoc->alternate != net) && + (chk->whoTo == NULL)) { + continue; + } else if ((net != asoc->primary_destination) && + (asoc->alternate == NULL) && + (chk->whoTo == NULL)) { + continue; + } + } if ((chk->send_size > omtu) && ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) == 0)) { /*- * strange, we have a chunk that is @@ -8208,7 +8572,6 @@ again_one_more_time: chk->window_probe = 0; data_list[bundle_at++] = chk; if (bundle_at >= SCTP_MAX_DATA_BUNDLING) { - mtu = 0; break; } if (chk->sent == SCTP_DATAGRAM_UNSENT) { @@ -8270,18 +8633,6 @@ no_data_fill: * restart it. */ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net); - } else if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && - pf_hbflag && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF) && - (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) { - /* - * JRS 5/14/07 - If a HB has been sent to a - * PF destination and no T3 timer is - * currently running, start the T3 timer to - * track the HBs that were sent. - */ - sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net); } /* Now send it, if there is anything to send :> */ if ((error = sctp_lowlevel_chunk_output(inp, @@ -8294,11 +8645,12 @@ no_data_fill: auth_keyid, no_fragmentflg, bundle_at, - data_list[0], asconf, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), - net->port, so_locked, NULL))) { + net->port, NULL, + 0, 0, + so_locked))) { /* error, we could not output */ if (error == ENOBUFS) { SCTP_STAT_INCR(sctps_lowlevelerr); @@ -8338,7 +8690,7 @@ no_data_fill: } else { asoc->ifp_had_enobuf = 0; } - outchain = endoutchain = NULL; + endoutchain = NULL; auth = NULL; auth_offset = 0; if (bundle_at || hbflag) { @@ -8365,27 +8717,12 @@ no_data_fill: } else { asoc->time_last_sent = *now; } - data_list[0]->do_rtt = 1; + if (net->rto_needed) { + data_list[0]->do_rtt = 1; + net->rto_needed = 0; + } SCTP_STAT_INCR_BY(sctps_senddata, bundle_at); sctp_clean_up_datalist(stcb, asoc, data_list, bundle_at, net); - if (SCTP_BASE_SYSCTL(sctp_early_fr)) { - if (net->flight_size < net->cwnd) { - /* start or restart it */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net, - SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2); - } - SCTP_STAT_INCR(sctps_earlyfrstrout); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net); - } else { - /* stop it if its running */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpout); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net, - SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3); - } - } - } } if (one_chunk) { break; @@ -8413,7 +8750,7 @@ no_data_fill: } else { *reason_code = 5; } - sctp_clean_up_ctl(stcb, asoc); + sctp_clean_up_ctl(stcb, asoc, so_locked); return (0); } @@ -8438,7 +8775,7 @@ sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err) chk->copy_by_ref = 0; SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_DONTWAIT); if (op_err == NULL) { - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } chk->send_size = 0; @@ -8454,8 +8791,7 @@ sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err) chk->flags = 0; chk->asoc = &stcb->asoc; chk->data = op_err; - chk->whoTo = chk->asoc->primary_destination; - atomic_add_int(&chk->whoTo->ref_count, 1); + chk->whoTo = NULL; hdr = mtod(op_err, struct sctp_chunkhdr *); hdr->chunk_type = SCTP_OPERATION_ERROR; hdr->chunk_flags = 0; @@ -8511,12 +8847,10 @@ sctp_send_cookie_echo(struct mbuf *m, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = cookie; - while (mat) { + for (mat = cookie; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -8550,7 +8884,7 @@ sctp_send_cookie_echo(struct mbuf *m, chk->flags = CHUNK_FLAGS_FRAGMENT_OK; chk->asoc = &stcb->asoc; chk->data = cookie; - chk->whoTo = chk->asoc->primary_destination; + chk->whoTo = net; atomic_add_int(&chk->whoTo->ref_count, 1); TAILQ_INSERT_HEAD(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; @@ -8585,12 +8919,10 @@ sctp_send_heartbeat_ack(struct sctp_tcb *stcb, if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = outchain; - while (mat) { + for (mat = outchain; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -8660,10 +8992,10 @@ sctp_send_cookie_ack(struct sctp_tcb *stcb) chk->data = cookie_ack; if (chk->asoc->last_control_chunk_from != NULL) { chk->whoTo = chk->asoc->last_control_chunk_from; + atomic_add_int(&chk->whoTo->ref_count, 1); } else { - chk->whoTo = chk->asoc->primary_destination; + chk->whoTo = NULL; } - atomic_add_int(&chk->whoTo->ref_count, 1); hdr = mtod(cookie_ack, struct sctp_chunkhdr *); hdr->chunk_type = SCTP_COOKIE_ACK; hdr->chunk_flags = 0; @@ -8705,8 +9037,9 @@ sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net) chk->asoc = &stcb->asoc; chk->data = m_shutdown_ack; chk->whoTo = net; - atomic_add_int(&net->ref_count, 1); - + if (chk->whoTo) { + atomic_add_int(&chk->whoTo->ref_count, 1); + } ack_cp = mtod(m_shutdown_ack, struct sctp_shutdown_ack_chunk *); ack_cp->ch.chunk_type = SCTP_SHUTDOWN_ACK; ack_cp->ch.chunk_flags = 0; @@ -8747,8 +9080,9 @@ sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net) chk->asoc = &stcb->asoc; chk->data = m_shutdown; chk->whoTo = net; - atomic_add_int(&net->ref_count, 1); - + if (chk->whoTo) { + atomic_add_int(&chk->whoTo->ref_count, 1); + } shutdown_cp = mtod(m_shutdown, struct sctp_shutdown_chunk *); shutdown_cp->ch.chunk_type = SCTP_SHUTDOWN; shutdown_cp->ch.chunk_flags = 0; @@ -8799,7 +9133,9 @@ sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked) chk->flags = CHUNK_FLAGS_FRAGMENT_OK; chk->asoc = &stcb->asoc; chk->whoTo = net; - atomic_add_int(&chk->whoTo->ref_count, 1); + if (chk->whoTo) { + atomic_add_int(&chk->whoTo->ref_count, 1); + } TAILQ_INSERT_TAIL(&chk->asoc->asconf_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; return; @@ -8814,7 +9150,7 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb) */ struct sctp_tmit_chunk *chk; struct sctp_asconf_ack *ack, *latest_ack; - struct mbuf *m_ack, *m; + struct mbuf *m_ack; struct sctp_nets *net = NULL; SCTP_TCB_LOCK_ASSERT(stcb); @@ -8829,17 +9165,27 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb) net = sctp_find_alternate_net(stcb, stcb->asoc.last_control_chunk_from, 0); if (net == NULL) { /* no alternate */ - if (stcb->asoc.last_control_chunk_from == NULL) - net = stcb->asoc.primary_destination; - else + if (stcb->asoc.last_control_chunk_from == NULL) { + if (stcb->asoc.alternate) { + net = stcb->asoc.alternate; + } else { + net = stcb->asoc.primary_destination; + } + } else { net = stcb->asoc.last_control_chunk_from; + } } } else { /* normal case */ - if (stcb->asoc.last_control_chunk_from == NULL) - net = stcb->asoc.primary_destination; - else + if (stcb->asoc.last_control_chunk_from == NULL) { + if (stcb->asoc.alternate) { + net = stcb->asoc.alternate; + } else { + net = stcb->asoc.primary_destination; + } + } else { net = stcb->asoc.last_control_chunk_from; + } } latest_ack->last_sent_to = net; @@ -8857,12 +9203,10 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb) if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { struct mbuf *mat; - mat = m_ack; - while (mat) { + for (mat = m_ack; mat; mat = SCTP_BUF_NEXT(mat)) { if (SCTP_BUF_IS_EXTENDED(mat)) { sctp_log_mb(mat, SCTP_MBUF_ICOPY); } - mat = SCTP_BUF_NEXT(mat); } } #endif @@ -8877,10 +9221,12 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb) chk->copy_by_ref = 0; chk->whoTo = net; + if (chk->whoTo) { + atomic_add_int(&chk->whoTo->ref_count, 1); + } chk->data = m_ack; chk->send_size = 0; /* Get size */ - m = m_ack; chk->send_size = ack->len; chk->rec.chunk_id.id = SCTP_ASCONF_ACK; chk->rec.chunk_id.can_take_data = 1; @@ -8888,7 +9234,6 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb) chk->snd_count = 0; chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* XXX */ chk->asoc = &stcb->asoc; - atomic_add_int(&chk->whoTo->ref_count, 1); TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; @@ -8974,7 +9319,6 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp, ctl_cnt++; if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) { fwd_tsn = 1; - fwd = chk; } /* * Add an AUTH chunk, if chunk requires it save the @@ -9006,13 +9350,15 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp, if ((error = sctp_lowlevel_chunk_output(inp, stcb, chk->whoTo, (struct sockaddr *)&chk->whoTo->ro._l_addr, m, auth_offset, auth, stcb->asoc.authinfo.active_keyid, - no_fragmentflg, 0, NULL, 0, + no_fragmentflg, 0, 0, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), - chk->whoTo->port, so_locked, NULL))) { + chk->whoTo->port, NULL, + 0, 0, + so_locked))) { SCTP_STAT_INCR(sctps_lowlevelerr); return (error); } - m = endofchain = NULL; + endofchain = NULL; auth = NULL; auth_offset = 0; /* @@ -9027,7 +9373,7 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp, return (0); } else { /* Clean up the fwd-tsn list */ - sctp_clean_up_ctl(stcb, asoc); + sctp_clean_up_ctl(stcb, asoc, so_locked); return (0); } } @@ -9053,7 +9399,7 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp, continue; } if (chk->data == NULL) { - printf("TSN:%x chk->snd_count:%d chk->sent:%d can't retran - no data\n", + SCTP_PRINTF("TSN:%x chk->snd_count:%d chk->sent:%d can't retran - no data\n", chk->rec.data.TSN_seq, chk->snd_count, chk->sent); continue; } @@ -9064,17 +9410,28 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp, chk->snd_count, SCTP_BASE_SYSCTL(sctp_max_retran_chunk)); atomic_add_int(&stcb->asoc.refcnt, 1); - sctp_abort_an_association(stcb->sctp_ep, stcb, 0, NULL, so_locked); + sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, so_locked); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); return (SCTP_RETRAN_EXIT); } /* pick up the net */ net = chk->whoTo; - if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - mtu = (net->mtu - SCTP_MIN_OVERHEAD); - } else { + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; + break; +#endif +#ifdef INET6 + case AF_INET6: + mtu = net->mtu - SCTP_MIN_OVERHEAD; + break; +#endif + default: + /* TSNH */ + mtu = net->mtu; + break; } if ((asoc->peers_rwnd < mtu) && (asoc->total_flight > 0)) { @@ -9182,16 +9539,13 @@ one_chunk_around: * now are there anymore forward from chk to pick * up? */ - fwd = TAILQ_NEXT(chk, sctp_next); - while (fwd) { + for (fwd = TAILQ_NEXT(chk, sctp_next); fwd != NULL; fwd = TAILQ_NEXT(fwd, sctp_next)) { if (fwd->sent != SCTP_DATAGRAM_RESEND) { /* Nope, not for retran */ - fwd = TAILQ_NEXT(fwd, sctp_next); continue; } if (fwd->whoTo != net) { /* Nope, not the net in question */ - fwd = TAILQ_NEXT(fwd, sctp_next); continue; } if (data_auth_reqd && (auth == NULL)) { @@ -9239,7 +9593,6 @@ one_chunk_around: if (bundle_at >= SCTP_MAX_DATA_BUNDLING) { break; } - fwd = TAILQ_NEXT(fwd, sctp_next); } else { /* can't fit so we are done */ break; @@ -9264,14 +9617,16 @@ one_chunk_around: if ((error = sctp_lowlevel_chunk_output(inp, stcb, net, (struct sockaddr *)&net->ro._l_addr, m, auth_offset, auth, auth_keyid, - no_fragmentflg, 0, NULL, 0, + no_fragmentflg, 0, 0, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), - net->port, so_locked, NULL))) { + net->port, NULL, + 0, 0, + so_locked))) { /* error, we could not output */ SCTP_STAT_INCR(sctps_lowlevelerr); return (error); } - m = endofchain = NULL; + endofchain = NULL; auth = NULL; auth_offset = 0; /* For HB's */ @@ -9399,12 +9754,10 @@ one_chunk_around: return (0); } - -static int +static void sctp_timer_validation(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct sctp_association *asoc, - int ret) + struct sctp_association *asoc) { struct sctp_nets *net; @@ -9412,14 +9765,18 @@ sctp_timer_validation(struct sctp_inpcb *inp, TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { /* Here is a timer */ - return (ret); + return; } } SCTP_TCB_LOCK_ASSERT(stcb); /* Gak, we did not have a timer somewhere */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Deadlock avoided starting timer on a dest at retran\n"); - sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, asoc->primary_destination); - return (ret); + if (asoc->alternate) { + sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, asoc->alternate); + } else { + sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, asoc->primary_destination); + } + return; } void @@ -9447,22 +9804,26 @@ sctp_chunk_output(struct sctp_inpcb *inp, */ struct sctp_association *asoc; struct sctp_nets *net; - int error = 0, num_out = 0, tot_out = 0, ret = 0, reason_code = 0, - burst_cnt = 0, burst_limit = 0; + int error = 0, num_out = 0, tot_out = 0, ret = 0, reason_code = 0; + unsigned int burst_cnt = 0; struct timeval now; int now_filled = 0; - int nagle_on = 0; + int nagle_on; int frag_point = sctp_get_frag_point(stcb, &stcb->asoc); int un_sent = 0; - int fr_done, tot_frs = 0; + int fr_done; + unsigned int tot_frs = 0; asoc = &stcb->asoc; + /* The Nagle algorithm is only applied when handling a send call. */ if (from_where == SCTP_OUTPUT_FROM_USR_SEND) { if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) { nagle_on = 0; } else { nagle_on = 1; } + } else { + nagle_on = 0; } SCTP_TCB_LOCK_ASSERT(stcb); @@ -9480,7 +9841,7 @@ sctp_chunk_output(struct sctp_inpcb *inp, * running, if so piggy-back the sack. */ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { - sctp_send_sack(stcb); + sctp_send_sack(stcb, so_locked); (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); } while (asoc->sent_queue_retran_cnt) { @@ -9525,7 +9886,7 @@ sctp_chunk_output(struct sctp_inpcb *inp, #ifdef SCTP_AUDITING_ENABLED sctp_auditing(8, inp, stcb, NULL); #endif - (void)sctp_timer_validation(inp, stcb, asoc, ret); + sctp_timer_validation(inp, stcb, asoc); return; } if (ret < 0) { @@ -9551,12 +9912,11 @@ sctp_chunk_output(struct sctp_inpcb *inp, &now, &now_filled, frag_point, so_locked); return; } - if (tot_frs > asoc->max_burst) { + if ((asoc->fr_max_burst > 0) && (tot_frs >= asoc->fr_max_burst)) { /* Hit FR burst limit */ return; } if ((num_out == 0) && (ret == 0)) { - /* No more retrans to send */ break; } @@ -9565,10 +9925,8 @@ sctp_chunk_output(struct sctp_inpcb *inp, sctp_auditing(12, inp, stcb, NULL); #endif /* Check for bad destinations, if they exist move chunks around. */ - burst_limit = asoc->max_burst; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == - SCTP_ADDR_NOT_REACHABLE) { + if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { /*- * if possible move things off of this address we * still may send below due to the dormant state but @@ -9578,40 +9936,35 @@ sctp_chunk_output(struct sctp_inpcb *inp, */ if (net->ref_count > 1) sctp_move_chunks_from_net(stcb, net); - } else if ((asoc->sctp_cmt_on_off == 1) && - (asoc->sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - /* - * JRS 5/14/07 - If CMT PF is on and the current - * destination is in PF state, move all queued data - * to an alternate desination. - */ - if (net->ref_count > 1) - sctp_move_chunks_from_net(stcb, net); } else { /*- * if ((asoc->sat_network) || (net->addr_is_local)) * { burst_limit = asoc->max_burst * * SCTP_SAT_NETWORK_BURST_INCR; } */ - if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst)) { - if ((net->flight_size + (burst_limit * net->mtu)) < net->cwnd) { - /* - * JRS - Use the congestion control - * given in the congestion control - * module - */ - asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, burst_limit); - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) { - sctp_log_maxburst(stcb, net, 0, burst_limit, SCTP_MAX_BURST_APPLIED); + if (asoc->max_burst > 0) { + if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst)) { + if ((net->flight_size + (asoc->max_burst * net->mtu)) < net->cwnd) { + /* + * JRS - Use the congestion + * control given in the + * congestion control module + */ + asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, asoc->max_burst); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) { + sctp_log_maxburst(stcb, net, 0, asoc->max_burst, SCTP_MAX_BURST_APPLIED); + } + SCTP_STAT_INCR(sctps_maxburstqueued); + } + net->fast_retran_ip = 0; + } else { + if (net->flight_size == 0) { + /* + * Should be decaying the + * cwnd here + */ + ; } - SCTP_STAT_INCR(sctps_maxburstqueued); - } - net->fast_retran_ip = 0; - } else { - if (net->flight_size == 0) { - /* Should be decaying the cwnd here */ - ; } } } @@ -9644,21 +9997,24 @@ sctp_chunk_output(struct sctp_inpcb *inp, } } if (nagle_on) { - /*- - * When nagle is on, we look at how much is un_sent, then - * if its smaller than an MTU and we have data in - * flight we stop. + /* + * When the Nagle algorithm is used, look at how + * much is unsent, then if its smaller than an MTU + * and we have data in flight we stop, except if we + * are handling a fragmented user message. */ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) + (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk))); if ((un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD)) && - (stcb->asoc.total_flight > 0)) { + (stcb->asoc.total_flight > 0) && + ((stcb->asoc.locked_on_sending == NULL) || + sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) { break; } } if (TAILQ_EMPTY(&asoc->control_send_queue) && TAILQ_EMPTY(&asoc->send_queue) && - TAILQ_EMPTY(&asoc->out_wheel)) { + stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) { /* Nothing left to send */ break; } @@ -9666,11 +10022,13 @@ sctp_chunk_output(struct sctp_inpcb *inp, /* Nothing left to send */ break; } - } while (num_out && (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) || - (burst_cnt < burst_limit))); + } while (num_out && + ((asoc->max_burst == 0) || + SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) || + (burst_cnt < asoc->max_burst))); if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) == 0) { - if (burst_cnt >= burst_limit) { + if ((asoc->max_burst > 0) && (burst_cnt >= asoc->max_burst)) { SCTP_STAT_INCR(sctps_maxburstqueued); asoc->burst_limit_applied = 1; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) { @@ -9698,13 +10056,13 @@ sctp_chunk_output(struct sctp_inpcb *inp, int -sctp_output(inp, m, addr, control, p, flags) - struct sctp_inpcb *inp; - struct mbuf *m; - struct sockaddr *addr; - struct mbuf *control; - struct thread *p; - int flags; +sctp_output( + struct sctp_inpcb *inp, + struct mbuf *m, + struct sockaddr *addr, + struct mbuf *control, + struct thread *p, + int flags) { if (inp == NULL) { SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); @@ -9738,10 +10096,9 @@ send_forward_tsn(struct sctp_tcb *stcb, chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; /* Do we correct its output location? */ - if (chk->whoTo != asoc->primary_destination) { + if (chk->whoTo) { sctp_free_remote_addr(chk->whoTo); - chk->whoTo = asoc->primary_destination; - atomic_add_int(&chk->whoTo->ref_count, 1); + chk->whoTo = NULL; } goto sctp_fill_in_rest; } @@ -9759,14 +10116,12 @@ send_forward_tsn(struct sctp_tcb *stcb, chk->whoTo = NULL; chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA); if (chk->data == NULL) { - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; - chk->whoTo = asoc->primary_destination; - atomic_add_int(&chk->whoTo->ref_count, 1); TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; sctp_fill_in_rest: @@ -9783,7 +10138,8 @@ sctp_fill_in_rest: unsigned int cnt_of_skipped = 0; TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) { - if (at->sent != SCTP_FORWARD_TSN_SKIP) { + if ((at->sent != SCTP_FORWARD_TSN_SKIP) && + (at->sent != SCTP_DATAGRAM_NR_ACKED)) { /* no more to look at */ break; } @@ -9831,12 +10187,14 @@ sctp_fill_in_rest: * we report. */ at = TAILQ_FIRST(&asoc->sent_queue); - for (i = 0; i < cnt_of_skipped; i++) { - tp1 = TAILQ_NEXT(at, sctp_next); - if (tp1 == NULL) { - break; + if (at != NULL) { + for (i = 0; i < cnt_of_skipped; i++) { + tp1 = TAILQ_NEXT(at, sctp_next); + if (tp1 == NULL) { + break; + } + at = tp1; } - at = tp1; } if (at && SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) { sctp_misc_ints(SCTP_FWD_TSN_CHECK, @@ -9900,11 +10258,14 @@ sctp_fill_in_rest: } } return; - } void -sctp_send_sack(struct sctp_tcb *stcb) +sctp_send_sack(struct sctp_tcb *stcb, int so_locked +#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) + SCTP_UNUSED +#endif +) { /*- * Queue up a SACK or NR-SACK in the control queue. @@ -9957,8 +10318,10 @@ sctp_send_sack(struct sctp_tcb *stcb) sctp_m_freem(a_chk->data); a_chk->data = NULL; } - sctp_free_remote_addr(a_chk->whoTo); - a_chk->whoTo = NULL; + if (a_chk->whoTo) { + sctp_free_remote_addr(a_chk->whoTo); + a_chk->whoTo = NULL; + } break; } } @@ -9990,13 +10353,13 @@ sctp_send_sack(struct sctp_tcb *stcb) a_chk->whoTo = NULL; if ((asoc->numduptsns) || - (asoc->last_data_chunk_from->dest_state & SCTP_ADDR_NOT_REACHABLE)) { + (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE))) { /*- * Ok, we have some duplicates or the destination for the * sack is unreachable, lets see if we can select an * alternate than asoc->last_data_chunk_from */ - if ((!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_NOT_REACHABLE)) && + if ((asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE) && (asoc->used_alt_onsack > asoc->numnets)) { /* We used an alt last time, don't this time */ a_chk->whoTo = NULL; @@ -10020,7 +10383,7 @@ sctp_send_sack(struct sctp_tcb *stcb) if (a_chk->whoTo) { atomic_add_int(&a_chk->whoTo->ref_count, 1); } - if (compare_with_wrap(asoc->highest_tsn_inside_map, asoc->highest_tsn_inside_nr_map, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->highest_tsn_inside_map, asoc->highest_tsn_inside_nr_map)) { highest_tsn = asoc->highest_tsn_inside_map; } else { highest_tsn = asoc->highest_tsn_inside_nr_map; @@ -10046,7 +10409,7 @@ sctp_send_sack(struct sctp_tcb *stcb) sctp_m_freem(a_chk->data); a_chk->data = NULL; } - sctp_free_a_chunk(stcb, a_chk); + sctp_free_a_chunk(stcb, a_chk, so_locked); /* sa_ignore NO_NULL_CHK */ if (stcb->asoc.delayed_ack) { sctp_timer_stop(SCTP_TIMER_TYPE_RECV, @@ -10067,15 +10430,9 @@ sctp_send_sack(struct sctp_tcb *stcb) limit = mtod(a_chk->data, caddr_t); limit += space; - /* 0x01 is used by nonce for ecn */ - if ((SCTP_BASE_SYSCTL(sctp_ecn_enable)) && - (SCTP_BASE_SYSCTL(sctp_ecn_nonce)) && - (asoc->peer_supports_ecn_nonce)) - flags = (asoc->receiver_nonce_sum & SCTP_SACK_NONCE_SUM); - else - flags = 0; + flags = 0; - if ((asoc->sctp_cmt_on_off == 1) && + if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { /*- * CMT DAC algorithm: If 2 (i.e., 0x10) packets have been @@ -10115,15 +10472,15 @@ sctp_send_sack(struct sctp_tcb *stcb) } } - if (compare_with_wrap(asoc->mapping_array_base_tsn, asoc->cumulative_tsn, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->mapping_array_base_tsn, asoc->cumulative_tsn)) { offset = 1; } else { offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn; } if (((type == SCTP_SELECTIVE_ACK) && - compare_with_wrap(highest_tsn, asoc->cumulative_tsn, MAX_TSN)) || + SCTP_TSN_GT(highest_tsn, asoc->cumulative_tsn)) || ((type == SCTP_NR_SELECTIVE_ACK) && - compare_with_wrap(asoc->highest_tsn_inside_map, asoc->cumulative_tsn, MAX_TSN))) { + SCTP_TSN_GT(asoc->highest_tsn_inside_map, asoc->cumulative_tsn))) { /* we have a gap .. maybe */ for (i = 0; i < siz; i++) { tsn_map = asoc->mapping_array[i]; @@ -10195,12 +10552,12 @@ sctp_send_sack(struct sctp_tcb *stcb) siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_nr_map + 7) / 8; } - if (compare_with_wrap(asoc->mapping_array_base_tsn, asoc->cumulative_tsn, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->mapping_array_base_tsn, asoc->cumulative_tsn)) { offset = 1; } else { offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn; } - if (compare_with_wrap(asoc->highest_tsn_inside_nr_map, asoc->cumulative_tsn, MAX_TSN)) { + if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->cumulative_tsn)) { /* we have a gap .. maybe */ for (i = 0; i < siz; i++) { tsn_map = asoc->nr_mapping_array[i]; @@ -10321,44 +10678,50 @@ sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked #endif ) { - struct mbuf *m_abort; - struct mbuf *m_out = NULL, *m_end = NULL; - struct sctp_abort_chunk *abort = NULL; - int sz; - uint32_t auth_offset = 0; + struct mbuf *m_abort, *m, *m_last; + struct mbuf *m_out, *m_end = NULL; + struct sctp_abort_chunk *abort; struct sctp_auth_chunk *auth = NULL; + struct sctp_nets *net; + uint32_t auth_offset = 0; + uint16_t cause_len, chunk_len, padding_len; + SCTP_TCB_LOCK_ASSERT(stcb); /*- * Add an AUTH chunk, if chunk requires it and save the offset into * the chain for AUTH */ if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION, stcb->asoc.peer_auth_chunks)) { - m_out = sctp_add_auth_chunk(m_out, &m_end, &auth, &auth_offset, + m_out = sctp_add_auth_chunk(NULL, &m_end, &auth, &auth_offset, stcb, SCTP_ABORT_ASSOCIATION); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); + } else { + m_out = NULL; } - SCTP_TCB_LOCK_ASSERT(stcb); m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_DONTWAIT, 1, MT_HEADER); if (m_abort == NULL) { - /* no mbuf's */ - if (m_out) + if (m_out) { sctp_m_freem(m_out); + } + if (operr) { + sctp_m_freem(operr); + } return; } /* link in any error */ SCTP_BUF_NEXT(m_abort) = operr; - sz = 0; - if (operr) { - struct mbuf *n; - - n = operr; - while (n) { - sz += SCTP_BUF_LEN(n); - n = SCTP_BUF_NEXT(n); + cause_len = 0; + m_last = NULL; + for (m = operr; m; m = SCTP_BUF_NEXT(m)) { + cause_len += (uint16_t) SCTP_BUF_LEN(m); + if (SCTP_BUF_NEXT(m) == NULL) { + m_last = m; } } - SCTP_BUF_LEN(m_abort) = sizeof(*abort); + SCTP_BUF_LEN(m_abort) = sizeof(struct sctp_abort_chunk); + chunk_len = (uint16_t) sizeof(struct sctp_abort_chunk) + cause_len; + padding_len = SCTP_SIZE32(chunk_len) - chunk_len; if (m_out == NULL) { /* NO Auth chunk prepended, so reserve space in front */ SCTP_BUF_RESV_UF(m_abort, SCTP_MIN_OVERHEAD); @@ -10367,19 +10730,30 @@ sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked /* Put AUTH chunk at the front of the chain */ SCTP_BUF_NEXT(m_end) = m_abort; } - - /* fill in the ABORT chunk */ + if (stcb->asoc.alternate) { + net = stcb->asoc.alternate; + } else { + net = stcb->asoc.primary_destination; + } + /* Fill in the ABORT chunk header. */ abort = mtod(m_abort, struct sctp_abort_chunk *); abort->ch.chunk_type = SCTP_ABORT_ASSOCIATION; abort->ch.chunk_flags = 0; - abort->ch.chunk_length = htons(sizeof(*abort) + sz); - - (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, - stcb->asoc.primary_destination, - (struct sockaddr *)&stcb->asoc.primary_destination->ro._l_addr, - m_out, auth_offset, auth, stcb->asoc.authinfo.active_keyid, 1, 0, NULL, 0, + abort->ch.chunk_length = htons(chunk_len); + /* Add padding, if necessary. */ + if (padding_len > 0) { + if ((m_last == NULL) || sctp_add_pad_tombuf(m_last, padding_len)) { + sctp_m_freem(m_out); + return; + } + } + (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net, + (struct sockaddr *)&net->ro._l_addr, + m_out, auth_offset, auth, stcb->asoc.authinfo.active_keyid, 1, 0, 0, stcb->sctp_ep->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), - stcb->asoc.primary_destination->port, so_locked, NULL); + stcb->asoc.primary_destination->port, NULL, + 0, 0, + so_locked); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } @@ -10413,159 +10787,217 @@ sctp_send_shutdown_complete(struct sctp_tcb *stcb, SCTP_BUF_LEN(m_shutdown_comp) = sizeof(struct sctp_shutdown_complete_chunk); (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net, (struct sockaddr *)&net->ro._l_addr, - m_shutdown_comp, 0, NULL, 0, 1, 0, NULL, 0, + m_shutdown_comp, 0, NULL, 0, 1, 0, 0, stcb->sctp_ep->sctp_lport, stcb->rport, htonl(vtag), - net->port, SCTP_SO_NOT_LOCKED, NULL); + net->port, NULL, + 0, 0, + SCTP_SO_NOT_LOCKED); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); return; } -void -sctp_send_shutdown_complete2(struct mbuf *m, int iphlen, struct sctphdr *sh, +static void +sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, uint32_t vtag, + uint8_t type, struct mbuf *cause, + uint8_t use_mflowid, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { - /* formulate and SEND a SHUTDOWN-COMPLETE */ struct mbuf *o_pak; struct mbuf *mout; - struct ip *iph, *iph_out; - struct udphdr *udp = NULL; + struct sctphdr *shout; + struct sctp_chunkhdr *ch; + struct udphdr *udp; + int len, cause_len, padding_len, ret; +#ifdef INET + struct sockaddr_in *src_sin, *dst_sin; + struct ip *ip; + +#endif #ifdef INET6 - struct ip6_hdr *ip6, *ip6_out; + struct sockaddr_in6 *src_sin6, *dst_sin6; + struct ip6_hdr *ip6; #endif - int offset_out, len, mlen; - struct sctp_shutdown_complete_msg *comp_cp; - iph = mtod(m, struct ip *); - switch (iph->ip_v) { - case IPVERSION: - len = (sizeof(struct ip) + sizeof(struct sctp_shutdown_complete_msg)); + /* Compute the length of the cause and add final padding. */ + cause_len = 0; + if (cause != NULL) { + struct mbuf *m_at, *m_last = NULL; + + for (m_at = cause; m_at; m_at = SCTP_BUF_NEXT(m_at)) { + if (SCTP_BUF_NEXT(m_at) == NULL) + m_last = m_at; + cause_len += SCTP_BUF_LEN(m_at); + } + padding_len = cause_len % 4; + if (padding_len != 0) { + padding_len = 4 - padding_len; + } + if (padding_len != 0) { + if (sctp_add_pad_tombuf(m_last, padding_len)) { + sctp_m_freem(cause); + return; + } + } + } else { + padding_len = 0; + } + /* Get an mbuf for the header. */ + len = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); + switch (dst->sa_family) { +#ifdef INET + case AF_INET: + len += sizeof(struct ip); break; +#endif #ifdef INET6 - case IPV6_VERSION >> 4: - len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_shutdown_complete_msg)); + case AF_INET6: + len += sizeof(struct ip6_hdr); break; #endif default: - return; + break; } if (port) { len += sizeof(struct udphdr); } mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA); if (mout == NULL) { + if (cause) { + sctp_m_freem(cause); + } return; } SCTP_BUF_RESV_UF(mout, max_linkhdr); SCTP_BUF_LEN(mout) = len; - SCTP_BUF_NEXT(mout) = NULL; - iph_out = NULL; + SCTP_BUF_NEXT(mout) = cause; + if (use_mflowid != 0) { + mout->m_pkthdr.flowid = mflowid; + mout->m_flags |= M_FLOWID; + } +#ifdef INET + ip = NULL; +#endif #ifdef INET6 - ip6_out = NULL; + ip6 = NULL; #endif - offset_out = 0; - - switch (iph->ip_v) { - case IPVERSION: - iph_out = mtod(mout, struct ip *); - - /* Fill in the IP header for the ABORT */ - iph_out->ip_v = IPVERSION; - iph_out->ip_hl = (sizeof(struct ip) / 4); - iph_out->ip_tos = (u_char)0; - iph_out->ip_id = 0; - iph_out->ip_off = 0; - iph_out->ip_ttl = MAXTTL; + switch (dst->sa_family) { +#ifdef INET + case AF_INET: + src_sin = (struct sockaddr_in *)src; + dst_sin = (struct sockaddr_in *)dst; + ip = mtod(mout, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = (sizeof(struct ip) >> 2); + ip->ip_tos = 0; + ip->ip_id = ip_newid(); + ip->ip_off = 0; + ip->ip_ttl = MODULE_GLOBAL(ip_defttl); if (port) { - iph_out->ip_p = IPPROTO_UDP; + ip->ip_p = IPPROTO_UDP; } else { - iph_out->ip_p = IPPROTO_SCTP; + ip->ip_p = IPPROTO_SCTP; } - iph_out->ip_src.s_addr = iph->ip_dst.s_addr; - iph_out->ip_dst.s_addr = iph->ip_src.s_addr; - - /* let IP layer calculate this */ - iph_out->ip_sum = 0; - offset_out += sizeof(*iph_out); - comp_cp = (struct sctp_shutdown_complete_msg *)( - (caddr_t)iph_out + offset_out); + ip->ip_src.s_addr = dst_sin->sin_addr.s_addr; + ip->ip_dst.s_addr = src_sin->sin_addr.s_addr; + ip->ip_sum = 0; + len = sizeof(struct ip); + shout = (struct sctphdr *)((caddr_t)ip + len); break; +#endif #ifdef INET6 - case IPV6_VERSION >> 4: - ip6 = (struct ip6_hdr *)iph; - ip6_out = mtod(mout, struct ip6_hdr *); - - /* Fill in the IPv6 header for the ABORT */ - ip6_out->ip6_flow = ip6->ip6_flow; - ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim); + case AF_INET6: + src_sin6 = (struct sockaddr_in6 *)src; + dst_sin6 = (struct sockaddr_in6 *)dst; + ip6 = mtod(mout, struct ip6_hdr *); + ip6->ip6_flow = htonl(0x60000000); + if (V_ip6_auto_flowlabel) { + ip6->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); + } + ip6->ip6_hlim = MODULE_GLOBAL(ip6_defhlim); if (port) { - ip6_out->ip6_nxt = IPPROTO_UDP; + ip6->ip6_nxt = IPPROTO_UDP; } else { - ip6_out->ip6_nxt = IPPROTO_SCTP; + ip6->ip6_nxt = IPPROTO_SCTP; } - ip6_out->ip6_src = ip6->ip6_dst; - ip6_out->ip6_dst = ip6->ip6_src; - /* - * ?? The old code had both the iph len + payload, I think - * this is wrong and would never have worked - */ - ip6_out->ip6_plen = sizeof(struct sctp_shutdown_complete_msg); - offset_out += sizeof(*ip6_out); - comp_cp = (struct sctp_shutdown_complete_msg *)( - (caddr_t)ip6_out + offset_out); + ip6->ip6_src = dst_sin6->sin6_addr; + ip6->ip6_dst = src_sin6->sin6_addr; + len = sizeof(struct ip6_hdr); + shout = (struct sctphdr *)((caddr_t)ip6 + len); break; -#endif /* INET6 */ +#endif default: - /* Currently not supported. */ - sctp_m_freem(mout); - return; + len = 0; + shout = mtod(mout, struct sctphdr *); + break; } if (port) { - udp = (struct udphdr *)comp_cp; + if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) { + sctp_m_freem(mout); + return; + } + udp = (struct udphdr *)shout; udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); udp->uh_dport = port; - udp->uh_ulen = htons(sizeof(struct sctp_shutdown_complete_msg) + sizeof(struct udphdr)); - if (iph_out) - udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP)); - offset_out += sizeof(struct udphdr); - comp_cp = (struct sctp_shutdown_complete_msg *)((caddr_t)comp_cp + sizeof(struct udphdr)); + udp->uh_sum = 0; + udp->uh_ulen = htons(sizeof(struct udphdr) + + sizeof(struct sctphdr) + + sizeof(struct sctp_chunkhdr) + + cause_len + padding_len); + len += sizeof(struct udphdr); + shout = (struct sctphdr *)((caddr_t)shout + sizeof(struct udphdr)); + } else { + udp = NULL; + } + shout->src_port = sh->dest_port; + shout->dest_port = sh->src_port; + shout->checksum = 0; + if (vtag) { + shout->v_tag = htonl(vtag); + } else { + shout->v_tag = sh->v_tag; + } + len += sizeof(struct sctphdr); + ch = (struct sctp_chunkhdr *)((caddr_t)shout + sizeof(struct sctphdr)); + ch->chunk_type = type; + if (vtag) { + ch->chunk_flags = 0; + } else { + ch->chunk_flags = SCTP_HAD_NO_TCB; } + ch->chunk_length = htons(sizeof(struct sctp_chunkhdr) + cause_len); + len += sizeof(struct sctp_chunkhdr); + len += cause_len + padding_len; + if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { - /* no mbuf's */ sctp_m_freem(mout); return; } - /* Now copy in and fill in the ABORT tags etc. */ - comp_cp->sh.src_port = sh->dest_port; - comp_cp->sh.dest_port = sh->src_port; - comp_cp->sh.checksum = 0; - comp_cp->sh.v_tag = sh->v_tag; - comp_cp->shut_cmp.ch.chunk_flags = SCTP_HAD_NO_TCB; - comp_cp->shut_cmp.ch.chunk_type = SCTP_SHUTDOWN_COMPLETE; - comp_cp->shut_cmp.ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk)); - - if (iph_out != NULL) { - sctp_route_t ro; - int ret; - - mlen = SCTP_BUF_LEN(mout); - bzero(&ro, sizeof ro); - /* set IPv4 length */ - iph_out->ip_len = mlen; -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(mout, mlen); -#endif + SCTP_ATTACH_CHAIN(o_pak, mout, len); + switch (dst->sa_family) { +#ifdef INET + case AF_INET: + if (port) { + if (V_udp_cksum) { + udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP)); + } else { + udp->uh_sum = 0; + } + } + ip->ip_len = len; if (port) { #if defined(SCTP_WITH_NO_CSUM) SCTP_STAT_INCR(sctps_sendnocrc); #else - comp_cp->sh.checksum = sctp_calculate_cksum(mout, offset_out); + shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip) + sizeof(struct udphdr)); SCTP_STAT_INCR(sctps_sendswcrc); #endif - SCTP_ENABLE_UDP_CSUM(mout); + if (V_udp_cksum) { + SCTP_ENABLE_UDP_CSUM(o_pak); + } } else { #if defined(SCTP_WITH_NO_CSUM) SCTP_STAT_INCR(sctps_sendnocrc); @@ -10575,183 +11007,99 @@ sctp_send_shutdown_complete2(struct mbuf *m, int iphlen, struct sctphdr *sh, SCTP_STAT_INCR(sctps_sendhwcrc); #endif } - SCTP_ATTACH_CHAIN(o_pak, mout, mlen); - /* out it goes */ - SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id); - - /* Free the route if we got one back */ - if (ro.ro_rt) - RTFREE(ro.ro_rt); - } +#ifdef SCTP_PACKET_LOGGING + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) { + sctp_packet_log(o_pak); + } +#endif + SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id); + break; +#endif #ifdef INET6 - if (ip6_out != NULL) { - struct route_in6 ro; - int ret; - struct ifnet *ifp = NULL; - - bzero(&ro, sizeof(ro)); - mlen = SCTP_BUF_LEN(mout); -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(mout, mlen); -#endif - SCTP_ATTACH_CHAIN(o_pak, mout, mlen); + case AF_INET6: + ip6->ip6_plen = len - sizeof(struct ip6_hdr); if (port) { #if defined(SCTP_WITH_NO_CSUM) SCTP_STAT_INCR(sctps_sendnocrc); #else - comp_cp->sh.checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr)); + shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr)); SCTP_STAT_INCR(sctps_sendswcrc); #endif - if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), mlen - sizeof(struct ip6_hdr))) == 0) { + if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) { udp->uh_sum = 0xffff; } } else { #if defined(SCTP_WITH_NO_CSUM) SCTP_STAT_INCR(sctps_sendnocrc); #else - mout->m_pkthdr.csum_flags = CSUM_SCTP; - mout->m_pkthdr.csum_data = 0; - SCTP_STAT_INCR(sctps_sendhwcrc); + shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr)); + SCTP_STAT_INCR(sctps_sendswcrc); #endif } - SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id); - - /* Free the route if we got one back */ - if (ro.ro_rt) - RTFREE(ro.ro_rt); - } +#ifdef SCTP_PACKET_LOGGING + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) { + sctp_packet_log(o_pak); + } #endif + SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id); + break; +#endif + default: + SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n", + dst->sa_family); + sctp_m_freem(mout); + SCTP_LTRACE_ERR_RET_PKT(mout, NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT); + return; + } SCTP_STAT_INCR(sctps_sendpackets); SCTP_STAT_INCR_COUNTER64(sctps_outpackets); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); return; - } -static struct sctp_nets * -sctp_select_hb_destination(struct sctp_tcb *stcb, struct timeval *now) +void +sctp_send_shutdown_complete2(struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) { - struct sctp_nets *net, *hnet; - int ms_goneby, highest_ms, state_overide = 0; - - (void)SCTP_GETTIME_TIMEVAL(now); - highest_ms = 0; - hnet = NULL; - SCTP_TCB_LOCK_ASSERT(stcb); - TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - if ( - ((net->dest_state & SCTP_ADDR_NOHB) && ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) || - (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE) - ) { - /* - * Skip this guy from consideration if HB is off AND - * its confirmed - */ - continue; - } - if (sctp_destination_is_reachable(stcb, (struct sockaddr *)&net->ro._l_addr) == 0) { - /* skip this dest net from consideration */ - continue; - } - if (net->last_sent_time.tv_sec) { - /* Sent to so we subtract */ - ms_goneby = (now->tv_sec - net->last_sent_time.tv_sec) * 1000; - } else - /* Never been sent to */ - ms_goneby = 0x7fffffff; - /*- - * When the address state is unconfirmed but still - * considered reachable, we HB at a higher rate. Once it - * goes confirmed OR reaches the "unreachable" state, thenw - * we cut it back to HB at a more normal pace. - */ - if ((net->dest_state & (SCTP_ADDR_UNCONFIRMED | SCTP_ADDR_NOT_REACHABLE)) == SCTP_ADDR_UNCONFIRMED) { - state_overide = 1; - } else { - state_overide = 0; - } - - if ((((unsigned int)ms_goneby >= net->RTO) || (state_overide)) && - (ms_goneby > highest_ms)) { - highest_ms = ms_goneby; - hnet = net; - } - } - if (hnet && - ((hnet->dest_state & (SCTP_ADDR_UNCONFIRMED | SCTP_ADDR_NOT_REACHABLE)) == SCTP_ADDR_UNCONFIRMED)) { - state_overide = 1; - } else { - state_overide = 0; - } - - if (hnet && highest_ms && (((unsigned int)highest_ms >= hnet->RTO) || state_overide)) { - /*- - * Found the one with longest delay bounds OR it is - * unconfirmed and still not marked unreachable. - */ - SCTPDBG(SCTP_DEBUG_OUTPUT4, "net:%p is the hb winner -", hnet); -#ifdef SCTP_DEBUG - if (hnet) { - SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT4, - (struct sockaddr *)&hnet->ro._l_addr); - } else { - SCTPDBG(SCTP_DEBUG_OUTPUT4, " none\n"); - } -#endif - /* update the timer now */ - hnet->last_sent_time = *now; - return (hnet); - } - /* Nothing to HB */ - return (NULL); + sctp_send_resp_msg(src, dst, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL, + use_mflowid, mflowid, + vrf_id, port); } -int -sctp_send_hb(struct sctp_tcb *stcb, int user_req, struct sctp_nets *u_net) +void +sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked +#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) + SCTP_UNUSED +#endif +) { struct sctp_tmit_chunk *chk; - struct sctp_nets *net; struct sctp_heartbeat_chunk *hb; struct timeval now; - struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; SCTP_TCB_LOCK_ASSERT(stcb); - if (user_req == 0) { - net = sctp_select_hb_destination(stcb, &now); - if (net == NULL) { - /*- - * All our busy none to send to, just start the - * timer again. - */ - if (stcb->asoc.state == 0) { - return (0); - } - sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, - stcb->sctp_ep, - stcb, - net); - return (0); - } - } else { - net = u_net; - if (net == NULL) { - return (0); - } - (void)SCTP_GETTIME_TIMEVAL(&now); + if (net == NULL) { + return; } - sin = (struct sockaddr_in *)&net->ro._l_addr; - if (sin->sin_family != AF_INET) { - if (sin->sin_family != AF_INET6) { - /* huh */ - return (0); - } + (void)SCTP_GETTIME_TIMEVAL(&now); + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: + break; +#endif +#ifdef INET6 + case AF_INET6: + break; +#endif + default: + return; } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n"); - return (0); + return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST; @@ -10761,8 +11109,8 @@ sctp_send_hb(struct sctp_tcb *stcb, int user_req, struct sctp_nets *u_net) chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER); if (chk->data == NULL) { - sctp_free_a_chunk(stcb, chk); - return (0); + sctp_free_a_chunk(stcb, chk, so_locked); + return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); SCTP_BUF_LEN(chk->data) = chk->send_size; @@ -10783,9 +11131,8 @@ sctp_send_hb(struct sctp_tcb *stcb, int user_req, struct sctp_nets *u_net) hb->heartbeat.hb_info.time_value_1 = now.tv_sec; hb->heartbeat.hb_info.time_value_2 = now.tv_usec; /* Did our user request this one, put it in */ - hb->heartbeat.hb_info.user_req = user_req; - hb->heartbeat.hb_info.addr_family = sin->sin_family; - hb->heartbeat.hb_info.addr_len = sin->sin_len; + hb->heartbeat.hb_info.addr_family = net->ro._l_addr.sa.sa_family; + hb->heartbeat.hb_info.addr_len = net->ro._l_addr.sa.sa_len; if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { /* * we only take from the entropy pool if the address is not @@ -10797,64 +11144,30 @@ sctp_send_hb(struct sctp_tcb *stcb, int user_req, struct sctp_nets *u_net) net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = 0; net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = 0; } - if (sin->sin_family == AF_INET) { - memcpy(hb->heartbeat.hb_info.address, &sin->sin_addr, sizeof(sin->sin_addr)); - } else if (sin->sin_family == AF_INET6) { - /* We leave the scope the way it is in our lookup table. */ - sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; - memcpy(hb->heartbeat.hb_info.address, &sin6->sin6_addr, sizeof(sin6->sin6_addr)); - } else { - /* huh compiler bug */ - return (0); - } - - /* - * JRS 5/14/07 - In CMT PF, the T3 timer is used to track - * PF-heartbeats. Because of this, threshold management is done by - * the t3 timer handler, and does not need to be done upon the send - * of a PF-heartbeat. If CMT PF is on and the destination to which a - * heartbeat is being sent is in PF state, do NOT do threshold - * management. - */ - if ((stcb->asoc.sctp_cmt_pf == 0) || - ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF)) { - /* ok we have a destination that needs a beat */ - /* lets do the theshold management Qiaobing style */ - if (sctp_threshold_management(stcb->sctp_ep, stcb, net, - stcb->asoc.max_send_times)) { - /*- - * we have lost the association, in a way this is - * quite bad since we really are one less time since - * we really did not send yet. This is the down side - * to the Q's style as defined in the RFC and not my - * alternate style defined in the RFC. - */ - if (chk->data != NULL) { - sctp_m_freem(chk->data); - chk->data = NULL; - } - /* - * Here we do NOT use the macro since the - * association is now gone. - */ - if (chk->whoTo) { - sctp_free_remote_addr(chk->whoTo); - chk->whoTo = NULL; - } - sctp_free_a_chunk((struct sctp_tcb *)NULL, chk); - return (-1); - } + switch (net->ro._l_addr.sa.sa_family) { +#ifdef INET + case AF_INET: + memcpy(hb->heartbeat.hb_info.address, + &net->ro._l_addr.sin.sin_addr, + sizeof(net->ro._l_addr.sin.sin_addr)); + break; +#endif +#ifdef INET6 + case AF_INET6: + memcpy(hb->heartbeat.hb_info.address, + &net->ro._l_addr.sin6.sin6_addr, + sizeof(net->ro._l_addr.sin6.sin6_addr)); + break; +#endif + default: + return; + break; } net->hb_responded = 0; TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); stcb->asoc.ctrl_queue_cnt++; SCTP_STAT_INCR(sctps_sendheartbeat); - /*- - * Call directly med level routine to put out the chunk. It will - * always tumble out control chunks aka HB but it may even tumble - * out data too. - */ - return (1); + return; } void @@ -10865,13 +11178,25 @@ sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, struct sctp_ecne_chunk *ecne; struct sctp_tmit_chunk *chk; + if (net == NULL) { + return; + } asoc = &stcb->asoc; SCTP_TCB_LOCK_ASSERT(stcb); TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { - if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) { + if ((chk->rec.chunk_id.id == SCTP_ECN_ECHO) && (net == chk->whoTo)) { /* found a previous ECN_ECHO update it if needed */ + uint32_t cnt, ctsn; + ecne = mtod(chk->data, struct sctp_ecne_chunk *); - ecne->tsn = htonl(high_tsn); + ctsn = ntohl(ecne->tsn); + if (SCTP_TSN_GT(high_tsn, ctsn)) { + ecne->tsn = htonl(high_tsn); + SCTP_STAT_INCR(sctps_queue_upd_ecne); + } + cnt = ntohl(ecne->num_pkts_since_cwr); + cnt++; + ecne->num_pkts_since_cwr = htonl(cnt); return; } } @@ -10881,14 +11206,14 @@ sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, return; } chk->copy_by_ref = 0; - SCTP_STAT_INCR(sctps_sendecne); + SCTP_STAT_INCR(sctps_queue_upd_ecne); chk->rec.chunk_id.id = SCTP_ECN_ECHO; chk->rec.chunk_id.can_take_data = 0; chk->asoc = &stcb->asoc; chk->send_size = sizeof(struct sctp_ecne_chunk); chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER); if (chk->data == NULL) { - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); @@ -10897,33 +11222,28 @@ sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, chk->snd_count = 0; chk->whoTo = net; atomic_add_int(&chk->whoTo->ref_count, 1); + stcb->asoc.ecn_echo_cnt_onq++; ecne = mtod(chk->data, struct sctp_ecne_chunk *); ecne->ch.chunk_type = SCTP_ECN_ECHO; ecne->ch.chunk_flags = 0; ecne->ch.chunk_length = htons(sizeof(struct sctp_ecne_chunk)); ecne->tsn = htonl(high_tsn); - TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); + ecne->num_pkts_since_cwr = htonl(1); + TAILQ_INSERT_HEAD(&stcb->asoc.control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; } void sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, - struct mbuf *m, int iphlen, int bad_crc) + struct mbuf *m, int len, int iphlen, int bad_crc) { struct sctp_association *asoc; struct sctp_pktdrop_chunk *drp; struct sctp_tmit_chunk *chk; uint8_t *datap; - int len; int was_trunc = 0; - struct ip *iph; - -#ifdef INET6 - struct ip6_hdr *ip6h; - -#endif - int fullsz = 0, extra = 0; + int fullsz = 0; long spc; int offset; struct sctp_chunkhdr *ch, chunk_buf; @@ -10948,26 +11268,8 @@ sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, return; } chk->copy_by_ref = 0; - iph = mtod(m, struct ip *); - if (iph == NULL) { - sctp_free_a_chunk(stcb, chk); - return; - } - switch (iph->ip_v) { - case IPVERSION: - /* IPv4 */ - len = chk->send_size = iph->ip_len; - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - /* IPv6 */ - ip6h = mtod(m, struct ip6_hdr *); - len = chk->send_size = htons(ip6h->ip6_plen); - break; -#endif - default: - return; - } + len -= iphlen; + chk->send_size = len; /* Validate that we do not have an ABORT in here. */ offset = iphlen + sizeof(struct sctphdr); ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset, @@ -10988,7 +11290,7 @@ sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, * INIT-ACK, because we can't know if the initiation * tag is correct or not. */ - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; default: break; @@ -11003,7 +11305,7 @@ sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, /* * only send 1 mtu worth, trim off the excess on the end. */ - fullsz = len - extra; + fullsz = len; len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD; was_trunc = 1; } @@ -11011,7 +11313,7 @@ sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA); if (chk->data == NULL) { jump_out: - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); @@ -11048,10 +11350,10 @@ jump_out: if (net) { /* we should hit here */ chk->whoTo = net; + atomic_add_int(&chk->whoTo->ref_count, 1); } else { - chk->whoTo = asoc->primary_destination; + chk->whoTo = NULL; } - atomic_add_int(&chk->whoTo->ref_count, 1); chk->rec.chunk_id.id = SCTP_PACKET_DROPPED; chk->rec.chunk_id.can_take_data = 1; drp->ch.chunk_type = SCTP_PACKET_DROPPED; @@ -11081,26 +11383,37 @@ jump_out: } void -sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn) +sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn, uint8_t override) { struct sctp_association *asoc; struct sctp_cwr_chunk *cwr; struct sctp_tmit_chunk *chk; - asoc = &stcb->asoc; SCTP_TCB_LOCK_ASSERT(stcb); + if (net == NULL) { + return; + } + asoc = &stcb->asoc; TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { - if (chk->rec.chunk_id.id == SCTP_ECN_CWR) { - /* found a previous ECN_CWR update it if needed */ + if ((chk->rec.chunk_id.id == SCTP_ECN_CWR) && (net == chk->whoTo)) { + /* + * found a previous CWR queued to same destination + * update it if needed + */ + uint32_t ctsn; + cwr = mtod(chk->data, struct sctp_cwr_chunk *); - if (compare_with_wrap(high_tsn, ntohl(cwr->tsn), - MAX_TSN)) { + ctsn = ntohl(cwr->tsn); + if (SCTP_TSN_GT(high_tsn, ctsn)) { cwr->tsn = htonl(high_tsn); } + if (override & SCTP_CWR_REDUCE_OVERRIDE) { + /* Make sure override is carried */ + cwr->ch.chunk_flags |= SCTP_CWR_REDUCE_OVERRIDE; + } return; } } - /* nope could not find one to update so we must build one */ sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { return; @@ -11112,7 +11425,7 @@ sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn) chk->send_size = sizeof(struct sctp_cwr_chunk); chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER); if (chk->data == NULL) { - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); @@ -11123,7 +11436,7 @@ sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn) atomic_add_int(&chk->whoTo->ref_count, 1); cwr = mtod(chk->data, struct sctp_cwr_chunk *); cwr->ch.chunk_type = SCTP_ECN_CWR; - cwr->ch.chunk_flags = 0; + cwr->ch.chunk_flags = override; cwr->ch.chunk_length = htons(sizeof(struct sctp_cwr_chunk)); cwr->tsn = htonl(high_tsn); TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); @@ -11135,13 +11448,11 @@ sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk, int number_entries, uint16_t * list, uint32_t seq, uint32_t resp_seq, uint32_t last_sent) { - int len, old_len, i; + uint16_t len, old_len, i; struct sctp_stream_reset_out_request *req_out; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); - - old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ @@ -11175,19 +11486,16 @@ sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk, return; } - -void +static void sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk, int number_entries, uint16_t * list, uint32_t seq) { - int len, old_len, i; + uint16_t len, old_len, i; struct sctp_stream_reset_in_request *req_in; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); - - old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ @@ -11219,18 +11527,15 @@ sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk, return; } - -void +static void sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk, uint32_t seq) { - int len, old_len; + uint16_t len, old_len; struct sctp_stream_reset_tsn_request *req_tsn; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); - - old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ @@ -11254,13 +11559,11 @@ void sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk, uint32_t resp_seq, uint32_t result) { - int len, old_len; + uint16_t len, old_len; struct sctp_stream_reset_response *resp; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); - - old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ @@ -11279,22 +11582,18 @@ sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk, chk->send_size = SCTP_SIZE32(chk->book_size); SCTP_BUF_LEN(chk->data) = chk->send_size; return; - } - void sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk, uint32_t resp_seq, uint32_t result, uint32_t send_una, uint32_t recv_next) { - int len, old_len; + uint16_t len, old_len; struct sctp_stream_reset_response_tsn *resp; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); - - old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ @@ -11318,11 +11617,11 @@ sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk, } static void -sctp_add_a_stream(struct sctp_tmit_chunk *chk, +sctp_add_an_out_stream(struct sctp_tmit_chunk *chk, uint32_t seq, uint16_t adding) { - int len, old_len; + uint16_t len, old_len; struct sctp_chunkhdr *ch; struct sctp_stream_reset_add_strm *addstr; @@ -11335,7 +11634,7 @@ sctp_add_a_stream(struct sctp_tmit_chunk *chk, len = sizeof(struct sctp_stream_reset_add_strm); /* Fill it out. */ - addstr->ph.param_type = htons(SCTP_STR_RESET_ADD_STREAMS); + addstr->ph.param_type = htons(SCTP_STR_RESET_ADD_OUT_STREAMS); addstr->ph.param_length = htons(len); addstr->request_seq = htonl(seq); addstr->number_of_streams = htons(adding); @@ -11350,601 +11649,242 @@ sctp_add_a_stream(struct sctp_tmit_chunk *chk, return; } -int -sctp_send_str_reset_req(struct sctp_tcb *stcb, - int number_entries, uint16_t * list, - uint8_t send_out_req, - uint32_t resp_seq, - uint8_t send_in_req, - uint8_t send_tsn_req, - uint8_t add_stream, - uint16_t adding -) +static void +sctp_add_an_in_stream(struct sctp_tmit_chunk *chk, + uint32_t seq, + uint16_t adding) { - - struct sctp_association *asoc; - struct sctp_tmit_chunk *chk; + uint16_t len, old_len; struct sctp_chunkhdr *ch; - uint32_t seq; - - asoc = &stcb->asoc; - if (asoc->stream_reset_outstanding) { - /*- - * Already one pending, must get ACK back to clear the flag. - */ - SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY); - return (EBUSY); - } - if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0) && - (add_stream == 0)) { - /* nothing to do */ - SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); - return (EINVAL); - } - if (send_tsn_req && (send_out_req || send_in_req)) { - /* error, can't do that */ - SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); - return (EINVAL); - } - sctp_alloc_a_chunk(stcb, chk); - if (chk == NULL) { - SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); - return (ENOMEM); - } - chk->copy_by_ref = 0; - chk->rec.chunk_id.id = SCTP_STREAM_RESET; - chk->rec.chunk_id.can_take_data = 0; - chk->asoc = &stcb->asoc; - chk->book_size = sizeof(struct sctp_chunkhdr); - chk->send_size = SCTP_SIZE32(chk->book_size); - chk->book_size_scale = 0; - - chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA); - if (chk->data == NULL) { - sctp_free_a_chunk(stcb, chk); - SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); - return (ENOMEM); - } - SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); - - /* setup chunk parameters */ - chk->sent = SCTP_DATAGRAM_UNSENT; - chk->snd_count = 0; - chk->whoTo = asoc->primary_destination; - atomic_add_int(&chk->whoTo->ref_count, 1); + struct sctp_stream_reset_add_strm *addstr; ch = mtod(chk->data, struct sctp_chunkhdr *); - ch->chunk_type = SCTP_STREAM_RESET; - ch->chunk_flags = 0; - ch->chunk_length = htons(chk->book_size); - SCTP_BUF_LEN(chk->data) = chk->send_size; - - seq = stcb->asoc.str_reset_seq_out; - if (send_out_req) { - sctp_add_stream_reset_out(chk, number_entries, list, - seq, resp_seq, (stcb->asoc.sending_seq - 1)); - asoc->stream_reset_out_is_outstanding = 1; - seq++; - asoc->stream_reset_outstanding++; - } - if (add_stream) { - sctp_add_a_stream(chk, seq, adding); - seq++; - asoc->stream_reset_outstanding++; - } - if (send_in_req) { - sctp_add_stream_reset_in(chk, number_entries, list, seq); - asoc->stream_reset_outstanding++; - } - if (send_tsn_req) { - sctp_add_stream_reset_tsn(chk, seq); - asoc->stream_reset_outstanding++; - } - asoc->str_reset = chk; - - /* insert the chunk for sending */ - TAILQ_INSERT_TAIL(&asoc->control_send_queue, - chk, - sctp_next); - asoc->ctrl_queue_cnt++; - sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo); - return (0); -} - -void -sctp_send_abort(struct mbuf *m, int iphlen, struct sctphdr *sh, uint32_t vtag, - struct mbuf *err_cause, uint32_t vrf_id, uint16_t port) -{ - /*- - * Formulate the abort message, and send it back down. - */ - struct mbuf *o_pak; - struct mbuf *mout; - struct sctp_abort_msg *abm; - struct ip *iph, *iph_out; - struct udphdr *udp; - -#ifdef INET6 - struct ip6_hdr *ip6, *ip6_out; - -#endif - int iphlen_out, len; - - /* don't respond to ABORT with ABORT */ - if (sctp_is_there_an_abort_here(m, iphlen, &vtag)) { - if (err_cause) - sctp_m_freem(err_cause); - return; - } - iph = mtod(m, struct ip *); - switch (iph->ip_v) { - case IPVERSION: - len = (sizeof(struct ip) + sizeof(struct sctp_abort_msg)); - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_abort_msg)); - break; -#endif - default: - if (err_cause) { - sctp_m_freem(err_cause); - } - return; - } - if (port) { - len += sizeof(struct udphdr); - } - mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA); - if (mout == NULL) { - if (err_cause) { - sctp_m_freem(err_cause); - } - return; - } - SCTP_BUF_RESV_UF(mout, max_linkhdr); - SCTP_BUF_LEN(mout) = len; - SCTP_BUF_NEXT(mout) = err_cause; - iph_out = NULL; -#ifdef INET6 - ip6_out = NULL; -#endif - switch (iph->ip_v) { - case IPVERSION: - iph_out = mtod(mout, struct ip *); - - /* Fill in the IP header for the ABORT */ - iph_out->ip_v = IPVERSION; - iph_out->ip_hl = (sizeof(struct ip) / 4); - iph_out->ip_tos = (u_char)0; - iph_out->ip_id = 0; - iph_out->ip_off = 0; - iph_out->ip_ttl = MAXTTL; - if (port) { - iph_out->ip_p = IPPROTO_UDP; - } else { - iph_out->ip_p = IPPROTO_SCTP; - } - iph_out->ip_src.s_addr = iph->ip_dst.s_addr; - iph_out->ip_dst.s_addr = iph->ip_src.s_addr; - /* let IP layer calculate this */ - iph_out->ip_sum = 0; - - iphlen_out = sizeof(*iph_out); - abm = (struct sctp_abort_msg *)((caddr_t)iph_out + iphlen_out); - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - ip6 = (struct ip6_hdr *)iph; - ip6_out = mtod(mout, struct ip6_hdr *); - - /* Fill in the IP6 header for the ABORT */ - ip6_out->ip6_flow = ip6->ip6_flow; - ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim); - if (port) { - ip6_out->ip6_nxt = IPPROTO_UDP; - } else { - ip6_out->ip6_nxt = IPPROTO_SCTP; - } - ip6_out->ip6_src = ip6->ip6_dst; - ip6_out->ip6_dst = ip6->ip6_src; - - iphlen_out = sizeof(*ip6_out); - abm = (struct sctp_abort_msg *)((caddr_t)ip6_out + iphlen_out); - break; -#endif /* INET6 */ - default: - /* Currently not supported */ - sctp_m_freem(mout); - return; - } - - udp = (struct udphdr *)abm; - if (port) { - udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); - udp->uh_dport = port; - /* set udp->uh_ulen later */ - udp->uh_sum = 0; - iphlen_out += sizeof(struct udphdr); - abm = (struct sctp_abort_msg *)((caddr_t)abm + sizeof(struct udphdr)); - } - abm->sh.src_port = sh->dest_port; - abm->sh.dest_port = sh->src_port; - abm->sh.checksum = 0; - if (vtag == 0) { - abm->sh.v_tag = sh->v_tag; - abm->msg.ch.chunk_flags = SCTP_HAD_NO_TCB; - } else { - abm->sh.v_tag = htonl(vtag); - abm->msg.ch.chunk_flags = 0; - } - abm->msg.ch.chunk_type = SCTP_ABORT_ASSOCIATION; - - if (err_cause) { - struct mbuf *m_tmp = err_cause; - int err_len = 0; - - /* get length of the err_cause chain */ - while (m_tmp != NULL) { - err_len += SCTP_BUF_LEN(m_tmp); - m_tmp = SCTP_BUF_NEXT(m_tmp); - } - len = SCTP_BUF_LEN(mout) + err_len; - if (err_len % 4) { - /* need pad at end of chunk */ - uint32_t cpthis = 0; - int padlen; - - padlen = 4 - (len % 4); - m_copyback(mout, len, padlen, (caddr_t)&cpthis); - len += padlen; - } - abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch) + err_len); - } else { - len = SCTP_BUF_LEN(mout); - abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch)); - } - - if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { - /* no mbuf's */ - sctp_m_freem(mout); - return; - } - if (iph_out != NULL) { - sctp_route_t ro; - int ret; - - /* zap the stack pointer to the route */ - bzero(&ro, sizeof ro); - if (port) { - udp->uh_ulen = htons(len - sizeof(struct ip)); - udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP)); - } - SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip_output:\n"); - SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, iph_out, &abm->sh); - /* set IPv4 length */ - iph_out->ip_len = len; - /* out it goes */ -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(mout, len); -#endif - SCTP_ATTACH_CHAIN(o_pak, mout, len); - if (port) { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - abm->sh.checksum = sctp_calculate_cksum(mout, iphlen_out); - SCTP_STAT_INCR(sctps_sendswcrc); -#endif - SCTP_ENABLE_UDP_CSUM(o_pak); - } else { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - mout->m_pkthdr.csum_flags = CSUM_SCTP; - mout->m_pkthdr.csum_data = 0; - SCTP_STAT_INCR(sctps_sendhwcrc); -#endif - } - SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id); - - /* Free the route if we got one back */ - if (ro.ro_rt) - RTFREE(ro.ro_rt); - } -#ifdef INET6 - if (ip6_out != NULL) { - struct route_in6 ro; - int ret; - struct ifnet *ifp = NULL; + old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); - /* zap the stack pointer to the route */ - bzero(&ro, sizeof(ro)); - if (port) { - udp->uh_ulen = htons(len - sizeof(struct ip6_hdr)); - } - SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip6_output:\n"); - SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, (struct ip *)ip6_out, &abm->sh); - ip6_out->ip6_plen = len - sizeof(*ip6_out); -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(mout, len); -#endif - SCTP_ATTACH_CHAIN(o_pak, mout, len); - if (port) { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - abm->sh.checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr)); - SCTP_STAT_INCR(sctps_sendswcrc); -#endif - if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) { - udp->uh_sum = 0xffff; - } - } else { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - mout->m_pkthdr.csum_flags = CSUM_SCTP; - mout->m_pkthdr.csum_data = 0; - SCTP_STAT_INCR(sctps_sendhwcrc); -#endif - } - SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id); + /* get to new offset for the param. */ + addstr = (struct sctp_stream_reset_add_strm *)((caddr_t)ch + len); + /* now how long will this param be? */ + len = sizeof(struct sctp_stream_reset_add_strm); + /* Fill it out. */ + addstr->ph.param_type = htons(SCTP_STR_RESET_ADD_IN_STREAMS); + addstr->ph.param_length = htons(len); + addstr->request_seq = htonl(seq); + addstr->number_of_streams = htons(adding); + addstr->reserved = 0; - /* Free the route if we got one back */ - if (ro.ro_rt) - RTFREE(ro.ro_rt); - } -#endif - SCTP_STAT_INCR(sctps_sendpackets); - SCTP_STAT_INCR_COUNTER64(sctps_outpackets); - SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); + /* now fix the chunk length */ + ch->chunk_length = htons(len + old_len); + chk->send_size = len + old_len; + chk->book_size = SCTP_SIZE32(chk->send_size); + chk->book_size_scale = 0; + SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size); + return; } -void -sctp_send_operr_to(struct mbuf *m, int iphlen, struct mbuf *scm, uint32_t vtag, - uint32_t vrf_id, uint16_t port) -{ - struct mbuf *o_pak; - struct sctphdr *sh, *sh_out; - struct sctp_chunkhdr *ch; - struct ip *iph, *iph_out; - struct udphdr *udp = NULL; - struct mbuf *mout; - -#ifdef INET6 - struct ip6_hdr *ip6, *ip6_out; +int +sctp_send_str_reset_req(struct sctp_tcb *stcb, + int number_entries, uint16_t * list, + uint8_t send_out_req, + uint8_t send_in_req, + uint8_t send_tsn_req, + uint8_t add_stream, + uint16_t adding_o, + uint16_t adding_i, uint8_t peer_asked) +{ -#endif - int iphlen_out, len; + struct sctp_association *asoc; + struct sctp_tmit_chunk *chk; + struct sctp_chunkhdr *ch; + uint32_t seq; - iph = mtod(m, struct ip *); - sh = (struct sctphdr *)((caddr_t)iph + iphlen); - switch (iph->ip_v) { - case IPVERSION: - len = (sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)); - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - len = (sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)); - break; -#endif - default: - if (scm) { - sctp_m_freem(scm); - } - return; + asoc = &stcb->asoc; + if (asoc->stream_reset_outstanding) { + /*- + * Already one pending, must get ACK back to clear the flag. + */ + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY); + return (EBUSY); } - if (port) { - len += sizeof(struct udphdr); + if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0) && + (add_stream == 0)) { + /* nothing to do */ + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); + return (EINVAL); } - mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA); - if (mout == NULL) { - if (scm) { - sctp_m_freem(scm); - } - return; + if (send_tsn_req && (send_out_req || send_in_req)) { + /* error, can't do that */ + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); + return (EINVAL); } - SCTP_BUF_RESV_UF(mout, max_linkhdr); - SCTP_BUF_LEN(mout) = len; - SCTP_BUF_NEXT(mout) = scm; - iph_out = NULL; -#ifdef INET6 - ip6_out = NULL; -#endif - switch (iph->ip_v) { - case IPVERSION: - iph_out = mtod(mout, struct ip *); - - /* Fill in the IP header for the ABORT */ - iph_out->ip_v = IPVERSION; - iph_out->ip_hl = (sizeof(struct ip) / 4); - iph_out->ip_tos = (u_char)0; - iph_out->ip_id = 0; - iph_out->ip_off = 0; - iph_out->ip_ttl = MAXTTL; - if (port) { - iph_out->ip_p = IPPROTO_UDP; - } else { - iph_out->ip_p = IPPROTO_SCTP; - } - iph_out->ip_src.s_addr = iph->ip_dst.s_addr; - iph_out->ip_dst.s_addr = iph->ip_src.s_addr; - /* let IP layer calculate this */ - iph_out->ip_sum = 0; - - iphlen_out = sizeof(struct ip); - sh_out = (struct sctphdr *)((caddr_t)iph_out + iphlen_out); - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - ip6 = (struct ip6_hdr *)iph; - ip6_out = mtod(mout, struct ip6_hdr *); - - /* Fill in the IP6 header for the ABORT */ - ip6_out->ip6_flow = ip6->ip6_flow; - ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim); - if (port) { - ip6_out->ip6_nxt = IPPROTO_UDP; - } else { - ip6_out->ip6_nxt = IPPROTO_SCTP; - } - ip6_out->ip6_src = ip6->ip6_dst; - ip6_out->ip6_dst = ip6->ip6_src; - - iphlen_out = sizeof(struct ip6_hdr); - sh_out = (struct sctphdr *)((caddr_t)ip6_out + iphlen_out); - break; -#endif /* INET6 */ - default: - /* Currently not supported */ - sctp_m_freem(mout); - return; + sctp_alloc_a_chunk(stcb, chk); + if (chk == NULL) { + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); + return (ENOMEM); } + chk->copy_by_ref = 0; + chk->rec.chunk_id.id = SCTP_STREAM_RESET; + chk->rec.chunk_id.can_take_data = 0; + chk->asoc = &stcb->asoc; + chk->book_size = sizeof(struct sctp_chunkhdr); + chk->send_size = SCTP_SIZE32(chk->book_size); + chk->book_size_scale = 0; - udp = (struct udphdr *)sh_out; - if (port) { - udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); - udp->uh_dport = port; - /* set udp->uh_ulen later */ - udp->uh_sum = 0; - iphlen_out += sizeof(struct udphdr); - sh_out = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr)); + chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA); + if (chk->data == NULL) { + sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED); + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); + return (ENOMEM); } - sh_out->src_port = sh->dest_port; - sh_out->dest_port = sh->src_port; - sh_out->v_tag = vtag; - sh_out->checksum = 0; - - ch = (struct sctp_chunkhdr *)((caddr_t)sh_out + sizeof(struct sctphdr)); - ch->chunk_type = SCTP_OPERATION_ERROR; - ch->chunk_flags = 0; - - if (scm) { - struct mbuf *m_tmp = scm; - int cause_len = 0; - - /* get length of the err_cause chain */ - while (m_tmp != NULL) { - cause_len += SCTP_BUF_LEN(m_tmp); - m_tmp = SCTP_BUF_NEXT(m_tmp); - } - len = SCTP_BUF_LEN(mout) + cause_len; - if (cause_len % 4) { - /* need pad at end of chunk */ - uint32_t cpthis = 0; - int padlen; + SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); - padlen = 4 - (len % 4); - m_copyback(mout, len, padlen, (caddr_t)&cpthis); - len += padlen; - } - ch->chunk_length = htons(sizeof(struct sctp_chunkhdr) + cause_len); + /* setup chunk parameters */ + chk->sent = SCTP_DATAGRAM_UNSENT; + chk->snd_count = 0; + if (stcb->asoc.alternate) { + chk->whoTo = stcb->asoc.alternate; } else { - len = SCTP_BUF_LEN(mout); - ch->chunk_length = htons(sizeof(struct sctp_chunkhdr)); + chk->whoTo = stcb->asoc.primary_destination; } + atomic_add_int(&chk->whoTo->ref_count, 1); + ch = mtod(chk->data, struct sctp_chunkhdr *); + ch->chunk_type = SCTP_STREAM_RESET; + ch->chunk_flags = 0; + ch->chunk_length = htons(chk->book_size); + SCTP_BUF_LEN(chk->data) = chk->send_size; - if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { - /* no mbuf's */ - sctp_m_freem(mout); - return; + seq = stcb->asoc.str_reset_seq_out; + if (send_out_req) { + sctp_add_stream_reset_out(chk, number_entries, list, + seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1)); + asoc->stream_reset_out_is_outstanding = 1; + seq++; + asoc->stream_reset_outstanding++; } - if (iph_out != NULL) { - sctp_route_t ro; - int ret; - - /* zap the stack pointer to the route */ - bzero(&ro, sizeof ro); - if (port) { - udp->uh_ulen = htons(len - sizeof(struct ip)); - udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP)); - } - /* set IPv4 length */ - iph_out->ip_len = len; - /* out it goes */ -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(mout, len); -#endif - SCTP_ATTACH_CHAIN(o_pak, mout, len); - if (port) { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - sh_out->checksum = sctp_calculate_cksum(mout, iphlen_out); - SCTP_STAT_INCR(sctps_sendswcrc); -#endif - SCTP_ENABLE_UDP_CSUM(o_pak); - } else { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - mout->m_pkthdr.csum_flags = CSUM_SCTP; - mout->m_pkthdr.csum_data = 0; - SCTP_STAT_INCR(sctps_sendhwcrc); -#endif + if ((add_stream & 1) && + ((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < adding_o)) { + /* Need to allocate more */ + struct sctp_stream_out *oldstream; + struct sctp_stream_queue_pending *sp, *nsp; + int i; + + oldstream = stcb->asoc.strmout; + /* get some more */ + SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *, + ((stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out)), + SCTP_M_STRMO); + if (stcb->asoc.strmout == NULL) { + uint8_t x; + + stcb->asoc.strmout = oldstream; + /* Turn off the bit */ + x = add_stream & 0xfe; + add_stream = x; + goto skip_stuff; } - SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id); - - /* Free the route if we got one back */ - if (ro.ro_rt) - RTFREE(ro.ro_rt); + /* + * Ok now we proceed with copying the old out stuff and + * initializing the new stuff. + */ + SCTP_TCB_SEND_LOCK(stcb); + stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 0, 1); + for (i = 0; i < stcb->asoc.streamoutcnt; i++) { + TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); + stcb->asoc.strmout[i].chunks_on_queues = oldstream[i].chunks_on_queues; + stcb->asoc.strmout[i].next_sequence_send = oldstream[i].next_sequence_send; + stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete; + stcb->asoc.strmout[i].stream_no = i; + stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], &oldstream[i]); + /* now anything on those queues? */ + TAILQ_FOREACH_SAFE(sp, &oldstream[i].outqueue, next, nsp) { + TAILQ_REMOVE(&oldstream[i].outqueue, sp, next); + TAILQ_INSERT_TAIL(&stcb->asoc.strmout[i].outqueue, sp, next); + } + /* Now move assoc pointers too */ + if (stcb->asoc.last_out_stream == &oldstream[i]) { + stcb->asoc.last_out_stream = &stcb->asoc.strmout[i]; + } + if (stcb->asoc.locked_on_sending == &oldstream[i]) { + stcb->asoc.locked_on_sending = &stcb->asoc.strmout[i]; + } + } + /* now the new streams */ + stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1); + for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + adding_o); i++) { + TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); + stcb->asoc.strmout[i].chunks_on_queues = 0; + stcb->asoc.strmout[i].next_sequence_send = 0x0; + stcb->asoc.strmout[i].stream_no = i; + stcb->asoc.strmout[i].last_msg_incomplete = 0; + stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL); + } + stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + adding_o; + SCTP_FREE(oldstream, SCTP_M_STRMO); + SCTP_TCB_SEND_UNLOCK(stcb); } -#ifdef INET6 - if (ip6_out != NULL) { - struct route_in6 ro; - int ret; - struct ifnet *ifp = NULL; - - /* zap the stack pointer to the route */ - bzero(&ro, sizeof(ro)); - if (port) { - udp->uh_ulen = htons(len - sizeof(struct ip6_hdr)); - } - ip6_out->ip6_plen = len - sizeof(*ip6_out); -#ifdef SCTP_PACKET_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) - sctp_packet_log(mout, len); -#endif - SCTP_ATTACH_CHAIN(o_pak, mout, len); - if (port) { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - sh_out->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr)); - SCTP_STAT_INCR(sctps_sendswcrc); -#endif - if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) { - udp->uh_sum = 0xffff; - } - } else { -#if defined(SCTP_WITH_NO_CSUM) - SCTP_STAT_INCR(sctps_sendnocrc); -#else - mout->m_pkthdr.csum_flags = CSUM_SCTP; - mout->m_pkthdr.csum_data = 0; - SCTP_STAT_INCR(sctps_sendhwcrc); -#endif - } - SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id); +skip_stuff: + if ((add_stream & 1) && (adding_o > 0)) { + asoc->strm_pending_add_size = adding_o; + asoc->peer_req_out = peer_asked; + sctp_add_an_out_stream(chk, seq, adding_o); + seq++; + asoc->stream_reset_outstanding++; + } + if ((add_stream & 2) && (adding_i > 0)) { + sctp_add_an_in_stream(chk, seq, adding_i); + seq++; + asoc->stream_reset_outstanding++; + } + if (send_in_req) { + sctp_add_stream_reset_in(chk, number_entries, list, seq); + seq++; + asoc->stream_reset_outstanding++; + } + if (send_tsn_req) { + sctp_add_stream_reset_tsn(chk, seq); + asoc->stream_reset_outstanding++; + } + asoc->str_reset = chk; + /* insert the chunk for sending */ + TAILQ_INSERT_TAIL(&asoc->control_send_queue, + chk, + sctp_next); + asoc->ctrl_queue_cnt++; + sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo); + return (0); +} - /* Free the route if we got one back */ - if (ro.ro_rt) - RTFREE(ro.ro_rt); +void +sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, uint32_t vtag, struct mbuf *cause, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) +{ + /* Don't respond to an ABORT with an ABORT. */ + if (sctp_is_there_an_abort_here(m, iphlen, &vtag)) { + if (cause) + sctp_m_freem(cause); + return; } -#endif - SCTP_STAT_INCR(sctps_sendpackets); - SCTP_STAT_INCR_COUNTER64(sctps_outpackets); - SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); + sctp_send_resp_msg(src, dst, sh, vtag, SCTP_ABORT_ASSOCIATION, cause, + use_mflowid, mflowid, + vrf_id, port); + return; +} + +void +sctp_send_operr_to(struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, uint32_t vtag, struct mbuf *cause, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) +{ + sctp_send_resp_msg(src, dst, sh, vtag, SCTP_OPERATION_ERROR, cause, + use_mflowid, mflowid, + vrf_id, port); + return; } static struct mbuf * -sctp_copy_resume(struct sctp_stream_queue_pending *sp, - struct uio *uio, - struct sctp_sndrcvinfo *srcv, +sctp_copy_resume(struct uio *uio, int max_send_len, int user_marks_eor, int *error, @@ -11993,8 +11933,7 @@ sctp_copy_it_in(struct sctp_tcb *stcb, struct sctp_nets *net, int max_send_len, int user_marks_eor, - int *error, - int non_blocking) + int *error) { /*- * This routine must be very careful in its work. Protocol @@ -12029,7 +11968,6 @@ sctp_copy_it_in(struct sctp_tcb *stcb, sp->timetolive = srcv->sinfo_timetolive; sp->ppid = srcv->sinfo_ppid; sp->context = srcv->sinfo_context; - sp->strseq = 0; (void)SCTP_GETTIME_TIMEVAL(&sp->ts); sp->stream = srcv->sinfo_stream; @@ -12051,15 +11989,19 @@ sctp_copy_it_in(struct sctp_tcb *stcb, *error = 0; goto skip_copy; } - sp->auth_keyid = stcb->asoc.authinfo.active_keyid; + if (srcv->sinfo_keynumber_valid) { + sp->auth_keyid = srcv->sinfo_keynumber; + } else { + sp->auth_keyid = stcb->asoc.authinfo.active_keyid; + } if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) { - sctp_auth_key_acquire(stcb, stcb->asoc.authinfo.active_keyid); + sctp_auth_key_acquire(stcb, sp->auth_keyid); sp->holds_key_ref = 1; } *error = sctp_copy_one(sp, uio, resv_in_first); skip_copy: if (*error) { - sctp_free_a_strmoq(stcb, sp); + sctp_free_a_strmoq(stcb, sp, SCTP_SO_LOCKED); sp = NULL; } else { if (sp->sinfo_flags & SCTP_ADDR_OVER) { @@ -12085,8 +12027,8 @@ sctp_sosend(struct socket *so, struct thread *p ) { - int error, use_rcvinfo = 0; - struct sctp_sndrcvinfo srcv; + int error, use_sndinfo = 0; + struct sctp_sndrcvinfo sndrcvninfo; struct sockaddr *addr_to_use; #if defined(INET) && defined(INET6) @@ -12096,10 +12038,10 @@ sctp_sosend(struct socket *so, if (control) { /* process cmsg snd/rcv info (maybe a assoc-id) */ - if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&srcv, control, - sizeof(srcv))) { + if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&sndrcvninfo, control, + sizeof(sndrcvninfo))) { /* got one */ - use_rcvinfo = 1; + use_sndinfo = 1; } } addr_to_use = addr; @@ -12117,7 +12059,7 @@ sctp_sosend(struct socket *so, error = sctp_lower_sosend(so, addr_to_use, uio, top, control, flags, - use_rcvinfo ? &srcv : NULL + use_sndinfo ? &sndrcvninfo : NULL ,p ); return (error); @@ -12194,7 +12136,7 @@ sctp_lower_sosend(struct socket *so, sndlen = SCTP_HEADER_LEN(i_pak); } SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n", - addr, + (void *)addr, sndlen); if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (inp->sctp_socket->so_qlimit)) { @@ -12211,7 +12153,7 @@ sctp_lower_sosend(struct socket *so, union sctp_sockstore *raddr = (union sctp_sockstore *)addr; switch (raddr->sa.sa_family) { -#if defined(INET) +#ifdef INET case AF_INET: if (raddr->sin.sin_len != sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); @@ -12221,7 +12163,7 @@ sctp_lower_sosend(struct socket *so, port = raddr->sin.sin_port; break; #endif -#if defined(INET6) +#ifdef INET6 case AF_INET6: if (raddr->sin6.sin6_len != sizeof(struct sockaddr_in6)) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); @@ -12270,14 +12212,10 @@ sctp_lower_sosend(struct socket *so, (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { SCTP_INP_RLOCK(inp); stcb = LIST_FIRST(&inp->sctp_asoc_list); - if (stcb == NULL) { - SCTP_INP_RUNLOCK(inp); - SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN); - error = ENOTCONN; - goto out_unlocked; + if (stcb) { + SCTP_TCB_LOCK(stcb); + hold_tcblock = 1; } - SCTP_TCB_LOCK(stcb); - hold_tcblock = 1; SCTP_INP_RUNLOCK(inp); } else if (sinfo_assoc_id) { stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 0); @@ -12322,6 +12260,9 @@ sctp_lower_sosend(struct socket *so, SCTP_INP_WUNLOCK(inp); /* With the lock applied look again */ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL); + if ((stcb == NULL) && (control != NULL) && (port > 0)) { + stcb = sctp_findassociation_cmsgs(&t_inp, port, control, &net, &error); + } if (stcb == NULL) { SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); @@ -12329,6 +12270,9 @@ sctp_lower_sosend(struct socket *so, } else { hold_tcblock = 1; } + if (error) { + goto out_unlocked; + } if (t_inp != inp) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN); error = ENOTCONN; @@ -12336,21 +12280,12 @@ sctp_lower_sosend(struct socket *so, } } if (stcb == NULL) { - if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || - (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { - SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN); - error = ENOTCONN; - goto out_unlocked; - } if (addr == NULL) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT); error = ENOENT; goto out_unlocked; } else { - /* - * UDP style, we must go ahead and start the INIT - * process - */ + /* We must go ahead and start the INIT process */ uint32_t vrf_id; if ((sinfo_flags & SCTP_ABORT) || @@ -12377,6 +12312,15 @@ sctp_lower_sosend(struct socket *so, /* Error is setup for us in the call */ goto out_unlocked; } + if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { + stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; + /* + * Set the connected flag so we can queue + * data + */ + soisconnecting(so); + } + hold_tcblock = 1; if (create_lock_applied) { SCTP_ASOC_CREATE_UNLOCK(inp); create_lock_applied = 0; @@ -12396,85 +12340,13 @@ sctp_lower_sosend(struct socket *so, sctp_initialize_auth_params(inp, stcb); if (control) { - /* - * see if a init structure exists in cmsg - * headers - */ - struct sctp_initmsg initm; - int i; - - if (sctp_find_cmsg(SCTP_INIT, (void *)&initm, control, - sizeof(initm))) { - /* - * we have an INIT override of the - * default - */ - if (initm.sinit_max_attempts) - asoc->max_init_times = initm.sinit_max_attempts; - if (initm.sinit_num_ostreams) - asoc->pre_open_streams = initm.sinit_num_ostreams; - if (initm.sinit_max_instreams) - asoc->max_inbound_streams = initm.sinit_max_instreams; - if (initm.sinit_max_init_timeo) - asoc->initial_init_rto_max = initm.sinit_max_init_timeo; - if (asoc->streamoutcnt < asoc->pre_open_streams) { - struct sctp_stream_out *tmp_str; - int had_lock = 0; - - /* Default is NOT correct */ - SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, defout:%d pre_open:%d\n", - asoc->streamoutcnt, asoc->pre_open_streams); - /* - * What happens if this - * fails? we panic ... - */ - - if (hold_tcblock) { - had_lock = 1; - SCTP_TCB_UNLOCK(stcb); - } - SCTP_MALLOC(tmp_str, - struct sctp_stream_out *, - (asoc->pre_open_streams * - sizeof(struct sctp_stream_out)), - SCTP_M_STRMO); - if (had_lock) { - SCTP_TCB_LOCK(stcb); - } - if (tmp_str != NULL) { - SCTP_FREE(asoc->strmout, SCTP_M_STRMO); - asoc->strmout = tmp_str; - asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams; - } else { - asoc->pre_open_streams = asoc->streamoutcnt; - } - for (i = 0; i < asoc->streamoutcnt; i++) { - /*- - * inbound side must be set - * to 0xffff, also NOTE when - * we get the INIT-ACK back - * (for INIT sender) we MUST - * reduce the count - * (streamoutcnt) but first - * check if we sent to any - * of the upper streams that - * were dropped (if some - * were). Those that were - * dropped must be notified - * to the upper layer as - * failed to send. - */ - asoc->strmout[i].next_sequence_sent = 0x0; - TAILQ_INIT(&asoc->strmout[i].outqueue); - asoc->strmout[i].stream_no = i; - asoc->strmout[i].last_msg_incomplete = 0; - asoc->strmout[i].next_spoke.tqe_next = 0; - asoc->strmout[i].next_spoke.tqe_prev = 0; - } - } + if (sctp_process_cmsgs_for_init(stcb, control, &error)) { + sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_7); + hold_tcblock = 0; + stcb = NULL; + goto out_unlocked; } } - hold_tcblock = 1; /* out with the INIT */ queue_only_for_init = 1; /*- @@ -12500,7 +12372,11 @@ sctp_lower_sosend(struct socket *so, goto out_unlocked; } } else { - net = stcb->asoc.primary_destination; + if (stcb->asoc.alternate) { + net = stcb->asoc.alternate; + } else { + net = stcb->asoc.primary_destination; + } } atomic_add_int(&stcb->total_sends, 1); /* Keep the stcb from being freed under our feet */ @@ -12514,9 +12390,9 @@ sctp_lower_sosend(struct socket *so, goto out_unlocked; } } - if ((SCTP_SO_IS_NBIO(so) + if (SCTP_SO_IS_NBIO(so) || (flags & MSG_NBIO) - )) { + ) { non_blocking = 1; } /* would we block? */ @@ -12607,15 +12483,12 @@ sctp_lower_sosend(struct socket *so, if (top) { struct mbuf *cntm = NULL; - mm = sctp_get_mbuf_for_msg(1, 0, M_WAIT, 1, MT_DATA); + mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAIT, 1, MT_DATA); if (sndlen != 0) { - cntm = top; - while (cntm) { + for (cntm = top; cntm; cntm = SCTP_BUF_NEXT(cntm)) { tot_out += SCTP_BUF_LEN(cntm); - cntm = SCTP_BUF_NEXT(cntm); } } - tot_demand = (tot_out + sizeof(struct sctp_paramhdr)); } else { /* Must fit in a MTU */ tot_out = sndlen; @@ -12644,7 +12517,7 @@ sctp_lower_sosend(struct socket *so, /* now move forward the data pointer */ ph = mtod(mm, struct sctp_paramhdr *); ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); - ph->param_length = htons((sizeof(struct sctp_paramhdr) + tot_out)); + ph->param_length = htons(sizeof(struct sctp_paramhdr) + tot_out); ph++; SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr); if (top == NULL) { @@ -12666,14 +12539,11 @@ sctp_lower_sosend(struct socket *so, } if (hold_tcblock == 0) { SCTP_TCB_LOCK(stcb); - hold_tcblock = 1; } atomic_add_int(&stcb->asoc.refcnt, -1); free_cnt_applied = 0; /* release this lock, otherwise we hang on ourselves */ - sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_RESPONSE_TO_USER_REQ, - mm, SCTP_SO_LOCKED); + sctp_abort_an_association(stcb->sctp_ep, stcb, mm, SCTP_SO_LOCKED); /* now relock the stcb so everything is sane */ hold_tcblock = 0; stcb = NULL; @@ -12763,7 +12633,7 @@ sctp_lower_sosend(struct socket *so, stcb->asoc.chunks_on_out_queue, SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue)); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { - sctp_log_block(SCTP_BLOCK_LOG_INTO_BLKA, so, asoc, sndlen); + sctp_log_block(SCTP_BLOCK_LOG_INTO_BLKA, asoc, sndlen); } be.error = 0; stcb->block_entry = &be; @@ -12782,7 +12652,7 @@ sctp_lower_sosend(struct socket *so, } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK, - so, asoc, stcb->asoc.total_output_queue_size); + asoc, stcb->asoc.total_output_queue_size); } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { goto out_unlocked; @@ -12832,7 +12702,7 @@ skip_preblock: strm = &stcb->asoc.strmout[srcv->sinfo_stream]; if (strm->last_msg_incomplete == 0) { do_a_copy_in: - sp = sctp_copy_it_in(stcb, asoc, srcv, uio, net, max_len, user_marks_eor, &error, non_blocking); + sp = sctp_copy_it_in(stcb, asoc, srcv, uio, net, max_len, user_marks_eor, &error); if ((sp == NULL) || (error)) { goto out; } @@ -12852,23 +12722,11 @@ skip_preblock: } sctp_snd_sb_alloc(stcb, sp->length); atomic_add_int(&asoc->stream_queue_cnt, 1); - if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) { - sp->strseq = strm->next_sequence_sent; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_SCTP) { - sctp_misc_ints(SCTP_STRMOUT_LOG_ASSIGN, - (uintptr_t) stcb, sp->length, - (uint32_t) ((srcv->sinfo_stream << 16) | sp->strseq), 0); - } - strm->next_sequence_sent++; - } else { + if (srcv->sinfo_flags & SCTP_UNORDERED) { SCTP_STAT_INCR(sctps_sends_with_unord); } TAILQ_INSERT_TAIL(&strm->outqueue, sp, next); - if ((strm->next_spoke.tqe_next == NULL) && - (strm->next_spoke.tqe_prev == NULL)) { - /* Not on wheel, insert */ - sctp_insert_on_wheel(stcb, asoc, strm, 1); - } + stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, asoc, strm, sp, 1); SCTP_TCB_SEND_UNLOCK(stcb); } else { SCTP_TCB_SEND_LOCK(stcb); @@ -12904,7 +12762,7 @@ skip_preblock: SCTP_TCB_UNLOCK(stcb); hold_tcblock = 0; } - mm = sctp_copy_resume(sp, uio, srcv, max_len, user_marks_eor, &error, &sndout, &new_tail); + mm = sctp_copy_resume(uio, max_len, user_marks_eor, &error, &sndout, &new_tail); if ((mm == NULL) || error) { if (mm) { sctp_m_freem(mm); @@ -13086,7 +12944,7 @@ skip_preblock: min(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTP_SB_LIMIT_SND(so)))) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK, - so, asoc, uio->uio_resid); + asoc, uio->uio_resid); } be.error = 0; stcb->block_entry = &be; @@ -13106,7 +12964,7 @@ skip_preblock: } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK, - so, asoc, stcb->asoc.total_output_queue_size); + asoc, stcb->asoc.total_output_queue_size); } } SOCKBUF_UNLOCK(&so->so_snd); @@ -13154,8 +13012,7 @@ skip_preblock: dataless_eof: /* EOF thing ? */ if ((srcv->sinfo_flags & SCTP_EOF) && - (got_all_of_the_send == 1) && - (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) { + (got_all_of_the_send == 1)) { int cnt; SCTP_STAT_INCR(sctps_sends_with_eof); @@ -13164,7 +13021,7 @@ dataless_eof: SCTP_TCB_LOCK(stcb); hold_tcblock = 1; } - cnt = sctp_is_there_unsent_data(stcb); + cnt = sctp_is_there_unsent_data(stcb, SCTP_SO_LOCKED); if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && (cnt == 0)) { @@ -13175,15 +13032,23 @@ dataless_eof: if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) && (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { + struct sctp_nets *netp; + /* only send SHUTDOWN the first time through */ - sctp_send_shutdown(stcb, stcb->asoc.primary_destination); if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); + sctp_stop_timers_for_shutdown(stcb); + if (stcb->asoc.alternate) { + netp = stcb->asoc.alternate; + } else { + netp = stcb->asoc.primary_destination; + } + sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, - asoc->primary_destination); + netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); } @@ -13225,7 +13090,6 @@ dataless_eof: free_cnt_applied = 0; } sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_RESPONSE_TO_USER_REQ, NULL, SCTP_SO_LOCKED); /* * now relock the stcb so everything @@ -13301,8 +13165,6 @@ skip_out_eof: stcb->asoc.total_flight, stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count); } - if (queue_only_for_init) - queue_only_for_init = 0; if ((queue_only == 0) && (nagle_applies == 0) && (stcb->asoc.peers_rwnd && un_sent)) { /* we can attempt to send too. */ if (hold_tcblock == 0) { @@ -13348,11 +13210,9 @@ out_unlocked: if (local_soresv && stcb) { atomic_subtract_int(&stcb->asoc.sb_send_resv, sndlen); - local_soresv = 0; } if (create_lock_applied) { SCTP_ASOC_CREATE_UNLOCK(inp); - create_lock_applied = 0; } if ((stcb) && hold_tcblock) { SCTP_TCB_UNLOCK(stcb); @@ -13374,7 +13234,7 @@ out_unlocked: if (inp) { sctp_validate_no_locks(inp); } else { - printf("Warning - inp is NULL so cant validate locks\n"); + SCTP_PRINTF("Warning - inp is NULL so cant validate locks\n"); } #endif if (top) { @@ -13398,6 +13258,7 @@ sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end, struct mbuf *m_auth; struct sctp_auth_chunk *auth; int chunk_len; + struct mbuf *cn; if ((m_end == NULL) || (auth_ret == NULL) || (offset == NULL) || (stcb == NULL)) @@ -13435,17 +13296,10 @@ sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end, /* key id and hmac digest will be computed and filled in upon send */ /* save the offset where the auth was inserted into the chain */ - if (m != NULL) { - struct mbuf *cn; - - *offset = 0; - cn = m; - while (cn) { - *offset += SCTP_BUF_LEN(cn); - cn = SCTP_BUF_NEXT(cn); - } - } else - *offset = 0; + *offset = 0; + for (cn = m; cn; cn = SCTP_BUF_NEXT(cn)) { + *offset += SCTP_BUF_LEN(cn); + } /* update length and return pointer to the auth chunk */ SCTP_BUF_LEN(m_auth) = chunk_len; @@ -13485,8 +13339,7 @@ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro) SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6); /* search installed gateway from prefix entry */ - for (pfxrtr = pfx->ndpr_advrtrs.lh_first; pfxrtr; pfxrtr = - pfxrtr->pfr_next) { + LIST_FOREACH(pfxrtr, &pfx->ndpr_advrtrs, pfr_entry) { memset(&gw6, 0, sizeof(struct sockaddr_in6)); gw6.sin6_family = AF_INET6; gw6.sin6_len = sizeof(struct sockaddr_in6); @@ -13511,6 +13364,7 @@ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro) int sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro) { +#ifdef INET struct sockaddr_in *sin, *mask; struct ifaddr *ifa; struct in_addr srcnetaddr, gwnetaddr; @@ -13535,5 +13389,6 @@ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro) if (srcnetaddr.s_addr == gwnetaddr.s_addr) { return (1); } +#endif return (0); } diff --git a/freebsd/sys/netinet/sctp_output.h b/freebsd/sys/netinet/sctp_output.h index d655c3aa..59af5af2 100644 --- a/freebsd/sys/netinet/sctp_output.h +++ b/freebsd/sys/netinet/sctp_output.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,13 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_output.h,v 1.14 2005/03/06 16:04:18 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_output_h__ -#define __sctp_output_h__ +#ifndef _NETINET_SCTP_OUTPUT_H_ +#define _NETINET_SCTP_OUTPUT_H_ #include @@ -43,9 +43,11 @@ __FBSDID("$FreeBSD$"); struct mbuf * sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, struct sctp_scoping *scope, struct mbuf *m_at, - int cnt_inits_to); + int cnt_inits_to, + uint16_t * padding_len, uint16_t * chunk_len); int sctp_is_addr_restricted(struct sctp_tcb *, struct sctp_ifa *); @@ -53,13 +55,9 @@ int sctp_is_addr_restricted(struct sctp_tcb *, struct sctp_ifa *); int sctp_is_address_in_scope(struct sctp_ifa *ifa, - int ipv4_addr_legal, - int ipv6_addr_legal, - int loopback_scope, - int ipv4_local_scope, - int local_scope, - int site_scope, + struct sctp_scoping *scope, int do_update); + int sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa); @@ -82,8 +80,11 @@ sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *, int ); void -sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *, - struct mbuf *, int, int, struct sctphdr *, struct sctp_init_chunk *, +sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *, + int, int, + struct sockaddr *, struct sockaddr *, + struct sctphdr *, struct sctp_init_chunk *, + uint8_t, uint32_t, uint32_t, uint16_t, int); struct mbuf * @@ -114,7 +115,9 @@ void sctp_send_shutdown_ack(struct sctp_tcb *, struct sctp_nets *); void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *, int); void -sctp_send_shutdown_complete2(struct mbuf *, int, struct sctphdr *, +sctp_send_shutdown_complete2(struct sockaddr *, struct sockaddr *, + struct sctphdr *, + uint8_t, uint32_t, uint32_t, uint16_t); void sctp_send_asconf(struct sctp_tcb *, struct sctp_nets *, int addr_locked); @@ -135,11 +138,6 @@ int sctp_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *, int); -void -sctp_insert_on_wheel(struct sctp_tcb *stcb, - struct sctp_association *asoc, - struct sctp_stream_out *strq, int holdslock); - void sctp_chunk_output(struct sctp_inpcb *, struct sctp_tcb *, int, int #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) @@ -155,66 +153,52 @@ sctp_send_abort_tcb(struct sctp_tcb *, struct mbuf *, int void send_forward_tsn(struct sctp_tcb *, struct sctp_association *); -void sctp_send_sack(struct sctp_tcb *); +void sctp_send_sack(struct sctp_tcb *, int); -int sctp_send_hb(struct sctp_tcb *, int, struct sctp_nets *); +void sctp_send_hb(struct sctp_tcb *, struct sctp_nets *, int); void sctp_send_ecn_echo(struct sctp_tcb *, struct sctp_nets *, uint32_t); void sctp_send_packet_dropped(struct sctp_tcb *, struct sctp_nets *, struct mbuf *, - int, int); - + int, int, int); -void sctp_send_cwr(struct sctp_tcb *, struct sctp_nets *, uint32_t); +void sctp_send_cwr(struct sctp_tcb *, struct sctp_nets *, uint32_t, uint8_t); -void -sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk, - int number_entries, uint16_t * list, - uint32_t seq, uint32_t resp_seq, uint32_t last_sent); - -void -sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk, - int number_entries, uint16_t * list, - uint32_t seq); void -sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk, - uint32_t seq); +sctp_add_stream_reset_out(struct sctp_tmit_chunk *, + int, uint16_t *, uint32_t, uint32_t, uint32_t); void -sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk, - uint32_t resp_seq, uint32_t result); + sctp_add_stream_reset_result(struct sctp_tmit_chunk *, uint32_t, uint32_t); void -sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk, - uint32_t resp_seq, uint32_t result, - uint32_t send_una, uint32_t recv_next); +sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *, + uint32_t, uint32_t, uint32_t, uint32_t); int -sctp_send_str_reset_req(struct sctp_tcb *stcb, - int number_entries, - uint16_t * list, - uint8_t send_out_req, - uint32_t resp_seq, - uint8_t send_in_req, - uint8_t send_tsn_req, - uint8_t add_str, - uint16_t adding); - +sctp_send_str_reset_req(struct sctp_tcb *, int, uint16_t *, uint8_t, uint8_t, + uint8_t, uint8_t, uint16_t, uint16_t, uint8_t); void -sctp_send_abort(struct mbuf *, int, struct sctphdr *, uint32_t, - struct mbuf *, uint32_t, uint16_t); +sctp_send_abort(struct mbuf *, int, struct sockaddr *, struct sockaddr *, + struct sctphdr *, uint32_t, struct mbuf *, + uint8_t, uint32_t, + uint32_t, uint16_t); -void sctp_send_operr_to(struct mbuf *, int, struct mbuf *, uint32_t, uint32_t, uint16_t); +void +sctp_send_operr_to(struct sockaddr *, struct sockaddr *, + struct sctphdr *, uint32_t, struct mbuf *, + uint8_t, uint32_t, + uint32_t, uint16_t); #endif /* _KERNEL || __Userspace__ */ -#if defined(_KERNEL) || defined (__Userspace__) +#if defined(_KERNEL) || defined(__Userspace__) int sctp_sosend(struct socket *so, struct sockaddr *addr, diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c index 98c5b707..8a0cdb42 100644 --- a/freebsd/sys/netinet/sctp_pcb.c +++ b/freebsd/sys/netinet/sctp_pcb.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,8 +32,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_pcb.c,v 1.38 2005/03/06 16:04:18 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -47,7 +47,14 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#ifdef INET6 +#include +#endif +#include +#include +#include VNET_DEFINE(struct sctp_base_info, system_base_info); @@ -62,11 +69,11 @@ SCTP6_ARE_ADDR_EQUAL(struct sockaddr_in6 *a, struct sockaddr_in6 *b) memcpy(&tmp_a, a, sizeof(struct sockaddr_in6)); if (sa6_embedscope(&tmp_a, MODULE_GLOBAL(ip6_use_defzone)) != 0) { - return 0; + return (0); } memcpy(&tmp_b, b, sizeof(struct sockaddr_in6)); if (sa6_embedscope(&tmp_b, MODULE_GLOBAL(ip6_use_defzone)) != 0) { - return 0; + return (0); } return (IN6_ARE_ADDR_EQUAL(&tmp_a.sin6_addr, &tmp_b.sin6_addr)); } @@ -89,11 +96,10 @@ sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb) spcb->readq_count = SCTP_BASE_INFO(ipi_count_readq); spcb->stream_oque = SCTP_BASE_INFO(ipi_count_strmoq); spcb->free_chunks = SCTP_BASE_INFO(ipi_free_chunks); - SCTP_INP_INFO_RUNLOCK(); } -/* +/*- * Addresses are added to VRF's (Virtual Router's). For BSD we * have only the default VRF 0. We maintain a hash list of * VRF's. Each VRF has its own list of sctp_ifn's. Each of @@ -209,7 +215,6 @@ sctp_find_ifn(void *ifn, uint32_t ifn_index) } - struct sctp_vrf * sctp_find_vrf(uint32_t vrf_id) { @@ -225,6 +230,7 @@ sctp_find_vrf(uint32_t vrf_id) return (NULL); } + void sctp_free_vrf(struct sctp_vrf *vrf) { @@ -240,6 +246,7 @@ sctp_free_vrf(struct sctp_vrf *vrf) } } + void sctp_free_ifn(struct sctp_ifn *sctp_ifnp) { @@ -253,6 +260,7 @@ sctp_free_ifn(struct sctp_ifn *sctp_ifnp) } } + void sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu) { @@ -278,6 +286,7 @@ sctp_free_ifa(struct sctp_ifa *sctp_ifap) } } + static void sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock) { @@ -300,12 +309,13 @@ sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock) sctp_free_ifn(sctp_ifnp); } + void sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index) { struct sctp_vrf *vrf; - struct sctp_ifa *sctp_ifap = NULL; + struct sctp_ifa *sctp_ifap; SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(vrf_id); @@ -324,19 +334,9 @@ sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, goto out; } if (if_name) { - int len1, len2; - - len1 = strlen(if_name); - len2 = strlen(sctp_ifap->ifn_p->ifn_name); - if (len1 != len2) { - SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different length %d vs %d - ignored\n", - len1, len2); - goto out; - } - if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) { + if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) != 0) { SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n", - sctp_ifap->ifn_p->ifn_name, - if_name); + sctp_ifap->ifn_p->ifn_name, if_name); goto out; } } else { @@ -353,12 +353,13 @@ out: SCTP_IPI_ADDR_RUNLOCK(); } + void sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index) { struct sctp_vrf *vrf; - struct sctp_ifa *sctp_ifap = NULL; + struct sctp_ifa *sctp_ifap; SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(vrf_id); @@ -377,19 +378,9 @@ sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, goto out; } if (if_name) { - int len1, len2; - - len1 = strlen(if_name); - len2 = strlen(sctp_ifap->ifn_p->ifn_name); - if (len1 != len2) { - SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different length %d vs %d - ignored\n", - len1, len2); - goto out; - } - if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) { + if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) != 0) { SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n", - sctp_ifap->ifn_p->ifn_name, - if_name); + sctp_ifap->ifn_p->ifn_name, if_name); goto out; } } else { @@ -406,6 +397,7 @@ out: SCTP_IPI_ADDR_RUNLOCK(); } + /*- * Add an ifa to an ifn. * Register the interface as necessary. @@ -422,10 +414,20 @@ sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap) /* update address counts */ sctp_ifnp->ifa_count++; ifa_af = sctp_ifap->address.sa.sa_family; - if (ifa_af == AF_INET) + switch (ifa_af) { +#ifdef INET + case AF_INET: sctp_ifnp->num_v4++; - else + break; +#endif +#ifdef INET6 + case AF_INET6: sctp_ifnp->num_v6++; + break; +#endif + default: + break; + } if (sctp_ifnp->ifa_count == 1) { /* register the new interface */ SCTP_REGISTER_INTERFACE(sctp_ifnp->ifn_index, ifa_af); @@ -433,6 +435,7 @@ sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap) } } + /*- * Remove an ifa from its ifn. * If no more addresses exist, remove the ifn too. Otherwise, re-register @@ -442,18 +445,25 @@ sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap) static void sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap) { - uint32_t ifn_index; - LIST_REMOVE(sctp_ifap, next_ifa); if (sctp_ifap->ifn_p) { /* update address counts */ sctp_ifap->ifn_p->ifa_count--; - if (sctp_ifap->address.sa.sa_family == AF_INET6) - sctp_ifap->ifn_p->num_v6--; - else if (sctp_ifap->address.sa.sa_family == AF_INET) + switch (sctp_ifap->address.sa.sa_family) { +#ifdef INET + case AF_INET: sctp_ifap->ifn_p->num_v4--; + break; +#endif +#ifdef INET6 + case AF_INET6: + sctp_ifap->ifn_p->num_v6--; + break; +#endif + default: + break; + } - ifn_index = sctp_ifap->ifn_p->ifn_index; if (LIST_EMPTY(&sctp_ifap->ifn_p->ifalist)) { /* remove the ifn, possibly freeing it */ sctp_delete_ifn(sctp_ifap->ifn_p, SCTP_ADDR_LOCKED); @@ -461,13 +471,13 @@ sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap) /* re-register address family type, if needed */ if ((sctp_ifap->ifn_p->num_v6 == 0) && (sctp_ifap->ifn_p->registered_af == AF_INET6)) { - SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET6); - SCTP_REGISTER_INTERFACE(ifn_index, AF_INET); + SCTP_DEREGISTER_INTERFACE(sctp_ifap->ifn_p->ifn_index, AF_INET6); + SCTP_REGISTER_INTERFACE(sctp_ifap->ifn_p->ifn_index, AF_INET); sctp_ifap->ifn_p->registered_af = AF_INET; } else if ((sctp_ifap->ifn_p->num_v4 == 0) && (sctp_ifap->ifn_p->registered_af == AF_INET)) { - SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET); - SCTP_REGISTER_INTERFACE(ifn_index, AF_INET6); + SCTP_DEREGISTER_INTERFACE(sctp_ifap->ifn_p->ifn_index, AF_INET); + SCTP_REGISTER_INTERFACE(sctp_ifap->ifn_p->ifn_index, AF_INET6); sctp_ifap->ifn_p->registered_af = AF_INET6; } /* free the ifn refcount */ @@ -477,6 +487,7 @@ sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap) } } + struct sctp_ifa * sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, uint32_t ifn_type, const char *if_name, void *ifa, @@ -532,9 +543,9 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, atomic_add_int(&vrf->refcount, 1); sctp_ifnp->ifn_mtu = SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, addr->sa_family); if (if_name != NULL) { - memcpy(sctp_ifnp->ifn_name, if_name, SCTP_IFNAMSIZ); + snprintf(sctp_ifnp->ifn_name, SCTP_IFNAMSIZ, "%s", if_name); } else { - memcpy(sctp_ifnp->ifn_name, "unknown", min(7, SCTP_IFNAMSIZ)); + snprintf(sctp_ifnp->ifn_name, SCTP_IFNAMSIZ, "%s", "unknown"); } hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))]; LIST_INIT(&sctp_ifnp->ifalist); @@ -551,7 +562,7 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, (sctp_ifap->ifn_p->ifn_index == ifn_index)) { SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n", sctp_ifap->ifn_p->ifn_name, ifn_index, - sctp_ifap); + (void *)sctp_ifap); if (new_ifn_af) { /* Remove the created one that we don't want */ sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED); @@ -573,7 +584,7 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, * old one */ SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n", - sctp_ifap, sctp_ifap->ifn_p->ifn_name, + (void *)sctp_ifap, sctp_ifap->ifn_p->ifn_name, sctp_ifap->ifn_p->ifn_index, if_name, ifn_index); /* remove the address from the old ifn */ @@ -585,7 +596,7 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, /* repair ifnp which was NULL ? */ sctp_ifap->localifa_flags = SCTP_ADDR_VALID; SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n", - sctp_ifnp, sctp_ifap); + (void *)sctp_ifnp, (void *)sctp_ifap); sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap); } goto exit_stage_left; @@ -609,6 +620,7 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, sctp_ifap->flags = ifa_flags; /* Set scope */ switch (sctp_ifap->address.sa.sa_family) { +#ifdef INET case AF_INET: { struct sockaddr_in *sin; @@ -626,6 +638,7 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, new_ifn_af = AF_INET; break; } +#endif #ifdef INET6 case AF_INET6: { @@ -739,19 +752,9 @@ sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr, * panda who might recycle indexes fast. */ if (if_name) { - int len1, len2; - - len1 = min(SCTP_IFNAMSIZ, strlen(if_name)); - len2 = min(SCTP_IFNAMSIZ, strlen(sctp_ifap->ifn_p->ifn_name)); - if (len1 && len2 && (len1 == len2)) { - /* we can compare them */ - if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) == 0) { - /* - * They match its a correct - * delete - */ - valid = 1; - } + if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) == 0) { + /* They match its a correct delete */ + valid = 1; } } if (!valid) { @@ -769,7 +772,7 @@ sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr, return; } } - SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", sctp_ifap); + SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", (void *)sctp_ifap); sctp_ifap->localifa_flags &= SCTP_ADDR_VALID; sctp_ifap->localifa_flags |= SCTP_BEING_DELETED; vrf->total_ifa_count--; @@ -823,6 +826,160 @@ out_now: } +static int +sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to) +{ + int loopback_scope, ipv4_local_scope, local_scope, site_scope; + int ipv4_addr_legal, ipv6_addr_legal; + struct sctp_vrf *vrf; + struct sctp_ifn *sctp_ifn; + struct sctp_ifa *sctp_ifa; + + loopback_scope = stcb->asoc.scope.loopback_scope; + ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope; + local_scope = stcb->asoc.scope.local_scope; + site_scope = stcb->asoc.scope.site_scope; + ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal; + ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal; + + SCTP_IPI_ADDR_RLOCK(); + vrf = sctp_find_vrf(stcb->asoc.vrf_id); + if (vrf == NULL) { + /* no vrf, no addresses */ + SCTP_IPI_ADDR_RUNLOCK(); + return (0); + } + if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { + LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { + if ((loopback_scope == 0) && + SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { + continue; + } + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { + if (sctp_is_addr_restricted(stcb, sctp_ifa) && + (!sctp_is_addr_pending(stcb, sctp_ifa))) { + /* + * We allow pending addresses, where + * we have sent an asconf-add to be + * considered valid. + */ + continue; + } + switch (sctp_ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: + if (ipv4_addr_legal) { + struct sockaddr_in *sin, + *rsin; + + sin = &sctp_ifa->address.sin; + rsin = (struct sockaddr_in *)to; + if ((ipv4_local_scope == 0) && + IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { + continue; + } + if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { + SCTP_IPI_ADDR_RUNLOCK(); + return (1); + } + } + break; +#endif +#ifdef INET6 + case AF_INET6: + if (ipv6_addr_legal) { + struct sockaddr_in6 *sin6, + *rsin6; + + sin6 = &sctp_ifa->address.sin6; + rsin6 = (struct sockaddr_in6 *)to; + if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { + if (local_scope == 0) + continue; + if (sin6->sin6_scope_id == 0) { + if (sa6_recoverscope(sin6) != 0) + continue; + } + } + if ((site_scope == 0) && + (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { + continue; + } + if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { + SCTP_IPI_ADDR_RUNLOCK(); + return (1); + } + } + break; +#endif + default: + /* TSNH */ + break; + } + } + } + } else { + struct sctp_laddr *laddr; + + LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) { + if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { + SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n"); + continue; + } + if (sctp_is_addr_restricted(stcb, laddr->ifa) && + (!sctp_is_addr_pending(stcb, laddr->ifa))) { + /* + * We allow pending addresses, where we have + * sent an asconf-add to be considered + * valid. + */ + continue; + } + if (laddr->ifa->address.sa.sa_family != to->sa_family) { + continue; + } + switch (to->sa_family) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin, *rsin; + + sin = (struct sockaddr_in *)&laddr->ifa->address.sin; + rsin = (struct sockaddr_in *)to; + if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { + SCTP_IPI_ADDR_RUNLOCK(); + return (1); + } + break; + } +#endif +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 *sin6, *rsin6; + + sin6 = (struct sockaddr_in6 *)&laddr->ifa->address.sin6; + rsin6 = (struct sockaddr_in6 *)to; + if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { + SCTP_IPI_ADDR_RUNLOCK(); + return (1); + } + break; + } + +#endif + default: + /* TSNH */ + break; + } + + } + } + SCTP_IPI_ADDR_RUNLOCK(); + return (0); +} + + static struct sctp_tcb * sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id) @@ -842,14 +999,29 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, if ((to == NULL) || (from == NULL)) { return (NULL); } - if (to->sa_family == AF_INET && from->sa_family == AF_INET) { - lport = ((struct sockaddr_in *)to)->sin_port; - rport = ((struct sockaddr_in *)from)->sin_port; - } else if (to->sa_family == AF_INET6 && from->sa_family == AF_INET6) { - lport = ((struct sockaddr_in6 *)to)->sin6_port; - rport = ((struct sockaddr_in6 *)from)->sin6_port; - } else { - return NULL; + switch (to->sa_family) { +#ifdef INET + case AF_INET: + if (from->sa_family == AF_INET) { + lport = ((struct sockaddr_in *)to)->sin_port; + rport = ((struct sockaddr_in *)from)->sin_port; + } else { + return (NULL); + } + break; +#endif +#ifdef INET6 + case AF_INET6: + if (from->sa_family == AF_INET6) { + lport = ((struct sockaddr_in6 *)to)->sin6_port; + rport = ((struct sockaddr_in6 *)from)->sin6_port; + } else { + return (NULL); + } + break; +#endif + default: + return (NULL); } ephead = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))]; /* @@ -890,17 +1062,20 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, if (laddr->ifa->address.sa.sa_family == to->sa_family) { /* see if it matches */ - struct sockaddr_in *intf_addr, *sin; - - intf_addr = &laddr->ifa->address.sin; - sin = (struct sockaddr_in *)to; +#ifdef INET if (from->sa_family == AF_INET) { + struct sockaddr_in *intf_addr, + *sin; + + intf_addr = &laddr->ifa->address.sin; + sin = (struct sockaddr_in *)to; if (sin->sin_addr.s_addr == intf_addr->sin_addr.s_addr) { match = 1; break; } } +#endif #ifdef INET6 if (from->sa_family == AF_INET6) { struct sockaddr_in6 *intf_addr6; @@ -929,13 +1104,18 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, * Ok if we hit here the ep has the address, does it hold * the tcb? */ - + /* XXX: Why don't we TAILQ_FOREACH through sctp_asoc_list? */ stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { SCTP_INP_RUNLOCK(inp); continue; } SCTP_TCB_LOCK(stcb); + if (!sctp_does_stcb_own_this_addr(stcb, to)) { + SCTP_TCB_UNLOCK(stcb); + SCTP_INP_RUNLOCK(inp); + continue; + } if (stcb->rport != rport) { /* remote port does not match. */ SCTP_TCB_UNLOCK(stcb); @@ -947,6 +1127,11 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, SCTP_INP_RUNLOCK(inp); continue; } + if (!sctp_does_stcb_own_this_addr(stcb, to)) { + SCTP_TCB_UNLOCK(stcb); + SCTP_INP_RUNLOCK(inp); + continue; + } /* Does this TCB have a matching address? */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { @@ -955,6 +1140,7 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, continue; } switch (from->sa_family) { +#ifdef INET case AF_INET: { struct sockaddr_in *sin, *rsin; @@ -977,6 +1163,7 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, } break; } +#endif #ifdef INET6 case AF_INET6: { @@ -1012,148 +1199,6 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, return (NULL); } -static int -sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to) -{ - int loopback_scope, ipv4_local_scope, local_scope, site_scope; - int ipv4_addr_legal, ipv6_addr_legal; - struct sctp_vrf *vrf; - struct sctp_ifn *sctp_ifn; - struct sctp_ifa *sctp_ifa; - - loopback_scope = stcb->asoc.loopback_scope; - ipv4_local_scope = stcb->asoc.ipv4_local_scope; - local_scope = stcb->asoc.local_scope; - site_scope = stcb->asoc.site_scope; - ipv4_addr_legal = ipv6_addr_legal = 0; - if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - ipv6_addr_legal = 1; - if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) { - ipv4_addr_legal = 1; - } - } else { - ipv4_addr_legal = 1; - } - - SCTP_IPI_ADDR_RLOCK(); - vrf = sctp_find_vrf(stcb->asoc.vrf_id); - if (vrf == NULL) { - /* no vrf, no addresses */ - SCTP_IPI_ADDR_RUNLOCK(); - return (0); - } - if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { - LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { - if ((loopback_scope == 0) && - SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { - continue; - } - LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { - if (sctp_is_addr_restricted(stcb, sctp_ifa)) - continue; - switch (sctp_ifa->address.sa.sa_family) { -#ifdef INET - case AF_INET: - if (ipv4_addr_legal) { - struct sockaddr_in *sin, - *rsin; - - sin = &sctp_ifa->address.sin; - rsin = (struct sockaddr_in *)to; - if ((ipv4_local_scope == 0) && - IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { - continue; - } - if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { - SCTP_IPI_ADDR_RUNLOCK(); - return (1); - } - } - break; -#endif -#ifdef INET6 - case AF_INET6: - if (ipv6_addr_legal) { - struct sockaddr_in6 *sin6, - *rsin6; - - sin6 = &sctp_ifa->address.sin6; - rsin6 = (struct sockaddr_in6 *)to; - if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { - if (local_scope == 0) - continue; - if (sin6->sin6_scope_id == 0) { - if (sa6_recoverscope(sin6) != 0) - continue; - } - } - if ((site_scope == 0) && - (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { - continue; - } - if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { - SCTP_IPI_ADDR_RUNLOCK(); - return (1); - } - } - break; -#endif - default: - /* TSNH */ - break; - } - } - } - } else { - struct sctp_laddr *laddr; - - LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) { - if (sctp_is_addr_restricted(stcb, laddr->ifa)) { - continue; - } - if (laddr->ifa->address.sa.sa_family != to->sa_family) { - continue; - } - switch (to->sa_family) { -#ifdef INET - case AF_INET: - { - struct sockaddr_in *sin, *rsin; - - sin = (struct sockaddr_in *)&laddr->ifa->address.sin; - rsin = (struct sockaddr_in *)to; - if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { - SCTP_IPI_ADDR_RUNLOCK(); - return (1); - } - break; - } -#endif -#ifdef INET6 - case AF_INET6: - { - struct sockaddr_in6 *sin6, *rsin6; - - sin6 = (struct sockaddr_in6 *)&laddr->ifa->address.sin6; - rsin6 = (struct sockaddr_in6 *)to; - if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { - SCTP_IPI_ADDR_RUNLOCK(); - return (1); - } - break; - } - -#endif - default: - /* TSNH */ - break; - } - - } - } - SCTP_IPI_ADDR_RUNLOCK(); - return (0); -} /* * rules for use @@ -1175,11 +1220,18 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, uint16_t rport; inp = *inp_p; - if (remote->sa_family == AF_INET) { + switch (remote->sa_family) { +#ifdef INET + case AF_INET: rport = (((struct sockaddr_in *)remote)->sin_port); - } else if (remote->sa_family == AF_INET6) { + break; +#endif +#ifdef INET6 + case AF_INET6: rport = (((struct sockaddr_in6 *)remote)->sin6_port); - } else { + break; +#endif + default: return (NULL); } if (locked_tcb) { @@ -1191,7 +1243,8 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, SCTP_TCB_UNLOCK(locked_tcb); } SCTP_INP_INFO_RLOCK(); - if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { /*- * Now either this guy is our listener or it's the * connector. If it is the one that issued the connect, then @@ -1252,6 +1305,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, continue; } switch (remote->sa_family) { +#ifdef INET case AF_INET: { struct sockaddr_in *sin, @@ -1280,6 +1334,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, } break; } +#endif #ifdef INET6 case AF_INET6: { @@ -1353,6 +1408,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, continue; } switch (remote->sa_family) { +#ifdef INET case AF_INET: { struct sockaddr_in *sin, @@ -1381,6 +1437,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, } break; } +#endif #ifdef INET6 case AF_INET6: { @@ -1431,11 +1488,11 @@ null_return: return (NULL); } + /* * Find an association for a specific endpoint using the association id given * out in the COMM_UP notification */ - struct sctp_tcb * sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock) { @@ -1496,37 +1553,39 @@ sctp_findassociation_ep_asocid(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int } +/* + * Endpoint probe expects that the INP_INFO is locked. + */ static struct sctp_inpcb * sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, uint16_t lport, uint32_t vrf_id) { struct sctp_inpcb *inp; - struct sockaddr_in *sin; + struct sctp_laddr *laddr; -#ifdef INET6 - struct sockaddr_in6 *sin6; +#ifdef INET + struct sockaddr_in *sin; #endif - struct sctp_laddr *laddr; - #ifdef INET6 + struct sockaddr_in6 *sin6; struct sockaddr_in6 *intf_addr6; #endif - int fnd; - /* - * Endpoint probe expects that the INP_INFO is locked. - */ +#ifdef INET sin = NULL; +#endif #ifdef INET6 sin6 = NULL; #endif switch (nam->sa_family) { +#ifdef INET case AF_INET: sin = (struct sockaddr_in *)nam; break; +#endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nam; @@ -1549,6 +1608,7 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) && (inp->sctp_lport == lport)) { /* got it */ +#ifdef INET if ((nam->sa_family == AF_INET) && (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && SCTP_IPV6_V6ONLY(inp)) { @@ -1556,12 +1616,15 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, SCTP_INP_RUNLOCK(inp); continue; } +#endif +#ifdef INET6 /* A V6 address and the endpoint is NOT bound V6 */ if (nam->sa_family == AF_INET6 && (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { SCTP_INP_RUNLOCK(inp); continue; } +#endif /* does a VRF id match? */ fnd = 0; if (inp->def_vrf_id == vrf_id) @@ -1574,18 +1637,26 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, } SCTP_INP_RUNLOCK(inp); } - if ((nam->sa_family == AF_INET) && - (sin->sin_addr.s_addr == INADDR_ANY)) { - /* Can't hunt for one that has no address specified */ - return (NULL); - } + switch (nam->sa_family) { +#ifdef INET + case AF_INET: + if (sin->sin_addr.s_addr == INADDR_ANY) { + /* Can't hunt for one that has no address specified */ + return (NULL); + } + break; +#endif #ifdef INET6 - if ((nam->sa_family == AF_INET6) && - (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) { - /* Can't hunt for one that has no address specified */ - return (NULL); - } + case AF_INET6: + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + /* Can't hunt for one that has no address specified */ + return (NULL); + } + break; #endif + default: + break; + } /* * ok, not bound to all so see if we can find a EP bound to this * address. @@ -1624,24 +1695,23 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, continue; } SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ", - laddr->ifa); + (void *)laddr->ifa); if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n"); continue; } if (laddr->ifa->address.sa.sa_family == nam->sa_family) { /* possible, see if it matches */ - struct sockaddr_in *intf_addr; - - intf_addr = &laddr->ifa->address.sin; switch (nam->sa_family) { +#ifdef INET case AF_INET: if (sin->sin_addr.s_addr == - intf_addr->sin_addr.s_addr) { + laddr->ifa->address.sin.sin_addr.s_addr) { SCTP_INP_RUNLOCK(inp); return (inp); } break; +#endif #ifdef INET6 case AF_INET6: intf_addr6 = &laddr->ifa->address.sin6; @@ -1770,19 +1840,32 @@ sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock, */ struct sctp_inpcb *inp; struct sctppcbhead *head; - struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; int lport; unsigned int i; - if (nam->sa_family == AF_INET) { +#ifdef INET + struct sockaddr_in *sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6; + +#endif + + switch (nam->sa_family) { +#ifdef INET + case AF_INET: sin = (struct sockaddr_in *)nam; - lport = ((struct sockaddr_in *)nam)->sin_port; - } else if (nam->sa_family == AF_INET6) { + lport = sin->sin_port; + break; +#endif +#ifdef INET6 + case AF_INET6: sin6 = (struct sockaddr_in6 *)nam; - lport = ((struct sockaddr_in6 *)nam)->sin6_port; - } else { - /* unsupported family */ + lport = sin6->sin6_port; + break; +#endif + default: return (NULL); } /* @@ -1826,31 +1909,32 @@ sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock, return (inp); } + /* * Find an association for an endpoint with the pointer to whom you want to * send to and the endpoint pointer. The address can be IPv4 or IPv6. We may * need to change the *to to some other struct like a mbuf... */ struct sctp_tcb * -sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from, +sctp_findassociation_addr_sa(struct sockaddr *from, struct sockaddr *to, struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool, uint32_t vrf_id) { struct sctp_inpcb *inp = NULL; - struct sctp_tcb *retval; + struct sctp_tcb *stcb; SCTP_INP_INFO_RLOCK(); if (find_tcp_pool) { if (inp_p != NULL) { - retval = sctp_tcb_special_locate(inp_p, from, to, netp, + stcb = sctp_tcb_special_locate(inp_p, from, to, netp, vrf_id); } else { - retval = sctp_tcb_special_locate(&inp, from, to, netp, + stcb = sctp_tcb_special_locate(&inp, from, to, netp, vrf_id); } - if (retval != NULL) { + if (stcb != NULL) { SCTP_INP_INFO_RUNLOCK(); - return (retval); + return (stcb); } } inp = sctp_pcb_findep(to, 0, 1, vrf_id); @@ -1858,7 +1942,6 @@ sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from, *inp_p = inp; } SCTP_INP_INFO_RUNLOCK(); - if (inp == NULL) { return (NULL); } @@ -1869,13 +1952,13 @@ sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from, * inbound packet side. */ if (inp_p != NULL) { - retval = sctp_findassociation_ep_addr(inp_p, from, netp, to, + stcb = sctp_findassociation_ep_addr(inp_p, from, netp, to, NULL); } else { - retval = sctp_findassociation_ep_addr(&inp, from, netp, to, + stcb = sctp_findassociation_ep_addr(&inp, from, netp, to, NULL); } - return retval; + return (stcb); } @@ -1885,26 +1968,37 @@ sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from, * address will be used to lookup the TCB and see if one exits. */ static struct sctp_tcb * -sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset, +sctp_findassociation_special_addr(struct mbuf *m, int offset, struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, - struct sockaddr *dest) + struct sockaddr *dst) { - struct sockaddr_in sin4; - struct sockaddr_in6 sin6; struct sctp_paramhdr *phdr, parm_buf; - struct sctp_tcb *retval; + struct sctp_tcb *stcb; uint32_t ptype, plen; +#ifdef INET + struct sockaddr_in sin4; + +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; + +#endif + +#ifdef INET memset(&sin4, 0, sizeof(sin4)); - memset(&sin6, 0, sizeof(sin6)); sin4.sin_len = sizeof(sin4); sin4.sin_family = AF_INET; sin4.sin_port = sh->src_port; +#endif +#ifdef INET6 + memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; sin6.sin6_port = sh->src_port; +#endif - retval = NULL; + stcb = NULL; offset += sizeof(struct sctp_init_chunk); phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); @@ -1915,6 +2009,7 @@ sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset, if (plen == 0) { break; } +#ifdef INET if (ptype == SCTP_IPV4_ADDRESS && plen == sizeof(struct sctp_ipv4addr_param)) { /* Get the rest of the address */ @@ -1928,12 +2023,15 @@ sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset, p4 = (struct sctp_ipv4addr_param *)phdr; memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr)); /* look it up */ - retval = sctp_findassociation_ep_addr(inp_p, - (struct sockaddr *)&sin4, netp, dest, NULL); - if (retval != NULL) { - return (retval); + stcb = sctp_findassociation_ep_addr(inp_p, + (struct sockaddr *)&sin4, netp, dst, NULL); + if (stcb != NULL) { + return (stcb); } - } else if (ptype == SCTP_IPV6_ADDRESS && + } +#endif +#ifdef INET6 + if (ptype == SCTP_IPV6_ADDRESS && plen == sizeof(struct sctp_ipv6addr_param)) { /* Get the rest of the address */ struct sctp_ipv6addr_param ip6_parm, *p6; @@ -1946,12 +2044,13 @@ sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset, p6 = (struct sctp_ipv6addr_param *)phdr; memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr)); /* look it up */ - retval = sctp_findassociation_ep_addr(inp_p, - (struct sockaddr *)&sin6, netp, dest, NULL); - if (retval != NULL) { - return (retval); + stcb = sctp_findassociation_ep_addr(inp_p, + (struct sockaddr *)&sin6, netp, dst, NULL); + if (stcb != NULL) { + return (stcb); } } +#endif offset += SCTP_SIZE32(plen); phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); @@ -1973,8 +2072,6 @@ sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag struct sctp_nets *net; struct sctp_tcb *stcb; - *netp = NULL; - *inp_p = NULL; SCTP_INP_INFO_RLOCK(); head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(vtag, SCTP_BASE_INFO(hashasocmark))]; @@ -1989,6 +2086,10 @@ sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag SCTP_INP_RUNLOCK(stcb->sctp_ep); continue; } + if (stcb->sctp_ep->def_vrf_id != vrf_id) { + SCTP_INP_RUNLOCK(stcb->sctp_ep); + continue; + } SCTP_TCB_LOCK(stcb); SCTP_INP_RUNLOCK(stcb->sctp_ep); if (stcb->asoc.my_vtag == vtag) { @@ -2028,7 +2129,7 @@ sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag if (skip_src_check) { conclusive: if (from) { - net = sctp_findnet(stcb, from); + *netp = sctp_findnet(stcb, from); } else { *netp = NULL; /* unknown */ } @@ -2059,110 +2160,27 @@ sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag return (NULL); } + /* * Find an association with the pointer to the inbound IP packet. This can be * a IPv4 or IPv6 packet. */ struct sctp_tcb * -sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset, +sctp_findassociation_addr(struct mbuf *m, int offset, + struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id) { int find_tcp_pool; - struct ip *iph; - struct sctp_tcb *retval; - struct sockaddr_storage to_store, from_store; - struct sockaddr *to = (struct sockaddr *)&to_store; - struct sockaddr *from = (struct sockaddr *)&from_store; - struct sctp_inpcb *inp; - - iph = mtod(m, struct ip *); - switch (iph->ip_v) { - case IPVERSION: - { - /* its IPv4 */ - struct sockaddr_in *from4; - - from4 = (struct sockaddr_in *)&from_store; - bzero(from4, sizeof(*from4)); - from4->sin_family = AF_INET; - from4->sin_len = sizeof(struct sockaddr_in); - from4->sin_addr.s_addr = iph->ip_src.s_addr; - from4->sin_port = sh->src_port; - break; - } -#ifdef INET6 - case IPV6_VERSION >> 4: - { - /* its IPv6 */ - struct ip6_hdr *ip6; - struct sockaddr_in6 *from6; - - ip6 = mtod(m, struct ip6_hdr *); - from6 = (struct sockaddr_in6 *)&from_store; - bzero(from6, sizeof(*from6)); - from6->sin6_family = AF_INET6; - from6->sin6_len = sizeof(struct sockaddr_in6); - from6->sin6_addr = ip6->ip6_src; - from6->sin6_port = sh->src_port; - /* Get the scopes in properly to the sin6 addr's */ - /* we probably don't need these operations */ - (void)sa6_recoverscope(from6); - sa6_embedscope(from6, MODULE_GLOBAL(ip6_use_defzone)); - break; - } -#endif - default: - /* Currently not supported. */ - return (NULL); - } - + struct sctp_tcb *stcb; + struct sctp_inpcb *inp; - switch (iph->ip_v) { - case IPVERSION: - { - /* its IPv4 */ - struct sockaddr_in *to4; - - to4 = (struct sockaddr_in *)&to_store; - bzero(to4, sizeof(*to4)); - to4->sin_family = AF_INET; - to4->sin_len = sizeof(struct sockaddr_in); - to4->sin_addr.s_addr = iph->ip_dst.s_addr; - to4->sin_port = sh->dest_port; - break; - } -#ifdef INET6 - case IPV6_VERSION >> 4: - { - /* its IPv6 */ - struct ip6_hdr *ip6; - struct sockaddr_in6 *to6; - - ip6 = mtod(m, struct ip6_hdr *); - to6 = (struct sockaddr_in6 *)&to_store; - bzero(to6, sizeof(*to6)); - to6->sin6_family = AF_INET6; - to6->sin6_len = sizeof(struct sockaddr_in6); - to6->sin6_addr = ip6->ip6_dst; - to6->sin6_port = sh->dest_port; - /* Get the scopes in properly to the sin6 addr's */ - /* we probably don't need these operations */ - (void)sa6_recoverscope(to6); - sa6_embedscope(to6, MODULE_GLOBAL(ip6_use_defzone)); - break; - } -#endif - default: - /* TSNH */ - break; - } if (sh->v_tag) { /* we only go down this path if vtag is non-zero */ - retval = sctp_findassoc_by_vtag(from, to, ntohl(sh->v_tag), + stcb = sctp_findassoc_by_vtag(src, dst, ntohl(sh->v_tag), inp_p, netp, sh->src_port, sh->dest_port, 0, vrf_id, 0); - if (retval) { - return (retval); + if (stcb) { + return (stcb); } } find_tcp_pool = 0; @@ -2174,15 +2192,15 @@ sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset, find_tcp_pool = 1; } if (inp_p) { - retval = sctp_findassociation_addr_sa(to, from, inp_p, netp, + stcb = sctp_findassociation_addr_sa(src, dst, inp_p, netp, find_tcp_pool, vrf_id); inp = *inp_p; } else { - retval = sctp_findassociation_addr_sa(to, from, &inp, netp, + stcb = sctp_findassociation_addr_sa(src, dst, &inp, netp, find_tcp_pool, vrf_id); } - SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp); - if (retval == NULL && inp) { + SCTPDBG(SCTP_DEBUG_PCB1, "stcb:%p inp:%p\n", (void *)stcb, (void *)inp); + if (stcb == NULL && inp) { /* Found a EP but not this address */ if ((ch->chunk_type == SCTP_INITIATION) || (ch->chunk_type == SCTP_INITIATION_ACK)) { @@ -2200,15 +2218,15 @@ sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset, } return (NULL); } - retval = sctp_findassociation_special_addr(m, iphlen, - offset, sh, &inp, netp, to); + stcb = sctp_findassociation_special_addr(m, + offset, sh, &inp, netp, dst); if (inp_p != NULL) { *inp_p = inp; } } } - SCTPDBG(SCTP_DEBUG_PCB1, "retval is %p\n", retval); - return (retval); + SCTPDBG(SCTP_DEBUG_PCB1, "stcb is %p\n", (void *)stcb); + return (stcb); } /* @@ -2216,58 +2234,26 @@ sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset, * if the lookup address is 0.0.0.0 or ::0, use the vtag to do the lookup */ struct sctp_tcb * -sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset, - struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id) +sctp_findassociation_ep_asconf(struct mbuf *m, int offset, + struct sockaddr *dst, struct sctphdr *sh, + struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id) { struct sctp_tcb *stcb; - struct sockaddr_in *sin; - -#ifdef INET6 - struct sockaddr_in6 *sin6; - -#endif - struct sockaddr_storage local_store, remote_store; - struct sockaddr *to; - struct ip *iph; - -#ifdef INET6 - struct ip6_hdr *ip6; - -#endif + struct sockaddr_storage remote_store; struct sctp_paramhdr parm_buf, *phdr; int ptype; int zero_address = 0; +#ifdef INET + struct sockaddr_in *sin; - memset(&local_store, 0, sizeof(local_store)); - memset(&remote_store, 0, sizeof(remote_store)); - to = (struct sockaddr *)&local_store; - /* First get the destination address setup too. */ - iph = mtod(m, struct ip *); - switch (iph->ip_v) { - case IPVERSION: - /* its IPv4 */ - sin = (struct sockaddr_in *)&local_store; - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - sin->sin_port = sh->dest_port; - sin->sin_addr.s_addr = iph->ip_dst.s_addr; - break; +#endif #ifdef INET6 - case IPV6_VERSION >> 4: - /* its IPv6 */ - ip6 = mtod(m, struct ip6_hdr *); - sin6 = (struct sockaddr_in6 *)&local_store; - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_port = sh->dest_port; - sin6->sin6_addr = ip6->ip6_dst; - break; + struct sockaddr_in6 *sin6; + #endif - default: - return NULL; - } + memset(&remote_store, 0, sizeof(remote_store)); phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk), &parm_buf, sizeof(struct sctp_paramhdr)); if (phdr == NULL) { @@ -2305,6 +2291,7 @@ sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset, break; } #endif +#ifdef INET case SCTP_IPV4_ADDRESS: { /* ipv4 address param */ @@ -2330,22 +2317,22 @@ sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset, zero_address = 1; break; } +#endif default: /* invalid address param type */ return NULL; } if (zero_address) { - stcb = sctp_findassoc_by_vtag(NULL, to, ntohl(sh->v_tag), inp_p, + stcb = sctp_findassoc_by_vtag(NULL, dst, ntohl(sh->v_tag), inp_p, netp, sh->src_port, sh->dest_port, 1, vrf_id, 0); - /* - * printf("findassociation_ep_asconf: zero lookup address - * finds stcb 0x%x\n", (uint32_t)stcb); - */ + if (stcb != NULL) { + SCTP_INP_DECR_REF(*inp_p); + } } else { stcb = sctp_findassociation_ep_addr(inp_p, (struct sockaddr *)&remote_store, netp, - to, NULL); + dst, NULL); } return (stcb); } @@ -2390,10 +2377,16 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) /* setup socket pointers */ inp->sctp_socket = so; inp->ip_inp.inp.inp_socket = so; +#ifdef INET6 + if (MODULE_GLOBAL(ip6_auto_flowlabel)) { + inp->ip_inp.inp.inp_flags |= IN6P_AUTOFLOWLABEL; + } +#endif inp->sctp_associd_counter = 1; inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT; inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT; inp->sctp_cmt_on_off = SCTP_BASE_SYSCTL(sctp_cmt_on_off); + inp->sctp_ecn_enable = SCTP_BASE_SYSCTL(sctp_ecn_enable); /* init the small hash table we use to track asocid <-> tcb */ inp->sctp_asocidhash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &inp->hashasocidmark); if (inp->sctp_asocidhash == NULL) { @@ -2422,8 +2415,7 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) so->so_pcb = (caddr_t)inp; - if ((SCTP_SO_TYPE(so) == SOCK_DGRAM) || - (SCTP_SO_TYPE(so) == SOCK_SEQPACKET)) { + if (SCTP_SO_TYPE(so) == SOCK_SEQPACKET) { /* UDP style socket */ inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE | SCTP_PCB_FLAGS_UNBOUND); @@ -2514,21 +2506,20 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) m->max_init_times = SCTP_BASE_SYSCTL(sctp_init_rtx_max_default); m->max_send_times = SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default); m->def_net_failure = SCTP_BASE_SYSCTL(sctp_path_rtx_max_default); + m->def_net_pf_threshold = SCTP_BASE_SYSCTL(sctp_path_pf_threshold); m->sctp_sws_sender = SCTP_SWS_SENDER_DEF; m->sctp_sws_receiver = SCTP_SWS_RECEIVER_DEF; m->max_burst = SCTP_BASE_SYSCTL(sctp_max_burst_default); - if ((SCTP_BASE_SYSCTL(sctp_default_cc_module) >= SCTP_CC_RFC2581) && - (SCTP_BASE_SYSCTL(sctp_default_cc_module) <= SCTP_CC_HTCP)) { - m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module); - } else { - /* sysctl done with invalid value, set to 2581 */ - m->sctp_default_cc_module = SCTP_CC_RFC2581; - } + m->fr_max_burst = SCTP_BASE_SYSCTL(sctp_fr_max_burst_default); + + m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module); + m->sctp_default_ss_module = SCTP_BASE_SYSCTL(sctp_default_ss_module); /* number of streams to pre-open on a association */ m->pre_open_stream_count = SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default); /* Add adaptation cookie */ - m->adaptation_layer_indicator = 0x504C5253; + m->adaptation_layer_indicator = 0; + m->adaptation_layer_indicator_provided = 0; /* seed random number generator */ m->random_counter = 1; @@ -2557,6 +2548,11 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) */ m->local_hmacs = sctp_default_supported_hmaclist(); m->local_auth_chunks = sctp_alloc_chunklist(); + m->default_dscp = 0; +#ifdef INET6 + m->default_flowlabel = 0; +#endif + m->port = 0; /* encapsulation disabled by default */ sctp_auth_set_default_chunks(m->local_auth_chunks); LIST_INIT(&m->shared_keys); /* add default NULL key as key id 0 */ @@ -2663,6 +2659,9 @@ sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp, LIST_INSERT_HEAD(&new_inp->sctp_addr_list, laddr, sctp_nxt_addr); new_inp->laddr_count++; + if (oladdr == stcb->asoc.last_used_address) { + stcb->asoc.last_used_address = laddr; + } } } /* @@ -2671,7 +2670,6 @@ sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp, * all of them. */ - stcb->asoc.hb_timer.ep = (void *)new_inp; stcb->asoc.dack_timer.ep = (void *)new_inp; stcb->asoc.asconf_timer.ep = (void *)new_inp; stcb->asoc.strreset_timer.ep = (void *)new_inp; @@ -2682,8 +2680,8 @@ sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp, /* now what about the nets? */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { net->pmtu_timer.ep = (void *)new_inp; + net->hb_timer.ep = (void *)new_inp; net->rxt_timer.ep = (void *)new_inp; - net->fr_timer.ep = (void *)new_inp; } SCTP_INP_WUNLOCK(new_inp); SCTP_INP_WUNLOCK(old_inp); @@ -2714,9 +2712,9 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, ip_inp = (struct inpcb *)so->so_pcb; #ifdef SCTP_DEBUG if (addr) { - SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port:%d\n", + SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port: %d\n", ntohs(((struct sockaddr_in *)addr)->sin_port)); - SCTPDBG(SCTP_DEBUG_PCB1, "Addr :"); + SCTPDBG(SCTP_DEBUG_PCB1, "Addr: "); SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr); } #endif @@ -2731,6 +2729,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, #endif if (addr != NULL) { switch (addr->sa_family) { +#ifdef INET case AF_INET: { struct sockaddr_in *sin; @@ -2760,6 +2759,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, } break; } +#endif #ifdef INET6 case AF_INET6: { @@ -2776,7 +2776,6 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, return (EINVAL); } lport = sin6->sin6_port; - /* * For LOOPBACK the prison_local_ip6() call * will transmute the ipv6 address to the @@ -2814,7 +2813,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, SCTP_INP_INCR_REF(inp); if (lport) { /* - * Did the caller specify a port? if so we must see if a ep + * Did the caller specify a port? if so we must see if an ep * already has this one bound. */ /* got to be root to get at low ports */ @@ -2895,8 +2894,7 @@ continue_anyway: if (bindall) { /* verify that no lport is not used by a singleton */ if ((port_reuse_active == 0) && - (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id)) - ) { + (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id))) { /* Sorry someone already has this one bound */ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) && (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) { @@ -3031,27 +3029,40 @@ continue_anyway: struct sockaddr_storage store_sa; memset(&store_sa, 0, sizeof(store_sa)); - if (addr->sa_family == AF_INET) { - struct sockaddr_in *sin; + switch (addr->sa_family) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin; - sin = (struct sockaddr_in *)&store_sa; - memcpy(sin, addr, sizeof(struct sockaddr_in)); - sin->sin_port = 0; - } else if (addr->sa_family == AF_INET6) { - struct sockaddr_in6 *sin6; + sin = (struct sockaddr_in *)&store_sa; + memcpy(sin, addr, sizeof(struct sockaddr_in)); + sin->sin_port = 0; + break; + } +#endif +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)&store_sa; - memcpy(sin6, addr, sizeof(struct sockaddr_in6)); - sin6->sin6_port = 0; + sin6 = (struct sockaddr_in6 *)&store_sa; + memcpy(sin6, addr, sizeof(struct sockaddr_in6)); + sin6->sin6_port = 0; + break; + } +#endif + default: + break; } /* * first find the interface with the bound address need to * zero out the port to find the address! yuck! can't do * this earlier since need port for sctp_pcb_findep() */ - if (sctp_ifap != NULL) + if (sctp_ifap != NULL) { ifa = sctp_ifap; - else { + } else { /* * Note for BSD we hit here always other O/S's will * pass things in via the sctp_ifap argument @@ -3067,6 +3078,7 @@ continue_anyway: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRNOTAVAIL); return (EADDRNOTAVAIL); } +#ifdef INET6 if (addr->sa_family == AF_INET6) { /* GAK, more FIXME IFA lock? */ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { @@ -3077,6 +3089,7 @@ continue_anyway: return (EINVAL); } } +#endif /* we're not bound all */ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUNDALL; /* allow bindx() to send ASCONF's for binding changes */ @@ -3104,7 +3117,7 @@ continue_anyway: /* put it in the bucket */ LIST_INSERT_HEAD(head, inp, sctp_hash); SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d - in tcp_pool=%d\n", - head, ntohs(lport), port_reuse_active); + (void *)head, ntohs(lport), port_reuse_active); /* set in the port */ inp->sctp_lport = lport; @@ -3155,11 +3168,8 @@ sctp_iterator_inp_being_freed(struct sctp_inpcb *inp) * may be still pending on the list */ SCTP_IPI_ITERATOR_WQ_LOCK(); - it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); - while (it) { - nit = TAILQ_NEXT(it, sctp_nxt_itr); + TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) { if (it->vn != curvnet) { - it = nit; continue; } if (it->inp == inp) { @@ -3184,7 +3194,6 @@ sctp_iterator_inp_being_freed(struct sctp_inpcb *inp) */ SCTP_INP_DECR_REF(inp); } - it = nit; } SCTP_IPI_ITERATOR_WQ_UNLOCK(); } @@ -3201,17 +3210,14 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) * freeing. a) Any local lists. b) Any associations. c) The hash of * all associations. d) finally the ep itself. */ - struct sctp_pcb *m; struct sctp_tcb *asoc, *nasoc; struct sctp_laddr *laddr, *nladdr; struct inpcb *ip_pcb; struct socket *so; int being_refed = 0; - struct sctp_queued_to_read *sq; - - + struct sctp_queued_to_read *sq, *nsq; int cnt; - sctp_sharedkey_t *shared_key; + sctp_sharedkey_t *shared_key, *nshared_key; #ifdef SCTP_LOG_CLOSING @@ -3254,7 +3260,6 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) sctp_m_freem(inp->pkt); inp->pkt = NULL; } - m = &inp->sctp_ep; ip_pcb = &inp->ip_inp.inp; /* we could just cast the main pointer * here but I will be nice :> (i.e. * ip_pcb = ep;) */ @@ -3262,10 +3267,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) int cnt_in_sd; cnt_in_sd = 0; - for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL; - asoc = nasoc) { + LIST_FOREACH_SAFE(asoc, &inp->sctp_asoc_list, sctp_tcblist, nasoc) { SCTP_TCB_LOCK(asoc); - nasoc = LIST_NEXT(asoc, sctp_tcblist); if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { /* Skip guys being freed */ cnt_in_sd++; @@ -3304,32 +3307,22 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) if ((asoc->asoc.size_on_reasm_queue > 0) || (asoc->asoc.control_pdapi) || (asoc->asoc.size_on_all_streams > 0) || - (so && (so->so_rcv.sb_cc > 0)) - ) { + (so && (so->so_rcv.sb_cc > 0))) { /* Left with Data unread */ struct mbuf *op_err; - op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), + op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA); if (op_err) { /* Fill in the user initiated abort */ struct sctp_paramhdr *ph; - uint32_t *ippp; - - SCTP_BUF_LEN(op_err) = - sizeof(struct sctp_paramhdr) + sizeof(uint32_t); - ph = mtod(op_err, - struct sctp_paramhdr *); - ph->param_type = htons( - SCTP_CAUSE_USER_INITIATED_ABT); + + SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr); + ph = mtod(op_err, struct sctp_paramhdr *); + ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(op_err)); - ippp = (uint32_t *) (ph + 1); - *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3); } asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3; -#if defined(SCTP_PANIC_ON_ABORT) - panic("inpcb_free does an abort"); -#endif sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || @@ -3343,26 +3336,33 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) continue; } else if (TAILQ_EMPTY(&asoc->asoc.send_queue) && TAILQ_EMPTY(&asoc->asoc.sent_queue) && - (asoc->asoc.stream_queue_cnt == 0) - ) { + (asoc->asoc.stream_queue_cnt == 0)) { if (asoc->asoc.locked_on_sending) { goto abort_anyway; } if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { + struct sctp_nets *netp; + /* * there is nothing queued to send, * so I send shutdown */ - sctp_send_shutdown(asoc, asoc->asoc.primary_destination); if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING); + sctp_stop_timers_for_shutdown(asoc); + if (asoc->asoc.alternate) { + netp = asoc->asoc.alternate; + } else { + netp = asoc->asoc.primary_destination; + } + sctp_send_shutdown(asoc, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc, - asoc->asoc.primary_destination); + netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc, asoc->asoc.primary_destination); sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_LOCKED); @@ -3379,7 +3379,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) sctp_streamhead); if (sp == NULL) { SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n", - asoc->asoc.locked_on_sending, + (void *)asoc->asoc.locked_on_sending, asoc->asoc.locked_on_sending->stream_no); } else { if ((sp->length == 0) && (sp->msg_is_complete == 0)) @@ -3392,7 +3392,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) struct mbuf *op_err; abort_anyway: - op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), + op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA); if (op_err) { /* @@ -3400,24 +3400,13 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) * initiated abort */ struct sctp_paramhdr *ph; - uint32_t *ippp; - - SCTP_BUF_LEN(op_err) = - (sizeof(struct sctp_paramhdr) + - sizeof(uint32_t)); - ph = mtod(op_err, - struct sctp_paramhdr *); - ph->param_type = htons( - SCTP_CAUSE_USER_INITIATED_ABT); + + SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr); + ph = mtod(op_err, struct sctp_paramhdr *); + ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(op_err)); - ippp = (uint32_t *) (ph + 1); - *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5); } asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5; -#if defined(SCTP_PANIC_ON_ABORT) - panic("inpcb_free does an abort"); -#endif - sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || @@ -3464,10 +3453,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) * have a contest on the INP lock.. which would cause us to die ... */ cnt = 0; - for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL; - asoc = nasoc) { + LIST_FOREACH_SAFE(asoc, &inp->sctp_asoc_list, sctp_tcblist, nasoc) { SCTP_TCB_LOCK(asoc); - nasoc = LIST_NEXT(asoc, sctp_tcblist); if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) { asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; @@ -3481,28 +3468,19 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) && ((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) { struct mbuf *op_err; - uint32_t *ippp; - op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), + op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA); if (op_err) { /* Fill in the user initiated abort */ struct sctp_paramhdr *ph; - SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) + - sizeof(uint32_t)); + SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr); ph = mtod(op_err, struct sctp_paramhdr *); - ph->param_type = htons( - SCTP_CAUSE_USER_INITIATED_ABT); + ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(op_err)); - ippp = (uint32_t *) (ph + 1); - *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_7); - } asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7; -#if defined(SCTP_PANIC_ON_ABORT) - panic("inpcb_free does an abort"); -#endif sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); } else if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { @@ -3584,7 +3562,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) inp->sctp_asocidhash = NULL; } /* sa_ignore FREED_MEMORY */ - while ((sq = TAILQ_FIRST(&inp->read_queue)) != NULL) { + TAILQ_FOREACH_SAFE(sq, &inp->read_queue, next, nsq) { /* Its only abandoned if it had data left */ if (sq->length) SCTP_STAT_INCR(sctps_left_abandon); @@ -3610,7 +3588,6 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) * macro here since le_next will get freed as part of the * sctp_free_assoc() call. */ - cnt = 0; if (so) { #ifdef IPSEC ipsec_delete_pcbpolicy(ip_pcb); @@ -3622,10 +3599,6 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) (void)sctp_m_free(ip_pcb->inp_options); ip_pcb->inp_options = 0; } - if (ip_pcb->inp_moptions) { - inp_freemoptions(ip_pcb->inp_moptions); - ip_pcb->inp_moptions = 0; - } #ifdef INET6 if (ip_pcb->inp_vflag & INP_IPV6) { struct in6pcb *in6p; @@ -3641,12 +3614,10 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) if (inp->sctp_ep.local_hmacs != NULL) sctp_free_hmaclist(inp->sctp_ep.local_hmacs); - shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys); - while (shared_key) { + LIST_FOREACH_SAFE(shared_key, &inp->sctp_ep.shared_keys, next, nshared_key) { LIST_REMOVE(shared_key, next); sctp_free_sharedkey(shared_key); /* sa_ignore FREED_MEMORY */ - shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys); } /* @@ -3654,17 +3625,13 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) * ifaddr's that are set into this ep. Again macro limitations here, * since the LIST_FOREACH could be a bad idea. */ - for ((laddr = LIST_FIRST(&inp->sctp_addr_list)); laddr != NULL; - laddr = nladdr) { - nladdr = LIST_NEXT(laddr, sctp_nxt_addr); + LIST_FOREACH_SAFE(laddr, &inp->sctp_addr_list, sctp_nxt_addr, nladdr) { sctp_remove_laddr(laddr); } #ifdef SCTP_TRACK_FREED_ASOCS /* TEMP CODE */ - for ((asoc = LIST_FIRST(&inp->sctp_asoc_free_list)); asoc != NULL; - asoc = nasoc) { - nasoc = LIST_NEXT(asoc, sctp_tcblist); + LIST_FOREACH_SAFE(asoc, &inp->sctp_asoc_free_list, sctp_tcblist, nasoc) { LIST_REMOVE(asoc, sctp_tcblist); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), asoc); SCTP_DECR_ASOC_COUNT(); @@ -3720,7 +3687,7 @@ sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id) */ int sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, - int set_scope, int from) + struct sctp_nets **netp, int set_scope, int from) { /* * The following is redundant to the same lines in the @@ -3756,84 +3723,96 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, return (0); } addr_inscope = 1; - if (newaddr->sa_family == AF_INET) { - struct sockaddr_in *sin; + switch (newaddr->sa_family) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin; - sin = (struct sockaddr_in *)newaddr; - if (sin->sin_addr.s_addr == 0) { - /* Invalid address */ - return (-1); - } - /* zero out the bzero area */ - memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + sin = (struct sockaddr_in *)newaddr; + if (sin->sin_addr.s_addr == 0) { + /* Invalid address */ + return (-1); + } + /* zero out the bzero area */ + memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - /* assure len is set */ - sin->sin_len = sizeof(struct sockaddr_in); - if (set_scope) { + /* assure len is set */ + sin->sin_len = sizeof(struct sockaddr_in); + if (set_scope) { #ifdef SCTP_DONT_DO_PRIVADDR_SCOPE - stcb->ipv4_local_scope = 1; + stcb->ipv4_local_scope = 1; #else - if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { - stcb->asoc.ipv4_local_scope = 1; - } + if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { + stcb->asoc.scope.ipv4_local_scope = 1; + } #endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */ - } else { - /* Validate the address is in scope */ - if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) && - (stcb->asoc.ipv4_local_scope == 0)) { - addr_inscope = 0; + } else { + /* Validate the address is in scope */ + if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) && + (stcb->asoc.scope.ipv4_local_scope == 0)) { + addr_inscope = 0; + } } + break; } +#endif #ifdef INET6 - } else if (newaddr->sa_family == AF_INET6) { - struct sockaddr_in6 *sin6; + case AF_INET6: + { + struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)newaddr; - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { - /* Invalid address */ - return (-1); - } - /* assure len is set */ - sin6->sin6_len = sizeof(struct sockaddr_in6); - if (set_scope) { - if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) { - stcb->asoc.loopback_scope = 1; - stcb->asoc.local_scope = 0; - stcb->asoc.ipv4_local_scope = 1; - stcb->asoc.site_scope = 1; - } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { - /* - * If the new destination is a LINK_LOCAL we - * must have common site scope. Don't set - * the local scope since we may not share - * all links, only loopback can do this. - * Links on the local network would also be - * on our private network for v4 too. - */ - stcb->asoc.ipv4_local_scope = 1; - stcb->asoc.site_scope = 1; - } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { - /* - * If the new destination is SITE_LOCAL then - * we must have site scope in common. - */ - stcb->asoc.site_scope = 1; + sin6 = (struct sockaddr_in6 *)newaddr; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + /* Invalid address */ + return (-1); } - } else { - /* Validate the address is in scope */ - if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) && - (stcb->asoc.loopback_scope == 0)) { - addr_inscope = 0; - } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) && - (stcb->asoc.local_scope == 0)) { - addr_inscope = 0; - } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) && - (stcb->asoc.site_scope == 0)) { - addr_inscope = 0; + /* assure len is set */ + sin6->sin6_len = sizeof(struct sockaddr_in6); + if (set_scope) { + if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) { + stcb->asoc.scope.loopback_scope = 1; + stcb->asoc.scope.local_scope = 0; + stcb->asoc.scope.ipv4_local_scope = 1; + stcb->asoc.scope.site_scope = 1; + } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { + /* + * If the new destination is a + * LINK_LOCAL we must have common + * site scope. Don't set the local + * scope since we may not share all + * links, only loopback can do this. + * Links on the local network would + * also be on our private network + * for v4 too. + */ + stcb->asoc.scope.ipv4_local_scope = 1; + stcb->asoc.scope.site_scope = 1; + } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { + /* + * If the new destination is + * SITE_LOCAL then we must have site + * scope in common. + */ + stcb->asoc.scope.site_scope = 1; + } + } else { + /* Validate the address is in scope */ + if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) && + (stcb->asoc.scope.loopback_scope == 0)) { + addr_inscope = 0; + } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) && + (stcb->asoc.scope.local_scope == 0)) { + addr_inscope = 0; + } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) && + (stcb->asoc.scope.site_scope == 0)) { + addr_inscope = 0; + } } + break; } #endif - } else { + default: /* not supported family type */ return (-1); } @@ -3842,23 +3821,33 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, return (-1); } SCTP_INCR_RADDR_COUNT(); - bzero(net, sizeof(*net)); + bzero(net, sizeof(struct sctp_nets)); (void)SCTP_GETTIME_TIMEVAL(&net->start_time); memcpy(&net->ro._l_addr, newaddr, newaddr->sa_len); - if (newaddr->sa_family == AF_INET) { + switch (newaddr->sa_family) { +#ifdef INET + case AF_INET: ((struct sockaddr_in *)&net->ro._l_addr)->sin_port = stcb->rport; - } else if (newaddr->sa_family == AF_INET6) { + break; +#endif +#ifdef INET6 + case AF_INET6: ((struct sockaddr_in6 *)&net->ro._l_addr)->sin6_port = stcb->rport; + break; +#endif + default: + break; } net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id); if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) { - stcb->asoc.loopback_scope = 1; - stcb->asoc.ipv4_local_scope = 1; - stcb->asoc.local_scope = 0; - stcb->asoc.site_scope = 1; + stcb->asoc.scope.loopback_scope = 1; + stcb->asoc.scope.ipv4_local_scope = 1; + stcb->asoc.scope.local_scope = 0; + stcb->asoc.scope.site_scope = 1; addr_inscope = 1; } net->failure_threshold = stcb->asoc.def_net_failure; + net->pf_threshold = stcb->asoc.def_net_pf_threshold; if (addr_inscope == 0) { net->dest_state = (SCTP_ADDR_REACHABLE | SCTP_ADDR_OUT_OF_SCOPE); @@ -3874,28 +3863,32 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, * We set this to 0, the timer code knows that this means its an * initial value */ + net->rto_needed = 1; net->RTO = 0; net->RTO_measured = 0; stcb->asoc.numnets++; - *(&net->ref_count) = 1; - net->tos_flowlabel = 0; - if (SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable)) { - net->port = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); - } else { - net->port = 0; - } -#ifdef INET - if (newaddr->sa_family == AF_INET) - net->tos_flowlabel = stcb->asoc.default_tos; -#endif + net->ref_count = 1; + net->cwr_window_tsn = net->last_cwr_tsn = stcb->asoc.sending_seq - 1; + net->port = stcb->asoc.port; + net->dscp = stcb->asoc.default_dscp; #ifdef INET6 - if (newaddr->sa_family == AF_INET6) - net->tos_flowlabel = stcb->asoc.default_flowlabel; + net->flowlabel = stcb->asoc.default_flowlabel; #endif + if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { + net->dest_state |= SCTP_ADDR_NOHB; + } else { + net->dest_state &= ~SCTP_ADDR_NOHB; + } + if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD)) { + net->dest_state |= SCTP_ADDR_NO_PMTUD; + } else { + net->dest_state &= ~SCTP_ADDR_NO_PMTUD; + } + net->heart_beat_delay = stcb->asoc.heart_beat_delay; /* Init the timer structure */ SCTP_OS_TIMER_INIT(&net->rxt_timer.timer); - SCTP_OS_TIMER_INIT(&net->fr_timer.timer); SCTP_OS_TIMER_INIT(&net->pmtu_timer.timer); + SCTP_OS_TIMER_INIT(&net->hb_timer.timer); /* Now generate a route for this guy */ #ifdef INET6 @@ -3921,13 +3914,8 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, /* Now get the interface MTU */ if (net->ro._s_addr && net->ro._s_addr->ifn_p) { net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p); - } else { - net->mtu = 0; } - if (net->mtu == 0) { - /* Huh ?? */ - net->mtu = SCTP_DEFAULT_MTU; - } else { + if (net->mtu > 0) { uint32_t rmtu; rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt); @@ -3947,11 +3935,31 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, net->mtu = rmtu; } } - if (from == SCTP_ALLOC_ASOC) { - stcb->asoc.smallest_mtu = net->mtu; + } + if (net->mtu == 0) { + switch (newaddr->sa_family) { +#ifdef INET + case AF_INET: + net->mtu = SCTP_DEFAULT_MTU; + break; +#endif +#ifdef INET6 + case AF_INET6: + net->mtu = 1280; + break; +#endif + default: + break; } - } else { - net->mtu = stcb->asoc.smallest_mtu; + } + if (net->port) { + net->mtu -= (uint32_t) sizeof(struct udphdr); + } + if (from == SCTP_ALLOC_ASOC) { + stcb->asoc.smallest_mtu = net->mtu; + } + if (stcb->asoc.smallest_mtu > net->mtu) { + stcb->asoc.smallest_mtu = net->mtu; } #ifdef INET6 if (newaddr->sa_family == AF_INET6) { @@ -3961,14 +3969,10 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, (void)sa6_recoverscope(sin6); } #endif - if (net->port) { - net->mtu -= sizeof(struct udphdr); - } - if (stcb->asoc.smallest_mtu > net->mtu) { - stcb->asoc.smallest_mtu = net->mtu; - } + /* JRS - Use the congestion control given in the CC module */ - stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); + if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) + (*stcb->asoc.cc_functions.sctp_set_initial_cc_param) (stcb, net); /* * CMT: CUC algo - set find_pseudo_cumack to TRUE (1) at beginning @@ -3977,6 +3981,16 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, net->find_pseudo_cumack = 1; net->find_rtx_pseudo_cumack = 1; net->src_addr_selected = 0; + /* Choose an initial flowid. */ + net->flowid = stcb->asoc.my_vtag ^ + ntohs(stcb->rport) ^ + ntohs(stcb->sctp_ep->sctp_lport); +#ifdef INVARIANTS + net->flowidset = 1; +#endif + if (netp) { + *netp = net; + } netfirst = TAILQ_FIRST(&stcb->asoc.nets); if (net->ro.ro_rt == NULL) { /* Since we have no route put it at the back */ @@ -4034,8 +4048,6 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, /* No route to current primary adopt new primary */ stcb->asoc.primary_destination = net; } - sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb, - net); /* Validate primary is first */ net = TAILQ_FIRST(&stcb->asoc.nets); if ((net != stcb->asoc.primary_destination) && @@ -4069,11 +4081,11 @@ try_again: return (0); } /* - * We don't allow assoc id to be 0, this is needed otherwise if the - * id were to wrap we would have issues with some socket options. + * We don't allow assoc id to be one of SCTP_FUTURE_ASSOC, + * SCTP_CURRENT_ASSOC and SCTP_ALL_ASSOC. */ - if (inp->sctp_associd_counter == 0) { - inp->sctp_associd_counter++; + if (inp->sctp_associd_counter <= SCTP_ALL_ASSOC) { + inp->sctp_associd_counter = SCTP_ALL_ASSOC + 1; } id = inp->sctp_associd_counter; inp->sctp_associd_counter++; @@ -4138,42 +4150,81 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr, *error = EINVAL; return (NULL); } + if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE)) { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) || + (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED)) { + SCTP_INP_RUNLOCK(inp); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); + *error = EINVAL; + return (NULL); + } + } SCTPDBG(SCTP_DEBUG_PCB3, "Allocate an association for peer:"); #ifdef SCTP_DEBUG if (firstaddr) { SCTPDBG_ADDR(SCTP_DEBUG_PCB3, firstaddr); - SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n", - ntohs(((struct sockaddr_in *)firstaddr)->sin_port)); + switch (firstaddr->sa_family) { +#ifdef INET + case AF_INET: + SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n", + ntohs(((struct sockaddr_in *)firstaddr)->sin_port)); + break; +#endif +#ifdef INET6 + case AF_INET6: + SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n", + ntohs(((struct sockaddr_in6 *)firstaddr)->sin6_port)); + break; +#endif + default: + break; + } } else { SCTPDBG(SCTP_DEBUG_PCB3, "None\n"); } #endif /* SCTP_DEBUG */ - if (firstaddr->sa_family == AF_INET) { - struct sockaddr_in *sin; + switch (firstaddr->sa_family) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin; - sin = (struct sockaddr_in *)firstaddr; - if ((sin->sin_port == 0) || (sin->sin_addr.s_addr == 0)) { - /* Invalid address */ - SCTP_INP_RUNLOCK(inp); - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); - *error = EINVAL; - return (NULL); + sin = (struct sockaddr_in *)firstaddr; + if ((ntohs(sin->sin_port) == 0) || + (sin->sin_addr.s_addr == INADDR_ANY) || + (sin->sin_addr.s_addr == INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { + /* Invalid address */ + SCTP_INP_RUNLOCK(inp); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); + *error = EINVAL; + return (NULL); + } + rport = sin->sin_port; + break; } - rport = sin->sin_port; - } else if (firstaddr->sa_family == AF_INET6) { - struct sockaddr_in6 *sin6; +#endif +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)firstaddr; - if ((sin6->sin6_port == 0) || - (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) { - /* Invalid address */ - SCTP_INP_RUNLOCK(inp); - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); - *error = EINVAL; - return (NULL); + sin6 = (struct sockaddr_in6 *)firstaddr; + if ((ntohs(sin6->sin6_port) == 0) || + IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { + /* Invalid address */ + SCTP_INP_RUNLOCK(inp); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); + *error = EINVAL; + return (NULL); + } + rport = sin6->sin6_port; + break; } - rport = sin6->sin6_port; - } else { +#endif + default: /* not supported family type */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); @@ -4249,7 +4300,7 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr, LIST_INSERT_HEAD(head, stcb, sctp_asocs); SCTP_INP_INFO_WUNLOCK(); - if ((err = sctp_add_remote_addr(stcb, firstaddr, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) { + if ((err = sctp_add_remote_addr(stcb, firstaddr, NULL, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) { /* failure.. memory error? */ if (asoc->strmout) { SCTP_FREE(asoc->strmout, SCTP_M_STRMO); @@ -4274,7 +4325,6 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr, return (NULL); } /* Init all the timers */ - SCTP_OS_TIMER_INIT(&asoc->hb_timer.timer); SCTP_OS_TIMER_INIT(&asoc->dack_timer.timer); SCTP_OS_TIMER_INIT(&asoc->strreset_timer.timer); SCTP_OS_TIMER_INIT(&asoc->asconf_timer.timer); @@ -4291,7 +4341,7 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr, LIST_INSERT_HEAD(head, stcb, sctp_tcbhash); } SCTP_INP_WUNLOCK(inp); - SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", stcb); + SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", (void *)stcb); return (stcb); } @@ -4344,6 +4394,10 @@ out: /* Clear net */ asoc->last_control_chunk_from = NULL; } + if (net == stcb->asoc.alternate) { + sctp_free_remote_addr(stcb->asoc.alternate); + stcb->asoc.alternate = NULL; + } sctp_free_remote_addr(net); } @@ -4362,13 +4416,12 @@ sctp_del_remote_addr(struct sctp_tcb *stcb, struct sockaddr *remaddr) * no other addresses. */ struct sctp_association *asoc; - struct sctp_nets *net, *net_tmp; + struct sctp_nets *net, *nnet; asoc = &stcb->asoc; /* locate the address */ - for (net = TAILQ_FIRST(&asoc->nets); net != NULL; net = net_tmp) { - net_tmp = TAILQ_NEXT(net, sctp_next); + TAILQ_FOREACH_SAFE(net, &asoc->nets, sctp_next, nnet) { if (net->ro._l_addr.sa.sa_family != remaddr->sa_family) { continue; } @@ -4533,18 +4586,16 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre { int i; struct sctp_association *asoc; - struct sctp_nets *net, *prev; - struct sctp_laddr *laddr; - struct sctp_tmit_chunk *chk; - struct sctp_asconf_addr *aparam; - struct sctp_asconf_ack *aack; - struct sctp_stream_reset_list *liste; - struct sctp_queued_to_read *sq; - struct sctp_stream_queue_pending *sp; - sctp_sharedkey_t *shared_key; + struct sctp_nets *net, *nnet; + struct sctp_laddr *laddr, *naddr; + struct sctp_tmit_chunk *chk, *nchk; + struct sctp_asconf_addr *aparam, *naparam; + struct sctp_asconf_ack *aack, *naack; + struct sctp_stream_reset_list *strrst, *nstrrst; + struct sctp_queued_to_read *sq, *nsq; + struct sctp_stream_queue_pending *sp, *nsp; + sctp_sharedkey_t *shared_key, *nshared_key; struct socket *so; - int ccnt = 0; - int cnt = 0; /* first, lets purge the entry from the hash table. */ @@ -4558,6 +4609,10 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre /* there is no asoc, really TSNH :-0 */ return (1); } + if (stcb->asoc.alternate) { + sctp_free_remote_addr(stcb->asoc.alternate); + stcb->asoc.alternate = NULL; + } /* TEMP CODE */ if (stcb->freed_from_where == 0) { /* Only record the first place free happened from */ @@ -4596,8 +4651,6 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre } } /* now clean up any other timers */ - (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer); - asoc->hb_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer); asoc->dack_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); @@ -4621,15 +4674,14 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre (void)SCTP_OS_TIMER_STOP(&asoc->delete_prim_timer.timer); asoc->delete_prim_timer.self = NULL; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { - (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer); - net->fr_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer); net->rxt_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer); net->pmtu_timer.self = NULL; + (void)SCTP_OS_TIMER_STOP(&net->hb_timer.timer); + net->hb_timer.self = NULL; } /* Now the read queue needs to be cleaned up (only once) */ - cnt = 0; if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) { stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED; SCTP_INP_READ_LOCK(inp); @@ -4645,7 +4697,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre /* Held for PD-API clear that. */ sq->pdapi_aborted = 1; sq->held_length = 0; - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) { + if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) { /* * Need to add a PD-API * aborted indication. @@ -4668,12 +4720,10 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre } /* Add an end to wake them */ sq->end_added = 1; - cnt++; } } SCTP_INP_READ_UNLOCK(inp); if (stcb->block_entry) { - cnt++; SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PCB, ECONNRESET); stcb->block_entry->error = ECONNRESET; stcb->block_entry = NULL; @@ -4797,7 +4847,6 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre * Now restop the timers to be sure this is paranoia at is finest! */ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); - (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer); @@ -4805,13 +4854,12 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer); TAILQ_FOREACH(net, &asoc->nets, sctp_next) { - (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer); (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer); (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer); + (void)SCTP_OS_TIMER_STOP(&net->hb_timer.timer); } asoc->strreset_timer.type = SCTP_TIMER_TYPE_NONE; - prev = NULL; /* * The chunk lists and such SHOULD be empty but we check them just * in case. @@ -4822,45 +4870,35 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre outs = &asoc->strmout[i]; /* now clean up any chunks here */ - sp = TAILQ_FIRST(&outs->outqueue); - while (sp) { + TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) { TAILQ_REMOVE(&outs->outqueue, sp, next); + sctp_free_spbufspace(stcb, asoc, sp); if (sp->data) { if (so) { /* Still an open socket - report */ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb, - SCTP_NOTIFY_DATAGRAM_UNSENT, - (void *)sp, SCTP_SO_LOCKED); + 0, (void *)sp, SCTP_SO_LOCKED); } if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; sp->tail_mbuf = NULL; + sp->length = 0; } } if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; } - sctp_free_spbufspace(stcb, asoc, sp); - if (sp->holds_key_ref) - sctp_auth_key_release(stcb, sp->auth_keyid); - /* Free the zone stuff */ - SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_strmoq), sp); - SCTP_DECR_STRMOQ_COUNT(); - /* sa_ignore FREED_MEMORY */ - sp = TAILQ_FIRST(&outs->outqueue); + sctp_free_a_strmoq(stcb, sp, SCTP_SO_LOCKED); } } - /* sa_ignore FREED_MEMORY */ - while ((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) { - TAILQ_REMOVE(&asoc->resetHead, liste, next_resp); - SCTP_FREE(liste, SCTP_M_STRESET); + TAILQ_FOREACH_SAFE(strrst, &asoc->resetHead, next_resp, nstrrst) { + TAILQ_REMOVE(&asoc->resetHead, strrst, next_resp); + SCTP_FREE(strrst, SCTP_M_STRESET); } - - sq = TAILQ_FIRST(&asoc->pending_reply_queue); - while (sq) { + TAILQ_FOREACH_SAFE(sq, &asoc->pending_reply_queue, next, nsq) { TAILQ_REMOVE(&asoc->pending_reply_queue, sq, next); if (sq->data) { sctp_m_freem(sq->data); @@ -4873,168 +4911,131 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq); SCTP_DECR_READQ_COUNT(); /* sa_ignore FREED_MEMORY */ - sq = TAILQ_FIRST(&asoc->pending_reply_queue); } - - chk = TAILQ_FIRST(&asoc->free_chunks); - while (chk) { + TAILQ_FOREACH_SAFE(chk, &asoc->free_chunks, sctp_next, nchk) { TAILQ_REMOVE(&asoc->free_chunks, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) - sctp_auth_key_release(stcb, chk->auth_keyid); - ccnt++; + sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); + SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); + SCTP_DECR_CHK_COUNT(); + atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); + asoc->free_chunk_cnt--; + /* sa_ignore FREED_MEMORY */ + } + /* pending send queue SHOULD be empty */ + TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { + if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) { + asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--; +#ifdef INVARIANTS + } else { + panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number); +#endif + } + TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); + if (chk->data) { + if (so) { + /* Still a socket? */ + sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, + 0, chk, SCTP_SO_LOCKED); + } + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; + } + } + if (chk->holds_key_ref) + sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); + if (chk->whoTo) { + sctp_free_remote_addr(chk->whoTo); + chk->whoTo = NULL; + } SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); - atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); - asoc->free_chunk_cnt--; /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->free_chunks); - } - /* pending send queue SHOULD be empty */ - if (!TAILQ_EMPTY(&asoc->send_queue)) { - chk = TAILQ_FIRST(&asoc->send_queue); - while (chk) { - TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); - if (chk->data) { - if (so) { - /* Still a socket? */ - sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, - SCTP_NOTIFY_DATAGRAM_UNSENT, chk, SCTP_SO_LOCKED); - } - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } - } - if (chk->holds_key_ref) - sctp_auth_key_release(stcb, chk->auth_keyid); - ccnt++; - if (chk->whoTo) { - sctp_free_remote_addr(chk->whoTo); - chk->whoTo = NULL; - } - SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); - SCTP_DECR_CHK_COUNT(); - /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->send_queue); - } } -/* - if (ccnt) { - printf("Freed %d from send_queue\n", ccnt); - ccnt = 0; - } -*/ /* sent queue SHOULD be empty */ - if (!TAILQ_EMPTY(&asoc->sent_queue)) { - chk = TAILQ_FIRST(&asoc->sent_queue); - while (chk) { - TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next); - if (chk->data) { - if (so) { - /* Still a socket? */ - sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, - SCTP_NOTIFY_DATAGRAM_SENT, chk, SCTP_SO_LOCKED); - } - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } + TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) { + if (chk->sent != SCTP_DATAGRAM_NR_ACKED) { + if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) { + asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--; +#ifdef INVARIANTS + } else { + panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number); +#endif } - if (chk->holds_key_ref) - sctp_auth_key_release(stcb, chk->auth_keyid); - ccnt++; - sctp_free_remote_addr(chk->whoTo); - SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); - SCTP_DECR_CHK_COUNT(); - /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->sent_queue); } - } -/* - if (ccnt) { - printf("Freed %d from sent_queue\n", ccnt); - ccnt = 0; - } -*/ - /* control queue MAY not be empty */ - if (!TAILQ_EMPTY(&asoc->control_send_queue)) { - chk = TAILQ_FIRST(&asoc->control_send_queue); - while (chk) { - TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); + TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next); + if (chk->data) { + if (so) { + /* Still a socket? */ + sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, + 0, chk, SCTP_SO_LOCKED); + } if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } - if (chk->holds_key_ref) - sctp_auth_key_release(stcb, chk->auth_keyid); - ccnt++; - sctp_free_remote_addr(chk->whoTo); - SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); - SCTP_DECR_CHK_COUNT(); - /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->control_send_queue); } + if (chk->holds_key_ref) + sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); + sctp_free_remote_addr(chk->whoTo); + SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); + SCTP_DECR_CHK_COUNT(); + /* sa_ignore FREED_MEMORY */ + } +#ifdef INVARIANTS + for (i = 0; i < stcb->asoc.streamoutcnt; i++) { + if (stcb->asoc.strmout[i].chunks_on_queues > 0) { + panic("%u chunks left for stream %u.", stcb->asoc.strmout[i].chunks_on_queues, i); + } + } +#endif + /* control queue MAY not be empty */ + TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) { + TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; + } + if (chk->holds_key_ref) + sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); + sctp_free_remote_addr(chk->whoTo); + SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); + SCTP_DECR_CHK_COUNT(); + /* sa_ignore FREED_MEMORY */ } -/* - if (ccnt) { - printf("Freed %d from ctrl_queue\n", ccnt); - ccnt = 0; - } -*/ - /* ASCONF queue MAY not be empty */ - if (!TAILQ_EMPTY(&asoc->asconf_send_queue)) { - chk = TAILQ_FIRST(&asoc->asconf_send_queue); - while (chk) { - TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next); - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } - if (chk->holds_key_ref) - sctp_auth_key_release(stcb, chk->auth_keyid); - ccnt++; - sctp_free_remote_addr(chk->whoTo); - SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); - SCTP_DECR_CHK_COUNT(); - /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->asconf_send_queue); + TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) { + TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next); + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; } + if (chk->holds_key_ref) + sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); + sctp_free_remote_addr(chk->whoTo); + SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); + SCTP_DECR_CHK_COUNT(); + /* sa_ignore FREED_MEMORY */ } -/* - if (ccnt) { - printf("Freed %d from asconf_queue\n", ccnt); - ccnt = 0; - } -*/ - if (!TAILQ_EMPTY(&asoc->reasmqueue)) { - chk = TAILQ_FIRST(&asoc->reasmqueue); - while (chk) { - TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } - if (chk->holds_key_ref) - sctp_auth_key_release(stcb, chk->auth_keyid); - sctp_free_remote_addr(chk->whoTo); - ccnt++; - SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); - SCTP_DECR_CHK_COUNT(); - /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->reasmqueue); + TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) { + TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; } + if (chk->holds_key_ref) + sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); + sctp_free_remote_addr(chk->whoTo); + SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); + SCTP_DECR_CHK_COUNT(); + /* sa_ignore FREED_MEMORY */ } -/* - if (ccnt) { - printf("Freed %d from reasm_queue\n", ccnt); - ccnt = 0; - } -*/ + if (asoc->mapping_array) { SCTP_FREE(asoc->mapping_array, SCTP_M_MAP); asoc->mapping_array = NULL; @@ -5050,66 +5051,50 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre } asoc->strm_realoutsize = asoc->streamoutcnt = 0; if (asoc->strmin) { - struct sctp_queued_to_read *ctl; + struct sctp_queued_to_read *ctl, *nctl; for (i = 0; i < asoc->streamincnt; i++) { - if (!TAILQ_EMPTY(&asoc->strmin[i].inqueue)) { - /* We have somethings on the streamin queue */ - ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue); - while (ctl) { - TAILQ_REMOVE(&asoc->strmin[i].inqueue, - ctl, next); - sctp_free_remote_addr(ctl->whoFrom); - if (ctl->data) { - sctp_m_freem(ctl->data); - ctl->data = NULL; - } - /* - * We don't free the address here - * since all the net's were freed - * above. - */ - SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl); - SCTP_DECR_READQ_COUNT(); - ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue); + TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[i].inqueue, next, nctl) { + TAILQ_REMOVE(&asoc->strmin[i].inqueue, ctl, next); + sctp_free_remote_addr(ctl->whoFrom); + if (ctl->data) { + sctp_m_freem(ctl->data); + ctl->data = NULL; } + /* + * We don't free the address here since all + * the net's were freed above. + */ + SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl); + SCTP_DECR_READQ_COUNT(); } } SCTP_FREE(asoc->strmin, SCTP_M_STRMI); asoc->strmin = NULL; } asoc->streamincnt = 0; - while (!TAILQ_EMPTY(&asoc->nets)) { - /* sa_ignore FREED_MEMORY */ - net = TAILQ_FIRST(&asoc->nets); - /* pull from list */ - if ((SCTP_BASE_INFO(ipi_count_raddr) == 0) || (prev == net)) { + TAILQ_FOREACH_SAFE(net, &asoc->nets, sctp_next, nnet) { #ifdef INVARIANTS + if (SCTP_BASE_INFO(ipi_count_raddr) == 0) { panic("no net's left alloc'ed, or list points to itself"); -#endif - break; } - prev = net; +#endif TAILQ_REMOVE(&asoc->nets, net, sctp_next); sctp_free_remote_addr(net); } - - while (!LIST_EMPTY(&asoc->sctp_restricted_addrs)) { + LIST_FOREACH_SAFE(laddr, &asoc->sctp_restricted_addrs, sctp_nxt_addr, naddr) { /* sa_ignore FREED_MEMORY */ - laddr = LIST_FIRST(&asoc->sctp_restricted_addrs); sctp_remove_laddr(laddr); } /* pending asconf (address) parameters */ - while (!TAILQ_EMPTY(&asoc->asconf_queue)) { + TAILQ_FOREACH_SAFE(aparam, &asoc->asconf_queue, next, naparam) { /* sa_ignore FREED_MEMORY */ - aparam = TAILQ_FIRST(&asoc->asconf_queue); TAILQ_REMOVE(&asoc->asconf_queue, aparam, next); SCTP_FREE(aparam, SCTP_M_ASC_ADDR); } - while (!TAILQ_EMPTY(&asoc->asconf_ack_sent)) { + TAILQ_FOREACH_SAFE(aack, &asoc->asconf_ack_sent, next, naack) { /* sa_ignore FREED_MEMORY */ - aack = TAILQ_FIRST(&asoc->asconf_ack_sent); TAILQ_REMOVE(&asoc->asconf_ack_sent, aack, next); if (aack->data != NULL) { sctp_m_freem(aack->data); @@ -5129,12 +5114,10 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre sctp_free_authinfo(&asoc->authinfo); - shared_key = LIST_FIRST(&asoc->shared_keys); - while (shared_key) { + LIST_FOREACH_SAFE(shared_key, &asoc->shared_keys, next, nshared_key) { LIST_REMOVE(shared_key, next); sctp_free_sharedkey(shared_key); /* sa_ignore FREED_MEMORY */ - shared_key = LIST_FIRST(&asoc->shared_keys); } /* Insert new items here :> */ @@ -5231,13 +5214,21 @@ sctp_destination_is_reachable(struct sctp_tcb *stcb, struct sockaddr *destaddr) return (1); } /* NOTE: all "scope" checks are done when local addresses are added */ - if (destaddr->sa_family == AF_INET6) { + switch (destaddr->sa_family) { +#ifdef INET6 + case AF_INET6: answer = inp->ip_inp.inp.inp_vflag & INP_IPV6; - } else if (destaddr->sa_family == AF_INET) { + break; +#endif +#ifdef INET + case AF_INET: answer = inp->ip_inp.inp.inp_vflag & INP_IPV4; - } else { + break; +#endif + default: /* invalid family, so it's unreachable */ answer = 0; + break; } return (answer); } @@ -5262,10 +5253,19 @@ sctp_update_ep_vflag(struct sctp_inpcb *inp) if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { continue; } - if (laddr->ifa->address.sa.sa_family == AF_INET6) { + switch (laddr->ifa->address.sa.sa_family) { +#ifdef INET6 + case AF_INET6: inp->ip_inp.inp.inp_vflag |= INP_IPV6; - } else if (laddr->ifa->address.sa.sa_family == AF_INET) { + break; +#endif +#ifdef INET + case AF_INET: inp->ip_inp.inp.inp_vflag |= INP_IPV4; + break; +#endif + default: + break; } } } @@ -5286,12 +5286,14 @@ sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t ac /* You are already bound to all. You have it already */ return; } +#ifdef INET6 if (ifa->address.sa.sa_family == AF_INET6) { if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-useable addr. */ return; } } +#endif /* first, is it already present? */ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == ifa) { @@ -5307,10 +5309,19 @@ sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t ac return; inp->laddr_count++; /* update inp_vflag flags */ - if (ifa->address.sa.sa_family == AF_INET6) { + switch (ifa->address.sa.sa_family) { +#ifdef INET6 + case AF_INET6: inp->ip_inp.inp.inp_vflag |= INP_IPV6; - } else if (ifa->address.sa.sa_family == AF_INET) { + break; +#endif +#ifdef INET + case AF_INET: inp->ip_inp.inp.inp_vflag |= INP_IPV4; + break; +#endif + default: + break; } } return; @@ -5428,7 +5439,6 @@ sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa) void sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) { - struct sctp_inpcb *inp; struct sctp_laddr *laddr; struct sctpladdr *list; @@ -5438,13 +5448,14 @@ sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) */ list = &stcb->asoc.sctp_restricted_addrs; - inp = stcb->sctp_ep; +#ifdef INET6 if (ifa->address.sa.sa_family == AF_INET6) { if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-existent addr. */ return; } } +#endif /* does the address already exist? */ LIST_FOREACH(laddr, list, sctp_nxt_addr) { if (laddr->ifa == ifa) { @@ -5546,6 +5557,163 @@ sctp_del_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) static int sctp_max_number_of_assoc = SCTP_MAX_NUM_OF_ASOC; static int sctp_scale_up_for_address = SCTP_SCALE_FOR_ADDR; + + +#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) +struct sctp_mcore_ctrl *sctp_mcore_workers = NULL; +int *sctp_cpuarry = NULL; +void +sctp_queue_to_mcore(struct mbuf *m, int off, int cpu_to_use) +{ + /* Queue a packet to a processor for the specified core */ + struct sctp_mcore_queue *qent; + struct sctp_mcore_ctrl *wkq; + int need_wake = 0; + + if (sctp_mcore_workers == NULL) { + /* Something went way bad during setup */ + sctp_input_with_port(m, off, 0); + return; + } + SCTP_MALLOC(qent, struct sctp_mcore_queue *, + (sizeof(struct sctp_mcore_queue)), + SCTP_M_MCORE); + if (qent == NULL) { + /* This is trouble */ + sctp_input_with_port(m, off, 0); + return; + } + qent->vn = curvnet; + qent->m = m; + qent->off = off; + qent->v6 = 0; + wkq = &sctp_mcore_workers[cpu_to_use]; + SCTP_MCORE_QLOCK(wkq); + + TAILQ_INSERT_TAIL(&wkq->que, qent, next); + if (wkq->running == 0) { + need_wake = 1; + } + SCTP_MCORE_QUNLOCK(wkq); + if (need_wake) { + wakeup(&wkq->running); + } +} + +static void +sctp_mcore_thread(void *arg) +{ + + struct sctp_mcore_ctrl *wkq; + struct sctp_mcore_queue *qent; + + wkq = (struct sctp_mcore_ctrl *)arg; + struct mbuf *m; + int off, v6; + + /* Wait for first tickle */ + SCTP_MCORE_LOCK(wkq); + wkq->running = 0; + msleep(&wkq->running, + &wkq->core_mtx, + 0, "wait for pkt", 0); + SCTP_MCORE_UNLOCK(wkq); + + /* Bind to our cpu */ + thread_lock(curthread); + sched_bind(curthread, wkq->cpuid); + thread_unlock(curthread); + + /* Now lets start working */ + SCTP_MCORE_LOCK(wkq); + /* Now grab lock and go */ + for (;;) { + SCTP_MCORE_QLOCK(wkq); +skip_sleep: + wkq->running = 1; + qent = TAILQ_FIRST(&wkq->que); + if (qent) { + TAILQ_REMOVE(&wkq->que, qent, next); + SCTP_MCORE_QUNLOCK(wkq); + CURVNET_SET(qent->vn); + m = qent->m; + off = qent->off; + v6 = qent->v6; + SCTP_FREE(qent, SCTP_M_MCORE); + if (v6 == 0) { + sctp_input_with_port(m, off, 0); + } else { + SCTP_PRINTF("V6 not yet supported\n"); + sctp_m_freem(m); + } + CURVNET_RESTORE(); + SCTP_MCORE_QLOCK(wkq); + } + wkq->running = 0; + if (!TAILQ_EMPTY(&wkq->que)) { + goto skip_sleep; + } + SCTP_MCORE_QUNLOCK(wkq); + msleep(&wkq->running, + &wkq->core_mtx, + 0, "wait for pkt", 0); + } +} + +static void +sctp_startup_mcore_threads(void) +{ + int i, cpu; + + if (mp_ncpus == 1) + return; + + if (sctp_mcore_workers != NULL) { + /* + * Already been here in some previous vnet? + */ + return; + } + SCTP_MALLOC(sctp_mcore_workers, struct sctp_mcore_ctrl *, + ((mp_maxid + 1) * sizeof(struct sctp_mcore_ctrl)), + SCTP_M_MCORE); + if (sctp_mcore_workers == NULL) { + /* TSNH I hope */ + return; + } + memset(sctp_mcore_workers, 0, ((mp_maxid + 1) * + sizeof(struct sctp_mcore_ctrl))); + /* Init the structures */ + for (i = 0; i <= mp_maxid; i++) { + TAILQ_INIT(&sctp_mcore_workers[i].que); + SCTP_MCORE_LOCK_INIT(&sctp_mcore_workers[i]); + SCTP_MCORE_QLOCK_INIT(&sctp_mcore_workers[i]); + sctp_mcore_workers[i].cpuid = i; + } + if (sctp_cpuarry == NULL) { + SCTP_MALLOC(sctp_cpuarry, int *, + (mp_ncpus * sizeof(int)), + SCTP_M_MCORE); + i = 0; + CPU_FOREACH(cpu) { + sctp_cpuarry[i] = cpu; + i++; + } + } + /* Now start them all */ + CPU_FOREACH(cpu) { + (void)kproc_create(sctp_mcore_thread, + (void *)&sctp_mcore_workers[cpu], + &sctp_mcore_workers[cpu].thread_proc, + RFPROC, + SCTP_KTHREAD_PAGES, + SCTP_MCORE_NAME); + + } +} + +#endif + void sctp_pcb_init() { @@ -5564,12 +5732,19 @@ sctp_pcb_init() #if defined(SCTP_LOCAL_TRACE_BUF) bzero(&SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log)); +#endif +#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) + SCTP_MALLOC(SCTP_BASE_STATS, struct sctpstat *, + ((mp_maxid + 1) * sizeof(struct sctpstat)), + SCTP_M_MCORE); #endif (void)SCTP_GETTIME_TIMEVAL(&tv); #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) + bzero(SCTP_BASE_STATS, (sizeof(struct sctpstat) * (mp_maxid + 1))); SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_sec = (uint32_t) tv.tv_sec; SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_usec = (uint32_t) tv.tv_usec; #else + bzero(&SCTP_BASE_STATS, sizeof(struct sctpstat)); SCTP_BASE_STAT(sctps_discontinuitytime).tv_sec = (uint32_t) tv.tv_sec; SCTP_BASE_STAT(sctps_discontinuitytime).tv_usec = (uint32_t) tv.tv_usec; #endif @@ -5676,13 +5851,16 @@ sctp_pcb_init() sctp_startup_iterator(); +#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) + sctp_startup_mcore_threads(); +#endif + /* * INIT the default VRF which for BSD is the only one, other O/S's * may have more. But initially they must start with one and then * add the VRF's as addresses are added. */ sctp_init_vrf_list(SCTP_DEFAULT_VRF); - } /* @@ -5692,12 +5870,12 @@ void sctp_pcb_finish(void) { struct sctp_vrflist *vrf_bucket; - struct sctp_vrf *vrf; - struct sctp_ifn *ifn; - struct sctp_ifa *ifa; + struct sctp_vrf *vrf, *nvrf; + struct sctp_ifn *ifn, *nifn; + struct sctp_ifa *ifa, *nifa; struct sctpvtaghead *chain; struct sctp_tagblock *twait_block, *prev_twait_block; - struct sctp_laddr *wi; + struct sctp_laddr *wi, *nwi; int i; /* @@ -5709,20 +5887,15 @@ sctp_pcb_finish(void) struct sctp_iterator *it, *nit; SCTP_IPI_ITERATOR_WQ_LOCK(); - it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); - while (it) { - nit = TAILQ_NEXT(it, sctp_nxt_itr); + TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) { if (it->vn != curvnet) { - it = nit; continue; } - TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, - it, sctp_nxt_itr); + TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); if (it->function_atend != NULL) { (*it->function_atend) (it->pointer, it->val); } SCTP_FREE(it, SCTP_M_ITER); - it = nit; } SCTP_IPI_ITERATOR_WQ_UNLOCK(); SCTP_ITERATOR_LOCK(); @@ -5735,7 +5908,7 @@ sctp_pcb_finish(void) SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer)); SCTP_WQ_ADDR_LOCK(); - while ((wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq))) != NULL) { + LIST_FOREACH_SAFE(wi, &SCTP_BASE_INFO(addr_wq), sctp_nxt_addr, nwi) { LIST_REMOVE(wi, sctp_nxt_addr); SCTP_DECR_LADDR_COUNT(); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi); @@ -5747,9 +5920,9 @@ sctp_pcb_finish(void) * destroyed first). */ vrf_bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(SCTP_DEFAULT_VRFID & SCTP_BASE_INFO(hashvrfmark))]; - while ((vrf = LIST_FIRST(vrf_bucket)) != NULL) { - while ((ifn = LIST_FIRST(&vrf->ifnlist)) != NULL) { - while ((ifa = LIST_FIRST(&ifn->ifalist)) != NULL) { + LIST_FOREACH_SAFE(vrf, vrf_bucket, next_vrf, nvrf) { + LIST_FOREACH_SAFE(ifn, &vrf->ifnlist, next_ifn, nifn) { + LIST_FOREACH_SAFE(ifa, &ifn->ifalist, next_ifa, nifa) { /* free the ifa */ LIST_REMOVE(ifa, next_bucket); LIST_REMOVE(ifa, next_ifa); @@ -5813,13 +5986,16 @@ sctp_pcb_finish(void) SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark)); if (SCTP_BASE_INFO(sctp_tcpephash) != NULL) SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark)); - +#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) + SCTP_FREE(SCTP_BASE_STATS, SCTP_M_MCORE); +#endif } int sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, - int iphlen, int offset, int limit, struct sctphdr *sh, + int offset, int limit, + struct sockaddr *src, struct sockaddr *dst, struct sockaddr *altsa) { /* @@ -5829,17 +6005,12 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, * with either INIT or INIT-ACK's as long as the m points to the IP * packet and the offset points to the beginning of the parameters. */ - struct sctp_inpcb *inp, *l_inp; - struct sctp_nets *net, *net_tmp; - struct ip *iph; + struct sctp_inpcb *inp; + struct sctp_nets *net, *nnet, *net_tmp; struct sctp_paramhdr *phdr, parm_buf; struct sctp_tcb *stcb_tmp; uint16_t ptype, plen; struct sockaddr *sa; - struct sockaddr_storage dest_store; - struct sockaddr *local_sa = (struct sockaddr *)&dest_store; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; uint8_t random_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_random *p_random = NULL; uint16_t random_len = 0; @@ -5854,91 +6025,71 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, sctp_key_t *new_key; uint32_t keylen; int got_random = 0, got_hmacs = 0, got_chklist = 0; + uint8_t ecn_allowed; + +#ifdef INET + struct sockaddr_in sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; + +#endif /* First get the destination address setup too. */ +#ifdef INET memset(&sin, 0, sizeof(sin)); - memset(&sin6, 0, sizeof(sin6)); - sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = stcb->rport; - +#endif +#ifdef INET6 + memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_port = stcb->rport; - if (altsa == NULL) { - iph = mtod(m, struct ip *); - switch (iph->ip_v) { - case IPVERSION: - { - /* its IPv4 */ - struct sockaddr_in *sin_2; - - sin_2 = (struct sockaddr_in *)(local_sa); - memset(sin_2, 0, sizeof(sin)); - sin_2->sin_family = AF_INET; - sin_2->sin_len = sizeof(sin); - sin_2->sin_port = sh->dest_port; - sin_2->sin_addr.s_addr = iph->ip_dst.s_addr; - sin.sin_addr = iph->ip_src; - sa = (struct sockaddr *)&sin; - break; - } -#ifdef INET6 - case IPV6_VERSION >> 4: - { - /* its IPv6 */ - struct ip6_hdr *ip6; - struct sockaddr_in6 *sin6_2; - - ip6 = mtod(m, struct ip6_hdr *); - sin6_2 = (struct sockaddr_in6 *)(local_sa); - memset(sin6_2, 0, sizeof(sin6)); - sin6_2->sin6_family = AF_INET6; - sin6_2->sin6_len = sizeof(struct sockaddr_in6); - sin6_2->sin6_port = sh->dest_port; - sin6.sin6_addr = ip6->ip6_src; - sa = (struct sockaddr *)&sin6; - break; - } #endif - default: - return (-1); - break; - } - } else { - /* - * For cookies we use the src address NOT from the packet - * but from the original INIT - */ + if (altsa) { sa = altsa; + } else { + sa = src; } /* Turn off ECN until we get through all params */ - stcb->asoc.ecn_allowed = 0; + ecn_allowed = 0; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { /* mark all addresses that we have currently on the list */ net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC; } /* does the source address already exist? if so skip it */ - l_inp = inp = stcb->sctp_ep; - + inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); - stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb); + stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, dst, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) { /* we must add the source address */ /* no scope set here since we have a tcb already. */ - if ((sa->sa_family == AF_INET) && - (stcb->asoc.ipv4_addr_legal)) { - if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) { - return (-1); + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + if (stcb->asoc.scope.ipv4_addr_legal) { + if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) { + return (-1); + } } - } else if ((sa->sa_family == AF_INET6) && - (stcb->asoc.ipv6_addr_legal)) { - if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) { - return (-2); + break; +#endif +#ifdef INET6 + case AF_INET6: + if (stcb->asoc.scope.ipv6_addr_legal) { + if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) { + return (-2); + } } + break; +#endif + default: + break; } } else { if (net_tmp != NULL && stcb_tmp == stcb) { @@ -5966,8 +6117,8 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); /* - * printf("ptype => %0x, plen => %d\n", (uint32_t)ptype, - * (int)plen); + * SCTP_PRINTF("ptype => %0x, plen => %d\n", + * (uint32_t)ptype, (int)plen); */ if (offset + plen > limit) { break; @@ -5975,8 +6126,9 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, if (plen == 0) { break; } +#ifdef INET if (ptype == SCTP_IPV4_ADDRESS) { - if (stcb->asoc.ipv4_addr_legal) { + if (stcb->asoc.scope.ipv4_addr_legal) { struct sctp_ipv4addr_param *p4, p4_buf; /* ok get the v4 address and check/add */ @@ -6001,7 +6153,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net, - local_sa, stcb); + dst, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || @@ -6021,7 +6173,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, /* the assoc was freed? */ return (-7); } - if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) { + if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) { return (-8); } } else if (stcb_tmp == stcb) { @@ -6046,7 +6198,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, * abort this guy */ sctp_abort_an_association(stcb_tmp->sctp_ep, - stcb_tmp, 1, NULL, 0); + stcb_tmp, NULL, SCTP_SO_NOT_LOCKED); goto add_it_now; } SCTP_TCB_UNLOCK(stcb_tmp); @@ -6058,8 +6210,11 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, return (-13); } } - } else if (ptype == SCTP_IPV6_ADDRESS) { - if (stcb->asoc.ipv6_addr_legal) { + } else +#endif +#ifdef INET6 + if (ptype == SCTP_IPV6_ADDRESS) { + if (stcb->asoc.scope.ipv6_addr_legal) { /* ok get the v6 address and check/add */ struct sctp_ipv6addr_param *p6, p6_buf; @@ -6088,7 +6243,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net, - local_sa, stcb); + dst, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if (stcb_tmp == NULL && (inp == stcb->sctp_ep || inp == NULL)) { @@ -6105,7 +6260,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, * we must add the address, no scope * set */ - if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) { + if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) { return (-17); } } else if (stcb_tmp == stcb) { @@ -6134,7 +6289,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, * abort this guy */ sctp_abort_an_association(stcb_tmp->sctp_ep, - stcb_tmp, 1, NULL, 0); + stcb_tmp, NULL, SCTP_SO_NOT_LOCKED); goto add_it_now6; } SCTP_TCB_UNLOCK(stcb_tmp); @@ -6146,8 +6301,10 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, return (-22); } } - } else if (ptype == SCTP_ECN_CAPABLE) { - stcb->asoc.ecn_allowed = 1; + } else +#endif + if (ptype == SCTP_ECN_CAPABLE) { + ecn_allowed = 1; } else if (ptype == SCTP_ULP_ADAPTATION) { if (stcb->asoc.state != SCTP_STATE_OPEN) { struct sctp_adaptation_layer_indication ai, @@ -6163,10 +6320,14 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, } } else if (ptype == SCTP_SET_PRIM_ADDR) { struct sctp_asconf_addr_param lstore, *fee; - struct sctp_asconf_addrv4_param *fii; int lptype; struct sockaddr *lsa = NULL; +#ifdef INET + struct sctp_asconf_addrv4_param *fii; + +#endif + stcb->asoc.peer_supports_asconf = 1; if (plen > sizeof(lstore)) { return (-23); @@ -6179,7 +6340,9 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, } fee = (struct sctp_asconf_addr_param *)phdr; lptype = ntohs(fee->addrp.ph.param_type); - if (lptype == SCTP_IPV4_ADDRESS) { + switch (lptype) { +#ifdef INET + case SCTP_IPV4_ADDRESS: if (plen != sizeof(struct sctp_asconf_addrv4_param)) { SCTP_PRINTF("Sizeof setprim in init/init ack not %d but %d - ignored\n", @@ -6190,7 +6353,10 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, sin.sin_addr.s_addr = fii->addrp.addr; lsa = (struct sockaddr *)&sin; } - } else if (lptype == SCTP_IPV6_ADDRESS) { + break; +#endif +#ifdef INET6 + case SCTP_IPV6_ADDRESS: if (plen != sizeof(struct sctp_asconf_addr_param)) { SCTP_PRINTF("Sizeof setprim (v6) in init/init ack not %d but %d - ignored\n", @@ -6202,6 +6368,10 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, sizeof(fee->addrp.addr)); lsa = (struct sockaddr *)&sin6; } + break; +#endif + default: + break; } if (lsa) { (void)sctp_set_primary_addr(stcb, sa, NULL); @@ -6257,10 +6427,6 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, } } - } else if (ptype == SCTP_ECN_NONCE_SUPPORTED) { - /* Peer supports ECN-nonce */ - stcb->asoc.peer_supports_ecn_nonce = 1; - stcb->asoc.ecn_nonce_allowed = 1; } else if (ptype == SCTP_RANDOM) { if (plen > sizeof(random_store)) break; @@ -6376,8 +6542,7 @@ next_param: sizeof(parm_buf)); } /* Now check to see if we need to purge any addresses */ - for (net = TAILQ_FIRST(&stcb->asoc.nets); net != NULL; net = net_tmp) { - net_tmp = TAILQ_NEXT(net, sctp_next); + TAILQ_FOREACH_SAFE(net, &stcb->asoc.nets, sctp_next, nnet) { if ((net->dest_state & SCTP_ADDR_NOT_IN_ASSOC) == SCTP_ADDR_NOT_IN_ASSOC) { /* This address has been removed from the asoc */ @@ -6391,6 +6556,9 @@ next_param: } } } + if (ecn_allowed == 0) { + stcb->asoc.ecn_allowed = 0; + } /* validate authentication required parameters */ if (got_random && got_hmacs) { stcb->asoc.peer_supports_auth = 1; @@ -6464,7 +6632,10 @@ sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa, return (0); } stcb->asoc.primary_destination = net; - net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY; + if (!(net->dest_state & SCTP_ADDR_PF) && (stcb->asoc.alternate)) { + sctp_free_remote_addr(stcb->asoc.alternate); + stcb->asoc.alternate = NULL; + } net = TAILQ_FIRST(&stcb->asoc.nets); if (net != stcb->asoc.primary_destination) { /* @@ -6480,7 +6651,7 @@ sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa, } int -sctp_is_vtag_good(struct sctp_inpcb *inp, uint32_t tag, uint16_t lport, uint16_t rport, struct timeval *now, int save_in_twait) +sctp_is_vtag_good(uint32_t tag, uint16_t lport, uint16_t rport, struct timeval *now) { /* * This function serves two purposes. It will see if a TAG can be @@ -6559,13 +6730,8 @@ skip_vtag_check: return (1); } - -static sctp_assoc_t reneged_asoc_ids[256]; -static uint8_t reneged_at = 0; - - static void -sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb) +sctp_drain_mbufs(struct sctp_tcb *stcb) { /* * We must hunt this association for MBUF's past the cumack (i.e. @@ -6590,12 +6756,8 @@ sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb) cumulative_tsn_p1 = asoc->cumulative_tsn + 1; cnt = 0; /* First look in the re-assembly queue */ - chk = TAILQ_FIRST(&asoc->reasmqueue); - while (chk) { - /* Get the next one */ - nchk = TAILQ_NEXT(chk, sctp_next); - if (compare_with_wrap(chk->rec.data.TSN_seq, - cumulative_tsn_p1, MAX_TSN)) { + TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) { + if (SCTP_TSN_GT(chk->rec.data.TSN_seq, cumulative_tsn_p1)) { /* Yep it is above cum-ack */ cnt++; SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.TSN_seq, asoc->mapping_array_base_tsn); @@ -6607,17 +6769,13 @@ sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb) sctp_m_freem(chk->data); chk->data = NULL; } - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } - chk = nchk; } /* Ok that was fun, now we will drain all the inbound streams? */ for (strmat = 0; strmat < asoc->streamincnt; strmat++) { - ctl = TAILQ_FIRST(&asoc->strmin[strmat].inqueue); - while (ctl) { - nctl = TAILQ_NEXT(ctl, next); - if (compare_with_wrap(ctl->sinfo_tsn, - cumulative_tsn_p1, MAX_TSN)) { + TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[strmat].inqueue, next, nctl) { + if (SCTP_TSN_GT(ctl->sinfo_tsn, cumulative_tsn_p1)) { /* Yep it is above cum-ack */ cnt++; SCTP_CALC_TSN_TO_GAP(gap, ctl->sinfo_tsn, asoc->mapping_array_base_tsn); @@ -6633,14 +6791,11 @@ sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb) SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl); SCTP_DECR_READQ_COUNT(); } - ctl = nctl; } } if (cnt) { /* We must back down to see what the new highest is */ - for (i = asoc->highest_tsn_inside_map; - (compare_with_wrap(i, asoc->mapping_array_base_tsn, MAX_TSN) || (i == asoc->mapping_array_base_tsn)); - i--) { + for (i = asoc->highest_tsn_inside_map; SCTP_TSN_GE(i, asoc->mapping_array_base_tsn); i--) { SCTP_CALC_TSN_TO_GAP(gap, i, asoc->mapping_array_base_tsn); if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) { asoc->highest_tsn_inside_map = i; @@ -6677,10 +6832,8 @@ sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb) asoc->last_revoke_count = cnt; (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); /* sa_ignore NO_NULL_CHK */ - sctp_send_sack(stcb); + sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_DRAIN, SCTP_SO_NOT_LOCKED); - reneged_asoc_ids[reneged_at] = sctp_get_associd(stcb); - reneged_at++; } /* * Another issue, in un-setting the TSN's in the mapping array we @@ -6723,7 +6876,7 @@ sctp_drain() LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { /* For each association */ SCTP_TCB_LOCK(stcb); - sctp_drain_mbufs(inp, stcb); + sctp_drain_mbufs(stcb); SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); diff --git a/freebsd/sys/netinet/sctp_pcb.h b/freebsd/sys/netinet/sctp_pcb.h index 23ebc4f1..91807c7c 100644 --- a/freebsd/sys/netinet/sctp_pcb.h +++ b/freebsd/sys/netinet/sctp_pcb.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,13 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_pcb.h,v 1.21 2005/07/16 01:18:47 suz Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_pcb_h__ -#define __sctp_pcb_h__ +#ifndef _NETINET_SCTP_PCB_H_ +#define _NETINET_SCTP_PCB_H_ #include #include @@ -144,7 +144,12 @@ struct sctp_tagblock { struct sctp_epinfo { - struct socket *udp_tun_socket; +#ifdef INET + struct socket *udp4_tun_socket; +#endif +#ifdef INET6 + struct socket *udp6_tun_socket; +#endif struct sctpasochead *sctp_asochash; u_long hashasocmark; @@ -240,7 +245,7 @@ struct sctp_base_info { */ struct sctp_epinfo sctppcbinfo; #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) - struct sctpstat sctpstat[MAXCPU]; + struct sctpstat *sctpstat; #else struct sctpstat sctpstat; #endif @@ -276,6 +281,7 @@ struct sctp_pcb { uint32_t sctp_sws_receiver; uint32_t sctp_default_cc_module; + uint32_t sctp_default_ss_module; /* authentication related fields */ struct sctp_keyhead shared_keys; sctp_auth_chklist_t *local_auth_chunks; @@ -291,6 +297,8 @@ struct sctp_pcb { uint16_t def_net_failure; + uint16_t def_net_pf_threshold; + /* number of streams to pre-open on a association */ uint16_t pre_open_stream_count; uint16_t max_open_streams_intome; @@ -315,10 +323,17 @@ struct sctp_pcb { int auto_close_time; uint32_t initial_sequence_debug; uint32_t adaptation_layer_indicator; + uint8_t adaptation_layer_indicator_provided; uint32_t store_at; - uint8_t max_burst; + uint32_t max_burst; + uint32_t fr_max_burst; +#ifdef INET6 + uint32_t default_flowlabel; +#endif + uint8_t default_dscp; char current_secret_number; char last_secret_number; + uint16_t port; /* remote UDP encapsulation port */ }; #ifndef SCTP_ALIGNMENT @@ -389,7 +404,9 @@ struct sctp_inpcb { uint32_t sctp_frag_point; uint32_t partial_delivery_point; uint32_t sctp_context; + uint8_t local_strreset_support; uint32_t sctp_cmt_on_off; + uint32_t sctp_ecn_enable; struct sctp_nonpad_sndrcvinfo def_send; /*- * These three are here for the sosend_dgram @@ -516,7 +533,8 @@ sctp_inpcb_bind(struct socket *, struct sockaddr *, struct sctp_ifa *, struct thread *); struct sctp_tcb * -sctp_findassociation_addr(struct mbuf *, int, int, +sctp_findassociation_addr(struct mbuf *, int, + struct sockaddr *, struct sockaddr *, struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb **, struct sctp_nets **, uint32_t vrf_id); @@ -547,7 +565,7 @@ sctp_findassociation_ep_asocid(struct sctp_inpcb *, sctp_assoc_t, int); struct sctp_tcb * -sctp_findassociation_ep_asconf(struct mbuf *, int, int, +sctp_findassociation_ep_asconf(struct mbuf *, int, struct sockaddr *, struct sctphdr *, struct sctp_inpcb **, struct sctp_nets **, uint32_t vrf_id); int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id); @@ -578,7 +596,7 @@ void sctp_remove_laddr(struct sctp_laddr *); void sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *); -int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, int, int); +int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, struct sctp_nets **, int, int); void sctp_remove_net(struct sctp_tcb *, struct sctp_nets *); @@ -593,13 +611,13 @@ void sctp_del_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *); int sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int, - int, struct sctphdr *, struct sockaddr *); + struct sockaddr *, struct sockaddr *, struct sockaddr *); int sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *, struct sctp_nets *); -int sctp_is_vtag_good(struct sctp_inpcb *, uint32_t, uint16_t lport, uint16_t rport, struct timeval *, int); +int sctp_is_vtag_good(uint32_t, uint16_t lport, uint16_t rport, struct timeval *); /* void sctp_drain(void); */ @@ -622,6 +640,12 @@ sctp_initiate_iterator(inp_func inpf, struct sctp_inpcb *, uint8_t co_off); +#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) +void + sctp_queue_to_mcore(struct mbuf *m, int off, int cpu_to_use); + +#endif + #ifdef INVARIANTS void sctp_validate_no_locks(struct sctp_inpcb *inp); diff --git a/freebsd/sys/netinet/sctp_peeloff.c b/freebsd/sys/netinet/sctp_peeloff.c index bc5f8a4f..13b55be4 100644 --- a/freebsd/sys/netinet/sctp_peeloff.c +++ b/freebsd/sys/netinet/sctp_peeloff.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,11 +32,9 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ - -/* $KAME: sctp_peeloff.c,v 1.13 2005/03/06 16:04:18 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); + #include #include #include @@ -55,11 +55,20 @@ sctp_can_peel_off(struct socket *head, sctp_assoc_t assoc_id) struct sctp_tcb *stcb; uint32_t state; + if (head == NULL) { + SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EBADF); + return (EBADF); + } inp = (struct sctp_inpcb *)head->so_pcb; if (inp == NULL) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT); + SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT); return (EFAULT); } + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EOPNOTSUPP); + return (EOPNOTSUPP); + } stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); if (stcb == NULL) { SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOENT); @@ -67,9 +76,7 @@ sctp_can_peel_off(struct socket *head, sctp_assoc_t assoc_id) } state = SCTP_GET_STATE((&stcb->asoc)); if ((state == SCTP_STATE_EMPTY) || - (state == SCTP_STATE_INUSE) || - (state == SCTP_STATE_COOKIE_WAIT) || - (state == SCTP_STATE_COOKIE_ECHOED)) { + (state == SCTP_STATE_INUSE)) { SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN); return (ENOTCONN); @@ -98,9 +105,7 @@ sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id) } state = SCTP_GET_STATE((&stcb->asoc)); if ((state == SCTP_STATE_EMPTY) || - (state == SCTP_STATE_INUSE) || - (state == SCTP_STATE_COOKIE_WAIT) || - (state == SCTP_STATE_COOKIE_ECHOED)) { + (state == SCTP_STATE_INUSE)) { SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN); return (ENOTCONN); @@ -115,8 +120,10 @@ sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id) n_inp->sctp_mobility_features = inp->sctp_mobility_features; n_inp->sctp_frag_point = inp->sctp_frag_point; n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off; + n_inp->sctp_ecn_enable = inp->sctp_ecn_enable; n_inp->partial_delivery_point = inp->partial_delivery_point; n_inp->sctp_context = inp->sctp_context; + n_inp->local_strreset_support = inp->local_strreset_support; n_inp->inp_starting_point_for_iterator = NULL; /* copy in the authentication parameters from the original endpoint */ if (n_inp->sctp_ep.local_hmacs) @@ -142,99 +149,3 @@ sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id) return (0); } - - -struct socket * -sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error) -{ - struct socket *newso; - struct sctp_inpcb *inp, *n_inp; - struct sctp_tcb *stcb; - - SCTPDBG(SCTP_DEBUG_PEEL1, "SCTP peel-off called\n"); - inp = (struct sctp_inpcb *)head->so_pcb; - if (inp == NULL) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT); - *error = EFAULT; - return (NULL); - } - stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); - if (stcb == NULL) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN); - *error = ENOTCONN; - return (NULL); - } - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - newso = sonewconn(head, SS_ISCONNECTED - ); - if (newso == NULL) { - SCTPDBG(SCTP_DEBUG_PEEL1, "sctp_peeloff:sonewconn failed\n"); - SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOMEM); - *error = ENOMEM; - atomic_subtract_int(&stcb->asoc.refcnt, 1); - return (NULL); - - } - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); - n_inp = (struct sctp_inpcb *)newso->so_pcb; - SOCK_LOCK(head); - n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE | - SCTP_PCB_FLAGS_CONNECTED | - SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */ - (SCTP_PCB_COPY_FLAGS & inp->sctp_flags)); - n_inp->sctp_features = inp->sctp_features; - n_inp->sctp_frag_point = inp->sctp_frag_point; - n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off; - n_inp->partial_delivery_point = inp->partial_delivery_point; - n_inp->sctp_context = inp->sctp_context; - n_inp->inp_starting_point_for_iterator = NULL; - - /* copy in the authentication parameters from the original endpoint */ - if (n_inp->sctp_ep.local_hmacs) - sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs); - n_inp->sctp_ep.local_hmacs = - sctp_copy_hmaclist(inp->sctp_ep.local_hmacs); - if (n_inp->sctp_ep.local_auth_chunks) - sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks); - n_inp->sctp_ep.local_auth_chunks = - sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks); - (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys, - &n_inp->sctp_ep.shared_keys); - - n_inp->sctp_socket = newso; - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) { - sctp_feature_off(n_inp, SCTP_PCB_FLAGS_AUTOCLOSE); - n_inp->sctp_ep.auto_close_time = 0; - sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, n_inp, stcb, NULL, - SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1); - } - /* Turn off any non-blocking semantic. */ - SCTP_CLEAR_SO_NBIO(newso); - newso->so_state |= SS_ISCONNECTED; - /* We remove it right away */ - -#ifdef SCTP_LOCK_LOGGING - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) { - sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK); - } -#endif - TAILQ_REMOVE(&head->so_comp, newso, so_list); - head->so_qlen--; - SOCK_UNLOCK(head); - /* - * Now we must move it from one hash table to another and get the - * stcb in the right place. - */ - sctp_move_pcb_and_assoc(inp, n_inp, stcb); - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - /* - * And now the final hack. We move data in the pending side i.e. - * head to the new socket buffer. Let the GRUBBING begin :-0 - */ - sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT); - atomic_subtract_int(&stcb->asoc.refcnt, 1); - return (newso); -} diff --git a/freebsd/sys/netinet/sctp_peeloff.h b/freebsd/sys/netinet/sctp_peeloff.h index db61db3b..dd905676 100644 --- a/freebsd/sys/netinet/sctp_peeloff.h +++ b/freebsd/sys/netinet/sctp_peeloff.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,25 +30,14 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_peeloff.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_peeloff_h__ -#define __sctp_peeloff_h__ - - - - +#ifndef _NETINET_SCTP_PEELOFF_H_ +#define _NETINET_SCTP_PEELOFF_H_ #if defined(_KERNEL) - int sctp_can_peel_off(struct socket *, sctp_assoc_t); int sctp_do_peeloff(struct socket *, struct socket *, sctp_assoc_t); -struct socket *sctp_get_peeloff(struct socket *, sctp_assoc_t, int *); - - #endif /* _KERNEL */ - -#endif +#endif /* _NETINET_SCTP_PEELOFF_H_ */ diff --git a/freebsd/sys/netinet/sctp_structs.h b/freebsd/sys/netinet/sctp_structs.h index 94d0395c..abecdabd 100644 --- a/freebsd/sys/netinet/sctp_structs.h +++ b/freebsd/sys/netinet/sctp_structs.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,13 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_structs.h,v 1.13 2005/03/06 16:04:18 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_structs_h__ -#define __sctp_structs_h__ +#ifndef _NETINET_SCTP_STRUCTS_H_ +#define _NETINET_SCTP_STRUCTS_H_ #include #include @@ -77,8 +77,8 @@ TAILQ_HEAD(sctpnetlisthead, sctp_nets); struct sctp_stream_reset_list { TAILQ_ENTRY(sctp_stream_reset_list) next_resp; uint32_t tsn; - int number_entries; - struct sctp_stream_reset_out_request req; + uint32_t number_entries; + uint16_t list_of_streams[]; }; TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list); @@ -106,6 +106,31 @@ typedef void (*asoc_func) (struct sctp_inpcb *, struct sctp_tcb *, void *ptr, typedef int (*inp_func) (struct sctp_inpcb *, void *ptr, uint32_t val); typedef void (*end_func) (void *ptr, uint32_t val); +#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) +/* whats on the mcore control struct */ +struct sctp_mcore_queue { + TAILQ_ENTRY(sctp_mcore_queue) next; + struct vnet *vn; + struct mbuf *m; + int off; + int v6; +}; + +TAILQ_HEAD(sctp_mcore_qhead, sctp_mcore_queue); + +struct sctp_mcore_ctrl { + SCTP_PROCESS_STRUCT thread_proc; + struct sctp_mcore_qhead que; + struct mtx core_mtx; + struct mtx que_mtx; + int running; + int cpuid; +}; + + +#endif + + struct sctp_iterator { TAILQ_ENTRY(sctp_iterator) sctp_nxt_itr; struct vnet *vn; @@ -158,9 +183,8 @@ struct iterator_control { uint32_t iterator_flags; }; -#define SCTP_ITERATOR_MUST_EXIT 0x00000001 -#define SCTP_ITERATOR_STOP_CUR_IT 0x00000002 -#define SCTP_ITERATOR_STOP_CUR_INP 0x00000004 +#define SCTP_ITERATOR_STOP_CUR_IT 0x00000004 +#define SCTP_ITERATOR_STOP_CUR_INP 0x00000008 struct sctp_net_route { sctp_rtentry_t *ro_rt; @@ -192,6 +216,29 @@ struct htcp { uint32_t lasttime; }; +struct rtcc_cc { + struct timeval tls; /* The time we started the sending */ + uint64_t lbw; /* Our last estimated bw */ + uint64_t lbw_rtt; /* RTT at bw estimate */ + uint64_t bw_bytes; /* The total bytes since this sending began */ + uint64_t bw_tot_time; /* The total time since sending began */ + uint64_t new_tot_time; /* temp holding the new value */ + uint64_t bw_bytes_at_last_rttc; /* What bw_bytes was at last rtt calc */ + uint32_t cwnd_at_bw_set;/* Cwnd at last bw saved - lbw */ + uint32_t vol_reduce; /* cnt of voluntary reductions */ + uint16_t steady_step; /* The number required to be in steady state */ + uint16_t step_cnt; /* The current number */ + uint8_t ret_from_eq; /* When all things are equal what do I return + * 0/1 - 1 no cc advance */ + uint8_t use_dccc_ecn; /* Flag to enable DCCC ECN */ + uint8_t tls_needs_set; /* Flag to indicate we need to set tls 0 or 1 + * means set at send 2 not */ + uint8_t last_step_state;/* Last state if steady state stepdown is on */ + uint8_t rtt_set_this_sack; /* Flag saying this sack had RTT calc + * on it */ + uint8_t last_inst_ind; /* Last saved inst indication */ +}; + struct sctp_nets { TAILQ_ENTRY(sctp_nets) sctp_next; /* next link */ @@ -201,6 +248,7 @@ struct sctp_nets { * structure shared by all. */ struct sctp_timer pmtu_timer; + struct sctp_timer hb_timer; /* * The following two in combination equate to a route entry for v6 @@ -211,19 +259,25 @@ struct sctp_nets { /* mtu discovered so far */ uint32_t mtu; uint32_t ssthresh; /* not sure about this one for split */ - + uint32_t last_cwr_tsn; + uint32_t cwr_window_tsn; + uint32_t ecn_ce_pkt_cnt; + uint32_t lost_cnt; /* smoothed average things for RTT and RTO itself */ int lastsa; int lastsv; - int rtt; /* last measured rtt value in ms */ + uint64_t rtt; /* last measured rtt value in us */ unsigned int RTO; /* This is used for SHUTDOWN/SHUTDOWN-ACK/SEND or INIT timers */ struct sctp_timer rxt_timer; - struct sctp_timer fr_timer; /* for early fr */ /* last time in seconds I sent to it */ struct timeval last_sent_time; + union cc_control_data { + struct htcp htcp_ca; /* JRS - struct used in HTCP algorithm */ + struct rtcc_cc rtcc; /* rtcc module cc stuff */ + } cc_mod; int ref_count; /* Congestion stats per destination */ @@ -234,8 +288,9 @@ struct sctp_nets { uint32_t flight_size; uint32_t cwnd; /* actual cwnd */ uint32_t prev_cwnd; /* cwnd before any processing */ + uint32_t ecn_prev_cwnd; /* ECN prev cwnd at first ecn_echo seen in new + * window */ uint32_t partial_bytes_acked; /* in CA tracks when to incr a MTU */ - uint32_t prev_rtt; /* tracking variables to avoid the aloc/free in sack processing */ unsigned int net_ack; unsigned int net_ack2; @@ -263,19 +318,24 @@ struct sctp_nets { uint32_t fast_recovery_tsn; uint32_t heartbeat_random1; uint32_t heartbeat_random2; - uint32_t tos_flowlabel; +#ifdef INET6 + uint32_t flowlabel; +#endif + uint8_t dscp; struct timeval start_time; /* time when this net was created */ - uint32_t marked_retrans;/* number or DATA chunks marked for timer * based retransmissions */ uint32_t marked_fastretrans; + uint32_t heart_beat_delay; /* Heart Beat delay in ms */ /* if this guy is ok or not ... status */ uint16_t dest_state; - /* number of transmit failures to down this guy */ + /* number of timeouts to consider the destination unreachable */ uint16_t failure_threshold; - /* error stats on destination */ + /* number of timeouts to consider the destination potentially failed */ + uint16_t pf_threshold; + /* error stats on the destination */ uint16_t error_count; /* UDP port number in case of UDP tunneling */ uint16_t port; @@ -315,8 +375,12 @@ struct sctp_nets { uint8_t window_probe; /* Doing a window probe? */ uint8_t RTO_measured; /* Have we done the first measure */ uint8_t last_hs_used; /* index into the last HS table entry we used */ - /* JRS - struct used in HTCP algorithm */ - struct htcp htcp_ca; + uint8_t lan_type; + uint8_t rto_needed; + uint32_t flowid; +#ifdef INVARIANTS + uint8_t flowidset; +#endif }; @@ -326,10 +390,7 @@ struct sctp_data_chunkrec { uint16_t stream_number; /* the stream number of this guy */ uint32_t payloadtype; uint32_t context; /* from send */ - - /* ECN Nonce: Nonce Value for this chunk */ - uint8_t ect_nonce; - uint8_t fwd_tsn_cnt; + uint32_t cwnd_at_send; /* * part of the Highest sacked algorithm to be able to stroke counts * on ones that are FR'd. @@ -341,6 +402,7 @@ struct sctp_data_chunkrec { * outbound holds sending flags for PR-SCTP. */ uint8_t state_flags; uint8_t chunk_was_revoked; + uint8_t fwd_tsn_cnt; }; TAILQ_HEAD(sctpchunk_listhead, sctp_tmit_chunk); @@ -350,7 +412,7 @@ TAILQ_HEAD(sctpchunk_listhead, sctp_tmit_chunk); #define CHUNK_FLAGS_PR_SCTP_BUF SCTP_PR_SCTP_BUF #define CHUNK_FLAGS_PR_SCTP_RTX SCTP_PR_SCTP_RTX -/* The upper byte is used a a bit mask */ +/* The upper byte is used as a bit mask */ #define CHUNK_FLAGS_FRAGMENT_OK 0x0100 struct chk_id { @@ -446,13 +508,13 @@ struct sctp_stream_queue_pending { struct timeval ts; struct sctp_nets *net; TAILQ_ENTRY(sctp_stream_queue_pending) next; + TAILQ_ENTRY(sctp_stream_queue_pending) ss_next; uint32_t length; uint32_t timetolive; uint32_t ppid; uint32_t context; uint16_t sinfo_flags; uint16_t stream; - uint16_t strseq; uint16_t act_flags; uint16_t auth_keyid; uint8_t holds_key_ref; @@ -476,13 +538,57 @@ struct sctp_stream_in { uint8_t delivery_started; }; -/* This struct is used to track the traffic on outbound streams */ TAILQ_HEAD(sctpwheel_listhead, sctp_stream_out); +TAILQ_HEAD(sctplist_listhead, sctp_stream_queue_pending); + +/* Round-robin schedulers */ +struct ss_rr { + /* next link in wheel */ + TAILQ_ENTRY(sctp_stream_out) next_spoke; +}; + +/* Priority scheduler */ +struct ss_prio { + /* next link in wheel */ + TAILQ_ENTRY(sctp_stream_out) next_spoke; + /* priority id */ + uint16_t priority; +}; + +/* Fair Bandwidth scheduler */ +struct ss_fb { + /* next link in wheel */ + TAILQ_ENTRY(sctp_stream_out) next_spoke; + /* stores message size */ + int32_t rounds; +}; + +/* + * This union holds all data necessary for + * different stream schedulers. + */ +union scheduling_data { + struct sctpwheel_listhead out_wheel; + struct sctplist_listhead out_list; +}; + +/* + * This union holds all parameters per stream + * necessary for different stream schedulers. + */ +union scheduling_parameters { + struct ss_rr rr; + struct ss_prio prio; + struct ss_fb fb; +}; + +/* This struct is used to track the traffic on outbound streams */ struct sctp_stream_out { struct sctp_streamhead outqueue; - TAILQ_ENTRY(sctp_stream_out) next_spoke; /* next link in wheel */ + union scheduling_parameters ss_params; + uint32_t chunks_on_queues; uint16_t stream_no; - uint16_t next_sequence_sent; /* next one I expect to send out */ + uint16_t next_sequence_send; /* next one I expect to send out */ uint8_t last_msg_incomplete; }; @@ -544,6 +650,8 @@ struct sctp_nonpad_sndrcvinfo { uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; + uint16_t sinfo_keynumber; + uint16_t sinfo_keynumber_valid; }; /* @@ -556,19 +664,55 @@ struct sctp_cc_functions { void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *stcb, struct sctp_association *asoc, int accum_moved, int reneged_all, int will_exit); + void (*sctp_cwnd_update_exit_pf) (struct sctp_tcb *stcb, struct sctp_nets *net); void (*sctp_cwnd_update_after_fr) (struct sctp_tcb *stcb, struct sctp_association *asoc); void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *stcb, struct sctp_nets *net); void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *stcb, - struct sctp_nets *net); + struct sctp_nets *net, int in_window, int num_pkt_lost); void (*sctp_cwnd_update_after_packet_dropped) (struct sctp_tcb *stcb, struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, uint32_t * bottle_bw, uint32_t * on_queue); void (*sctp_cwnd_update_after_output) (struct sctp_tcb *stcb, struct sctp_nets *net, int burst_limit); - void (*sctp_cwnd_update_after_fr_timer) (struct sctp_inpcb *inp, - struct sctp_tcb *stcb, struct sctp_nets *net); + void (*sctp_cwnd_update_packet_transmitted) (struct sctp_tcb *stcb, + struct sctp_nets *net); + void (*sctp_cwnd_update_tsn_acknowledged) (struct sctp_nets *net, + struct sctp_tmit_chunk *); + void (*sctp_cwnd_new_transmission_begins) (struct sctp_tcb *stcb, + struct sctp_nets *net); + void (*sctp_cwnd_prepare_net_for_sack) (struct sctp_tcb *stcb, + struct sctp_nets *net); + int (*sctp_cwnd_socket_option) (struct sctp_tcb *stcb, int set, struct sctp_cc_option *); + void (*sctp_rtt_calculated) (struct sctp_tcb *, struct sctp_nets *, struct timeval *); +}; + +/* + * RS - Structure to hold function pointers to the functions responsible + * for stream scheduling. + */ +struct sctp_ss_functions { + void (*sctp_ss_init) (struct sctp_tcb *stcb, struct sctp_association *asoc, + int holds_lock); + void (*sctp_ss_clear) (struct sctp_tcb *stcb, struct sctp_association *asoc, + int clear_values, int holds_lock); + void (*sctp_ss_init_stream) (struct sctp_stream_out *strq, struct sctp_stream_out *with_strq); + void (*sctp_ss_add_to_stream) (struct sctp_tcb *stcb, struct sctp_association *asoc, + struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock); + int (*sctp_ss_is_empty) (struct sctp_tcb *stcb, struct sctp_association *asoc); + void (*sctp_ss_remove_from_stream) (struct sctp_tcb *stcb, struct sctp_association *asoc, + struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock); + struct sctp_stream_out *(*sctp_ss_select_stream) (struct sctp_tcb *stcb, + struct sctp_nets *net, struct sctp_association *asoc); + void (*sctp_ss_scheduled) (struct sctp_tcb *stcb, struct sctp_nets *net, + struct sctp_association *asoc, struct sctp_stream_out *strq, int moved_how_much); + void (*sctp_ss_packet_done) (struct sctp_tcb *stcb, struct sctp_nets *net, + struct sctp_association *asoc); + int (*sctp_ss_get_value) (struct sctp_tcb *stcb, struct sctp_association *asoc, + struct sctp_stream_out *strq, uint16_t * value); + int (*sctp_ss_set_value) (struct sctp_tcb *stcb, struct sctp_association *asoc, + struct sctp_stream_out *strq, uint16_t value); }; /* used to save ASCONF chunks for retransmission */ @@ -611,7 +755,6 @@ struct sctp_association { struct sctp_nonpad_sndrcvinfo def_send; /* timers and such */ - struct sctp_timer hb_timer; /* hb timer */ struct sctp_timer dack_timer; /* Delayed ack timer */ struct sctp_timer asconf_timer; /* asconf */ struct sctp_timer strreset_timer; /* stream reset */ @@ -652,13 +795,8 @@ struct sctp_association { /* re-assembly queue for fragmented chunks on the inbound path */ struct sctpchunk_listhead reasmqueue; - /* - * this queue is used when we reach a condition that we can NOT put - * data into the socket buffer. We track the size of this queue and - * set our rwnd to the space in the socket minus also the - * size_on_delivery_queue. - */ - struct sctpwheel_listhead out_wheel; + /* Scheduling queues */ + union scheduling_data ss_data; /* * This pointer will be set to NULL most of the time. But when we @@ -691,6 +829,7 @@ struct sctp_association { uint8_t *mapping_array; /* primary destination to use */ struct sctp_nets *primary_destination; + struct sctp_nets *alternate; /* If primary is down or PF */ /* For CMT */ struct sctp_nets *last_net_cmt_send_started; /* last place I got a data chunk from */ @@ -717,6 +856,10 @@ struct sctp_association { * module */ uint32_t congestion_control_module; + /* RS - the stream scheduling functions are in this struct */ + struct sctp_ss_functions ss_functions; + /* RS - value to store the currently loaded stream scheduling module */ + uint32_t stream_scheduling_module; uint32_t vrf_id; @@ -789,8 +932,6 @@ struct sctp_association { uint8_t *nr_mapping_array; uint32_t highest_tsn_inside_nr_map; - uint32_t last_echo_tsn; - uint32_t last_cwr_tsn; uint32_t fast_recovery_tsn; uint32_t sat_t3_recovery_tsn; uint32_t tsn_last_delivered; @@ -845,12 +986,11 @@ struct sctp_association { uint32_t sb_send_resv; /* amount reserved on a send */ uint32_t my_rwnd_control_len; /* shadow of sb_mbcnt used for rwnd * control */ - /* 32 bit nonce stuff */ - uint32_t nonce_resync_tsn; - uint32_t nonce_wait_tsn; +#ifdef INET6 uint32_t default_flowlabel; +#endif uint32_t pr_sctp_cnt; - int ctrl_queue_cnt; /* could be removed REM */ + int ctrl_queue_cnt; /* could be removed REM - NO IT CAN'T!! RRS */ /* * All outbound datagrams queue into this list from the individual * stream queue. Here they get assigned a TSN and then await @@ -887,8 +1027,8 @@ struct sctp_association { unsigned int size_on_all_streams; unsigned int cnt_on_all_streams; - /* Heart Beat delay in ticks */ - unsigned int heart_beat_delay; + /* Heart Beat delay in ms */ + uint32_t heart_beat_delay; /* autoclose */ unsigned int sctp_autoclose_ticks; @@ -951,6 +1091,7 @@ struct sctp_association { uint16_t streamincnt; uint16_t streamoutcnt; uint16_t strm_realoutsize; + uint16_t strm_pending_add_size; /* my maximum number of retrans of INIT and SEND */ /* copied from SCTP but should be individually setable */ uint16_t max_init_times; @@ -958,6 +1099,8 @@ struct sctp_association { uint16_t def_net_failure; + uint16_t def_net_pf_threshold; + /* * lock flag: 0 is ok to send, 1+ (duals as a retran count) is * awaiting ACK @@ -974,7 +1117,6 @@ struct sctp_association { uint16_t ecn_echo_cnt_onq; uint16_t free_chunk_cnt; - uint8_t stream_locked; uint8_t authenticated; /* packet authenticated ok */ /* @@ -983,8 +1125,10 @@ struct sctp_association { */ uint8_t send_sack; - /* max burst after fast retransmit completes */ - uint8_t max_burst; + /* max burst of new packets into the network */ + uint32_t max_burst; + /* max burst of fast retransmit packets */ + uint32_t fr_max_burst; uint8_t sat_network; /* RTT is in range of sat net or greater */ uint8_t sat_network_lockout; /* lockout code */ @@ -996,27 +1140,22 @@ struct sctp_association { uint8_t last_flags_delivered; uint8_t hb_ect_randombit; uint8_t hb_random_idx; - uint8_t hb_is_disabled; /* is the hb disabled? */ - uint8_t default_tos; + uint8_t default_dscp; uint8_t asconf_del_pending; /* asconf delete last addr pending */ - /* ECN Nonce stuff */ - uint8_t receiver_nonce_sum; /* nonce I sum and put in my sack */ - uint8_t ecn_nonce_allowed; /* Tells us if ECN nonce is on */ - uint8_t nonce_sum_check;/* On off switch used during re-sync */ - uint8_t nonce_wait_for_ecne; /* flag when we expect a ECN */ - uint8_t peer_supports_ecn_nonce; - /* * This value, plus all other ack'd but above cum-ack is added * together to cross check against the bit that we have yet to * define (probably in the SACK). When the cum-ack is updated, this * sum is updated as well. */ - uint8_t nonce_sum_expect_base; + /* Flag to tell if ECN is allowed */ uint8_t ecn_allowed; + /* Did the peer make the stream config (add out) request */ + uint8_t peer_req_out; + /* flag to indicate if peer can do asconf */ uint8_t peer_supports_asconf; /* EY - flag to indicate if peer can do nr_sack */ @@ -1027,6 +1166,7 @@ struct sctp_association { uint8_t peer_supports_auth; /* stream resets are supported by the peer */ uint8_t peer_supports_strreset; + uint8_t local_strreset_support; uint8_t peer_supports_nat; /* @@ -1035,17 +1175,7 @@ struct sctp_association { */ uint8_t peer_supports_pktdrop; - /* Do we allow V6/V4? */ - uint8_t ipv4_addr_legal; - uint8_t ipv6_addr_legal; - /* Address scoping flags */ - /* scope value for IPv4 */ - uint8_t ipv4_local_scope; - /* scope values for IPv6 */ - uint8_t local_scope; - uint8_t site_scope; - /* loopback scope */ - uint8_t loopback_scope; + struct sctp_scoping scope; /* flags to handle send alternate net tracking */ uint8_t used_alt_onsack; uint8_t used_alt_asconfack; @@ -1072,6 +1202,9 @@ struct sctp_association { uint8_t sctp_nr_sack_on_off; /* JRS 5/21/07 - CMT PF variable */ uint8_t sctp_cmt_pf; + uint8_t use_precise_time; + uint32_t sctp_features; + uint16_t port; /* remote UDP encapsulation port */ /* * The mapping array is used to track out of order sequences above * last_acked_seq. 0 indicates packet missing 1 indicates packet diff --git a/freebsd/sys/netinet/sctp_sysctl.c b/freebsd/sys/netinet/sctp_sysctl.c index 6fcf4964..b4ed49d4 100644 --- a/freebsd/sys/netinet/sctp_sysctl.c +++ b/freebsd/sys/netinet/sctp_sysctl.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -54,14 +56,10 @@ sctp_init_sysctls() SCTP_BASE_SYSCTL(sctp_auto_asconf) = SCTPCTL_AUTOASCONF_DEFAULT; SCTP_BASE_SYSCTL(sctp_multiple_asconfs) = SCTPCTL_MULTIPLEASCONFS_DEFAULT; SCTP_BASE_SYSCTL(sctp_ecn_enable) = SCTPCTL_ECN_ENABLE_DEFAULT; - SCTP_BASE_SYSCTL(sctp_ecn_nonce) = SCTPCTL_ECN_NONCE_DEFAULT; SCTP_BASE_SYSCTL(sctp_strict_sacks) = SCTPCTL_STRICT_SACKS_DEFAULT; -#if !defined(SCTP_WITH_NO_CSUM) - SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) = SCTPCTL_LOOPBACK_NOCSUM_DEFAULT; -#endif - SCTP_BASE_SYSCTL(sctp_strict_init) = SCTPCTL_STRICT_INIT_DEFAULT; SCTP_BASE_SYSCTL(sctp_peer_chunk_oh) = SCTPCTL_PEER_CHKOH_DEFAULT; SCTP_BASE_SYSCTL(sctp_max_burst_default) = SCTPCTL_MAXBURST_DEFAULT; + SCTP_BASE_SYSCTL(sctp_fr_max_burst_default) = SCTPCTL_FRMAXBURST_DEFAULT; SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue) = SCTPCTL_MAXCHUNKS_DEFAULT; SCTP_BASE_SYSCTL(sctp_hashtblsize) = SCTPCTL_TCBHASHSIZE_DEFAULT; SCTP_BASE_SYSCTL(sctp_pcbtblsize) = SCTPCTL_PCBHASHSIZE_DEFAULT; @@ -83,16 +81,14 @@ sctp_init_sysctls() SCTP_BASE_SYSCTL(sctp_init_rtx_max_default) = SCTPCTL_INIT_RTX_MAX_DEFAULT; SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default) = SCTPCTL_ASSOC_RTX_MAX_DEFAULT; SCTP_BASE_SYSCTL(sctp_path_rtx_max_default) = SCTPCTL_PATH_RTX_MAX_DEFAULT; + SCTP_BASE_SYSCTL(sctp_path_pf_threshold) = SCTPCTL_PATH_PF_THRESHOLD_DEFAULT; SCTP_BASE_SYSCTL(sctp_add_more_threshold) = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT; SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default) = SCTPCTL_OUTGOING_STREAMS_DEFAULT; SCTP_BASE_SYSCTL(sctp_cmt_on_off) = SCTPCTL_CMT_ON_OFF_DEFAULT; /* EY */ SCTP_BASE_SYSCTL(sctp_nr_sack_on_off) = SCTPCTL_NR_SACK_ON_OFF_DEFAULT; SCTP_BASE_SYSCTL(sctp_cmt_use_dac) = SCTPCTL_CMT_USE_DAC_DEFAULT; - SCTP_BASE_SYSCTL(sctp_cmt_pf) = SCTPCTL_CMT_PF_DEFAULT; SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) = SCTPCTL_CWND_MAXBURST_DEFAULT; - SCTP_BASE_SYSCTL(sctp_early_fr) = SCTPCTL_EARLY_FAST_RETRAN_DEFAULT; - SCTP_BASE_SYSCTL(sctp_early_fr_msec) = SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT; SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) = SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT; SCTP_BASE_SYSCTL(sctp_auth_disable) = SCTPCTL_AUTH_DISABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_nat_friendly) = SCTPCTL_NAT_FRIENDLY_DEFAULT; @@ -107,16 +103,24 @@ sctp_init_sysctls() SCTP_BASE_SYSCTL(sctp_logging_level) = SCTPCTL_LOGGING_LEVEL_DEFAULT; /* JRS - Variable for default congestion control module */ SCTP_BASE_SYSCTL(sctp_default_cc_module) = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT; + /* RS - Variable for default stream scheduling module */ + SCTP_BASE_SYSCTL(sctp_default_ss_module) = SCTPCTL_DEFAULT_SS_MODULE_DEFAULT; SCTP_BASE_SYSCTL(sctp_default_frag_interleave) = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT; SCTP_BASE_SYSCTL(sctp_mobility_base) = SCTPCTL_MOBILITY_BASE_DEFAULT; SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff) = SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT; SCTP_BASE_SYSCTL(sctp_vtag_time_wait) = SCTPCTL_TIME_WAIT_DEFAULT; SCTP_BASE_SYSCTL(sctp_buffer_splitting) = SCTPCTL_BUFFER_SPLITTING_DEFAULT; SCTP_BASE_SYSCTL(sctp_initial_cwnd) = SCTPCTL_INITIAL_CWND_DEFAULT; + SCTP_BASE_SYSCTL(sctp_rttvar_bw) = SCTPCTL_RTTVAR_BW_DEFAULT; + SCTP_BASE_SYSCTL(sctp_rttvar_rtt) = SCTPCTL_RTTVAR_RTT_DEFAULT; + SCTP_BASE_SYSCTL(sctp_rttvar_eqret) = SCTPCTL_RTTVAR_EQRET_DEFAULT; + SCTP_BASE_SYSCTL(sctp_steady_step) = SCTPCTL_RTTVAR_STEADYS_DEFAULT; + SCTP_BASE_SYSCTL(sctp_use_dccc_ecn) = SCTPCTL_RTTVAR_DCCCECN_DEFAULT; + SCTP_BASE_SYSCTL(sctp_blackhole) = SCTPCTL_BLACKHOLE_DEFAULT; + #if defined(SCTP_LOCAL_TRACE_BUF) memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log)); #endif - SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable) = SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = SCTPCTL_UDP_TUNNELING_PORT_DEFAULT; SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) = SCTPCTL_SACK_IMMEDIATELY_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly) = SCTPCTL_NAT_FRIENDLY_INITS_DEFAULT; @@ -133,7 +137,7 @@ sctp_init_sysctls() static unsigned int number_of_addresses(struct sctp_inpcb *inp) { - int cnt; + unsigned int cnt; struct sctp_vrf *vrf; struct sctp_ifn *sctp_ifn; struct sctp_ifa *sctp_ifa; @@ -147,17 +151,33 @@ number_of_addresses(struct sctp_inpcb *inp) if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { - if ((sctp_ifa->address.sa.sa_family == AF_INET) || - (sctp_ifa->address.sa.sa_family == AF_INET6)) { + switch (sctp_ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: +#endif +#ifdef INET6 + case AF_INET6: +#endif cnt++; + break; + default: + break; } } } } else { LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if ((laddr->ifa->address.sa.sa_family == AF_INET) || - (laddr->ifa->address.sa.sa_family == AF_INET6)) { + switch (laddr->ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: +#endif +#ifdef INET6 + case AF_INET6: +#endif cnt++; + break; + default: + break; } } } @@ -179,29 +199,29 @@ copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct s /* Turn on all the appropriate scope */ if (stcb) { /* use association specific values */ - loopback_scope = stcb->asoc.loopback_scope; - ipv4_local_scope = stcb->asoc.ipv4_local_scope; - local_scope = stcb->asoc.local_scope; - site_scope = stcb->asoc.site_scope; + loopback_scope = stcb->asoc.scope.loopback_scope; + ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope; + local_scope = stcb->asoc.scope.local_scope; + site_scope = stcb->asoc.scope.site_scope; + ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal; + ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal; } else { - /* use generic values for endpoints */ + /* Use generic values for endpoints. */ loopback_scope = 1; ipv4_local_scope = 1; local_scope = 1; site_scope = 1; - } - - /* use only address families of interest */ - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - ipv6_addr_legal = 1; - if (SCTP_IPV6_V6ONLY(inp)) { - ipv4_addr_legal = 0; + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + ipv6_addr_legal = 1; + if (SCTP_IPV6_V6ONLY(inp)) { + ipv4_addr_legal = 0; + } else { + ipv4_addr_legal = 1; + } } else { + ipv6_addr_legal = 0; ipv4_addr_legal = 1; } - } else { - ipv4_addr_legal = 1; - ipv6_addr_legal = 0; } /* neither Mac OS X nor FreeBSD support mulitple routing functions */ @@ -225,6 +245,7 @@ copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct s continue; } switch (sctp_ifa->address.sa.sa_family) { +#ifdef INET case AF_INET: if (ipv4_addr_legal) { struct sockaddr_in *sin; @@ -238,6 +259,7 @@ copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct s continue; } break; +#endif #ifdef INET6 case AF_INET6: if (ipv6_addr_legal) { @@ -365,12 +387,12 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) /* request some more memory than needed */ req->oldidx = (n + n / 8); - return 0; + return (0); } if (req->newptr != USER_ADDR_NULL) { SCTP_INP_INFO_RUNLOCK(); SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM); - return EPERM; + return (EPERM); } LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) { SCTP_INP_RLOCK(inp); @@ -401,14 +423,14 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb)); if (error) { SCTP_INP_DECR_REF(inp); - return error; + return (error); } SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); error = copy_out_local_addresses(inp, NULL, req); if (error) { SCTP_INP_DECR_REF(inp); - return error; + return (error); } LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); @@ -452,7 +474,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) if (error) { SCTP_INP_DECR_REF(inp); atomic_subtract_int(&stcb->asoc.refcnt, 1); - return error; + return (error); } SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); @@ -460,7 +482,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) if (error) { SCTP_INP_DECR_REF(inp); atomic_subtract_int(&stcb->asoc.refcnt, 1); - return error; + return (error); } TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { xraddr.last = 0; @@ -468,6 +490,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) xraddr.active = ((net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE); xraddr.confirmed = ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0); xraddr.heartbeat_enabled = ((net->dest_state & SCTP_ADDR_NOHB) == 0); + xraddr.potentially_failed = ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF); xraddr.rto = net->RTO; xraddr.max_path_rtx = net->failure_threshold; xraddr.rtx = net->marked_retrans; @@ -475,7 +498,8 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) xraddr.cwnd = net->cwnd; xraddr.flight_size = net->flight_size; xraddr.mtu = net->mtu; - xraddr.rtt = net->rtt; + xraddr.rtt = net->rtt / 1000; + xraddr.heartbeat_interval = net->heart_beat_delay; xraddr.start_time.tv_sec = (uint32_t) net->start_time.tv_sec; xraddr.start_time.tv_usec = (uint32_t) net->start_time.tv_usec; SCTP_INP_RUNLOCK(inp); @@ -484,7 +508,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) if (error) { SCTP_INP_DECR_REF(inp); atomic_subtract_int(&stcb->asoc.refcnt, 1); - return error; + return (error); } SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); @@ -497,7 +521,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr)); if (error) { SCTP_INP_DECR_REF(inp); - return error; + return (error); } SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); @@ -509,7 +533,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS) xstcb.last = 1; error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb)); if (error) { - return error; + return (error); } skip: SCTP_INP_INFO_RLOCK(); @@ -519,7 +543,7 @@ skip: memset((void *)&xinpcb, 0, sizeof(struct xsctp_inpcb)); xinpcb.last = 1; error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb)); - return error; + return (error); } @@ -564,20 +588,20 @@ sysctl_sctp_check(SYSCTL_HANDLER_ARGS) { int error; +#ifdef VIMAGE + error = vnet_sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); +#else error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); +#endif if (error == 0) { RANGECHK(SCTP_BASE_SYSCTL(sctp_sendspace), SCTPCTL_MAXDGRAM_MIN, SCTPCTL_MAXDGRAM_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_recvspace), SCTPCTL_RECVSPACE_MIN, SCTPCTL_RECVSPACE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_auto_asconf), SCTPCTL_AUTOASCONF_MIN, SCTPCTL_AUTOASCONF_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_ecn_enable), SCTPCTL_ECN_ENABLE_MIN, SCTPCTL_ECN_ENABLE_MAX); - RANGECHK(SCTP_BASE_SYSCTL(sctp_ecn_nonce), SCTPCTL_ECN_NONCE_MIN, SCTPCTL_ECN_NONCE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_sacks), SCTPCTL_STRICT_SACKS_MIN, SCTPCTL_STRICT_SACKS_MAX); -#if !defined(SCTP_WITH_NO_CSUM) - RANGECHK(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback), SCTPCTL_LOOPBACK_NOCSUM_MIN, SCTPCTL_LOOPBACK_NOCSUM_MAX); -#endif - RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_init), SCTPCTL_STRICT_INIT_MIN, SCTPCTL_STRICT_INIT_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), SCTPCTL_PEER_CHKOH_MIN, SCTPCTL_PEER_CHKOH_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_max_burst_default), SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), SCTPCTL_FRMAXBURST_MIN, SCTPCTL_FRMAXBURST_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), SCTPCTL_MAXCHUNKS_MIN, SCTPCTL_MAXCHUNKS_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_hashtblsize), SCTPCTL_TCBHASHSIZE_MIN, SCTPCTL_TCBHASHSIZE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_pcbtblsize), SCTPCTL_PCBHASHSIZE_MIN, SCTPCTL_PCBHASHSIZE_MAX); @@ -599,16 +623,14 @@ sysctl_sctp_check(SYSCTL_HANDLER_ARGS) RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), SCTPCTL_INIT_RTX_MAX_MIN, SCTPCTL_INIT_RTX_MAX_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), SCTPCTL_ASSOC_RTX_MAX_MIN, SCTPCTL_ASSOC_RTX_MAX_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), SCTPCTL_PATH_RTX_MAX_MIN, SCTPCTL_PATH_RTX_MAX_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_path_pf_threshold), SCTPCTL_PATH_PF_THRESHOLD_MIN, SCTPCTL_PATH_PF_THRESHOLD_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTPCTL_ADD_MORE_ON_OUTPUT_MIN, SCTPCTL_ADD_MORE_ON_OUTPUT_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), SCTPCTL_OUTGOING_STREAMS_MIN, SCTPCTL_OUTGOING_STREAMS_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_on_off), SCTPCTL_CMT_ON_OFF_MIN, SCTPCTL_CMT_ON_OFF_MAX); /* EY */ RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), SCTPCTL_NR_SACK_ON_OFF_MIN, SCTPCTL_NR_SACK_ON_OFF_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_use_dac), SCTPCTL_CMT_USE_DAC_MIN, SCTPCTL_CMT_USE_DAC_MAX); - RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_pf), SCTPCTL_CMT_PF_MIN, SCTPCTL_CMT_PF_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), SCTPCTL_CWND_MAXBURST_MIN, SCTPCTL_CWND_MAXBURST_MAX); - RANGECHK(SCTP_BASE_SYSCTL(sctp_early_fr), SCTPCTL_EARLY_FAST_RETRAN_MIN, SCTPCTL_EARLY_FAST_RETRAN_MAX); - RANGECHK(SCTP_BASE_SYSCTL(sctp_early_fr_msec), SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN, SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), SCTPCTL_ASCONF_AUTH_NOCHK_MIN, SCTPCTL_ASCONF_AUTH_NOCHK_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_auth_disable), SCTPCTL_AUTH_DISABLE_MIN, SCTPCTL_AUTH_DISABLE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_nat_friendly), SCTPCTL_NAT_FRIENDLY_MIN, SCTPCTL_NAT_FRIENDLY_MAX); @@ -622,20 +644,26 @@ sysctl_sctp_check(SYSCTL_HANDLER_ARGS) RANGECHK(SCTP_BASE_SYSCTL(sctp_max_retran_chunk), SCTPCTL_MAX_RETRAN_CHUNK_MIN, SCTPCTL_MAX_RETRAN_CHUNK_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_logging_level), SCTPCTL_LOGGING_LEVEL_MIN, SCTPCTL_LOGGING_LEVEL_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_default_cc_module), SCTPCTL_DEFAULT_CC_MODULE_MIN, SCTPCTL_DEFAULT_CC_MODULE_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_default_ss_module), SCTPCTL_DEFAULT_SS_MODULE_MIN, SCTPCTL_DEFAULT_SS_MODULE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_default_frag_interleave), SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_vtag_time_wait), SCTPCTL_TIME_WAIT_MIN, SCTPCTL_TIME_WAIT_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_buffer_splitting), SCTPCTL_BUFFER_SPLITTING_MIN, SCTPCTL_BUFFER_SPLITTING_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_initial_cwnd), SCTPCTL_INITIAL_CWND_MIN, SCTPCTL_INITIAL_CWND_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_bw), SCTPCTL_RTTVAR_BW_MIN, SCTPCTL_RTTVAR_BW_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_rtt), SCTPCTL_RTTVAR_RTT_MIN, SCTPCTL_RTTVAR_RTT_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_eqret), SCTPCTL_RTTVAR_EQRET_MIN, SCTPCTL_RTTVAR_EQRET_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_steady_step), SCTPCTL_RTTVAR_STEADYS_MIN, SCTPCTL_RTTVAR_STEADYS_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_use_dccc_ecn), SCTPCTL_RTTVAR_DCCCECN_MIN, SCTPCTL_RTTVAR_DCCCECN_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_base), SCTPCTL_MOBILITY_BASE_MIN, SCTPCTL_MOBILITY_BASE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), SCTPCTL_MOBILITY_FASTHANDOFF_MIN, SCTPCTL_MOBILITY_FASTHANDOFF_MAX); - RANGECHK(SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable), SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MIN, SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN, SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX); RANGECHK(SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), SCTPCTL_NAT_FRIENDLY_INITS_MIN, SCTPCTL_NAT_FRIENDLY_INITS_MAX); + RANGECHK(SCTP_BASE_SYSCTL(sctp_blackhole), SCTPCTL_BLACKHOLE_MIN, SCTPCTL_BLACKHOLE_MAX); #ifdef SCTP_DEBUG RANGECHK(SCTP_BASE_SYSCTL(sctp_debug_on), SCTPCTL_DEBUG_MIN, SCTPCTL_DEBUG_MAX); #endif -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) RANGECHK(SCTP_BASE_SYSCTL(sctp_output_unlocked), SCTPCTL_OUTPUT_UNLOCKED_MIN, SCTPCTL_OUTPUT_UNLOCKED_MAX); #endif } @@ -647,10 +675,20 @@ static int sysctl_stat_get(SYSCTL_HANDLER_ARGS) { int cpu, error; - struct sctpstat sb, *sarry; - + struct sctpstat sb, *sarry, *cpin = NULL; + + if ((req->newptr) && (req->newlen == sizeof(struct sctpstat))) { + /* + * User wants us to clear or at least reset the counters to + * the specified values. + */ + cpin = (struct sctpstat *)req->newptr; + } else if (req->newptr) { + /* Must be a stat structure */ + return (EINVAL); + } memset(&sb, 0, sizeof(sb)); - for (cpu = 0; cpu < mp_ncpus; cpu++) { + for (cpu = 0; cpu < mp_maxid; cpu++) { sarry = &SCTP_BASE_STATS[cpu]; if (sarry->sctps_discontinuitytime.tv_sec > sb.sctps_discontinuitytime.tv_sec) { sb.sctps_discontinuitytime.tv_sec = sarry->sctps_discontinuitytime.tv_sec; @@ -740,17 +778,6 @@ sysctl_stat_get(SYSCTL_HANDLER_ARGS) sb.sctps_timoautoclose += sarry->sctps_timoautoclose; sb.sctps_timoassockill += sarry->sctps_timoassockill; sb.sctps_timoinpkill += sarry->sctps_timoinpkill; - sb.sctps_earlyfrstart += sarry->sctps_earlyfrstart; - sb.sctps_earlyfrstop += sarry->sctps_earlyfrstop; - sb.sctps_earlyfrmrkretrans += sarry->sctps_earlyfrmrkretrans; - sb.sctps_earlyfrstpout += sarry->sctps_earlyfrstpout; - sb.sctps_earlyfrstpidsck1 += sarry->sctps_earlyfrstpidsck1; - sb.sctps_earlyfrstpidsck2 += sarry->sctps_earlyfrstpidsck2; - sb.sctps_earlyfrstpidsck3 += sarry->sctps_earlyfrstpidsck3; - sb.sctps_earlyfrstpidsck4 += sarry->sctps_earlyfrstpidsck4; - sb.sctps_earlyfrstrid += sarry->sctps_earlyfrstrid; - sb.sctps_earlyfrstrout += sarry->sctps_earlyfrstrout; - sb.sctps_earlyfrstrtmr += sarry->sctps_earlyfrstrtmr; sb.sctps_hdrops += sarry->sctps_hdrops; sb.sctps_badsum += sarry->sctps_badsum; sb.sctps_noport += sarry->sctps_noport; @@ -788,6 +815,9 @@ sysctl_stat_get(SYSCTL_HANDLER_ARGS) sb.sctps_send_burst_avoid += sarry->sctps_send_burst_avoid; sb.sctps_send_cwnd_avoid += sarry->sctps_send_cwnd_avoid; sb.sctps_fwdtsn_map_over += sarry->sctps_fwdtsn_map_over; + if (cpin) { + memcpy(sarry, cpin, sizeof(struct sctpstat)); + } } error = SYSCTL_OUT(req, &sb, sizeof(sb)); return (error); @@ -812,297 +842,301 @@ sysctl_sctp_cleartrace(SYSCTL_HANDLER_ARGS) * sysctl definitions */ -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sendspace, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sendspace, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_sendspace), 0, sysctl_sctp_check, "IU", SCTPCTL_MAXDGRAM_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, recvspace, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, recvspace, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_recvspace), 0, sysctl_sctp_check, "IU", SCTPCTL_RECVSPACE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auto_asconf, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, auto_asconf, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_auto_asconf), 0, sysctl_sctp_check, "IU", SCTPCTL_AUTOASCONF_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, ecn_enable, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, ecn_enable, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_ecn_enable), 0, sysctl_sctp_check, "IU", SCTPCTL_ECN_ENABLE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, ecn_nonce, CTLTYPE_INT | CTLFLAG_RW, - &SCTP_BASE_SYSCTL(sctp_ecn_nonce), 0, sysctl_sctp_check, "IU", - SCTPCTL_ECN_NONCE_DESC); - -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_sacks, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, strict_sacks, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_strict_sacks), 0, sysctl_sctp_check, "IU", SCTPCTL_STRICT_SACKS_DESC); -#if !defined(SCTP_WITH_NO_CSUM) -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, loopback_nocsum, CTLTYPE_INT | CTLFLAG_RW, - &SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback), 0, sysctl_sctp_check, "IU", - SCTPCTL_LOOPBACK_NOCSUM_DESC); -#endif - -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_init, CTLTYPE_INT | CTLFLAG_RW, - &SCTP_BASE_SYSCTL(sctp_strict_init), 0, sysctl_sctp_check, "IU", - SCTPCTL_STRICT_INIT_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), 0, sysctl_sctp_check, "IU", SCTPCTL_PEER_CHKOH_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, maxburst, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, maxburst, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_max_burst_default), 0, sysctl_sctp_check, "IU", SCTPCTL_MAXBURST_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, maxchunks, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, fr_maxburst, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), 0, sysctl_sctp_check, "IU", + SCTPCTL_FRMAXBURST_DESC); + +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, maxchunks, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), 0, sysctl_sctp_check, "IU", SCTPCTL_MAXCHUNKS_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_hashtblsize), 0, sysctl_sctp_check, "IU", SCTPCTL_TCBHASHSIZE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_pcbtblsize), 0, sysctl_sctp_check, "IU", SCTPCTL_PCBHASHSIZE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, min_split_point, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, min_split_point, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_min_split_point), 0, sysctl_sctp_check, "IU", SCTPCTL_MIN_SPLIT_POINT_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, chunkscale, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, chunkscale, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_chunkscale), 0, sysctl_sctp_check, "IU", SCTPCTL_CHUNKSCALE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), 0, sysctl_sctp_check, "IU", SCTPCTL_DELAYED_SACK_TIME_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sack_freq, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sack_freq, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_sack_freq_default), 0, sysctl_sctp_check, "IU", SCTPCTL_SACK_FREQ_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sys_resource, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sys_resource, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), 0, sysctl_sctp_check, "IU", SCTPCTL_SYS_RESOURCE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asoc_resource, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, asoc_resource, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), 0, sysctl_sctp_check, "IU", SCTPCTL_ASOC_RESOURCE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), 0, sysctl_sctp_check, "IU", SCTPCTL_HEARTBEAT_INTERVAL_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), 0, sysctl_sctp_check, "IU", SCTPCTL_PMTU_RAISE_TIME_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), 0, sysctl_sctp_check, "IU", SCTPCTL_SHUTDOWN_GUARD_TIME_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), 0, sysctl_sctp_check, "IU", SCTPCTL_SECRET_LIFETIME_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_max, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_max, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_rto_max_default), 0, sysctl_sctp_check, "IU", SCTPCTL_RTO_MAX_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_min, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_min, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_rto_min_default), 0, sysctl_sctp_check, "IU", SCTPCTL_RTO_MIN_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_initial, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_initial, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_rto_initial_default), 0, sysctl_sctp_check, "IU", SCTPCTL_RTO_INITIAL_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, init_rto_max, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, init_rto_max, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_init_rto_max_default), 0, sysctl_sctp_check, "IU", SCTPCTL_INIT_RTO_MAX_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), 0, sysctl_sctp_check, "IU", SCTPCTL_VALID_COOKIE_LIFE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), 0, sysctl_sctp_check, "IU", SCTPCTL_INIT_RTX_MAX_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), 0, sysctl_sctp_check, "IU", SCTPCTL_ASSOC_RTX_MAX_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), 0, sysctl_sctp_check, "IU", SCTPCTL_PATH_RTX_MAX_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, path_pf_threshold, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_path_pf_threshold), 0, sysctl_sctp_check, "IU", + SCTPCTL_PATH_PF_THRESHOLD_DESC); + +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_add_more_threshold), 0, sysctl_sctp_check, "IU", SCTPCTL_ADD_MORE_ON_OUTPUT_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), 0, sysctl_sctp_check, "IU", SCTPCTL_OUTGOING_STREAMS_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_cmt_on_off), 0, sysctl_sctp_check, "IU", SCTPCTL_CMT_ON_OFF_DESC); -/* EY */ -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, nr_sack_on_off, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nr_sack_on_off, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), 0, sysctl_sctp_check, "IU", SCTPCTL_NR_SACK_ON_OFF_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_cmt_use_dac), 0, sysctl_sctp_check, "IU", SCTPCTL_CMT_USE_DAC_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_pf, CTLTYPE_INT | CTLFLAG_RW, - &SCTP_BASE_SYSCTL(sctp_cmt_pf), 0, sysctl_sctp_check, "IU", - SCTPCTL_CMT_PF_DESC); - -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), 0, sysctl_sctp_check, "IU", SCTPCTL_CWND_MAXBURST_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, early_fast_retran, CTLTYPE_INT | CTLFLAG_RW, - &SCTP_BASE_SYSCTL(sctp_early_fr), 0, sysctl_sctp_check, "IU", - SCTPCTL_EARLY_FAST_RETRAN_DESC); - -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, early_fast_retran_msec, CTLTYPE_INT | CTLFLAG_RW, - &SCTP_BASE_SYSCTL(sctp_early_fr_msec), 0, sysctl_sctp_check, "IU", - SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC); - -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), 0, sysctl_sctp_check, "IU", SCTPCTL_ASCONF_AUTH_NOCHK_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_auth_disable), 0, sysctl_sctp_check, "IU", SCTPCTL_AUTH_DISABLE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, nat_friendly, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nat_friendly, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_nat_friendly), 0, sysctl_sctp_check, "IU", SCTPCTL_NAT_FRIENDLY_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, abc_l_var, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, abc_l_var, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_L2_abc_variable), 0, sysctl_sctp_check, "IU", SCTPCTL_ABC_L_VAR_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), 0, sysctl_sctp_check, "IU", SCTPCTL_MAX_CHAINED_MBUFS_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_do_drain), 0, sysctl_sctp_check, "IU", SCTPCTL_DO_SCTP_DRAIN_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_hb_maxburst), 0, sysctl_sctp_check, "IU", SCTPCTL_HB_MAX_BURST_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), 0, sysctl_sctp_check, "IU", SCTPCTL_ABORT_AT_LIMIT_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_data_order, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, strict_data_order, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_strict_data_order), 0, sysctl_sctp_check, "IU", SCTPCTL_STRICT_DATA_ORDER_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, min_residual, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, min_residual, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_min_residual), 0, sysctl_sctp_check, "IU", SCTPCTL_MIN_RESIDUAL_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, max_retran_chunk, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, max_retran_chunk, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_max_retran_chunk), 0, sysctl_sctp_check, "IU", SCTPCTL_MAX_RETRAN_CHUNK_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log_level, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, log_level, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_logging_level), 0, sysctl_sctp_check, "IU", SCTPCTL_LOGGING_LEVEL_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, default_cc_module, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_cc_module, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_default_cc_module), 0, sysctl_sctp_check, "IU", SCTPCTL_DEFAULT_CC_MODULE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, default_frag_interleave, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_ss_module, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_default_ss_module), 0, sysctl_sctp_check, "IU", + SCTPCTL_DEFAULT_SS_MODULE_DESC); + +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_frag_interleave, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_default_frag_interleave), 0, sysctl_sctp_check, "IU", SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mobility_base, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, mobility_base, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_mobility_base), 0, sysctl_sctp_check, "IU", SCTPCTL_MOBILITY_BASE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mobility_fasthandoff, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, mobility_fasthandoff, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), 0, sysctl_sctp_check, "IU", SCTPCTL_MOBILITY_FASTHANDOFF_DESC); #if defined(SCTP_LOCAL_TRACE_BUF) -SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, log, CTLFLAG_RD, +SYSCTL_VNET_STRUCT(_net_inet_sctp, OID_AUTO, log, CTLFLAG_RD, &SCTP_BASE_SYSCTL(sctp_log), sctp_log, "SCTP logging (struct sctp_log)"); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLTYPE_OPAQUE | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_log), 0, sysctl_sctp_cleartrace, "IU", "Clear SCTP Logging buffer"); - - - #endif -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_for_client_enable, CTLTYPE_INT | CTLFLAG_RW, - &SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable), 0, sysctl_sctp_check, "IU", - SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_DESC); - -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), 0, sysctl_sctp_udp_tunneling_check, "IU", SCTPCTL_UDP_TUNNELING_PORT_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, enable_sack_immediately, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, enable_sack_immediately, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), 0, sysctl_sctp_check, "IU", SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, nat_friendly_init, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nat_friendly_init, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), 0, sysctl_sctp_check, "IU", SCTPCTL_NAT_FRIENDLY_INITS_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, vtag_time_wait, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, vtag_time_wait, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_vtag_time_wait), 0, sysctl_sctp_check, "IU", SCTPCTL_TIME_WAIT_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, buffer_splitting, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, buffer_splitting, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_buffer_splitting), 0, sysctl_sctp_check, "IU", SCTPCTL_BUFFER_SPLITTING_DESC); -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, initial_cwnd, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, initial_cwnd, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_initial_cwnd), 0, sysctl_sctp_check, "IU", SCTPCTL_INITIAL_CWND_DESC); +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_bw, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_rttvar_bw), 0, sysctl_sctp_check, "IU", + SCTPCTL_RTTVAR_BW_DESC); + +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_rtt, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_rttvar_rtt), 0, sysctl_sctp_check, "IU", + SCTPCTL_RTTVAR_RTT_DESC); + +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_eqret, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_rttvar_eqret), 0, sysctl_sctp_check, "IU", + SCTPCTL_RTTVAR_EQRET_DESC); + +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_steady_step, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_steady_step), 0, sysctl_sctp_check, "IU", + SCTPCTL_RTTVAR_STEADYS_DESC); + +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, use_dcccecn, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_use_dccc_ecn), 0, sysctl_sctp_check, "IU", + SCTPCTL_RTTVAR_DCCCECN_DESC); + +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, blackhole, CTLTYPE_UINT | CTLFLAG_RW, + &SCTP_BASE_SYSCTL(sctp_blackhole), 0, sysctl_sctp_check, "IU", + SCTPCTL_BLACKHOLE_DESC); + #ifdef SCTP_DEBUG -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, debug, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, debug, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_debug_on), 0, sysctl_sctp_check, "IU", SCTPCTL_DEBUG_DESC); -#endif /* SCTP_DEBUG */ +#endif -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, output_unlocked, CTLTYPE_INT | CTLFLAG_RW, +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, output_unlocked, CTLTYPE_UINT | CTLFLAG_RW, &SCTP_BASE_SYSCTL(sctp_output_unlocked), 0, sysctl_sctp_check, "IU", SCTPCTL_OUTPUT_UNLOCKED_DESC); #endif + #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, stats, - CTLTYPE_STRUCT | CTLFLAG_RD, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, stats, + CTLTYPE_STRUCT | CTLFLAG_RW, 0, 0, sysctl_stat_get, "S,sctpstat", "SCTP statistics (struct sctp_stat)"); #else -SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW, +SYSCTL_VNET_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW, &SCTP_BASE_STATS_SYSCTL, sctpstat, "SCTP statistics (struct sctp_stat)"); #endif -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_RD, +SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLTYPE_OPAQUE | CTLFLAG_RD, 0, 0, sctp_assoclist, "S,xassoc", "List of active SCTP associations"); diff --git a/freebsd/sys/netinet/sctp_sysctl.h b/freebsd/sys/netinet/sctp_sysctl.h index 9dbc9ed2..4ec37157 100644 --- a/freebsd/sys/netinet/sctp_sysctl.h +++ b/freebsd/sys/netinet/sctp_sysctl.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -31,8 +33,8 @@ #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_sysctl_h__ -#define __sctp_sysctl_h__ +#ifndef _NETINET_SCTP_SYSCTL_H_ +#define _NETINET_SCTP_SYSCTL_H_ #include #include @@ -43,12 +45,8 @@ struct sctp_sysctl { uint32_t sctp_auto_asconf; uint32_t sctp_multiple_asconfs; uint32_t sctp_ecn_enable; - uint32_t sctp_ecn_nonce; + uint32_t sctp_fr_max_burst_default; uint32_t sctp_strict_sacks; -#if !defined(SCTP_WITH_NO_CSUM) - uint32_t sctp_no_csum_on_loopback; -#endif - uint32_t sctp_strict_init; uint32_t sctp_peer_chunk_oh; uint32_t sctp_max_burst_default; uint32_t sctp_max_chunks_on_queue; @@ -72,16 +70,14 @@ struct sctp_sysctl { uint32_t sctp_init_rtx_max_default; uint32_t sctp_assoc_rtx_max_default; uint32_t sctp_path_rtx_max_default; + uint32_t sctp_path_pf_threshold; uint32_t sctp_add_more_threshold; uint32_t sctp_nr_outgoing_streams_default; uint32_t sctp_cmt_on_off; uint32_t sctp_cmt_use_dac; /* EY 5/5/08 - nr_sack flag variable */ uint32_t sctp_nr_sack_on_off; - uint32_t sctp_cmt_pf; uint32_t sctp_use_cwnd_based_maxburst; - uint32_t sctp_early_fr; - uint32_t sctp_early_fr_msec; uint32_t sctp_asconf_auth_nochk; uint32_t sctp_auth_disable; uint32_t sctp_nat_friendly; @@ -96,19 +92,26 @@ struct sctp_sysctl { uint32_t sctp_logging_level; /* JRS - Variable for default congestion control module */ uint32_t sctp_default_cc_module; + /* RS - Variable for default stream scheduling module */ + uint32_t sctp_default_ss_module; uint32_t sctp_default_frag_interleave; uint32_t sctp_mobility_base; uint32_t sctp_mobility_fasthandoff; uint32_t sctp_inits_include_nat_friendly; + uint32_t sctp_rttvar_bw; + uint32_t sctp_rttvar_rtt; + uint32_t sctp_rttvar_eqret; + uint32_t sctp_steady_step; + uint32_t sctp_use_dccc_ecn; #if defined(SCTP_LOCAL_TRACE_BUF) struct sctp_log sctp_log; #endif - uint32_t sctp_udp_tunneling_for_client_enable; uint32_t sctp_udp_tunneling_port; uint32_t sctp_enable_sack_immediately; uint32_t sctp_vtag_time_wait; uint32_t sctp_buffer_splitting; uint32_t sctp_initial_cwnd; + uint32_t sctp_blackhole; #if defined(SCTP_DEBUG) uint32_t sctp_debug_on; #endif @@ -150,12 +153,6 @@ struct sctp_sysctl { #define SCTPCTL_ECN_ENABLE_MAX 1 #define SCTPCTL_ECN_ENABLE_DEFAULT 1 -/* ecn_nonce: Enable SCTP ECN Nonce */ -#define SCTPCTL_ECN_NONCE_DESC "Enable SCTP ECN Nonce" -#define SCTPCTL_ECN_NONCE_MIN 0 -#define SCTPCTL_ECN_NONCE_MAX 1 -#define SCTPCTL_ECN_NONCE_DEFAULT 0 - /* strict_sacks: Enable SCTP Strict SACK checking */ #define SCTPCTL_STRICT_SACKS_DESC "Enable SCTP Strict SACK checking" #define SCTPCTL_STRICT_SACKS_MIN 0 @@ -168,12 +165,6 @@ struct sctp_sysctl { #define SCTPCTL_LOOPBACK_NOCSUM_MAX 1 #define SCTPCTL_LOOPBACK_NOCSUM_DEFAULT 1 -/* strict_init: Enable strict INIT/INIT-ACK singleton enforcement */ -#define SCTPCTL_STRICT_INIT_DESC "Enable strict INIT/INIT-ACK singleton enforcement" -#define SCTPCTL_STRICT_INIT_MIN 0 -#define SCTPCTL_STRICT_INIT_MAX 1 -#define SCTPCTL_STRICT_INIT_DEFAULT 1 - /* peer_chkoh: Amount to debit peers rwnd per chunk sent */ #define SCTPCTL_PEER_CHKOH_DESC "Amount to debit peers rwnd per chunk sent" #define SCTPCTL_PEER_CHKOH_MIN 0 @@ -182,23 +173,30 @@ struct sctp_sysctl { /* maxburst: Default max burst for sctp endpoints */ #define SCTPCTL_MAXBURST_DESC "Default max burst for sctp endpoints" -#define SCTPCTL_MAXBURST_MIN 1 +#define SCTPCTL_MAXBURST_MIN 0 #define SCTPCTL_MAXBURST_MAX 0xFFFFFFFF #define SCTPCTL_MAXBURST_DEFAULT SCTP_DEF_MAX_BURST +/* fr_maxburst: Default max burst for sctp endpoints when fast retransmitting */ +#define SCTPCTL_FRMAXBURST_DESC "Default fr max burst for sctp endpoints" +#define SCTPCTL_FRMAXBURST_MIN 0 +#define SCTPCTL_FRMAXBURST_MAX 0xFFFFFFFF +#define SCTPCTL_FRMAXBURST_DEFAULT SCTP_DEF_FRMAX_BURST + + /* maxchunks: Default max chunks on queue per asoc */ #define SCTPCTL_MAXCHUNKS_DESC "Default max chunks on queue per asoc" #define SCTPCTL_MAXCHUNKS_MIN 0 #define SCTPCTL_MAXCHUNKS_MAX 0xFFFFFFFF #define SCTPCTL_MAXCHUNKS_DEFAULT SCTP_ASOC_MAX_CHUNKS_ON_QUEUE -/* tcbhashsize: Tuneable for Hash table sizes */ +/* tcbhashsize: Tunable for Hash table sizes */ #define SCTPCTL_TCBHASHSIZE_DESC "Tunable for TCB hash table sizes" #define SCTPCTL_TCBHASHSIZE_MIN 1 #define SCTPCTL_TCBHASHSIZE_MAX 0xFFFFFFFF #define SCTPCTL_TCBHASHSIZE_DEFAULT SCTP_TCBHASHSIZE -/* pcbhashsize: Tuneable for PCB Hash table sizes */ +/* pcbhashsize: Tunable for PCB Hash table sizes */ #define SCTPCTL_PCBHASHSIZE_DESC "Tunable for PCB hash table sizes" #define SCTPCTL_PCBHASHSIZE_MIN 1 #define SCTPCTL_PCBHASHSIZE_MAX 0xFFFFFFFF @@ -210,14 +208,14 @@ struct sctp_sysctl { #define SCTPCTL_MIN_SPLIT_POINT_MAX 0xFFFFFFFF #define SCTPCTL_MIN_SPLIT_POINT_DEFAULT SCTP_DEFAULT_SPLIT_POINT_MIN -/* chunkscale: Tuneable for Scaling of number of chunks and messages */ -#define SCTPCTL_CHUNKSCALE_DESC "Tuneable for Scaling of number of chunks and messages" +/* chunkscale: Tunable for Scaling of number of chunks and messages */ +#define SCTPCTL_CHUNKSCALE_DESC "Tunable for Scaling of number of chunks and messages" #define SCTPCTL_CHUNKSCALE_MIN 1 #define SCTPCTL_CHUNKSCALE_MAX 0xFFFFFFFF #define SCTPCTL_CHUNKSCALE_DEFAULT SCTP_CHUNKQUEUE_SCALE -/* delayed_sack_time: Default delayed SACK timer in msec */ -#define SCTPCTL_DELAYED_SACK_TIME_DESC "Default delayed SACK timer in msec" +/* delayed_sack_time: Default delayed SACK timer in ms */ +#define SCTPCTL_DELAYED_SACK_TIME_DESC "Default delayed SACK timer in ms" #define SCTPCTL_DELAYED_SACK_TIME_MIN 0 #define SCTPCTL_DELAYED_SACK_TIME_MAX 0xFFFFFFFF #define SCTPCTL_DELAYED_SACK_TIME_DEFAULT SCTP_RECV_MSEC @@ -240,56 +238,56 @@ struct sctp_sysctl { #define SCTPCTL_ASOC_RESOURCE_MAX 0xFFFFFFFF #define SCTPCTL_ASOC_RESOURCE_DEFAULT SCTP_DEF_ASOC_RESC_LIMIT -/* heartbeat_interval: Default heartbeat interval in msec */ -#define SCTPCTL_HEARTBEAT_INTERVAL_DESC "Default heartbeat interval in msec" +/* heartbeat_interval: Default heartbeat interval in ms */ +#define SCTPCTL_HEARTBEAT_INTERVAL_DESC "Default heartbeat interval in ms" #define SCTPCTL_HEARTBEAT_INTERVAL_MIN 0 #define SCTPCTL_HEARTBEAT_INTERVAL_MAX 0xFFFFFFFF #define SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT SCTP_HB_DEFAULT_MSEC -/* pmtu_raise_time: Default PMTU raise timer in sec */ -#define SCTPCTL_PMTU_RAISE_TIME_DESC "Default PMTU raise timer in sec" +/* pmtu_raise_time: Default PMTU raise timer in seconds */ +#define SCTPCTL_PMTU_RAISE_TIME_DESC "Default PMTU raise timer in seconds" #define SCTPCTL_PMTU_RAISE_TIME_MIN 0 #define SCTPCTL_PMTU_RAISE_TIME_MAX 0xFFFFFFFF #define SCTPCTL_PMTU_RAISE_TIME_DEFAULT SCTP_DEF_PMTU_RAISE_SEC -/* shutdown_guard_time: Default shutdown guard timer in sec */ -#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Default shutdown guard timer in sec" +/* shutdown_guard_time: Default shutdown guard timer in seconds */ +#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Default shutdown guard timer in seconds" #define SCTPCTL_SHUTDOWN_GUARD_TIME_MIN 0 #define SCTPCTL_SHUTDOWN_GUARD_TIME_MAX 0xFFFFFFFF #define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT SCTP_DEF_MAX_SHUTDOWN_SEC -/* secret_lifetime: Default secret lifetime in sec */ -#define SCTPCTL_SECRET_LIFETIME_DESC "Default secret lifetime in sec" +/* secret_lifetime: Default secret lifetime in seconds */ +#define SCTPCTL_SECRET_LIFETIME_DESC "Default secret lifetime in seconds" #define SCTPCTL_SECRET_LIFETIME_MIN 0 #define SCTPCTL_SECRET_LIFETIME_MAX 0xFFFFFFFF #define SCTPCTL_SECRET_LIFETIME_DEFAULT SCTP_DEFAULT_SECRET_LIFE_SEC -/* rto_max: Default maximum retransmission timeout in msec */ -#define SCTPCTL_RTO_MAX_DESC "Default maximum retransmission timeout in msec" +/* rto_max: Default maximum retransmission timeout in ms */ +#define SCTPCTL_RTO_MAX_DESC "Default maximum retransmission timeout in ms" #define SCTPCTL_RTO_MAX_MIN 0 #define SCTPCTL_RTO_MAX_MAX 0xFFFFFFFF #define SCTPCTL_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND -/* rto_min: Default minimum retransmission timeout in msec */ -#define SCTPCTL_RTO_MIN_DESC "Default minimum retransmission timeout in msec" +/* rto_min: Default minimum retransmission timeout in ms */ +#define SCTPCTL_RTO_MIN_DESC "Default minimum retransmission timeout in ms" #define SCTPCTL_RTO_MIN_MIN 0 #define SCTPCTL_RTO_MIN_MAX 0xFFFFFFFF #define SCTPCTL_RTO_MIN_DEFAULT SCTP_RTO_LOWER_BOUND -/* rto_initial: Default initial retransmission timeout in msec */ -#define SCTPCTL_RTO_INITIAL_DESC "Default initial retransmission timeout in msec" +/* rto_initial: Default initial retransmission timeout in ms */ +#define SCTPCTL_RTO_INITIAL_DESC "Default initial retransmission timeout in ms" #define SCTPCTL_RTO_INITIAL_MIN 0 #define SCTPCTL_RTO_INITIAL_MAX 0xFFFFFFFF #define SCTPCTL_RTO_INITIAL_DEFAULT SCTP_RTO_INITIAL -/* init_rto_max: Default maximum retransmission timeout during association setup in msec */ -#define SCTPCTL_INIT_RTO_MAX_DESC "Default maximum retransmission timeout during association setup in msec" +/* init_rto_max: Default maximum retransmission timeout during association setup in ms */ +#define SCTPCTL_INIT_RTO_MAX_DESC "Default maximum retransmission timeout during association setup in ms" #define SCTPCTL_INIT_RTO_MAX_MIN 0 #define SCTPCTL_INIT_RTO_MAX_MAX 0xFFFFFFFF #define SCTPCTL_INIT_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND /* valid_cookie_life: Default cookie lifetime in sec */ -#define SCTPCTL_VALID_COOKIE_LIFE_DESC "Default cookie lifetime in sec" +#define SCTPCTL_VALID_COOKIE_LIFE_DESC "Default cookie lifetime in seconds" #define SCTPCTL_VALID_COOKIE_LIFE_MIN 0 #define SCTPCTL_VALID_COOKIE_LIFE_MAX 0xFFFFFFFF #define SCTPCTL_VALID_COOKIE_LIFE_DEFAULT SCTP_DEFAULT_COOKIE_LIFE @@ -312,8 +310,14 @@ struct sctp_sysctl { #define SCTPCTL_PATH_RTX_MAX_MAX 0xFFFFFFFF #define SCTPCTL_PATH_RTX_MAX_DEFAULT SCTP_DEF_MAX_PATH_RTX -/* add_more_on_output: When space wise is it worthwhile to try to add more to a socket send buffer */ -#define SCTPCTL_ADD_MORE_ON_OUTPUT_DESC "When space wise is it worthwhile to try to add more to a socket send buffer" +/* path_pf_threshold: threshold for considering the path potentially failed */ +#define SCTPCTL_PATH_PF_THRESHOLD_DESC "Default potentially failed threshold" +#define SCTPCTL_PATH_PF_THRESHOLD_MIN 0 +#define SCTPCTL_PATH_PF_THRESHOLD_MAX 0xFFFF +#define SCTPCTL_PATH_PF_THRESHOLD_DEFAULT SCTPCTL_PATH_PF_THRESHOLD_MAX + +/* add_more_on_output: When space-wise is it worthwhile to try to add more to a socket send buffer */ +#define SCTPCTL_ADD_MORE_ON_OUTPUT_DESC "When space-wise is it worthwhile to try to add more to a socket send buffer" #define SCTPCTL_ADD_MORE_ON_OUTPUT_MIN 0 #define SCTPCTL_ADD_MORE_ON_OUTPUT_MAX 0xFFFFFFFF #define SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT SCTP_DEFAULT_ADD_MORE @@ -325,10 +329,10 @@ struct sctp_sysctl { #define SCTPCTL_OUTGOING_STREAMS_DEFAULT SCTP_OSTREAM_INITIAL /* cmt_on_off: CMT on/off flag */ -#define SCTPCTL_CMT_ON_OFF_DESC "CMT on/off flag" -#define SCTPCTL_CMT_ON_OFF_MIN 0 -#define SCTPCTL_CMT_ON_OFF_MAX 1 -#define SCTPCTL_CMT_ON_OFF_DEFAULT 0 +#define SCTPCTL_CMT_ON_OFF_DESC "CMT settings" +#define SCTPCTL_CMT_ON_OFF_MIN SCTP_CMT_OFF +#define SCTPCTL_CMT_ON_OFF_MAX SCTP_CMT_MAX +#define SCTPCTL_CMT_ON_OFF_DEFAULT SCTP_CMT_OFF /* EY - nr_sack_on_off: NR_SACK on/off flag */ #define SCTPCTL_NR_SACK_ON_OFF_DESC "NR_SACK on/off flag" @@ -342,30 +346,12 @@ struct sctp_sysctl { #define SCTPCTL_CMT_USE_DAC_MAX 1 #define SCTPCTL_CMT_USE_DAC_DEFAULT 0 -/* JRS 5/2107 - CMT PF type flag */ -#define SCTPCTL_CMT_PF_DESC "CMT PF type flag" -#define SCTPCTL_CMT_PF_MIN 0 -#define SCTPCTL_CMT_PF_MAX 2 -#define SCTPCTL_CMT_PF_DEFAULT 0 - /* cwnd_maxburst: Use a CWND adjusting maxburst */ #define SCTPCTL_CWND_MAXBURST_DESC "Use a CWND adjusting maxburst" #define SCTPCTL_CWND_MAXBURST_MIN 0 #define SCTPCTL_CWND_MAXBURST_MAX 1 #define SCTPCTL_CWND_MAXBURST_DEFAULT 1 -/* early_fast_retran: Early Fast Retransmit with timer */ -#define SCTPCTL_EARLY_FAST_RETRAN_DESC "Early Fast Retransmit with timer" -#define SCTPCTL_EARLY_FAST_RETRAN_MIN 0 -#define SCTPCTL_EARLY_FAST_RETRAN_MAX 0xFFFFFFFF -#define SCTPCTL_EARLY_FAST_RETRAN_DEFAULT 0 - -/* early_fast_retran_msec: Early Fast Retransmit minimum timer value */ -#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC "Early Fast Retransmit minimum timer value" -#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN 0 -#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX 0xFFFFFFFF -#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT SCTP_MINFR_MSEC_TIMER - /* asconf_auth_nochk: Disable SCTP ASCONF AUTH requirement */ #define SCTPCTL_ASCONF_AUTH_NOCHK_DESC "Disable SCTP ASCONF AUTH requirement" #define SCTPCTL_ASCONF_AUTH_NOCHK_MIN 0 @@ -388,7 +374,7 @@ struct sctp_sysctl { #define SCTPCTL_ABC_L_VAR_DESC "SCTP ABC max increase per SACK (L)" #define SCTPCTL_ABC_L_VAR_MIN 0 #define SCTPCTL_ABC_L_VAR_MAX 0xFFFFFFFF -#define SCTPCTL_ABC_L_VAR_DEFAULT 1 +#define SCTPCTL_ABC_L_VAR_DEFAULT 2 /* max_chained_mbufs: Default max number of small mbufs on a chain */ #define SCTPCTL_MAX_CHAINED_MBUFS_DESC "Default max number of small mbufs on a chain" @@ -406,7 +392,7 @@ struct sctp_sysctl { #define SCTPCTL_HB_MAX_BURST_DESC "Confirmation Heartbeat max burst" #define SCTPCTL_HB_MAX_BURST_MIN 1 #define SCTPCTL_HB_MAX_BURST_MAX 0xFFFFFFFF -#define SCTPCTL_HB_MAX_BURST_DEFAULT SCTP_DEF_MAX_BURST +#define SCTPCTL_HB_MAX_BURST_DEFAULT SCTP_DEF_HBMAX_BURST /* abort_at_limit: When one-2-one hits qlimit abort */ #define SCTPCTL_ABORT_AT_LIMIT_DESC "When one-2-one hits qlimit abort" @@ -444,6 +430,12 @@ struct sctp_sysctl { #define SCTPCTL_DEFAULT_CC_MODULE_MAX 2 #define SCTPCTL_DEFAULT_CC_MODULE_DEFAULT 0 +/* RS - default stream scheduling module sysctl */ +#define SCTPCTL_DEFAULT_SS_MODULE_DESC "Default stream scheduling module" +#define SCTPCTL_DEFAULT_SS_MODULE_MIN 0 +#define SCTPCTL_DEFAULT_SS_MODULE_MAX 5 +#define SCTPCTL_DEFAULT_SS_MODULE_DEFAULT 0 + /* RRS - default fragment interleave */ #define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC "Default fragment interleave level" #define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN 0 @@ -462,12 +454,6 @@ struct sctp_sysctl { #define SCTPCTL_MOBILITY_FASTHANDOFF_MAX 1 #define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT SCTP_DEFAULT_MOBILITY_FASTHANDOFF -/* Enable SCTP/UDP tunneling for clients*/ -#define SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_DESC "Enable SCTP/UDP tunneling for client" -#define SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MIN 0 -#define SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MAX 1 -#define SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_DEFAULT SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MIN - /* Enable SCTP/UDP tunneling port */ #define SCTPCTL_UDP_TUNNELING_PORT_DESC "Set the SCTP/UDP tunneling port" #define SCTPCTL_UDP_TUNNELING_PORT_MIN 0 @@ -500,10 +486,42 @@ struct sctp_sysctl { /* Initial congestion window in MTU */ #define SCTPCTL_INITIAL_CWND_DESC "Initial congestion window in MTUs" -#define SCTPCTL_INITIAL_CWND_MIN 1 +#define SCTPCTL_INITIAL_CWND_MIN 0 #define SCTPCTL_INITIAL_CWND_MAX 0xffffffff #define SCTPCTL_INITIAL_CWND_DEFAULT 3 +/* rttvar smooth avg for bw calc */ +#define SCTPCTL_RTTVAR_BW_DESC "Shift amount for bw smoothing on rtt calc" +#define SCTPCTL_RTTVAR_BW_MIN 0 +#define SCTPCTL_RTTVAR_BW_MAX 32 +#define SCTPCTL_RTTVAR_BW_DEFAULT 4 + +/* rttvar smooth avg for bw calc */ +#define SCTPCTL_RTTVAR_RTT_DESC "Shift amount for rtt smoothing on rtt calc" +#define SCTPCTL_RTTVAR_RTT_MIN 0 +#define SCTPCTL_RTTVAR_RTT_MAX 32 +#define SCTPCTL_RTTVAR_RTT_DEFAULT 5 + +#define SCTPCTL_RTTVAR_EQRET_DESC "What to return when rtt and bw are unchanged" +#define SCTPCTL_RTTVAR_EQRET_MIN 0 +#define SCTPCTL_RTTVAR_EQRET_MAX 1 +#define SCTPCTL_RTTVAR_EQRET_DEFAULT 0 + +#define SCTPCTL_RTTVAR_STEADYS_DESC "How many the sames it takes to try step down of cwnd" +#define SCTPCTL_RTTVAR_STEADYS_MIN 0 +#define SCTPCTL_RTTVAR_STEADYS_MAX 0xFFFF +#define SCTPCTL_RTTVAR_STEADYS_DEFAULT 20 /* 0 means disable feature */ + +#define SCTPCTL_RTTVAR_DCCCECN_DESC "Enable for RTCC CC datacenter ECN" +#define SCTPCTL_RTTVAR_DCCCECN_MIN 0 +#define SCTPCTL_RTTVAR_DCCCECN_MAX 1 +#define SCTPCTL_RTTVAR_DCCCECN_DEFAULT 1 /* 0 means disable feature */ + +#define SCTPCTL_BLACKHOLE_DESC "Enable SCTP blackholing" +#define SCTPCTL_BLACKHOLE_MIN 0 +#define SCTPCTL_BLACKHOLE_MAX 2 +#define SCTPCTL_BLACKHOLE_DEFAULT SCTPCTL_BLACKHOLE_MIN + #if defined(SCTP_DEBUG) /* debug: Configure debug output */ #define SCTPCTL_DEBUG_DESC "Configure debug output" @@ -513,7 +531,7 @@ struct sctp_sysctl { #endif -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) #define SCTPCTL_OUTPUT_UNLOCKED_DESC "Unlock socket when sending packets down to IP." #define SCTPCTL_OUTPUT_UNLOCKED_MIN 0 #define SCTPCTL_OUTPUT_UNLOCKED_MAX 1 diff --git a/freebsd/sys/netinet/sctp_timer.c b/freebsd/sys/netinet/sctp_timer.c index 4fbded33..950d3f71 100644 --- a/freebsd/sys/netinet/sctp_timer.c +++ b/freebsd/sys/netinet/sctp_timer.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,8 +32,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_timer.c,v 1.29 2005/03/06 16:04:18 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -54,105 +54,6 @@ __FBSDID("$FreeBSD$"); #include -void -sctp_early_fr_timer(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, - struct sctp_nets *net) -{ - struct sctp_tmit_chunk *chk, *tp2; - struct timeval now, min_wait, tv; - unsigned int cur_rtt, cnt = 0, cnt_resend = 0; - - /* an early FR is occuring. */ - (void)SCTP_GETTIME_TIMEVAL(&now); - /* get cur rto in micro-seconds */ - if (net->lastsa == 0) { - /* Hmm no rtt estimate yet? */ - cur_rtt = stcb->asoc.initial_rto >> 2; - } else { - - cur_rtt = ((net->lastsa >> 2) + net->lastsv) >> 1; - } - if (cur_rtt < SCTP_BASE_SYSCTL(sctp_early_fr_msec)) { - cur_rtt = SCTP_BASE_SYSCTL(sctp_early_fr_msec); - } - cur_rtt *= 1000; - tv.tv_sec = cur_rtt / 1000000; - tv.tv_usec = cur_rtt % 1000000; - min_wait = now; - timevalsub(&min_wait, &tv); - if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) { - /* - * if we hit here, we don't have enough seconds on the clock - * to account for the RTO. We just let the lower seconds be - * the bounds and don't worry about it. This may mean we - * will mark a lot more than we should. - */ - min_wait.tv_sec = min_wait.tv_usec = 0; - } - chk = TAILQ_LAST(&stcb->asoc.sent_queue, sctpchunk_listhead); - for (; chk != NULL; chk = tp2) { - tp2 = TAILQ_PREV(chk, sctpchunk_listhead, sctp_next); - if (chk->whoTo != net) { - continue; - } - if (chk->sent == SCTP_DATAGRAM_RESEND) - cnt_resend++; - else if ((chk->sent > SCTP_DATAGRAM_UNSENT) && - (chk->sent < SCTP_DATAGRAM_RESEND)) { - /* pending, may need retran */ - if (chk->sent_rcv_time.tv_sec > min_wait.tv_sec) { - /* - * we have reached a chunk that was sent - * some seconds past our min.. forget it we - * will find no more to send. - */ - continue; - } else if (chk->sent_rcv_time.tv_sec == min_wait.tv_sec) { - /* - * we must look at the micro seconds to - * know. - */ - if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) { - /* - * ok it was sent after our boundary - * time. - */ - continue; - } - } - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_EARLYFR_LOGGING_ENABLE) { - sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count, - 4, SCTP_FR_MARKED_EARLY); - } - SCTP_STAT_INCR(sctps_earlyfrmrkretrans); - chk->sent = SCTP_DATAGRAM_RESEND; - sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); - /* double book size since we are doing an early FR */ - chk->book_size_scale++; - cnt += chk->send_size; - if ((cnt + net->flight_size) > net->cwnd) { - /* Mark all we could possibly resend */ - break; - } - } - } - if (cnt) { - /* - * JRS - Use the congestion control given in the congestion - * control module - */ - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer(inp, stcb, net); - } else if (cnt_resend) { - sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); - } - /* Restart it? */ - if (net->flight_size < net->cwnd) { - SCTP_STAT_INCR(sctps_earlyfrstrtmr); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); - } -} - void sctp_audit_retranmission_queue(struct sctp_association *asoc) { @@ -191,50 +92,28 @@ sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb, if (net) { net->error_count++; SCTPDBG(SCTP_DEBUG_TIMER4, "Error count for %p now %d thresh:%d\n", - net, net->error_count, + (void *)net, net->error_count, net->failure_threshold); if (net->error_count > net->failure_threshold) { /* We had a threshold failure */ if (net->dest_state & SCTP_ADDR_REACHABLE) { net->dest_state &= ~SCTP_ADDR_REACHABLE; - net->dest_state |= SCTP_ADDR_NOT_REACHABLE; net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY; - if (net == stcb->asoc.primary_destination) { - net->dest_state |= SCTP_ADDR_WAS_PRIMARY; - } - /* - * JRS 5/14/07 - If a destination is - * unreachable, the PF bit is turned off. - * This allows an unambiguous use of the PF - * bit for destinations that are reachable - * but potentially failed. If the - * destination is set to the unreachable - * state, also set the destination to the PF - * state. - */ - /* - * Add debug message here if destination is - * not in PF state. - */ - /* Stop any running T3 timers here? */ - if ((stcb->asoc.sctp_cmt_on_off == 1) && - (stcb->asoc.sctp_cmt_pf > 0)) { - net->dest_state &= ~SCTP_ADDR_PF; - SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n", - net); - } + net->dest_state &= ~SCTP_ADDR_PF; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, - stcb, - SCTP_FAILED_THRESHOLD, + stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED); } + } else if ((net->pf_threshold < net->failure_threshold) && + (net->error_count > net->pf_threshold)) { + if (!(net->dest_state & SCTP_ADDR_PF)) { + net->dest_state |= SCTP_ADDR_PF; + net->last_active = sctp_get_tick_count(); + sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED); + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); + } } - /*********HOLD THIS COMMENT FOR PATCH OF ALTERNATE - *********ROUTING CODE - */ - /*********HOLD THIS COMMENT FOR END OF PATCH OF ALTERNATE - *********ROUTING CODE - */ } if (stcb == NULL) return (0); @@ -261,7 +140,7 @@ sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb, stcb->asoc.overall_error_count++; } SCTPDBG(SCTP_DEBUG_TIMER4, "Overall error count for %p now %d thresh:%u state:%x\n", - &stcb->asoc, stcb->asoc.overall_error_count, + (void *)&stcb->asoc, stcb->asoc.overall_error_count, (uint32_t) threshold, ((net == NULL) ? (uint32_t) 0 : (uint32_t) net->dest_state)); /* @@ -287,7 +166,7 @@ sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb, *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1); } inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1; - sctp_abort_an_association(inp, stcb, SCTP_FAILED_THRESHOLD, oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(inp, stcb, oper, SCTP_SO_NOT_LOCKED); return (1); } return (0); @@ -409,31 +288,10 @@ sctp_find_alternate_net(struct sctp_tcb *stcb, } } } - /* - * JRS 5/14/07 - After all destination have been considered - * as alternates, check to see if there was some active - * destination (not in PF state). If not, check to see if - * there was some PF destination with the minimum number of - * errors. If not, return the original destination. If - * there is a min_errors_net, remove the PF flag from that - * destination, set the cwnd to one or two MTUs, and return - * the destination as an alt. If there was some active - * destination with a highest cwnd, return the destination - * as an alt. - */ if (max_cwnd_net == NULL) { if (min_errors_net == NULL) { return (net); } - min_errors_net->dest_state &= ~SCTP_ADDR_PF; - min_errors_net->cwnd = min_errors_net->mtu * stcb->asoc.sctp_cmt_pf; - if (SCTP_OS_TIMER_PENDING(&min_errors_net->rxt_timer.timer)) { - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, - stcb, min_errors_net, - SCTP_FROM_SCTP_TIMER + SCTP_LOC_2); - } - SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to active with %d errors.\n", - min_errors_net, min_errors_net->error_count); return (min_errors_net); } else { return (max_cwnd_net); @@ -576,20 +434,19 @@ sctp_backoff_on_timeout(struct sctp_tcb *stcb, static void sctp_recover_sent_list(struct sctp_tcb *stcb) { - struct sctp_tmit_chunk *chk, *tp2; + struct sctp_tmit_chunk *chk, *nchk; struct sctp_association *asoc; asoc = &stcb->asoc; - chk = TAILQ_FIRST(&stcb->asoc.sent_queue); - for (; chk != NULL; chk = tp2) { - tp2 = TAILQ_NEXT(chk, sctp_next); - if ((compare_with_wrap(stcb->asoc.last_acked_seq, - chk->rec.data.TSN_seq, - MAX_TSN)) || - (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) { - + TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) { + if (SCTP_TSN_GE(asoc->last_acked_seq, chk->rec.data.TSN_seq)) { SCTP_PRINTF("Found chk:%p tsn:%x <= last_acked_seq:%x\n", - chk, chk->rec.data.TSN_seq, stcb->asoc.last_acked_seq); + (void *)chk, chk->rec.data.TSN_seq, asoc->last_acked_seq); + if (chk->sent != SCTP_DATAGRAM_NR_ACKED) { + if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) { + asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--; + } + } TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next); if (chk->pr_sctp_on) { if (asoc->pr_sctp_cnt != 0) @@ -599,20 +456,18 @@ sctp_recover_sent_list(struct sctp_tcb *stcb) /* sa_ignore NO_NULL_CHK */ sctp_free_bufspace(stcb, asoc, chk, 1); sctp_m_freem(chk->data); + chk->data = NULL; if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(chk->flags)) { asoc->sent_queue_cnt_removeable--; } } - chk->data = NULL; asoc->sent_queue_cnt--; - sctp_free_a_chunk(stcb, chk); + sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } } SCTP_PRINTF("after recover order is as follows\n"); - chk = TAILQ_FIRST(&stcb->asoc.sent_queue); - for (; chk != NULL; chk = tp2) { - tp2 = TAILQ_NEXT(chk, sctp_next); - SCTP_PRINTF("chk:%p TSN:%x\n", chk, chk->rec.data.TSN_seq); + TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) { + SCTP_PRINTF("chk:%p TSN:%x\n", (void *)chk, chk->rec.data.TSN_seq); } } @@ -633,10 +488,10 @@ sctp_mark_all_for_resend(struct sctp_tcb *stcb, * We only mark chunks that have been outstanding long enough to * have received feed-back. */ - struct sctp_tmit_chunk *chk, *tp2; + struct sctp_tmit_chunk *chk, *nchk; struct sctp_nets *lnets; struct timeval now, min_wait, tv; - int cur_rtt; + int cur_rto; int cnt_abandoned; int audit_tf, num_mk, fir; unsigned int cnt_mk; @@ -654,21 +509,18 @@ sctp_mark_all_for_resend(struct sctp_tcb *stcb, */ (void)SCTP_GETTIME_TIMEVAL(&now); /* get cur rto in micro-seconds */ - cur_rtt = (((net->lastsa >> 2) + net->lastsv) >> 1); - cur_rtt *= 1000; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) { - sctp_log_fr(cur_rtt, + cur_rto = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv; + cur_rto *= 1000; + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { + sctp_log_fr(cur_rto, stcb->asoc.peers_rwnd, window_probe, SCTP_FR_T3_MARK_TIME); - sctp_log_fr(net->flight_size, - SCTP_OS_TIMER_PENDING(&net->fr_timer.timer), - SCTP_OS_TIMER_ACTIVE(&net->fr_timer.timer), - SCTP_FR_CWND_REPORT); + sctp_log_fr(net->flight_size, 0, 0, SCTP_FR_CWND_REPORT); sctp_log_fr(net->flight_size, net->cwnd, stcb->asoc.total_flight, SCTP_FR_CWND_REPORT); } - tv.tv_sec = cur_rtt / 1000000; - tv.tv_usec = cur_rtt % 1000000; + tv.tv_sec = cur_rto / 1000000; + tv.tv_usec = cur_rto % 1000000; min_wait = now; timevalsub(&min_wait, &tv); if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) { @@ -680,8 +532,8 @@ sctp_mark_all_for_resend(struct sctp_tcb *stcb, */ min_wait.tv_sec = min_wait.tv_usec = 0; } - if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) { - sctp_log_fr(cur_rtt, now.tv_sec, now.tv_usec, SCTP_FR_T3_MARK_TIME); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { + sctp_log_fr(cur_rto, now.tv_sec, now.tv_usec, SCTP_FR_T3_MARK_TIME); sctp_log_fr(0, min_wait.tv_sec, min_wait.tv_usec, SCTP_FR_T3_MARK_TIME); } /* @@ -699,13 +551,8 @@ sctp_mark_all_for_resend(struct sctp_tcb *stcb, #ifndef INVARIANTS start_again: #endif - chk = TAILQ_FIRST(&stcb->asoc.sent_queue); - for (; chk != NULL; chk = tp2) { - tp2 = TAILQ_NEXT(chk, sctp_next); - if ((compare_with_wrap(stcb->asoc.last_acked_seq, - chk->rec.data.TSN_seq, - MAX_TSN)) || - (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) { + TAILQ_FOREACH_SAFE(chk, &stcb->asoc.sent_queue, sctp_next, nchk) { + if (SCTP_TSN_GE(stcb->asoc.last_acked_seq, chk->rec.data.TSN_seq)) { /* Strange case our list got out of order? */ SCTP_PRINTF("Our list is out of order? last_acked:%x chk:%x", (unsigned int)stcb->asoc.last_acked_seq, (unsigned int)chk->rec.data.TSN_seq); @@ -732,7 +579,7 @@ start_again: */ /* validate its been outstanding long enough */ - if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) { + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(chk->rec.data.TSN_seq, chk->sent_rcv_time.tv_sec, chk->sent_rcv_time.tv_usec, @@ -744,7 +591,7 @@ start_again: * some seconds past our min.. forget it we * will find no more to send. */ - if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) { + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(0, chk->sent_rcv_time.tv_sec, chk->sent_rcv_time.tv_usec, @@ -762,12 +609,6 @@ start_again: * ok it was sent after our boundary * time. */ - if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) { - sctp_log_fr(0, - chk->sent_rcv_time.tv_sec, - chk->sent_rcv_time.tv_usec, - SCTP_FR_T3_STOPPED); - } continue; } } @@ -778,7 +619,7 @@ start_again: if (chk->data) { (void)sctp_release_pr_sctp_chunk(stcb, chk, - (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT), + 1, SCTP_SO_NOT_LOCKED); cnt_abandoned++; } @@ -791,7 +632,7 @@ start_again: if (chk->data) { (void)sctp_release_pr_sctp_chunk(stcb, chk, - (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT), + 1, SCTP_SO_NOT_LOCKED); cnt_abandoned++; } @@ -806,7 +647,7 @@ start_again: tsnfirst = chk->rec.data.TSN_seq; } tsnlast = chk->rec.data.TSN_seq; - if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) { + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count, 0, SCTP_FR_T3_MARKED); } @@ -835,6 +676,12 @@ start_again: /* reset the TSN for striking and other FR stuff */ chk->rec.data.doing_fast_retransmit = 0; /* Clear any time so NO RTT is being done */ + + if (chk->do_rtt) { + if (chk->whoTo->rto_needed == 0) { + chk->whoTo->rto_needed = 1; + } + } chk->do_rtt = 0; if (alt != net) { sctp_free_remote_addr(chk->whoTo); @@ -852,7 +699,7 @@ start_again: /* * CMT: Do not allow FRs on retransmitted TSNs. */ - if (stcb->asoc.sctp_cmt_on_off == 1) { + if (stcb->asoc.sctp_cmt_on_off > 0) { chk->no_fr_allowed = 1; } #ifdef THIS_SHOULD_NOT_BE_DONE @@ -869,7 +716,7 @@ start_again: /* we did not subtract the same things? */ audit_tf = 1; } - if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) { + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(tsnfirst, tsnlast, num_mk, SCTP_FR_T3_TIMEOUT); } #ifdef SCTP_DEBUG @@ -927,7 +774,7 @@ start_again: if (audit_tf) { SCTPDBG(SCTP_DEBUG_TIMER4, "Audit total flight due to negative value net:%p\n", - net); + (void *)net); stcb->asoc.total_flight = 0; stcb->asoc.total_flight_count = 0; /* Clear all networks flight size */ @@ -935,7 +782,7 @@ start_again: lnets->flight_size = 0; SCTPDBG(SCTP_DEBUG_TIMER4, "Net:%p c-f cwnd:%d ssthresh:%d\n", - lnets, lnets->cwnd, lnets->ssthresh); + (void *)lnets, lnets->cwnd, lnets->ssthresh); } TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) { if (chk->sent < SCTP_DATAGRAM_RESEND) { @@ -951,19 +798,6 @@ start_again: } } } - /* - * Setup the ecn nonce re-sync point. We do this since - * retranmissions are NOT setup for ECN. This means that do to - * Karn's rule, we don't know the total of the peers ecn bits. - */ - chk = TAILQ_FIRST(&stcb->asoc.send_queue); - if (chk == NULL) { - stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq; - } else { - stcb->asoc.nonce_resync_tsn = chk->rec.data.TSN_seq; - } - stcb->asoc.nonce_wait_for_ecne = 0; - stcb->asoc.nonce_sum_check = 0; /* We return 1 if we only have a window probe outstanding */ return (0); } @@ -1000,58 +834,6 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, win_probe = 0; } - /* - * JRS 5/14/07 - If CMT PF is on and the destination if not already - * in PF state, set the destination to PF state and store the - * current time as the time that the destination was last active. In - * addition, find an alternate destination with PF-based - * find_alt_net(). - */ - if ((stcb->asoc.sctp_cmt_on_off == 1) && - (stcb->asoc.sctp_cmt_pf > 0)) { - if ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF) { - net->dest_state |= SCTP_ADDR_PF; - net->last_active = sctp_get_tick_count(); - SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from active to PF.\n", - net); - } - alt = sctp_find_alternate_net(stcb, net, 2); - } else if (stcb->asoc.sctp_cmt_on_off == 1) { - /* - * CMT: Using RTX_SSTHRESH policy for CMT. If CMT is being - * used, then pick dest with largest ssthresh for any - * retransmission. - */ - alt = sctp_find_alternate_net(stcb, net, 1); - /* - * CUCv2: If a different dest is picked for the - * retransmission, then new (rtx-)pseudo_cumack needs to be - * tracked for orig dest. Let CUCv2 track new (rtx-) - * pseudo-cumack always. - */ - net->find_pseudo_cumack = 1; - net->find_rtx_pseudo_cumack = 1; - } else { /* CMT is OFF */ - alt = sctp_find_alternate_net(stcb, net, 0); - } - num_mk = 0; - num_abandoned = 0; - (void)sctp_mark_all_for_resend(stcb, net, alt, win_probe, - &num_mk, &num_abandoned); - /* FR Loss recovery just ended with the T3. */ - stcb->asoc.fast_retran_loss_recovery = 0; - - /* CMT FR loss recovery ended with the T3 */ - net->fast_retran_loss_recovery = 0; - - /* - * setup the sat loss recovery that prevents satellite cwnd advance. - */ - stcb->asoc.sat_t3_loss_recovery = 1; - stcb->asoc.sat_t3_recovery_tsn = stcb->asoc.sending_seq; - - /* Backoff the timer and cwnd */ - sctp_backoff_on_timeout(stcb, net, win_probe, num_mk, num_abandoned); if (win_probe == 0) { /* We don't do normal threshold management on window probes */ if (sctp_threshold_management(inp, stcb, net, @@ -1070,17 +852,15 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, } else { ms_goneby = 0; } - if ((ms_goneby > net->RTO) || (net->RTO == 0)) { - /* - * no recent feed back in an RTO or - * more, request a RTT update - */ - if (sctp_send_hb(stcb, 1, net) < 0) + if ((net->dest_state & SCTP_ADDR_PF) == 0) { + if ((ms_goneby > net->RTO) || (net->RTO == 0)) { /* - * Less than 0 means we lost - * the assoc + * no recent feed back in an + * RTO or more, request a + * RTT update */ - return (1); + sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED); + } } } } @@ -1097,7 +877,52 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, return (1); } } - if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) { + if (stcb->asoc.sctp_cmt_on_off > 0) { + if (net->pf_threshold < net->failure_threshold) { + alt = sctp_find_alternate_net(stcb, net, 2); + } else { + /* + * CMT: Using RTX_SSTHRESH policy for CMT. If CMT is + * being used, then pick dest with largest ssthresh + * for any retransmission. + */ + alt = sctp_find_alternate_net(stcb, net, 1); + /* + * CUCv2: If a different dest is picked for the + * retransmission, then new (rtx-)pseudo_cumack + * needs to be tracked for orig dest. Let CUCv2 + * track new (rtx-) pseudo-cumack always. + */ + net->find_pseudo_cumack = 1; + net->find_rtx_pseudo_cumack = 1; + } + } else { + alt = sctp_find_alternate_net(stcb, net, 0); + } + + num_mk = 0; + num_abandoned = 0; + (void)sctp_mark_all_for_resend(stcb, net, alt, win_probe, + &num_mk, &num_abandoned); + /* FR Loss recovery just ended with the T3. */ + stcb->asoc.fast_retran_loss_recovery = 0; + + /* CMT FR loss recovery ended with the T3 */ + net->fast_retran_loss_recovery = 0; + if ((stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) && + (net->flight_size == 0)) { + (*stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) (stcb, net); + } + /* + * setup the sat loss recovery that prevents satellite cwnd advance. + */ + stcb->asoc.sat_t3_loss_recovery = 1; + stcb->asoc.sat_t3_recovery_tsn = stcb->asoc.sending_seq; + + /* Backoff the timer and cwnd */ + sctp_backoff_on_timeout(stcb, net, win_probe, num_mk, num_abandoned); + if ((!(net->dest_state & SCTP_ADDR_REACHABLE)) || + (net->dest_state & SCTP_ADDR_PF)) { /* Move all pending over too */ sctp_move_chunks_from_net(stcb, net); @@ -1125,23 +950,12 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, * change-primary then this flag must be cleared * from any net structures. */ - if (sctp_set_primary_addr(stcb, - (struct sockaddr *)NULL, - alt) == 0) { - net->dest_state |= SCTP_ADDR_WAS_PRIMARY; + if (stcb->asoc.alternate) { + sctp_free_remote_addr(stcb->asoc.alternate); } + stcb->asoc.alternate = alt; + atomic_add_int(&stcb->asoc.alternate->ref_count, 1); } - } else if ((stcb->asoc.sctp_cmt_on_off == 1) && - (stcb->asoc.sctp_cmt_pf > 0) && - ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { - /* - * JRS 5/14/07 - If the destination hasn't failed completely - * but is in PF state, a PF-heartbeat needs to be sent - * manually. - */ - if (sctp_send_hb(stcb, 1, net) < 0) - /* Return less than 0 means we lost the association */ - return (1); } /* * Special case for cookie-echo'ed case, we don't do output but must @@ -1160,13 +974,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc); /* C3. See if we need to send a Fwd-TSN */ - if (compare_with_wrap(stcb->asoc.advanced_peer_ack_point, - stcb->asoc.last_acked_seq, MAX_TSN)) { - /* - * ISSUE with ECN, see FWD-TSN processing for notes - * on issues that will occur when the ECN NONCE - * stuff is put into SCTP for cross checking. - */ + if (SCTP_TSN_GT(stcb->asoc.advanced_peer_ack_point, stcb->asoc.last_acked_seq)) { send_forward_tsn(stcb, &stcb->asoc); if (lchk) { /* Assure a timer is up */ @@ -1231,7 +1039,7 @@ sctp_t1init_timer(struct sctp_inpcb *inp, int sctp_cookie_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct sctp_nets *net) + struct sctp_nets *net SCTP_UNUSED) { struct sctp_nets *alt; struct sctp_tmit_chunk *cookie; @@ -1262,8 +1070,7 @@ sctp_cookie_timer(struct sctp_inpcb *inp, *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3); } inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4; - sctp_abort_an_association(inp, stcb, SCTP_INTERNAL_ERROR, - oper, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(inp, stcb, oper, SCTP_SO_NOT_LOCKED); } else { #ifdef INVARIANTS panic("Cookie timer expires in wrong state?"); @@ -1349,7 +1156,7 @@ sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, atomic_add_int(&alt->ref_count, 1); } } - if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) { + if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { /* * If the address went un-reachable, we need to move to * alternates for ALL chk's in queue @@ -1371,7 +1178,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_nets *alt; - struct sctp_tmit_chunk *asconf, *chk, *nchk; + struct sctp_tmit_chunk *asconf, *chk; /* is this a first send, or a retransmission? */ if (TAILQ_EMPTY(&stcb->asoc.asconf_send_queue)) { @@ -1429,8 +1236,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, atomic_add_int(&alt->ref_count, 1); } } - for (chk = asconf; chk; chk = nchk) { - nchk = TAILQ_NEXT(chk, sctp_next); + TAILQ_FOREACH(chk, &stcb->asoc.asconf_send_queue, sctp_next) { if (chk->whoTo != alt) { sctp_free_remote_addr(chk->whoTo); chk->whoTo = alt; @@ -1440,7 +1246,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); chk->sent = SCTP_DATAGRAM_RESEND; } - if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) { + if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { /* * If the address went un-reachable, we need to move * to the alternate for ALL chunks in queue @@ -1461,7 +1267,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, /* Mobility adaptation */ void sctp_delete_prim_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct sctp_nets *net) + struct sctp_nets *net SCTP_UNUSED) { if (stcb->asoc.deleted_primary == NULL) { SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: deleted_primary is not stored...\n"); @@ -1532,9 +1338,8 @@ static void sctp_audit_stream_queues_for_size(struct sctp_inpcb *inp, struct sctp_tcb *stcb) { - struct sctp_stream_out *outs; struct sctp_stream_queue_pending *sp; - unsigned int chks_in_queue = 0; + unsigned int i, chks_in_queue = 0; int being_filled = 0; /* @@ -1548,32 +1353,21 @@ sctp_audit_stream_queues_for_size(struct sctp_inpcb *inp, stcb->asoc.sent_queue_retran_cnt); stcb->asoc.sent_queue_retran_cnt = 0; } - SCTP_TCB_SEND_LOCK(stcb); - if (TAILQ_EMPTY(&stcb->asoc.out_wheel)) { - int i, cnt = 0; - - /* Check to see if a spoke fell off the wheel */ - for (i = 0; i < stcb->asoc.streamoutcnt; i++) { - if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) { - sctp_insert_on_wheel(stcb, &stcb->asoc, &stcb->asoc.strmout[i], 1); - cnt++; - } - } - if (cnt) { - /* yep, we lost a spoke or two */ - SCTP_PRINTF("Found an additional %d streams NOT on outwheel, corrected\n", cnt); + if (stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, &stcb->asoc)) { + /* No stream scheduler information, initialize scheduler */ + stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 0); + if (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, &stcb->asoc)) { + /* yep, we lost a stream or two */ + SCTP_PRINTF("Found additional streams NOT managed by scheduler, corrected\n"); } else { - /* no spokes lost, */ + /* no streams lost */ stcb->asoc.total_output_queue_size = 0; } - SCTP_TCB_SEND_UNLOCK(stcb); - return; } - SCTP_TCB_SEND_UNLOCK(stcb); /* Check to see if some data queued, if so report it */ - TAILQ_FOREACH(outs, &stcb->asoc.out_wheel, next_spoke) { - if (!TAILQ_EMPTY(&outs->outqueue)) { - TAILQ_FOREACH(sp, &outs->outqueue, next) { + for (i = 0; i < stcb->asoc.streamoutcnt; i++) { + if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) { + TAILQ_FOREACH(sp, &stcb->asoc.strmout[i].outqueue, next) { if (sp->msg_is_complete) being_filled++; chks_in_queue++; @@ -1607,66 +1401,62 @@ sctp_audit_stream_queues_for_size(struct sctp_inpcb *inp, int sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct sctp_nets *net, int cnt_of_unconf) + struct sctp_nets *net) { - int ret; + uint8_t net_was_pf; - if (net) { - if (net->hb_responded == 0) { - if (net->ro._s_addr) { - /* - * Invalidate the src address if we did not - * get a response last time. - */ - sctp_free_ifa(net->ro._s_addr); - net->ro._s_addr = NULL; - net->src_addr_selected = 0; - } - sctp_backoff_on_timeout(stcb, net, 1, 0, 0); + if (net->dest_state & SCTP_ADDR_PF) { + net_was_pf = 1; + } else { + net_was_pf = 0; + } + if (net->hb_responded == 0) { + if (net->ro._s_addr) { + /* + * Invalidate the src address if we did not get a + * response last time. + */ + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + net->src_addr_selected = 0; } - /* Zero PBA, if it needs it */ - if (net->partial_bytes_acked) { - net->partial_bytes_acked = 0; + sctp_backoff_on_timeout(stcb, net, 1, 0, 0); + if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) { + /* Assoc is over */ + return (1); } } + /* Zero PBA, if it needs it */ + if (net->partial_bytes_acked) { + net->partial_bytes_acked = 0; + } if ((stcb->asoc.total_output_queue_size > 0) && (TAILQ_EMPTY(&stcb->asoc.send_queue)) && (TAILQ_EMPTY(&stcb->asoc.sent_queue))) { sctp_audit_stream_queues_for_size(inp, stcb); } - /* Send a new HB, this will do threshold managment, pick a new dest */ - if (cnt_of_unconf == 0) { - if (sctp_send_hb(stcb, 0, NULL) < 0) { - return (1); - } - } else { + if (!(net->dest_state & SCTP_ADDR_NOHB) && + !((net_was_pf == 0) && (net->dest_state & SCTP_ADDR_PF))) { /* - * this will send out extra hb's up to maxburst if there are - * any unconfirmed addresses. + * when move to PF during threshold mangement, a HB has been + * queued in that routine */ - uint32_t cnt_sent = 0; - - TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) && - (net->dest_state & SCTP_ADDR_REACHABLE)) { - cnt_sent++; - if (net->hb_responded == 0) { - /* Did we respond last time? */ - if (net->ro._s_addr) { - sctp_free_ifa(net->ro._s_addr); - net->ro._s_addr = NULL; - net->src_addr_selected = 0; - } - } - ret = sctp_send_hb(stcb, 1, net); - if (ret < 0) - return 1; - else if (ret == 0) { - break; - } - if (cnt_sent >= SCTP_BASE_SYSCTL(sctp_hb_maxburst)) - break; - } + uint32_t ms_gone_by; + + if ((net->last_sent_time.tv_sec > 0) || + (net->last_sent_time.tv_usec > 0)) { + struct timeval diff; + + SCTP_GETTIME_TIMEVAL(&diff); + timevalsub(&diff, &net->last_sent_time); + ms_gone_by = (uint32_t) (diff.tv_sec * 1000) + + (uint32_t) (diff.tv_usec / 1000); + } else { + ms_gone_by = 0xffffffff; + } + if ((ms_gone_by >= net->heart_beat_delay) || + (net->dest_state & SCTP_ADDR_PF)) { + sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED); } } return (0); @@ -1679,7 +1469,7 @@ sctp_pathmtu_timer(struct sctp_inpcb *inp, { uint32_t next_mtu, mtu; - next_mtu = sctp_get_next_mtu(inp, net->mtu); + next_mtu = sctp_get_next_mtu(net->mtu); if ((next_mtu > net->mtu) && (net->port == 0)) { if ((net->src_addr_selected == 0) || @@ -1770,19 +1560,27 @@ sctp_autoclose_timer(struct sctp_inpcb *inp, */ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) { /* only send SHUTDOWN 1st time thru */ - sctp_send_shutdown(stcb, stcb->asoc.primary_destination); + struct sctp_nets *netp; + if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); + sctp_stop_timers_for_shutdown(stcb); + if (stcb->asoc.alternate) { + netp = stcb->asoc.alternate; + } else { + netp = stcb->asoc.primary_destination; + } + sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, - asoc->primary_destination); + netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, - asoc->primary_destination); + netp); } } } else { diff --git a/freebsd/sys/netinet/sctp_timer.h b/freebsd/sys/netinet/sctp_timer.h index a26ad728..fd9df804 100644 --- a/freebsd/sys/netinet/sctp_timer.h +++ b/freebsd/sys/netinet/sctp_timer.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,22 +30,17 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_timer.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_timer_h__ -#define __sctp_timer_h__ +#ifndef _NETINET_SCTP_TIMER_H_ +#define _NETINET_SCTP_TIMER_H_ #if defined(_KERNEL) || defined(__Userspace__) #define SCTP_RTT_SHIFT 3 #define SCTP_RTT_VAR_SHIFT 2 -void -sctp_early_fr_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct sctp_nets *net); - struct sctp_nets * sctp_find_alternate_net(struct sctp_tcb *, struct sctp_nets *, int mode); @@ -63,7 +60,7 @@ sctp_shutdown_timer(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *); int sctp_heartbeat_timer(struct sctp_inpcb *, struct sctp_tcb *, - struct sctp_nets *, int); + struct sctp_nets *); int sctp_cookie_timer(struct sctp_inpcb *, struct sctp_tcb *, diff --git a/freebsd/sys/netinet/sctp_uio.h b/freebsd/sys/netinet/sctp_uio.h index 4d72b1ca..d8e7da45 100644 --- a/freebsd/sys/netinet/sctp_uio.h +++ b/freebsd/sys/netinet/sctp_uio.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,12 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_uio.h,v 1.11 2005/03/06 16:04:18 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); -#ifndef __sctp_uio_h__ -#define __sctp_uio_h__ +#ifndef _NETINET_SCTP_UIO_H_ +#define _NETINET_SCTP_UIO_H_ #if ! defined(_KERNEL) @@ -45,6 +46,16 @@ __FBSDID("$FreeBSD$"); typedef uint32_t sctp_assoc_t; +#define SCTP_FUTURE_ASSOC 0 +#define SCTP_CURRENT_ASSOC 1 +#define SCTP_ALL_ASSOC 2 + +struct sctp_event { + sctp_assoc_t se_assoc_id; + uint16_t se_type; + uint8_t se_on; +}; + /* Compatibility to previous define's */ #define sctp_stream_reset_events sctp_stream_reset_event @@ -67,6 +78,14 @@ struct sctp_event_subscribe { #define SCTP_INIT 0x0001 #define SCTP_SNDRCV 0x0002 #define SCTP_EXTRCV 0x0003 +#define SCTP_SNDINFO 0x0004 +#define SCTP_RCVINFO 0x0005 +#define SCTP_NXTINFO 0x0006 +#define SCTP_PRINFO 0x0007 +#define SCTP_AUTHINFO 0x0008 +#define SCTP_DSTADDRV4 0x0009 +#define SCTP_DSTADDRV6 0x000a + /* * ancillary data structures */ @@ -91,8 +110,8 @@ struct sctp_initmsg { */ -#define SCTP_ALIGN_RESV_PAD 96 -#define SCTP_ALIGN_RESV_PAD_SHORT 80 +#define SCTP_ALIGN_RESV_PAD 92 +#define SCTP_ALIGN_RESV_PAD_SHORT 76 struct sctp_sndrcvinfo { uint16_t sinfo_stream; @@ -104,6 +123,8 @@ struct sctp_sndrcvinfo { uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; + uint16_t sinfo_keynumber; + uint16_t sinfo_keynumber_valid; uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD]; }; @@ -111,7 +132,6 @@ struct sctp_extrcvinfo { uint16_t sinfo_stream; uint16_t sinfo_ssn; uint16_t sinfo_flags; - uint16_t sinfo_pr_policy; uint32_t sinfo_ppid; uint32_t sinfo_context; uint32_t sinfo_timetolive; @@ -123,15 +143,86 @@ struct sctp_extrcvinfo { uint32_t sreinfo_next_aid; uint32_t sreinfo_next_length; uint32_t sreinfo_next_ppid; + uint16_t sinfo_keynumber; + uint16_t sinfo_keynumber_valid; uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT]; }; +struct sctp_sndinfo { + uint16_t snd_sid; + uint16_t snd_flags; + uint32_t snd_ppid; + uint32_t snd_context; + sctp_assoc_t snd_assoc_id; +}; + +struct sctp_prinfo { + uint16_t pr_policy; + uint32_t pr_value; +}; + +struct sctp_default_prinfo { + uint16_t pr_policy; + uint32_t pr_value; + sctp_assoc_t pr_assoc_id; +}; + +struct sctp_authinfo { + uint16_t auth_keynumber; +}; + +struct sctp_rcvinfo { + uint16_t rcv_sid; + uint16_t rcv_ssn; + uint16_t rcv_flags; + uint32_t rcv_ppid; + uint32_t rcv_tsn; + uint32_t rcv_cumtsn; + uint32_t rcv_context; + sctp_assoc_t rcv_assoc_id; +}; + +struct sctp_nxtinfo { + uint16_t nxt_sid; + uint16_t nxt_flags; + uint32_t nxt_ppid; + uint32_t nxt_length; + sctp_assoc_t nxt_assoc_id; +}; + #define SCTP_NO_NEXT_MSG 0x0000 #define SCTP_NEXT_MSG_AVAIL 0x0001 #define SCTP_NEXT_MSG_ISCOMPLETE 0x0002 #define SCTP_NEXT_MSG_IS_UNORDERED 0x0004 #define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008 +struct sctp_recvv_rn { + struct sctp_rcvinfo recvv_rcvinfo; + struct sctp_nxtinfo recvv_nxtinfo; +}; + +#define SCTP_RECVV_NOINFO 0 +#define SCTP_RECVV_RCVINFO 1 +#define SCTP_RECVV_NXTINFO 2 +#define SCTP_RECVV_RN 3 + +#define SCTP_SENDV_NOINFO 0 +#define SCTP_SENDV_SNDINFO 1 +#define SCTP_SENDV_PRINFO 2 +#define SCTP_SENDV_AUTHINFO 3 +#define SCTP_SENDV_SPA 4 + +struct sctp_sendv_spa { + uint32_t sendv_flags; + struct sctp_sndinfo sendv_sndinfo; + struct sctp_prinfo sendv_prinfo; + struct sctp_authinfo sendv_authinfo; +}; + +#define SCTP_SEND_SNDINFO_VALID 0x00000001 +#define SCTP_SEND_PRINFO_VALID 0x00000002 +#define SCTP_SEND_AUTHINFO_VALID 0x00000004 + struct sctp_snd_all_completes { uint16_t sall_stream; uint16_t sall_flags; @@ -142,6 +233,8 @@ struct sctp_snd_all_completes { }; /* Flags that go into the sinfo->sinfo_flags field */ +#define SCTP_NOTIFICATION 0x0010 /* next message is a notification */ +#define SCTP_COMPLETE 0x0020 /* next message is complete */ #define SCTP_EOF 0x0100 /* Start shutdown procedures */ #define SCTP_ABORT 0x0200 /* Send an ABORT to peer */ #define SCTP_UNORDERED 0x0400 /* Message is un-ordered */ @@ -150,19 +243,20 @@ struct sctp_snd_all_completes { #define SCTP_EOR 0x2000 /* end of message signal */ #define SCTP_SACK_IMMEDIATELY 0x4000 /* Set I-Bit */ -#define INVALID_SINFO_FLAG(x) (((x) & 0xffffff00 \ +#define INVALID_SINFO_FLAG(x) (((x) & 0xfffffff0 \ & ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\ SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR |\ SCTP_SACK_IMMEDIATELY)) != 0) /* for the endpoint */ /* The lower byte is an enumeration of PR-SCTP policies */ +#define SCTP_PR_SCTP_NONE 0x0000/* Reliable transfer */ #define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */ #define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */ #define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */ -#define PR_SCTP_POLICY(x) ((x) & 0xff) -#define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != 0) +#define PR_SCTP_POLICY(x) ((x) & 0x0f) +#define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE) #define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL) #define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF) #define PR_SCTP_RTX_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX) @@ -201,16 +295,23 @@ struct sctp_assoc_change { uint16_t sac_outbound_streams; uint16_t sac_inbound_streams; sctp_assoc_t sac_assoc_id; + uint8_t sac_info[]; }; /* sac_state values */ -#define SCTP_COMM_UP 0x0001 -#define SCTP_COMM_LOST 0x0002 -#define SCTP_RESTART 0x0003 -#define SCTP_SHUTDOWN_COMP 0x0004 -#define SCTP_CANT_STR_ASSOC 0x0005 - - +#define SCTP_COMM_UP 0x0001 +#define SCTP_COMM_LOST 0x0002 +#define SCTP_RESTART 0x0003 +#define SCTP_SHUTDOWN_COMP 0x0004 +#define SCTP_CANT_STR_ASSOC 0x0005 + +/* sac_info values */ +#define SCTP_ASSOC_SUPPORTS_PR 0x01 +#define SCTP_ASSOC_SUPPORTS_AUTH 0x02 +#define SCTP_ASSOC_SUPPORTS_ASCONF 0x03 +#define SCTP_ASSOC_SUPPORTS_MULTIBUF 0x04 +#define SCTP_ASSOC_SUPPORTS_RE_CONFIG 0x05 +#define SCTP_ASSOC_SUPPORTS_MAX 0x05 /* * Address event */ @@ -233,31 +334,11 @@ struct sctp_paddr_change { #define SCTP_ADDR_MADE_PRIM 0x0005 #define SCTP_ADDR_CONFIRMED 0x0006 -/* - * CAUTION: these are user exposed SCTP addr reachability states must be - * compatible with SCTP_ADDR states in sctp_constants.h - */ -#ifdef SCTP_ACTIVE -#undef SCTP_ACTIVE -#endif #define SCTP_ACTIVE 0x0001 /* SCTP_ADDR_REACHABLE */ - -#ifdef SCTP_INACTIVE -#undef SCTP_INACTIVE -#endif -#define SCTP_INACTIVE 0x0002 /* SCTP_ADDR_NOT_REACHABLE */ - -#ifdef SCTP_UNCONFIRMED -#undef SCTP_UNCONFIRMED -#endif +#define SCTP_INACTIVE 0x0002 /* neither SCTP_ADDR_REACHABLE nor + * SCTP_ADDR_UNCONFIRMED */ #define SCTP_UNCONFIRMED 0x0200 /* SCTP_ADDR_UNCONFIRMED */ -#ifdef SCTP_NOHEARTBEAT -#undef SCTP_NOHEARTBEAT -#endif -#define SCTP_NOHEARTBEAT 0x0040 /* SCTP_ADDR_NOHB */ - - /* remote error events */ struct sctp_remote_error { uint16_t sre_type; @@ -268,7 +349,7 @@ struct sctp_remote_error { uint8_t sre_data[4]; }; -/* data send failure event */ +/* data send failure event (deprecated) */ struct sctp_send_failed { uint16_t ssf_type; uint16_t ssf_flags; @@ -279,6 +360,17 @@ struct sctp_send_failed { uint8_t ssf_data[]; }; +/* data send failure event (not deprecated) */ +struct sctp_send_failed_event { + uint16_t ssfe_type; + uint16_t ssfe_flags; + uint32_t ssfe_length; + uint32_t ssfe_error; + struct sctp_sndinfo ssfe_info; + sctp_assoc_t ssfe_assoc_id; + uint8_t ssfe_data[]; +}; + /* flag that indicates state of data */ #define SCTP_DATA_UNSENT 0x0001 /* inqueue never on wire */ #define SCTP_DATA_SENT 0x0002 /* on wire at failure */ @@ -349,7 +441,8 @@ struct sctp_authkey_event { }; /* indication values */ -#define SCTP_AUTH_NEWKEY 0x0001 +#define SCTP_AUTH_NEW_KEY 0x0001 +#define SCTP_AUTH_NEWKEY SCTP_AUTH_NEW_KEY #define SCTP_AUTH_NO_AUTH 0x0002 #define SCTP_AUTH_FREE_KEY 0x0003 @@ -363,23 +456,52 @@ struct sctp_sender_dry_event { /* - * stream reset event + * Stream reset event - subscribe to SCTP_STREAM_RESET_EVENT */ struct sctp_stream_reset_event { uint16_t strreset_type; uint16_t strreset_flags; uint32_t strreset_length; sctp_assoc_t strreset_assoc_id; - uint16_t strreset_list[]; + uint16_t strreset_stream_list[]; }; -/* flags in strreset_flags field */ -#define SCTP_STRRESET_INBOUND_STR 0x0001 -#define SCTP_STRRESET_OUTBOUND_STR 0x0002 -#define SCTP_STRRESET_ALL_STREAMS 0x0004 -#define SCTP_STRRESET_STREAM_LIST 0x0008 -#define SCTP_STRRESET_FAILED 0x0010 -#define SCTP_STRRESET_ADD_STREAM 0x0020 +/* flags in stream_reset_event (strreset_flags) */ +#define SCTP_STREAM_RESET_INCOMING_SSN 0x0001 +#define SCTP_STREAM_RESET_OUTGOING_SSN 0x0002 +#define SCTP_STREAM_RESET_DENIED 0x0004 +#define SCTP_STREAM_RESET_FAILED 0x0008 + +/* + * Assoc reset event - subscribe to SCTP_ASSOC_RESET_EVENT + */ +struct sctp_assoc_reset_event { + uint16_t assocreset_type; + uint16_t assocreset_flags; + uint32_t assocreset_length; + sctp_assoc_t assocreset_assoc_id; + uint32_t assocreset_local_tsn; + uint32_t assocreset_remote_tsn; +}; + +#define SCTP_ASSOC_RESET_DENIED 0x0004 +#define SCTP_ASSOC_RESET_FAILED 0x0008 + +/* + * Stream change event - subscribe to SCTP_STREAM_CHANGE_EVENT + */ +struct sctp_stream_change_event { + uint16_t strchange_type; + uint16_t strchange_flags; + uint32_t strchange_length; + sctp_assoc_t strchange_assoc_id; + uint16_t strchange_instrms; + uint16_t strchange_outstrms; +}; + +#define SCTP_STREAM_CHANGE_DENIED 0x0004 +#define SCTP_STREAM_CHANGE_FAILED 0x0008 + /* SCTP notification event */ struct sctp_tlv { @@ -401,23 +523,29 @@ union sctp_notification { struct sctp_pdapi_event sn_pdapi_event; struct sctp_authkey_event sn_auth_event; struct sctp_sender_dry_event sn_sender_dry_event; + struct sctp_send_failed_event sn_send_failed_event; struct sctp_stream_reset_event sn_strreset_event; + struct sctp_assoc_reset_event sn_assocreset_event; + struct sctp_stream_change_event sn_strchange_event; }; /* notification types */ -#define SCTP_ASSOC_CHANGE 0x0001 -#define SCTP_PEER_ADDR_CHANGE 0x0002 -#define SCTP_REMOTE_ERROR 0x0003 -#define SCTP_SEND_FAILED 0x0004 -#define SCTP_SHUTDOWN_EVENT 0x0005 -#define SCTP_ADAPTATION_INDICATION 0x0006 +#define SCTP_ASSOC_CHANGE 0x0001 +#define SCTP_PEER_ADDR_CHANGE 0x0002 +#define SCTP_REMOTE_ERROR 0x0003 +#define SCTP_SEND_FAILED 0x0004 +#define SCTP_SHUTDOWN_EVENT 0x0005 +#define SCTP_ADAPTATION_INDICATION 0x0006 /* same as above */ -#define SCTP_ADAPTION_INDICATION 0x0006 -#define SCTP_PARTIAL_DELIVERY_EVENT 0x0007 -#define SCTP_AUTHENTICATION_EVENT 0x0008 -#define SCTP_STREAM_RESET_EVENT 0x0009 -#define SCTP_SENDER_DRY_EVENT 0x000a -#define SCTP__NOTIFICATIONS_STOPPED_EVENT 0x000b /* we don't send this */ +#define SCTP_ADAPTION_INDICATION 0x0006 +#define SCTP_PARTIAL_DELIVERY_EVENT 0x0007 +#define SCTP_AUTHENTICATION_EVENT 0x0008 +#define SCTP_STREAM_RESET_EVENT 0x0009 +#define SCTP_SENDER_DRY_EVENT 0x000a +#define SCTP_NOTIFICATIONS_STOPPED_EVENT 0x000b /* we don't send this */ +#define SCTP_ASSOC_RESET_EVENT 0x000c +#define SCTP_STREAM_CHANGE_EVENT 0x000d +#define SCTP_SEND_FAILED_EVENT 0x000e /* * socket option structs */ @@ -430,9 +558,11 @@ struct sctp_paddrparams { uint32_t spp_flags; uint32_t spp_ipv6_flowlabel; uint16_t spp_pathmaxrxt; - uint8_t spp_ipv4_tos; + uint8_t spp_dscp; }; +#define spp_ipv4_tos spp_dscp + #define SPP_HB_ENABLE 0x00000001 #define SPP_HB_DISABLE 0x00000002 #define SPP_HB_DEMAND 0x00000004 @@ -440,7 +570,15 @@ struct sctp_paddrparams { #define SPP_PMTUD_DISABLE 0x00000010 #define SPP_HB_TIME_IS_ZERO 0x00000080 #define SPP_IPV6_FLOWLABEL 0x00000100 -#define SPP_IPV4_TOS 0x00000200 +#define SPP_DSCP 0x00000200 +#define SPP_IPV4_TOS SPP_DSCP + +struct sctp_paddrthlds { + struct sockaddr_storage spt_address; + sctp_assoc_t spt_assoc_id; + uint16_t spt_pathmaxrxt; + uint16_t spt_pathpfthld; +}; struct sctp_paddrinfo { struct sockaddr_storage spinfo_address; @@ -486,13 +624,6 @@ struct sctp_getaddresses { struct sockaddr addr[1]; }; -struct sctp_setstrm_timeout { - sctp_assoc_t ssto_assoc_id; - uint32_t ssto_timeout; - uint32_t ssto_streamid_start; - uint32_t ssto_streamid_end; -}; - struct sctp_status { sctp_assoc_t sstat_assoc_id; int32_t sstat_state; @@ -517,6 +648,7 @@ struct sctp_authchunk { struct sctp_authkey { sctp_assoc_t sca_assoc_id; uint16_t sca_keynumber; + uint16_t sca_keylength; uint8_t sca_key[]; }; @@ -544,6 +676,7 @@ struct sctp_authkeyid { /* SCTP_PEER_AUTH_CHUNKS / SCTP_LOCAL_AUTH_CHUNKS */ struct sctp_authchunks { sctp_assoc_t gauth_assoc_id; + uint32_t gauth_number_of_chunks; uint8_t gauth_chunks[]; }; @@ -552,6 +685,17 @@ struct sctp_assoc_value { uint32_t assoc_value; }; +struct sctp_cc_option { + int option; + struct sctp_assoc_value aid_value; +}; + +struct sctp_stream_value { + sctp_assoc_t assoc_id; + uint16_t stream_id; + uint16_t stream_value; +}; + struct sctp_assoc_ids { uint32_t gaids_number_of_ids; sctp_assoc_t gaids_assoc_id[]; @@ -574,6 +718,12 @@ struct sctp_timeouts { uint32_t stimo_shutdownack; }; +struct sctp_udpencaps { + struct sockaddr_storage sue_address; + sctp_assoc_t sue_assoc_id; + uint16_t sue_port; +}; + struct sctp_cwnd_args { struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */ uint32_t cwnd_new_value;/* cwnd in k */ @@ -604,19 +754,18 @@ struct sctp_blk_args { */ #define SCTP_MAX_EXPLICT_STR_RESET 1000 -#define SCTP_RESET_LOCAL_RECV 0x0001 -#define SCTP_RESET_LOCAL_SEND 0x0002 -#define SCTP_RESET_BOTH 0x0003 -#define SCTP_RESET_TSN 0x0004 -#define SCTP_RESET_ADD_STREAMS 0x0005 - -struct sctp_stream_reset { - sctp_assoc_t strrst_assoc_id; - uint16_t strrst_flags; - uint16_t strrst_num_streams; /* 0 == ALL */ - uint16_t strrst_list[]; /* list if strrst_num_streams is not 0 */ +struct sctp_reset_streams { + sctp_assoc_t srs_assoc_id; + uint16_t srs_flags; + uint16_t srs_number_streams; /* 0 == ALL */ + uint16_t srs_stream_list[]; /* list if strrst_num_streams is not 0 */ }; +struct sctp_add_streams { + sctp_assoc_t sas_assoc_id; + uint16_t sas_instrms; + uint16_t sas_outstrms; +}; struct sctp_get_nonce_values { sctp_assoc_t gn_assoc_id; @@ -892,18 +1041,8 @@ struct sctpstat { * fired */ uint32_t sctps_timoassockill; /* Number of asoc free timers expired */ uint32_t sctps_timoinpkill; /* Number of inp free timers expired */ - /* Early fast retransmission counters */ - uint32_t sctps_earlyfrstart; - uint32_t sctps_earlyfrstop; - uint32_t sctps_earlyfrmrkretrans; - uint32_t sctps_earlyfrstpout; - uint32_t sctps_earlyfrstpidsck1; - uint32_t sctps_earlyfrstpidsck2; - uint32_t sctps_earlyfrstpidsck3; - uint32_t sctps_earlyfrstpidsck4; - uint32_t sctps_earlyfrstrid; - uint32_t sctps_earlyfrstrout; - uint32_t sctps_earlyfrstrtmr; + /* former early FR counters */ + uint32_t sctps_spare[11]; /* others */ uint32_t sctps_hdrops; /* packet shorter than header */ uint32_t sctps_badsum; /* checksum error */ @@ -942,7 +1081,7 @@ struct sctpstat { uint32_t sctps_wu_sacks_sent; /* Window Update only sacks sent */ uint32_t sctps_sends_with_flags; /* number of sends with * sinfo_flags !=0 */ - uint32_t sctps_sends_with_unord; /* number of unordered sends */ + uint32_t sctps_sends_with_unord; /* number of unordered sends */ uint32_t sctps_sends_with_eof; /* number of sends with EOF flag set */ uint32_t sctps_sends_with_abort; /* number of sends with ABORT * flag set */ @@ -961,8 +1100,9 @@ struct sctpstat { * max burst inflight to net */ uint32_t sctps_fwdtsn_map_over; /* number of map array over-runs via * fwd-tsn's */ - - uint32_t sctps_reserved[32]; /* Future ABI compat - remove int's + uint32_t sctps_queue_upd_ecne; /* Number of times we queued or + * updated an ECN chunk on send queue */ + uint32_t sctps_reserved[31]; /* Future ABI compat - remove int's * from here when adding new */ }; @@ -984,12 +1124,8 @@ struct sctpstat { #define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x) union sctp_sockstore { -#if defined(INET) || !defined(_KERNEL) struct sockaddr_in sin; -#endif -#if defined(INET6) || !defined(_KERNEL) struct sockaddr_in6 sin6; -#endif struct sockaddr sa; }; @@ -1075,9 +1211,11 @@ struct xsctp_raddr { uint8_t active; /* sctpAssocLocalRemEntry 3 */ uint8_t confirmed; /* */ uint8_t heartbeat_enabled; /* sctpAssocLocalRemEntry 4 */ + uint8_t potentially_failed; struct sctp_timeval start_time; /* sctpAssocLocalRemEntry 8 */ uint32_t rtt; - uint32_t extra_padding[32]; /* future */ + uint32_t heartbeat_interval; + uint32_t extra_padding[31]; /* future */ }; #define SCTP_MAX_LOGGING_SIZE 30000 @@ -1139,26 +1277,34 @@ int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **)); void sctp_freeladdrs __P((struct sockaddr *)); int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *)); +/* deprecated */ ssize_t sctp_sendmsg -__P((int, const void *, size_t, - const struct sockaddr *, +__P((int, const void *, size_t, const struct sockaddr *, socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t)); - ssize_t sctp_send __P((int sd, const void *msg, size_t len, - const struct sctp_sndrcvinfo *sinfo, int flags)); +/* deprecated */ + ssize_t sctp_send __P((int, const void *, size_t, + const struct sctp_sndrcvinfo *, int)); + +/* deprecated */ + ssize_t sctp_sendx __P((int, const void *, size_t, struct sockaddr *, + int, struct sctp_sndrcvinfo *, int)); + +/* deprecated */ + ssize_t sctp_sendmsgx __P((int sd, const void *, size_t, struct sockaddr *, + int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t)); - ssize_t sctp_sendx __P((int sd, const void *msg, size_t len, - struct sockaddr *addrs, int addrcnt, - struct sctp_sndrcvinfo *sinfo, int flags)); + sctp_assoc_t sctp_getassocid __P((int, struct sockaddr *)); - ssize_t sctp_sendmsgx __P((int sd, const void *, size_t, - struct sockaddr *, int, - uint32_t, uint32_t, uint16_t, uint32_t, uint32_t)); +/* deprecated */ + ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *, socklen_t *, + struct sctp_sndrcvinfo *, int *)); - sctp_assoc_t sctp_getassocid __P((int sd, struct sockaddr *sa)); + ssize_t sctp_sendv __P((int, const struct iovec *, int, struct sockaddr *, + int, void *, socklen_t, unsigned int, int)); - ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *, - socklen_t *, struct sctp_sndrcvinfo *, int *)); + ssize_t sctp_recvv __P((int, const struct iovec *, int, struct sockaddr *, + socklen_t *, void *, socklen_t *, unsigned int *, int *)); __END_DECLS diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c index e901cf97..548ba9f3 100644 --- a/freebsd/sys/netinet/sctp_usrreq.c +++ b/freebsd/sys/netinet/sctp_usrreq.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,16 +32,15 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_usrreq.c,v 1.48 2005/03/07 23:26:08 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); + #include #include #include #include #include -#if defined(INET6) +#ifdef INET6 #endif #include #include @@ -50,19 +51,18 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include +extern struct sctp_cc_functions sctp_cc_functions[]; +extern struct sctp_ss_functions sctp_ss_functions[]; void sctp_init(void) { u_long sb_max_adj; - bzero(&SCTP_BASE_STATS, sizeof(struct sctpstat)); - /* Initialize and modify the sysctled variables */ sctp_init_sysctls(); if ((nmbclusters / 8) > SCTP_ASOC_MAX_CHUNKS_ON_QUEUE) @@ -80,7 +80,6 @@ sctp_init(void) * now I will just copy. */ SCTP_BASE_SYSCTL(sctp_recvspace) = SCTP_BASE_SYSCTL(sctp_sendspace); - SCTP_BASE_VAR(first_time) = 0; SCTP_BASE_VAR(sctp_pcb_initialized) = 0; sctp_pcb_init(); @@ -89,8 +88,6 @@ sctp_init(void) SCTP_BASE_VAR(packet_log_end) = 0; bzero(&SCTP_BASE_VAR(packet_log_buffer), SCTP_PACKET_LOG_SIZE); #endif - - } void @@ -102,10 +99,7 @@ sctp_finish(void) void -sctp_pathmtu_adjustment(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, - struct sctp_nets *net, - uint16_t nxtsz) +sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint16_t nxtsz) { struct sctp_tmit_chunk *chk; uint16_t overhead; @@ -151,6 +145,7 @@ sctp_pathmtu_adjustment(struct sctp_inpcb *inp, } } +#ifdef INET static void sctp_notify_mbuf(struct sctp_inpcb *inp, struct sctp_tcb *stcb, @@ -213,7 +208,7 @@ sctp_notify_mbuf(struct sctp_inpcb *inp, } /* now what about the ep? */ if (stcb->asoc.smallest_mtu > nxtsz) { - sctp_pathmtu_adjustment(inp, stcb, net, nxtsz); + sctp_pathmtu_adjustment(stcb, nxtsz); } if (tmr_stopped) sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); @@ -221,6 +216,7 @@ sctp_notify_mbuf(struct sctp_inpcb *inp, SCTP_TCB_UNLOCK(stcb); } +#endif void sctp_notify(struct sctp_inpcb *inp, @@ -230,15 +226,13 @@ sctp_notify(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif - /* protection */ - int reason; struct icmp *icmph; - + /* protection */ if ((inp == NULL) || (stcb == NULL) || (net == NULL) || (sh == NULL) || (to == NULL)) { if (stcb) @@ -274,35 +268,10 @@ sctp_notify(struct sctp_inpcb *inp, */ if (net->dest_state & SCTP_ADDR_REACHABLE) { /* Ok that destination is NOT reachable */ - SCTP_PRINTF("ICMP (thresh %d/%d) takes interface %p down\n", - net->error_count, - net->failure_threshold, - net); - net->dest_state &= ~SCTP_ADDR_REACHABLE; - net->dest_state |= SCTP_ADDR_NOT_REACHABLE; - /* - * JRS 5/14/07 - If a destination is unreachable, - * the PF bit is turned off. This allows an - * unambiguous use of the PF bit for destinations - * that are reachable but potentially failed. If the - * destination is set to the unreachable state, also - * set the destination to the PF state. - */ - /* - * Add debug message here if destination is not in - * PF state. - */ - /* Stop any running T3 timers here? */ - if ((stcb->asoc.sctp_cmt_on_off == 1) && - (stcb->asoc.sctp_cmt_pf > 0)) { - net->dest_state &= ~SCTP_ADDR_PF; - SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n", - net); - } - net->error_count = net->failure_threshold + 1; + net->dest_state &= ~SCTP_ADDR_PF; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, - stcb, SCTP_FAILED_THRESHOLD, + stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED); } SCTP_TCB_UNLOCK(stcb); @@ -315,9 +284,8 @@ sctp_notify(struct sctp_inpcb *inp, * now is dead. In either case treat it like a OOTB abort * with no TCB */ - reason = SCTP_PEER_FAULTY; - sctp_abort_notification(stcb, reason, SCTP_SO_NOT_LOCKED); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED); +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -326,7 +294,7 @@ sctp_notify(struct sctp_inpcb *inp, atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); /* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */ #endif @@ -336,6 +304,7 @@ sctp_notify(struct sctp_inpcb *inp, } } +#ifdef INET void sctp_ctlinput(cmd, sa, vip) int cmd; @@ -378,8 +347,8 @@ sctp_ctlinput(cmd, sa, vip) * 'from' holds our local endpoint address. Thus we reverse * the to and the from in the lookup. */ - stcb = sctp_findassociation_addr_sa((struct sockaddr *)&from, - (struct sockaddr *)&to, + stcb = sctp_findassociation_addr_sa((struct sockaddr *)&to, + (struct sockaddr *)&from, &inp, &net, 1, vrf_id); if (stcb != NULL && inp && (inp->sctp_socket != NULL)) { if (cmd != PRC_MSGSIZE) { @@ -405,6 +374,8 @@ sctp_ctlinput(cmd, sa, vip) return; } +#endif + static int sctp_getcred(SYSCTL_HANDLER_ARGS) { @@ -428,8 +399,8 @@ sctp_getcred(SYSCTL_HANDLER_ARGS) if (error) return (error); - stcb = sctp_findassociation_addr_sa(sintosa(&addrs[0]), - sintosa(&addrs[1]), + stcb = sctp_findassociation_addr_sa(sintosa(&addrs[1]), + sintosa(&addrs[0]), &inp, &net, 1, vrf_id); if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) { if ((inp != NULL) && (stcb == NULL)) { @@ -467,6 +438,7 @@ SYSCTL_PROC(_net_inet_sctp, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW, 0, 0, sctp_getcred, "S,ucred", "Get the ucred of a SCTP connection"); +#ifdef INET static void sctp_abort(struct socket *so) { @@ -474,7 +446,7 @@ sctp_abort(struct socket *so) uint32_t flags; inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { return; } sctp_must_try_again: @@ -510,7 +482,7 @@ sctp_must_try_again: } static int -sctp_attach(struct socket *so, int proto, struct thread *p) +sctp_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUSED) { struct sctp_inpcb *inp; struct inpcb *ip_inp; @@ -525,17 +497,17 @@ sctp_attach(struct socket *so, int proto, struct thread *p) inp = (struct sctp_inpcb *)so->so_pcb; if (inp != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return EINVAL; + return (EINVAL); } if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace)); if (error) { - return error; + return (error); } } error = sctp_inpcb_alloc(so, vrf_id); if (error) { - return error; + return (error); } inp = (struct sctp_inpcb *)so->so_pcb; SCTP_INP_WLOCK(inp); @@ -567,39 +539,34 @@ try_again: SCTP_INP_WUNLOCK(inp); } } - return error; + return (error); } #endif /* IPSEC */ SCTP_INP_WUNLOCK(inp); - return 0; + return (0); } static int sctp_bind(struct socket *so, struct sockaddr *addr, struct thread *p) { - struct sctp_inpcb *inp = NULL; - int error; + struct sctp_inpcb *inp; -#ifdef INET6 - if (addr && addr->sa_family != AF_INET) { - /* must be a v4 address! */ - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return EINVAL; - } -#endif /* INET6 */ - if (addr && (addr->sa_len != sizeof(struct sockaddr_in))) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return EINVAL; - } inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return EINVAL; + return (EINVAL); + } + if (addr != NULL) { + if ((addr->sa_family != AF_INET) || + (addr->sa_len != sizeof(struct sockaddr_in))) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + return (EINVAL); + } } - error = sctp_inpcb_bind(so, addr, NULL, p); - return error; + return (sctp_inpcb_bind(so, addr, NULL, p)); } +#endif void sctp_close(struct socket *so) { @@ -607,7 +574,7 @@ sctp_close(struct socket *so) uint32_t flags; inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) + if (inp == NULL) return; /* @@ -672,20 +639,19 @@ sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, int error; inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { if (control) { sctp_m_freem(control); control = NULL; } SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); sctp_m_freem(m); - return EINVAL; + return (EINVAL); } /* Got to have an to address if we are NOT a connected socket */ if ((addr == NULL) && ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) || - (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE)) - ) { + (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE))) { goto connected_type; } else if (addr == NULL) { SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ); @@ -707,7 +673,7 @@ sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, control = NULL; } error = EDESTADDRREQ; - return EDESTADDRREQ; + return (error); } #endif /* INET6 */ connected_type: @@ -806,9 +772,6 @@ sctp_disconnect(struct socket *so) ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(err)); } -#if defined(SCTP_PANIC_ON_ABORT) - panic("disconnect does an abort"); -#endif sctp_send_abort_tcb(stcb, err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); } @@ -831,22 +794,26 @@ sctp_disconnect(struct socket *so) if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { /* only send SHUTDOWN 1st time thru */ - sctp_stop_timers_for_shutdown(stcb); - sctp_send_shutdown(stcb, - stcb->asoc.primary_destination); - sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED); + struct sctp_nets *netp; + if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); + sctp_stop_timers_for_shutdown(stcb); + if (stcb->asoc.alternate) { + netp = stcb->asoc.alternate; + } else { + netp = stcb->asoc.primary_destination; + } + sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, - stcb->sctp_ep, stcb, - asoc->primary_destination); + stcb->sctp_ep, stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, - stcb->sctp_ep, stcb, - asoc->primary_destination); + stcb->sctp_ep, stcb, netp); + sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED); } } else { /* @@ -859,9 +826,17 @@ sctp_disconnect(struct socket *so) * we will allow user data to be sent first * and move to SHUTDOWN-PENDING */ + struct sctp_nets *netp; + + if (stcb->asoc.alternate) { + netp = stcb->asoc.alternate; + } else { + netp = stcb->asoc.primary_destination; + } + asoc->state |= SCTP_STATE_SHUTDOWN_PENDING; sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, - asoc->primary_destination); + netp); if (asoc->locked_on_sending) { /* Locked to send out the data */ struct sctp_stream_queue_pending *sp; @@ -881,7 +856,7 @@ sctp_disconnect(struct socket *so) struct mbuf *op_err; abort_anyway: - op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), + op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA); if (op_err) { /* @@ -889,22 +864,12 @@ sctp_disconnect(struct socket *so) * initiated abort */ struct sctp_paramhdr *ph; - uint32_t *ippp; - - SCTP_BUF_LEN(op_err) = - (sizeof(struct sctp_paramhdr) + sizeof(uint32_t)); - ph = mtod(op_err, - struct sctp_paramhdr *); - ph->param_type = htons( - SCTP_CAUSE_USER_INITIATED_ABT); + + SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr); + ph = mtod(op_err, struct sctp_paramhdr *); + ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(op_err)); - ippp = (uint32_t *) (ph + 1); - *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4); } -#if defined(SCTP_PANIC_ON_ABORT) - panic("disconnect does an abort"); -#endif - stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4; sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); @@ -929,7 +894,7 @@ sctp_disconnect(struct socket *so) /* UDP model does not support this */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); - return EOPNOTSUPP; + return (EOPNOTSUPP); } } @@ -947,7 +912,7 @@ sctp_flush(struct socket *so, int how) inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return EINVAL; + return (EINVAL); } SCTP_INP_RLOCK(inp); /* For the 1 to many model this does nothing */ @@ -989,13 +954,14 @@ sctp_shutdown(struct socket *so) struct sctp_inpcb *inp; inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return EINVAL; + return (EINVAL); } SCTP_INP_RLOCK(inp); /* For UDP model this is a invalid call */ - if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) { + if (!((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { /* Restore the flags that the soshutdown took away. */ SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_state &= ~SBS_CANTRCVMORE; @@ -1041,31 +1007,43 @@ sctp_shutdown(struct socket *so) /* there is nothing queued to send, so I'm done... */ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) { /* only send SHUTDOWN the first time through */ - sctp_stop_timers_for_shutdown(stcb); - sctp_send_shutdown(stcb, - stcb->asoc.primary_destination); - sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED); + struct sctp_nets *netp; + if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); + sctp_stop_timers_for_shutdown(stcb); + if (stcb->asoc.alternate) { + netp = stcb->asoc.alternate; + } else { + netp = stcb->asoc.primary_destination; + } + sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, - stcb->sctp_ep, stcb, - asoc->primary_destination); + stcb->sctp_ep, stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, - stcb->sctp_ep, stcb, - asoc->primary_destination); + stcb->sctp_ep, stcb, netp); + sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED); } } else { /* * we still got (or just got) data to send, so set * SHUTDOWN_PENDING */ + struct sctp_nets *netp; + + if (stcb->asoc.alternate) { + netp = stcb->asoc.alternate; + } else { + netp = stcb->asoc.primary_destination; + } + asoc->state |= SCTP_STATE_SHUTDOWN_PENDING; sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, - asoc->primary_destination); + netp); if (asoc->locked_on_sending) { /* Locked to send out the data */ @@ -1087,29 +1065,19 @@ sctp_shutdown(struct socket *so) struct mbuf *op_err; abort_anyway: - op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), + op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA); if (op_err) { /* Fill in the user initiated abort */ struct sctp_paramhdr *ph; - uint32_t *ippp; - - SCTP_BUF_LEN(op_err) = - sizeof(struct sctp_paramhdr) + sizeof(uint32_t); - ph = mtod(op_err, - struct sctp_paramhdr *); - ph->param_type = htons( - SCTP_CAUSE_USER_INITIATED_ABT); + + SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr); + ph = mtod(op_err, struct sctp_paramhdr *); + ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(op_err)); - ippp = (uint32_t *) (ph + 1); - *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6); } -#if defined(SCTP_PANIC_ON_ABORT) - panic("shutdown does an abort"); -#endif stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6; sctp_abort_an_association(stcb->sctp_ep, stcb, - SCTP_RESPONSE_TO_USER_REQ, op_err, SCTP_SO_LOCKED); goto skip_unlock; } else { @@ -1120,7 +1088,7 @@ sctp_shutdown(struct socket *so) } skip_unlock: SCTP_INP_RUNLOCK(inp); - return 0; + return (0); } /* @@ -1165,23 +1133,29 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp, if (stcb) { /* Turn on all the appropriate scope */ - loopback_scope = stcb->asoc.loopback_scope; - ipv4_local_scope = stcb->asoc.ipv4_local_scope; - local_scope = stcb->asoc.local_scope; - site_scope = stcb->asoc.site_scope; + loopback_scope = stcb->asoc.scope.loopback_scope; + ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope; + local_scope = stcb->asoc.scope.local_scope; + site_scope = stcb->asoc.scope.site_scope; + ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal; + ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal; } else { - /* Turn on ALL scope, since we look at the EP */ - loopback_scope = ipv4_local_scope = local_scope = - site_scope = 1; - } - ipv4_addr_legal = ipv6_addr_legal = 0; - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - ipv6_addr_legal = 1; - if (SCTP_IPV6_V6ONLY(inp) == 0) { + /* Use generic values for endpoints. */ + loopback_scope = 1; + ipv4_local_scope = 1; + local_scope = 1; + site_scope = 1; + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + ipv6_addr_legal = 1; + if (SCTP_IPV6_V6ONLY(inp)) { + ipv4_addr_legal = 0; + } else { + ipv4_addr_legal = 1; + } + } else { + ipv6_addr_legal = 0; ipv4_addr_legal = 1; } - } else { - ipv4_addr_legal = 1; } vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { @@ -1210,6 +1184,7 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp, } } switch (sctp_ifa->address.sa.sa_family) { +#ifdef INET case AF_INET: if (ipv4_addr_legal) { struct sockaddr_in *sin; @@ -1249,6 +1224,7 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp, continue; } break; +#endif #ifdef INET6 case AF_INET6: if (ipv6_addr_legal) { @@ -1318,8 +1294,21 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp, } if (sctp_fill_user_address(sas, &laddr->ifa->address.sa)) continue; - - ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport; + switch (laddr->ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: + ((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport; + break; +#endif +#ifdef INET6 + case AF_INET6: + ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport; + break; +#endif + default: + /* TSNH */ + break; + } sas = (struct sockaddr_storage *)((caddr_t)sas + laddr->ifa->address.sa.sa_len); actual += laddr->ifa->address.sa.sa_len; @@ -1374,28 +1363,46 @@ sctp_count_max_addresses_vrf(struct sctp_inpcb *inp, uint32_t vrf_id) LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { /* Count them if they are the right type */ - if (sctp_ifa->address.sa.sa_family == AF_INET) { + switch (sctp_ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) cnt += sizeof(struct sockaddr_in6); else cnt += sizeof(struct sockaddr_in); - - } else if (sctp_ifa->address.sa.sa_family == AF_INET6) + break; +#endif +#ifdef INET6 + case AF_INET6: cnt += sizeof(struct sockaddr_in6); + break; +#endif + default: + break; + } } } } else { struct sctp_laddr *laddr; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if (laddr->ifa->address.sa.sa_family == AF_INET) { + switch (laddr->ifa->address.sa.sa_family) { +#ifdef INET + case AF_INET: if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) cnt += sizeof(struct sockaddr_in6); else cnt += sizeof(struct sockaddr_in); - - } else if (laddr->ifa->address.sa.sa_family == AF_INET6) + break; +#endif +#ifdef INET6 + case AF_INET6: cnt += sizeof(struct sockaddr_in6); + break; +#endif + default: + break; + } } } return (cnt); @@ -1422,7 +1429,6 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval, struct sctp_tcb *stcb = NULL; struct sockaddr *sa; int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr; - int added = 0; uint32_t vrf_id; int bad_addresses = 0; sctp_assoc_t *a_id; @@ -1516,15 +1522,30 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval, /* Gak! no memory */ goto out_now; } + if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { + stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; + /* Set the connected flag so we can queue data */ + soisconnecting(so); + } SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT); /* move to second address */ - if (sa->sa_family == AF_INET) + switch (sa->sa_family) { +#ifdef INET + case AF_INET: sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in)); - else + break; +#endif +#ifdef INET6 + case AF_INET6: sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in6)); + break; +#endif + default: + break; + } error = 0; - added = sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error); + sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error); /* Fill in the return id */ if (error) { (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6); @@ -1555,7 +1576,7 @@ out_now: SCTP_ASOC_CREATE_UNLOCK(inp); } SCTP_INP_DECR_REF(inp); - return error; + return (error); } #define SCTP_FIND_STCB(inp, stcb, assoc_id) { \ @@ -1567,7 +1588,7 @@ out_now: SCTP_TCB_LOCK(stcb); \ } \ SCTP_INP_RUNLOCK(inp); \ - } else if (assoc_id != 0) { \ + } else if (assoc_id > SCTP_ALL_ASSOC) { \ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); \ if (stcb == NULL) { \ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); \ @@ -1580,7 +1601,7 @@ out_now: } -#define SCTP_CHECK_AND_CAST(destp, srcp, type, size) {\ +#define SCTP_CHECK_AND_CAST(destp, srcp, type, size) {\ if (size < sizeof(type)) { \ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); \ error = EINVAL; \ @@ -1603,7 +1624,7 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, return (EINVAL); } inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return EINVAL; } @@ -1655,10 +1676,6 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; } /* end switch (sopt->sopt_name) */ - if (optname != SCTP_AUTOCLOSE) { - /* make it an "on/off" value */ - val = (val != 0); - } if (*optsize < sizeof(val)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; @@ -1698,8 +1715,8 @@ flags_out: SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); *value = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE); *optsize = sizeof(uint32_t); + break; } - break; case SCTP_PARTIAL_DELIVERY_POINT: { uint32_t *value; @@ -1707,8 +1724,8 @@ flags_out: SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); *value = inp->partial_delivery_point; *optsize = sizeof(uint32_t); + break; } - break; case SCTP_FRAGMENT_INTERLEAVE: { uint32_t *value; @@ -1724,8 +1741,8 @@ flags_out: *value = SCTP_FRAG_LEVEL_0; } *optsize = sizeof(uint32_t); + break; } - break; case SCTP_CMT_ON_OFF: { struct sctp_assoc_value *av; @@ -1736,14 +1753,22 @@ flags_out: av->assoc_value = stcb->asoc.sctp_cmt_on_off; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - av->assoc_value = inp->sctp_cmt_on_off; - SCTP_INP_RUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_cmt_on_off; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } - *optsize = sizeof(*av); + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; } - break; - /* JRS - Get socket option for pluggable congestion control */ case SCTP_PLUGGABLE_CC: { struct sctp_assoc_value *av; @@ -1754,11 +1779,92 @@ flags_out: av->assoc_value = stcb->asoc.congestion_control_module; SCTP_TCB_UNLOCK(stcb); } else { - av->assoc_value = inp->sctp_ep.sctp_default_cc_module; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_ep.sctp_default_cc_module; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; + } + case SCTP_CC_OPTION: + { + struct sctp_cc_option *cc_opt; + + SCTP_CHECK_AND_CAST(cc_opt, optval, struct sctp_cc_option, *optsize); + SCTP_FIND_STCB(inp, stcb, cc_opt->aid_value.assoc_id); + if (stcb == NULL) { + error = EINVAL; + } else { + if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) { + error = ENOTSUP; + } else { + error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 0, cc_opt); + *optsize = sizeof(struct sctp_cc_option); + } + SCTP_TCB_UNLOCK(stcb); } - *optsize = sizeof(*av); + break; + } + case SCTP_PLUGGABLE_SS: + { + struct sctp_assoc_value *av; + + SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); + SCTP_FIND_STCB(inp, stcb, av->assoc_id); + if (stcb) { + av->assoc_value = stcb->asoc.stream_scheduling_module; + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_ep.sctp_default_ss_module; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; + } + case SCTP_SS_VALUE: + { + struct sctp_stream_value *av; + + SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, *optsize); + SCTP_FIND_STCB(inp, stcb, av->assoc_id); + if (stcb) { + if (stcb->asoc.ss_functions.sctp_ss_get_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id], + &av->stream_value) < 0) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } else { + *optsize = sizeof(struct sctp_stream_value); + } + SCTP_TCB_UNLOCK(stcb); + } else { + /* + * Can't get stream value without + * association + */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + break; } - break; case SCTP_GET_ADDR_LEN: { struct sctp_assoc_value *av; @@ -1779,10 +1885,11 @@ flags_out: #endif if (error) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + } else { + *optsize = sizeof(struct sctp_assoc_value); } - *optsize = sizeof(*av); + break; } - break; case SCTP_GET_ASSOC_NUMBER: { uint32_t *value, cnt; @@ -1796,9 +1903,8 @@ flags_out: SCTP_INP_RUNLOCK(inp); *value = cnt; *optsize = sizeof(uint32_t); + break; } - break; - case SCTP_GET_ASSOC_ID_LIST: { struct sctp_assoc_ids *ids; @@ -1818,10 +1924,12 @@ flags_out: } } SCTP_INP_RUNLOCK(inp); - ids->gaids_number_of_ids = at; - *optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t)); + if (error == 0) { + ids->gaids_number_of_ids = at; + *optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t)); + } + break; } - break; case SCTP_CONTEXT: { struct sctp_assoc_value *av; @@ -1833,19 +1941,29 @@ flags_out: av->assoc_value = stcb->asoc.context; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - av->assoc_value = inp->sctp_context; - SCTP_INP_RUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_context; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); } - *optsize = sizeof(*av); + break; } - break; case SCTP_VRF_ID: { uint32_t *default_vrfid; SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, *optsize); *default_vrfid = inp->def_vrf_id; + *optsize = sizeof(uint32_t); break; } case SCTP_GET_ASOC_VRF: @@ -1857,9 +1975,10 @@ flags_out: if (stcb == NULL) { error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); - break; + } else { + id->assoc_value = stcb->asoc.vrf_id; + *optsize = sizeof(struct sctp_assoc_value); } - id->assoc_value = stcb->asoc.vrf_id; break; } case SCTP_GET_VRF_IDS: @@ -1879,13 +1998,13 @@ flags_out: gnv->gn_peers_tag = stcb->asoc.peer_vtag; gnv->gn_local_tag = stcb->asoc.my_vtag; SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_get_nonce_values); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); error = ENOTCONN; } - *optsize = sizeof(*gnv); + break; } - break; case SCTP_DELAYED_SACK: { struct sctp_sack_info *sack; @@ -1897,16 +2016,24 @@ flags_out: sack->sack_freq = stcb->asoc.sack_freq; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); - sack->sack_freq = inp->sctp_ep.sctp_sack_freq; - SCTP_INP_RUNLOCK(inp); - } - *optsize = sizeof(*sack); - } - break; - - case SCTP_GET_SNDBUF_USE: + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (sack->sack_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); + sack->sack_freq = inp->sctp_ep.sctp_sack_freq; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_sack_info); + } + break; + } + case SCTP_GET_SNDBUF_USE: { struct sctp_sockstat *ss; @@ -1918,13 +2045,13 @@ flags_out: ss->ss_total_recv_buf = (stcb->asoc.size_on_reasm_queue + stcb->asoc.size_on_all_streams); SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_sockstat); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); error = ENOTCONN; } - *optsize = sizeof(struct sctp_sockstat); + break; } - break; case SCTP_MAX_BURST: { uint8_t *value; @@ -1932,11 +2059,15 @@ flags_out: SCTP_CHECK_AND_CAST(value, optval, uint8_t, *optsize); SCTP_INP_RLOCK(inp); - *value = inp->sctp_ep.max_burst; + if (inp->sctp_ep.max_burst < 256) { + *value = inp->sctp_ep.max_burst; + } else { + *value = 255; + } SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint8_t); + break; } - break; case SCTP_MAXSEG: { struct sctp_assoc_value *av; @@ -1949,21 +2080,30 @@ flags_out: av->assoc_value = sctp_get_frag_point(stcb, &stcb->asoc); SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - ovh = SCTP_MED_OVERHEAD; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + ovh = SCTP_MED_OVERHEAD; + } else { + ovh = SCTP_MED_V4_OVERHEAD; + } + if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT) + av->assoc_value = 0; + else + av->assoc_value = inp->sctp_frag_point - ovh; + SCTP_INP_RUNLOCK(inp); } else { - ovh = SCTP_MED_V4_OVERHEAD; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT) - av->assoc_value = 0; - else - av->assoc_value = inp->sctp_frag_point - ovh; - SCTP_INP_RUNLOCK(inp); } - *optsize = sizeof(struct sctp_assoc_value); + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; } - break; case SCTP_GET_STAT_LOG: error = sctp_fill_stat_log(optval, optsize); break; @@ -1972,7 +2112,7 @@ flags_out: struct sctp_event_subscribe *events; SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, *optsize); - memset(events, 0, sizeof(*events)); + memset(events, 0, sizeof(struct sctp_event_subscribe)); SCTP_INP_RLOCK(inp); if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) events->sctp_data_io_event = 1; @@ -2008,9 +2148,8 @@ flags_out: events->sctp_stream_reset_event = 1; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(struct sctp_event_subscribe); + break; } - break; - case SCTP_ADAPTATION_LAYER: { uint32_t *value; @@ -2021,8 +2160,8 @@ flags_out: *value = inp->sctp_ep.adaptation_layer_indicator; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); + break; } - break; case SCTP_SET_INITIAL_DBG_SEQ: { uint32_t *value; @@ -2032,8 +2171,8 @@ flags_out: *value = inp->sctp_ep.initial_sequence_debug; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); + break; } - break; case SCTP_GET_LOCAL_ADDR_SIZE: { uint32_t *value; @@ -2043,8 +2182,8 @@ flags_out: *value = sctp_count_max_addresses(inp); SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); + break; } - break; case SCTP_GET_REMOTE_ADDR_SIZE: { uint32_t *value; @@ -2059,25 +2198,34 @@ flags_out: size = 0; /* Count the sizes */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) || - (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET6)) { + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) { size += sizeof(struct sockaddr_in6); - } else if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) { - size += sizeof(struct sockaddr_in); } else { - /* huh */ - break; + switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) { +#ifdef INET + case AF_INET: + size += sizeof(struct sockaddr_in); + break; +#endif +#ifdef INET6 + case AF_INET6: + size += sizeof(struct sockaddr_in6); + break; +#endif + default: + break; + } } } SCTP_TCB_UNLOCK(stcb); *value = (uint32_t) size; + *optsize = sizeof(uint32_t); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); error = ENOTCONN; } - *optsize = sizeof(uint32_t); + break; } - break; case SCTP_GET_PEER_ADDRESSES: /* * Get the address information, an array is passed in to @@ -2098,20 +2246,33 @@ flags_out: sas = (struct sockaddr_storage *)&saddr->addr[0]; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) || - (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET6)) { + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) { cpsz = sizeof(struct sockaddr_in6); - } else if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) { - cpsz = sizeof(struct sockaddr_in); } else { - /* huh */ + switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) { +#ifdef INET + case AF_INET: + cpsz = sizeof(struct sockaddr_in); + break; +#endif +#ifdef INET6 + case AF_INET6: + cpsz = sizeof(struct sockaddr_in6); + break; +#endif + default: + cpsz = 0; + break; + } + } + if (cpsz == 0) { break; } if (left < cpsz) { /* not enough room. */ break; } -#ifdef INET6 +#if defined(INET) && defined(INET6) if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) && (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET)) { /* Must map the address */ @@ -2120,7 +2281,7 @@ flags_out: } else { #endif memcpy(sas, &net->ro._l_addr, cpsz); -#ifdef INET6 +#if defined(INET) && defined(INET6) } #endif ((struct sockaddr_in *)sas)->sin_port = stcb->rport; @@ -2134,8 +2295,8 @@ flags_out: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; } + break; } - break; case SCTP_GET_LOCAL_ADDRESSES: { size_t limit, actual; @@ -2152,8 +2313,8 @@ flags_out: SCTP_TCB_UNLOCK(stcb); } *optsize = sizeof(struct sockaddr_storage) + actual; + break; } - break; case SCTP_PEER_ADDR_PARAMS: { struct sctp_paddrparams *paddrp; @@ -2183,6 +2344,7 @@ flags_out: struct sockaddr *sa; sa = (struct sockaddr *)&paddrp->spp_address; +#ifdef INET if (sa->sa_family == AF_INET) { struct sockaddr_in *sin; @@ -2193,7 +2355,10 @@ flags_out: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } - } else if (sa->sa_family == AF_INET6) { + } else +#endif +#ifdef INET6 + if (sa->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)sa; @@ -2203,7 +2368,9 @@ flags_out: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } - } else { + } else +#endif + { error = EAFNOSUPPORT; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); @@ -2211,7 +2378,7 @@ flags_out: } } if (stcb) { - /* Applys to the specific association */ + /* Applies to the specific association */ paddrp->spp_flags = 0; if (net) { int ovh; @@ -2222,29 +2389,29 @@ flags_out: ovh = SCTP_MED_V4_OVERHEAD; } - + paddrp->spp_hbinterval = net->heart_beat_delay; paddrp->spp_pathmaxrxt = net->failure_threshold; paddrp->spp_pathmtu = net->mtu - ovh; /* get flags for HB */ - if (net->dest_state & SCTP_ADDR_NOHB) + if (net->dest_state & SCTP_ADDR_NOHB) { paddrp->spp_flags |= SPP_HB_DISABLE; - else + } else { paddrp->spp_flags |= SPP_HB_ENABLE; + } /* get flags for PMTU */ - if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { + if (net->dest_state & SCTP_ADDR_NO_PMTUD) { paddrp->spp_flags |= SPP_PMTUD_ENABLE; } else { paddrp->spp_flags |= SPP_PMTUD_DISABLE; } -#ifdef INET - if (net->ro._l_addr.sin.sin_family == AF_INET) { - paddrp->spp_ipv4_tos = net->tos_flowlabel & 0x000000fc; - paddrp->spp_flags |= SPP_IPV4_TOS; + if (net->dscp & 0x01) { + paddrp->spp_dscp = net->dscp & 0xfc; + paddrp->spp_flags |= SPP_DSCP; } -#endif #ifdef INET6 - if (net->ro._l_addr.sin6.sin6_family == AF_INET6) { - paddrp->spp_ipv6_flowlabel = net->tos_flowlabel; + if ((net->ro._l_addr.sa.sa_family == AF_INET6) && + (net->flowlabel & 0x80000000)) { + paddrp->spp_ipv6_flowlabel = net->flowlabel & 0x000fffff; paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; } #endif @@ -2253,69 +2420,78 @@ flags_out: * No destination so return default * value */ - int cnt = 0; - paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure; paddrp->spp_pathmtu = sctp_get_frag_point(stcb, &stcb->asoc); -#ifdef INET - paddrp->spp_ipv4_tos = stcb->asoc.default_tos & 0x000000fc; - paddrp->spp_flags |= SPP_IPV4_TOS; -#endif + if (stcb->asoc.default_dscp & 0x01) { + paddrp->spp_dscp = stcb->asoc.default_dscp & 0xfc; + paddrp->spp_flags |= SPP_DSCP; + } #ifdef INET6 - paddrp->spp_ipv6_flowlabel = stcb->asoc.default_flowlabel; - paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; + if (stcb->asoc.default_flowlabel & 0x80000000) { + paddrp->spp_ipv6_flowlabel = stcb->asoc.default_flowlabel & 0x000fffff; + paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; + } #endif /* default settings should be these */ - if (stcb->asoc.hb_is_disabled == 0) { - paddrp->spp_flags |= SPP_HB_ENABLE; - } else { + if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { paddrp->spp_flags |= SPP_HB_DISABLE; + } else { + paddrp->spp_flags |= SPP_HB_ENABLE; } - TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { - cnt++; - } - } - if (cnt) { + if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD)) { + paddrp->spp_flags |= SPP_PMTUD_DISABLE; + } else { paddrp->spp_flags |= SPP_PMTUD_ENABLE; } + paddrp->spp_hbinterval = stcb->asoc.heart_beat_delay; } - paddrp->spp_hbinterval = stcb->asoc.heart_beat_delay; paddrp->spp_assoc_id = sctp_get_associd(stcb); SCTP_TCB_UNLOCK(stcb); } else { - /* Use endpoint defaults */ - SCTP_INP_RLOCK(inp); - paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure; - paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); - paddrp->spp_assoc_id = (sctp_assoc_t) 0; - /* get inp's default */ -#ifdef INET - paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos; - paddrp->spp_flags |= SPP_IPV4_TOS; -#endif + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC)) { + /* Use endpoint defaults */ + SCTP_INP_RLOCK(inp); + paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure; + paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); + paddrp->spp_assoc_id = SCTP_FUTURE_ASSOC; + /* get inp's default */ + if (inp->sctp_ep.default_dscp & 0x01) { + paddrp->spp_dscp = inp->sctp_ep.default_dscp & 0xfc; + paddrp->spp_flags |= SPP_DSCP; + } #ifdef INET6 - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo; - paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; - } + if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && + (inp->sctp_ep.default_flowlabel & 0x80000000)) { + paddrp->spp_ipv6_flowlabel = inp->sctp_ep.default_flowlabel & 0x000fffff; + paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; + } #endif - /* can't return this */ - paddrp->spp_pathmtu = 0; + /* can't return this */ + paddrp->spp_pathmtu = 0; - /* default behavior, no stcb */ - paddrp->spp_flags = SPP_PMTUD_ENABLE; - - if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { - paddrp->spp_flags |= SPP_HB_ENABLE; + if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { + paddrp->spp_flags |= SPP_HB_ENABLE; + } else { + paddrp->spp_flags |= SPP_HB_DISABLE; + } + if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD)) { + paddrp->spp_flags |= SPP_PMTUD_ENABLE; + } else { + paddrp->spp_flags |= SPP_PMTUD_DISABLE; + } + SCTP_INP_RUNLOCK(inp); } else { - paddrp->spp_flags |= SPP_HB_DISABLE; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_RUNLOCK(inp); } - *optsize = sizeof(struct sctp_paddrparams); + if (error == 0) { + *optsize = sizeof(struct sctp_paddrparams); + } + break; } - break; case SCTP_GET_PEER_ADDR_INFO: { struct sctp_paddrinfo *paddri; @@ -2343,12 +2519,23 @@ flags_out: } if ((stcb) && (net)) { - paddri->spinfo_state = net->dest_state & (SCTP_REACHABLE_MASK | SCTP_ADDR_NOHB); + if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { + /* It's unconfirmed */ + paddri->spinfo_state = SCTP_UNCONFIRMED; + } else if (net->dest_state & SCTP_ADDR_REACHABLE) { + /* It's active */ + paddri->spinfo_state = SCTP_ACTIVE; + } else { + /* It's inactive */ + paddri->spinfo_state = SCTP_INACTIVE; + } paddri->spinfo_cwnd = net->cwnd; - paddri->spinfo_srtt = ((net->lastsa >> 2) + net->lastsv) >> 1; + paddri->spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT; paddri->spinfo_rto = net->RTO; paddri->spinfo_assoc_id = sctp_get_associd(stcb); + paddri->spinfo_mtu = net->mtu; SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_paddrinfo); } else { if (stcb) { SCTP_TCB_UNLOCK(stcb); @@ -2356,9 +2543,8 @@ flags_out: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; } - *optsize = sizeof(struct sctp_paddrinfo); + break; } - break; case SCTP_PCB_STATUS: { struct sctp_pcbinfo *spcb; @@ -2366,9 +2552,8 @@ flags_out: SCTP_CHECK_AND_CAST(spcb, optval, struct sctp_pcbinfo, *optsize); sctp_fill_pcbinfo(spcb); *optsize = sizeof(struct sctp_pcbinfo); + break; } - break; - case SCTP_STATUS: { struct sctp_nets *net; @@ -2378,7 +2563,7 @@ flags_out: SCTP_FIND_STCB(inp, stcb, sstat->sstat_assoc_id); if (stcb == NULL) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } @@ -2411,16 +2596,25 @@ flags_out: * Again the user can get info from sctp_constants.h * for what the state of the network is. */ - sstat->sstat_primary.spinfo_state = net->dest_state & SCTP_REACHABLE_MASK; + if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { + /* It's unconfirmed */ + sstat->sstat_primary.spinfo_state = SCTP_UNCONFIRMED; + } else if (net->dest_state & SCTP_ADDR_REACHABLE) { + /* It's active */ + sstat->sstat_primary.spinfo_state = SCTP_ACTIVE; + } else { + /* It's inactive */ + sstat->sstat_primary.spinfo_state = SCTP_INACTIVE; + } sstat->sstat_primary.spinfo_cwnd = net->cwnd; - sstat->sstat_primary.spinfo_srtt = net->lastsa; + sstat->sstat_primary.spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT; sstat->sstat_primary.spinfo_rto = net->RTO; sstat->sstat_primary.spinfo_mtu = net->mtu; sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb); SCTP_TCB_UNLOCK(stcb); - *optsize = sizeof(*sstat); + *optsize = sizeof(struct sctp_status); + break; } - break; case SCTP_RTOINFO: { struct sctp_rtoinfo *srto; @@ -2434,15 +2628,24 @@ flags_out: srto->srto_min = stcb->asoc.minrto; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - srto->srto_initial = inp->sctp_ep.initial_rto; - srto->srto_max = inp->sctp_ep.sctp_maxrto; - srto->srto_min = inp->sctp_ep.sctp_minrto; - SCTP_INP_RUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (srto->srto_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + srto->srto_initial = inp->sctp_ep.initial_rto; + srto->srto_max = inp->sctp_ep.sctp_maxrto; + srto->srto_min = inp->sctp_ep.sctp_minrto; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_rtoinfo); } - *optsize = sizeof(*srto); + break; } - break; case SCTP_TIMEOUTS: { struct sctp_timeouts *stimo; @@ -2459,23 +2662,21 @@ flags_out: stimo->stimo_cookie = stcb->asoc.timocookie; stimo->stimo_shutdownack = stcb->asoc.timoshutdownack; SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_timeouts); } else { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } - *optsize = sizeof(*stimo); + break; } - break; case SCTP_ASSOCINFO: { struct sctp_assocparams *sasoc; - uint32_t oldval; SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, *optsize); SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id); if (stcb) { - oldval = sasoc->sasoc_cookie_life; sasoc->sasoc_cookie_life = TICKS_TO_MSEC(stcb->asoc.cookie_life); sasoc->sasoc_asocmaxrxt = stcb->asoc.max_send_times; sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets; @@ -2483,17 +2684,26 @@ flags_out: sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life); - sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times; - sasoc->sasoc_number_peer_destinations = 0; - sasoc->sasoc_peer_rwnd = 0; - sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv); - SCTP_INP_RUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life); + sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times; + sasoc->sasoc_number_peer_destinations = 0; + sasoc->sasoc_peer_rwnd = 0; + sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv); + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } - *optsize = sizeof(*sasoc); + if (error == 0) { + *optsize = sizeof(struct sctp_assocparams); + } + break; } - break; case SCTP_DEFAULT_SEND_PARAM: { struct sctp_sndrcvinfo *s_info; @@ -2505,13 +2715,22 @@ flags_out: memcpy(s_info, &stcb->asoc.def_send, sizeof(stcb->asoc.def_send)); SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - memcpy(s_info, &inp->def_send, sizeof(inp->def_send)); - SCTP_INP_RUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + memcpy(s_info, &inp->def_send, sizeof(inp->def_send)); + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_sndrcvinfo); } - *optsize = sizeof(*s_info); + break; } - break; case SCTP_INITMSG: { struct sctp_initmsg *sinit; @@ -2523,9 +2742,9 @@ flags_out: sinit->sinit_max_attempts = inp->sctp_ep.max_init_times; sinit->sinit_max_init_timeo = inp->sctp_ep.initial_init_rto_max; SCTP_INP_RUNLOCK(inp); - *optsize = sizeof(*sinit); + *optsize = sizeof(struct sctp_initmsg); + break; } - break; case SCTP_PRIMARY_ADDR: /* we allow a "get" operation on this */ { @@ -2546,14 +2765,13 @@ flags_out: &stcb->asoc.primary_destination->ro._l_addr, len); SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_setprim); } else { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } - *optsize = sizeof(*ssp); + break; } - break; - case SCTP_HMAC_IDENT: { struct sctp_hmacalgo *shmac; @@ -2575,7 +2793,7 @@ flags_out: size = sizeof(*shmac) + (hmaclist->num_algo * sizeof(shmac->shmac_idents[0])); if ((size_t)(*optsize) < size) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_INP_RUNLOCK(inp); break; @@ -2601,12 +2819,21 @@ flags_out: scact->scact_keynumber = stcb->asoc.authinfo.active_keyid; SCTP_TCB_UNLOCK(stcb); } else { - /* get the endpoint active key */ - SCTP_INP_RLOCK(inp); - scact->scact_keynumber = inp->sctp_ep.default_keyid; - SCTP_INP_RUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (scact->scact_assoc_id == SCTP_FUTURE_ASSOC)) { + /* get the endpoint active key */ + SCTP_INP_RLOCK(inp); + scact->scact_keynumber = inp->sctp_ep.default_keyid; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_authkeyid); } - *optsize = sizeof(*scact); break; } case SCTP_LOCAL_AUTH_CHUNKS: @@ -2629,24 +2856,34 @@ flags_out: } else { /* copy in the chunks */ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); + sac->gauth_number_of_chunks = (uint32_t) size; + *optsize = sizeof(struct sctp_authchunks) + size; } SCTP_TCB_UNLOCK(stcb); } else { - /* get off the endpoint */ - SCTP_INP_RLOCK(inp); - chklist = inp->sctp_ep.local_auth_chunks; - /* is there enough space? */ - size = sctp_auth_get_chklist_size(chklist); - if (*optsize < (sizeof(struct sctp_authchunks) + size)) { - error = EINVAL; - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (sac->gauth_assoc_id == SCTP_FUTURE_ASSOC)) { + /* get off the endpoint */ + SCTP_INP_RLOCK(inp); + chklist = inp->sctp_ep.local_auth_chunks; + /* is there enough space? */ + size = sctp_auth_get_chklist_size(chklist); + if (*optsize < (sizeof(struct sctp_authchunks) + size)) { + error = EINVAL; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + } else { + /* copy in the chunks */ + (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); + sac->gauth_number_of_chunks = (uint32_t) size; + *optsize = sizeof(struct sctp_authchunks) + size; + } + SCTP_INP_RUNLOCK(inp); } else { - /* copy in the chunks */ - (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_RUNLOCK(inp); } - *optsize = sizeof(struct sctp_authchunks) + size; break; } case SCTP_PEER_AUTH_CHUNKS: @@ -2669,35 +2906,423 @@ flags_out: } else { /* copy in the chunks */ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); + sac->gauth_number_of_chunks = (uint32_t) size; + *optsize = sizeof(struct sctp_authchunks) + size; } SCTP_TCB_UNLOCK(stcb); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; } - *optsize = sizeof(struct sctp_authchunks) + size; break; } + case SCTP_EVENT: + { + struct sctp_event *event; + uint32_t event_type; + SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, *optsize); + SCTP_FIND_STCB(inp, stcb, event->se_assoc_id); - default: - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); - error = ENOPROTOOPT; - *optsize = 0; - break; - } /* end switch (sopt->sopt_name) */ - return (error); -} + switch (event->se_type) { + case SCTP_ASSOC_CHANGE: + event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT; + break; + case SCTP_PEER_ADDR_CHANGE: + event_type = SCTP_PCB_FLAGS_RECVPADDREVNT; + break; + case SCTP_REMOTE_ERROR: + event_type = SCTP_PCB_FLAGS_RECVPEERERR; + break; + case SCTP_SEND_FAILED: + event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT; + break; + case SCTP_SHUTDOWN_EVENT: + event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT; + break; + case SCTP_ADAPTATION_INDICATION: + event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT; + break; + case SCTP_PARTIAL_DELIVERY_EVENT: + event_type = SCTP_PCB_FLAGS_PDAPIEVNT; + break; + case SCTP_AUTHENTICATION_EVENT: + event_type = SCTP_PCB_FLAGS_AUTHEVNT; + break; + case SCTP_STREAM_RESET_EVENT: + event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT; + break; + case SCTP_SENDER_DRY_EVENT: + event_type = SCTP_PCB_FLAGS_DRYEVNT; + break; + case SCTP_NOTIFICATIONS_STOPPED_EVENT: + event_type = 0; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); + error = ENOTSUP; + break; + case SCTP_ASSOC_RESET_EVENT: + event_type = SCTP_PCB_FLAGS_ASSOC_RESETEVNT; + break; + case SCTP_STREAM_CHANGE_EVENT: + event_type = SCTP_PCB_FLAGS_STREAM_CHANGEEVNT; + break; + case SCTP_SEND_FAILED_EVENT: + event_type = SCTP_PCB_FLAGS_RECVNSENDFAILEVNT; + break; + default: + event_type = 0; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (event_type > 0) { + if (stcb) { + event->se_on = sctp_stcb_is_feature_on(inp, stcb, event_type); + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (event->se_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + event->se_on = sctp_is_feature_on(inp, event_type); + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_event); + } + break; + } + case SCTP_RECVRCVINFO: + { + int onoff; -static int -sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, - void *p) -{ - int error, set_opt; - uint32_t *mopt; - struct sctp_tcb *stcb = NULL; - struct sctp_inpcb *inp = NULL; - uint32_t vrf_id; + if (*optsize < sizeof(int)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } else { + SCTP_INP_RLOCK(inp); + onoff = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO); + SCTP_INP_RUNLOCK(inp); + } + if (error == 0) { + /* return the option value */ + *(int *)optval = onoff; + *optsize = sizeof(int); + } + break; + } + case SCTP_RECVNXTINFO: + { + int onoff; + + if (*optsize < sizeof(int)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } else { + SCTP_INP_RLOCK(inp); + onoff = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO); + SCTP_INP_RUNLOCK(inp); + } + if (error == 0) { + /* return the option value */ + *(int *)optval = onoff; + *optsize = sizeof(int); + } + break; + } + case SCTP_DEFAULT_SNDINFO: + { + struct sctp_sndinfo *info; + + SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, *optsize); + SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id); + + if (stcb) { + info->snd_sid = stcb->asoc.def_send.sinfo_stream; + info->snd_flags = stcb->asoc.def_send.sinfo_flags; + info->snd_flags &= 0xfff0; + info->snd_ppid = stcb->asoc.def_send.sinfo_ppid; + info->snd_context = stcb->asoc.def_send.sinfo_context; + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (info->snd_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + info->snd_sid = inp->def_send.sinfo_stream; + info->snd_flags = inp->def_send.sinfo_flags; + info->snd_flags &= 0xfff0; + info->snd_ppid = inp->def_send.sinfo_ppid; + info->snd_context = inp->def_send.sinfo_context; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_sndinfo); + } + break; + } + case SCTP_DEFAULT_PRINFO: + { + struct sctp_default_prinfo *info; + + SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, *optsize); + SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id); + + if (stcb) { + info->pr_policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); + info->pr_value = stcb->asoc.def_send.sinfo_timetolive; + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (info->pr_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + info->pr_policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags); + info->pr_value = inp->def_send.sinfo_timetolive; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_default_prinfo); + } + break; + } + case SCTP_PEER_ADDR_THLDS: + { + struct sctp_paddrthlds *thlds; + struct sctp_nets *net; + + SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, *optsize); + SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id); + + net = NULL; + if (stcb) { + net = sctp_findnet(stcb, (struct sockaddr *)&thlds->spt_address); + } else { + /* + * We increment here since + * sctp_findassociation_ep_addr() wil do a + * decrement if it finds the stcb as long as + * the locked tcb (last argument) is NOT a + * TCB.. aka NULL. + */ + SCTP_INP_INCR_REF(inp); + stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&thlds->spt_address, &net, NULL, NULL); + if (stcb == NULL) { + SCTP_INP_DECR_REF(inp); + } + } + if (stcb && (net == NULL)) { + struct sockaddr *sa; + + sa = (struct sockaddr *)&thlds->spt_address; +#ifdef INET + if (sa->sa_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)sa; + if (sin->sin_addr.s_addr) { + error = EINVAL; + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } else +#endif +#ifdef INET6 + if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sa; + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + error = EINVAL; + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } else +#endif + { + error = EAFNOSUPPORT; + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } + if (stcb) { + if (net) { + thlds->spt_pathmaxrxt = net->failure_threshold; + thlds->spt_pathpfthld = net->pf_threshold; + } else { + thlds->spt_pathmaxrxt = stcb->asoc.def_net_failure; + thlds->spt_pathpfthld = stcb->asoc.def_net_pf_threshold; + } + thlds->spt_assoc_id = sctp_get_associd(stcb); + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (thlds->spt_assoc_id == SCTP_FUTURE_ASSOC)) { + /* Use endpoint defaults */ + SCTP_INP_RLOCK(inp); + thlds->spt_pathmaxrxt = inp->sctp_ep.def_net_failure; + thlds->spt_pathpfthld = inp->sctp_ep.def_net_pf_threshold; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_paddrthlds); + } + break; + } + case SCTP_REMOTE_UDP_ENCAPS_PORT: + { + struct sctp_udpencaps *encaps; + struct sctp_nets *net; + + SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, *optsize); + SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id); + + if (stcb) { + net = sctp_findnet(stcb, (struct sockaddr *)&encaps->sue_address); + } else { + /* + * We increment here since + * sctp_findassociation_ep_addr() wil do a + * decrement if it finds the stcb as long as + * the locked tcb (last argument) is NOT a + * TCB.. aka NULL. + */ + net = NULL; + SCTP_INP_INCR_REF(inp); + stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&encaps->sue_address, &net, NULL, NULL); + if (stcb == NULL) { + SCTP_INP_DECR_REF(inp); + } + } + if (stcb && (net == NULL)) { + struct sockaddr *sa; + + sa = (struct sockaddr *)&encaps->sue_address; +#ifdef INET + if (sa->sa_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)sa; + if (sin->sin_addr.s_addr) { + error = EINVAL; + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } else +#endif +#ifdef INET6 + if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sa; + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + error = EINVAL; + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } else +#endif + { + error = EAFNOSUPPORT; + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } + if (stcb) { + if (net) { + encaps->sue_port = net->port; + } else { + encaps->sue_port = stcb->asoc.port; + } + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (encaps->sue_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + encaps->sue_port = inp->sctp_ep.port; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_paddrparams); + } + break; + } + case SCTP_ENABLE_STREAM_RESET: + { + struct sctp_assoc_value *av; + + SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); + SCTP_FIND_STCB(inp, stcb, av->assoc_id); + + if (stcb) { + av->assoc_value = (uint32_t) stcb->asoc.local_strreset_support; + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_RLOCK(inp); + av->assoc_value = (uint32_t) inp->local_strreset_support; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; + } + default: + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); + error = ENOPROTOOPT; + break; + } /* end switch (sopt->sopt_name) */ + if (error) { + *optsize = 0; + } + return (error); +} + +static int +sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, + void *p) +{ + int error, set_opt; + uint32_t *mopt; + struct sctp_tcb *stcb = NULL; + struct sctp_inpcb *inp = NULL; + uint32_t vrf_id; if (optval == NULL) { SCTP_PRINTF("optval is NULL\n"); @@ -2705,10 +3330,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, return (EINVAL); } inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { SCTP_PRINTF("inp is NULL?\n"); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return EINVAL; + return (EINVAL); } vrf_id = inp->def_vrf_id; @@ -2736,6 +3361,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, */ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* only valid for bound all sockets */ + if ((SCTP_BASE_SYSCTL(sctp_auto_asconf) == 0) && + (*mopt != 0)) { + /* forbidden by admin */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPERM); + return (EPERM); + } set_opt = SCTP_PCB_FLAGS_AUTO_ASCONF; } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); @@ -2798,8 +3429,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE); else sctp_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE); + break; } - break; case SCTP_PARTIAL_DELIVERY_POINT: { uint32_t *value; @@ -2811,8 +3442,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, break; } inp->partial_delivery_point = *value; + break; } - break; case SCTP_FRAGMENT_INTERLEAVE: /* not yet until we re-write sctp_recvmsg() */ { @@ -2833,120 +3464,214 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } + break; } - break; case SCTP_CMT_ON_OFF: if (SCTP_BASE_SYSCTL(sctp_cmt_on_off)) { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + if (av->assoc_value > SCTP_CMT_MAX) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { - if (av->assoc_value != 0) - stcb->asoc.sctp_cmt_on_off = 1; - else - stcb->asoc.sctp_cmt_on_off = 0; + stcb->asoc.sctp_cmt_on_off = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (av->assoc_value != 0) - inp->sctp_cmt_on_off = 1; - else - inp->sctp_cmt_on_off = 0; - SCTP_INP_WUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->sctp_cmt_on_off = av->assoc_value; + SCTP_INP_WUNLOCK(inp); + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.sctp_cmt_on_off = av->assoc_value; + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; } break; - /* JRS - Set socket option for pluggable congestion control */ case SCTP_PLUGGABLE_CC: { struct sctp_assoc_value *av; + struct sctp_nets *net; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + if ((av->assoc_value != SCTP_CC_RFC2581) && + (av->assoc_value != SCTP_CC_HSTCP) && + (av->assoc_value != SCTP_CC_HTCP) && + (av->assoc_value != SCTP_CC_RTCC)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { - switch (av->assoc_value) { - /* - * JRS - Standard TCP congestion - * control - */ - case SCTP_CC_RFC2581: - { - stcb->asoc.congestion_control_module = SCTP_CC_RFC2581; - stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; - stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr; - stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; - stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; - stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; - stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; - SCTP_TCB_UNLOCK(stcb); - break; + stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value]; + stcb->asoc.congestion_control_module = av->assoc_value; + if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) { + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); } - /* - * JRS - High Speed TCP congestion - * control (Floyd) - */ - case SCTP_CC_HSTCP: - { - stcb->asoc.congestion_control_module = SCTP_CC_HSTCP; - stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; - stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr; - stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; - stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; - stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; - stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; + } + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->sctp_ep.sctp_default_cc_module = av->assoc_value; + SCTP_INP_WUNLOCK(inp); + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value]; + stcb->asoc.congestion_control_module = av->assoc_value; + if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) { + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); + } + } SCTP_TCB_UNLOCK(stcb); - break; } - /* JRS - HTCP congestion control */ - case SCTP_CC_HTCP: - { - stcb->asoc.congestion_control_module = SCTP_CC_HTCP; - stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param; - stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr; - stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout; - stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo; - stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; - stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer; + SCTP_INP_RUNLOCK(inp); + } + } + break; + } + case SCTP_CC_OPTION: + { + struct sctp_cc_option *cc_opt; + + SCTP_CHECK_AND_CAST(cc_opt, optval, struct sctp_cc_option, optsize); + SCTP_FIND_STCB(inp, stcb, cc_opt->aid_value.assoc_id); + if (stcb == NULL) { + if (cc_opt->aid_value.assoc_id == SCTP_CURRENT_ASSOC) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (stcb->asoc.cc_functions.sctp_cwnd_socket_option) { + (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 1, cc_opt); + } SCTP_TCB_UNLOCK(stcb); - break; } - /* - * JRS - All other values are - * invalid - */ - default: - { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; + SCTP_INP_RUNLOCK(inp); + } else { + error = EINVAL; + } + } else { + if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) { + error = ENOTSUP; + } else { + error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 1, + cc_opt); + } + SCTP_TCB_UNLOCK(stcb); + } + break; + } + case SCTP_PLUGGABLE_SS: + { + struct sctp_assoc_value *av; + + SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + if ((av->assoc_value != SCTP_SS_DEFAULT) && + (av->assoc_value != SCTP_SS_ROUND_ROBIN) && + (av->assoc_value != SCTP_SS_ROUND_ROBIN_PACKET) && + (av->assoc_value != SCTP_SS_PRIORITY) && + (av->assoc_value != SCTP_SS_FAIR_BANDWITH) && + (av->assoc_value != SCTP_SS_FIRST_COME)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + SCTP_FIND_STCB(inp, stcb, av->assoc_id); + if (stcb) { + stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1); + stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value]; + stcb->asoc.stream_scheduling_module = av->assoc_value; + stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1); + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->sctp_ep.sctp_default_ss_module = av->assoc_value; + SCTP_INP_WUNLOCK(inp); + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1); + stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value]; + stcb->asoc.stream_scheduling_module = av->assoc_value; + stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1); SCTP_TCB_UNLOCK(stcb); - break; } + SCTP_INP_RUNLOCK(inp); + } + } + break; + } + case SCTP_SS_VALUE: + { + struct sctp_stream_value *av; + + SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, optsize); + SCTP_FIND_STCB(inp, stcb, av->assoc_id); + if (stcb) { + if (stcb->asoc.ss_functions.sctp_ss_set_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id], + av->stream_value) < 0) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } + SCTP_TCB_UNLOCK(stcb); } else { - switch (av->assoc_value) { - case SCTP_CC_RFC2581: - case SCTP_CC_HSTCP: - case SCTP_CC_HTCP: - inp->sctp_ep.sctp_default_cc_module = av->assoc_value; - break; - default: + if (av->assoc_id == SCTP_CURRENT_ASSOC) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.ss_functions.sctp_ss_set_value(stcb, + &stcb->asoc, + &stcb->asoc.strmout[av->stream_id], + av->stream_value); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + + } else { + /* + * Can't set stream value without + * association + */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; - break; - }; + } } + break; } - break; case SCTP_CLR_STAT_LOG: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; @@ -2962,12 +3687,27 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, stcb->asoc.context = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - inp->sctp_context = av->assoc_value; - SCTP_INP_WUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->sctp_context = av->assoc_value; + SCTP_INP_WUNLOCK(inp); + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.context = av->assoc_value; + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } } + break; } - break; case SCTP_VRF_ID: { uint32_t *default_vrfid; @@ -3002,12 +3742,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (sack->sack_delay) { if (sack->sack_delay > SCTP_MAX_SACK_DELAY) sack->sack_delay = SCTP_MAX_SACK_DELAY; + if (MSEC_TO_TICKS(sack->sack_delay) < 1) { + sack->sack_delay = TICKS_TO_MSEC(1); + } } if (stcb) { if (sack->sack_delay) { - if (MSEC_TO_TICKS(sack->sack_delay) < 1) { - sack->sack_delay = TICKS_TO_MSEC(1); - } stcb->asoc.delayed_ack = sack->sack_delay; } if (sack->sack_freq) { @@ -3015,17 +3755,34 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (sack->sack_delay) { - if (MSEC_TO_TICKS(sack->sack_delay) < 1) { - sack->sack_delay = TICKS_TO_MSEC(1); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (sack->sack_assoc_id == SCTP_FUTURE_ASSOC) || + (sack->sack_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sack->sack_delay) { + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay); } - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay); + if (sack->sack_freq) { + inp->sctp_ep.sctp_sack_freq = sack->sack_freq; + } + SCTP_INP_WUNLOCK(inp); } - if (sack->sack_freq) { - inp->sctp_ep.sctp_sack_freq = sack->sack_freq; + if ((sack->sack_assoc_id == SCTP_CURRENT_ASSOC) || + (sack->sack_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (sack->sack_delay) { + stcb->asoc.delayed_ack = sack->sack_delay; + } + if (sack->sack_freq) { + stcb->asoc.sack_freq = sack->sack_freq; + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; } @@ -3052,11 +3809,20 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, size_t size; SCTP_CHECK_AND_CAST(sca, optval, struct sctp_authkey, optsize); + if (sca->sca_keylength == 0) { + size = optsize - sizeof(struct sctp_authkey); + } else { + if (sca->sca_keylength + sizeof(struct sctp_authkey) <= optsize) { + size = sca->sca_keylength; + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + } SCTP_FIND_STCB(inp, stcb, sca->sca_assoc_id); - size = optsize - sizeof(*sca); if (stcb) { - /* set it on the assoc */ shared_keys = &stcb->asoc.shared_keys; /* clear the cached keys for this key id */ sctp_clear_cachedkeys(stcb, sca->sca_keynumber); @@ -3086,39 +3852,78 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, error = sctp_insert_sharedkey(shared_keys, shared_key); SCTP_TCB_UNLOCK(stcb); } else { - /* set it on the endpoint */ - SCTP_INP_WLOCK(inp); - shared_keys = &inp->sctp_ep.shared_keys; - /* - * clear the cached keys on all assocs for - * this key id - */ - sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber); - /* - * create the new shared key and - * insert/replace it - */ - if (size > 0) { - key = sctp_set_key(sca->sca_key, (uint32_t) size); - if (key == NULL) { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (sca->sca_assoc_id == SCTP_FUTURE_ASSOC) || + (sca->sca_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + shared_keys = &inp->sctp_ep.shared_keys; + /* + * clear the cached keys on all + * assocs for this key id + */ + sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber); + /* + * create the new shared key and + * insert/replace it + */ + if (size > 0) { + key = sctp_set_key(sca->sca_key, (uint32_t) size); + if (key == NULL) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); + error = ENOMEM; + SCTP_INP_WUNLOCK(inp); + break; + } + } + shared_key = sctp_alloc_sharedkey(); + if (shared_key == NULL) { + sctp_free_key(key); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); error = ENOMEM; SCTP_INP_WUNLOCK(inp); break; } - } - shared_key = sctp_alloc_sharedkey(); - if (shared_key == NULL) { - sctp_free_key(key); - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); - error = ENOMEM; + shared_key->key = key; + shared_key->keyid = sca->sca_keynumber; + error = sctp_insert_sharedkey(shared_keys, shared_key); SCTP_INP_WUNLOCK(inp); - break; } - shared_key->key = key; - shared_key->keyid = sca->sca_keynumber; - error = sctp_insert_sharedkey(shared_keys, shared_key); - SCTP_INP_WUNLOCK(inp); + if ((sca->sca_assoc_id == SCTP_CURRENT_ASSOC) || + (sca->sca_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + shared_keys = &stcb->asoc.shared_keys; + /* + * clear the cached keys for + * this key id + */ + sctp_clear_cachedkeys(stcb, sca->sca_keynumber); + /* + * create the new shared key + * and insert/replace it + */ + if (size > 0) { + key = sctp_set_key(sca->sca_key, (uint32_t) size); + if (key == NULL) { + SCTP_TCB_UNLOCK(stcb); + continue; + } + } + shared_key = sctp_alloc_sharedkey(); + if (shared_key == NULL) { + sctp_free_key(key); + SCTP_TCB_UNLOCK(stcb); + continue; + } + shared_key->key = key; + shared_key->keyid = sca->sca_keynumber; + error = sctp_insert_sharedkey(shared_keys, shared_key); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } } break; } @@ -3128,7 +3933,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_hmaclist_t *hmaclist; uint16_t hmacid; uint32_t i; - size_t found; SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize); @@ -3179,8 +3983,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, { struct sctp_authkeyid *scact; - SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, - optsize); + SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id); /* set the active key on the right place */ @@ -3195,16 +3998,27 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - /* set the active key on the endpoint */ - SCTP_INP_WLOCK(inp); - if (sctp_auth_setactivekey_ep(inp, - scact->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); - error = EINVAL; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (scact->scact_assoc_id == SCTP_FUTURE_ASSOC) || + (scact->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sctp_auth_setactivekey_ep(inp, scact->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); + } + if ((scact->scact_assoc_id == SCTP_CURRENT_ASSOC) || + (scact->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + sctp_auth_setactivekey(stcb, scact->scact_keynumber); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; } @@ -3212,30 +4026,38 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, { struct sctp_authkeyid *scdel; - SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, - optsize); + SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, scdel->scact_assoc_id); /* delete the key from the right place */ if (stcb) { - if (sctp_delete_sharedkey(stcb, - scdel->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); + if (sctp_delete_sharedkey(stcb, scdel->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (sctp_delete_sharedkey_ep(inp, - scdel->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); - error = EINVAL; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (scdel->scact_assoc_id == SCTP_FUTURE_ASSOC) || + (scdel->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sctp_delete_sharedkey_ep(inp, scdel->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); + } + if ((scdel->scact_assoc_id == SCTP_CURRENT_ASSOC) || + (scdel->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + sctp_delete_sharedkey(stcb, scdel->scact_keynumber); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; } @@ -3243,45 +4065,85 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, { struct sctp_authkeyid *keyid; - SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid, - optsize); + SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, keyid->scact_assoc_id); /* deactivate the key from the right place */ if (stcb) { - if (sctp_deact_sharedkey(stcb, - keyid->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); + if (sctp_deact_sharedkey(stcb, keyid->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (sctp_deact_sharedkey_ep(inp, - keyid->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); - error = EINVAL; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (keyid->scact_assoc_id == SCTP_FUTURE_ASSOC) || + (keyid->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sctp_deact_sharedkey_ep(inp, keyid->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); + } + if ((keyid->scact_assoc_id == SCTP_CURRENT_ASSOC) || + (keyid->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + sctp_deact_sharedkey(stcb, keyid->scact_keynumber); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; } + case SCTP_ENABLE_STREAM_RESET: + { + struct sctp_assoc_value *av; + SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + if (av->assoc_value & (~SCTP_ENABLE_VALUE_MASK)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + SCTP_FIND_STCB(inp, stcb, av->assoc_id); + if (stcb) { + stcb->asoc.local_strreset_support = (uint8_t) av->assoc_value; + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->local_strreset_support = (uint8_t) av->assoc_value; + SCTP_INP_WUNLOCK(inp); + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.local_strreset_support = (uint8_t) av->assoc_value; + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } + } + break; + } case SCTP_RESET_STREAMS: { - struct sctp_stream_reset *strrst; - uint8_t send_in = 0, send_tsn = 0, send_out = 0, - addstream = 0; - uint16_t addstrmcnt = 0; - int i; - - SCTP_CHECK_AND_CAST(strrst, optval, struct sctp_stream_reset, optsize); - SCTP_FIND_STCB(inp, stcb, strrst->strrst_assoc_id); + struct sctp_reset_streams *strrst; + int i, send_out = 0; + int send_in = 0; + SCTP_CHECK_AND_CAST(strrst, optval, struct sctp_reset_streams, optsize); + SCTP_FIND_STCB(inp, stcb, strrst->srs_assoc_id); if (stcb == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; @@ -3289,13 +4151,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } if (stcb->asoc.peer_supports_strreset == 0) { /* - * Peer does not support it, we return - * protocol not supported since this is true - * for this feature and this peer, not the - * socket request in general. + * Peer does not support the chunk type. */ - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPROTONOSUPPORT); - error = EPROTONOSUPPORT; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); + error = EOPNOTSUPP; SCTP_TCB_UNLOCK(stcb); break; } @@ -3305,151 +4164,147 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_TCB_UNLOCK(stcb); break; } - if (strrst->strrst_flags == SCTP_RESET_LOCAL_RECV) { - send_in = 1; - } else if (strrst->strrst_flags == SCTP_RESET_LOCAL_SEND) { - send_out = 1; - } else if (strrst->strrst_flags == SCTP_RESET_BOTH) { + if (strrst->srs_flags & SCTP_STREAM_RESET_INCOMING) { send_in = 1; + } + if (strrst->srs_flags & SCTP_STREAM_RESET_OUTGOING) { send_out = 1; - } else if (strrst->strrst_flags == SCTP_RESET_TSN) { - send_tsn = 1; - } else if (strrst->strrst_flags == SCTP_RESET_ADD_STREAMS) { - if (send_tsn || - send_in || - send_out) { - /* We can't do that and add streams */ - error = EINVAL; - goto skip_stuff; - } - if (stcb->asoc.stream_reset_outstanding) { - error = EBUSY; - goto skip_stuff; - } - addstream = 1; - /* We allocate here */ - addstrmcnt = strrst->strrst_num_streams; - if ((int)(addstrmcnt + stcb->asoc.streamoutcnt) > 0xffff) { - /* You can't have more than 64k */ - error = EINVAL; - goto skip_stuff; - } - if ((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < addstrmcnt) { - /* Need to allocate more */ - struct sctp_stream_out *oldstream; - struct sctp_stream_queue_pending *sp; - int removed; - - oldstream = stcb->asoc.strmout; - /* get some more */ - SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *, - ((stcb->asoc.streamoutcnt + addstrmcnt) * sizeof(struct sctp_stream_out)), - SCTP_M_STRMO); - if (stcb->asoc.strmout == NULL) { - stcb->asoc.strmout = oldstream; - error = ENOMEM; - goto skip_stuff; - } - /* - * Ok now we proceed with copying - * the old out stuff and - * initializing the new stuff. - */ - SCTP_TCB_SEND_LOCK(stcb); - for (i = 0; i < stcb->asoc.streamoutcnt; i++) { - TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); - stcb->asoc.strmout[i].next_sequence_sent = oldstream[i].next_sequence_sent; - stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete; - stcb->asoc.strmout[i].stream_no = i; - if (oldstream[i].next_spoke.tqe_next) { - sctp_remove_from_wheel(stcb, &stcb->asoc, &oldstream[i], 1); - stcb->asoc.strmout[i].next_spoke.tqe_next = NULL; - stcb->asoc.strmout[i].next_spoke.tqe_prev = NULL; - removed = 1; - } else { - /* not on out wheel */ - stcb->asoc.strmout[i].next_spoke.tqe_next = NULL; - stcb->asoc.strmout[i].next_spoke.tqe_prev = NULL; - removed = 0; - } - /* - * now anything on those - * queues? - */ - while (TAILQ_EMPTY(&oldstream[i].outqueue) == 0) { - sp = TAILQ_FIRST(&oldstream[i].outqueue); - TAILQ_REMOVE(&oldstream[i].outqueue, sp, next); - TAILQ_INSERT_TAIL(&stcb->asoc.strmout[i].outqueue, sp, next); - } - /* Did we disrupt the wheel? */ - if (removed) { - sctp_insert_on_wheel(stcb, - &stcb->asoc, - &stcb->asoc.strmout[i], - 1); - } - /* - * Now move assoc pointers - * too - */ - if (stcb->asoc.last_out_stream == &oldstream[i]) { - stcb->asoc.last_out_stream = &stcb->asoc.strmout[i]; - } - if (stcb->asoc.locked_on_sending == &oldstream[i]) { - stcb->asoc.locked_on_sending = &stcb->asoc.strmout[i]; - } - } - /* now the new streams */ - for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + addstrmcnt); i++) { - stcb->asoc.strmout[i].next_sequence_sent = 0x0; - TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); - stcb->asoc.strmout[i].stream_no = i; - stcb->asoc.strmout[i].last_msg_incomplete = 0; - stcb->asoc.strmout[i].next_spoke.tqe_next = NULL; - stcb->asoc.strmout[i].next_spoke.tqe_prev = NULL; - } - stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + addstrmcnt; - SCTP_FREE(oldstream, SCTP_M_STRMO); - } - SCTP_TCB_SEND_UNLOCK(stcb); - goto skip_stuff; - } else { + } + if ((send_in == 0) && (send_out == 0)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); break; } - for (i = 0; i < strrst->strrst_num_streams; i++) { + for (i = 0; i < strrst->srs_number_streams; i++) { if ((send_in) && - - (strrst->strrst_list[i] > stcb->asoc.streamincnt)) { + (strrst->srs_stream_list[i] > stcb->asoc.streamincnt)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; - goto get_out; + break; } if ((send_out) && - (strrst->strrst_list[i] > stcb->asoc.streamoutcnt)) { + (strrst->srs_stream_list[i] > stcb->asoc.streamoutcnt)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; - goto get_out; + break; } } - skip_stuff: if (error) { - get_out: SCTP_TCB_UNLOCK(stcb); break; } - error = sctp_send_str_reset_req(stcb, strrst->strrst_num_streams, - strrst->strrst_list, - send_out, (stcb->asoc.str_reset_seq_in - 3), - send_in, send_tsn, addstream, addstrmcnt); + error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams, + strrst->srs_stream_list, + send_out, send_in, 0, 0, 0, 0, 0); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED); SCTP_TCB_UNLOCK(stcb); + break; } - break; + case SCTP_ADD_STREAMS: + { + struct sctp_add_streams *stradd; + uint8_t addstream = 0; + uint16_t add_o_strmcnt = 0; + uint16_t add_i_strmcnt = 0; + + SCTP_CHECK_AND_CAST(stradd, optval, struct sctp_add_streams, optsize); + SCTP_FIND_STCB(inp, stcb, stradd->sas_assoc_id); + if (stcb == NULL) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); + error = ENOENT; + break; + } + if (stcb->asoc.peer_supports_strreset == 0) { + /* + * Peer does not support the chunk type. + */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); + error = EOPNOTSUPP; + SCTP_TCB_UNLOCK(stcb); + break; + } + if (stcb->asoc.stream_reset_outstanding) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY); + error = EALREADY; + SCTP_TCB_UNLOCK(stcb); + break; + } + if ((stradd->sas_outstrms == 0) && + (stradd->sas_instrms == 0)) { + error = EINVAL; + goto skip_stuff; + } + if (stradd->sas_outstrms) { + addstream = 1; + /* We allocate here */ + add_o_strmcnt = stradd->sas_outstrms; + if ((((int)add_o_strmcnt) + ((int)stcb->asoc.streamoutcnt)) > 0x0000ffff) { + /* You can't have more than 64k */ + error = EINVAL; + goto skip_stuff; + } + } + if (stradd->sas_instrms) { + int cnt; + + addstream |= 2; + /* + * We allocate inside + * sctp_send_str_reset_req() + */ + add_i_strmcnt = stradd->sas_instrms; + cnt = add_i_strmcnt; + cnt += stcb->asoc.streamincnt; + if (cnt > 0x0000ffff) { + /* You can't have more than 64k */ + error = EINVAL; + goto skip_stuff; + } + if (cnt > (int)stcb->asoc.max_inbound_streams) { + /* More than you are allowed */ + error = EINVAL; + goto skip_stuff; + } + } + error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0); + sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED); + skip_stuff: + SCTP_TCB_UNLOCK(stcb); + break; + } + case SCTP_RESET_ASSOC: + { + uint32_t *value; + SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize); + SCTP_FIND_STCB(inp, stcb, (sctp_assoc_t) * value); + if (stcb == NULL) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); + error = ENOENT; + break; + } + if (stcb->asoc.peer_supports_strreset == 0) { + /* + * Peer does not support the chunk type. + */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); + error = EOPNOTSUPP; + SCTP_TCB_UNLOCK(stcb); + break; + } + if (stcb->asoc.stream_reset_outstanding) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY); + error = EALREADY; + SCTP_TCB_UNLOCK(stcb); + break; + } + error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, 0, 0, 0, 0); + sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED); + SCTP_TCB_UNLOCK(stcb); + break; + } case SCTP_CONNECT_X: if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); @@ -3458,7 +4313,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } error = sctp_do_connect_x(so, inp, optval, optsize, p, 0); break; - case SCTP_CONNECT_X_DELAYED: if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); @@ -3467,7 +4321,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } error = sctp_do_connect_x(so, inp, optval, optsize, p, 1); break; - case SCTP_CONNECT_X_COMPLETE: { struct sockaddr *sa; @@ -3521,8 +4374,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, error = EALREADY; } SCTP_TCB_UNLOCK(stcb); + break; } - break; case SCTP_MAX_BURST: { uint8_t *burst; @@ -3530,12 +4383,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_CHECK_AND_CAST(burst, optval, uint8_t, optsize); SCTP_INP_WLOCK(inp); - if (*burst) { - inp->sctp_ep.max_burst = *burst; - } + inp->sctp_ep.max_burst = *burst; SCTP_INP_WUNLOCK(inp); + break; } - break; case SCTP_MAXSEG: { struct sctp_assoc_value *av; @@ -3557,20 +4408,27 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - /* - * FIXME MT: I think this is not in tune - * with the API ID - */ - if (av->assoc_value) { - inp->sctp_frag_point = (av->assoc_value + ovh); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (av->assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_WLOCK(inp); + /* + * FIXME MT: I think this is not in + * tune with the API ID + */ + if (av->assoc_value) { + inp->sctp_frag_point = (av->assoc_value + ovh); + } else { + inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT; + } + SCTP_INP_WUNLOCK(inp); } else { - inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_WUNLOCK(inp); } + break; } - break; case SCTP_EVENTS: { struct sctp_event_subscribe *events; @@ -3634,22 +4492,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (events->sctp_sender_dry_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT); - if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || - (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { - stcb = LIST_FIRST(&inp->sctp_asoc_list); - if (stcb) { - SCTP_TCB_LOCK(stcb); - } - if (stcb && - TAILQ_EMPTY(&stcb->asoc.send_queue) && - TAILQ_EMPTY(&stcb->asoc.sent_queue) && - (stcb->asoc.stream_queue_cnt == 0)) { - sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED); - } - if (stcb) { - SCTP_TCB_UNLOCK(stcb); - } - } } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_DRYEVNT); } @@ -3660,9 +4502,84 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_feature_off(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT); } SCTP_INP_WUNLOCK(inp); - } - break; + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (events->sctp_association_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT); + } + if (events->sctp_address_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT); + } + if (events->sctp_send_failure_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT); + } + if (events->sctp_peer_error_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR); + } + if (events->sctp_shutdown_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT); + } + if (events->sctp_partial_delivery_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT); + } + if (events->sctp_adaptation_layer_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT); + } + if (events->sctp_authentication_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT); + } + if (events->sctp_sender_dry_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT); + } + if (events->sctp_stream_reset_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT); + } + SCTP_TCB_UNLOCK(stcb); + } + /* + * Send up the sender dry event only for 1-to-1 + * style sockets. + */ + if (events->sctp_sender_dry_event) { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { + stcb = LIST_FIRST(&inp->sctp_asoc_list); + if (stcb) { + SCTP_TCB_LOCK(stcb); + if (TAILQ_EMPTY(&stcb->asoc.send_queue) && + TAILQ_EMPTY(&stcb->asoc.sent_queue) && + (stcb->asoc.stream_queue_cnt == 0)) { + sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED); + } + SCTP_TCB_UNLOCK(stcb); + } + } + } + SCTP_INP_RUNLOCK(inp); + break; + } case SCTP_ADAPTATION_LAYER: { struct sctp_setadaptation *adap_bits; @@ -3670,9 +4587,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_CHECK_AND_CAST(adap_bits, optval, struct sctp_setadaptation, optsize); SCTP_INP_WLOCK(inp); inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind; + inp->sctp_ep.adaptation_layer_indicator_provided = 1; SCTP_INP_WUNLOCK(inp); + break; } - break; #ifdef SCTP_DEBUG case SCTP_SET_INITIAL_DBG_SEQ: { @@ -3682,8 +4600,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_INP_WLOCK(inp); inp->sctp_ep.initial_sequence_debug = *vvv; SCTP_INP_WUNLOCK(inp); + break; } - break; #endif case SCTP_DEFAULT_SEND_PARAM: { @@ -3693,7 +4611,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id); if (stcb) { - if (s_info->sinfo_stream <= stcb->asoc.streamoutcnt) { + if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) { memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send))); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); @@ -3701,14 +4619,30 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send))); - SCTP_INP_WUNLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC) || + (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send))); + SCTP_INP_WUNLOCK(inp); + } + if ((s_info->sinfo_assoc_id == SCTP_CURRENT_ASSOC) || + (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) { + memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send))); + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } } + break; } - break; case SCTP_PEER_ADDR_PARAMS: - /* Applys to the specific association */ { struct sctp_paddrparams *paddrp; struct sctp_nets *net; @@ -3738,7 +4672,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, struct sockaddr *sa; sa = (struct sockaddr *)&paddrp->spp_address; +#ifdef INET if (sa->sa_family == AF_INET) { + struct sockaddr_in *sin; sin = (struct sockaddr_in *)sa; @@ -3748,7 +4684,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, error = EINVAL; break; } - } else if (sa->sa_family == AF_INET6) { + } else +#endif +#ifdef INET6 + if (sa->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)sa; @@ -3758,7 +4697,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, error = EINVAL; break; } - } else { + } else +#endif + { error = EAFNOSUPPORT; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); @@ -3792,71 +4733,156 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, ovh = SCTP_MED_V4_OVERHEAD; } - if (paddrp->spp_hbinterval) - stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval; - else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) - stcb->asoc.heart_beat_delay = 0; - /* network sets ? */ if (net) { /************************NET SPECIFIC SET ******************/ - if (paddrp->spp_flags & SPP_HB_DEMAND) { - /* on demand HB */ - if (sctp_send_hb(stcb, 1, net) < 0) { - /* asoc destroyed */ - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - break; - } - } if (paddrp->spp_flags & SPP_HB_DISABLE) { + if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED) && + !(net->dest_state & SCTP_ADDR_NOHB)) { + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, + SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10); + } net->dest_state |= SCTP_ADDR_NOHB; } if (paddrp->spp_flags & SPP_HB_ENABLE) { + if (paddrp->spp_hbinterval) { + net->heart_beat_delay = paddrp->spp_hbinterval; + } else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) { + net->heart_beat_delay = 0; + } + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, + SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); net->dest_state &= ~SCTP_ADDR_NOHB; } + if (paddrp->spp_flags & SPP_HB_DEMAND) { + /* on demand HB */ + sctp_send_hb(stcb, net, SCTP_SO_LOCKED); + sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SOCKOPT, SCTP_SO_LOCKED); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); + } if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) { if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10); } + net->dest_state |= SCTP_ADDR_NO_PMTUD; if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) { net->mtu = paddrp->spp_pathmtu + ovh; if (net->mtu < stcb->asoc.smallest_mtu) { - sctp_pathmtu_adjustment(inp, stcb, net, net->mtu); + sctp_pathmtu_adjustment(stcb, net->mtu); } } } if (paddrp->spp_flags & SPP_PMTUD_ENABLE) { - if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { + if (!SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); } + net->dest_state &= ~SCTP_ADDR_NO_PMTUD; } - if (paddrp->spp_pathmaxrxt) - net->failure_threshold = paddrp->spp_pathmaxrxt; -#ifdef INET - if (paddrp->spp_flags & SPP_IPV4_TOS) { - if (net->ro._l_addr.sin.sin_family == AF_INET) { - net->tos_flowlabel = paddrp->spp_ipv4_tos & 0x000000fc; + if (paddrp->spp_pathmaxrxt) { + if (net->dest_state & SCTP_ADDR_PF) { + if (net->error_count > paddrp->spp_pathmaxrxt) { + net->dest_state &= ~SCTP_ADDR_PF; + } + } else { + if ((net->error_count <= paddrp->spp_pathmaxrxt) && + (net->error_count > net->pf_threshold)) { + net->dest_state |= SCTP_ADDR_PF; + sctp_send_hb(stcb, net, SCTP_SO_LOCKED); + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); + } } + if (net->dest_state & SCTP_ADDR_REACHABLE) { + if (net->error_count > paddrp->spp_pathmaxrxt) { + net->dest_state &= ~SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED); + } + } else { + if (net->error_count <= paddrp->spp_pathmaxrxt) { + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED); + } + } + net->failure_threshold = paddrp->spp_pathmaxrxt; + } + if (paddrp->spp_flags & SPP_DSCP) { + net->dscp = paddrp->spp_dscp & 0xfc; + net->dscp |= 0x01; } -#endif #ifdef INET6 if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) { - if (net->ro._l_addr.sin6.sin6_family == AF_INET6) { - net->tos_flowlabel = paddrp->spp_ipv6_flowlabel; + if (net->ro._l_addr.sa.sa_family == AF_INET6) { + net->flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff; + net->flowlabel |= 0x80000000; } } #endif } else { /************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/ - if (paddrp->spp_pathmaxrxt) + if (paddrp->spp_pathmaxrxt) { stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt; - + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + if (net->dest_state & SCTP_ADDR_PF) { + if (net->error_count > paddrp->spp_pathmaxrxt) { + net->dest_state &= ~SCTP_ADDR_PF; + } + } else { + if ((net->error_count <= paddrp->spp_pathmaxrxt) && + (net->error_count > net->pf_threshold)) { + net->dest_state |= SCTP_ADDR_PF; + sctp_send_hb(stcb, net, SCTP_SO_LOCKED); + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); + } + } + if (net->dest_state & SCTP_ADDR_REACHABLE) { + if (net->error_count > paddrp->spp_pathmaxrxt) { + net->dest_state &= ~SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED); + } + } else { + if (net->error_count <= paddrp->spp_pathmaxrxt) { + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED); + } + } + net->failure_threshold = paddrp->spp_pathmaxrxt; + } + } if (paddrp->spp_flags & SPP_HB_ENABLE) { + if (paddrp->spp_hbinterval) { + stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval; + } else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) { + stcb->asoc.heart_beat_delay = 0; + } /* Turn back on the timer */ - stcb->asoc.hb_is_disabled = 0; - sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + if (paddrp->spp_hbinterval) { + net->heart_beat_delay = paddrp->spp_hbinterval; + } else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) { + net->heart_beat_delay = 0; + } + if (net->dest_state & SCTP_ADDR_NOHB) { + net->dest_state &= ~SCTP_ADDR_NOHB; + } + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, + SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); + } + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); + } + if (paddrp->spp_flags & SPP_HB_DISABLE) { + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + if (!(net->dest_state & SCTP_ADDR_NOHB)) { + net->dest_state |= SCTP_ADDR_NOHB; + if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) { + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10); + } + } + } + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); } if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { @@ -3864,87 +4890,103 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10); } + net->dest_state |= SCTP_ADDR_NO_PMTUD; if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) { net->mtu = paddrp->spp_pathmtu + ovh; if (net->mtu < stcb->asoc.smallest_mtu) { - sctp_pathmtu_adjustment(inp, stcb, net, net->mtu); + sctp_pathmtu_adjustment(stcb, net->mtu); } } } + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD); } if (paddrp->spp_flags & SPP_PMTUD_ENABLE) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { + if (!SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); } + net->dest_state &= ~SCTP_ADDR_NO_PMTUD; } + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD); } - if (paddrp->spp_flags & SPP_HB_DISABLE) { - int cnt_of_unconf = 0; - struct sctp_nets *lnet; - - stcb->asoc.hb_is_disabled = 1; - TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) { - if (lnet->dest_state & SCTP_ADDR_UNCONFIRMED) { - cnt_of_unconf++; - } - } - /* - * stop the timer ONLY if we - * have no unconfirmed - * addresses - */ - if (cnt_of_unconf == 0) { - TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, - SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11); - } + if (paddrp->spp_flags & SPP_DSCP) { + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->dscp = paddrp->spp_dscp & 0xfc; + net->dscp |= 0x01; } + stcb->asoc.default_dscp = paddrp->spp_dscp & 0xfc; + stcb->asoc.default_dscp |= 0x01; } - if (paddrp->spp_flags & SPP_HB_ENABLE) { - /* start up the timer. */ +#ifdef INET6 + if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); + if (net->ro._l_addr.sa.sa_family == AF_INET6) { + net->flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff; + net->flowlabel |= 0x80000000; + } } + stcb->asoc.default_flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff; + stcb->asoc.default_flowlabel |= 0x80000000; } -#ifdef INET - if (paddrp->spp_flags & SPP_IPV4_TOS) - stcb->asoc.default_tos = paddrp->spp_ipv4_tos & 0x000000fc; -#endif -#ifdef INET6 - if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) - stcb->asoc.default_flowlabel = paddrp->spp_ipv6_flowlabel; #endif - } SCTP_TCB_UNLOCK(stcb); } else { /************************NO TCB, SET TO default stuff ******************/ - SCTP_INP_WLOCK(inp); - /* - * For the TOS/FLOWLABEL stuff you set it - * with the options on the socket - */ - if (paddrp->spp_pathmaxrxt) { - inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt; - } - if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0; - else if (paddrp->spp_hbinterval) { - if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL) - paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL; - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval); - } - if (paddrp->spp_flags & SPP_HB_ENABLE) { - sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); - - } else if (paddrp->spp_flags & SPP_HB_DISABLE) { - sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_WLOCK(inp); + /* + * For the TOS/FLOWLABEL stuff you + * set it with the options on the + * socket + */ + if (paddrp->spp_pathmaxrxt) { + inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt; + } + if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0; + else if (paddrp->spp_hbinterval) { + if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL) + paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL; + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval); + } + if (paddrp->spp_flags & SPP_HB_ENABLE) { + if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) { + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0; + } else if (paddrp->spp_hbinterval) { + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval); + } + sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); + } else if (paddrp->spp_flags & SPP_HB_DISABLE) { + sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); + } + if (paddrp->spp_flags & SPP_PMTUD_ENABLE) { + sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD); + } else if (paddrp->spp_flags & SPP_PMTUD_DISABLE) { + sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD); + } + if (paddrp->spp_flags & SPP_DSCP) { + inp->sctp_ep.default_dscp = paddrp->spp_dscp & 0xfc; + inp->sctp_ep.default_dscp |= 0x01; + } +#ifdef INET6 + if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) { + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + inp->sctp_ep.default_flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff; + inp->sctp_ep.default_flowlabel |= 0x80000000; + } + } +#endif + SCTP_INP_WUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_WUNLOCK(inp); } + break; } - break; case SCTP_RTOINFO: { struct sctp_rtoinfo *srto; @@ -3976,31 +5018,38 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (srto->srto_initial) - new_init = srto->srto_initial; - else - new_init = inp->sctp_ep.initial_rto; - if (srto->srto_max) - new_max = srto->srto_max; - else - new_max = inp->sctp_ep.sctp_maxrto; - if (srto->srto_min) - new_min = srto->srto_min; - else - new_min = inp->sctp_ep.sctp_minrto; - if ((new_min <= new_init) && (new_init <= new_max)) { - inp->sctp_ep.initial_rto = new_init; - inp->sctp_ep.sctp_maxrto = new_max; - inp->sctp_ep.sctp_minrto = new_min; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (srto->srto_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (srto->srto_initial) + new_init = srto->srto_initial; + else + new_init = inp->sctp_ep.initial_rto; + if (srto->srto_max) + new_max = srto->srto_max; + else + new_max = inp->sctp_ep.sctp_maxrto; + if (srto->srto_min) + new_min = srto->srto_min; + else + new_min = inp->sctp_ep.sctp_minrto; + if ((new_min <= new_init) && (new_init <= new_max)) { + inp->sctp_ep.initial_rto = new_init; + inp->sctp_ep.sctp_maxrto = new_max; + inp->sctp_ep.sctp_minrto = new_min; + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } - SCTP_INP_WUNLOCK(inp); } + break; } - break; case SCTP_ASSOCINFO: { struct sctp_assocparams *sasoc; @@ -4018,27 +5067,28 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (stcb) { if (sasoc->sasoc_asocmaxrxt) stcb->asoc.max_send_times = sasoc->sasoc_asocmaxrxt; - sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets; - sasoc->sasoc_peer_rwnd = 0; - sasoc->sasoc_local_rwnd = 0; if (sasoc->sasoc_cookie_life) { stcb->asoc.cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life); } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (sasoc->sasoc_asocmaxrxt) - inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt; - sasoc->sasoc_number_peer_destinations = 0; - sasoc->sasoc_peer_rwnd = 0; - sasoc->sasoc_local_rwnd = 0; - if (sasoc->sasoc_cookie_life) { - inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sasoc->sasoc_asocmaxrxt) + inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt; + if (sasoc->sasoc_cookie_life) { + inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life); + } + SCTP_INP_WUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_WUNLOCK(inp); } + break; } - break; case SCTP_INITMSG: { struct sctp_initmsg *sinit; @@ -4057,12 +5107,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (sinit->sinit_max_init_timeo) inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo; SCTP_INP_WUNLOCK(inp); + break; } - break; case SCTP_PRIMARY_ADDR: { struct sctp_setprim *spa; - struct sctp_nets *net, *lnet; + struct sctp_nets *net; SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize); SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id); @@ -4091,12 +5141,13 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if ((net != stcb->asoc.primary_destination) && (!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) { /* Ok we need to set it */ - lnet = stcb->asoc.primary_destination; if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) { - if (net->dest_state & SCTP_ADDR_SWITCH_PRIMARY) { - net->dest_state |= SCTP_ADDR_DOUBLE_SWITCH; + if ((stcb->asoc.alternate) && + (!(net->dest_state & SCTP_ADDR_PF)) && + (net->dest_state & SCTP_ADDR_REACHABLE)) { + sctp_free_remote_addr(stcb->asoc.alternate); + stcb->asoc.alternate = NULL; } - net->dest_state |= SCTP_ADDR_SWITCH_PRIMARY; } } } else { @@ -4106,8 +5157,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (stcb) { SCTP_TCB_UNLOCK(stcb); } + break; } - break; case SCTP_SET_DYNAMIC_PRIMARY: { union sctp_sockstore *ss; @@ -4120,8 +5171,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize); /* SUPER USER CHECK? */ error = sctp_dynamic_set_primary(&ss->sa, vrf_id); + break; } - break; case SCTP_SET_PEER_PRIMARY_ADDR: { struct sctp_setpeerprim *sspp; @@ -4152,117 +5203,589 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, __FUNCTION__); continue; } - if (laddr->ifa == ifa) { - found = 1; - break; + if (laddr->ifa == ifa) { + found = 1; + break; + } + } + if (!found) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + goto out_of_it; + } + } + if (sctp_set_primary_ip_address_sa(stcb, + (struct sockaddr *)&sspp->sspp_addr) != 0) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + out_of_it: + SCTP_TCB_UNLOCK(stcb); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + break; + } + case SCTP_BINDX_ADD_ADDR: + { + struct sctp_getaddresses *addrs; + struct thread *td; + + td = (struct thread *)p; + SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, + optsize); +#ifdef INET + if (addrs->addr->sa_family == AF_INET) { + if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)(addrs->addr))->sin_addr)))) { + SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } else +#endif +#ifdef INET6 + if (addrs->addr->sa_family == AF_INET6) { + if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (td != NULL && (error = prison_local_ip6(td->td_ucred, &(((struct sockaddr_in6 *)(addrs->addr))->sin6_addr), + (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) { + SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } else +#endif + { + error = EAFNOSUPPORT; + break; + } + sctp_bindx_add_address(so, inp, addrs->addr, + addrs->sget_assoc_id, vrf_id, + &error, p); + break; + } + case SCTP_BINDX_REM_ADDR: + { + struct sctp_getaddresses *addrs; + struct thread *td; + + td = (struct thread *)p; + + SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, optsize); +#ifdef INET + if (addrs->addr->sa_family == AF_INET) { + if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)(addrs->addr))->sin_addr)))) { + SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } else +#endif +#ifdef INET6 + if (addrs->addr->sa_family == AF_INET6) { + if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (td != NULL && + (error = prison_local_ip6(td->td_ucred, + &(((struct sockaddr_in6 *)(addrs->addr))->sin6_addr), + (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) { + SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } else +#endif + { + error = EAFNOSUPPORT; + break; + } + sctp_bindx_delete_address(inp, addrs->addr, + addrs->sget_assoc_id, vrf_id, + &error); + break; + } + case SCTP_EVENT: + { + struct sctp_event *event; + uint32_t event_type; + + SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, optsize); + SCTP_FIND_STCB(inp, stcb, event->se_assoc_id); + switch (event->se_type) { + case SCTP_ASSOC_CHANGE: + event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT; + break; + case SCTP_PEER_ADDR_CHANGE: + event_type = SCTP_PCB_FLAGS_RECVPADDREVNT; + break; + case SCTP_REMOTE_ERROR: + event_type = SCTP_PCB_FLAGS_RECVPEERERR; + break; + case SCTP_SEND_FAILED: + event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT; + break; + case SCTP_SHUTDOWN_EVENT: + event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT; + break; + case SCTP_ADAPTATION_INDICATION: + event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT; + break; + case SCTP_PARTIAL_DELIVERY_EVENT: + event_type = SCTP_PCB_FLAGS_PDAPIEVNT; + break; + case SCTP_AUTHENTICATION_EVENT: + event_type = SCTP_PCB_FLAGS_AUTHEVNT; + break; + case SCTP_STREAM_RESET_EVENT: + event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT; + break; + case SCTP_SENDER_DRY_EVENT: + event_type = SCTP_PCB_FLAGS_DRYEVNT; + break; + case SCTP_NOTIFICATIONS_STOPPED_EVENT: + event_type = 0; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); + error = ENOTSUP; + break; + case SCTP_ASSOC_RESET_EVENT: + event_type = SCTP_PCB_FLAGS_ASSOC_RESETEVNT; + break; + case SCTP_STREAM_CHANGE_EVENT: + event_type = SCTP_PCB_FLAGS_STREAM_CHANGEEVNT; + break; + case SCTP_SEND_FAILED_EVENT: + event_type = SCTP_PCB_FLAGS_RECVNSENDFAILEVNT; + break; + default: + event_type = 0; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (event_type > 0) { + if (stcb) { + if (event->se_on) { + sctp_stcb_feature_on(inp, stcb, event_type); + if (event_type == SCTP_PCB_FLAGS_DRYEVNT) { + if (TAILQ_EMPTY(&stcb->asoc.send_queue) && + TAILQ_EMPTY(&stcb->asoc.sent_queue) && + (stcb->asoc.stream_queue_cnt == 0)) { + sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED); + } + } + } else { + sctp_stcb_feature_off(inp, stcb, event_type); + } + SCTP_TCB_UNLOCK(stcb); + } else { + /* + * We don't want to send up a storm + * of events, so return an error for + * sender dry events + */ + if ((event_type == SCTP_PCB_FLAGS_DRYEVNT) && + ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) == 0) && + ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) == 0) && + ((event->se_assoc_id == SCTP_ALL_ASSOC) || + (event->se_assoc_id == SCTP_CURRENT_ASSOC))) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); + error = ENOTSUP; + break; + } + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (event->se_assoc_id == SCTP_FUTURE_ASSOC) || + (event->se_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (event->se_on) { + sctp_feature_on(inp, event_type); + } else { + sctp_feature_off(inp, event_type); + } + SCTP_INP_WUNLOCK(inp); + } + if ((event->se_assoc_id == SCTP_CURRENT_ASSOC) || + (event->se_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (event->se_on) { + sctp_stcb_feature_on(inp, stcb, event_type); + } else { + sctp_stcb_feature_off(inp, stcb, event_type); + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } + } + } + break; + } + case SCTP_RECVRCVINFO: + { + int *onoff; + + SCTP_CHECK_AND_CAST(onoff, optval, int, optsize); + SCTP_INP_WLOCK(inp); + if (*onoff != 0) { + sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO); + } else { + sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO); + } + SCTP_INP_WUNLOCK(inp); + break; + } + case SCTP_RECVNXTINFO: + { + int *onoff; + + SCTP_CHECK_AND_CAST(onoff, optval, int, optsize); + SCTP_INP_WLOCK(inp); + if (*onoff != 0) { + sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO); + } else { + sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO); + } + SCTP_INP_WUNLOCK(inp); + break; + } + case SCTP_DEFAULT_SNDINFO: + { + struct sctp_sndinfo *info; + uint16_t policy; + + SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, optsize); + SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id); + + if (stcb) { + if (info->snd_sid < stcb->asoc.streamoutcnt) { + stcb->asoc.def_send.sinfo_stream = info->snd_sid; + policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); + stcb->asoc.def_send.sinfo_flags = info->snd_flags; + stcb->asoc.def_send.sinfo_flags |= policy; + stcb->asoc.def_send.sinfo_ppid = info->snd_ppid; + stcb->asoc.def_send.sinfo_context = info->snd_context; + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (info->snd_assoc_id == SCTP_FUTURE_ASSOC) || + (info->snd_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->def_send.sinfo_stream = info->snd_sid; + policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags); + inp->def_send.sinfo_flags = info->snd_flags; + inp->def_send.sinfo_flags |= policy; + inp->def_send.sinfo_ppid = info->snd_ppid; + inp->def_send.sinfo_context = info->snd_context; + SCTP_INP_WUNLOCK(inp); + } + if ((info->snd_assoc_id == SCTP_CURRENT_ASSOC) || + (info->snd_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (info->snd_sid < stcb->asoc.streamoutcnt) { + stcb->asoc.def_send.sinfo_stream = info->snd_sid; + policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); + stcb->asoc.def_send.sinfo_flags = info->snd_flags; + stcb->asoc.def_send.sinfo_flags |= policy; + stcb->asoc.def_send.sinfo_ppid = info->snd_ppid; + stcb->asoc.def_send.sinfo_context = info->snd_context; + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } + } + break; + } + case SCTP_DEFAULT_PRINFO: + { + struct sctp_default_prinfo *info; + + SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, optsize); + SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id); + + if (PR_SCTP_INVALID_POLICY(info->pr_policy)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (stcb) { + stcb->asoc.def_send.sinfo_flags &= 0xfff0; + stcb->asoc.def_send.sinfo_flags |= info->pr_policy; + stcb->asoc.def_send.sinfo_timetolive = info->pr_value; + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (info->pr_assoc_id == SCTP_FUTURE_ASSOC) || + (info->pr_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->def_send.sinfo_flags &= 0xfff0; + inp->def_send.sinfo_flags |= info->pr_policy; + inp->def_send.sinfo_timetolive = info->pr_value; + SCTP_INP_WUNLOCK(inp); + } + if ((info->pr_assoc_id == SCTP_CURRENT_ASSOC) || + (info->pr_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.def_send.sinfo_flags &= 0xfff0; + stcb->asoc.def_send.sinfo_flags |= info->pr_policy; + stcb->asoc.def_send.sinfo_timetolive = info->pr_value; + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } + } + break; + } + case SCTP_PEER_ADDR_THLDS: + /* Applies to the specific association */ + { + struct sctp_paddrthlds *thlds; + struct sctp_nets *net; + + SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, optsize); + SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id); + net = NULL; + if (stcb) { + net = sctp_findnet(stcb, (struct sockaddr *)&thlds->spt_assoc_id); + } else { + /* + * We increment here since + * sctp_findassociation_ep_addr() wil do a + * decrement if it finds the stcb as long as + * the locked tcb (last argument) is NOT a + * TCB.. aka NULL. + */ + SCTP_INP_INCR_REF(inp); + stcb = sctp_findassociation_ep_addr(&inp, + (struct sockaddr *)&thlds->spt_assoc_id, + &net, NULL, NULL); + if (stcb == NULL) { + SCTP_INP_DECR_REF(inp); + } + } + if (stcb && (net == NULL)) { + struct sockaddr *sa; + + sa = (struct sockaddr *)&thlds->spt_assoc_id; +#ifdef INET + if (sa->sa_family == AF_INET) { + + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)sa; + if (sin->sin_addr.s_addr) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + SCTP_TCB_UNLOCK(stcb); + error = EINVAL; + break; + } + } else +#endif +#ifdef INET6 + if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sa; + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + SCTP_TCB_UNLOCK(stcb); + error = EINVAL; + break; + } + } else +#endif + { + error = EAFNOSUPPORT; + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + break; + } + } + if (stcb) { + if (net) { + if (net->dest_state & SCTP_ADDR_PF) { + if ((net->failure_threshold > thlds->spt_pathmaxrxt) || + (net->failure_threshold <= thlds->spt_pathpfthld)) { + net->dest_state &= ~SCTP_ADDR_PF; + } + } else { + if ((net->failure_threshold > thlds->spt_pathpfthld) && + (net->failure_threshold <= thlds->spt_pathmaxrxt)) { + net->dest_state |= SCTP_ADDR_PF; + sctp_send_hb(stcb, net, SCTP_SO_LOCKED); + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); } } - if (!found) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - goto out_of_it; + if (net->dest_state & SCTP_ADDR_REACHABLE) { + if (net->failure_threshold > thlds->spt_pathmaxrxt) { + net->dest_state &= ~SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED); + } + } else { + if (net->failure_threshold <= thlds->spt_pathmaxrxt) { + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED); + } + } + net->failure_threshold = thlds->spt_pathmaxrxt; + net->pf_threshold = thlds->spt_pathpfthld; + } else { + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + if (net->dest_state & SCTP_ADDR_PF) { + if ((net->failure_threshold > thlds->spt_pathmaxrxt) || + (net->failure_threshold <= thlds->spt_pathpfthld)) { + net->dest_state &= ~SCTP_ADDR_PF; + } + } else { + if ((net->failure_threshold > thlds->spt_pathpfthld) && + (net->failure_threshold <= thlds->spt_pathmaxrxt)) { + net->dest_state |= SCTP_ADDR_PF; + sctp_send_hb(stcb, net, SCTP_SO_LOCKED); + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3); + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); + } + } + if (net->dest_state & SCTP_ADDR_REACHABLE) { + if (net->failure_threshold > thlds->spt_pathmaxrxt) { + net->dest_state &= ~SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED); + } + } else { + if (net->failure_threshold <= thlds->spt_pathmaxrxt) { + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED); + } + } + net->failure_threshold = thlds->spt_pathmaxrxt; + net->pf_threshold = thlds->spt_pathpfthld; } + stcb->asoc.def_net_failure = thlds->spt_pathmaxrxt; + stcb->asoc.def_net_pf_threshold = thlds->spt_pathpfthld; } - if (sctp_set_primary_ip_address_sa(stcb, - (struct sockaddr *)&sspp->sspp_addr) != 0) { + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (thlds->spt_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->sctp_ep.def_net_failure = thlds->spt_pathmaxrxt; + inp->sctp_ep.def_net_pf_threshold = thlds->spt_pathpfthld; + SCTP_INP_WUNLOCK(inp); + } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } - out_of_it: - SCTP_TCB_UNLOCK(stcb); - } else { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; } - + break; } - break; - case SCTP_BINDX_ADD_ADDR: + case SCTP_REMOTE_UDP_ENCAPS_PORT: { - struct sctp_getaddresses *addrs; - size_t sz; - struct thread *td; + struct sctp_udpencaps *encaps; + struct sctp_nets *net; - td = (struct thread *)p; - SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, - optsize); - if (addrs->addr->sa_family == AF_INET) { - sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in); - if (optsize < sz) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - break; - } - if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)(addrs->addr))->sin_addr)))) { - SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); - break; - } -#ifdef INET6 - } else if (addrs->addr->sa_family == AF_INET6) { - sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6); - if (optsize < sz) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - break; - } - if (td != NULL && (error = prison_local_ip6(td->td_ucred, &(((struct sockaddr_in6 *)(addrs->addr))->sin6_addr), - (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) { - SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); - break; - } -#endif + SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, optsize); + SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id); + if (stcb) { + net = sctp_findnet(stcb, (struct sockaddr *)&encaps->sue_address); } else { - error = EAFNOSUPPORT; - break; + /* + * We increment here since + * sctp_findassociation_ep_addr() wil do a + * decrement if it finds the stcb as long as + * the locked tcb (last argument) is NOT a + * TCB.. aka NULL. + */ + net = NULL; + SCTP_INP_INCR_REF(inp); + stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&encaps->sue_address, &net, NULL, NULL); + if (stcb == NULL) { + SCTP_INP_DECR_REF(inp); + } } - sctp_bindx_add_address(so, inp, addrs->addr, - addrs->sget_assoc_id, vrf_id, - &error, p); - } - break; - case SCTP_BINDX_REM_ADDR: - { - struct sctp_getaddresses *addrs; - size_t sz; - struct thread *td; + if (stcb && (net == NULL)) { + struct sockaddr *sa; - td = (struct thread *)p; + sa = (struct sockaddr *)&encaps->sue_address; +#ifdef INET + if (sa->sa_family == AF_INET) { - SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, optsize); - if (addrs->addr->sa_family == AF_INET) { - sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in); - if (optsize < sz) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)sa; + if (sin->sin_addr.s_addr) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + SCTP_TCB_UNLOCK(stcb); + error = EINVAL; + break; + } + } else +#endif +#ifdef INET6 + if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sa; + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + SCTP_TCB_UNLOCK(stcb); + error = EINVAL; + break; + } + } else +#endif + { + error = EAFNOSUPPORT; + SCTP_TCB_UNLOCK(stcb); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } - if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)(addrs->addr))->sin_addr)))) { - SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); - break; + } + if (stcb) { + if (net) { + net->port = encaps->sue_port; + } else { + stcb->asoc.port = encaps->sue_port; } -#ifdef INET6 - } else if (addrs->addr->sa_family == AF_INET6) { - sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6); - if (optsize < sz) { + SCTP_TCB_UNLOCK(stcb); + } else { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + (encaps->sue_assoc_id == SCTP_FUTURE_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->sctp_ep.port = encaps->sue_port; + SCTP_INP_WUNLOCK(inp); + } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; - break; - } - if (td != NULL && (error = prison_local_ip6(td->td_ucred, &(((struct sockaddr_in6 *)(addrs->addr))->sin6_addr), - (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) { - SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); - break; } -#endif - } else { - error = EAFNOSUPPORT; - break; } - sctp_bindx_delete_address(so, inp, addrs->addr, - addrs->sget_assoc_id, vrf_id, - &error); + break; } - break; default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; @@ -4276,31 +5799,28 @@ sctp_ctloutput(struct socket *so, struct sockopt *sopt) { void *optval = NULL; size_t optsize = 0; - struct sctp_inpcb *inp; void *p; int error = 0; - inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { - /* I made the same as TCP since we are not setup? */ - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return (ECONNRESET); - } if (sopt->sopt_level != IPPROTO_SCTP) { /* wrong proto level... send back up to IP */ #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) error = ip6_ctloutput(so, sopt); - else #endif /* INET6 */ +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET error = ip_ctloutput(so, sopt); +#endif return (error); } optsize = sopt->sopt_valsize; if (optsize) { SCTP_MALLOC(optval, void *, optsize, SCTP_M_SOCKOPT); if (optval == NULL) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS); + SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS); return (ENOBUFS); } error = sooptcopyin(sopt, optval, optsize, optsize); @@ -4315,7 +5835,7 @@ sctp_ctloutput(struct socket *so, struct sockopt *sopt) } else if (sopt->sopt_dir == SOPT_GET) { error = sctp_getopt(so, sopt->sopt_name, optval, &optsize, p); } else { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } if ((error == 0) && (optval != NULL)) { @@ -4328,7 +5848,7 @@ out: return (error); } - +#ifdef INET static int sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p) { @@ -4339,7 +5859,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p) struct sctp_tcb *stcb = NULL; inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { /* I made the same as TCP since we are not setup? */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); @@ -4348,34 +5868,42 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p) SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return EINVAL; } + switch (addr->sa_family) { #ifdef INET6 - if (addr->sa_family == AF_INET6) { - struct sockaddr_in6 *sin6p; + case AF_INET6: + { + struct sockaddr_in6 *sin6p; - if (addr->sa_len != sizeof(struct sockaddr_in6)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return (EINVAL); - } - sin6p = (struct sockaddr_in6 *)addr; - if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6p->sin6_addr)) != 0) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); - return (error); + if (addr->sa_len != sizeof(struct sockaddr_in6)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + return (EINVAL); + } + sin6p = (struct sockaddr_in6 *)addr; + if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6p->sin6_addr)) != 0) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + return (error); + } + break; } - } else #endif - if (addr->sa_family == AF_INET) { - struct sockaddr_in *sinp; +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sinp; - if (addr->sa_len != sizeof(struct sockaddr_in)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return (EINVAL); - } - sinp = (struct sockaddr_in *)addr; - if (p != NULL && (error = prison_remote_ip4(p->td_ucred, &sinp->sin_addr)) != 0) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); - return (error); + if (addr->sa_len != sizeof(struct sockaddr_in)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + return (EINVAL); + } + sinp = (struct sockaddr_in *)addr; + if (p != NULL && (error = prison_remote_ip4(p->td_ucred, &sinp->sin_addr)) != 0) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + return (error); + } + break; } - } else { +#endif + default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EAFNOSUPPORT); return (EAFNOSUPPORT); } @@ -4398,7 +5926,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p) error = EINVAL; goto out_now; } -#endif /* INET6 */ +#endif if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == SCTP_PCB_FLAGS_UNBOUND) { /* Bind a ephemeral port */ @@ -4455,15 +5983,6 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p) if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; /* Set the connected flag so we can queue data */ - SOCKBUF_LOCK(&so->so_rcv); - so->so_rcv.sb_state &= ~SBS_CANTRCVMORE; - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_LOCK(&so->so_snd); - so->so_snd.sb_state &= ~SBS_CANTSENDMORE; - SOCKBUF_UNLOCK(&so->so_snd); - SOCK_LOCK(so); - so->so_state &= ~SS_ISDISCONNECTING; - SOCK_UNLOCK(so); soisconnecting(so); } SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT); @@ -4479,9 +5998,11 @@ out_now: SCTP_ASOC_CREATE_UNLOCK(inp); } SCTP_INP_DECR_REF(inp); - return error; + return (error); } +#endif + int sctp_listen(struct socket *so, int backlog, struct thread *p) { @@ -4498,7 +6019,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p) struct sctp_inpcb *inp; inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { /* I made the same as TCP since we are not setup? */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); @@ -4515,7 +6036,20 @@ sctp_listen(struct socket *so, int backlog, struct thread *p) LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { memcpy(&store, &laddr->ifa->address, sizeof(store)); - sp->sin.sin_port = inp->sctp_lport; + switch (sp->sa.sa_family) { +#ifdef INET + case AF_INET: + sp->sin.sin_port = inp->sctp_lport; + break; +#endif +#ifdef INET6 + case AF_INET6: + sp->sin6.sin6_port = inp->sctp_lport; + break; +#endif + default: + break; + } tinp = sctp_pcb_findep(&sp->sa, 0, 0, inp->def_vrf_id); if (tinp && (tinp != inp) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) && @@ -4534,17 +6068,32 @@ sctp_listen(struct socket *so, int backlog, struct thread *p) } else { /* Setup a local addr bound all */ memset(&store, 0, sizeof(store)); - store.sin.sin_port = inp->sctp_lport; + switch (sp->sa.sa_family) { +#ifdef INET + case AF_INET: + store.sin.sin_port = inp->sctp_lport; + break; +#endif +#ifdef INET6 + case AF_INET6: + sp->sin6.sin6_port = inp->sctp_lport; + break; +#endif + default: + break; + } #ifdef INET6 if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { store.sa.sa_family = AF_INET6; store.sa.sa_len = sizeof(struct sockaddr_in6); } #endif +#ifdef INET if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { store.sa.sa_family = AF_INET; store.sa.sa_len = sizeof(struct sockaddr_in); } +#endif tinp = sctp_pcb_findep(&sp->sa, 0, 0, inp->def_vrf_id); if (tinp && (tinp != inp) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) && @@ -4634,7 +6183,7 @@ sctp_accept(struct socket *so, struct sockaddr **addr) #endif inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) { + if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); } @@ -4661,6 +6210,7 @@ sctp_accept(struct socket *so, struct sockaddr **addr) stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; SCTP_TCB_UNLOCK(stcb); switch (store.sa.sa_family) { +#ifdef INET case AF_INET: { struct sockaddr_in *sin; @@ -4670,11 +6220,12 @@ sctp_accept(struct socket *so, struct sockaddr **addr) return (ENOMEM); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); - sin->sin_port = ((struct sockaddr_in *)&store)->sin_port; - sin->sin_addr = ((struct sockaddr_in *)&store)->sin_addr; + sin->sin_port = store.sin.sin_port; + sin->sin_addr = store.sin.sin_addr; *addr = (struct sockaddr *)sin; break; } +#endif #ifdef INET6 case AF_INET6: { @@ -4685,9 +6236,8 @@ sctp_accept(struct socket *so, struct sockaddr **addr) return (ENOMEM); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); - sin6->sin6_port = ((struct sockaddr_in6 *)&store)->sin6_port; - - sin6->sin6_addr = ((struct sockaddr_in6 *)&store)->sin6_addr; + sin6->sin6_port = store.sin6.sin6_port; + sin6->sin6_addr = store.sin6.sin6_addr; if ((error = sa6_recoverscope(sin6)) != 0) { SCTP_FREE_SONAME(sin6); return (error); @@ -4736,6 +6286,7 @@ sctp_accept(struct socket *so, struct sockaddr **addr) return (0); } +#ifdef INET int sctp_ingetaddr(struct socket *so, struct sockaddr **addr) { @@ -4756,7 +6307,7 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr) if (!inp) { SCTP_FREE_SONAME(sin); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return ECONNRESET; + return (ECONNRESET); } SCTP_INP_RLOCK(inp); sin->sin_port = inp->sctp_lport; @@ -4825,7 +6376,7 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr) SCTP_FREE_SONAME(sin); SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); - return ENOENT; + return (ENOENT); } } SCTP_INP_RUNLOCK(inp); @@ -4836,7 +6387,7 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr) int sctp_peeraddr(struct socket *so, struct sockaddr **addr) { - struct sockaddr_in *sin = (struct sockaddr_in *)*addr; + struct sockaddr_in *sin; int fnd; struct sockaddr_in *sin_a; struct sctp_inpcb *inp; @@ -4844,25 +6395,19 @@ sctp_peeraddr(struct socket *so, struct sockaddr **addr) struct sctp_nets *net; /* Do the malloc first in case it blocks. */ - inp = (struct sctp_inpcb *)so->so_pcb; - if ((inp == NULL) || - ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) { - /* UDP type and listeners will drop out here */ - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); - return (ENOTCONN); - } SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin); if (sin == NULL) return (ENOMEM); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); - /* We must recapture incase we blocked */ inp = (struct sctp_inpcb *)so->so_pcb; - if (!inp) { + if ((inp == NULL) || + ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) { + /* UDP type and listeners will drop out here */ SCTP_FREE_SONAME(sin); - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return ECONNRESET; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); + return (ENOTCONN); } SCTP_INP_RLOCK(inp); stcb = LIST_FIRST(&inp->sctp_asoc_list); @@ -4873,7 +6418,7 @@ sctp_peeraddr(struct socket *so, struct sockaddr **addr) if (stcb == NULL) { SCTP_FREE_SONAME(sin); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - return ECONNRESET; + return (ECONNRESET); } fnd = 0; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { @@ -4890,7 +6435,7 @@ sctp_peeraddr(struct socket *so, struct sockaddr **addr) /* No IPv4 address */ SCTP_FREE_SONAME(sin); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); - return ENOENT; + return (ENOENT); } (*addr) = (struct sockaddr *)sin; return (0); @@ -4916,3 +6461,5 @@ struct pr_usrreqs sctp_usrreqs = { .pru_sosend = sctp_sosend, .pru_soreceive = sctp_soreceive }; + +#endif diff --git a/freebsd/sys/netinet/sctp_var.h b/freebsd/sys/netinet/sctp_var.h index bff7f5de..3862b90b 100644 --- a/freebsd/sys/netinet/sctp_var.h +++ b/freebsd/sys/netinet/sctp_var.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,8 +30,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctp_var.h,v 1.24 2005/03/06 16:04:19 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -48,6 +48,31 @@ extern struct pr_usrreqs sctp_usrreqs; #define sctp_is_feature_on(inp, feature) ((inp->sctp_features & feature) == feature) #define sctp_is_feature_off(inp, feature) ((inp->sctp_features & feature) == 0) +#define sctp_stcb_feature_on(inp, stcb, feature) {\ + if (stcb) { \ + stcb->asoc.sctp_features |= feature; \ + } else if (inp) { \ + inp->sctp_features |= feature; \ + } \ +} +#define sctp_stcb_feature_off(inp, stcb, feature) {\ + if (stcb) { \ + stcb->asoc.sctp_features &= ~feature; \ + } else if (inp) { \ + inp->sctp_features &= ~feature; \ + } \ +} +#define sctp_stcb_is_feature_on(inp, stcb, feature) \ + (((stcb != NULL) && \ + ((stcb->asoc.sctp_features & feature) == feature)) || \ + ((stcb == NULL) && (inp != NULL) && \ + ((inp->sctp_features & feature) == feature))) +#define sctp_stcb_is_feature_off(inp, stcb, feature) \ + (((stcb != NULL) && \ + ((stcb->asoc.sctp_features & feature) == 0)) || \ + ((stcb == NULL) && (inp != NULL) && \ + ((inp->sctp_features & feature) == 0)) || \ + ((stcb == NULL) && (inp == NULL))) /* managing mobility_feature in inpcb (by micchie) */ #define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature) @@ -85,9 +110,9 @@ extern struct pr_usrreqs sctp_usrreqs; } \ } -#define sctp_free_a_strmoq(_stcb, _strmoq) { \ +#define sctp_free_a_strmoq(_stcb, _strmoq, _so_locked) { \ if ((_strmoq)->holds_key_ref) { \ - sctp_auth_key_release(stcb, sp->auth_keyid); \ + sctp_auth_key_release(stcb, sp->auth_keyid, _so_locked); \ (_strmoq)->holds_key_ref = 0; \ } \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_strmoq), (_strmoq)); \ @@ -103,9 +128,9 @@ extern struct pr_usrreqs sctp_usrreqs; } \ } -#define sctp_free_a_chunk(_stcb, _chk) { \ +#define sctp_free_a_chunk(_stcb, _chk, _so_locked) { \ if ((_chk)->holds_key_ref) {\ - sctp_auth_key_release((_stcb), (_chk)->auth_keyid); \ + sctp_auth_key_release((_stcb), (_chk)->auth_keyid, _so_locked); \ (_chk)->holds_key_ref = 0; \ } \ if (_stcb) { \ @@ -130,7 +155,7 @@ extern struct pr_usrreqs sctp_usrreqs; } #define sctp_alloc_a_chunk(_stcb, _chk) { \ - if (TAILQ_EMPTY(&(_stcb)->asoc.free_chunks)) { \ + if (TAILQ_EMPTY(&(_stcb)->asoc.free_chunks)) { \ (_chk) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_chunk), struct sctp_tmit_chunk); \ if ((_chk)) { \ SCTP_INCR_CHK_COUNT(); \ @@ -153,7 +178,6 @@ extern struct pr_usrreqs sctp_usrreqs; if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&(__net)->ref_count)) { \ (void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \ (void)SCTP_OS_TIMER_STOP(&(__net)->pmtu_timer.timer); \ - (void)SCTP_OS_TIMER_STOP(&(__net)->fr_timer.timer); \ if ((__net)->ro.ro_rt) { \ RTFREE((__net)->ro.ro_rt); \ (__net)->ro.ro_rt = NULL; \ @@ -163,7 +187,7 @@ extern struct pr_usrreqs sctp_usrreqs; (__net)->ro._s_addr = NULL; \ } \ (__net)->src_addr_selected = 0; \ - (__net)->dest_state = SCTP_ADDR_NOT_REACHABLE; \ + (__net)->dest_state &= ~SCTP_ADDR_REACHABLE; \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_net), (__net)); \ SCTP_DECR_RADDR_COUNT(); \ } \ @@ -210,7 +234,7 @@ extern struct pr_usrreqs sctp_usrreqs; #define sctp_mbuf_crush(data) do { \ struct mbuf *_m; \ _m = (data); \ - while(_m && (SCTP_BUF_LEN(_m) == 0)) { \ + while (_m && (SCTP_BUF_LEN(_m) == 0)) { \ (data) = SCTP_BUF_NEXT(_m); \ SCTP_BUF_NEXT(_m) = NULL; \ sctp_m_free(_m); \ @@ -286,6 +310,8 @@ extern struct pr_usrreqs sctp_usrreqs; #endif +#define SCTP_PF_ENABLED(_net) (_net->pf_threshold < _net->failure_threshold) +#define SCTP_NET_IS_PF(_net) (_net->pf_threshold < _net->error_count) struct sctp_nets; struct sctp_inpcb; @@ -298,9 +324,16 @@ int sctp_disconnect(struct socket *so); void sctp_ctlinput __P((int, struct sockaddr *, void *)); int sctp_ctloutput __P((struct socket *, struct sockopt *)); + +#ifdef INET void sctp_input_with_port __P((struct mbuf *, int, uint16_t)); + +#endif +#ifdef INET void sctp_input __P((struct mbuf *, int)); -void sctp_pathmtu_adjustment __P((struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, uint16_t)); + +#endif +void sctp_pathmtu_adjustment __P((struct sctp_tcb *, uint16_t)); void sctp_drain __P((void)); void sctp_init __P((void)); diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c index e9a279c9..60888bef 100644 --- a/freebsd/sys/netinet/sctputil.c +++ b/freebsd/sys/netinet/sctputil.c @@ -2,16 +2,18 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -30,8 +32,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/* $KAME: sctputil.c,v 1.37 2005/03/07 23:26:09 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include #include #ifdef INET6 +#include #endif #include #include @@ -49,17 +50,21 @@ __FBSDID("$FreeBSD$"); #include /* for sctp_deliver_data() */ #include #include -#include #include +#include +#include +#include #ifndef KTR_SCTP #define KTR_SCTP KTR_SUBSYS #endif +extern struct sctp_cc_functions sctp_cc_functions[]; +extern struct sctp_ss_functions sctp_ss_functions[]; + void -sctp_sblog(struct sockbuf *sb, - struct sctp_tcb *stcb, int from, int incr) +sctp_sblog(struct sockbuf *sb, struct sctp_tcb *stcb, int from, int incr) { struct sctp_cwnd_log sctp_clog; @@ -103,7 +108,6 @@ sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc) sctp_clog.x.misc.log4); } - void rto_logging(struct sctp_nets *net, int from) { @@ -111,7 +115,7 @@ rto_logging(struct sctp_nets *net, int from) memset(&sctp_clog, 0, sizeof(sctp_clog)); sctp_clog.x.rto.net = (void *)net; - sctp_clog.x.rto.rtt = net->prev_rtt; + sctp_clog.x.rto.rtt = net->rtt / 1000; SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_EVENT_RTT, from, @@ -119,7 +123,6 @@ rto_logging(struct sctp_nets *net, int from) sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } void @@ -140,7 +143,6 @@ sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t tsn, uint16_t sseq, uint16 sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } void @@ -162,7 +164,6 @@ sctp_log_nagle_event(struct sctp_tcb *stcb, int action) sctp_clog.x.misc.log4); } - void sctp_log_sack(uint32_t old_cumack, uint32_t cumack, uint32_t tsn, uint16_t gaps, uint16_t dups, int from) { @@ -201,8 +202,7 @@ sctp_log_map(uint32_t map, uint32_t cum, uint32_t high, int from) } void -sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn, - int from) +sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn, int from) { struct sctp_cwnd_log sctp_clog; @@ -217,10 +217,8 @@ sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn, sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } - void sctp_log_mb(struct mbuf *m, int from) { @@ -246,10 +244,8 @@ sctp_log_mb(struct mbuf *m, int from) sctp_clog.x.misc.log4); } - void -sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk, - int from) +sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk, int from) { struct sctp_cwnd_log sctp_clog; @@ -275,7 +271,6 @@ sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_rea sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } void @@ -311,7 +306,6 @@ sctp_log_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net, int augment, uint8_t sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } void @@ -356,7 +350,6 @@ sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from) sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } void @@ -384,7 +377,6 @@ sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *net, int error, int b sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } void @@ -439,7 +431,6 @@ sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mb sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } void @@ -452,7 +443,7 @@ sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d) } void -sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t cumtsn, uint32_t wake_cnt, int from) +sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t wake_cnt, int from) { struct sctp_cwnd_log sctp_clog; @@ -495,11 +486,10 @@ sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t cumtsn, uint32_t wake_cnt, int f sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } void -sctp_log_block(uint8_t from, struct socket *so, struct sctp_association *asoc, int sendlen) +sctp_log_block(uint8_t from, struct sctp_association *asoc, int sendlen) { struct sctp_cwnd_log sctp_clog; @@ -517,11 +507,10 @@ sctp_log_block(uint8_t from, struct socket *so, struct sctp_association *asoc, i sctp_clog.x.misc.log2, sctp_clog.x.misc.log3, sctp_clog.x.misc.log4); - } int -sctp_fill_stat_log(void *optval, size_t *optsize) +sctp_fill_stat_log(void *optval SCTP_UNUSED, size_t *optsize SCTP_UNUSED) { /* May need to fix this if ktrdump does not work */ return (0); @@ -699,7 +688,7 @@ sctp_auditing(int from, struct sctp_inpcb *inp, struct sctp_tcb *stcb, } if (lnet->flight_size != tot_out) { SCTP_PRINTF("net:%p flight was %d corrected to %d\n", - lnet, lnet->flight_size, + (void *)lnet, lnet->flight_size, tot_out); lnet->flight_size = tot_out; } @@ -737,15 +726,14 @@ sctp_stop_timers_for_shutdown(struct sctp_tcb *stcb) asoc = &stcb->asoc; - (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer); TAILQ_FOREACH(net, &asoc->nets, sctp_next) { - (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer); (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer); + (void)SCTP_OS_TIMER_STOP(&net->hb_timer.timer); } } @@ -799,7 +787,7 @@ sctp_get_prev_mtu(uint32_t val) * entry, just return val. */ uint32_t -sctp_get_next_mtu(struct sctp_inpcb *inp, uint32_t val) +sctp_get_next_mtu(uint32_t val) { /* select another MTU that is just bigger than this one */ uint32_t i; @@ -870,28 +858,29 @@ retry: } uint32_t -sctp_select_a_tag(struct sctp_inpcb *inp, uint16_t lport, uint16_t rport, int save_in_twait) +sctp_select_a_tag(struct sctp_inpcb *inp, uint16_t lport, uint16_t rport, int check) { - uint32_t x, not_done; + uint32_t x; struct timeval now; - (void)SCTP_GETTIME_TIMEVAL(&now); - not_done = 1; - while (not_done) { + if (check) { + (void)SCTP_GETTIME_TIMEVAL(&now); + } + for (;;) { x = sctp_select_initial_TSN(&inp->sctp_ep); if (x == 0) { /* we never use 0 */ continue; } - if (sctp_is_vtag_good(inp, x, lport, rport, &now, save_in_twait)) { - not_done = 0; + if (!check || sctp_is_vtag_good(x, lport, rport, &now)) { + break; } } return (x); } int -sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, +sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint32_t override_tag, uint32_t vrf_id) { struct sctp_association *asoc; @@ -912,40 +901,41 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, asoc = &stcb->asoc; /* init all variables to a known value. */ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE); - asoc->max_burst = m->sctp_ep.max_burst; - asoc->heart_beat_delay = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); - asoc->cookie_life = m->sctp_ep.def_cookie_life; - asoc->sctp_cmt_on_off = m->sctp_cmt_on_off; + asoc->max_burst = inp->sctp_ep.max_burst; + asoc->fr_max_burst = inp->sctp_ep.fr_max_burst; + asoc->heart_beat_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); + asoc->cookie_life = inp->sctp_ep.def_cookie_life; + asoc->sctp_cmt_on_off = inp->sctp_cmt_on_off; + asoc->ecn_allowed = inp->sctp_ecn_enable; asoc->sctp_nr_sack_on_off = (uint8_t) SCTP_BASE_SYSCTL(sctp_nr_sack_on_off); - asoc->sctp_cmt_pf = (uint8_t) SCTP_BASE_SYSCTL(sctp_cmt_pf); - asoc->sctp_frag_point = m->sctp_frag_point; -#ifdef INET - asoc->default_tos = m->ip_inp.inp.inp_ip_tos; -#else - asoc->default_tos = 0; -#endif - + asoc->sctp_cmt_pf = (uint8_t) 0; + asoc->sctp_frag_point = inp->sctp_frag_point; + asoc->sctp_features = inp->sctp_features; + asoc->default_dscp = inp->sctp_ep.default_dscp; #ifdef INET6 - asoc->default_flowlabel = ((struct in6pcb *)m)->in6p_flowinfo; -#else - asoc->default_flowlabel = 0; + if (inp->sctp_ep.default_flowlabel) { + asoc->default_flowlabel = inp->sctp_ep.default_flowlabel; + } else { + if (inp->ip_inp.inp.inp_flags & IN6P_AUTOFLOWLABEL) { + asoc->default_flowlabel = sctp_select_initial_TSN(&inp->sctp_ep); + asoc->default_flowlabel &= 0x000fffff; + asoc->default_flowlabel |= 0x80000000; + } else { + asoc->default_flowlabel = 0; + } + } #endif asoc->sb_send_resv = 0; if (override_tag) { asoc->my_vtag = override_tag; } else { - asoc->my_vtag = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 1); + asoc->my_vtag = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 1); } /* Get the nonce tags */ - asoc->my_vtag_nonce = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 0); - asoc->peer_vtag_nonce = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 0); + asoc->my_vtag_nonce = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 0); + asoc->peer_vtag_nonce = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 0); asoc->vrf_id = vrf_id; - if (sctp_is_feature_on(m, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) - asoc->hb_is_disabled = 1; - else - asoc->hb_is_disabled = 0; - #ifdef SCTP_ASOCLOG_OF_TSNS asoc->tsn_in_at = 0; asoc->tsn_out_at = 0; @@ -960,7 +950,7 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, asoc->refcnt = 0; asoc->assoc_up_sent = 0; asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number = asoc->sending_seq = - sctp_select_initial_TSN(&m->sctp_ep); + sctp_select_initial_TSN(&inp->sctp_ep); asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1; /* we are optimisitic here */ asoc->peer_supports_pktdrop = 1; @@ -971,7 +961,6 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, asoc->last_net_cmt_send_started = NULL; /* This will need to be adjusted */ - asoc->last_cwr_tsn = asoc->init_seq_number - 1; asoc->last_acked_seq = asoc->init_seq_number - 1; asoc->advanced_peer_ack_point = asoc->last_acked_seq; asoc->asconf_seq_in = asoc->last_acked_seq; @@ -979,55 +968,42 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, /* here we are different, we hold the next one we expect */ asoc->str_reset_seq_in = asoc->last_acked_seq + 1; - asoc->initial_init_rto_max = m->sctp_ep.initial_init_rto_max; - asoc->initial_rto = m->sctp_ep.initial_rto; + asoc->initial_init_rto_max = inp->sctp_ep.initial_init_rto_max; + asoc->initial_rto = inp->sctp_ep.initial_rto; - asoc->max_init_times = m->sctp_ep.max_init_times; - asoc->max_send_times = m->sctp_ep.max_send_times; - asoc->def_net_failure = m->sctp_ep.def_net_failure; + asoc->max_init_times = inp->sctp_ep.max_init_times; + asoc->max_send_times = inp->sctp_ep.max_send_times; + asoc->def_net_failure = inp->sctp_ep.def_net_failure; + asoc->def_net_pf_threshold = inp->sctp_ep.def_net_pf_threshold; asoc->free_chunk_cnt = 0; asoc->iam_blocking = 0; - /* ECN Nonce initialization */ - asoc->context = m->sctp_context; - asoc->def_send = m->def_send; - asoc->ecn_nonce_allowed = 0; - asoc->receiver_nonce_sum = 1; - asoc->nonce_sum_expect_base = 1; - asoc->nonce_sum_check = 1; - asoc->nonce_resync_tsn = 0; - asoc->nonce_wait_for_ecne = 0; - asoc->nonce_wait_tsn = 0; - asoc->delayed_ack = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); - asoc->sack_freq = m->sctp_ep.sctp_sack_freq; + asoc->context = inp->sctp_context; + asoc->local_strreset_support = inp->local_strreset_support; + asoc->def_send = inp->def_send; + asoc->delayed_ack = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); + asoc->sack_freq = inp->sctp_ep.sctp_sack_freq; asoc->pr_sctp_cnt = 0; asoc->total_output_queue_size = 0; - if (m->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - struct in6pcb *inp6; - - /* Its a V6 socket */ - inp6 = (struct in6pcb *)m; - asoc->ipv6_addr_legal = 1; - /* Now look at the binding flag to see if V4 will be legal */ - if (SCTP_IPV6_V6ONLY(inp6) == 0) { - asoc->ipv4_addr_legal = 1; + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + asoc->scope.ipv6_addr_legal = 1; + if (SCTP_IPV6_V6ONLY(inp) == 0) { + asoc->scope.ipv4_addr_legal = 1; } else { - /* V4 addresses are NOT legal on the association */ - asoc->ipv4_addr_legal = 0; + asoc->scope.ipv4_addr_legal = 0; } } else { - /* Its a V4 socket, no - V6 */ - asoc->ipv4_addr_legal = 1; - asoc->ipv6_addr_legal = 0; + asoc->scope.ipv6_addr_legal = 0; + asoc->scope.ipv4_addr_legal = 1; } - asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(m->sctp_socket), SCTP_MINIMAL_RWND); - asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(m->sctp_socket); + asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(inp->sctp_socket), SCTP_MINIMAL_RWND); + asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(inp->sctp_socket); - asoc->smallest_mtu = m->sctp_frag_point; - asoc->minrto = m->sctp_ep.sctp_minrto; - asoc->maxrto = m->sctp_ep.sctp_maxrto; + asoc->smallest_mtu = inp->sctp_frag_point; + asoc->minrto = inp->sctp_ep.sctp_minrto; + asoc->maxrto = inp->sctp_ep.sctp_maxrto; asoc->locked_on_sending = NULL; asoc->stream_locked_on = 0; @@ -1044,77 +1020,20 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, /* Setup to fill the hb random cache at first HB */ asoc->hb_random_idx = 4; - asoc->sctp_autoclose_ticks = m->sctp_ep.auto_close_time; + asoc->sctp_autoclose_ticks = inp->sctp_ep.auto_close_time; - /* - * JRS - Pick the default congestion control module based on the - * sysctl. - */ - switch (m->sctp_ep.sctp_default_cc_module) { - /* JRS - Standard TCP congestion control */ - case SCTP_CC_RFC2581: - { - stcb->asoc.congestion_control_module = SCTP_CC_RFC2581; - stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; - stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr; - stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; - stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; - stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; - stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; - break; - } - /* JRS - High Speed TCP congestion control (Floyd) */ - case SCTP_CC_HSTCP: - { - stcb->asoc.congestion_control_module = SCTP_CC_HSTCP; - stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; - stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr; - stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; - stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; - stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; - stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; - break; - } - /* JRS - HTCP congestion control */ - case SCTP_CC_HTCP: - { - stcb->asoc.congestion_control_module = SCTP_CC_HTCP; - stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param; - stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr; - stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout; - stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo; - stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; - stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer; - break; - } - /* JRS - By default, use RFC2581 */ - default: - { - stcb->asoc.congestion_control_module = SCTP_CC_RFC2581; - stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; - stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr; - stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; - stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; - stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; - stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; - stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; - break; - } - } + stcb->asoc.congestion_control_module = inp->sctp_ep.sctp_default_cc_module; + stcb->asoc.cc_functions = sctp_cc_functions[inp->sctp_ep.sctp_default_cc_module]; + + stcb->asoc.stream_scheduling_module = inp->sctp_ep.sctp_default_ss_module; + stcb->asoc.ss_functions = sctp_ss_functions[inp->sctp_ep.sctp_default_ss_module]; /* * Now the stream parameters, here we allocate space for all streams * that we request by default. */ asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams = - m->sctp_ep.pre_open_stream_count; + inp->sctp_ep.pre_open_stream_count; SCTP_MALLOC(asoc->strmout, struct sctp_stream_out *, asoc->streamoutcnt * sizeof(struct sctp_stream_out), SCTP_M_STRMO); @@ -1132,13 +1051,15 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, * that were dropped must be notified to the upper layer as * failed to send. */ - asoc->strmout[i].next_sequence_sent = 0x0; + asoc->strmout[i].next_sequence_send = 0x0; TAILQ_INIT(&asoc->strmout[i].outqueue); + asoc->strmout[i].chunks_on_queues = 0; asoc->strmout[i].stream_no = i; asoc->strmout[i].last_msg_incomplete = 0; - asoc->strmout[i].next_spoke.tqe_next = 0; - asoc->strmout[i].next_spoke.tqe_prev = 0; + asoc->ss_functions.sctp_ss_init_stream(&asoc->strmout[i], NULL); } + asoc->ss_functions.sctp_ss_init(stcb, asoc, 0); + /* Now the mapping array */ asoc->mapping_array_size = SCTP_INITIAL_MAPPING_ARRAY; SCTP_MALLOC(asoc->mapping_array, uint8_t *, asoc->mapping_array_size, @@ -1161,14 +1082,13 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, /* Now the init of the other outqueues */ TAILQ_INIT(&asoc->free_chunks); - TAILQ_INIT(&asoc->out_wheel); TAILQ_INIT(&asoc->control_send_queue); TAILQ_INIT(&asoc->asconf_send_queue); TAILQ_INIT(&asoc->send_queue); TAILQ_INIT(&asoc->sent_queue); TAILQ_INIT(&asoc->reasmqueue); TAILQ_INIT(&asoc->resetHead); - asoc->max_inbound_streams = m->sctp_ep.max_open_streams_intome; + asoc->max_inbound_streams = inp->sctp_ep.max_open_streams_intome; TAILQ_INIT(&asoc->asconf_queue); /* authentication fields */ asoc->authinfo.random = NULL; @@ -1179,6 +1099,7 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, asoc->authinfo.recv_keyid = 0; LIST_INIT(&asoc->shared_keys); asoc->marked_retrans = 0; + asoc->port = inp->sctp_ep.port; asoc->timoinit = 0; asoc->timodata = 0; asoc->timosack = 0; @@ -1200,34 +1121,34 @@ sctp_print_mapping_array(struct sctp_association *asoc) { unsigned int i, limit; - printf("Mapping array size: %d, baseTSN: %8.8x, cumAck: %8.8x, highestTSN: (%8.8x, %8.8x).\n", + SCTP_PRINTF("Mapping array size: %d, baseTSN: %8.8x, cumAck: %8.8x, highestTSN: (%8.8x, %8.8x).\n", asoc->mapping_array_size, asoc->mapping_array_base_tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, asoc->highest_tsn_inside_nr_map); for (limit = asoc->mapping_array_size; limit > 1; limit--) { - if (asoc->mapping_array[limit - 1]) { + if (asoc->mapping_array[limit - 1] != 0) { break; } } - printf("Renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit); + SCTP_PRINTF("Renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit); for (i = 0; i < limit; i++) { - printf("%2.2x%c", asoc->mapping_array[i], ((i + 1) % 16) ? ' ' : '\n'); + SCTP_PRINTF("%2.2x%c", asoc->mapping_array[i], ((i + 1) % 16) ? ' ' : '\n'); } if (limit % 16) - printf("\n"); + SCTP_PRINTF("\n"); for (limit = asoc->mapping_array_size; limit > 1; limit--) { if (asoc->nr_mapping_array[limit - 1]) { break; } } - printf("Non renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit); + SCTP_PRINTF("Non renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit); for (i = 0; i < limit; i++) { - printf("%2.2x%c", asoc->nr_mapping_array[i], ((i + 1) % 16) ? ' ' : '\n'); + SCTP_PRINTF("%2.2x%c", asoc->nr_mapping_array[i], ((i + 1) % 16) ? ' ' : '\n'); } if (limit % 16) - printf("\n"); + SCTP_PRINTF("\n"); } int @@ -1354,10 +1275,6 @@ select_a_new_ep: /* We won't be staying here */ SCTP_INP_DECR_REF(it->inp); atomic_add_int(&it->stcb->asoc.refcnt, -1); - if (sctp_it_ctl.iterator_flags & - SCTP_ITERATOR_MUST_EXIT) { - goto done_with_iterator; - } if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_STOP_CUR_IT) { sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_IT; @@ -1369,7 +1286,7 @@ select_a_new_ep: goto no_stcb; } /* If we reach here huh? */ - printf("Unknown it ctl flag %x\n", + SCTP_PRINTF("Unknown it ctl flag %x\n", sctp_it_ctl.iterator_flags); sctp_it_ctl.iterator_flags = 0; } @@ -1419,27 +1336,22 @@ no_stcb: void sctp_iterator_worker(void) { - struct sctp_iterator *it = NULL; + struct sctp_iterator *it, *nit; /* This function is called with the WQ lock in place */ sctp_it_ctl.iterator_running = 1; - sctp_it_ctl.cur_it = it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); - while (it) { + TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) { + sctp_it_ctl.cur_it = it; /* now lets work on this one */ TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); SCTP_IPI_ITERATOR_WQ_UNLOCK(); CURVNET_SET(it->vn); sctp_iterator_work(it); - + sctp_it_ctl.cur_it = NULL; CURVNET_RESTORE(); SCTP_IPI_ITERATOR_WQ_LOCK(); - if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_MUST_EXIT) { - sctp_it_ctl.cur_it = NULL; - break; - } /* sa_ignore FREED_MEMORY */ - sctp_it_ctl.cur_it = it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); } sctp_it_ctl.iterator_running = 0; return; @@ -1450,7 +1362,7 @@ static void sctp_handle_addr_wq(void) { /* deal with the ADDR wq from the rtsock calls */ - struct sctp_laddr *wi; + struct sctp_laddr *wi, *nwi; struct sctp_asconf_iterator *asc; SCTP_MALLOC(asc, struct sctp_asconf_iterator *, @@ -1467,12 +1379,10 @@ sctp_handle_addr_wq(void) asc->cnt = 0; SCTP_WQ_ADDR_LOCK(); - wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq)); - while (wi != NULL) { + LIST_FOREACH_SAFE(wi, &SCTP_BASE_INFO(addr_wq), sctp_nxt_addr, nwi) { LIST_REMOVE(wi, sctp_nxt_addr); LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr); asc->cnt++; - wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq)); } SCTP_WQ_ADDR_UNLOCK(); @@ -1490,9 +1400,6 @@ sctp_handle_addr_wq(void) } } -int retcode = 0; -int cur_oerr = 0; - void sctp_timeout_handler(void *t) { @@ -1501,7 +1408,7 @@ sctp_timeout_handler(void *t) struct sctp_nets *net; struct sctp_timer *tmr; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif @@ -1523,7 +1430,7 @@ sctp_timeout_handler(void *t) if (tmr->self != (void *)tmr) { /* * SCTP_PRINTF("Stale SCTP timer fired (%p), ignoring...\n", - * tmr); + * (void *)tmr); */ CURVNET_RESTORE(); return; @@ -1547,7 +1454,7 @@ sctp_timeout_handler(void *t) type = tmr->type; if (inp) { SCTP_INP_INCR_REF(inp); - if ((inp->sctp_socket == 0) && + if ((inp->sctp_socket == NULL) && ((tmr->type != SCTP_TIMER_TYPE_INPKILL) && (tmr->type != SCTP_TIMER_TYPE_INIT) && (tmr->type != SCTP_TIMER_TYPE_SEND) && @@ -1653,9 +1560,7 @@ sctp_timeout_handler(void *t) stcb->asoc.num_send_timers_up = 0; } SCTP_TCB_LOCK_ASSERT(stcb); - cur_oerr = stcb->asoc.overall_error_count; - retcode = sctp_t3rxt_timer(inp, stcb, net); - if (retcode) { + if (sctp_t3rxt_timer(inp, stcb, net)) { /* no need to unlock on tcb its gone */ goto out_decr; @@ -1666,8 +1571,7 @@ sctp_timeout_handler(void *t) #endif sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED); if ((stcb->asoc.num_send_timers_up == 0) && - (stcb->asoc.sent_queue_cnt > 0) - ) { + (stcb->asoc.sent_queue_cnt > 0)) { struct sctp_tmit_chunk *chk; /* @@ -1697,11 +1601,10 @@ sctp_timeout_handler(void *t) case SCTP_TIMER_TYPE_RECV: if ((stcb == NULL) || (inp == NULL)) { break; - } { - SCTP_STAT_INCR(sctps_timosack); - stcb->asoc.timosack++; - sctp_send_sack(stcb); } + SCTP_STAT_INCR(sctps_timosack); + stcb->asoc.timosack++; + sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); #ifdef SCTP_AUDITING_ENABLED sctp_auditing(4, inp, stcb, net); #endif @@ -1723,33 +1626,20 @@ sctp_timeout_handler(void *t) sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_NOT_LOCKED); break; case SCTP_TIMER_TYPE_HEARTBEAT: - { - struct sctp_nets *lnet; - int cnt_of_unconf = 0; - - if ((stcb == NULL) || (inp == NULL)) { - break; - } - SCTP_STAT_INCR(sctps_timoheartbeat); - stcb->asoc.timoheartbeat++; - TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) { - if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) && - (lnet->dest_state & SCTP_ADDR_REACHABLE)) { - cnt_of_unconf++; - } - } - if (cnt_of_unconf == 0) { - if (sctp_heartbeat_timer(inp, stcb, lnet, - cnt_of_unconf)) { - /* no need to unlock on tcb its gone */ - goto out_decr; - } - } + if ((stcb == NULL) || (inp == NULL) || (net == NULL)) { + break; + } + SCTP_STAT_INCR(sctps_timoheartbeat); + stcb->asoc.timoheartbeat++; + if (sctp_heartbeat_timer(inp, stcb, net)) { + /* no need to unlock on tcb its gone */ + goto out_decr; + } #ifdef SCTP_AUDITING_ENABLED - sctp_auditing(4, inp, stcb, lnet); + sctp_auditing(4, inp, stcb, net); #endif - sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, - stcb->sctp_ep, stcb, lnet); + if (!(net->dest_state & SCTP_ADDR_NOHB)) { + sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_HB_TMR, SCTP_SO_NOT_LOCKED); } break; @@ -1829,8 +1719,7 @@ sctp_timeout_handler(void *t) break; } SCTP_STAT_INCR(sctps_timoshutdownguard); - sctp_abort_an_association(inp, stcb, - SCTP_SHUTDOWN_GUARD_EXPIRES, NULL, SCTP_SO_NOT_LOCKED); + sctp_abort_an_association(inp, stcb, NULL, SCTP_SO_NOT_LOCKED); /* no need to unlock on tcb its gone */ goto out_decr; @@ -1845,14 +1734,6 @@ sctp_timeout_handler(void *t) SCTP_STAT_INCR(sctps_timostrmrst); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_TMR, SCTP_SO_NOT_LOCKED); break; - case SCTP_TIMER_TYPE_EARLYFR: - /* Need to do FR of things for net */ - if ((stcb == NULL) || (inp == NULL)) { - break; - } - SCTP_STAT_INCR(sctps_timoearlyfr); - sctp_early_fr_timer(inp, stcb, net); - break; case SCTP_TIMER_TYPE_ASCONF: if ((stcb == NULL) || (inp == NULL)) { break; @@ -1892,7 +1773,7 @@ sctp_timeout_handler(void *t) /* Can we free it yet? */ SCTP_INP_DECR_REF(inp); sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_1); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -1901,7 +1782,7 @@ sctp_timeout_handler(void *t) atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_2); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif /* @@ -1929,7 +1810,7 @@ sctp_timeout_handler(void *t) SCTPDBG(SCTP_DEBUG_TIMER1, "sctp_timeout_handler:unknown timer %d\n", tmr->type); break; - }; + } #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xF1, (uint8_t) tmr->type); if (inp) @@ -1963,14 +1844,12 @@ void sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net) { - int to_ticks; + uint32_t to_ticks; struct sctp_timer *tmr; if ((t_type != SCTP_TIMER_TYPE_ADDR_WQ) && (inp == NULL)) return; - to_ticks = 0; - tmr = NULL; if (stcb) { SCTP_TCB_LOCK_ASSERT(stcb); @@ -2050,71 +1929,38 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, * though we use a different timer. We also add the HB timer * PLUS a random jitter. */ - if ((inp == NULL) || (stcb == NULL)) { + if ((inp == NULL) || (stcb == NULL) || (net == NULL)) { return; } else { uint32_t rndval; - uint8_t this_random; - int cnt_of_unconf = 0; - struct sctp_nets *lnet; - - TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) { - if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) && - (lnet->dest_state & SCTP_ADDR_REACHABLE)) { - cnt_of_unconf++; - } - } - if (cnt_of_unconf) { - net = lnet = NULL; - (void)sctp_heartbeat_timer(inp, stcb, lnet, cnt_of_unconf); - } - if (stcb->asoc.hb_random_idx > 3) { - rndval = sctp_select_initial_TSN(&inp->sctp_ep); - memcpy(stcb->asoc.hb_random_values, &rndval, - sizeof(stcb->asoc.hb_random_values)); - stcb->asoc.hb_random_idx = 0; - } - this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx]; - stcb->asoc.hb_random_idx++; - stcb->asoc.hb_ect_randombit = 0; - /* - * this_random will be 0 - 256 ms RTO is in ms. - */ - if ((stcb->asoc.hb_is_disabled) && - (cnt_of_unconf == 0)) { + uint32_t jitter; + + if ((net->dest_state & SCTP_ADDR_NOHB) && + !(net->dest_state & SCTP_ADDR_UNCONFIRMED)) { return; } - if (net) { - int delay; - - delay = stcb->asoc.heart_beat_delay; - TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) { - if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) && - ((lnet->dest_state & SCTP_ADDR_OUT_OF_SCOPE) == 0) && - (lnet->dest_state & SCTP_ADDR_REACHABLE)) { - delay = 0; - } - } - if (net->RTO == 0) { - /* Never been checked */ - to_ticks = this_random + stcb->asoc.initial_rto + delay; - } else { - /* set rto_val to the ms */ - to_ticks = delay + net->RTO + this_random; - } + if (net->RTO == 0) { + to_ticks = stcb->asoc.initial_rto; } else { - if (cnt_of_unconf) { - to_ticks = this_random + stcb->asoc.initial_rto; - } else { - to_ticks = stcb->asoc.heart_beat_delay + this_random + stcb->asoc.initial_rto; - } + to_ticks = net->RTO; + } + rndval = sctp_select_initial_TSN(&inp->sctp_ep); + jitter = rndval % to_ticks; + if (jitter >= (to_ticks >> 1)) { + to_ticks = to_ticks + (jitter - (to_ticks >> 1)); + } else { + to_ticks = to_ticks - jitter; + } + if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED) && + !(net->dest_state & SCTP_ADDR_PF)) { + to_ticks += net->heart_beat_delay; } /* * Now we must convert the to_ticks that are now in * ms to ticks. */ to_ticks = MSEC_TO_TICKS(to_ticks); - tmr = &stcb->asoc.hb_timer; + tmr = &net->hb_timer; } break; case SCTP_TIMER_TYPE_COOKIE: @@ -2174,6 +2020,9 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, if (net == NULL) { return; } + if (net->dest_state & SCTP_ADDR_NO_PMTUD) { + return; + } to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_PMTU]; tmr = &net->pmtu_timer; break; @@ -2215,35 +2064,6 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, } tmr = &stcb->asoc.strreset_timer; break; - - case SCTP_TIMER_TYPE_EARLYFR: - { - unsigned int msec; - - if ((stcb == NULL) || (net == NULL)) { - return; - } - if (net->flight_size > net->cwnd) { - /* no need to start */ - return; - } - SCTP_STAT_INCR(sctps_earlyfrstart); - if (net->lastsa == 0) { - /* Hmm no rtt estimate yet? */ - msec = stcb->asoc.initial_rto >> 2; - } else { - msec = ((net->lastsa >> 2) + net->lastsv) >> 1; - } - if (msec < SCTP_BASE_SYSCTL(sctp_early_fr_msec)) { - msec = SCTP_BASE_SYSCTL(sctp_early_fr_msec); - if (msec < SCTP_MINFR_MSEC_FLOOR) { - msec = SCTP_MINFR_MSEC_FLOOR; - } - } - to_ticks = MSEC_TO_TICKS(msec); - tmr = &net->fr_timer; - } - break; case SCTP_TIMER_TYPE_ASCONF: /* * Here the timer comes from the stcb but its value is from @@ -2285,10 +2105,10 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, __FUNCTION__, t_type); return; break; - }; + } if ((to_ticks <= 0) || (tmr == NULL)) { SCTPDBG(SCTP_DEBUG_TIMER1, "%s: %d:software error to_ticks:%d tmr:%p not set ??\n", - __FUNCTION__, t_type, to_ticks, tmr); + __FUNCTION__, t_type, to_ticks, (void *)tmr); return; } if (SCTP_OS_TIMER_PENDING(&tmr->timer)) { @@ -2338,13 +2158,6 @@ sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, case SCTP_TIMER_TYPE_ADDR_WQ: tmr = &SCTP_BASE_INFO(addr_wq_timer); break; - case SCTP_TIMER_TYPE_EARLYFR: - if ((stcb == NULL) || (net == NULL)) { - return; - } - tmr = &net->fr_timer; - SCTP_STAT_INCR(sctps_earlyfrstop); - break; case SCTP_TIMER_TYPE_SEND: if ((stcb == NULL) || (net == NULL)) { return; @@ -2370,10 +2183,10 @@ sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, tmr = &net->rxt_timer; break; case SCTP_TIMER_TYPE_HEARTBEAT: - if (stcb == NULL) { + if ((stcb == NULL) || (net == NULL)) { return; } - tmr = &stcb->asoc.hb_timer; + tmr = &net->hb_timer; break; case SCTP_TIMER_TYPE_COOKIE: if ((stcb == NULL) || (net == NULL)) { @@ -2453,7 +2266,7 @@ sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n", __FUNCTION__, t_type); break; - }; + } if (tmr == NULL) { return; } @@ -2528,20 +2341,20 @@ sctp_mtu_size_reset(struct sctp_inpcb *inp, * given an association and starting time of the current RTT period return * RTO in number of msecs net should point to the current network */ + uint32_t sctp_calculate_rto(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_nets *net, struct timeval *told, - int safe) + int safe, int rtt_from_sack) { /*- * given an association and the starting time of the current RTT * period (in value1/value2) return RTO in number of msecs. */ - int calc_time = 0; - int o_calctime; - uint32_t new_rto = 0; + int32_t rtt; /* RTT in ms */ + uint32_t new_rto; int first_measure = 0; struct timeval now, then, *old; @@ -2560,81 +2373,72 @@ sctp_calculate_rto(struct sctp_tcb *stcb, /* 1. calculate new RTT */ /************************/ /* get the current time */ - (void)SCTP_GETTIME_TIMEVAL(&now); - /* compute the RTT value */ - if ((u_long)now.tv_sec > (u_long)old->tv_sec) { - calc_time = ((u_long)now.tv_sec - (u_long)old->tv_sec) * 1000; - if ((u_long)now.tv_usec > (u_long)old->tv_usec) { - calc_time += (((u_long)now.tv_usec - - (u_long)old->tv_usec) / 1000); - } else if ((u_long)now.tv_usec < (u_long)old->tv_usec) { - /* Borrow 1,000ms from current calculation */ - calc_time -= 1000; - /* Add in the slop over */ - calc_time += ((int)now.tv_usec / 1000); - /* Add in the pre-second ms's */ - calc_time += (((int)1000000 - (int)old->tv_usec) / 1000); - } - } else if ((u_long)now.tv_sec == (u_long)old->tv_sec) { - if ((u_long)now.tv_usec > (u_long)old->tv_usec) { - calc_time = ((u_long)now.tv_usec - - (u_long)old->tv_usec) / 1000; - } else if ((u_long)now.tv_usec < (u_long)old->tv_usec) { - /* impossible .. garbage in nothing out */ - goto calc_rto; - } else if ((u_long)now.tv_usec == (u_long)old->tv_usec) { - /* - * We have to have 1 usec :-D this must be the - * loopback. - */ - calc_time = 1; + if (stcb->asoc.use_precise_time) { + (void)SCTP_GETPTIME_TIMEVAL(&now); + } else { + (void)SCTP_GETTIME_TIMEVAL(&now); + } + timevalsub(&now, old); + /* store the current RTT in us */ + net->rtt = (uint64_t) 1000000 *(uint64_t) now.tv_sec + + (uint64_t) now.tv_usec; + + /* computer rtt in ms */ + rtt = net->rtt / 1000; + if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) { + /* + * Tell the CC module that a new update has just occurred + * from a sack + */ + (*asoc->cc_functions.sctp_rtt_calculated) (stcb, net, &now); + } + /* + * Do we need to determine the lan? We do this only on sacks i.e. + * RTT being determined from data not non-data (HB/INIT->INITACK). + */ + if ((rtt_from_sack == SCTP_RTT_FROM_DATA) && + (net->lan_type == SCTP_LAN_UNKNOWN)) { + if (net->rtt > SCTP_LOCAL_LAN_RTT) { + net->lan_type = SCTP_LAN_INTERNET; } else { - /* impossible .. garbage in nothing out */ - goto calc_rto; + net->lan_type = SCTP_LAN_LOCAL; } - } else { - /* Clock wrapped? */ - goto calc_rto; } /***************************/ /* 2. update RTTVAR & SRTT */ /***************************/ - net->rtt = o_calctime = calc_time; - /* this is Van Jacobson's integer version */ + /*- + * Compute the scaled average lastsa and the + * scaled variance lastsv as described in van Jacobson + * Paper "Congestion Avoidance and Control", Annex A. + * + * (net->lastsa >> SCTP_RTT_SHIFT) is the srtt + * (net->lastsa >> SCTP_RTT_VAR_SHIFT) is the rttvar + */ if (net->RTO_measured) { - calc_time -= (net->lastsa >> SCTP_RTT_SHIFT); /* take away 1/8th when - * shift=3 */ + rtt -= (net->lastsa >> SCTP_RTT_SHIFT); + net->lastsa += rtt; + if (rtt < 0) { + rtt = -rtt; + } + rtt -= (net->lastsv >> SCTP_RTT_VAR_SHIFT); + net->lastsv += rtt; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RTTVAR_LOGGING_ENABLE) { rto_logging(net, SCTP_LOG_RTTVAR); } - net->prev_rtt = o_calctime; - net->lastsa += calc_time; /* add 7/8th into sa when - * shift=3 */ - if (calc_time < 0) { - calc_time = -calc_time; - } - calc_time -= (net->lastsv >> SCTP_RTT_VAR_SHIFT); /* take away 1/4 when - * VAR shift=2 */ - net->lastsv += calc_time; - if (net->lastsv == 0) { - net->lastsv = SCTP_CLOCK_GRANULARITY; - } } else { /* First RTO measurment */ net->RTO_measured = 1; - net->lastsa = calc_time << SCTP_RTT_SHIFT; /* Multiply by 8 when - * shift=3 */ - net->lastsv = calc_time; - if (net->lastsv == 0) { - net->lastsv = SCTP_CLOCK_GRANULARITY; - } first_measure = 1; - net->prev_rtt = o_calctime; + net->lastsa = rtt << SCTP_RTT_SHIFT; + net->lastsv = (rtt / 2) << SCTP_RTT_VAR_SHIFT; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RTTVAR_LOGGING_ENABLE) { rto_logging(net, SCTP_LOG_INITIAL_RTT); } } -calc_rto: + if (net->lastsv == 0) { + net->lastsv = SCTP_CLOCK_GRANULARITY; + } new_rto = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv; if ((new_rto > SCTP_SAT_NETWORK_MIN) && (stcb->asoc.sat_network_lockout == 0)) { @@ -2742,8 +2546,8 @@ sctp_add_pad_tombuf(struct mbuf *m, int padlen) tmp = sctp_get_mbuf_for_msg(padlen, 0, M_DONTWAIT, 1, MT_DATA); if (tmp == NULL) { /* Out of space GAK! we are in big trouble. */ - SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); - return (ENOSPC); + SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS); + return (ENOBUFS); } /* setup and insert in middle */ SCTP_BUF_LEN(tmp) = padlen; @@ -2765,15 +2569,13 @@ sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf) /* find the last mbuf in chain and pad it */ struct mbuf *m_at; - m_at = m; if (last_mbuf) { return (sctp_add_pad_tombuf(last_mbuf, padval)); } else { - while (m_at) { + for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) { if (SCTP_BUF_NEXT(m_at) == NULL) { return (sctp_add_pad_tombuf(m_at, padval)); } - m_at = SCTP_BUF_NEXT(m_at); } } SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT); @@ -2781,8 +2583,8 @@ sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf) } static void -sctp_notify_assoc_change(uint32_t event, struct sctp_tcb *stcb, - uint32_t error, void *data, int so_locked +sctp_notify_assoc_change(uint16_t state, struct sctp_tcb *stcb, + uint16_t error, struct sctp_abort_chunk *abort, uint8_t from_peer, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif @@ -2791,111 +2593,134 @@ sctp_notify_assoc_change(uint32_t event, struct sctp_tcb *stcb, struct mbuf *m_notify; struct sctp_assoc_change *sac; struct sctp_queued_to_read *control; + size_t notif_len, abort_len; + unsigned int i; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif - /* - * For TCP model AND UDP connected sockets we will send an error up - * when an ABORT comes in. - */ - if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || - (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && - ((event == SCTP_COMM_LOST) || (event == SCTP_CANT_STR_ASSOC))) { - if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) { - SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED); - stcb->sctp_socket->so_error = ECONNREFUSED; + if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) { + notif_len = sizeof(struct sctp_assoc_change); + if (abort != NULL) { + abort_len = htons(abort->ch.chunk_length); } else { - SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET); - stcb->sctp_socket->so_error = ECONNRESET; + abort_len = 0; + } + if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) { + notif_len += SCTP_ASSOC_SUPPORTS_MAX; + } else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) { + notif_len += abort_len; + } + m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA); + if (m_notify == NULL) { + /* Retry with smaller value. */ + notif_len = sizeof(struct sctp_assoc_change); + m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA); + if (m_notify == NULL) { + goto set_error; + } } - /* Wake ANY sleepers */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - so = SCTP_INP_SO(stcb->sctp_ep); - if (!so_locked) { - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - SCTP_SOCKET_LOCK(so, 1); - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); - if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { - SCTP_SOCKET_UNLOCK(so, 1); - return; + SCTP_BUF_NEXT(m_notify) = NULL; + sac = mtod(m_notify, struct sctp_assoc_change *); + sac->sac_type = SCTP_ASSOC_CHANGE; + sac->sac_flags = 0; + sac->sac_length = sizeof(struct sctp_assoc_change); + sac->sac_state = state; + sac->sac_error = error; + /* XXX verify these stream counts */ + sac->sac_outbound_streams = stcb->asoc.streamoutcnt; + sac->sac_inbound_streams = stcb->asoc.streamincnt; + sac->sac_assoc_id = sctp_get_associd(stcb); + if (notif_len > sizeof(struct sctp_assoc_change)) { + if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) { + i = 0; + if (stcb->asoc.peer_supports_prsctp) { + sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_PR; + } + if (stcb->asoc.peer_supports_auth) { + sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_AUTH; + } + if (stcb->asoc.peer_supports_asconf) { + sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_ASCONF; + } + sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_MULTIBUF; + if (stcb->asoc.peer_supports_strreset) { + sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_RE_CONFIG; + } + sac->sac_length += i; + } else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) { + memcpy(sac->sac_info, abort, abort_len); + sac->sac_length += abort_len; } } -#endif - socantrcvmore(stcb->sctp_socket); - sorwakeup(stcb->sctp_socket); - sowwakeup(stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - if (!so_locked) { - SCTP_SOCKET_UNLOCK(so, 1); + SCTP_BUF_LEN(m_notify) = sac->sac_length; + control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, + 0, 0, stcb->asoc.context, 0, 0, 0, + m_notify); + if (control != NULL) { + control->length = SCTP_BUF_LEN(m_notify); + /* not that we need this */ + control->tail_mbuf = m_notify; + control->spec_flags = M_NOTIFICATION; + sctp_add_to_readq(stcb->sctp_ep, stcb, + control, + &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, + so_locked); + } else { + sctp_m_freem(m_notify); } -#endif - } - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVASSOCEVNT)) { - /* event not enabled */ - return; - } - m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_change), 0, M_DONTWAIT, 1, MT_DATA); - if (m_notify == NULL) - /* no space left */ - return; - SCTP_BUF_LEN(m_notify) = 0; - - sac = mtod(m_notify, struct sctp_assoc_change *); - sac->sac_type = SCTP_ASSOC_CHANGE; - sac->sac_flags = 0; - sac->sac_length = sizeof(struct sctp_assoc_change); - sac->sac_state = event; - sac->sac_error = error; - /* XXX verify these stream counts */ - sac->sac_outbound_streams = stcb->asoc.streamoutcnt; - sac->sac_inbound_streams = stcb->asoc.streamincnt; - sac->sac_assoc_id = sctp_get_associd(stcb); - SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_assoc_change); - SCTP_BUF_NEXT(m_notify) = NULL; - control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, - m_notify); - if (control == NULL) { - /* no memory */ - sctp_m_freem(m_notify); - return; } - control->length = SCTP_BUF_LEN(m_notify); - /* not that we need this */ - control->tail_mbuf = m_notify; - control->spec_flags = M_NOTIFICATION; - sctp_add_to_readq(stcb->sctp_ep, stcb, - control, - &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, - so_locked); - if (event == SCTP_COMM_LOST) { - /* Wake up any sleeper */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - so = SCTP_INP_SO(stcb->sctp_ep); - if (!so_locked) { - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - SCTP_SOCKET_LOCK(so, 1); - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); - if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { - SCTP_SOCKET_UNLOCK(so, 1); - return; + /* + * For 1-to-1 style sockets, we send up and error when an ABORT + * comes in. + */ +set_error: + if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && + ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) { + SOCK_LOCK(stcb->sctp_socket); + if (from_peer) { + if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) { + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED); + stcb->sctp_socket->so_error = ECONNREFUSED; + } else { + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET); + stcb->sctp_socket->so_error = ECONNRESET; } + } else { + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED); + stcb->sctp_socket->so_error = ECONNABORTED; } -#endif - sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - if (!so_locked) { + } + /* Wake ANY sleepers */ +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + so = SCTP_INP_SO(stcb->sctp_ep); + if (!so_locked) { + atomic_add_int(&stcb->asoc.refcnt, 1); + SCTP_TCB_UNLOCK(stcb); + SCTP_SOCKET_LOCK(so, 1); + SCTP_TCB_LOCK(stcb); + atomic_subtract_int(&stcb->asoc.refcnt, 1); + if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { SCTP_SOCKET_UNLOCK(so, 1); + return; } + } #endif + if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && + ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) { + socantrcvmore_locked(stcb->sctp_socket); + } + sorwakeup(stcb->sctp_socket); + sowwakeup(stcb->sctp_socket); +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) + if (!so_locked) { + SCTP_SOCKET_UNLOCK(so, 1); } +#endif } static void @@ -2906,7 +2731,8 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state, struct sctp_paddr_change *spc; struct sctp_queued_to_read *control; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVPADDREVNT)) { + if ((stcb == NULL) || + sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT)) { /* event not enabled */ return; } @@ -2919,9 +2745,11 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state, spc->spc_flags = 0; spc->spc_length = sizeof(struct sctp_paddr_change); switch (sa->sa_family) { +#ifdef INET case AF_INET: memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in)); break; +#endif #ifdef INET6 case AF_INET6: { @@ -2955,7 +2783,7 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state, /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ @@ -2975,7 +2803,7 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state, static void -sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error, +sctp_notify_send_failed(struct sctp_tcb *stcb, uint8_t sent, uint32_t error, struct sctp_tmit_chunk *chk, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED @@ -2984,38 +2812,68 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error, { struct mbuf *m_notify; struct sctp_send_failed *ssf; + struct sctp_send_failed_event *ssfe; struct sctp_queued_to_read *control; int length; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) { + if ((stcb == NULL) || + (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT) && + sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT))) { /* event not enabled */ return; } - m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA); + if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) { + length = sizeof(struct sctp_send_failed_event); + } else { + length = sizeof(struct sctp_send_failed); + } + m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA); if (m_notify == NULL) /* no space left */ return; - length = sizeof(struct sctp_send_failed) + chk->send_size; + length += chk->send_size; length -= sizeof(struct sctp_data_chunk); SCTP_BUF_LEN(m_notify) = 0; - ssf = mtod(m_notify, struct sctp_send_failed *); - ssf->ssf_type = SCTP_SEND_FAILED; - if (error == SCTP_NOTIFY_DATAGRAM_UNSENT) - ssf->ssf_flags = SCTP_DATA_UNSENT; - else - ssf->ssf_flags = SCTP_DATA_SENT; - ssf->ssf_length = length; - ssf->ssf_error = error; - /* not exactly what the user sent in, but should be close :) */ - bzero(&ssf->ssf_info, sizeof(ssf->ssf_info)); - ssf->ssf_info.sinfo_stream = chk->rec.data.stream_number; - ssf->ssf_info.sinfo_ssn = chk->rec.data.stream_seq; - ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags; - ssf->ssf_info.sinfo_ppid = chk->rec.data.payloadtype; - ssf->ssf_info.sinfo_context = chk->rec.data.context; - ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb); - ssf->ssf_assoc_id = sctp_get_associd(stcb); - + if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) { + ssfe = mtod(m_notify, struct sctp_send_failed_event *); + ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT; + if (sent) { + ssfe->ssfe_flags = SCTP_DATA_SENT; + } else { + ssfe->ssfe_flags = SCTP_DATA_UNSENT; + } + ssfe->ssfe_length = length; + ssfe->ssfe_error = error; + /* not exactly what the user sent in, but should be close :) */ + bzero(&ssfe->ssfe_info, sizeof(ssfe->ssfe_info)); + ssfe->ssfe_info.snd_sid = chk->rec.data.stream_number; + ssfe->ssfe_info.snd_flags = chk->rec.data.rcv_flags; + ssfe->ssfe_info.snd_ppid = chk->rec.data.payloadtype; + ssfe->ssfe_info.snd_context = chk->rec.data.context; + ssfe->ssfe_info.snd_assoc_id = sctp_get_associd(stcb); + ssfe->ssfe_assoc_id = sctp_get_associd(stcb); + SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event); + } else { + ssf = mtod(m_notify, struct sctp_send_failed *); + ssf->ssf_type = SCTP_SEND_FAILED; + if (sent) { + ssf->ssf_flags = SCTP_DATA_SENT; + } else { + ssf->ssf_flags = SCTP_DATA_UNSENT; + } + ssf->ssf_length = length; + ssf->ssf_error = error; + /* not exactly what the user sent in, but should be close :) */ + bzero(&ssf->ssf_info, sizeof(ssf->ssf_info)); + ssf->ssf_info.sinfo_stream = chk->rec.data.stream_number; + ssf->ssf_info.sinfo_ssn = chk->rec.data.stream_seq; + ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags; + ssf->ssf_info.sinfo_ppid = chk->rec.data.payloadtype; + ssf->ssf_info.sinfo_context = chk->rec.data.context; + ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb); + ssf->ssf_assoc_id = sctp_get_associd(stcb); + SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed); + } if (chk->data) { /* * trim off the sctp chunk header(it should be there) @@ -3027,7 +2885,6 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error, } } SCTP_BUF_NEXT(m_notify) = chk->data; - SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed); /* Steal off the mbuf */ chk->data = NULL; /* @@ -3041,7 +2898,7 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error, } /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ @@ -3067,42 +2924,69 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error, { struct mbuf *m_notify; struct sctp_send_failed *ssf; + struct sctp_send_failed_event *ssfe; struct sctp_queued_to_read *control; int length; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) { + if ((stcb == NULL) || + (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT) && + sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT))) { /* event not enabled */ return; } - length = sizeof(struct sctp_send_failed) + sp->length; - m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA); - if (m_notify == NULL) + if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) { + length = sizeof(struct sctp_send_failed_event); + } else { + length = sizeof(struct sctp_send_failed); + } + m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA); + if (m_notify == NULL) { /* no space left */ return; + } + length += sp->length; SCTP_BUF_LEN(m_notify) = 0; - ssf = mtod(m_notify, struct sctp_send_failed *); - ssf->ssf_type = SCTP_SEND_FAILED; - if (error == SCTP_NOTIFY_DATAGRAM_UNSENT) - ssf->ssf_flags = SCTP_DATA_UNSENT; - else - ssf->ssf_flags = SCTP_DATA_SENT; - ssf->ssf_length = length; - ssf->ssf_error = error; - /* not exactly what the user sent in, but should be close :) */ - bzero(&ssf->ssf_info, sizeof(ssf->ssf_info)); - ssf->ssf_info.sinfo_stream = sp->stream; - ssf->ssf_info.sinfo_ssn = sp->strseq; - if (sp->some_taken) { - ssf->ssf_info.sinfo_flags = SCTP_DATA_LAST_FRAG; + if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) { + ssfe = mtod(m_notify, struct sctp_send_failed_event *); + ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT; + ssfe->ssfe_flags = SCTP_DATA_UNSENT; + ssfe->ssfe_length = length; + ssfe->ssfe_error = error; + /* not exactly what the user sent in, but should be close :) */ + bzero(&ssfe->ssfe_info, sizeof(ssfe->ssfe_info)); + ssfe->ssfe_info.snd_sid = sp->stream; + if (sp->some_taken) { + ssfe->ssfe_info.snd_flags = SCTP_DATA_LAST_FRAG; + } else { + ssfe->ssfe_info.snd_flags = SCTP_DATA_NOT_FRAG; + } + ssfe->ssfe_info.snd_ppid = sp->ppid; + ssfe->ssfe_info.snd_context = sp->context; + ssfe->ssfe_info.snd_assoc_id = sctp_get_associd(stcb); + ssfe->ssfe_assoc_id = sctp_get_associd(stcb); + SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event); } else { - ssf->ssf_info.sinfo_flags = SCTP_DATA_NOT_FRAG; + ssf = mtod(m_notify, struct sctp_send_failed *); + ssf->ssf_type = SCTP_SEND_FAILED; + ssf->ssf_flags = SCTP_DATA_UNSENT; + ssf->ssf_length = length; + ssf->ssf_error = error; + /* not exactly what the user sent in, but should be close :) */ + bzero(&ssf->ssf_info, sizeof(ssf->ssf_info)); + ssf->ssf_info.sinfo_stream = sp->stream; + ssf->ssf_info.sinfo_ssn = 0; + if (sp->some_taken) { + ssf->ssf_info.sinfo_flags = SCTP_DATA_LAST_FRAG; + } else { + ssf->ssf_info.sinfo_flags = SCTP_DATA_NOT_FRAG; + } + ssf->ssf_info.sinfo_ppid = sp->ppid; + ssf->ssf_info.sinfo_context = sp->context; + ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb); + ssf->ssf_assoc_id = sctp_get_associd(stcb); + SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed); } - ssf->ssf_info.sinfo_ppid = sp->ppid; - ssf->ssf_info.sinfo_context = sp->context; - ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb); - ssf->ssf_assoc_id = sctp_get_associd(stcb); SCTP_BUF_NEXT(m_notify) = sp->data; - SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed); /* Steal off the mbuf */ sp->data = NULL; @@ -3117,7 +3001,7 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error, } /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ @@ -3133,14 +3017,14 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error, static void -sctp_notify_adaptation_layer(struct sctp_tcb *stcb, - uint32_t error) +sctp_notify_adaptation_layer(struct sctp_tcb *stcb) { struct mbuf *m_notify; struct sctp_adaptation_event *sai; struct sctp_queued_to_read *control; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) { + if ((stcb == NULL) || + sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) { /* event not enabled */ return; } @@ -3161,7 +3045,7 @@ sctp_notify_adaptation_layer(struct sctp_tcb *stcb, /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ @@ -3191,7 +3075,8 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error, struct sctp_queued_to_read *control; struct sockbuf *sb; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_PDAPIEVNT)) { + if ((stcb == NULL) || + sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_PDAPIEVNT)) { /* event not enabled */ return; } @@ -3215,7 +3100,7 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error, SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_pdapi_event); SCTP_BUF_NEXT(m_notify) = NULL; control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ @@ -3246,7 +3131,7 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error, } if (stcb->sctp_ep && stcb->sctp_socket) { /* This should always be the case */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(stcb->sctp_ep); @@ -3263,7 +3148,7 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error, } #endif sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if (!so_locked) { SCTP_SOCKET_UNLOCK(so, 1); } @@ -3285,7 +3170,7 @@ sctp_notify_shutdown_event(struct sctp_tcb *stcb) if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { /* mark socket closed for read/write and wakeup! */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(stcb->sctp_ep); @@ -3300,11 +3185,11 @@ sctp_notify_shutdown_event(struct sctp_tcb *stcb) } #endif socantsendmore(stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) { + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) { /* event not enabled */ return; } @@ -3323,7 +3208,7 @@ sctp_notify_shutdown_event(struct sctp_tcb *stcb) /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ @@ -3351,7 +3236,8 @@ sctp_notify_sender_dry_event(struct sctp_tcb *stcb, struct sctp_sender_dry_event *event; struct sctp_queued_to_read *control; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_DRYEVNT)) { + if ((stcb == NULL) || + sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_DRYEVNT)) { /* event not enabled */ return; } @@ -3372,7 +3258,8 @@ sctp_notify_sender_dry_event(struct sctp_tcb *stcb, /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, m_notify); + 0, 0, stcb->asoc.context, 0, 0, 0, + m_notify); if (control == NULL) { /* no memory */ sctp_m_freem(m_notify); @@ -3387,36 +3274,99 @@ sctp_notify_sender_dry_event(struct sctp_tcb *stcb, } -static void -sctp_notify_stream_reset_add(struct sctp_tcb *stcb, int number_entries, int flag) +void +sctp_notify_stream_reset_add(struct sctp_tcb *stcb, uint16_t numberin, uint16_t numberout, int flag) { struct mbuf *m_notify; struct sctp_queued_to_read *control; - struct sctp_stream_reset_event *strreset; + struct sctp_stream_change_event *stradd; int len; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_STREAM_RESETEVNT)) { + if ((stcb == NULL) || + (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_CHANGEEVNT))) { /* event not enabled */ return; } + if ((stcb->asoc.peer_req_out) && flag) { + /* Peer made the request, don't tell the local user */ + stcb->asoc.peer_req_out = 0; + return; + } + stcb->asoc.peer_req_out = 0; m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA); if (m_notify == NULL) /* no space left */ return; SCTP_BUF_LEN(m_notify) = 0; - len = sizeof(struct sctp_stream_reset_event) + (number_entries * sizeof(uint16_t)); + len = sizeof(struct sctp_stream_change_event); if (len > M_TRAILINGSPACE(m_notify)) { /* never enough room */ sctp_m_freem(m_notify); return; } - strreset = mtod(m_notify, struct sctp_stream_reset_event *); - strreset->strreset_type = SCTP_STREAM_RESET_EVENT; - strreset->strreset_flags = SCTP_STRRESET_ADD_STREAM | flag; - strreset->strreset_length = len; - strreset->strreset_assoc_id = sctp_get_associd(stcb); - strreset->strreset_list[0] = number_entries; + stradd = mtod(m_notify, struct sctp_stream_change_event *); + stradd->strchange_type = SCTP_STREAM_CHANGE_EVENT; + stradd->strchange_flags = flag; + stradd->strchange_length = len; + stradd->strchange_assoc_id = sctp_get_associd(stcb); + stradd->strchange_instrms = numberin; + stradd->strchange_outstrms = numberout; + SCTP_BUF_LEN(m_notify) = len; + SCTP_BUF_NEXT(m_notify) = NULL; + if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) { + /* no space */ + sctp_m_freem(m_notify); + return; + } + /* append to socket */ + control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, + 0, 0, stcb->asoc.context, 0, 0, 0, + m_notify); + if (control == NULL) { + /* no memory */ + sctp_m_freem(m_notify); + return; + } + control->spec_flags = M_NOTIFICATION; + control->length = SCTP_BUF_LEN(m_notify); + /* not that we need this */ + control->tail_mbuf = m_notify; + sctp_add_to_readq(stcb->sctp_ep, stcb, + control, + &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); +} + +void +sctp_notify_stream_reset_tsn(struct sctp_tcb *stcb, uint32_t sending_tsn, uint32_t recv_tsn, int flag) +{ + struct mbuf *m_notify; + struct sctp_queued_to_read *control; + struct sctp_assoc_reset_event *strasoc; + int len; + if ((stcb == NULL) || + (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ASSOC_RESETEVNT))) { + /* event not enabled */ + return; + } + m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA); + if (m_notify == NULL) + /* no space left */ + return; + SCTP_BUF_LEN(m_notify) = 0; + len = sizeof(struct sctp_assoc_reset_event); + if (len > M_TRAILINGSPACE(m_notify)) { + /* never enough room */ + sctp_m_freem(m_notify); + return; + } + strasoc = mtod(m_notify, struct sctp_assoc_reset_event *); + strasoc->assocreset_type = SCTP_ASSOC_RESET_EVENT; + strasoc->assocreset_flags = flag; + strasoc->assocreset_length = len; + strasoc->assocreset_assoc_id = sctp_get_associd(stcb); + strasoc->assocreset_local_tsn = sending_tsn; + strasoc->assocreset_remote_tsn = recv_tsn; SCTP_BUF_LEN(m_notify) = len; SCTP_BUF_NEXT(m_notify) = NULL; if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) { @@ -3426,7 +3376,7 @@ sctp_notify_stream_reset_add(struct sctp_tcb *stcb, int number_entries, int flag } /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ @@ -3443,6 +3393,7 @@ sctp_notify_stream_reset_add(struct sctp_tcb *stcb, int number_entries, int flag } + static void sctp_notify_stream_reset(struct sctp_tcb *stcb, int number_entries, uint16_t * list, int flag) @@ -3452,7 +3403,8 @@ sctp_notify_stream_reset(struct sctp_tcb *stcb, struct sctp_stream_reset_event *strreset; int len; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_STREAM_RESETEVNT)) { + if ((stcb == NULL) || + (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT))) { /* event not enabled */ return; } @@ -3469,18 +3421,14 @@ sctp_notify_stream_reset(struct sctp_tcb *stcb, } strreset = mtod(m_notify, struct sctp_stream_reset_event *); strreset->strreset_type = SCTP_STREAM_RESET_EVENT; - if (number_entries == 0) { - strreset->strreset_flags = flag | SCTP_STRRESET_ALL_STREAMS; - } else { - strreset->strreset_flags = flag | SCTP_STRRESET_STREAM_LIST; - } + strreset->strreset_flags = flag; strreset->strreset_length = len; strreset->strreset_assoc_id = sctp_get_associd(stcb); if (number_entries) { int i; for (i = 0; i < number_entries; i++) { - strreset->strreset_list[i] = ntohs(list[i]); + strreset->strreset_stream_list[i] = ntohs(list[i]); } } SCTP_BUF_LEN(m_notify) = len; @@ -3492,7 +3440,7 @@ sctp_notify_stream_reset(struct sctp_tcb *stcb, } /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, - 0, 0, 0, 0, 0, 0, + 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ @@ -3509,6 +3457,63 @@ sctp_notify_stream_reset(struct sctp_tcb *stcb, } +static void +sctp_notify_remote_error(struct sctp_tcb *stcb, uint16_t error, struct sctp_error_chunk *chunk) +{ + struct mbuf *m_notify; + struct sctp_remote_error *sre; + struct sctp_queued_to_read *control; + size_t notif_len, chunk_len; + + if ((stcb == NULL) || + sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVPEERERR)) { + return; + } + if (chunk != NULL) { + chunk_len = htons(chunk->ch.chunk_length); + } else { + chunk_len = 0; + } + notif_len = sizeof(struct sctp_remote_error) + chunk_len; + m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA); + if (m_notify == NULL) { + /* Retry with smaller value. */ + notif_len = sizeof(struct sctp_remote_error); + m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA); + if (m_notify == NULL) { + return; + } + } + SCTP_BUF_NEXT(m_notify) = NULL; + sre = mtod(m_notify, struct sctp_remote_error *); + sre->sre_type = SCTP_REMOTE_ERROR; + sre->sre_flags = 0; + sre->sre_length = sizeof(struct sctp_remote_error); + sre->sre_error = error; + sre->sre_assoc_id = sctp_get_associd(stcb); + if (notif_len > sizeof(struct sctp_remote_error)) { + memcpy(sre->sre_data, chunk, chunk_len); + sre->sre_length += chunk_len; + } + SCTP_BUF_LEN(m_notify) = sre->sre_length; + control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, + 0, 0, stcb->asoc.context, 0, 0, 0, + m_notify); + if (control != NULL) { + control->length = SCTP_BUF_LEN(m_notify); + /* not that we need this */ + control->tail_mbuf = m_notify; + control->spec_flags = M_NOTIFICATION; + sctp_add_to_readq(stcb->sctp_ep, stcb, + control, + &stcb->sctp_socket->so_rcv, 1, + SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); + } else { + sctp_m_freem(m_notify); + } +} + + void sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, uint32_t error, void *data, int so_locked @@ -3539,11 +3544,11 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, switch (notification) { case SCTP_NOTIFY_ASSOC_UP: if (stcb->asoc.assoc_up_sent == 0) { - sctp_notify_assoc_change(SCTP_COMM_UP, stcb, error, NULL, so_locked); + sctp_notify_assoc_change(SCTP_COMM_UP, stcb, error, NULL, 0, so_locked); stcb->asoc.assoc_up_sent = 1; } if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) { - sctp_notify_adaptation_layer(stcb, error); + sctp_notify_adaptation_layer(stcb); } if (stcb->asoc.peer_supports_auth == 0) { sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0, @@ -3551,7 +3556,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, } break; case SCTP_NOTIFY_ASSOC_DOWN: - sctp_notify_assoc_change(SCTP_SHUTDOWN_COMP, stcb, error, NULL, so_locked); + sctp_notify_assoc_change(SCTP_SHUTDOWN_COMP, stcb, error, NULL, 0, so_locked); break; case SCTP_NOTIFY_INTERFACE_DOWN: { @@ -3584,8 +3589,12 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, sctp_notify_send_failed2(stcb, error, (struct sctp_stream_queue_pending *)data, so_locked); break; - case SCTP_NOTIFY_DG_FAIL: - sctp_notify_send_failed(stcb, error, + case SCTP_NOTIFY_SENT_DG_FAIL: + sctp_notify_send_failed(stcb, 1, error, + (struct sctp_tmit_chunk *)data, so_locked); + break; + case SCTP_NOTIFY_UNSENT_DG_FAIL: + sctp_notify_send_failed(stcb, 0, error, (struct sctp_tmit_chunk *)data, so_locked); break; case SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION: @@ -3597,50 +3606,50 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, sctp_notify_partial_delivery_indication(stcb, error, val, so_locked); break; } - case SCTP_NOTIFY_STRDATA_ERR: - break; - case SCTP_NOTIFY_ASSOC_ABORTED: + case SCTP_NOTIFY_ASSOC_LOC_ABORTED: if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) || ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) { - sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, NULL, so_locked); + sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 0, so_locked); } else { - sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, NULL, so_locked); + sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 0, so_locked); } break; - case SCTP_NOTIFY_PEER_OPENED_STREAM: - break; - case SCTP_NOTIFY_STREAM_OPENED_OK: + case SCTP_NOTIFY_ASSOC_REM_ABORTED: + if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) || + ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) { + sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 1, so_locked); + } else { + sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 1, so_locked); + } break; case SCTP_NOTIFY_ASSOC_RESTART: - sctp_notify_assoc_change(SCTP_RESTART, stcb, error, data, so_locked); + sctp_notify_assoc_change(SCTP_RESTART, stcb, error, NULL, 0, so_locked); if (stcb->asoc.peer_supports_auth == 0) { sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0, NULL, so_locked); } break; - case SCTP_NOTIFY_HB_RESP: - break; - case SCTP_NOTIFY_STR_RESET_INSTREAM_ADD_OK: - sctp_notify_stream_reset_add(stcb, error, SCTP_STRRESET_INBOUND_STR); - break; - case SCTP_NOTIFY_STR_RESET_ADD_OK: - sctp_notify_stream_reset_add(stcb, error, SCTP_STRRESET_OUTBOUND_STR); - break; - case SCTP_NOTIFY_STR_RESET_ADD_FAIL: - sctp_notify_stream_reset_add(stcb, error, (SCTP_STRRESET_FAILED | SCTP_STRRESET_OUTBOUND_STR)); - break; - case SCTP_NOTIFY_STR_RESET_SEND: - sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STRRESET_OUTBOUND_STR); + sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STREAM_RESET_OUTGOING_SSN); break; case SCTP_NOTIFY_STR_RESET_RECV: - sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STRRESET_INBOUND_STR); + sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STREAM_RESET_INCOMING); break; case SCTP_NOTIFY_STR_RESET_FAILED_OUT: - sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), (SCTP_STRRESET_OUTBOUND_STR | SCTP_STRRESET_FAILED)); + sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), + (SCTP_STREAM_RESET_OUTGOING_SSN | SCTP_STREAM_RESET_FAILED)); + break; + case SCTP_NOTIFY_STR_RESET_DENIED_OUT: + sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), + (SCTP_STREAM_RESET_OUTGOING_SSN | SCTP_STREAM_RESET_DENIED)); break; case SCTP_NOTIFY_STR_RESET_FAILED_IN: - sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), (SCTP_STRRESET_INBOUND_STR | SCTP_STRRESET_FAILED)); + sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), + (SCTP_STREAM_RESET_INCOMING | SCTP_STREAM_RESET_FAILED)); + break; + case SCTP_NOTIFY_STR_RESET_DENIED_IN: + sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), + (SCTP_STREAM_RESET_INCOMING | SCTP_STREAM_RESET_DENIED)); break; case SCTP_NOTIFY_ASCONF_ADD_IP: sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data, @@ -3654,15 +3663,11 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data, error); break; - case SCTP_NOTIFY_ASCONF_SUCCESS: - break; - case SCTP_NOTIFY_ASCONF_FAILED: - break; case SCTP_NOTIFY_PEER_SHUTDOWN: sctp_notify_shutdown_event(stcb); break; case SCTP_NOTIFY_AUTH_NEW_KEY: - sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY, error, + sctp_notify_authentication(stcb, SCTP_AUTH_NEW_KEY, error, (uint16_t) (uintptr_t) data, so_locked); break; @@ -3679,6 +3684,9 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, case SCTP_NOTIFY_SENDER_DRY: sctp_notify_sender_dry_event(stcb, so_locked); break; + case SCTP_NOTIFY_REMOTE_ERROR: + sctp_notify_remote_error(stcb, error, data); + break; default: SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n", __FUNCTION__, notification, notification); @@ -3687,7 +3695,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, } void -sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked +sctp_report_all_outbound(struct sctp_tcb *stcb, uint16_t error, int holds_lock, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif @@ -3695,22 +3703,21 @@ sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked { struct sctp_association *asoc; struct sctp_stream_out *outs; - struct sctp_tmit_chunk *chk; - struct sctp_stream_queue_pending *sp; + struct sctp_tmit_chunk *chk, *nchk; + struct sctp_stream_queue_pending *sp, *nsp; int i; - asoc = &stcb->asoc; - if (stcb == NULL) { return; } - if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { + asoc = &stcb->asoc; + if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) { /* already being freed */ return; } if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || - (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) { + (asoc->state & SCTP_STATE_CLOSED_SOCKET)) { return; } /* now through all the gunk freeing chunks */ @@ -3718,61 +3725,70 @@ sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked SCTP_TCB_SEND_LOCK(stcb); } /* sent queue SHOULD be empty */ - if (!TAILQ_EMPTY(&asoc->sent_queue)) { - chk = TAILQ_FIRST(&asoc->sent_queue); - while (chk) { - TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next); - asoc->sent_queue_cnt--; - if (chk->data != NULL) { - sctp_free_bufspace(stcb, asoc, chk, 1); - sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, - SCTP_NOTIFY_DATAGRAM_SENT, chk, so_locked); - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } + TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) { + TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next); + asoc->sent_queue_cnt--; + if (chk->sent != SCTP_DATAGRAM_NR_ACKED) { + if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) { + asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--; +#ifdef INVARIANTS + } else { + panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number); +#endif + } + } + if (chk->data != NULL) { + sctp_free_bufspace(stcb, asoc, chk, 1); + sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, + error, chk, so_locked); + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; } - sctp_free_a_chunk(stcb, chk); - /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->sent_queue); } + sctp_free_a_chunk(stcb, chk, so_locked); + /* sa_ignore FREED_MEMORY */ } /* pending send queue SHOULD be empty */ - if (!TAILQ_EMPTY(&asoc->send_queue)) { - chk = TAILQ_FIRST(&asoc->send_queue); - while (chk) { - TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); - asoc->send_queue_cnt--; - if (chk->data != NULL) { - sctp_free_bufspace(stcb, asoc, chk, 1); - sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, - SCTP_NOTIFY_DATAGRAM_UNSENT, chk, so_locked); - if (chk->data) { - sctp_m_freem(chk->data); - chk->data = NULL; - } + TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { + TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); + asoc->send_queue_cnt--; + if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) { + asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--; +#ifdef INVARIANTS + } else { + panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number); +#endif + } + if (chk->data != NULL) { + sctp_free_bufspace(stcb, asoc, chk, 1); + sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, + error, chk, so_locked); + if (chk->data) { + sctp_m_freem(chk->data); + chk->data = NULL; } - sctp_free_a_chunk(stcb, chk); - /* sa_ignore FREED_MEMORY */ - chk = TAILQ_FIRST(&asoc->send_queue); } + sctp_free_a_chunk(stcb, chk, so_locked); + /* sa_ignore FREED_MEMORY */ } - for (i = 0; i < stcb->asoc.streamoutcnt; i++) { + for (i = 0; i < asoc->streamoutcnt; i++) { /* For each stream */ - outs = &stcb->asoc.strmout[i]; + outs = &asoc->strmout[i]; /* clean up any sends there */ - stcb->asoc.locked_on_sending = NULL; - sp = TAILQ_FIRST(&outs->outqueue); - while (sp) { - stcb->asoc.stream_queue_cnt--; + asoc->locked_on_sending = NULL; + TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) { + asoc->stream_queue_cnt--; TAILQ_REMOVE(&outs->outqueue, sp, next); sctp_free_spbufspace(stcb, asoc, sp); if (sp->data) { sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb, - SCTP_NOTIFY_DATAGRAM_UNSENT, (void *)sp, so_locked); + error, (void *)sp, so_locked); if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; + sp->tail_mbuf = NULL; + sp->length = 0; } } if (sp->net) { @@ -3780,9 +3796,8 @@ sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked sp->net = NULL; } /* Free the chunk */ - sctp_free_a_strmoq(stcb, sp); + sctp_free_a_strmoq(stcb, sp, so_locked); /* sa_ignore FREED_MEMORY */ - sp = TAILQ_FIRST(&outs->outqueue); } } @@ -3792,39 +3807,46 @@ sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked } void -sctp_abort_notification(struct sctp_tcb *stcb, int error, int so_locked +sctp_abort_notification(struct sctp_tcb *stcb, uint8_t from_peer, uint16_t error, + struct sctp_abort_chunk *abort, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { - if (stcb == NULL) { return; } + if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || + ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && + (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) { + stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_WAS_ABORTED; + } if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) { return; } /* Tell them we lost the asoc */ - sctp_report_all_outbound(stcb, 1, so_locked); - if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || - ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && - (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) { - stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_WAS_ABORTED; + sctp_report_all_outbound(stcb, error, 1, so_locked); + if (from_peer) { + sctp_ulp_notify(SCTP_NOTIFY_ASSOC_REM_ABORTED, stcb, error, abort, so_locked); + } else { + sctp_ulp_notify(SCTP_NOTIFY_ASSOC_LOC_ABORTED, stcb, error, abort, so_locked); } - sctp_ulp_notify(SCTP_NOTIFY_ASSOC_ABORTED, stcb, error, NULL, so_locked); } void sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct mbuf *m, int iphlen, struct sctphdr *sh, struct mbuf *op_err, + struct mbuf *m, int iphlen, + struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, struct mbuf *op_err, + uint8_t use_mflowid, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { uint32_t vtag; -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif @@ -3833,15 +3855,17 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb, if (stcb != NULL) { /* We have a TCB to abort, send notification too */ vtag = stcb->asoc.peer_vtag; - sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED); + sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED); /* get the assoc vrf id and table id */ vrf_id = stcb->asoc.vrf_id; stcb->asoc.state |= SCTP_STATE_WAS_ABORTED; } - sctp_send_abort(m, iphlen, sh, vtag, op_err, vrf_id, port); + sctp_send_abort(m, iphlen, src, dst, sh, vtag, op_err, + use_mflowid, mflowid, + vrf_id, port); if (stcb != NULL) { /* Ok, now lets free it */ -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -3855,7 +3879,7 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb, SCTP_STAT_DECR_GAUGE32(sctps_currestab); } (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_4); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } @@ -3927,21 +3951,19 @@ none_in: void sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - int error, struct mbuf *op_err, + struct mbuf *op_err, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { - uint32_t vtag; - -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); #endif if (stcb == NULL) { @@ -3956,14 +3978,11 @@ sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb, } else { stcb->asoc.state |= SCTP_STATE_WAS_ABORTED; } - vtag = stcb->asoc.peer_vtag; /* notify the ulp */ - if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) - sctp_abort_notification(stcb, error, so_locked); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) { + sctp_abort_notification(stcb, 0, 0, NULL, so_locked); + } /* notify the peer */ -#if defined(SCTP_PANIC_ON_ABORT) - panic("aborting an association"); -#endif sctp_send_abort_tcb(stcb, op_err, so_locked); SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) || @@ -3974,7 +3993,7 @@ sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb, #ifdef SCTP_ASOCLOG_OF_TSNS sctp_print_out_track_log(stcb); #endif -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if (!so_locked) { atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); @@ -3984,7 +4003,7 @@ sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb, } #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_5); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if (!so_locked) { SCTP_SOCKET_UNLOCK(so, 1); } @@ -3992,11 +4011,15 @@ sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb, } void -sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, - struct sctp_inpcb *inp, struct mbuf *op_err, uint32_t vrf_id, uint16_t port) +sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, + struct sockaddr *src, struct sockaddr *dst, + struct sctphdr *sh, struct sctp_inpcb *inp, + uint8_t use_mflowid, uint32_t mflowid, + uint32_t vrf_id, uint16_t port) { struct sctp_chunkhdr *ch, chunk_buf; unsigned int chk_length; + int contains_init_chunk; SCTP_STAT_INCR_COUNTER32(sctps_outoftheblue); /* Generate a TO address for future reference */ @@ -4006,6 +4029,7 @@ sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, SCTP_CALLED_DIRECTLY_NOCMPSET); } } + contains_init_chunk = 0; ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset, sizeof(*ch), (uint8_t *) & chunk_buf); while (ch != NULL) { @@ -4015,6 +4039,9 @@ sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, break; } switch (ch->chunk_type) { + case SCTP_INIT: + contains_init_chunk = 1; + break; case SCTP_COOKIE_ECHO: /* We hit here only if the assoc is being freed */ return; @@ -4031,7 +4058,9 @@ sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, */ return; case SCTP_SHUTDOWN_ACK: - sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id, port); + sctp_send_shutdown_complete2(src, dst, sh, + use_mflowid, mflowid, + vrf_id, port); return; default: break; @@ -4040,7 +4069,13 @@ sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset, sizeof(*ch), (uint8_t *) & chunk_buf); } - sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id, port); + if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) || + ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) && + (contains_init_chunk == 0))) { + sctp_send_abort(m, iphlen, src, dst, sh, 0, NULL, + use_mflowid, mflowid, + vrf_id, port); + } } /* @@ -4170,6 +4205,7 @@ sctp_cmpaddr(struct sockaddr *sa1, struct sockaddr *sa2) sin6_2)); } #endif +#ifdef INET case AF_INET: { /* IPv4 addresses */ @@ -4179,6 +4215,7 @@ sctp_cmpaddr(struct sockaddr *sa1, struct sockaddr *sa2) sin_2 = (struct sockaddr_in *)sa2; return (sin_1->sin_addr.s_addr == sin_2->sin_addr.s_addr); } +#endif default: /* we don't do these... */ return (0); @@ -4191,7 +4228,6 @@ sctp_print_address(struct sockaddr *sa) #ifdef INET6 char ip6buf[INET6_ADDRSTRLEN]; - ip6buf[0] = 0; #endif switch (sa->sa_family) { @@ -4208,6 +4244,7 @@ sctp_print_address(struct sockaddr *sa) break; } #endif +#ifdef INET case AF_INET: { struct sockaddr_in *sin; @@ -4219,62 +4256,9 @@ sctp_print_address(struct sockaddr *sa) p[0], p[1], p[2], p[3], ntohs(sin->sin_port)); break; } - default: - SCTP_PRINTF("?\n"); - break; - } -} - -void -sctp_print_address_pkt(struct ip *iph, struct sctphdr *sh) -{ - switch (iph->ip_v) { - case IPVERSION: - { - struct sockaddr_in lsa, fsa; - - bzero(&lsa, sizeof(lsa)); - lsa.sin_len = sizeof(lsa); - lsa.sin_family = AF_INET; - lsa.sin_addr = iph->ip_src; - lsa.sin_port = sh->src_port; - bzero(&fsa, sizeof(fsa)); - fsa.sin_len = sizeof(fsa); - fsa.sin_family = AF_INET; - fsa.sin_addr = iph->ip_dst; - fsa.sin_port = sh->dest_port; - SCTP_PRINTF("src: "); - sctp_print_address((struct sockaddr *)&lsa); - SCTP_PRINTF("dest: "); - sctp_print_address((struct sockaddr *)&fsa); - break; - } -#ifdef INET6 - case IPV6_VERSION >> 4: - { - struct ip6_hdr *ip6; - struct sockaddr_in6 lsa6, fsa6; - - ip6 = (struct ip6_hdr *)iph; - bzero(&lsa6, sizeof(lsa6)); - lsa6.sin6_len = sizeof(lsa6); - lsa6.sin6_family = AF_INET6; - lsa6.sin6_addr = ip6->ip6_src; - lsa6.sin6_port = sh->src_port; - bzero(&fsa6, sizeof(fsa6)); - fsa6.sin6_len = sizeof(fsa6); - fsa6.sin6_family = AF_INET6; - fsa6.sin6_addr = ip6->ip6_dst; - fsa6.sin6_port = sh->dest_port; - SCTP_PRINTF("src: "); - sctp_print_address((struct sockaddr *)&lsa6); - SCTP_PRINTF("dest: "); - sctp_print_address((struct sockaddr *)&fsa6); - break; - } #endif default: - /* TSNH */ + SCTP_PRINTF("?\n"); break; } } @@ -4313,10 +4297,8 @@ sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp, } /* lock the socket buffers */ SCTP_INP_READ_LOCK(old_inp); - control = TAILQ_FIRST(&old_inp->read_queue); - /* Pull off all for out target stcb */ - while (control) { - nctl = TAILQ_NEXT(control, next); + TAILQ_FOREACH_SAFE(control, &old_inp->read_queue, next, nctl) { + /* Pull off all for out target stcb */ if (control->stcb == stcb) { /* remove it we want it */ TAILQ_REMOVE(&old_inp->read_queue, control, next); @@ -4333,17 +4315,14 @@ sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp, m = SCTP_BUF_NEXT(m); } } - control = nctl; } SCTP_INP_READ_UNLOCK(old_inp); /* Remove the sb-lock on the old socket */ sbunlock(&old_so->so_rcv); /* Now we move them over to the new socket buffer */ - control = TAILQ_FIRST(&tmp_queue); SCTP_INP_READ_LOCK(new_inp); - while (control) { - nctl = TAILQ_NEXT(control, next); + TAILQ_FOREACH_SAFE(control, &tmp_queue, next, nctl) { TAILQ_INSERT_TAIL(&new_inp->read_queue, control, next); m = control->data; while (m) { @@ -4356,7 +4335,6 @@ sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp, } m = SCTP_BUF_NEXT(m); } - control = nctl; } SCTP_INP_READ_UNLOCK(new_inp); } @@ -4457,16 +4435,20 @@ sctp_add_to_readq(struct sctp_inpcb *inp, if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) { SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket); } else { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(inp); if (!so_locked) { - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); + if (stcb) { + atomic_add_int(&stcb->asoc.refcnt, 1); + SCTP_TCB_UNLOCK(stcb); + } SCTP_SOCKET_LOCK(so, 1); - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); + if (stcb) { + SCTP_TCB_LOCK(stcb); + atomic_subtract_int(&stcb->asoc.refcnt, 1); + } if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { SCTP_SOCKET_UNLOCK(so, 1); return; @@ -4474,7 +4456,7 @@ sctp_add_to_readq(struct sctp_inpcb *inp, } #endif sctp_sorwakeup(inp, inp->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if (!so_locked) { SCTP_SOCKET_UNLOCK(so, 1); } @@ -4517,7 +4499,7 @@ get_out: } if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ)) { SCTP_INP_READ_UNLOCK(inp); - return 0; + return (0); } if (control->end_added) { /* huh this one is complete? */ @@ -4607,22 +4589,26 @@ get_out: if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) { SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket); } else { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(inp); - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); + if (stcb) { + atomic_add_int(&stcb->asoc.refcnt, 1); + SCTP_TCB_UNLOCK(stcb); + } SCTP_SOCKET_LOCK(so, 1); - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); + if (stcb) { + SCTP_TCB_LOCK(stcb); + atomic_subtract_int(&stcb->asoc.refcnt, 1); + } if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { SCTP_SOCKET_UNLOCK(so, 1); return (0); } #endif sctp_sorwakeup(inp, inp->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } @@ -4695,14 +4681,14 @@ sctp_free_bufspace(struct sctp_tcb *stcb, struct sctp_association *asoc, int sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1, - int reason, int so_locked + uint8_t sent, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { struct sctp_stream_out *strq; - struct sctp_tmit_chunk *chk = NULL; + struct sctp_tmit_chunk *chk = NULL, *tp2; struct sctp_stream_queue_pending *sp; uint16_t stream = 0, seq = 0; uint8_t foundeom = 0; @@ -4722,7 +4708,11 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1, sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1); stcb->asoc.peers_rwnd += tp1->send_size; stcb->asoc.peers_rwnd += SCTP_BASE_SYSCTL(sctp_peer_chunk_oh); - sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, reason, tp1, so_locked); + if (sent) { + sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, 0, tp1, so_locked); + } else { + sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, 0, tp1, so_locked); + } if (tp1->data) { sctp_m_freem(tp1->data); tp1->data = NULL; @@ -4756,15 +4746,11 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1, * The multi-part message was scattered across the send and * sent queue. */ -next_on_sent: - tp1 = TAILQ_FIRST(&stcb->asoc.send_queue); - /* - * recurse throught the send_queue too, starting at the - * beginning. - */ - if ((tp1) && - (tp1->rec.data.stream_number == stream) && - (tp1->rec.data.stream_seq == seq)) { + TAILQ_FOREACH_SAFE(tp1, &stcb->asoc.send_queue, sctp_next, tp2) { + if ((tp1->rec.data.stream_number != stream) || + (tp1->rec.data.stream_seq != seq)) { + break; + } /* * save to chk in case we have some on stream out * queue. If so and we have an un-transmitted one we @@ -4773,7 +4759,11 @@ next_on_sent: chk = tp1; ret_sz += tp1->book_size; sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1); - sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, reason, tp1, so_locked); + if (sent) { + sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, 0, tp1, so_locked); + } else { + sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, 0, tp1, so_locked); + } if (tp1->data) { sctp_m_freem(tp1->data); tp1->data = NULL; @@ -4794,7 +4784,6 @@ next_on_sent: sctp_next); stcb->asoc.send_queue_cnt--; stcb->asoc.sent_queue_cnt++; - goto next_on_sent; } } if (foundeom == 0) { @@ -4802,81 +4791,73 @@ next_on_sent: * Still no eom found. That means there is stuff left on the * stream out queue.. yuck. */ - strq = &stcb->asoc.strmout[stream]; SCTP_TCB_SEND_LOCK(stcb); + strq = &stcb->asoc.strmout[stream]; sp = TAILQ_FIRST(&strq->outqueue); - while (sp->strseq <= seq) { - /* Check if its our SEQ */ - if (sp->strseq == seq) { - sp->discard_rest = 1; - /* - * We may need to put a chunk on the queue - * that holds the TSN that would have been - * sent with the LAST bit. - */ + if (sp != NULL) { + sp->discard_rest = 1; + /* + * We may need to put a chunk on the queue that + * holds the TSN that would have been sent with the + * LAST bit. + */ + if (chk == NULL) { + /* Yep, we have to */ + sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { - /* Yep, we have to */ - sctp_alloc_a_chunk(stcb, chk); - if (chk == NULL) { - /* - * we are hosed. All we can - * do is nothing.. which - * will cause an abort if - * the peer is paying - * attention. - */ - goto oh_well; - } - memset(chk, 0, sizeof(*chk)); - chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG; - chk->sent = SCTP_FORWARD_TSN_SKIP; - chk->asoc = &stcb->asoc; - chk->rec.data.stream_seq = sp->strseq; - chk->rec.data.stream_number = sp->stream; - chk->rec.data.payloadtype = sp->ppid; - chk->rec.data.context = sp->context; - chk->flags = sp->act_flags; - if (sp->net) - chk->whoTo = sp->net; - else - chk->whoTo = stcb->asoc.primary_destination; - atomic_add_int(&chk->whoTo->ref_count, 1); - chk->rec.data.TSN_seq = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1); - stcb->asoc.pr_sctp_cnt++; - chk->pr_sctp_on = 1; - TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next); - stcb->asoc.sent_queue_cnt++; - stcb->asoc.pr_sctp_cnt++; - } else { - chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG; - } - oh_well: - if (sp->data) { /* - * Pull any data to free up the SB - * and allow sender to "add more" - * whilc we will throw away :-) + * we are hosed. All we can do is + * nothing.. which will cause an + * abort if the peer is paying + * attention. */ - sctp_free_spbufspace(stcb, &stcb->asoc, - sp); - ret_sz += sp->length; - do_wakeup_routine = 1; - sp->some_taken = 1; - sctp_m_freem(sp->data); - sp->length = 0; - sp->data = NULL; - sp->tail_mbuf = NULL; + goto oh_well; } - break; + memset(chk, 0, sizeof(*chk)); + chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG; + chk->sent = SCTP_FORWARD_TSN_SKIP; + chk->asoc = &stcb->asoc; + chk->rec.data.stream_seq = strq->next_sequence_send; + chk->rec.data.stream_number = sp->stream; + chk->rec.data.payloadtype = sp->ppid; + chk->rec.data.context = sp->context; + chk->flags = sp->act_flags; + if (sp->net) + chk->whoTo = sp->net; + else + chk->whoTo = stcb->asoc.primary_destination; + atomic_add_int(&chk->whoTo->ref_count, 1); + chk->rec.data.TSN_seq = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1); + stcb->asoc.pr_sctp_cnt++; + chk->pr_sctp_on = 1; + TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next); + stcb->asoc.sent_queue_cnt++; + stcb->asoc.pr_sctp_cnt++; } else { - /* Next one please */ - sp = TAILQ_NEXT(sp, next); + chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG; } - } /* End while */ + strq->next_sequence_send++; + oh_well: + if (sp->data) { + /* + * Pull any data to free up the SB and allow + * sender to "add more" while we will throw + * away :-) + */ + sctp_free_spbufspace(stcb, &stcb->asoc, sp); + ret_sz += sp->length; + do_wakeup_routine = 1; + sp->some_taken = 1; + sctp_m_freem(sp->data); + sp->data = NULL; + sp->tail_mbuf = NULL; + sp->length = 0; + } + } SCTP_TCB_SEND_UNLOCK(stcb); } if (do_wakeup_routine) { -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(stcb->sctp_ep); @@ -4894,7 +4875,7 @@ next_on_sent: } #endif sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket); -#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING) +#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if (!so_locked) { SCTP_SOCKET_UNLOCK(so, 1); } @@ -4923,6 +4904,7 @@ sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, continue; if (addr->sa_family != laddr->ifa->address.sa.sa_family) continue; +#ifdef INET if (addr->sa_family == AF_INET) { if (((struct sockaddr_in *)addr)->sin_addr.s_addr == laddr->ifa->address.sin.sin_addr.s_addr) { @@ -4934,6 +4916,7 @@ sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, break; } } +#endif #ifdef INET6 if (addr->sa_family == AF_INET6) { if (SCTP6_ARE_ADDR_EQUAL((struct sockaddr_in6 *)addr, @@ -4957,22 +4940,33 @@ sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, uint32_t sctp_get_ifa_hash_val(struct sockaddr *addr) { - if (addr->sa_family == AF_INET) { - struct sockaddr_in *sin; + switch (addr->sa_family) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in *sin; - sin = (struct sockaddr_in *)addr; - return (sin->sin_addr.s_addr ^ (sin->sin_addr.s_addr >> 16)); - } else if (addr->sa_family == AF_INET6) { - struct sockaddr_in6 *sin6; - uint32_t hash_of_addr; + sin = (struct sockaddr_in *)addr; + return (sin->sin_addr.s_addr ^ (sin->sin_addr.s_addr >> 16)); + } +#endif +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 *sin6; + uint32_t hash_of_addr; - sin6 = (struct sockaddr_in6 *)addr; - hash_of_addr = (sin6->sin6_addr.s6_addr32[0] + - sin6->sin6_addr.s6_addr32[1] + - sin6->sin6_addr.s6_addr32[2] + - sin6->sin6_addr.s6_addr32[3]); - hash_of_addr = (hash_of_addr ^ (hash_of_addr >> 16)); - return (hash_of_addr); + sin6 = (struct sockaddr_in6 *)addr; + hash_of_addr = (sin6->sin6_addr.s6_addr32[0] + + sin6->sin6_addr.s6_addr32[1] + + sin6->sin6_addr.s6_addr32[2] + + sin6->sin6_addr.s6_addr32[3]); + hash_of_addr = (hash_of_addr ^ (hash_of_addr >> 16)); + return (hash_of_addr); + } +#endif + default: + break; } return (0); } @@ -5021,6 +5015,7 @@ stage_right: } if (addr->sa_family != sctp_ifap->address.sa.sa_family) continue; +#ifdef INET if (addr->sa_family == AF_INET) { if (((struct sockaddr_in *)addr)->sin_addr.s_addr == sctp_ifap->address.sin.sin_addr.s_addr) { @@ -5031,6 +5026,7 @@ stage_right: break; } } +#endif #ifdef INET6 if (addr->sa_family == AF_INET6) { if (SCTP6_ARE_ADDR_EQUAL((struct sockaddr_in6 *)addr, @@ -5110,7 +5106,7 @@ sctp_user_rcvd(struct sctp_tcb *stcb, uint32_t * freed_so_far, int hold_rlock, goto out; } SCTP_STAT_INCR(sctps_wu_sacks_sent); - sctp_send_sack(stcb); + sctp_send_sack(stcb, SCTP_SO_LOCKED); sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_USR_RCVD, SCTP_SO_LOCKED); @@ -5218,10 +5214,10 @@ sctp_sorecvmsg(struct socket *so, rwnd_req, block_allowed, so->so_rcv.sb_cc, uio->uio_resid); } error = sblock(&so->so_rcv, (block_allowed ? SBL_WAIT : 0)); - sockbuf_lock = 1; if (error) { goto release_unlocked; } + sockbuf_lock = 1; restart: @@ -5234,7 +5230,7 @@ restart_nosblocks: (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) { goto out; } - if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { + if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && (so->so_rcv.sb_cc == 0)) { if (so->so_error) { error = so->so_error; if ((in_flags & MSG_PEEK) == 0) @@ -5242,7 +5238,6 @@ restart_nosblocks: goto out; } else { if (so->so_rcv.sb_cc == 0) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN); /* indicate EOF */ error = 0; goto out; @@ -5267,17 +5262,6 @@ restart_nosblocks: */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET); error = ECONNRESET; - /* - * You get this once if you are - * active open side - */ - if (!(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { - /* - * Remove flag if on the - * active open side - */ - inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_ABORTED; - } } so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | @@ -5287,8 +5271,6 @@ restart_nosblocks: if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN); error = ENOTCONN; - } else { - inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_CONNECTED; } } goto out; @@ -5321,18 +5303,6 @@ restart_nosblocks: */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET); error = ECONNRESET; - /* - * You get this once if you - * are active open side - */ - if (!(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { - /* - * Remove flag if on - * the active open - * side - */ - inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_ABORTED; - } } so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | @@ -5342,8 +5312,6 @@ restart_nosblocks: if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN); error = ENOTCONN; - } else { - inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_CONNECTED; } } goto out; @@ -5369,7 +5337,6 @@ restart_nosblocks: */ if (hold_rlock == 0) { SCTP_INP_READ_LOCK(inp); - hold_rlock = 1; } control = TAILQ_FIRST(&inp->read_queue); if ((control == NULL) && (so->so_rcv.sb_cc != 0)) { @@ -5539,7 +5506,7 @@ found_one: #ifdef INVARIANTS panic("refcnt already incremented"); #else - printf("refcnt already incremented?\n"); + SCTP_PRINTF("refcnt already incremented?\n"); #endif } else { atomic_add_int(&stcb->asoc.refcnt, 1); @@ -5567,7 +5534,8 @@ found_one: if ((sinfo) && filling_sinfo) { memcpy(sinfo, control, sizeof(struct sctp_nonpad_sndrcvinfo)); nxt = TAILQ_NEXT(control, next); - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) || + sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) { struct sctp_extrcvinfo *s_extra; s_extra = (struct sctp_extrcvinfo *)sinfo; @@ -5639,28 +5607,31 @@ found_one: } #endif if (fromlen && from) { - struct sockaddr *to; - + cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sa.sa_len); + switch (control->whoFrom->ro._l_addr.sa.sa_family) { +#ifdef INET6 + case AF_INET6: + ((struct sockaddr_in6 *)from)->sin6_port = control->port_from; + break; +#endif #ifdef INET - cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sin.sin_len); - memcpy(from, &control->whoFrom->ro._l_addr, cp_len); - ((struct sockaddr_in *)from)->sin_port = control->port_from; -#else - /* No AF_INET use AF_INET6 */ - cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sin6.sin6_len); - memcpy(from, &control->whoFrom->ro._l_addr, cp_len); - ((struct sockaddr_in6 *)from)->sin6_port = control->port_from; + case AF_INET: + ((struct sockaddr_in *)from)->sin_port = control->port_from; + break; #endif + default: + break; + } + memcpy(from, &control->whoFrom->ro._l_addr, cp_len); - to = from; #if defined(INET) && defined(INET6) if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) && - (to->sa_family == AF_INET) && + (from->sa_family == AF_INET) && ((size_t)fromlen >= sizeof(struct sockaddr_in6))) { struct sockaddr_in *sin; struct sockaddr_in6 sin6; - sin = (struct sockaddr_in *)to; + sin = (struct sockaddr_in *)from; bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); @@ -5669,15 +5640,15 @@ found_one: &sin6.sin6_addr.s6_addr32[3], sizeof(sin6.sin6_addr.s6_addr32[3])); sin6.sin6_port = sin->sin_port; - memcpy(from, (caddr_t)&sin6, sizeof(sin6)); + memcpy(from, &sin6, sizeof(struct sockaddr_in6)); } #endif -#if defined(INET6) +#ifdef INET6 { - struct sockaddr_in6 lsa6, *to6; + struct sockaddr_in6 lsa6, *from6; - to6 = (struct sockaddr_in6 *)to; - sctp_recover_scope_mac(to6, (&lsa6)); + from6 = (struct sockaddr_in6 *)from; + sctp_recover_scope_mac(from6, (&lsa6)); } #endif } @@ -6014,7 +5985,9 @@ wait_some_more: } if (control->end_added) { out_flags |= MSG_EOR; - if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0)) + if ((control->do_not_ref_stcb == 0) && + (control->stcb != NULL) && + ((control->spec_flags & M_NOTIFICATION) == 0)) control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0; } if (control->spec_flags & M_NOTIFICATION) { @@ -6074,7 +6047,8 @@ out: if (((out_flags & MSG_EOR) == 0) && ((in_flags & MSG_PEEK) == 0) && (sinfo) && - (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO))) { + (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) || + sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO))) { struct sctp_extrcvinfo *s_extra; s_extra = (struct sctp_extrcvinfo *)sinfo; @@ -6082,11 +6056,9 @@ out: } if (hold_rlock == 1) { SCTP_INP_READ_UNLOCK(inp); - hold_rlock = 0; } if (hold_sblock) { SOCKBUF_UNLOCK(&so->so_rcv); - hold_sblock = 0; } if (sockbuf_lock) { sbunlock(&so->so_rcv); @@ -6107,7 +6079,6 @@ out: #endif } atomic_add_int(&stcb->asoc.refcnt, -1); - freecnt_applied = 0; /* Save the value back for next time */ stcb->freed_by_sorcv_sincelast = freed_so_far; } @@ -6224,8 +6195,9 @@ sctp_soreceive(struct socket *so, SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); return (EINVAL); } - if ((sctp_is_feature_off(inp, - SCTP_PCB_FLAGS_RECVDATAIOEVNT)) || + if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT) && + sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO) && + sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) || (controlp == NULL)) { /* user does not want the sndrcv ctl */ filling_sinfo = 0; @@ -6261,71 +6233,6 @@ sctp_soreceive(struct socket *so, } -int -sctp_l_soreceive(struct socket *so, - struct sockaddr **name, - struct uio *uio, - char **controlp, - int *controllen, - int *flag) -{ - int error, fromlen; - uint8_t sockbuf[256]; - struct sockaddr *from; - struct sctp_extrcvinfo sinfo; - int filling_sinfo = 1; - struct sctp_inpcb *inp; - - inp = (struct sctp_inpcb *)so->so_pcb; - /* pickup the assoc we are reading from */ - if (inp == NULL) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); - return (EINVAL); - } - if ((sctp_is_feature_off(inp, - SCTP_PCB_FLAGS_RECVDATAIOEVNT)) || - (controlp == NULL)) { - /* user does not want the sndrcv ctl */ - filling_sinfo = 0; - } - if (name) { - from = (struct sockaddr *)sockbuf; - fromlen = sizeof(sockbuf); - from->sa_len = 0; - } else { - from = NULL; - fromlen = 0; - } - - error = sctp_sorecvmsg(so, uio, - (struct mbuf **)NULL, - from, fromlen, flag, - (struct sctp_sndrcvinfo *)&sinfo, - filling_sinfo); - if ((controlp) && (filling_sinfo)) { - /* - * copy back the sinfo in a CMSG format note that the caller - * has reponsibility for freeing the memory. - */ - if (filling_sinfo) - *controlp = sctp_build_ctl_cchunk(inp, - controllen, - (struct sctp_sndrcvinfo *)&sinfo); - } - if (name) { - /* copy back the address info */ - if (from && from->sa_len) { - *name = sodupsockaddr(from, M_WAIT); - } else { - *name = NULL; - } - } - return (error); -} - - - - @@ -6339,13 +6246,33 @@ sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr, struct sockaddr *sa; size_t incr = 0; +#ifdef INET + struct sockaddr_in *sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6; + +#endif + sa = addr; inp = stcb->sctp_ep; *error = 0; for (i = 0; i < totaddr; i++) { - if (sa->sa_family == AF_INET) { + switch (sa->sa_family) { +#ifdef INET + case AF_INET: incr = sizeof(struct sockaddr_in); - if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { + sin = (struct sockaddr_in *)sa; + if ((sin->sin_addr.s_addr == INADDR_ANY) || + (sin->sin_addr.s_addr == INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL); + (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7); + *error = EINVAL; + goto out_now; + } + if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { /* assoc gone no un-lock */ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS); (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7); @@ -6353,9 +6280,20 @@ sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr, goto out_now; } added++; - } else if (sa->sa_family == AF_INET6) { + break; +#endif +#ifdef INET6 + case AF_INET6: incr = sizeof(struct sockaddr_in6); - if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { + sin6 = (struct sockaddr_in6 *)sa; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { + SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL); + (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8); + *error = EINVAL; + goto out_now; + } + if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { /* assoc gone no un-lock */ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS); (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8); @@ -6363,6 +6301,10 @@ sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr, goto out_now; } added++; + break; +#endif + default: + break; } sa = (struct sockaddr *)((caddr_t)sa + incr); } @@ -6381,10 +6323,13 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr, at = incr = 0; sa = addr; + *error = *num_v6 = *num_v4 = 0; /* account and validate addresses */ for (i = 0; i < (size_t)*totaddr; i++) { - if (sa->sa_family == AF_INET) { + switch (sa->sa_family) { +#ifdef INET + case AF_INET: (*num_v4) += 1; incr = sizeof(struct sockaddr_in); if (sa->sa_len != incr) { @@ -6393,30 +6338,40 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr, *bad_addr = 1; return (NULL); } - } else if (sa->sa_family == AF_INET6) { - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *)sa; - if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { - /* Must be non-mapped for connectx */ - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); - *error = EINVAL; - *bad_addr = 1; - return (NULL); - } - (*num_v6) += 1; - incr = sizeof(struct sockaddr_in6); - if (sa->sa_len != incr) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); - *error = EINVAL; - *bad_addr = 1; - return (NULL); + break; +#endif +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sa; + if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + /* Must be non-mapped for connectx */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); + *error = EINVAL; + *bad_addr = 1; + return (NULL); + } + (*num_v6) += 1; + incr = sizeof(struct sockaddr_in6); + if (sa->sa_len != incr) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); + *error = EINVAL; + *bad_addr = 1; + return (NULL); + } + break; } - } else { +#endif + default: *totaddr = i; /* we are done */ break; } + if (i == (size_t)*totaddr) { + break; + } SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL); if (stcb != NULL) { @@ -6457,7 +6412,7 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp, return; } addr_touse = sa; -#if defined(INET6) && !defined(__Userspace__) /* TODO port in6_sin6_2_sin */ +#ifdef INET6 if (sa->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; @@ -6486,6 +6441,7 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp, } } #endif +#ifdef INET if (sa->sa_family == AF_INET) { if (sa->sa_len != sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); @@ -6500,6 +6456,7 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp, return; } } +#endif if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) { if (p == NULL) { /* Can't get proc for Net/Open BSD */ @@ -6565,7 +6522,7 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp, * assumes all arguments are valid/checked by caller. */ void -sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp, +sctp_bindx_delete_address(struct sctp_inpcb *inp, struct sockaddr *sa, sctp_assoc_t assoc_id, uint32_t vrf_id, int *error) { @@ -6583,7 +6540,7 @@ sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp, return; } addr_touse = sa; -#if defined(INET6) && !defined(__Userspace__) /* TODO port in6_sin6_2_sin */ +#ifdef INET6 if (sa->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; @@ -6612,6 +6569,7 @@ sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp, } } #endif +#ifdef INET if (sa->sa_family == AF_INET) { if (sa->sa_len != sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); @@ -6626,6 +6584,7 @@ sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp, return; } } +#endif /* * No lock required mgmt_ep_sa does its own locking. If the FIX: * below is ever changed we may need to lock before calling @@ -6658,20 +6617,12 @@ sctp_local_addr_count(struct sctp_tcb *stcb) int count = 0; /* Turn on all the appropriate scopes */ - loopback_scope = stcb->asoc.loopback_scope; - ipv4_local_scope = stcb->asoc.ipv4_local_scope; - local_scope = stcb->asoc.local_scope; - site_scope = stcb->asoc.site_scope; - ipv4_addr_legal = ipv6_addr_legal = 0; - if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - ipv6_addr_legal = 1; - if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) { - ipv4_addr_legal = 1; - } - } else { - ipv4_addr_legal = 1; - } - + loopback_scope = stcb->asoc.scope.loopback_scope; + ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope; + local_scope = stcb->asoc.scope.local_scope; + site_scope = stcb->asoc.scope.site_scope; + ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal; + ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal; SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(stcb->asoc.vrf_id); if (vrf == NULL) { @@ -6692,6 +6643,7 @@ sctp_local_addr_count(struct sctp_tcb *stcb) if (sctp_is_addr_restricted(stcb, sctp_ifa)) continue; switch (sctp_ifa->address.sa.sa_family) { +#ifdef INET case AF_INET: if (ipv4_addr_legal) { struct sockaddr_in *sin; @@ -6714,6 +6666,7 @@ sctp_local_addr_count(struct sctp_tcb *stcb) continue; } break; +#endif #ifdef INET6 case AF_INET6: if (ipv6_addr_legal) { @@ -6808,170 +6761,181 @@ sctp_log_trace(uint32_t subsys, const char *str SCTP_UNUSED, uint32_t a, uint32_ } #endif -/* We will need to add support - * to bind the ports and such here - * so we can do UDP tunneling. In - * the mean-time, we return error - */ -#include -#include -#include -#ifdef INET6 -#include -#endif - static void sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *ignored) { struct ip *iph; + +#ifdef INET6 + struct ip6_hdr *ip6; + +#endif struct mbuf *sp, *last; struct udphdr *uhdr; - uint16_t port = 0, len; - int header_size = sizeof(struct udphdr) + sizeof(struct sctphdr); + uint16_t port; - /* - * Split out the mbuf chain. Leave the IP header in m, place the - * rest in the sp. - */ if ((m->m_flags & M_PKTHDR) == 0) { /* Can't handle one that is not a pkt hdr */ goto out; } - /* pull the src port */ + /* Pull the src port */ iph = mtod(m, struct ip *); uhdr = (struct udphdr *)((caddr_t)iph + off); - port = uhdr->uh_sport; + /* + * Split out the mbuf chain. Leave the IP header in m, place the + * rest in the sp. + */ sp = m_split(m, off, M_DONTWAIT); if (sp == NULL) { /* Gak, drop packet, we can't do a split */ goto out; } - if (sp->m_pkthdr.len < header_size) { - /* Gak, packet can't have an SCTP header in it - to small */ + if (sp->m_pkthdr.len < sizeof(struct udphdr) + sizeof(struct sctphdr)) { + /* Gak, packet can't have an SCTP header in it - too small */ m_freem(sp); goto out; } - /* ok now pull up the UDP header and SCTP header together */ - sp = m_pullup(sp, header_size); + /* Now pull up the UDP header and SCTP header together */ + sp = m_pullup(sp, sizeof(struct udphdr) + sizeof(struct sctphdr)); if (sp == NULL) { /* Gak pullup failed */ goto out; } - /* trim out the UDP header */ + /* Trim out the UDP header */ m_adj(sp, sizeof(struct udphdr)); /* Now reconstruct the mbuf chain */ - /* 1) find last one */ - last = m; - while (last->m_next != NULL) { - last = last->m_next; - } + for (last = m; last->m_next; last = last->m_next); last->m_next = sp; m->m_pkthdr.len += sp->m_pkthdr.len; - last = m; - while (last != NULL) { - last = last->m_next; - } - /* Now its ready for sctp_input or sctp6_input */ iph = mtod(m, struct ip *); switch (iph->ip_v) { +#ifdef INET case IPVERSION: - { - /* its IPv4 */ - len = SCTP_GET_IPV4_LENGTH(iph); - len -= sizeof(struct udphdr); - SCTP_GET_IPV4_LENGTH(iph) = len; - sctp_input_with_port(m, off, port); - break; - } + iph->ip_len -= sizeof(struct udphdr); + sctp_input_with_port(m, off, port); + break; +#endif #ifdef INET6 case IPV6_VERSION >> 4: - { - /* its IPv6 - NOT supported */ - goto out; - break; - - } + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr)); + sctp6_input_with_port(&m, &off, port); + break; #endif default: - { - m_freem(m); - break; - } + goto out; + break; } return; out: m_freem(m); } -void +void sctp_over_udp_stop(void) { - struct socket *sop; - /* * This function assumes sysctl caller holds sctp_sysctl_info_lock() * for writting! */ - if (SCTP_BASE_INFO(udp_tun_socket) == NULL) { - /* Nothing to do */ - return; +#ifdef INET + if (SCTP_BASE_INFO(udp4_tun_socket) != NULL) { + soclose(SCTP_BASE_INFO(udp4_tun_socket)); + SCTP_BASE_INFO(udp4_tun_socket) = NULL; + } +#endif +#ifdef INET6 + if (SCTP_BASE_INFO(udp6_tun_socket) != NULL) { + soclose(SCTP_BASE_INFO(udp6_tun_socket)); + SCTP_BASE_INFO(udp6_tun_socket) = NULL; } - sop = SCTP_BASE_INFO(udp_tun_socket); - soclose(sop); - SCTP_BASE_INFO(udp_tun_socket) = NULL; +#endif } -int + +int sctp_over_udp_start(void) { uint16_t port; int ret; + +#ifdef INET struct sockaddr_in sin; - struct socket *sop = NULL; - struct thread *th; - struct ucred *cred; +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; + +#endif /* * This function assumes sysctl caller holds sctp_sysctl_info_lock() * for writting! */ port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port); - if (port == 0) { + if (ntohs(port) == 0) { /* Must have a port set */ return (EINVAL); } - if (SCTP_BASE_INFO(udp_tun_socket) != NULL) { +#ifdef INET + if (SCTP_BASE_INFO(udp4_tun_socket) != NULL) { + /* Already running -- must stop first */ + return (EALREADY); + } +#endif +#ifdef INET6 + if (SCTP_BASE_INFO(udp6_tun_socket) != NULL) { /* Already running -- must stop first */ return (EALREADY); } - th = curthread; - cred = th->td_ucred; - if ((ret = socreate(PF_INET, &sop, - SOCK_DGRAM, IPPROTO_UDP, cred, th))) { +#endif +#ifdef INET + if ((ret = socreate(PF_INET, &SCTP_BASE_INFO(udp4_tun_socket), + SOCK_DGRAM, IPPROTO_UDP, + curthread->td_ucred, curthread))) { + sctp_over_udp_stop(); return (ret); } - SCTP_BASE_INFO(udp_tun_socket) = sop; - /* call the special UDP hook */ - ret = udp_set_kernel_tunneling(sop, sctp_recv_udp_tunneled_packet); - if (ret) { - goto exit_stage_left; + /* Call the special UDP hook. */ + if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp4_tun_socket), + sctp_recv_udp_tunneled_packet))) { + sctp_over_udp_stop(); + return (ret); } - /* Ok we have a socket, bind it to the port */ - memset(&sin, 0, sizeof(sin)); - sin.sin_len = sizeof(sin); + /* Ok, we have a socket, bind it to the port. */ + memset(&sin, 0, sizeof(struct sockaddr_in)); + sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_port = htons(port); - ret = sobind(sop, (struct sockaddr *)&sin, th); - if (ret) { - /* Close up we cant get the port */ -exit_stage_left: + if ((ret = sobind(SCTP_BASE_INFO(udp4_tun_socket), + (struct sockaddr *)&sin, curthread))) { sctp_over_udp_stop(); return (ret); } - /* - * Ok we should now get UDP packets directly to our input routine - * sctp_recv_upd_tunneled_packet(). - */ +#endif +#ifdef INET6 + if ((ret = socreate(PF_INET6, &SCTP_BASE_INFO(udp6_tun_socket), + SOCK_DGRAM, IPPROTO_UDP, + curthread->td_ucred, curthread))) { + sctp_over_udp_stop(); + return (ret); + } + /* Call the special UDP hook. */ + if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp6_tun_socket), + sctp_recv_udp_tunneled_packet))) { + sctp_over_udp_stop(); + return (ret); + } + /* Ok, we have a socket, bind it to the port. */ + memset(&sin6, 0, sizeof(struct sockaddr_in6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_port = htons(port); + if ((ret = sobind(SCTP_BASE_INFO(udp6_tun_socket), + (struct sockaddr *)&sin6, curthread))) { + sctp_over_udp_stop(); + return (ret); + } +#endif return (0); } diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h index d109ec23..411bfafc 100644 --- a/freebsd/sys/netinet/sctputil.h +++ b/freebsd/sys/netinet/sctputil.h @@ -1,15 +1,17 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. + * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. + * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived @@ -28,14 +30,11 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ - -/* $KAME: sctputil.h,v 1.15 2005/03/06 16:04:19 itojun Exp $ */ - #include __FBSDID("$FreeBSD$"); -#ifndef __sctputil_h__ -#define __sctputil_h__ +#ifndef _NETINET_SCTP_UTIL_H_ +#define _NETINET_SCTP_UTIL_H_ #if defined(_KERNEL) || defined(__Userspace__) @@ -85,6 +84,12 @@ int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, uint32_t, uint32_t); void sctp_fill_random_store(struct sctp_pcb *); +void +sctp_notify_stream_reset_add(struct sctp_tcb *stcb, uint16_t numberin, + uint16_t numberout, int flag); +void + sctp_notify_stream_reset_tsn(struct sctp_tcb *stcb, uint32_t sending_tsn, uint32_t recv_tsn, int flag); + void sctp_timer_start(int, struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *); @@ -125,14 +130,14 @@ sctp_append_to_readq(struct sctp_inpcb *inp, void sctp_iterator_worker(void); uint32_t sctp_get_prev_mtu(uint32_t); -uint32_t sctp_get_next_mtu(struct sctp_inpcb *, uint32_t); +uint32_t sctp_get_next_mtu(uint32_t); void sctp_timeout_handler(void *); uint32_t sctp_calculate_rto(struct sctp_tcb *, struct sctp_association *, - struct sctp_nets *, struct timeval *, int); + struct sctp_nets *, struct timeval *, int, int); uint32_t sctp_calculate_len(struct mbuf *); @@ -162,7 +167,7 @@ sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp, void sctp_stop_timers_for_shutdown(struct sctp_tcb *); void -sctp_report_all_outbound(struct sctp_tcb *, int, int +sctp_report_all_outbound(struct sctp_tcb *, uint16_t, int, int #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif @@ -171,7 +176,8 @@ sctp_report_all_outbound(struct sctp_tcb *, int, int int sctp_expand_mapping_array(struct sctp_association *, uint32_t); void -sctp_abort_notification(struct sctp_tcb *, int, int +sctp_abort_notification(struct sctp_tcb *, uint8_t, uint16_t, + struct sctp_abort_chunk *, int #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif @@ -179,13 +185,16 @@ sctp_abort_notification(struct sctp_tcb *, int, int /* We abort responding to an IP packet for some reason */ void -sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *, - struct mbuf *, int, struct sctphdr *, struct mbuf *, uint32_t, uint16_t); +sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *, + int, struct sockaddr *, struct sockaddr *, + struct sctphdr *, struct mbuf *, + uint8_t, uint32_t, + uint32_t, uint16_t); /* We choose to abort via user input */ void -sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *, int, +sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *, int #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED @@ -193,8 +202,11 @@ sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *, int, ); void -sctp_handle_ootb(struct mbuf *, int, int, struct sctphdr *, - struct sctp_inpcb *, struct mbuf *, uint32_t, uint16_t); +sctp_handle_ootb(struct mbuf *, int, int, + struct sockaddr *, struct sockaddr *, + struct sctphdr *, struct sctp_inpcb *, + uint8_t, uint32_t, + uint32_t, uint16_t); int sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr, @@ -231,11 +243,10 @@ struct sockaddr_in6 * int sctp_cmpaddr(struct sockaddr *, struct sockaddr *); void sctp_print_address(struct sockaddr *); -void sctp_print_address_pkt(struct ip *, struct sctphdr *); int sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *, - int, int + uint8_t, int #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif @@ -248,7 +259,7 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp, struct sockaddr *sa, sctp_assoc_t assoc_id, uint32_t vrf_id, int *error, void *p); void -sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp, +sctp_bindx_delete_address(struct sctp_inpcb *inp, struct sockaddr *sa, sctp_assoc_t assoc_id, uint32_t vrf_id, int *error); @@ -311,7 +322,7 @@ do { \ } \ } while (0) -/* new functions to start/stop udp tunneling */ +/* functions to start/stop udp tunneling */ void sctp_over_udp_stop(void); int sctp_over_udp_start(void); @@ -322,26 +333,11 @@ sctp_soreceive(struct socket *so, struct sockaddr **psa, struct mbuf **controlp, int *flagsp); - -/* For those not passing mbufs, this does the - * translations for you. Caller owns memory - * of size controllen returned in controlp. - */ -int -sctp_l_soreceive(struct socket *so, - struct sockaddr **name, - struct uio *uio, - char **controlp, - int *controllen, - int *flag); - - void sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d); void sctp_wakeup_log(struct sctp_tcb *stcb, - uint32_t cumtsn, uint32_t wake_cnt, int from); void sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t, uint16_t, uint16_t, int); @@ -367,7 +363,7 @@ void sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc void sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from); void sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *, int, int, uint8_t); -void sctp_log_block(uint8_t, struct socket *, struct sctp_association *, int); +void sctp_log_block(uint8_t, struct sctp_association *, int); void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t); void sctp_log_mbcnt(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t); void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t); @@ -386,7 +382,5 @@ sctp_auditing(int, struct sctp_inpcb *, struct sctp_tcb *, void sctp_audit_log(uint8_t, uint8_t); #endif - - #endif /* _KERNEL */ #endif diff --git a/freebsd/sys/netinet/tcp.h b/freebsd/sys/netinet/tcp.h index 8779582a..affc4df0 100644 --- a/freebsd/sys/netinet/tcp.h +++ b/freebsd/sys/netinet/tcp.h @@ -140,16 +140,21 @@ struct tcphdr { #endif /* __BSD_VISIBLE */ /* - * User-settable options (used with setsockopt). + * User-settable options (used with setsockopt). These are discrete + * values and are not masked together. Some values appear to be + * bitmasks for historical reasons. */ -#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#define TCP_NODELAY 1 /* don't delay send to coalesce packets */ #if __BSD_VISIBLE -#define TCP_MAXSEG 0x02 /* set maximum segment size */ -#define TCP_NOPUSH 0x04 /* don't push last block of write */ -#define TCP_NOOPT 0x08 /* don't use TCP options */ -#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */ -#define TCP_INFO 0x20 /* retrieve tcp_info structure */ -#define TCP_CONGESTION 0x40 /* get/set congestion control algorithm */ +#define TCP_MAXSEG 2 /* set maximum segment size */ +#define TCP_NOPUSH 4 /* don't push last block of write */ +#define TCP_NOOPT 8 /* don't use TCP options */ +#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */ +#define TCP_INFO 32 /* retrieve tcp_info structure */ +#define TCP_CONGESTION 64 /* get/set congestion control algorithm */ + +/* Start of reserved space for third-party user-settable options. */ +#define TCP_VENDOR SO_VENDOR #define TCP_CA_NAME_MAX 16 /* max congestion control name length */ @@ -217,9 +222,12 @@ struct tcp_info { u_int32_t tcpi_snd_nxt; /* Next egress seqno */ u_int32_t tcpi_rcv_nxt; /* Next ingress seqno */ u_int32_t tcpi_toe_tid; /* HWTID for TOE endpoints */ + u_int32_t tcpi_snd_rexmitpack; /* Retransmitted packets */ + u_int32_t tcpi_rcv_ooopack; /* Out-of-order packets */ + u_int32_t tcpi_snd_zerowin; /* Zero-sized windows sent */ /* Padding to grow without breaking ABI. */ - u_int32_t __tcpi_pad[29]; /* Padding. */ + u_int32_t __tcpi_pad[26]; /* Padding. */ }; #endif diff --git a/freebsd/sys/netinet/tcp_hostcache.c b/freebsd/sys/netinet/tcp_hostcache.c index 352df1dd..69cb1a74 100644 --- a/freebsd/sys/netinet/tcp_hostcache.c +++ b/freebsd/sys/netinet/tcp_hostcache.c @@ -123,19 +123,19 @@ static void tcp_hc_purge(void *); SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, "TCP Host cache"); -SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, +SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, &VNET_NAME(tcp_hostcache.cache_limit), 0, "Overall entry limit for hostcache"); -SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN, +SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN, &VNET_NAME(tcp_hostcache.hashsize), 0, "Size of TCP hostcache hashtable"); -SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, +SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN, &VNET_NAME(tcp_hostcache.bucket_limit), 0, "Per-bucket hash limit for hostcache"); -SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD, +SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD, &VNET_NAME(tcp_hostcache.cache_count), 0, "Current number of entries in hostcache"); @@ -176,6 +176,7 @@ static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache"); void tcp_hc_init(void) { + u_int cache_limit; int i; /* @@ -184,23 +185,27 @@ tcp_hc_init(void) V_tcp_hostcache.cache_count = 0; V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT; - V_tcp_hostcache.cache_limit = - V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit; V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE; V_tcp_hostcache.prune = TCP_HOSTCACHE_PRUNE; TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize", &V_tcp_hostcache.hashsize); - TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit", - &V_tcp_hostcache.cache_limit); - TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit", - &V_tcp_hostcache.bucket_limit); if (!powerof2(V_tcp_hostcache.hashsize)) { printf("WARNING: hostcache hash size is not a power of 2.\n"); V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; /* default */ } V_tcp_hostcache.hashmask = V_tcp_hostcache.hashsize - 1; + TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit", + &V_tcp_hostcache.bucket_limit); + + cache_limit = V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit; + V_tcp_hostcache.cache_limit = cache_limit; + TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit", + &V_tcp_hostcache.cache_limit); + if (V_tcp_hostcache.cache_limit > cache_limit) + V_tcp_hostcache.cache_limit = cache_limit; + /* * Allocate the hash table. */ @@ -626,7 +631,7 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) msec(hc_entry->rmx_rtt * (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), msec(hc_entry->rmx_rttvar * - (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), + (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE))), hc_entry->rmx_bandwidth * 8, hc_entry->rmx_cwnd, hc_entry->rmx_sendpipe, diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c index 8feb571f..508f6bf2 100644 --- a/freebsd/sys/netinet/tcp_input.c +++ b/freebsd/sys/netinet/tcp_input.c @@ -3,6 +3,20 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. + * Copyright (c) 2007-2008,2010 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart, + * James Healy and David Hayes, made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -42,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include /* for proc0 declaration */ @@ -63,6 +78,7 @@ __FBSDID("$FreeBSD$"); #define TCPSTATES /* for logging */ +#include #include #include #include @@ -77,7 +93,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -98,7 +113,7 @@ __FBSDID("$FreeBSD$"); #include -static const int tcprexmtthresh = 3; +const int tcprexmtthresh = 3; VNET_DEFINE(struct tcpstat, tcpstat); SYSCTL_VNET_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW, @@ -134,19 +149,16 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW, "Enable RFC 3042 (Limited Transmit)"); VNET_DEFINE(int, tcp_do_rfc3390) = 1; -#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3390), 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); VNET_DEFINE(int, tcp_do_rfc3465) = 1; -#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3465), 0, "Enable RFC 3465 (Appropriate Byte Counting)"); VNET_DEFINE(int, tcp_abc_l_var) = 2; -#define V_tcp_abc_l_var VNET(tcp_abc_l_var) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW, &VNET_NAME(tcp_abc_l_var), 2, "Cap the max cwnd increment during slow-start to this number of segments"); @@ -205,8 +217,18 @@ static void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); static void tcp_xmit_timer(struct tcpcb *, int); static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *); -static void inline - tcp_congestion_exp(struct tcpcb *); +static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th, + uint16_t type); +static void inline cc_conn_init(struct tcpcb *tp); +static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th); +static void inline tcp_fields_to_host(struct tcphdr *); +static void inline hhook_run_tcp_est_in(struct tcpcb *tp, + struct tcphdr *th, struct tcpopt *to); +#ifdef TCP_SIGNATURE +static void inline tcp_fields_to_net(struct tcphdr *); +static int inline tcp_signature_verify_input(struct mbuf *, int, int, + int, struct tcpopt *, struct tcphdr *, u_int); +#endif /* * Kernel module interface for updating tcpstat. The argument is an index @@ -222,21 +244,244 @@ kmod_tcpstat_inc(int statnum) (*((u_long *)&V_tcpstat + statnum))++; } +/* + * Wrapper for the TCP established input helper hook. + */ static void inline -tcp_congestion_exp(struct tcpcb *tp) +hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) { - u_int win; - - win = min(tp->snd_wnd, tp->snd_cwnd) / - 2 / tp->t_maxseg; - if (win < 2) - win = 2; - tp->snd_ssthresh = win * tp->t_maxseg; - ENTER_FASTRECOVERY(tp); - tp->snd_recover = tp->snd_max; - if (tp->t_flags & TF_ECN_PERMIT) - tp->t_flags |= TF_ECN_SND_CWR; + struct tcp_hhook_data hhook_data; + + if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks > 0) { + hhook_data.tp = tp; + hhook_data.th = th; + hhook_data.to = to; + + hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_IN], &hhook_data, + tp->osd); + } +} + +/* + * CC wrapper hook functions + */ +static void inline +cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type) +{ + INP_WLOCK_ASSERT(tp->t_inpcb); + + tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); + if (tp->snd_cwnd == min(tp->snd_cwnd, tp->snd_wnd)) + tp->ccv->flags |= CCF_CWND_LIMITED; + else + tp->ccv->flags &= ~CCF_CWND_LIMITED; + + if (type == CC_ACK) { + if (tp->snd_cwnd > tp->snd_ssthresh) { + tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, + V_tcp_abc_l_var * tp->t_maxseg); + if (tp->t_bytes_acked >= tp->snd_cwnd) { + tp->t_bytes_acked -= tp->snd_cwnd; + tp->ccv->flags |= CCF_ABC_SENTAWND; + } + } else { + tp->ccv->flags &= ~CCF_ABC_SENTAWND; + tp->t_bytes_acked = 0; + } + } + + if (CC_ALGO(tp)->ack_received != NULL) { + /* XXXLAS: Find a way to live without this */ + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->ack_received(tp->ccv, type); + } +} + +static void inline +cc_conn_init(struct tcpcb *tp) +{ + struct hc_metrics_lite metrics; + struct inpcb *inp = tp->t_inpcb; + int rtt; +#ifdef INET6 + int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; +#endif + + INP_WLOCK_ASSERT(tp->t_inpcb); + + tcp_hc_get(&inp->inp_inc, &metrics); + + if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { + tp->t_srtt = rtt; + tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; + TCPSTAT_INC(tcps_usedrtt); + if (metrics.rmx_rttvar) { + tp->t_rttvar = metrics.rmx_rttvar; + TCPSTAT_INC(tcps_usedrttvar); + } else { + /* default variation is +- 1 rtt */ + tp->t_rttvar = + tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; + } + TCPT_RANGESET(tp->t_rxtcur, + ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, + tp->t_rttmin, TCPTV_REXMTMAX); + } + if (metrics.rmx_ssthresh) { + /* + * There's some sort of gateway or interface + * buffer limit on the path. Use this to set + * the slow start threshhold, but set the + * threshold to no less than 2*mss. + */ + tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh); + TCPSTAT_INC(tcps_usedssthresh); + } + + /* + * Set the slow-start flight size depending on whether this + * is a local network or not. + * + * Extend this so we cache the cwnd too and retrieve it here. + * Make cwnd even bigger than RFC3390 suggests but only if we + * have previous experience with the remote host. Be careful + * not make cwnd bigger than remote receive window or our own + * send socket buffer. Maybe put some additional upper bound + * on the retrieved cwnd. Should do incremental updates to + * hostcache when cwnd collapses so next connection doesn't + * overloads the path again. + * + * XXXAO: Initializing the CWND from the hostcache is broken + * and in its current form not RFC conformant. It is disabled + * until fixed or removed entirely. + * + * RFC3390 says only do this if SYN or SYN/ACK didn't got lost. + * We currently check only in syncache_socket for that. + */ +/* #define TCP_METRICS_CWND */ +#ifdef TCP_METRICS_CWND + if (metrics.rmx_cwnd) + tp->snd_cwnd = max(tp->t_maxseg, min(metrics.rmx_cwnd / 2, + min(tp->snd_wnd, so->so_snd.sb_hiwat))); + else +#endif + if (V_tcp_do_rfc3390) + tp->snd_cwnd = min(4 * tp->t_maxseg, + max(2 * tp->t_maxseg, 4380)); +#ifdef INET6 + else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) || + (!isipv6 && in_localaddr(inp->inp_faddr))) +#else + else if (in_localaddr(inp->inp_faddr)) +#endif + tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local; + else + tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz; + + if (CC_ALGO(tp)->conn_init != NULL) + CC_ALGO(tp)->conn_init(tp->ccv); +} + +void inline +cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) +{ + INP_WLOCK_ASSERT(tp->t_inpcb); + + switch(type) { + case CC_NDUPACK: + if (!IN_FASTRECOVERY(tp->t_flags)) { + tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_ECN_PERMIT) + tp->t_flags |= TF_ECN_SND_CWR; + } + break; + case CC_ECN: + if (!IN_CONGRECOVERY(tp->t_flags)) { + TCPSTAT_INC(tcps_ecn_rcwnd); + tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_ECN_PERMIT) + tp->t_flags |= TF_ECN_SND_CWR; + } + break; + case CC_RTO: + tp->t_dupacks = 0; + tp->t_bytes_acked = 0; + EXIT_RECOVERY(tp->t_flags); + tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / + tp->t_maxseg) * tp->t_maxseg; + tp->snd_cwnd = tp->t_maxseg; + break; + case CC_RTO_ERR: + TCPSTAT_INC(tcps_sndrexmitbad); + /* RTO was unnecessary, so reset everything. */ + tp->snd_cwnd = tp->snd_cwnd_prev; + tp->snd_ssthresh = tp->snd_ssthresh_prev; + tp->snd_recover = tp->snd_recover_prev; + if (tp->t_flags & TF_WASFRECOVERY) + ENTER_FASTRECOVERY(tp->t_flags); + if (tp->t_flags & TF_WASCRECOVERY) + ENTER_CONGRECOVERY(tp->t_flags); + tp->snd_nxt = tp->snd_max; + tp->t_flags &= ~TF_PREVVALID; + tp->t_badrxtwin = 0; + break; + } + + if (CC_ALGO(tp)->cong_signal != NULL) { + if (th != NULL) + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->cong_signal(tp->ccv, type); + } +} + +static void inline +cc_post_recovery(struct tcpcb *tp, struct tcphdr *th) +{ + INP_WLOCK_ASSERT(tp->t_inpcb); + + /* XXXLAS: KASSERT that we're in recovery? */ + + if (CC_ALGO(tp)->post_recovery != NULL) { + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->post_recovery(tp->ccv); + } + /* XXXLAS: EXIT_RECOVERY ? */ + tp->t_bytes_acked = 0; +} + +static inline void +tcp_fields_to_host(struct tcphdr *th) +{ + + th->th_seq = ntohl(th->th_seq); + th->th_ack = ntohl(th->th_ack); + th->th_win = ntohs(th->th_win); + th->th_urp = ntohs(th->th_urp); +} + +#ifdef TCP_SIGNATURE +static inline void +tcp_fields_to_net(struct tcphdr *th) +{ + + th->th_seq = htonl(th->th_seq); + th->th_ack = htonl(th->th_ack); + th->th_win = htons(th->th_win); + th->th_urp = htons(th->th_urp); +} + +static inline int +tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen, + struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) +{ + int ret; + + tcp_fields_to_net(th); + ret = tcp_signature_verify(m, off0, tlen, optlen, to, th, tcpbflag); + tcp_fields_to_host(th); + return (ret); } +#endif /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ #ifdef INET6 @@ -317,6 +562,9 @@ tcp_input(struct mbuf *m, int off0) int thflags; int rstreason = 0; /* For badport_bandlim accounting purposes */ uint8_t iptos; +#ifdef TCP_SIGNATURE + uint8_t sig_checked = 0; +#endif #ifdef IPFIREWALL_FORWARD struct m_tag *fwd_tag; #endif @@ -474,10 +722,7 @@ tcp_input(struct mbuf *m, int off0) /* * Convert TCP protocol specific fields to host format. */ - th->th_seq = ntohl(th->th_seq); - th->th_ack = ntohl(th->th_ack); - th->th_win = ntohs(th->th_win); - th->th_urp = ntohs(th->th_urp); + tcp_fields_to_host(th); /* * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options. @@ -819,6 +1064,26 @@ relocked: tp = intotcpcb(inp); KASSERT(tp->t_state == TCPS_SYN_RECEIVED, ("%s: ", __func__)); +#ifdef TCP_SIGNATURE + if (sig_checked == 0) { + tcp_dooptions(&to, optp, optlen, + (thflags & TH_SYN) ? TO_SYN : 0); + if (!tcp_signature_verify_input(m, off0, tlen, + optlen, &to, th, tp->t_flags)) { + + /* + * In SYN_SENT state if it receives an + * RST, it is allowed for further + * processing. + */ + if ((thflags & TH_RST) == 0 || + (tp->t_state == TCPS_SYN_SENT) == 0) + goto dropunlock; + } + sig_checked = 1; + } +#endif + /* * Process the segment and the data it * contains. tcp_do_segment() consumes @@ -1023,6 +1288,25 @@ relocked: return; } +#ifdef TCP_SIGNATURE + if (sig_checked == 0) { + tcp_dooptions(&to, optp, optlen, + (thflags & TH_SYN) ? TO_SYN : 0); + if (!tcp_signature_verify_input(m, off0, tlen, optlen, &to, + th, tp->t_flags)) { + + /* + * In SYN_SENT state if it receives an RST, it is + * allowed for further processing. + */ + if ((thflags & TH_RST) == 0 || + (tp->t_state == TCPS_SYN_SENT) == 0) + goto dropunlock; + } + sig_checked = 1; + } +#endif + /* * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later * state. tcp_do_segment() always consumes the mbuf chain, unlocks @@ -1089,6 +1373,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, short ostate = 0; #endif thflags = th->th_flags; + tp->sackhint.last_sack_ack = 0; /* * If this is either a state-changing packet or current state isn't @@ -1159,14 +1444,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, TCPSTAT_INC(tcps_ecn_ect1); break; } - /* - * Congestion experienced. - * Ignore if we are already trying to recover. - */ - if ((thflags & TH_ECE) && - SEQ_LEQ(th->th_ack, tp->snd_recover)) { - TCPSTAT_INC(tcps_ecn_rcwnd); - tcp_congestion_exp(tp); + /* Congestion experienced. */ + if (thflags & TH_ECE) { + cc_cong_signal(tp, th, CC_ECN); } } @@ -1185,7 +1465,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { to.to_tsecr -= tp->ts_offset; - if (TSTMP_GT(to.to_tsecr, ticks)) + if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) to.to_tsecr = 0; } @@ -1210,7 +1490,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, if (to.to_flags & TOF_TS) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; - tp->ts_recent_age = ticks; + tp->ts_recent_age = tcp_ts_getticks(); } if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); @@ -1254,22 +1534,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((to.to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { - tp->ts_recent_age = ticks; + tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to.to_tsval; } if (tlen == 0) { if (SEQ_GT(th->th_ack, tp->snd_una) && SEQ_LEQ(th->th_ack, tp->snd_max) && - tp->snd_cwnd >= tp->snd_wnd && - ((!V_tcp_do_newreno && - !(tp->t_flags & TF_SACK_PERMIT) && - tp->t_dupacks < tcprexmtthresh) || - ((V_tcp_do_newreno || - (tp->t_flags & TF_SACK_PERMIT)) && - !IN_FASTRECOVERY(tp) && - (to.to_flags & TOF_SACK) == 0 && - TAILQ_EMPTY(&tp->snd_holes)))) { + !IN_RECOVERY(tp->t_flags) && + (to.to_flags & TOF_SACK) == 0 && + TAILQ_EMPTY(&tp->snd_holes)) { /* * This is a pure ack for outstanding data. */ @@ -1288,16 +1562,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * "bad retransmit" recovery. */ if (tp->t_rxtshift == 1 && + tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) { - TCPSTAT_INC(tcps_sndrexmitbad); - tp->snd_cwnd = tp->snd_cwnd_prev; - tp->snd_ssthresh = - tp->snd_ssthresh_prev; - tp->snd_recover = tp->snd_recover_prev; - if (tp->t_flags & TF_WASFRECOVERY) - ENTER_FASTRECOVERY(tp); - tp->snd_nxt = tp->snd_max; - tp->t_badrxtwin = 0; + cc_cong_signal(tp, th, CC_RTO_ERR); } /* @@ -1310,11 +1577,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) { - if (!tp->t_rttlow || - tp->t_rttlow > ticks - to.to_tsecr) - tp->t_rttlow = ticks - to.to_tsecr; + u_int t; + + t = tcp_ts_getticks() - to.to_tsecr; + if (!tp->t_rttlow || tp->t_rttlow > t) + tp->t_rttlow = t; tcp_xmit_timer(tp, - ticks - to.to_tsecr + 1); + TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || @@ -1324,13 +1593,26 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, ticks - tp->t_rtttime); } tcp_xmit_bandwidth_limit(tp, th->th_ack); - acked = th->th_ack - tp->snd_una; + acked = BYTES_THIS_ACK(tp, th); + + /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ + hhook_run_tcp_est_in(tp, th, &to); + TCPSTAT_INC(tcps_rcvackpack); TCPSTAT_ADD(tcps_rcvackbyte, acked); sbdrop(&so->so_snd, acked); if (SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; + + /* + * Let the congestion control algorithm update + * congestion control related information. This + * typically means increasing the congestion + * window. + */ + cc_ack_received(tp, th, CC_ACK); + tp->snd_una = th->th_ack; /* * Pull snd_wl2 up to prevent seq wrap relative @@ -1560,7 +1842,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; } - tp->rcv_adv += tp->rcv_wnd; + tp->rcv_adv += imin(tp->rcv_wnd, + TCP_MAXWIN << tp->rcv_scale); tp->snd_una++; /* SYN is acked */ /* * If there's data, delay ACK; if there's also a FIN @@ -1590,6 +1873,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, thflags &= ~TH_SYN; } else { tp->t_state = TCPS_ESTABLISHED; + cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, tcp_keepidle); } } else { @@ -1770,7 +2054,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, TSTMP_LT(to.to_tsval, tp->ts_recent)) { /* Check to see if ts_recent is over 24 days old. */ - if (ticks - tp->ts_recent_age > TCP_PAWS_IDLE) { + if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { /* * Invalidate ts_recent. If this segment updates * ts_recent, the age will be reset later and ts_recent @@ -1929,7 +2213,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN|TH_FIN)) != 0))) { - tp->ts_recent_age = ticks; + tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to.to_tsval; } @@ -1993,6 +2277,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->t_flags &= ~TF_NEEDFIN; } else { tp->t_state = TCPS_ESTABLISHED; + cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, tcp_keepidle); } /* @@ -2027,6 +2312,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, ((to.to_flags & TOF_SACK) || !TAILQ_EMPTY(&tp->snd_holes))) tcp_sack_doack(tp, &to, th->th_ack); + + /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ + hhook_run_tcp_est_in(tp, th, &to); + if (SEQ_LEQ(th->th_ack, tp->snd_una)) { if (tlen == 0 && tiwin == tp->snd_wnd) { TCPSTAT_INC(tcps_rcvdupack); @@ -2061,11 +2350,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, th->th_ack != tp->snd_una) tp->t_dupacks = 0; else if (++tp->t_dupacks > tcprexmtthresh || - ((V_tcp_do_newreno || - (tp->t_flags & TF_SACK_PERMIT)) && - IN_FASTRECOVERY(tp))) { + IN_FASTRECOVERY(tp->t_flags)) { + cc_ack_received(tp, th, CC_DUPACK); if ((tp->t_flags & TF_SACK_PERMIT) && - IN_FASTRECOVERY(tp)) { + IN_FASTRECOVERY(tp->t_flags)) { int awnd; /* @@ -2083,6 +2371,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, } } else tp->snd_cwnd += tp->t_maxseg; + if ((thflags & TH_FIN) && + (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { + /* + * If its a fin we need to process + * it to avoid a race where both + * sides enter FIN-WAIT and send FIN|ACK + * at the same time. + */ + break; + } (void) tcp_output(tp); goto drop; } else if (tp->t_dupacks == tcprexmtthresh) { @@ -2096,19 +2394,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * recovery. */ if (tp->t_flags & TF_SACK_PERMIT) { - if (IN_FASTRECOVERY(tp)) { + if (IN_FASTRECOVERY(tp->t_flags)) { tp->t_dupacks = 0; break; } - } else if (V_tcp_do_newreno || - V_tcp_do_ecn) { + } else { if (SEQ_LEQ(th->th_ack, tp->snd_recover)) { tp->t_dupacks = 0; break; } } - tcp_congestion_exp(tp); + /* Congestion signal before ack. */ + cc_cong_signal(tp, th, CC_NDUPACK); + cc_ack_received(tp, th, CC_DUPACK); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; if (tp->t_flags & TF_SACK_PERMIT) { @@ -2121,6 +2420,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, } tp->snd_nxt = th->th_ack; tp->snd_cwnd = tp->t_maxseg; + if ((thflags & TH_FIN) && + (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { + /* + * If its a fin we need to process + * it to avoid a race where both + * sides enter FIN-WAIT and send FIN|ACK + * at the same time. + */ + break; + } (void) tcp_output(tp); KASSERT(tp->snd_limited <= 2, ("%s: tp->snd_limited too big", @@ -2132,6 +2441,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->snd_nxt = onxt; goto drop; } else if (V_tcp_do_rfc3042) { + cc_ack_received(tp, th, CC_DUPACK); u_long oldcwnd = tp->snd_cwnd; tcp_seq oldsndmax = tp->snd_max; u_int sent; @@ -2146,6 +2456,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, (tp->snd_nxt - tp->snd_una) + (tp->t_dupacks - tp->snd_limited) * tp->t_maxseg; + if ((thflags & TH_FIN) && + (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { + /* + * If its a fin we need to process + * it to avoid a race where both + * sides enter FIN-WAIT and send FIN|ACK + * at the same time. + */ + break; + } (void) tcp_output(tp); sent = tp->snd_max - oldsndmax; if (sent > tp->t_maxseg) { @@ -2173,37 +2493,14 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * If the congestion window was inflated to account * for the other side's cached packets, retract it. */ - if (V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) { - if (IN_FASTRECOVERY(tp)) { - if (SEQ_LT(th->th_ack, tp->snd_recover)) { - if (tp->t_flags & TF_SACK_PERMIT) - tcp_sack_partialack(tp, th); - else - tcp_newreno_partial_ack(tp, th); - } else { - /* - * Out of fast recovery. - * Window inflation should have left us - * with approximately snd_ssthresh - * outstanding data. - * But in case we would be inclined to - * send a burst, better to do it via - * the slow start mechanism. - */ - if (SEQ_GT(th->th_ack + - tp->snd_ssthresh, - tp->snd_max)) - tp->snd_cwnd = tp->snd_max - - th->th_ack + - tp->t_maxseg; - else - tp->snd_cwnd = tp->snd_ssthresh; - } - } - } else { - if (tp->t_dupacks >= tcprexmtthresh && - tp->snd_cwnd > tp->snd_ssthresh) - tp->snd_cwnd = tp->snd_ssthresh; + if (IN_FASTRECOVERY(tp->t_flags)) { + if (SEQ_LT(th->th_ack, tp->snd_recover)) { + if (tp->t_flags & TF_SACK_PERMIT) + tcp_sack_partialack(tp, th); + else + tcp_newreno_partial_ack(tp, th); + } else + cc_post_recovery(tp, th); } tp->t_dupacks = 0; /* @@ -2234,7 +2531,7 @@ process_ACK: ("tcp_input: process_ACK ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); - acked = th->th_ack - tp->snd_una; + acked = BYTES_THIS_ACK(tp, th); TCPSTAT_INC(tcps_rcvackpack); TCPSTAT_ADD(tcps_rcvackbyte, acked); @@ -2245,16 +2542,9 @@ process_ACK: * original cwnd and ssthresh, and proceed to transmit where * we left off. */ - if (tp->t_rxtshift == 1 && (int)(ticks - tp->t_badrxtwin) < 0) { - TCPSTAT_INC(tcps_sndrexmitbad); - tp->snd_cwnd = tp->snd_cwnd_prev; - tp->snd_ssthresh = tp->snd_ssthresh_prev; - tp->snd_recover = tp->snd_recover_prev; - if (tp->t_flags & TF_WASFRECOVERY) - ENTER_FASTRECOVERY(tp); - tp->snd_nxt = tp->snd_max; - tp->t_badrxtwin = 0; /* XXX probably not required */ - } + if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && + (int)(ticks - tp->t_badrxtwin) < 0) + cc_cong_signal(tp, th, CC_RTO_ERR); /* * If we have a timestamp reply, update smoothed @@ -2270,11 +2560,13 @@ process_ACK: * timestamps of 0 or we could calculate a * huge RTT and blow up the retransmit timer. */ - if ((to.to_flags & TOF_TS) != 0 && - to.to_tsecr) { - if (!tp->t_rttlow || tp->t_rttlow > ticks - to.to_tsecr) - tp->t_rttlow = ticks - to.to_tsecr; - tcp_xmit_timer(tp, ticks - to.to_tsecr + 1); + if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) { + u_int t; + + t = tcp_ts_getticks() - to.to_tsecr; + if (!tp->t_rttlow || tp->t_rttlow > t) + tp->t_rttlow = t; + tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) tp->t_rttlow = ticks - tp->t_rtttime; @@ -2302,61 +2594,12 @@ process_ACK: goto step6; /* - * When new data is acked, open the congestion window. - * Method depends on which congestion control state we're - * in (slow start or cong avoid) and if ABC (RFC 3465) is - * enabled. - * - * slow start: cwnd <= ssthresh - * cong avoid: cwnd > ssthresh - * - * slow start and ABC (RFC 3465): - * Grow cwnd exponentially by the amount of data - * ACKed capping the max increment per ACK to - * (abc_l_var * maxseg) bytes. - * - * slow start without ABC (RFC 2581): - * Grow cwnd exponentially by maxseg per ACK. - * - * cong avoid and ABC (RFC 3465): - * Grow cwnd linearly by maxseg per RTT for each - * cwnd worth of ACKed data. - * - * cong avoid without ABC (RFC 2581): - * Grow cwnd linearly by approximately maxseg per RTT using - * maxseg^2 / cwnd per ACK as the increment. - * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to - * avoid capping cwnd. + * Let the congestion control algorithm update congestion + * control related information. This typically means increasing + * the congestion window. */ - if ((!V_tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) || - !IN_FASTRECOVERY(tp)) { - u_int cw = tp->snd_cwnd; - u_int incr = tp->t_maxseg; - /* In congestion avoidance? */ - if (cw > tp->snd_ssthresh) { - if (V_tcp_do_rfc3465) { - tp->t_bytes_acked += acked; - if (tp->t_bytes_acked >= tp->snd_cwnd) - tp->t_bytes_acked -= cw; - else - incr = 0; - } - else - incr = max((incr * incr / cw), 1); - /* - * In slow-start with ABC enabled and no RTO in sight? - * (Must not use abc_l_var > 1 if slow starting after an - * RTO. On RTO, snd_nxt = snd_una, so the snd_nxt == - * snd_max check is sufficient to handle this). - */ - } else if (V_tcp_do_rfc3465 && - tp->snd_nxt == tp->snd_max) - incr = min(acked, - V_tcp_abc_l_var * tp->t_maxseg); - /* ABC is on by default, so (incr == 0) frequently. */ - if (incr > 0) - tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<snd_scale); - } + cc_ack_received(tp, th, CC_ACK); + SOCKBUF_LOCK(&so->so_snd); if (acked > so->so_snd.sb_cc) { tp->snd_wnd -= so->so_snd.sb_cc; @@ -2370,16 +2613,14 @@ process_ACK: /* NB: sowwakeup_locked() does an implicit unlock. */ sowwakeup_locked(so); /* Detect una wraparound. */ - if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) && - !IN_FASTRECOVERY(tp) && + if (!IN_RECOVERY(tp->t_flags) && SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; - if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) && - IN_FASTRECOVERY(tp) && + /* XXXLAS: Can this be moved up into cc_post_recovery? */ + if (IN_RECOVERY(tp->t_flags) && SEQ_GEQ(th->th_ack, tp->snd_recover)) { - EXIT_FASTRECOVERY(tp); - tp->t_bytes_acked = 0; + EXIT_RECOVERY(tp->t_flags); } tp->snd_una = th->th_ack; if (tp->t_flags & TF_SACK_PERMIT) { @@ -2609,7 +2850,10 @@ dodata: /* XXX */ * buffer size. * XXX: Unused. */ - len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) + len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); + else + len = so->so_rcv.sb_hiwat; #endif } else { m_freem(m); @@ -3078,22 +3322,19 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt) * are present. Store the upper limit of the length of options plus * data in maxopd. * - * In case of T/TCP, we call this routine during implicit connection - * setup as well (offer = -1), to initialize maxseg from the cached - * MSS of our peer. - * * NOTE that this routine is only called when we process an incoming - * segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt(). + * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS + * settings are handled in tcp_mssopt(). */ void -tcp_mss_update(struct tcpcb *tp, int offer, +tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, struct hc_metrics_lite *metricptr, int *mtuflags) { int mss; u_long maxmtu; struct inpcb *inp = tp->t_inpcb; struct hc_metrics_lite metrics; - int origoffer = offer; + int origoffer; #ifdef INET6 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; size_t min_protoh = isipv6 ? @@ -3105,6 +3346,12 @@ tcp_mss_update(struct tcpcb *tp, int offer, INP_WLOCK_ASSERT(tp->t_inpcb); + if (mtuoffer != -1) { + KASSERT(offer == -1, ("%s: conflict", __func__)); + offer = mtuoffer - min_protoh; + } + origoffer = offer; + /* Initialize. */ #ifdef INET6 if (isipv6) { @@ -3244,24 +3491,19 @@ tcp_mss_update(struct tcpcb *tp, int offer, void tcp_mss(struct tcpcb *tp, int offer) { - int rtt, mss; + int mss; u_long bufsize; struct inpcb *inp; struct socket *so; struct hc_metrics_lite metrics; int mtuflags = 0; -#ifdef INET6 - int isipv6; -#endif + KASSERT(tp != NULL, ("%s: tp == NULL", __func__)); - tcp_mss_update(tp, offer, &metrics, &mtuflags); + tcp_mss_update(tp, offer, -1, &metrics, &mtuflags); mss = tp->t_maxseg; inp = tp->t_inpcb; -#ifdef INET6 - isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; -#endif /* * If there's a pipesize, change the socket buffer to that size, @@ -3301,77 +3543,6 @@ tcp_mss(struct tcpcb *tp, int offer) (void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL); } SOCKBUF_UNLOCK(&so->so_rcv); - /* - * While we're here, check the others too. - */ - if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { - tp->t_srtt = rtt; - tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; - TCPSTAT_INC(tcps_usedrtt); - if (metrics.rmx_rttvar) { - tp->t_rttvar = metrics.rmx_rttvar; - TCPSTAT_INC(tcps_usedrttvar); - } else { - /* default variation is +- 1 rtt */ - tp->t_rttvar = - tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; - } - TCPT_RANGESET(tp->t_rxtcur, - ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, - tp->t_rttmin, TCPTV_REXMTMAX); - } - if (metrics.rmx_ssthresh) { - /* - * There's some sort of gateway or interface - * buffer limit on the path. Use this to set - * the slow start threshhold, but set the - * threshold to no less than 2*mss. - */ - tp->snd_ssthresh = max(2 * mss, metrics.rmx_ssthresh); - TCPSTAT_INC(tcps_usedssthresh); - } - if (metrics.rmx_bandwidth) - tp->snd_bandwidth = metrics.rmx_bandwidth; - - /* - * Set the slow-start flight size depending on whether this - * is a local network or not. - * - * Extend this so we cache the cwnd too and retrieve it here. - * Make cwnd even bigger than RFC3390 suggests but only if we - * have previous experience with the remote host. Be careful - * not make cwnd bigger than remote receive window or our own - * send socket buffer. Maybe put some additional upper bound - * on the retrieved cwnd. Should do incremental updates to - * hostcache when cwnd collapses so next connection doesn't - * overloads the path again. - * - * XXXAO: Initializing the CWND from the hostcache is broken - * and in its current form not RFC conformant. It is disabled - * until fixed or removed entirely. - * - * RFC3390 says only do this if SYN or SYN/ACK didn't got lost. - * We currently check only in syncache_socket for that. - */ -/* #define TCP_METRICS_CWND */ -#ifdef TCP_METRICS_CWND - if (metrics.rmx_cwnd) - tp->snd_cwnd = max(mss, - min(metrics.rmx_cwnd / 2, - min(tp->snd_wnd, so->so_snd.sb_hiwat))); - else -#endif - if (V_tcp_do_rfc3390) - tp->snd_cwnd = min(4 * mss, max(2 * mss, 4380)); -#ifdef INET6 - else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) || - (!isipv6 && in_localaddr(inp->inp_faddr))) -#else - else if (in_localaddr(inp->inp_faddr)) -#endif - tp->snd_cwnd = mss * V_ss_fltsz_local; - else - tp->snd_cwnd = mss * V_ss_fltsz; /* Check the interface for TSO capabilities. */ if (mtuflags & CSUM_TSO) @@ -3435,7 +3606,7 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) * Set snd_cwnd to one segment beyond acknowledged offset. * (tp->snd_una has not yet been updated when this function is called.) */ - tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una); + tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); tp->snd_cwnd = ocwnd; @@ -3445,8 +3616,8 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) * Partial window deflation. Relies on fact that tp->snd_una * not updated yet. */ - if (tp->snd_cwnd > th->th_ack - tp->snd_una) - tp->snd_cwnd -= th->th_ack - tp->snd_una; + if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th)) + tp->snd_cwnd -= BYTES_THIS_ACK(tp, th); else tp->snd_cwnd = 0; tp->snd_cwnd += tp->t_maxseg; diff --git a/freebsd/sys/netinet/tcp_lro.c b/freebsd/sys/netinet/tcp_lro.c index bbb98e60..f8f678e3 100644 --- a/freebsd/sys/netinet/tcp_lro.c +++ b/freebsd/sys/netinet/tcp_lro.c @@ -1,39 +1,33 @@ #include -/****************************************************************************** - -Copyright (c) 2007, Myricom Inc. -Copyright (c) 2008, Intel Corporation. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Neither the name of the Myricom Inc, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - 3. Neither the name of the Intel Corporation, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -$FreeBSD$ -***************************************************************************/ +/*- + * Copyright (c) 2007, Myricom Inc. + * Copyright (c) 2008, Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ #include #include diff --git a/freebsd/sys/netinet/tcp_lro.h b/freebsd/sys/netinet/tcp_lro.h index 08aac690..7e498871 100644 --- a/freebsd/sys/netinet/tcp_lro.h +++ b/freebsd/sys/netinet/tcp_lro.h @@ -1,39 +1,32 @@ -/******************************************************************************* +/*- + * Copyright (c) 2006, Myricom Inc. + * Copyright (c) 2008, Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ -Copyright (c) 2006, Myricom Inc. -Copyright (c) 2008, Intel Corporation. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Neither the name of the Myricom Inc, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - 2. Neither the name of the Intel Corporation, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - - -$FreeBSD$ - -***************************************************************************/ #ifndef _TCP_LRO_H_ #define _TCP_LRO_H_ diff --git a/freebsd/sys/netinet/tcp_offload.h b/freebsd/sys/netinet/tcp_offload.h index 48f35ff6..313185f6 100644 --- a/freebsd/sys/netinet/tcp_offload.h +++ b/freebsd/sys/netinet/tcp_offload.h @@ -56,7 +56,7 @@ * * It is assumed that individuals deploying TOE will want connections * to be offloaded without software changes so all connections on an - * interface providing TOE are offloaded unless the the SO_NO_OFFLOAD + * interface providing TOE are offloaded unless the SO_NO_OFFLOAD * flag is set on the socket. * * diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c index 55a1f6e4..8d6881d1 100644 --- a/freebsd/sys/netinet/tcp_output.c +++ b/freebsd/sys/netinet/tcp_output.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -66,7 +68,6 @@ __FBSDID("$FreeBSD$"); #include #include #endif -#include #define TCPOUTFLAGS #include #include @@ -104,11 +105,6 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW, &VNET_NAME(ss_fltsz_local), 1, "Slow start flight size for local networks"); -VNET_DEFINE(int, tcp_do_newreno) = 1; -SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, - &VNET_NAME(tcp_do_newreno), 0, - "Enable NewReno Algorithms"); - VNET_DEFINE(int, tcp_do_tso) = 1; #define V_tcp_do_tso VNET(tcp_do_tso) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW, @@ -133,6 +129,43 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); +static void inline hhook_run_tcp_est_out(struct tcpcb *tp, + struct tcphdr *th, struct tcpopt *to, + long len, int tso); +static void inline cc_after_idle(struct tcpcb *tp); + +/* + * Wrapper for the TCP established ouput helper hook. + */ +static void inline +hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th, + struct tcpopt *to, long len, int tso) +{ + struct tcp_hhook_data hhook_data; + + if (V_tcp_hhh[HHOOK_TCP_EST_OUT]->hhh_nhooks > 0) { + hhook_data.tp = tp; + hhook_data.th = th; + hhook_data.to = to; + hhook_data.len = len; + hhook_data.tso = tso; + + hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &hhook_data, + tp->osd); + } +} + +/* + * CC wrapper hook functions + */ +static void inline +cc_after_idle(struct tcpcb *tp) +{ + INP_WLOCK_ASSERT(tp->t_inpcb); + + if (CC_ALGO(tp)->after_idle != NULL) + CC_ALGO(tp)->after_idle(tp->ccv); +} /* * Tcp output routine: figure out what should be sent and send it. @@ -142,7 +175,7 @@ tcp_output(struct tcpcb *tp) { struct socket *so = tp->t_inpcb->inp_socket; long len, recwin, sendwin; - int off, flags, error, rw; + int off, flags, error; struct mbuf *m; struct ip *ip = NULL; struct ipovly *ipov = NULL; @@ -176,37 +209,8 @@ tcp_output(struct tcpcb *tp) * to send, then transmit; otherwise, investigate further. */ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); - if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) { - /* - * If we've been idle for more than one retransmit - * timeout the old congestion window is no longer - * current and we have to reduce it to the restart - * window before we can transmit again. - * - * The restart window is the initial window or the last - * CWND, whichever is smaller. - * - * This is done to prevent us from flooding the path with - * a full CWND at wirespeed, overloading router and switch - * buffers along the way. - * - * See RFC5681 Section 4.1. "Restarting Idle Connections". - */ - if (V_tcp_do_rfc3390) - rw = min(4 * tp->t_maxseg, - max(2 * tp->t_maxseg, 4380)); -#ifdef INET6 - else if ((isipv6 ? in6_localaddr(&tp->t_inpcb->in6p_faddr) : - in_localaddr(tp->t_inpcb->inp_faddr))) -#else - else if (in_localaddr(tp->t_inpcb->inp_faddr)) -#endif - rw = V_ss_fltsz_local * tp->t_maxseg; - else - rw = V_ss_fltsz * tp->t_maxseg; - - tp->snd_cwnd = min(rw, tp->snd_cwnd); - } + if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) + cc_after_idle(tp); tp->t_flags &= ~TF_LASTIDLE; if (idle) { if (tp->t_flags & TF_MORETOCOME) { @@ -244,7 +248,7 @@ again: sack_bytes_rxmt = 0; len = 0; p = NULL; - if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp) && + if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) && (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { long cwin; @@ -572,14 +576,28 @@ after_sack_rexmit: * taking into account that we are limited by * TCP_MAXWIN << tp->rcv_scale. */ - long adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale) - - (tp->rcv_adv - tp->rcv_nxt); + long adv; + int oldwin; + + adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale); + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { + oldwin = (tp->rcv_adv - tp->rcv_nxt); + adv -= oldwin; + } else + oldwin = 0; + /* + * If the new window size ends up being the same as the old + * size when it is scaled, then don't force a window update. + */ + if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale) + goto dontupdate; if (adv >= (long) (2 * tp->t_maxseg)) goto send; if (2 * adv >= (long) so->so_rcv.sb_hiwat) goto send; } +dontupdate: /* * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW @@ -686,13 +704,13 @@ send: /* Timestamps. */ if ((tp->t_flags & TF_RCVD_TSTMP) || ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { - to.to_tsval = ticks + tp->ts_offset; + to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; /* Set receive buffer autosizing timestamp. */ if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE)) - tp->rfbuf_ts = ticks; + tp->rfbuf_ts = tcp_ts_getticks(); } /* Selective ACK's. */ if (tp->t_flags & TF_SACK_PERMIT) { @@ -785,6 +803,7 @@ send: if ((tp->t_flags & TF_FORCEDATA) && len == 1) TCPSTAT_INC(tcps_sndprobe); else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { + tp->t_sndrexmitpack++; TCPSTAT_INC(tcps_sndrexmitpack); TCPSTAT_ADD(tcps_sndrexmitbyte, len); } else { @@ -985,7 +1004,8 @@ send: if (recwin < (long)(so->so_rcv.sb_hiwat / 4) && recwin < (long)tp->t_maxseg) recwin = 0; - if (recwin < (long)(tp->rcv_adv - tp->rcv_nxt)) + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && + recwin < (long)(tp->rcv_adv - tp->rcv_nxt)) recwin = (long)(tp->rcv_adv - tp->rcv_nxt); if (recwin > (long)TCP_MAXWIN << tp->rcv_scale) recwin = (long)TCP_MAXWIN << tp->rcv_scale; @@ -1009,9 +1029,10 @@ send: * to read more data than can be buffered prior to transmitting on * the connection. */ - if (th->th_win == 0) + if (th->th_win == 0) { + tp->t_sndzerowin++; tp->t_flags |= TF_RXWIN0SENT; - else + } else tp->t_flags &= ~TF_RXWIN0SENT; if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); @@ -1140,6 +1161,9 @@ timer: tp->snd_max = tp->snd_nxt + len; } + /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */ + hhook_run_tcp_est_out(tp, th, &to, len, tso); + #ifdef TCPDEBUG /* * Trace. @@ -1266,7 +1290,7 @@ out: */ if (tso) tp->t_flags &= ~TF_TSO; - tcp_mtudisc(tp->t_inpcb, 0); + tcp_mtudisc(tp->t_inpcb, -1); return (0); case EHOSTDOWN: case EHOSTUNREACH: @@ -1289,7 +1313,7 @@ out: * then remember the size of the advertised window. * Any pending ACK has now been sent. */ - if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) + if (recwin >= 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) tp->rcv_adv = tp->rcv_nxt + recwin; tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); @@ -1302,7 +1326,7 @@ out: * on the transmitter effectively destroys the TCP window, forcing * it to four packets (1.5Kx4 = 6K window). */ - if (sendalot && (!V_tcp_do_newreno || --maxburst)) + if (sendalot && --maxburst) goto again; #endif if (sendalot) @@ -1316,6 +1340,7 @@ tcp_setpersist(struct tcpcb *tp) int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; int tt; + tp->t_flags &= ~TF_PREVVALID; if (tcp_timer_active(tp, TT_REXMT)) panic("tcp_setpersist: retransmit pending"); /* diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c index 64aeca84..432b6ba1 100644 --- a/freebsd/sys/netinet/tcp_reass.c +++ b/freebsd/sys/netinet/tcp_reass.c @@ -84,19 +84,22 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0, static VNET_DEFINE(int, tcp_reass_maxseg) = 0; #define V_tcp_reass_maxseg VNET(tcp_reass_maxseg) -SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN, +SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, maxsegments, + CTLTYPE_INT | CTLFLAG_RDTUN, &VNET_NAME(tcp_reass_maxseg), 0, &tcp_reass_sysctl_maxseg, "I", "Global maximum number of TCP Segments in Reassembly Queue"); static VNET_DEFINE(int, tcp_reass_qsize) = 0; #define V_tcp_reass_qsize VNET(tcp_reass_qsize) -SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD, +SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments, + CTLTYPE_INT | CTLFLAG_RD, &VNET_NAME(tcp_reass_qsize), 0, &tcp_reass_sysctl_qsize, "I", "Global number of TCP Segments currently in Reassembly Queue"); static VNET_DEFINE(int, tcp_reass_overflows) = 0; #define V_tcp_reass_overflows VNET(tcp_reass_overflows) -SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD, +SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows, + CTLTYPE_INT | CTLFLAG_RD, &VNET_NAME(tcp_reass_overflows), 0, "Global number of TCP Segment Reassembly Queue Overflows"); @@ -176,7 +179,9 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m) struct tseg_qent *nq; struct tseg_qent *te = NULL; struct socket *so = tp->t_inpcb->inp_socket; + char *s = NULL; int flags; + struct tseg_qent tqs; INP_WLOCK_ASSERT(tp->t_inpcb); @@ -214,19 +219,45 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m) TCPSTAT_INC(tcps_rcvmemdrop); m_freem(m); *tlenp = 0; + if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: queue limit reached, " + "segment dropped\n", s, __func__); + free(s, M_TCPLOG); + } return (0); } /* * Allocate a new queue entry. If we can't, or hit the zone limit * just drop the pkt. + * + * Use a temporary structure on the stack for the missing segment + * when the zone is exhausted. Otherwise we may get stuck. */ te = uma_zalloc(V_tcp_reass_zone, M_NOWAIT); if (te == NULL) { - TCPSTAT_INC(tcps_rcvmemdrop); - m_freem(m); - *tlenp = 0; - return (0); + if (th->th_seq != tp->rcv_nxt) { + TCPSTAT_INC(tcps_rcvmemdrop); + m_freem(m); + *tlenp = 0; + if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, + NULL))) { + log(LOG_DEBUG, "%s; %s: global zone limit " + "reached, segment dropped\n", s, __func__); + free(s, M_TCPLOG); + } + return (0); + } else { + bzero(&tqs, sizeof(struct tseg_qent)); + te = &tqs; + if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, + NULL))) { + log(LOG_DEBUG, + "%s; %s: global zone limit reached, using " + "stack for missing segment\n", s, __func__); + free(s, M_TCPLOG); + } + } } tp->t_segqlen++; @@ -268,6 +299,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m) th->th_seq += i; } } + tp->t_rcvoopack++; TCPSTAT_INC(tcps_rcvoopack); TCPSTAT_ADD(tcps_rcvoobyte, *tlenp); @@ -302,6 +334,8 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m) if (p == NULL) { LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q); } else { + KASSERT(te != &tqs, ("%s: temporary stack based entry not " + "first element in queue", __func__)); LIST_INSERT_AFTER(p, te, tqe_q); } @@ -325,7 +359,8 @@ present: m_freem(q->tqe_m); else sbappendstream_locked(&so->so_rcv, q->tqe_m); - uma_zfree(V_tcp_reass_zone, q); + if (q != &tqs) + uma_zfree(V_tcp_reass_zone, q); tp->t_segqlen--; q = nq; } while (q && q->tqe_th->th_seq == tp->rcv_nxt); diff --git a/freebsd/sys/netinet/tcp_sack.c b/freebsd/sys/netinet/tcp_sack.c index 7a7df0bb..fc93be91 100644 --- a/freebsd/sys/netinet/tcp_sack.c +++ b/freebsd/sys/netinet/tcp_sack.c @@ -427,6 +427,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) * are received. */ sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */ + tp->sackhint.last_sack_ack = sblkp->end; if (SEQ_LT(tp->snd_fack, sblkp->start)) { /* * The highest SACK block is beyond fack. Append new SACK @@ -578,7 +579,7 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th) tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; /* Send one or 2 segments based on how much new data was acked. */ - if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2) + if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) > 2) num_segs = 2; tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg); diff --git a/freebsd/sys/netinet/tcp_seq.h b/freebsd/sys/netinet/tcp_seq.h index f58b537c..51d971f2 100644 --- a/freebsd/sys/netinet/tcp_seq.h +++ b/freebsd/sys/netinet/tcp_seq.h @@ -62,7 +62,34 @@ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \ (tp)->snd_recover = (tp)->iss -#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * hz) - /* timestamp wrap-around time */ +#ifdef _KERNEL +/* + * Clock macros for RFC 1323 timestamps. + */ +#define TCP_TS_TO_TICKS(_t) ((_t) * hz / 1000) + +/* Timestamp wrap-around time, 24 days. */ +#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * 1000) + +/* + * tcp_ts_getticks() in ms, should be 1ms < x < 1000ms according to RFC 1323. + * We always use 1ms granularity independent of hz. + */ +static __inline u_int +tcp_ts_getticks(void) +{ + struct timeval tv; + u_long ms; + + /* + * getmicrouptime() should be good enough for any 1-1000ms granularity. + * Do not use getmicrotime() here as it might break nfsroot/tcp. + */ + getmicrouptime(&tv); + ms = tv.tv_sec * 1000 + tv.tv_usec / 1000; + + return (ms); +} +#endif /* _KERNEL */ #endif /* _NETINET_TCP_SEQ_H_ */ diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c index 027a2ad6..a42f2fa5 100644 --- a/freebsd/sys/netinet/tcp_subr.c +++ b/freebsd/sys/netinet/tcp_subr.c @@ -47,7 +47,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#include #include #include #include @@ -68,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -86,7 +89,6 @@ __FBSDID("$FreeBSD$"); #include #endif #include -#include #include #include #include @@ -204,7 +206,7 @@ static int do_tcpdrain = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); -SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, +SYSCTL_VNET_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs"); static VNET_DEFINE(int, icmp_may_rst) = 1; @@ -263,10 +265,19 @@ SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW, &VNET_NAME(tcp_inflight_stab), 0, "Inflight Algorithm Stabilization 20 = 2 packets"); +#ifdef TCP_SIGNATURE +static int tcp_sig_checksigs = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW, + &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic"); +#endif + VNET_DEFINE(uma_zone_t, sack_hole_zone); #define V_sack_hole_zone VNET(sack_hole_zone) +VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]); + static struct inpcb *tcp_notify(struct inpcb *, int); +static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int); static void tcp_isn_tick(void *); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); @@ -290,6 +301,8 @@ static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, struct tcpcb_mem { struct tcpcb tcb; struct tcp_timer tt; + struct cc_var ccv; + struct osd osd; }; static VNET_DEFINE(uma_zone_t, tcpcb_zone); @@ -335,6 +348,14 @@ tcp_init(void) V_tcbinfo.ipi_vnet = curvnet; #endif V_tcbinfo.ipi_listhead = &V_tcb; + + if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, + &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) + printf("%s: WARNING: unable to register helper hook\n", __func__); + if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, + &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) + printf("%s: WARNING: unable to register helper hook\n", __func__); + hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize); if (!powerof2(hashsize)) { @@ -705,6 +726,32 @@ tcp_newtcpcb(struct inpcb *inp) if (tm == NULL) return (NULL); tp = &tm->tcb; + + /* Initialise cc_var struct for this tcpcb. */ + tp->ccv = &tm->ccv; + tp->ccv->type = IPPROTO_TCP; + tp->ccv->ccvc.tcp = tp; + + /* + * Use the current system default CC algorithm. + */ + CC_LIST_RLOCK(); + KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!")); + CC_ALGO(tp) = CC_DEFAULT(); + CC_LIST_RUNLOCK(); + + if (CC_ALGO(tp)->cb_init != NULL) + if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { + uma_zfree(V_tcpcb_zone, tm); + return (NULL); + } + + tp->osd = &tm->osd; + if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { + uma_zfree(V_tcpcb_zone, tm); + return (NULL); + } + #ifdef VIMAGE tp->t_vnet = inp->inp_vnet; #endif @@ -753,6 +800,69 @@ tcp_newtcpcb(struct inpcb *inp) return (tp); /* XXX */ } +/* + * Switch the congestion control algorithm back to NewReno for any active + * control blocks using an algorithm which is about to go away. + * This ensures the CC framework can allow the unload to proceed without leaving + * any dangling pointers which would trigger a panic. + * Returning non-zero would inform the CC framework that something went wrong + * and it would be unsafe to allow the unload to proceed. However, there is no + * way for this to occur with this implementation so we always return zero. + */ +int +tcp_ccalgounload(struct cc_algo *unload_algo) +{ + struct cc_algo *tmpalgo; + struct inpcb *inp; + struct tcpcb *tp; + VNET_ITERATOR_DECL(vnet_iter); + + /* + * Check all active control blocks across all network stacks and change + * any that are using "unload_algo" back to NewReno. If "unload_algo" + * requires cleanup code to be run, call it. + */ + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + INP_INFO_RLOCK(&V_tcbinfo); + /* + * New connections already part way through being initialised + * with the CC algo we're removing will not race with this code + * because the INP_INFO_WLOCK is held during initialisation. We + * therefore don't enter the loop below until the connection + * list has stabilised. + */ + LIST_FOREACH(inp, &V_tcb, inp_list) { + INP_WLOCK(inp); + /* Important to skip tcptw structs. */ + if (!(inp->inp_flags & INP_TIMEWAIT) && + (tp = intotcpcb(inp)) != NULL) { + /* + * By holding INP_WLOCK here, we are assured + * that the connection is not currently + * executing inside the CC module's functions + * i.e. it is safe to make the switch back to + * NewReno. + */ + if (CC_ALGO(tp) == unload_algo) { + tmpalgo = CC_ALGO(tp); + /* NewReno does not require any init. */ + CC_ALGO(tp) = &newreno_cc_algo; + if (tmpalgo->cb_destroy != NULL) + tmpalgo->cb_destroy(tp->ccv); + } + } + INP_WUNLOCK(inp); + } + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + + return (0); +} + /* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, @@ -863,6 +973,14 @@ tcp_discardcb(struct tcpcb *tp) tcp_offload_detach(tp); tcp_free_sackholes(tp); + + /* Allow the CC algorithm to clean up after itself. */ + if (CC_ALGO(tp)->cb_destroy != NULL) + CC_ALGO(tp)->cb_destroy(tp->ccv); + + khelp_destroy_osd(tp->osd); + + CC_ALGO(tp) = NULL; inp->inp_ppcb = NULL; tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); @@ -1135,7 +1253,8 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) return (error); } -SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, +SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, + CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); static int @@ -1260,7 +1379,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) return; if (cmd == PRC_MSGSIZE) - notify = tcp_mtudisc; + notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; @@ -1327,16 +1446,17 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) mtu = V_tcp_minmss + sizeof(struct tcpiphdr); /* - * Only cache the the MTU if it + * Only cache the MTU if it * is smaller than the interface * or route MTU. tcp_mtudisc() * will do right thing by itself. */ if (mtu <= tcp_maxmtu(&inc, NULL)) tcp_hc_updatemtu(&inc, mtu); - } - - inp = (*notify)(inp, inetctlerrmap[cmd]); + tcp_mtudisc(inp, mtu); + } else + inp = (*notify)(inp, + inetctlerrmap[cmd]); } } if (inp != NULL) @@ -1375,7 +1495,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) return; if (cmd == PRC_MSGSIZE) - notify = tcp_mtudisc; + notify = tcp_mtudisc_notify; else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; @@ -1594,12 +1714,19 @@ tcp_drop_syn_sent(struct inpcb *inp, int errno) /* * When `need fragmentation' ICMP is received, update our idea of the MSS - * based on the new value in the route. Also nudge TCP to send something, - * since we know the packet we just sent was dropped. + * based on the new value. Also nudge TCP to send something, since we + * know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. */ +static struct inpcb * +tcp_mtudisc_notify(struct inpcb *inp, int error) +{ + + return (tcp_mtudisc(inp, -1)); +} + struct inpcb * -tcp_mtudisc(struct inpcb *inp, int errno) +tcp_mtudisc(struct inpcb *inp, int mtuoffer) { struct tcpcb *tp; struct socket *so; @@ -1612,7 +1739,7 @@ tcp_mtudisc(struct inpcb *inp, int errno) tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL")); - tcp_mss_update(tp, -1, NULL, NULL); + tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); @@ -1627,7 +1754,7 @@ tcp_mtudisc(struct inpcb *inp, int errno) tcp_free_sackholes(tp); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_SACK_PERMIT) - EXIT_FASTRECOVERY(tp); + EXIT_FASTRECOVERY(tp->t_flags); tcp_output_send(tp); return (inp); } @@ -1689,7 +1816,7 @@ tcp_maxmtu6(struct in_conninfo *inc, int *flags) sro6.ro_dst.sin6_family = AF_INET6; sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); sro6.ro_dst.sin6_addr = inc->inc6_faddr; - rtalloc_ign((struct route *)&sro6, 0); + in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum); } if (sro6.ro_rt != NULL) { ifp = sro6.ro_rt->rt_ifp; @@ -2090,6 +2217,66 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen, KEY_FREESAV(&sav); return (0); } + +/* + * Verify the TCP-MD5 hash of a TCP segment. (RFC2385) + * + * Parameters: + * m pointer to head of mbuf chain + * len length of TCP segment data, excluding options + * optlen length of TCP segment options + * buf pointer to storage for computed MD5 digest + * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) + * + * Return 1 if successful, otherwise return 0. + */ +int +tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen, + struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) +{ + char tmpdigest[TCP_SIGLEN]; + + if (tcp_sig_checksigs == 0) + return (1); + if ((tcpbflag & TF_SIGNATURE) == 0) { + if ((to->to_flags & TOF_SIGNATURE) != 0) { + + /* + * If this socket is not expecting signature but + * the segment contains signature just fail. + */ + TCPSTAT_INC(tcps_sig_err_sigopt); + TCPSTAT_INC(tcps_sig_rcvbadsig); + return (0); + } + + /* Signature is not expected, and not present in segment. */ + return (1); + } + + /* + * If this socket is expecting signature but the segment does not + * contain any just fail. + */ + if ((to->to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_nosigopt); + TCPSTAT_INC(tcps_sig_rcvbadsig); + return (0); + } + if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0], + IPSEC_DIR_INBOUND) == -1) { + TCPSTAT_INC(tcps_sig_err_buildsig); + TCPSTAT_INC(tcps_sig_rcvbadsig); + return (0); + } + + if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) { + TCPSTAT_INC(tcps_sig_rcvbadsig); + return (0); + } + TCPSTAT_INC(tcps_sig_rcvgoodsig); + return (1); +} #endif /* TCP_SIGNATURE */ static int diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c index 4dc1208f..46b8414c 100644 --- a/freebsd/sys/netinet/tcp_syncache.c +++ b/freebsd/sys/netinet/tcp_syncache.c @@ -150,23 +150,23 @@ static VNET_DEFINE(struct tcp_syncache, tcp_syncache); SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache"); -SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN, +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN, &VNET_NAME(tcp_syncache.bucket_limit), 0, "Per-bucket hash limit for syncache"); -SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, &VNET_NAME(tcp_syncache.cache_limit), 0, "Overall entry limit for syncache"); -SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD, +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD, &VNET_NAME(tcp_syncache.cache_count), 0, "Current number of entries in syncache"); -SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN, +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN, &VNET_NAME(tcp_syncache.hashsize), 0, "Size of TCP syncache hashtable"); -SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW, +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW, &VNET_NAME(tcp_syncache.rexmt_limit), 0, "Limit on SYN/ACK retransmissions"); @@ -526,7 +526,7 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th) * used, or we are under memory pressure, a valid RST * may not find a syncache entry. In that case we're * done and no SYN|ACK retransmissions will happen. - * Otherwise the the RST was misdirected or spoofed. + * Otherwise the RST was misdirected or spoofed. */ if (sc == NULL) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) @@ -814,7 +814,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) if (sc->sc_flags & SCF_TIMESTAMP) { tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP; tp->ts_recent = sc->sc_tsreflect; - tp->ts_recent_age = ticks; + tp->ts_recent_age = tcp_ts_getticks(); tp->ts_offset = sc->sc_tsoff; } #ifdef TCP_SIGNATURE @@ -1023,7 +1023,8 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct syncache_head *sch; struct mbuf *ipopts = NULL; u_int32_t flowtmp; - int win, sb_hiwat, ip_ttl, ip_tos, noopt; + u_int ltflags; + int win, sb_hiwat, ip_ttl, ip_tos; char *s; #ifdef INET6 int autoflowlabel = 0; @@ -1056,7 +1057,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, ip_tos = inp->inp_ip_tos; win = sbspace(&so->so_rcv); sb_hiwat = so->so_rcv.sb_hiwat; - noopt = (tp->t_flags & TF_NOOPT); + ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE)); /* By the time we drop the lock these should no longer be used. */ so = NULL; @@ -1208,7 +1209,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ if (to->to_flags & TOF_TS) { sc->sc_tsreflect = to->to_tsval; - sc->sc_ts = ticks; + sc->sc_ts = tcp_ts_getticks(); sc->sc_flags |= SCF_TIMESTAMP; } if (to->to_flags & TOF_SCALE) { @@ -1251,14 +1252,14 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, * XXX: Currently we always record the option by default and will * attempt to use it in syncache_respond(). */ - if (to->to_flags & TOF_SIGNATURE) + if (to->to_flags & TOF_SIGNATURE || ltflags & TF_SIGNATURE) sc->sc_flags |= SCF_SIGNATURE; #endif if (to->to_flags & TOF_SACKPERM) sc->sc_flags |= SCF_SACK; if (to->to_flags & TOF_MSS) sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */ - if (noopt) + if (ltflags & TF_NOOPT) sc->sc_flags |= SCF_NOOPT; if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn) sc->sc_flags |= SCF_ECN; @@ -1639,7 +1640,7 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc, data |= md5_buffer[2] << 10; /* more digest bits */ data ^= md5_buffer[3]; sc->sc_ts = data; - sc->sc_tsoff = data - ticks; /* after XOR */ + sc->sc_tsoff = data - tcp_ts_getticks(); /* after XOR */ } TCPSTAT_INC(tcps_sc_sendcookie); @@ -1724,7 +1725,7 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, sc->sc_flags |= SCF_TIMESTAMP; sc->sc_tsreflect = to->to_tsval; sc->sc_ts = to->to_tsecr; - sc->sc_tsoff = to->to_tsecr - ticks; + sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks(); sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0; sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0; sc->sc_requested_s_scale = min((data >> 2) & 0xf, diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c index cbf9206f..4fe0645a 100644 --- a/freebsd/sys/netinet/tcp_timer.c +++ b/freebsd/sys/netinet/tcp_timer.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -59,7 +60,6 @@ __FBSDID("$FreeBSD$"); #include #endif #include -#include #include #include #include @@ -119,6 +119,11 @@ int tcp_maxpersistidle; /* max idle time in persist */ int tcp_maxidle; +static int tcp_rexmit_drop_options = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW, + &tcp_rexmit_drop_options, 0, + "Drop TCP options from 3rd and later retransmitted SYN"); + /* * Tcp protocol timeout routine called every 500 ms. * Updates timestamps used for TCP @@ -178,13 +183,18 @@ tcp_timer_delack(void *xtp) return; } INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack) - || !callout_active(&tp->t_timers->tt_delack)) { + if (callout_pending(&tp->t_timers->tt_delack) || + !callout_active(&tp->t_timers->tt_delack)) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_delack); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + CURVNET_RESTORE(); + return; + } tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_delack); @@ -224,7 +234,7 @@ tcp_timer_2msl(void *xtp) } INP_WLOCK(inp); tcp_free_sackholes(tp); - if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) || + if (callout_pending(&tp->t_timers->tt_2msl) || !callout_active(&tp->t_timers->tt_2msl)) { INP_WUNLOCK(tp->t_inpcb); INP_INFO_WUNLOCK(&V_tcbinfo); @@ -232,6 +242,12 @@ tcp_timer_2msl(void *xtp) return; } callout_deactivate(&tp->t_timers->tt_2msl); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } /* * 2 MSL timeout in shutdown went off. If we're closed but * still waiting for peer to close and connection has been idle @@ -295,14 +311,20 @@ tcp_timer_keep(void *xtp) return; } INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep) - || !callout_active(&tp->t_timers->tt_keep)) { + if (callout_pending(&tp->t_timers->tt_keep) || + !callout_active(&tp->t_timers->tt_keep)) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_keep); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } /* * Keep-alive timer went off; send something * or drop connection if idle for too long. @@ -390,14 +412,20 @@ tcp_timer_persist(void *xtp) return; } INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist) - || !callout_active(&tp->t_timers->tt_persist)) { + if (callout_pending(&tp->t_timers->tt_persist) || + !callout_active(&tp->t_timers->tt_persist)) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_persist); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } /* * Persistance timer into zero window. * Force a byte to be output, if possible. @@ -463,14 +491,20 @@ tcp_timer_rexmt(void * xtp) return; } INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt) - || !callout_active(&tp->t_timers->tt_rexmt)) { + if (callout_pending(&tp->t_timers->tt_rexmt) || + !callout_active(&tp->t_timers->tt_rexmt)) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_rexmt); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } tcp_free_sackholes(tp); /* * Retransmission timer went off. Message has not @@ -499,12 +533,18 @@ tcp_timer_rexmt(void * xtp) tp->snd_cwnd_prev = tp->snd_cwnd; tp->snd_ssthresh_prev = tp->snd_ssthresh; tp->snd_recover_prev = tp->snd_recover; - if (IN_FASTRECOVERY(tp)) - tp->t_flags |= TF_WASFRECOVERY; + if (IN_FASTRECOVERY(tp->t_flags)) + tp->t_flags |= TF_WASFRECOVERY; else - tp->t_flags &= ~TF_WASFRECOVERY; + tp->t_flags &= ~TF_WASFRECOVERY; + if (IN_CONGRECOVERY(tp->t_flags)) + tp->t_flags |= TF_WASCRECOVERY; + else + tp->t_flags &= ~TF_WASCRECOVERY; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); - } + tp->t_flags |= TF_PREVVALID; + } else + tp->t_flags &= ~TF_PREVVALID; TCPSTAT_INC(tcps_rexmttimeo); if (tp->t_state == TCPS_SYN_SENT) rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; @@ -513,13 +553,14 @@ tcp_timer_rexmt(void * xtp) TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, TCPTV_REXMTMAX); /* - * Disable rfc1323 if we havn't got any response to + * Disable rfc1323 if we haven't got any response to * our third SYN to work-around some broken terminal servers * (most of which have hopefully been retired) that have bad VJ * header compression code which trashes TCP segments containing * unknown-to-them TCP options. */ - if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) + if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && + (tp->t_rxtshift == 3)) tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP); /* * If we backed off this far, our srtt estimate is probably bogus. @@ -546,40 +587,9 @@ tcp_timer_rexmt(void * xtp) * If timing a segment in this window, stop the timer. */ tp->t_rtttime = 0; - /* - * Close the congestion window down to one segment - * (we'll open it by one segment for each ack we get). - * Since we probably have a window's worth of unacked - * data accumulated, this "slow start" keeps us from - * dumping all that data as back-to-back packets (which - * might overwhelm an intermediate gateway). - * - * There are two phases to the opening: Initially we - * open by one mss on each ack. This makes the window - * size increase exponentially with time. If the - * window is larger than the path can handle, this - * exponential growth results in dropped packet(s) - * almost immediately. To get more time between - * drops but still "push" the network to take advantage - * of improving conditions, we switch from exponential - * to linear window opening at some threshhold size. - * For a threshhold, we use half the current window - * size, truncated to a multiple of the mss. - * - * (the minimum cwnd that will give us exponential - * growth is 2 mss. We don't allow the threshhold - * to go below this.) - */ - { - u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; - if (win < 2) - win = 2; - tp->snd_cwnd = tp->t_maxseg; - tp->snd_ssthresh = win * tp->t_maxseg; - tp->t_dupacks = 0; - } - EXIT_FASTRECOVERY(tp); - tp->t_bytes_acked = 0; + + cc_cong_signal(tp, NULL, CC_RTO); + (void) tcp_output(tp); out: diff --git a/freebsd/sys/netinet/tcp_timewait.c b/freebsd/sys/netinet/tcp_timewait.c index de321e3f..73f3cfc9 100644 --- a/freebsd/sys/netinet/tcp_timewait.c +++ b/freebsd/sys/netinet/tcp_timewait.c @@ -230,7 +230,10 @@ tcp_twstart(struct tcpcb *tp) /* * Recover last window size sent. */ - tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) + tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; + else + tw->last_win = 0; /* * Set t_recent if timestamps are used on the connection. @@ -399,7 +402,7 @@ tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th, } /* - * Drop the the segment if it does not contain an ACK. + * Drop the segment if it does not contain an ACK. */ if ((thflags & TH_ACK) == 0) goto drop; @@ -534,7 +537,7 @@ tcp_twrespond(struct tcptw *tw, int flags) */ if (tw->t_recent && flags == TH_ACK) { to.to_flags |= TOF_TS; - to.to_tsval = ticks + tw->ts_offset; + to.to_tsval = tcp_ts_getticks() + tw->ts_offset; to.to_tsecr = tw->t_recent; } optlen = tcp_addoptions(&to, (u_char *)(th + 1)); diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c index 3d803987..4b0e08e9 100644 --- a/freebsd/sys/netinet/tcp_usrreq.c +++ b/freebsd/sys/netinet/tcp_usrreq.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #ifdef INET6 @@ -79,7 +80,6 @@ __FBSDID("$FreeBSD$"); #include #include #endif -#include #include #include #include @@ -204,8 +204,10 @@ tcp_detach(struct socket *so, struct inpcb *inp) tcp_discardcb(tp); in_pcbdetach(inp); in_pcbfree(inp); - } else + } else { in_pcbdetach(inp); + INP_WUNLOCK(inp); + } } } @@ -1228,6 +1230,9 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) ti->tcpi_rcv_mss = tp->t_maxseg; if (tp->t_flags & TF_TOE) ti->tcpi_options |= TCPI_OPT_TOE; + ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; + ti->tcpi_rcv_ooopack = tp->t_rcvoopack; + ti->tcpi_snd_zerowin = tp->t_sndzerowin; } /* @@ -1252,6 +1257,8 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt) struct inpcb *inp; struct tcpcb *tp; struct tcp_info ti; + char buf[TCP_CA_NAME_MAX]; + struct cc_algo *algo; error = 0; inp = sotoinpcb(so); @@ -1362,6 +1369,54 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt) error = EINVAL; break; + case TCP_CONGESTION: + INP_WUNLOCK(inp); + bzero(buf, sizeof(buf)); + error = sooptcopyin(sopt, &buf, sizeof(buf), 1); + if (error) + break; + INP_WLOCK_RECHECK(inp); + /* + * Return EINVAL if we can't find the requested cc algo. + */ + error = EINVAL; + CC_LIST_RLOCK(); + STAILQ_FOREACH(algo, &cc_list, entries) { + if (strncmp(buf, algo->name, TCP_CA_NAME_MAX) + == 0) { + /* We've found the requested algo. */ + error = 0; + /* + * We hold a write lock over the tcb + * so it's safe to do these things + * without ordering concerns. + */ + if (CC_ALGO(tp)->cb_destroy != NULL) + CC_ALGO(tp)->cb_destroy(tp->ccv); + CC_ALGO(tp) = algo; + /* + * If something goes pear shaped + * initialising the new algo, + * fall back to newreno (which + * does not require initialisation). + */ + if (algo->cb_init != NULL) + if (algo->cb_init(tp->ccv) > 0) { + CC_ALGO(tp) = &newreno_cc_algo; + /* + * The only reason init + * should fail is + * because of malloc. + */ + error = ENOMEM; + } + break; /* Break the STAILQ_FOREACH. */ + } + } + CC_LIST_RUNLOCK(); + INP_WUNLOCK(inp); + break; + default: INP_WUNLOCK(inp); error = ENOPROTOOPT; @@ -1405,6 +1460,12 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ti, sizeof ti); break; + case TCP_CONGESTION: + bzero(buf, sizeof(buf)); + strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX); + INP_WUNLOCK(inp); + error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX); + break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; @@ -1694,10 +1755,6 @@ db_print_tflags(u_int t_flags) db_printf("%sTF_NOPUSH", comma ? ", " : ""); comma = 1; } - if (t_flags & TF_NOPUSH) { - db_printf("%sTF_NOPUSH", comma ? ", " : ""); - comma = 1; - } if (t_flags & TF_MORETOCOME) { db_printf("%sTF_MORETOCOME", comma ? ", " : ""); comma = 1; @@ -1718,6 +1775,10 @@ db_print_tflags(u_int t_flags) db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); comma = 1; } + if (t_flags & TF_CONGRECOVERY) { + db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); + comma = 1; + } if (t_flags & TF_WASFRECOVERY) { db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); comma = 1; diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h index 7e186bcd..618250cd 100644 --- a/freebsd/sys/netinet/tcp_var.h +++ b/freebsd/sys/netinet/tcp_var.h @@ -70,8 +70,8 @@ struct sackhole { struct sackhint { struct sackhole *nexthole; int sack_bytes_rexmit; + tcp_seq last_sack_ack; /* Most recent/largest sacked ack */ - int ispare; /* explicit pad for 64bit alignment */ uint64_t _pad[2]; /* 1 sacked_bytes, 1 TBD */ }; @@ -196,9 +196,17 @@ struct tcpcb { void *t_toe; /* TOE pcb pointer */ int t_bytes_acked; /* # bytes acked during current RTT */ - int t_ispare; /* explicit pad for 64bit alignment */ - void *t_pspare2[6]; /* 2 CC / 4 TBD */ - uint64_t _pad[12]; /* 7 UTO, 5 TBD (1-2 CC/RTT?) */ + int t_sndzerowin; /* zero-window updates sent */ + + struct cc_algo *cc_algo; /* congestion control algorithm */ + struct cc_var *ccv; /* congestion control specific vars */ + struct osd *osd; /* storage for Khelp module data */ + + void *t_pspare2[3]; /* 3 TBD */ + uint64_t _pad[10]; /* 7 UTO, 3 TBD (1-2 CC/RTT?) */ + + uint64_t t_sndrexmitpack;/* retransmit packets sent */ + uint64_t t_rcvoopack; /* out-of-order packets received */ }; /* @@ -217,6 +225,7 @@ struct tcpcb { #define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */ #define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */ #define TF_NOPUSH 0x001000 /* don't push */ +#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */ #define TF_MORETOCOME 0x010000 /* More data to be appended to sock */ #define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */ #define TF_LASTIDLE 0x040000 /* connection was previously idle */ @@ -230,10 +239,22 @@ struct tcpcb { #define TF_ECN_PERMIT 0x4000000 /* connection ECN-ready */ #define TF_ECN_SND_CWR 0x8000000 /* ECN CWR in queue */ #define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */ +#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ +#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ + +#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY) +#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY +#define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY -#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) -#define ENTER_FASTRECOVERY(tp) tp->t_flags |= TF_FASTRECOVERY -#define EXIT_FASTRECOVERY(tp) tp->t_flags &= ~TF_FASTRECOVERY +#define IN_CONGRECOVERY(t_flags) (t_flags & TF_CONGRECOVERY) +#define ENTER_CONGRECOVERY(t_flags) t_flags |= TF_CONGRECOVERY +#define EXIT_CONGRECOVERY(t_flags) t_flags &= ~TF_CONGRECOVERY + +#define IN_RECOVERY(t_flags) (t_flags & (TF_CONGRECOVERY | TF_FASTRECOVERY)) +#define ENTER_RECOVERY(t_flags) t_flags |= (TF_CONGRECOVERY | TF_FASTRECOVERY) +#define EXIT_RECOVERY(t_flags) t_flags &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY) + +#define BYTES_THIS_ACK(tp, th) (th->th_ack - tp->snd_una) /* * Flags for the t_oobflags field. @@ -466,7 +487,14 @@ struct tcpstat { u_long tcps_ecn_shs; /* ECN successful handshakes */ u_long tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */ - u_long _pad[12]; /* 6 UTO, 6 TBD */ + /* TCP_SIGNATURE related stats */ + u_long tcps_sig_rcvgoodsig; /* Total matching signature received */ + u_long tcps_sig_rcvbadsig; /* Total bad signature received */ + u_long tcps_sig_err_buildsig; /* Mismatching signature received */ + u_long tcps_sig_err_sigopt; /* No signature expected by socket */ + u_long tcps_sig_err_nosigopt; /* No signature provided by segment */ + + u_long _pad[7]; /* 6 UTO, 1 TBD */ }; #ifdef _KERNEL @@ -483,6 +511,22 @@ struct tcpstat { void kmod_tcpstat_inc(int statnum); #define KMOD_TCPSTAT_INC(name) \ kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(u_long)) + +/* + * TCP specific helper hook point identifiers. + */ +#define HHOOK_TCP_EST_IN 0 +#define HHOOK_TCP_EST_OUT 1 +#define HHOOK_TCP_LAST HHOOK_TCP_EST_OUT + +struct tcp_hhook_data { + struct tcpcb *tp; + struct tcphdr *th; + struct tcpopt *to; + long len; + int tso; + tcp_seq curack; +}; #endif /* @@ -553,10 +597,11 @@ VNET_DECLARE(int, tcp_mssdflt); /* XXX */ VNET_DECLARE(int, tcp_minmss); VNET_DECLARE(int, tcp_delack_enabled); VNET_DECLARE(int, tcp_do_rfc3390); -VNET_DECLARE(int, tcp_do_newreno); VNET_DECLARE(int, path_mtu_discovery); VNET_DECLARE(int, ss_fltsz); VNET_DECLARE(int, ss_fltsz_local); +VNET_DECLARE(int, tcp_do_rfc3465); +VNET_DECLARE(int, tcp_abc_l_var); #define V_tcb VNET(tcb) #define V_tcbinfo VNET(tcbinfo) #define V_tcpstat VNET(tcpstat) @@ -564,10 +609,11 @@ VNET_DECLARE(int, ss_fltsz_local); #define V_tcp_minmss VNET(tcp_minmss) #define V_tcp_delack_enabled VNET(tcp_delack_enabled) #define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390) -#define V_tcp_do_newreno VNET(tcp_do_newreno) #define V_path_mtu_discovery VNET(path_mtu_discovery) #define V_ss_fltsz VNET(ss_fltsz) #define V_ss_fltsz_local VNET(ss_fltsz_local) +#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465) +#define V_tcp_abc_l_var VNET(tcp_abc_l_var) VNET_DECLARE(int, tcp_do_sack); /* SACK enabled/disabled */ VNET_DECLARE(int, tcp_sc_rst_sock_fail); /* RST on sock alloc failure */ @@ -579,7 +625,11 @@ VNET_DECLARE(int, tcp_ecn_maxretries); #define V_tcp_do_ecn VNET(tcp_do_ecn) #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) +VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]); +#define V_tcp_hhh VNET(tcp_hhh) + int tcp_addoptions(struct tcpopt *, u_char *); +int tcp_ccalgounload(struct cc_algo *unload_algo); struct tcpcb * tcp_close(struct tcpcb *); void tcp_discardcb(struct tcpcb *); @@ -611,7 +661,8 @@ void tcp_reass_destroy(void); void tcp_input(struct mbuf *, int); u_long tcp_maxmtu(struct in_conninfo *, int *); u_long tcp_maxmtu6(struct in_conninfo *, int *); -void tcp_mss_update(struct tcpcb *, int, struct hc_metrics_lite *, int *); +void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, + int *); void tcp_mss(struct tcpcb *, int); int tcp_mssopt(struct in_conninfo *); struct inpcb * @@ -634,6 +685,8 @@ int tcp_twrespond(struct tcptw *, int); void tcp_setpersist(struct tcpcb *); #ifdef TCP_SIGNATURE int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int); +int tcp_signature_verify(struct mbuf *, int, int, int, struct tcpopt *, + struct tcphdr *, u_int); #endif void tcp_slowtimo(void); struct tcptemp * @@ -670,6 +723,8 @@ void tcp_free_sackholes(struct tcpcb *tp); int tcp_newreno(struct tcpcb *, struct tcphdr *); u_long tcp_seq_subtract(u_long, u_long ); +void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type); + #endif /* _KERNEL */ #endif /* _NETINET_TCP_VAR_H_ */ diff --git a/freebsd/sys/netinet/udp.h b/freebsd/sys/netinet/udp.h index 6841683f..5ec55970 100644 --- a/freebsd/sys/netinet/udp.h +++ b/freebsd/sys/netinet/udp.h @@ -48,8 +48,10 @@ struct udphdr { /* * User-settable options (used with setsockopt). */ -#define UDP_ENCAP 0x01 +#define UDP_ENCAP 1 +/* Start of reserved space for third-party user-settable options. */ +#define UDP_VENDOR SO_VENDOR /* * UDP Encapsulation of IPsec Packets options. diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c index d47ffdaf..87cb13fc 100644 --- a/freebsd/sys/netinet/udp_usrreq.c +++ b/freebsd/sys/netinet/udp_usrreq.c @@ -106,9 +106,9 @@ __FBSDID("$FreeBSD$"); * packets that would otherwise be discarded due to bad checksums, and may * cause problems (especially for NFS data blocks). */ -static int udp_cksum = 1; -SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udp_cksum, - 0, "compute udp checksum"); +VNET_DEFINE(int, udp_cksum) = 1; +SYSCTL_VNET_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, + &VNET_NAME(udp_cksum), 0, "compute udp checksum"); int udp_log_in_vain = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, @@ -500,6 +500,15 @@ udp_input(struct mbuf *m, int off) INP_RLOCK(inp); + /* + * Detached PCBs can linger in the list if someone + * holds a reference. (e.g. udp_pcblist) + */ + if (inp->inp_socket == NULL) { + INP_RUNLOCK(inp); + continue; + } + /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] @@ -626,6 +635,15 @@ udp_input(struct mbuf *m, int off) */ INP_RLOCK(inp); INP_INFO_RUNLOCK(&V_udbinfo); + + /* + * Detached PCBs can linger in the hash table if someone holds a + * reference. (e.g. udp_pcblist) + */ + if (inp->inp_socket == NULL) { + INP_RUNLOCK(inp); + goto badunlocked; + } if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { INP_RUNLOCK(inp); goto badunlocked; @@ -826,7 +844,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) return (error); } -SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, +SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, + CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); static int @@ -975,6 +994,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, int ipflags; u_short fport, lport; int unlock_udbinfo; + u_char tos; /* * udp_output() may need to temporarily bind or connect the current @@ -990,6 +1010,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, } src.sin_family = 0; + tos = inp->inp_ip_tos; if (control != NULL) { /* * XXX: Currently, we assume all the optional information is @@ -1027,6 +1048,14 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, *(struct in_addr *)CMSG_DATA(cm); break; + case IP_TOS: + if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) { + error = EINVAL; + break; + } + tos = *(u_char *)CMSG_DATA(cm); + break; + default: error = ENOPROTOOPT; break; @@ -1233,7 +1262,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, /* * Set up checksum and output datagram. */ - if (udp_cksum) { + if (V_udp_cksum) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, @@ -1244,7 +1273,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, ui->ui_sum = 0; ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ - ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ + ((struct ip *)ui)->ip_tos = tos; /* XXX */ UDPSTAT_INC(udps_opackets); if (unlock_udbinfo == 2) diff --git a/freebsd/sys/netinet/udp_var.h b/freebsd/sys/netinet/udp_var.h index 947400b1..5cf7dc9f 100644 --- a/freebsd/sys/netinet/udp_var.h +++ b/freebsd/sys/netinet/udp_var.h @@ -140,8 +140,10 @@ VNET_DECLARE(struct inpcbinfo, udbinfo); extern u_long udp_sendspace; extern u_long udp_recvspace; +VNET_DECLARE(int, udp_cksum); VNET_DECLARE(struct udpstat, udpstat); VNET_DECLARE(int, udp_blackhole); +#define V_udp_cksum VNET(udp_cksum) #define V_udpstat VNET(udpstat) #define V_udp_blackhole VNET(udp_blackhole) extern int udp_log_in_vain; -- cgit v1.2.3