author     Sebastian Huber <sebastian.huber@embedded-brains.de>  2013-10-09 22:42:09 +0200
committer  Sebastian Huber <sebastian.huber@embedded-brains.de>  2013-10-10 09:06:58 +0200
commit     bceabc95c1c85d793200446fa85f1ddc6313ea29 (patch)
tree       973c8bd8deca9fd69913f2895cc91e0e6114d46c /freebsd/sys/netinet
parent     Add FreeBSD sources as a submodule (diff)
download   rtems-libbsd-bceabc95c1c85d793200446fa85f1ddc6313ea29.tar.bz2
Move files to match FreeBSD layout
Diffstat (limited to 'freebsd/sys/netinet')
-rw-r--r--  freebsd/sys/netinet/accf_data.c  68
-rw-r--r--  freebsd/sys/netinet/accf_dns.c  134
-rw-r--r--  freebsd/sys/netinet/accf_http.c  351
-rw-r--r--  freebsd/sys/netinet/icmp6.h  2
-rw-r--r--  freebsd/sys/netinet/icmp_var.h  108
-rw-r--r--  freebsd/sys/netinet/if_atm.c  366
-rw-r--r--  freebsd/sys/netinet/if_atm.h  47
-rw-r--r--  freebsd/sys/netinet/if_ether.c  859
-rw-r--r--  freebsd/sys/netinet/if_ether.h  2
-rw-r--r--  freebsd/sys/netinet/igmp.c  3655
-rw-r--r--  freebsd/sys/netinet/igmp.h  2
-rw-r--r--  freebsd/sys/netinet/igmp_var.h  225
-rw-r--r--  freebsd/sys/netinet/in.c  1601
-rw-r--r--  freebsd/sys/netinet/in.h  2
-rw-r--r--  freebsd/sys/netinet/in_gif.c  469
-rw-r--r--  freebsd/sys/netinet/in_gif.h  45
-rw-r--r--  freebsd/sys/netinet/in_mcast.c  2902
-rw-r--r--  freebsd/sys/netinet/in_pcb.c  1958
-rw-r--r--  freebsd/sys/netinet/in_pcb.h  525
-rw-r--r--  freebsd/sys/netinet/in_proto.c  400
-rw-r--r--  freebsd/sys/netinet/in_rmx.c  516
-rw-r--r--  freebsd/sys/netinet/in_systm.h  2
-rw-r--r--  freebsd/sys/netinet/in_var.h  475
-rw-r--r--  freebsd/sys/netinet/ip.h  2
-rw-r--r--  freebsd/sys/netinet/ip6.h  2
-rw-r--r--  freebsd/sys/netinet/ip_carp.c  2427
-rw-r--r--  freebsd/sys/netinet/ip_carp.h  191
-rw-r--r--  freebsd/sys/netinet/ip_divert.c  818
-rw-r--r--  freebsd/sys/netinet/ip_divert.h  55
-rw-r--r--  freebsd/sys/netinet/ip_dummynet.h  263
-rw-r--r--  freebsd/sys/netinet/ip_ecn.c  194
-rw-r--r--  freebsd/sys/netinet/ip_ecn.h  53
-rw-r--r--  freebsd/sys/netinet/ip_encap.c  465
-rw-r--r--  freebsd/sys/netinet/ip_encap.h  64
-rw-r--r--  freebsd/sys/netinet/ip_fastfwd.c  619
-rw-r--r--  freebsd/sys/netinet/ip_fw.h  579
-rw-r--r--  freebsd/sys/netinet/ip_gre.c  336
-rw-r--r--  freebsd/sys/netinet/ip_gre.h  43
-rw-r--r--  freebsd/sys/netinet/ip_icmp.c  986
-rw-r--r--  freebsd/sys/netinet/ip_icmp.h  2
-rw-r--r--  freebsd/sys/netinet/ip_id.c  211
-rw-r--r--  freebsd/sys/netinet/ip_input.c  1794
-rw-r--r--  freebsd/sys/netinet/ip_ipsec.c  424
-rw-r--r--  freebsd/sys/netinet/ip_ipsec.h  41
-rw-r--r--  freebsd/sys/netinet/ip_mroute.c  2952
-rw-r--r--  freebsd/sys/netinet/ip_mroute.h  359
-rw-r--r--  freebsd/sys/netinet/ip_options.c  747
-rw-r--r--  freebsd/sys/netinet/ip_options.h  60
-rw-r--r--  freebsd/sys/netinet/ip_output.c  1284
-rw-r--r--  freebsd/sys/netinet/ip_var.h  315
-rw-r--r--  freebsd/sys/netinet/ipfw/dn_heap.c  552
-rw-r--r--  freebsd/sys/netinet/ipfw/dn_heap.h  191
-rw-r--r--  freebsd/sys/netinet/ipfw/dn_sched.h  189
-rw-r--r--  freebsd/sys/netinet/ipfw/dn_sched_fifo.c  122
-rw-r--r--  freebsd/sys/netinet/ipfw/dn_sched_prio.c  231
-rw-r--r--  freebsd/sys/netinet/ipfw/dn_sched_qfq.c  866
-rw-r--r--  freebsd/sys/netinet/ipfw/dn_sched_rr.c  309
-rw-r--r--  freebsd/sys/netinet/ipfw/dn_sched_wf2q.c  375
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_dn_glue.c  847
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_dn_io.c  796
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_dn_private.h  402
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_dummynet.c  2297
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_fw2.c  2495
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_fw_log.c  451
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_fw_nat.c  606
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_fw_pfil.c  417
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_fw_private.h  301
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_fw_sockopt.c  1345
-rw-r--r--  freebsd/sys/netinet/ipfw/ip_fw_table.c  288
-rw-r--r--  freebsd/sys/netinet/libalias/alias.c  1793
-rw-r--r--  freebsd/sys/netinet/libalias/alias.h  232
-rw-r--r--  freebsd/sys/netinet/libalias/alias_cuseeme.c  230
-rw-r--r--  freebsd/sys/netinet/libalias/alias_db.c  2940
-rw-r--r--  freebsd/sys/netinet/libalias/alias_dummy.c  155
-rw-r--r--  freebsd/sys/netinet/libalias/alias_ftp.c  696
-rw-r--r--  freebsd/sys/netinet/libalias/alias_irc.c  490
-rw-r--r--  freebsd/sys/netinet/libalias/alias_local.h  397
-rw-r--r--  freebsd/sys/netinet/libalias/alias_mod.c  292
-rw-r--r--  freebsd/sys/netinet/libalias/alias_mod.h  163
-rw-r--r--  freebsd/sys/netinet/libalias/alias_nbt.c  855
-rw-r--r--  freebsd/sys/netinet/libalias/alias_pptp.c  525
-rw-r--r--  freebsd/sys/netinet/libalias/alias_proxy.c  870
-rw-r--r--  freebsd/sys/netinet/libalias/alias_sctp.c  2700
-rw-r--r--  freebsd/sys/netinet/libalias/alias_sctp.h  201
-rw-r--r--  freebsd/sys/netinet/libalias/alias_skinny.c  449
-rw-r--r--  freebsd/sys/netinet/libalias/alias_smedia.c  551
-rw-r--r--  freebsd/sys/netinet/libalias/alias_util.c  178
-rw-r--r--  freebsd/sys/netinet/pim.h  119
-rw-r--r--  freebsd/sys/netinet/pim_var.h  84
-rw-r--r--  freebsd/sys/netinet/raw_ip.c  1116
-rw-r--r--  freebsd/sys/netinet/sctp.h  549
-rw-r--r--  freebsd/sys/netinet/sctp_asconf.c  3397
-rw-r--r--  freebsd/sys/netinet/sctp_asconf.h  96
-rw-r--r--  freebsd/sys/netinet/sctp_auth.c  2128
-rw-r--r--  freebsd/sys/netinet/sctp_auth.h  235
-rw-r--r--  freebsd/sys/netinet/sctp_bsd_addr.c  562
-rw-r--r--  freebsd/sys/netinet/sctp_bsd_addr.h  63
-rw-r--r--  freebsd/sys/netinet/sctp_cc_functions.c  1565
-rw-r--r--  freebsd/sys/netinet/sctp_cc_functions.h  116
-rw-r--r--  freebsd/sys/netinet/sctp_constants.h  1051
-rw-r--r--  freebsd/sys/netinet/sctp_crc32.c  148
-rw-r--r--  freebsd/sys/netinet/sctp_crc32.h  47
-rw-r--r--  freebsd/sys/netinet/sctp_header.h  624
-rw-r--r--  freebsd/sys/netinet/sctp_indata.c  5800
-rw-r--r--  freebsd/sys/netinet/sctp_indata.h  129
-rw-r--r--  freebsd/sys/netinet/sctp_input.c  5965
-rw-r--r--  freebsd/sys/netinet/sctp_input.h  57
-rw-r--r--  freebsd/sys/netinet/sctp_lock_bsd.h  430
-rw-r--r--  freebsd/sys/netinet/sctp_os.h  72
-rw-r--r--  freebsd/sys/netinet/sctp_os_bsd.h  503
-rw-r--r--  freebsd/sys/netinet/sctp_output.c  13539
-rw-r--r--  freebsd/sys/netinet/sctp_output.h  229
-rw-r--r--  freebsd/sys/netinet/sctp_pcb.c  6810
-rw-r--r--  freebsd/sys/netinet/sctp_pcb.h  632
-rw-r--r--  freebsd/sys/netinet/sctp_peeloff.c  240
-rw-r--r--  freebsd/sys/netinet/sctp_peeloff.h  52
-rw-r--r--  freebsd/sys/netinet/sctp_structs.h  1094
-rw-r--r--  freebsd/sys/netinet/sctp_sysctl.c  1108
-rw-r--r--  freebsd/sys/netinet/sctp_sysctl.h  532
-rw-r--r--  freebsd/sys/netinet/sctp_timer.c  1804
-rw-r--r--  freebsd/sys/netinet/sctp_timer.h  101
-rw-r--r--  freebsd/sys/netinet/sctp_uio.h  1166
-rw-r--r--  freebsd/sys/netinet/sctp_usrreq.c  4918
-rw-r--r--  freebsd/sys/netinet/sctp_var.h  336
-rw-r--r--  freebsd/sys/netinet/sctputil.c  6977
-rw-r--r--  freebsd/sys/netinet/sctputil.h  392
-rw-r--r--  freebsd/sys/netinet/tcp.h  2
-rw-r--r--  freebsd/sys/netinet/tcp_debug.c  226
-rw-r--r--  freebsd/sys/netinet/tcp_debug.h  80
-rw-r--r--  freebsd/sys/netinet/tcp_fsm.h  112
-rw-r--r--  freebsd/sys/netinet/tcp_hostcache.c  693
-rw-r--r--  freebsd/sys/netinet/tcp_hostcache.h  82
-rw-r--r--  freebsd/sys/netinet/tcp_input.c  3453
-rw-r--r--  freebsd/sys/netinet/tcp_lro.c  389
-rw-r--r--  freebsd/sys/netinet/tcp_lro.h  85
-rw-r--r--  freebsd/sys/netinet/tcp_offload.c  147
-rw-r--r--  freebsd/sys/netinet/tcp_offload.h  354
-rw-r--r--  freebsd/sys/netinet/tcp_output.c  1485
-rw-r--r--  freebsd/sys/netinet/tcp_reass.c  335
-rw-r--r--  freebsd/sys/netinet/tcp_sack.c  687
-rw-r--r--  freebsd/sys/netinet/tcp_seq.h  68
-rw-r--r--  freebsd/sys/netinet/tcp_subr.c  2315
-rw-r--r--  freebsd/sys/netinet/tcp_syncache.c  1823
-rw-r--r--  freebsd/sys/netinet/tcp_syncache.h  127
-rw-r--r--  freebsd/sys/netinet/tcp_timer.c  660
-rw-r--r--  freebsd/sys/netinet/tcp_timer.h  183
-rw-r--r--  freebsd/sys/netinet/tcp_timewait.c  618
-rw-r--r--  freebsd/sys/netinet/tcp_usrreq.c  1886
-rw-r--r--  freebsd/sys/netinet/tcp_var.h  687
-rw-r--r--  freebsd/sys/netinet/tcpip.h  59
-rw-r--r--  freebsd/sys/netinet/toedev.h  162
-rw-r--r--  freebsd/sys/netinet/udp.h  2
-rw-r--r--  freebsd/sys/netinet/udp_usrreq.c  1633
-rw-r--r--  freebsd/sys/netinet/udp_var.h  161
154 files changed, 140097 insertions, 0 deletions
diff --git a/freebsd/sys/netinet/accf_data.c b/freebsd/sys/netinet/accf_data.c
new file mode 100644
index 00000000..15696daf
--- /dev/null
+++ b/freebsd/sys/netinet/accf_data.c
@@ -0,0 +1,68 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2000 Alfred Perlstein <alfred@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#define ACCEPT_FILTER_MOD
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socketvar.h>
+
+/* accept filter that holds a socket until data arrives */
+
+static int sohasdata(struct socket *so, void *arg, int waitflag);
+
+static struct accept_filter accf_data_filter = {
+ "dataready",
+ sohasdata,
+ NULL,
+ NULL
+};
+
+static moduledata_t accf_data_mod = {
+ "accf_data",
+ accept_filt_generic_mod_event,
+ &accf_data_filter
+};
+
+DECLARE_MODULE(accf_data, accf_data_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
+
+static int
+sohasdata(struct socket *so, void *arg, int waitflag)
+{
+
+ if (!soreadable(so))
+ return (SU_OK);
+
+ return (SU_ISCONNECTED);
+}
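
For context, an application attaches the "dataready" filter defined above to a
listening TCP socket with the SO_ACCEPTFILTER socket option, so that accept()
only returns connections that already have data queued. A minimal userland
sketch (assuming the accf_data module is loaded; error handling omitted):

  #include <sys/types.h>
  #include <sys/socket.h>
  #include <string.h>

  /* s is a bound, listening TCP socket. */
  static int
  attach_dataready(int s)
  {
          struct accept_filter_arg afa;

          memset(&afa, 0, sizeof(afa));
          strcpy(afa.af_name, "dataready");
          return (setsockopt(s, SOL_SOCKET, SO_ACCEPTFILTER,
              &afa, sizeof(afa)));
  }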
diff --git a/freebsd/sys/netinet/accf_dns.c b/freebsd/sys/netinet/accf_dns.c
new file mode 100644
index 00000000..f91cbb08
--- /dev/null
+++ b/freebsd/sys/netinet/accf_dns.c
@@ -0,0 +1,134 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (C) 2007 David Malone <dwmalone@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define ACCEPT_FILTER_MOD
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/socketvar.h>
+
+/* check for full DNS request */
+static int sohasdns(struct socket *so, void *arg, int waitflag);
+
+struct packet {
+ struct mbuf *m; /* Current mbuf. */
+ struct mbuf *n; /* nextpkt mbuf. */
+ unsigned long moff; /* Offset of the beginning of m. */
+ unsigned long offset; /* Which offset we are working at. */
+ unsigned long len; /* The number of bytes we have to play with. */
+};
+
+#define DNS_OK 0
+#define DNS_WAIT -1
+#define DNS_RUN -2
+
+/* check we can skip over various parts of DNS request */
+static int skippacket(struct sockbuf *sb);
+
+static struct accept_filter accf_dns_filter = {
+ "dnsready",
+ sohasdns,
+ NULL,
+ NULL
+};
+
+static moduledata_t accf_dns_mod = {
+ "accf_dns",
+ accept_filt_generic_mod_event,
+ &accf_dns_filter
+};
+
+DECLARE_MODULE(accf_dns, accf_dns_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
+
+static int
+sohasdns(struct socket *so, void *arg, int waitflag)
+{
+ struct sockbuf *sb = &so->so_rcv;
+
+ /* If the socket is full, we're ready. */
+ if (sb->sb_cc >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax)
+ goto ready;
+
+ /* Check to see if we have a request. */
+ if (skippacket(sb) == DNS_WAIT)
+ return (SU_OK);
+
+ready:
+ return (SU_ISCONNECTED);
+}
+
+#define GET8(p, val) do { \
+ if (p->offset < p->moff) \
+ return DNS_RUN; \
+ while (p->offset >= p->moff + p->m->m_len) { \
+ p->moff += p->m->m_len; \
+ p->m = p->m->m_next; \
+ if (p->m == NULL) { \
+ p->m = p->n; \
+ p->n = p->m->m_nextpkt; \
+ } \
+ if (p->m == NULL) \
+ return DNS_WAIT; \
+ } \
+ val = *(mtod(p->m, unsigned char *) + (p->offset - p->moff)); \
+ p->offset++; \
+ } while (0)
+
+#define GET16(p, val) do { \
+ unsigned int v0, v1; \
+ GET8(p, v0); \
+ GET8(p, v1); \
+ val = v0 * 0x100 + v1; \
+ } while (0)
+
+static int
+skippacket(struct sockbuf *sb) {
+ unsigned long packlen;
+ struct packet q, *p = &q;
+
+ if (sb->sb_cc < 2)
+ return DNS_WAIT;
+
+ q.m = sb->sb_mb;
+ q.n = q.m->m_nextpkt;
+ q.moff = 0;
+ q.offset = 0;
+ q.len = sb->sb_cc;
+
+ GET16(p, packlen);
+ if (packlen + 2 > q.len)
+ return DNS_WAIT;
+
+ return DNS_OK;
+}
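
The checks above follow DNS-over-TCP framing: every request is prefixed by a
16-bit big-endian length, and the filter only wakes the listener once at least
one whole request is buffered. What skippacket() computes, restated on a flat
buffer (hypothetical helper for illustration, not part of the module):

  #include <stddef.h>

  /* Nonzero when buf holds a complete request: a 2-byte big-endian
   * length followed by that many payload bytes (cf. DNS_OK/DNS_WAIT). */
  static int
  dns_request_complete(const unsigned char *buf, size_t len)
  {
          size_t packlen;

          if (len < 2)
                  return (0);
          packlen = ((size_t)buf[0] << 8) | buf[1];
          return (packlen + 2 <= len);
  }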
diff --git a/freebsd/sys/netinet/accf_http.c b/freebsd/sys/netinet/accf_http.c
new file mode 100644
index 00000000..ce21b1d1
--- /dev/null
+++ b/freebsd/sys/netinet/accf_http.c
@@ -0,0 +1,351 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2000 Paycounter, Inc.
+ * Author: Alfred Perlstein <alfred@paycounter.com>, <alfred@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#define ACCEPT_FILTER_MOD
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/socketvar.h>
+
+/* check for GET/HEAD */
+static int sohashttpget(struct socket *so, void *arg, int waitflag);
+/* check for HTTP/1.0 or HTTP/1.1 */
+static int soparsehttpvers(struct socket *so, void *arg, int waitflag);
+/* check for end of HTTP/1.x request */
+static int soishttpconnected(struct socket *so, void *arg, int waitflag);
+/* strcmp on an mbuf chain */
+static int mbufstrcmp(struct mbuf *m, struct mbuf *npkt, int offset, char *cmp);
+/* strncmp on an mbuf chain */
+static int mbufstrncmp(struct mbuf *m, struct mbuf *npkt, int offset,
+ int max, char *cmp);
+/* socketbuffer is full */
+static int sbfull(struct sockbuf *sb);
+
+static struct accept_filter accf_http_filter = {
+ "httpready",
+ sohashttpget,
+ NULL,
+ NULL
+};
+
+static moduledata_t accf_http_mod = {
+ "accf_http",
+ accept_filt_generic_mod_event,
+ &accf_http_filter
+};
+
+DECLARE_MODULE(accf_http, accf_http_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
+
+static int parse_http_version = 1;
+
+SYSCTL_NODE(_net_inet_accf, OID_AUTO, http, CTLFLAG_RW, 0,
+"HTTP accept filter");
+SYSCTL_INT(_net_inet_accf_http, OID_AUTO, parsehttpversion, CTLFLAG_RW,
+&parse_http_version, 1,
+"Parse http version so that non 1.x requests work");
+
+#ifdef ACCF_HTTP_DEBUG
+#define DPRINT(fmt, args...) \
+ do { \
+ printf("%s:%d: " fmt "\n", __func__, __LINE__, ##args); \
+ } while (0)
+#else
+#define DPRINT(fmt, args...)
+#endif
+
+static int
+sbfull(struct sockbuf *sb)
+{
+
+ DPRINT("sbfull, cc(%ld) >= hiwat(%ld): %d, "
+ "mbcnt(%ld) >= mbmax(%ld): %d",
+ sb->sb_cc, sb->sb_hiwat, sb->sb_cc >= sb->sb_hiwat,
+ sb->sb_mbcnt, sb->sb_mbmax, sb->sb_mbcnt >= sb->sb_mbmax);
+ return (sb->sb_cc >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax);
+}
+
+/*
+ * start at mbuf m, (must provide npkt if exists)
+ * starting at offset in m compare characters in mbuf chain for 'cmp'
+ */
+static int
+mbufstrcmp(struct mbuf *m, struct mbuf *npkt, int offset, char *cmp)
+{
+ struct mbuf *n;
+
+ for (; m != NULL; m = n) {
+ n = npkt;
+ if (npkt)
+ npkt = npkt->m_nextpkt;
+ for (; m; m = m->m_next) {
+ for (; offset < m->m_len; offset++, cmp++) {
+ if (*cmp == '\0')
+ return (1);
+ else if (*cmp != *(mtod(m, char *) + offset))
+ return (0);
+ }
+ if (*cmp == '\0')
+ return (1);
+ offset = 0;
+ }
+ }
+ return (0);
+}
+
+/*
+ * start at mbuf m, (must provide npkt if exists)
+ * starting at offset in m compare characters in mbuf chain for 'cmp'
+ * stop at 'max' characters
+ */
+static int
+mbufstrncmp(struct mbuf *m, struct mbuf *npkt, int offset, int max, char *cmp)
+{
+ struct mbuf *n;
+
+ for (; m != NULL; m = n) {
+ n = npkt;
+ if (npkt)
+ npkt = npkt->m_nextpkt;
+ for (; m; m = m->m_next) {
+ for (; offset < m->m_len; offset++, cmp++, max--) {
+ if (max == 0 || *cmp == '\0')
+ return (1);
+ else if (*cmp != *(mtod(m, char *) + offset))
+ return (0);
+ }
+ if (max == 0 || *cmp == '\0')
+ return (1);
+ offset = 0;
+ }
+ }
+ return (0);
+}
+
+#define STRSETUP(sptr, slen, str) \
+ do { \
+ sptr = str; \
+ slen = sizeof(str) - 1; \
+ } while(0)
+
+static int
+sohashttpget(struct socket *so, void *arg, int waitflag)
+{
+
+ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 && !sbfull(&so->so_rcv)) {
+ struct mbuf *m;
+ char *cmp;
+ int cmplen, cc;
+
+ m = so->so_rcv.sb_mb;
+ cc = so->so_rcv.sb_cc - 1;
+ if (cc < 1)
+ return (SU_OK);
+ switch (*mtod(m, char *)) {
+ case 'G':
+ STRSETUP(cmp, cmplen, "ET ");
+ break;
+ case 'H':
+ STRSETUP(cmp, cmplen, "EAD ");
+ break;
+ default:
+ goto fallout;
+ }
+ if (cc < cmplen) {
+ if (mbufstrncmp(m, m->m_nextpkt, 1, cc, cmp) == 1) {
+ DPRINT("short cc (%d) but mbufstrncmp ok", cc);
+ return (SU_OK);
+ } else {
+ DPRINT("short cc (%d) mbufstrncmp failed", cc);
+ goto fallout;
+ }
+ }
+ if (mbufstrcmp(m, m->m_nextpkt, 1, cmp) == 1) {
+ DPRINT("mbufstrcmp ok");
+ if (parse_http_version == 0)
+ return (soishttpconnected(so, arg, waitflag));
+ else
+ return (soparsehttpvers(so, arg, waitflag));
+ }
+ DPRINT("mbufstrcmp bad");
+ }
+
+fallout:
+ DPRINT("fallout");
+ return (SU_ISCONNECTED);
+}
+
+static int
+soparsehttpvers(struct socket *so, void *arg, int waitflag)
+{
+ struct mbuf *m, *n;
+ int i, cc, spaces, inspaces;
+
+ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv))
+ goto fallout;
+
+ m = so->so_rcv.sb_mb;
+ cc = so->so_rcv.sb_cc;
+ inspaces = spaces = 0;
+ for (m = so->so_rcv.sb_mb; m; m = n) {
+ n = m->m_nextpkt;
+ for (; m; m = m->m_next) {
+ for (i = 0; i < m->m_len; i++, cc--) {
+ switch (*(mtod(m, char *) + i)) {
+ case ' ':
+ /* tabs? '\t' */
+ if (!inspaces) {
+ spaces++;
+ inspaces = 1;
+ }
+ break;
+ case '\r':
+ case '\n':
+ DPRINT("newline");
+ goto fallout;
+ default:
+ if (spaces != 2) {
+ inspaces = 0;
+ break;
+ }
+
+ /*
+ * if we don't have enough characters
+ * left (cc < sizeof("HTTP/1.0") - 1)
+ * then see if the remaining ones
+ * are a request we can parse.
+ */
+ if (cc < sizeof("HTTP/1.0") - 1) {
+ if (mbufstrncmp(m, n, i, cc,
+ "HTTP/1.") == 1) {
+ DPRINT("ok");
+ goto readmore;
+ } else {
+ DPRINT("bad");
+ goto fallout;
+ }
+ } else if (
+ mbufstrcmp(m, n, i, "HTTP/1.0") ||
+ mbufstrcmp(m, n, i, "HTTP/1.1")) {
+ DPRINT("ok");
+ return (soishttpconnected(so,
+ arg, waitflag));
+ } else {
+ DPRINT("bad");
+ goto fallout;
+ }
+ }
+ }
+ }
+ }
+readmore:
+ DPRINT("readmore");
+ /*
+ * if we hit here we haven't hit something
+ * we don't understand or a newline, so try again
+ */
+ soupcall_set(so, SO_RCV, soparsehttpvers, arg);
+ return (SU_OK);
+
+fallout:
+ DPRINT("fallout");
+ return (SU_ISCONNECTED);
+}
+
+
+#define NCHRS 3
+
+static int
+soishttpconnected(struct socket *so, void *arg, int waitflag)
+{
+ char a, b, c;
+ struct mbuf *m, *n;
+ int ccleft, copied;
+
+ DPRINT("start");
+ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv))
+ goto gotit;
+
+ /*
+ * Walk the socketbuffer and copy the last NCHRS (3) into a, b, and c
+ * copied - how much we've copied so far
+ * ccleft - how many bytes remaining in the socketbuffer
+ * just loop over the mbufs subtracting from 'ccleft' until we only
+ * have NCHRS left
+ */
+ copied = 0;
+ ccleft = so->so_rcv.sb_cc;
+ if (ccleft < NCHRS)
+ goto readmore;
+ a = b = c = '\0';
+ for (m = so->so_rcv.sb_mb; m; m = n) {
+ n = m->m_nextpkt;
+ for (; m; m = m->m_next) {
+ ccleft -= m->m_len;
+ if (ccleft <= NCHRS) {
+ char *src;
+ int tocopy;
+
+ tocopy = (NCHRS - ccleft) - copied;
+ src = mtod(m, char *) + (m->m_len - tocopy);
+
+ while (tocopy--) {
+ switch (copied++) {
+ case 0:
+ a = *src++;
+ break;
+ case 1:
+ b = *src++;
+ break;
+ case 2:
+ c = *src++;
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (c == '\n' && (b == '\n' || (b == '\r' && a == '\n'))) {
+ /* we have all request headers */
+ goto gotit;
+ }
+
+readmore:
+ soupcall_set(so, SO_RCV, soishttpconnected, arg);
+ return (SU_OK);
+
+gotit:
+ return (SU_ISCONNECTED);
+}
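
soishttpconnected() above declares the request complete once the last three
bytes received close an empty line, i.e. the buffer ends in "\n\n" or
"\n\r\n". The same test, restated on a contiguous buffer (hypothetical helper,
for illustration only):

  #include <stddef.h>

  /* Mirrors the a/b/c check above: have we seen the blank line that
   * terminates the HTTP request headers? */
  static int
  http_headers_complete(const char *buf, size_t len)
  {
          char a, b, c;

          if (len < 3)
                  return (0);
          a = buf[len - 3];
          b = buf[len - 2];
          c = buf[len - 1];
          return (c == '\n' && (b == '\n' || (b == '\r' && a == '\n')));
  }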
diff --git a/freebsd/sys/netinet/icmp6.h b/freebsd/sys/netinet/icmp6.h
new file mode 100644
index 00000000..bf61ac5b
--- /dev/null
+++ b/freebsd/sys/netinet/icmp6.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/icmp6.h>
diff --git a/freebsd/sys/netinet/icmp_var.h b/freebsd/sys/netinet/icmp_var.h
new file mode 100644
index 00000000..d55fc4d3
--- /dev/null
+++ b/freebsd/sys/netinet/icmp_var.h
@@ -0,0 +1,108 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)icmp_var.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_ICMP_VAR_HH_
+#define _NETINET_ICMP_VAR_HH_
+
+
+/*
+ * Variables related to this implementation
+ * of the internet control message protocol.
+ */
+struct icmpstat {
+/* statistics related to icmp packets generated */
+ u_long icps_error; /* # of calls to icmp_error */
+ u_long icps_oldshort; /* no error 'cuz old ip too short */
+ u_long icps_oldicmp; /* no error 'cuz old was icmp */
+ u_long icps_outhist[ICMP_MAXTYPE + 1];
+/* statistics related to input messages processed */
+ u_long icps_badcode; /* icmp_code out of range */
+ u_long icps_tooshort; /* packet < ICMP_MINLEN */
+ u_long icps_checksum; /* bad checksum */
+ u_long icps_badlen; /* calculated bound mismatch */
+ u_long icps_reflect; /* number of responses */
+ u_long icps_inhist[ICMP_MAXTYPE + 1];
+ u_long icps_bmcastecho; /* b/mcast echo requests dropped */
+ u_long icps_bmcasttstamp; /* b/mcast tstamp requests dropped */
+ u_long icps_badaddr; /* bad return address */
+ u_long icps_noroute; /* no route back */
+};
+
+#ifdef _KERNEL
+/*
+ * In-kernel consumers can use these accessor macros directly to update
+ * stats.
+ */
+#define ICMPSTAT_ADD(name, val) V_icmpstat.name += (val)
+#define ICMPSTAT_INC(name) ICMPSTAT_ADD(name, 1)
+
+/*
+ * Kernel module consumers must use this accessor macro.
+ */
+void kmod_icmpstat_inc(int statnum);
+#define KMOD_ICMPSTAT_INC(name) \
+ kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(u_long))
+#endif
+
+/*
+ * Names for ICMP sysctl objects
+ */
+#define ICMPCTL_MASKREPL 1 /* allow replies to netmask requests */
+#define ICMPCTL_STATS 2 /* statistics (read-only) */
+#define ICMPCTL_ICMPLIM 3
+#define ICMPCTL_MAXID 4
+
+#define ICMPCTL_NAMES { \
+ { 0, 0 }, \
+ { "maskrepl", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "icmplim", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL
+SYSCTL_DECL(_net_inet_icmp);
+
+VNET_DECLARE(struct icmpstat, icmpstat); /* icmp statistics. */
+#define V_icmpstat VNET(icmpstat)
+
+extern int badport_bandlim(int);
+#define BANDLIM_UNLIMITED -1
+#define BANDLIM_ICMP_UNREACH 0
+#define BANDLIM_ICMP_ECHO 1
+#define BANDLIM_ICMP_TSTAMP 2
+#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */
+#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */
+#define BANDLIM_ICMP6_UNREACH 5
+#define BANDLIM_MAX 5
+#endif
+
+#endif
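
KMOD_ICMPSTAT_INC() above works because struct icmpstat is nothing but
consecutive u_long counters, so a field's byte offset divided by
sizeof(u_long) is a stable slot number that kmod_icmpstat_inc() can apply to
the per-VNET copy. A toy userland illustration of that arithmetic (local
struct for the sketch, not the kernel one):

  #include <stddef.h>
  #include <stdio.h>

  struct toy_stat {
          unsigned long toy_error;        /* slot 0 */
          unsigned long toy_tooshort;     /* slot 1 */
          unsigned long toy_checksum;     /* slot 2 */
  };

  int
  main(void)
  {
          printf("toy_checksum -> slot %zu\n",
              offsetof(struct toy_stat, toy_checksum) /
              sizeof(unsigned long));
          return (0);
  }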
diff --git a/freebsd/sys/netinet/if_atm.c b/freebsd/sys/netinet/if_atm.c
new file mode 100644
index 00000000..ea6c567d
--- /dev/null
+++ b/freebsd/sys/netinet/if_atm.c
@@ -0,0 +1,366 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: if_atm.c,v 1.6 1996/10/13 02:03:01 christos Exp $ */
+
+/*-
+ *
+ * Copyright (c) 1996 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * IP <=> ATM address resolution.
+ */
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_natm.h>
+
+#if defined(INET) || defined(INET6)
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/syslog.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/if_atm.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/if_atm.h>
+
+#ifdef NATM
+#include <freebsd/netnatm/natm.h>
+#endif
+
+#define SDL(s) ((struct sockaddr_dl *)s)
+
+#define GET3BYTE(V, A, L) do { \
+ (V) = ((A)[0] << 16) | ((A)[1] << 8) | (A)[2]; \
+ (A) += 3; \
+ (L) -= 3; \
+ } while (0)
+
+#define GET2BYTE(V, A, L) do { \
+ (V) = ((A)[0] << 8) | (A)[1]; \
+ (A) += 2; \
+ (L) -= 2; \
+ } while (0)
+
+#define GET1BYTE(V, A, L) do { \
+ (V) = *(A)++; \
+ (L)--; \
+ } while (0)
+
+
+/*
+ * atm_rtrequest: handle ATM rt request (in support of generic code)
+ * inputs: "req" = request code
+ * "rt" = route entry
+ * "info" = rt_addrinfo
+ */
+void
+atm_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
+{
+ struct sockaddr *gate = rt->rt_gateway;
+ struct atmio_openvcc op;
+ struct atmio_closevcc cl;
+ u_char *addr;
+ u_int alen;
+#ifdef NATM
+ struct sockaddr_in *sin;
+ struct natmpcb *npcb = NULL;
+#endif
+ static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+
+ if (rt->rt_flags & RTF_GATEWAY) /* link level requests only */
+ return;
+
+ switch (req) {
+
+ case RTM_RESOLVE: /* resolve: only happens when cloning */
+ printf("atm_rtrequest: RTM_RESOLVE request detected?\n");
+ break;
+
+ case RTM_ADD:
+ /*
+ * route added by a command (e.g. ifconfig, route, arp...).
+ *
+ * first check to see if this is not a host route, in which
+ * case we are being called via "ifconfig" to set the address.
+ */
+ if ((rt->rt_flags & RTF_HOST) == 0) {
+ rt_setgate(rt,rt_key(rt),(struct sockaddr *)&null_sdl);
+ gate = rt->rt_gateway;
+ SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+ SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+ break;
+ }
+
+ if (gate->sa_family != AF_LINK ||
+ gate->sa_len < sizeof(null_sdl)) {
+ log(LOG_DEBUG, "atm_rtrequest: bad gateway value");
+ break;
+ }
+
+ KASSERT(rt->rt_ifp->if_ioctl != NULL,
+ ("atm_rtrequest: null ioctl"));
+
+ /*
+ * Parse and verify the link level address as
+ * an open request
+ */
+#ifdef NATM
+ NATM_LOCK();
+#endif
+ bzero(&op, sizeof(op));
+ addr = LLADDR(SDL(gate));
+ alen = SDL(gate)->sdl_alen;
+ if (alen < 4) {
+ printf("%s: bad link-level address\n", __func__);
+ goto failed;
+ }
+
+ if (alen == 4) {
+ /* old type address */
+ GET1BYTE(op.param.flags, addr, alen);
+ GET1BYTE(op.param.vpi, addr, alen);
+ GET2BYTE(op.param.vci, addr, alen);
+ op.param.traffic = ATMIO_TRAFFIC_UBR;
+ op.param.aal = (op.param.flags & ATM_PH_AAL5) ?
+ ATMIO_AAL_5 : ATMIO_AAL_0;
+ } else {
+ /* new address */
+ op.param.aal = ATMIO_AAL_5;
+
+ GET1BYTE(op.param.flags, addr, alen);
+ op.param.flags &= ATM_PH_LLCSNAP;
+
+ GET1BYTE(op.param.vpi, addr, alen);
+ GET2BYTE(op.param.vci, addr, alen);
+
+ GET1BYTE(op.param.traffic, addr, alen);
+
+ switch (op.param.traffic) {
+
+ case ATMIO_TRAFFIC_UBR:
+ if (alen >= 3)
+ GET3BYTE(op.param.tparam.pcr,
+ addr, alen);
+ break;
+
+ case ATMIO_TRAFFIC_CBR:
+ if (alen < 3)
+ goto bad_param;
+ GET3BYTE(op.param.tparam.pcr, addr, alen);
+ break;
+
+ case ATMIO_TRAFFIC_VBR:
+ if (alen < 3 * 3)
+ goto bad_param;
+ GET3BYTE(op.param.tparam.pcr, addr, alen);
+ GET3BYTE(op.param.tparam.scr, addr, alen);
+ GET3BYTE(op.param.tparam.mbs, addr, alen);
+ break;
+
+ case ATMIO_TRAFFIC_ABR:
+ if (alen < 4 * 3 + 2 + 1 * 2 + 3)
+ goto bad_param;
+ GET3BYTE(op.param.tparam.pcr, addr, alen);
+ GET3BYTE(op.param.tparam.mcr, addr, alen);
+ GET3BYTE(op.param.tparam.icr, addr, alen);
+ GET3BYTE(op.param.tparam.tbe, addr, alen);
+ GET1BYTE(op.param.tparam.nrm, addr, alen);
+ GET1BYTE(op.param.tparam.trm, addr, alen);
+ GET2BYTE(op.param.tparam.adtf, addr, alen);
+ GET1BYTE(op.param.tparam.rif, addr, alen);
+ GET1BYTE(op.param.tparam.rdf, addr, alen);
+ GET1BYTE(op.param.tparam.cdf, addr, alen);
+ break;
+
+ default:
+ bad_param:
+ printf("%s: bad traffic params\n", __func__);
+ goto failed;
+ }
+ }
+ op.param.rmtu = op.param.tmtu = rt->rt_ifp->if_mtu;
+#ifdef NATM
+ /*
+ * let native ATM know we are using this VCI/VPI
+ * (i.e. reserve it)
+ */
+ sin = (struct sockaddr_in *) rt_key(rt);
+ if (sin->sin_family != AF_INET)
+ goto failed;
+ npcb = npcb_add(NULL, rt->rt_ifp, op.param.vci, op.param.vpi);
+ if (npcb == NULL)
+ goto failed;
+ npcb->npcb_flags |= NPCB_IP;
+ npcb->ipaddr.s_addr = sin->sin_addr.s_addr;
+ /* XXX: move npcb to llinfo when ATM ARP is ready */
+ rt->rt_llinfo = (caddr_t) npcb;
+ rt->rt_flags |= RTF_LLINFO;
+#endif
+ /*
+ * let the lower level know this circuit is active
+ */
+ op.rxhand = NULL;
+ op.param.flags |= ATMIO_FLAG_ASYNC;
+ if (rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMOPENVCC,
+ (caddr_t)&op) != 0) {
+ printf("atm: couldn't add VC\n");
+ goto failed;
+ }
+
+ SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+ SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+
+#ifdef NATM
+ NATM_UNLOCK();
+#endif
+ break;
+
+failed:
+#ifdef NATM
+ if (npcb) {
+ npcb_free(npcb, NPCB_DESTROY);
+ rt->rt_llinfo = NULL;
+ rt->rt_flags &= ~RTF_LLINFO;
+ }
+ NATM_UNLOCK();
+#endif
+ /* mark as invalid. We cannot RTM_DELETE the route from
+ * here, because the recursive call to rtrequest1 does
+ * not really work. */
+ rt->rt_flags |= RTF_REJECT;
+ break;
+
+ case RTM_DELETE:
+#ifdef NATM
+ /*
+ * tell native ATM we are done with this VC
+ */
+ if (rt->rt_flags & RTF_LLINFO) {
+ NATM_LOCK();
+ npcb_free((struct natmpcb *)rt->rt_llinfo,
+ NPCB_DESTROY);
+ rt->rt_llinfo = NULL;
+ rt->rt_flags &= ~RTF_LLINFO;
+ NATM_UNLOCK();
+ }
+#endif
+ /*
+ * tell the lower layer to disable this circuit
+ */
+ bzero(&op, sizeof(op));
+ addr = LLADDR(SDL(gate));
+ addr++;
+ cl.vpi = *addr++;
+ cl.vci = *addr++ << 8;
+ cl.vci |= *addr++;
+ (void)rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMCLOSEVCC,
+ (caddr_t)&cl);
+ break;
+ }
+}
+
+/*
+ * atmresolve:
+ * inputs:
+ * [1] "rt" = the link level route to use (or null if need to look one up)
+ * [2] "m" = mbuf containing the data to be sent
+ * [3] "dst" = sockaddr_in (IP) address of dest.
+ * output:
+ * [4] "desten" = ATM pseudo header which we will fill in VPI/VCI info
+ * return:
+ * 0 == resolve FAILED; note that "m" gets m_freem'd in this case
+ * 1 == resolve OK; desten contains result
+ *
+ * XXX: will need more work if we wish to support ATMARP in the kernel,
+ * but this is enough for PVCs entered via the "route" command.
+ */
+int
+atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst,
+ struct atm_pseudohdr *desten)
+{
+ struct sockaddr_dl *sdl;
+
+ if (m->m_flags & (M_BCAST | M_MCAST)) {
+ log(LOG_INFO,
+ "atmresolve: BCAST/MCAST packet detected/dumped\n");
+ goto bad;
+ }
+
+ if (rt == NULL) {
+ rt = RTALLOC1(dst, 0); /* link level on table 0 XXX MRT */
+ if (rt == NULL)
+ goto bad; /* failed */
+ RT_REMREF(rt); /* don't keep LL references */
+ if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
+ rt->rt_gateway->sa_family != AF_LINK) {
+ RT_UNLOCK(rt);
+ goto bad;
+ }
+ RT_UNLOCK(rt);
+ }
+
+ /*
+ * note that rt_gateway is a sockaddr_dl which contains the
+ * atm_pseudohdr data structure for this route. we currently
+ * don't need any rt_llinfo info (but will if we want to support
+ * ATM ARP [c.f. if_ether.c]).
+ */
+ sdl = SDL(rt->rt_gateway);
+
+ /*
+ * Check the address family and length is valid, the address
+ * is resolved; otherwise, try to resolve.
+ */
+ if (sdl->sdl_family == AF_LINK && sdl->sdl_alen >= sizeof(*desten)) {
+ bcopy(LLADDR(sdl), desten, sizeof(*desten));
+ return (1); /* ok, go for it! */
+ }
+
+ /*
+ * we got an entry, but it doesn't have valid link address
+ * info in it (it is prob. the interface route, which has
+ * sdl_alen == 0). dump packet. (fall through to "bad").
+ */
+bad:
+ m_freem(m);
+ return (0);
+}
+#endif /* INET */
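
The RTM_ADD case above recovers the PVC from the route's AF_LINK gateway
address: the legacy 4-byte form carries a flags byte, the VPI, and the VCI as
a big-endian 16-bit value, while the longer form appends AAL and traffic
parameters consumed by GET1BYTE/GET2BYTE/GET3BYTE. A minimal encoder for the
legacy form (sketch only; the helper name is not part of the source above):

  /* Pack the legacy 4-byte ATM link-level address that the RTM_ADD
   * handler unpacks with GET1BYTE/GET2BYTE. */
  static void
  atm_pack_lladdr(unsigned char out[4], unsigned char flags,
      unsigned char vpi, unsigned short vci)
  {
          out[0] = flags;                 /* e.g. ATM_PH_AAL5 */
          out[1] = vpi;
          out[2] = (vci >> 8) & 0xff;     /* VCI, high byte first */
          out[3] = vci & 0xff;
  }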
diff --git a/freebsd/sys/netinet/if_atm.h b/freebsd/sys/netinet/if_atm.h
new file mode 100644
index 00000000..bd8b5143
--- /dev/null
+++ b/freebsd/sys/netinet/if_atm.h
@@ -0,0 +1,47 @@
+/* $FreeBSD$ */
+/* $NetBSD: if_atm.h,v 1.2 1996/07/03 17:17:17 chuck Exp $ */
+
+/*-
+ *
+ * Copyright (c) 1996 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * if_atm.h
+ */
+
+struct atm_pseudohdr;
+struct mbuf;
+struct rtentry;
+struct sockaddr;
+
+void atm_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
+int atmresolve(struct rtentry *, struct mbuf *, struct sockaddr *,
+ struct atm_pseudohdr *);
diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c
new file mode 100644
index 00000000..2e40c0d2
--- /dev/null
+++ b/freebsd/sys/netinet/if_ether.c
@@ -0,0 +1,859 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_ether.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Ethernet address resolution protocol.
+ * TODO:
+ * add "inuse/lock" bit (or ref. count) along with valid bit
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/syslog.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/if_llc.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/net/if_llatbl.h>
+#include <freebsd/netinet/if_ether.h>
+#if defined(INET) || defined(INET6)
+#include <freebsd/netinet/ip_carp.h>
+#endif
+
+#include <freebsd/net/if_arc.h>
+#include <freebsd/net/iso88025.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#define SIN(s) ((struct sockaddr_in *)s)
+#define SDL(s) ((struct sockaddr_dl *)s)
+
+SYSCTL_DECL(_net_link_ether);
+SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
+SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
+
+/* timer values */
+static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20
+ * minutes */
+static VNET_DEFINE(int, arp_maxtries) = 5;
+VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for
+ * local traffic */
+static VNET_DEFINE(int, arp_proxyall) = 0;
+static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for
+ * 20 seconds */
+static VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
+
+#define V_arpt_keep VNET(arpt_keep)
+#define V_arpt_down VNET(arpt_down)
+#define V_arp_maxtries VNET(arp_maxtries)
+#define V_arp_proxyall VNET(arp_proxyall)
+#define V_arpstat VNET(arpstat)
+
+SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
+ &VNET_NAME(arpt_keep), 0,
+ "ARP entry lifetime in seconds");
+SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
+ &VNET_NAME(arp_maxtries), 0,
+ "ARP resolution attempts before returning error");
+SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
+ &VNET_NAME(useloopback), 0,
+ "Use the loopback interface for local traffic");
+SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
+ &VNET_NAME(arp_proxyall), 0,
+ "Enable proxy ARP for all suitable requests");
+SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW,
+ &VNET_NAME(arpstat), arpstat,
+ "ARP statistics (struct arpstat, net/if_arp.h)");
+
+static void arp_init(void);
+void arprequest(struct ifnet *,
+ struct in_addr *, struct in_addr *, u_char *);
+static void arpintr(struct mbuf *);
+static void arptimer(void *);
+#ifdef INET
+static void in_arpinput(struct mbuf *);
+#endif
+
+static const struct netisr_handler arp_nh = {
+ .nh_name = "arp",
+ .nh_handler = arpintr,
+ .nh_proto = NETISR_ARP,
+ .nh_policy = NETISR_POLICY_SOURCE,
+};
+
+#ifdef AF_INET
+void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
+
+/*
+ * called by in_ifscrub to remove entry from the table when
+ * the interface goes away
+ */
+void
+arp_ifscrub(struct ifnet *ifp, uint32_t addr)
+{
+ struct sockaddr_in addr4;
+
+ bzero((void *)&addr4, sizeof(addr4));
+ addr4.sin_len = sizeof(addr4);
+ addr4.sin_family = AF_INET;
+ addr4.sin_addr.s_addr = addr;
+ IF_AFDATA_LOCK(ifp);
+ lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR),
+ (struct sockaddr *)&addr4);
+ IF_AFDATA_UNLOCK(ifp);
+}
+#endif
+
+/*
+ * Timeout routine. Age arp_tab entries periodically.
+ */
+static void
+arptimer(void *arg)
+{
+ struct ifnet *ifp;
+ struct llentry *lle;
+
+ KASSERT(arg != NULL, ("%s: arg NULL", __func__));
+ lle = (struct llentry *)arg;
+ ifp = lle->lle_tbl->llt_ifp;
+ CURVNET_SET(ifp->if_vnet);
+ IF_AFDATA_LOCK(ifp);
+ LLE_WLOCK(lle);
+ if (lle->la_flags & LLE_STATIC)
+ LLE_WUNLOCK(lle);
+ else {
+ if (!callout_pending(&lle->la_timer) &&
+ callout_active(&lle->la_timer)) {
+ callout_stop(&lle->la_timer);
+ LLE_REMREF(lle);
+ (void) llentry_free(lle);
+ ARPSTAT_INC(timeouts);
+ }
+#ifdef DIAGNOSTIC
+ else {
+ struct sockaddr *l3addr = L3_ADDR(lle);
+ log(LOG_INFO,
+ "arptimer issue: %p, IPv4 address: \"%s\"\n", lle,
+ inet_ntoa(
+ ((const struct sockaddr_in *)l3addr)->sin_addr));
+ }
+#endif
+ }
+ IF_AFDATA_UNLOCK(ifp);
+ CURVNET_RESTORE();
+}
+
+/*
+ * Broadcast an ARP request. Caller specifies:
+ * - arp header source ip address
+ * - arp header target ip address
+ * - arp header source ethernet address
+ */
+void
+arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
+ u_char *enaddr)
+{
+ struct mbuf *m;
+ struct arphdr *ah;
+ struct sockaddr sa;
+
+ if (sip == NULL) {
+ /* XXX don't believe this can happen (or explain why) */
+ /*
+ * The caller did not supply a source address, try to find
+ * a compatible one among those assigned to this interface.
+ */
+ struct ifaddr *ifa;
+
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (!ifa->ifa_addr ||
+ ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ sip = &SIN(ifa->ifa_addr)->sin_addr;
+ if (0 == ((sip->s_addr ^ tip->s_addr) &
+ SIN(ifa->ifa_netmask)->sin_addr.s_addr) )
+ break; /* found it. */
+ }
+ if (sip == NULL) {
+ printf("%s: cannot find matching address\n", __func__);
+ return;
+ }
+ }
+
+ if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+ return;
+ m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
+ 2*ifp->if_data.ifi_addrlen;
+ m->m_pkthdr.len = m->m_len;
+ MH_ALIGN(m, m->m_len);
+ ah = mtod(m, struct arphdr *);
+ bzero((caddr_t)ah, m->m_len);
+#ifdef MAC
+ mac_netinet_arp_send(ifp, m);
+#endif
+ ah->ar_pro = htons(ETHERTYPE_IP);
+ ah->ar_hln = ifp->if_addrlen; /* hardware address length */
+ ah->ar_pln = sizeof(struct in_addr); /* protocol address length */
+ ah->ar_op = htons(ARPOP_REQUEST);
+ bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln);
+ bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln);
+ bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln);
+ sa.sa_family = AF_ARP;
+ sa.sa_len = 2;
+ m->m_flags |= M_BCAST;
+ (*ifp->if_output)(ifp, m, &sa, NULL);
+ ARPSTAT_INC(txrequests);
+}
+
+/*
+ * Resolve an IP address into an ethernet address.
+ * On input:
+ * ifp is the interface we use
+ * rt0 is the route to the final destination (possibly useless)
+ * m is the mbuf. May be NULL if we don't have a packet.
+ * dst is the next hop,
+ * desten is where we want the address.
+ *
+ * On success, desten is filled in and the function returns 0;
+ * If the packet must be held pending resolution, we return EWOULDBLOCK
+ * On other errors, we return the corresponding error code.
+ * Note that m_freem() handles NULL.
+ */
+int
+arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
+ struct sockaddr *dst, u_char *desten, struct llentry **lle)
+{
+ struct llentry *la = 0;
+ u_int flags = 0;
+ int error, renew;
+
+ *lle = NULL;
+ if (m != NULL) {
+ if (m->m_flags & M_BCAST) {
+ /* broadcast */
+ (void)memcpy(desten,
+ ifp->if_broadcastaddr, ifp->if_addrlen);
+ return (0);
+ }
+ if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) {
+ /* multicast */
+ ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
+ return (0);
+ }
+ }
+ /* XXXXX
+ */
+retry:
+ IF_AFDATA_RLOCK(ifp);
+ la = lla_lookup(LLTABLE(ifp), flags, dst);
+ IF_AFDATA_RUNLOCK(ifp);
+ if ((la == NULL) && ((flags & LLE_EXCLUSIVE) == 0)
+ && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) {
+ flags |= (LLE_CREATE | LLE_EXCLUSIVE);
+ IF_AFDATA_WLOCK(ifp);
+ la = lla_lookup(LLTABLE(ifp), flags, dst);
+ IF_AFDATA_WUNLOCK(ifp);
+ }
+ if (la == NULL) {
+ if (flags & LLE_CREATE)
+ log(LOG_DEBUG,
+ "arpresolve: can't allocate llinfo for %s\n",
+ inet_ntoa(SIN(dst)->sin_addr));
+ m_freem(m);
+ return (EINVAL);
+ }
+
+ if ((la->la_flags & LLE_VALID) &&
+ ((la->la_flags & LLE_STATIC) || la->la_expire > time_second)) {
+ bcopy(&la->ll_addr, desten, ifp->if_addrlen);
+ /*
+ * If entry has an expiry time and it is approaching,
+ * see if we need to send an ARP request within this
+ * arpt_down interval.
+ */
+ if (!(la->la_flags & LLE_STATIC) &&
+ time_second + la->la_preempt > la->la_expire) {
+ arprequest(ifp, NULL,
+ &SIN(dst)->sin_addr, IF_LLADDR(ifp));
+
+ la->la_preempt--;
+ }
+
+ *lle = la;
+ error = 0;
+ goto done;
+ }
+
+ if (la->la_flags & LLE_STATIC) { /* should not happen! */
+ log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n",
+ inet_ntoa(SIN(dst)->sin_addr));
+ m_freem(m);
+ error = EINVAL;
+ goto done;
+ }
+
+ renew = (la->la_asked == 0 || la->la_expire != time_second);
+ if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) {
+ flags |= LLE_EXCLUSIVE;
+ LLE_RUNLOCK(la);
+ goto retry;
+ }
+ /*
+ * There is an arptab entry, but no ethernet address
+ * response yet. Replace the held mbuf with this
+ * latest one.
+ */
+ if (m != NULL) {
+ if (la->la_hold != NULL) {
+ m_freem(la->la_hold);
+ ARPSTAT_INC(dropped);
+ }
+ la->la_hold = m;
+ if (renew == 0 && (flags & LLE_EXCLUSIVE)) {
+ flags &= ~LLE_EXCLUSIVE;
+ LLE_DOWNGRADE(la);
+ }
+
+ }
+ /*
+ * Return EWOULDBLOCK if we have tried less than arp_maxtries. It
+ * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH
+ * if we have already sent arp_maxtries ARP requests. Retransmit the
+ * ARP request, but not faster than one request per second.
+ */
+ if (la->la_asked < V_arp_maxtries)
+ error = EWOULDBLOCK; /* First request. */
+ else
+ error = rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) ?
+ EHOSTUNREACH : EHOSTDOWN;
+
+ if (renew) {
+ int canceled;
+
+ LLE_ADDREF(la);
+ la->la_expire = time_second;
+ canceled = callout_reset(&la->la_timer, hz * V_arpt_down,
+ arptimer, la);
+ if (canceled)
+ LLE_REMREF(la);
+ la->la_asked++;
+ LLE_WUNLOCK(la);
+ arprequest(ifp, NULL, &SIN(dst)->sin_addr,
+ IF_LLADDR(ifp));
+ return (error);
+ }
+done:
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(la);
+ else
+ LLE_RUNLOCK(la);
+ return (error);
+}
+
+/*
+ * Common length and type checks are done here,
+ * then the protocol-specific routine is called.
+ */
+static void
+arpintr(struct mbuf *m)
+{
+ struct arphdr *ar;
+
+ if (m->m_len < sizeof(struct arphdr) &&
+ ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
+ log(LOG_ERR, "arp: runt packet -- m_pullup failed\n");
+ return;
+ }
+ ar = mtod(m, struct arphdr *);
+
+ if (ntohs(ar->ar_hrd) != ARPHRD_ETHER &&
+ ntohs(ar->ar_hrd) != ARPHRD_IEEE802 &&
+ ntohs(ar->ar_hrd) != ARPHRD_ARCNET &&
+ ntohs(ar->ar_hrd) != ARPHRD_IEEE1394) {
+ log(LOG_ERR, "arp: unknown hardware address format (0x%2D)\n",
+ (unsigned char *)&ar->ar_hrd, "");
+ m_freem(m);
+ return;
+ }
+
+ if (m->m_len < arphdr_len(ar)) {
+ if ((m = m_pullup(m, arphdr_len(ar))) == NULL) {
+ log(LOG_ERR, "arp: runt packet\n");
+ m_freem(m);
+ return;
+ }
+ ar = mtod(m, struct arphdr *);
+ }
+
+ ARPSTAT_INC(received);
+ switch (ntohs(ar->ar_pro)) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ in_arpinput(m);
+ return;
+#endif
+ }
+ m_freem(m);
+}
+
+#ifdef INET
+/*
+ * ARP for Internet protocols on 10 Mb/s Ethernet.
+ * Algorithm is that given in RFC 826.
+ * In addition, a sanity check is performed on the sender
+ * protocol address, to catch impersonators.
+ * We no longer handle negotiations for use of trailer protocol:
+ * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
+ * along with IP replies if we wanted trailers sent to us,
+ * and also sent them in response to IP replies.
+ * This allowed either end to announce the desire to receive
+ * trailer packets.
+ * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
+ * but formerly didn't normally send requests.
+ */
+static int log_arp_wrong_iface = 1;
+static int log_arp_movements = 1;
+static int log_arp_permanent_modify = 1;
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
+ &log_arp_wrong_iface, 0,
+ "log arp packets arriving on the wrong interface");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
+ &log_arp_movements, 0,
+ "log arp replies from MACs different than the one in the cache");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
+ &log_arp_permanent_modify, 0,
+ "log arp replies from MACs different than the one in the permanent arp entry");
+
+
+static void
+in_arpinput(struct mbuf *m)
+{
+ struct arphdr *ah;
+ struct ifnet *ifp = m->m_pkthdr.rcvif;
+ struct llentry *la = NULL;
+ struct rtentry *rt;
+ struct ifaddr *ifa;
+ struct in_ifaddr *ia;
+ struct mbuf *hold;
+ struct sockaddr sa;
+ struct in_addr isaddr, itaddr, myaddr;
+ u_int8_t *enaddr = NULL;
+ int op, flags;
+ int req_len;
+ int bridged = 0, is_bridge = 0;
+ int carp_match = 0;
+ struct sockaddr_in sin;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = 0;
+
+ if (ifp->if_bridge)
+ bridged = 1;
+ if (ifp->if_type == IFT_BRIDGE)
+ is_bridge = 1;
+
+ req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
+ if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
+ log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n");
+ return;
+ }
+
+ ah = mtod(m, struct arphdr *);
+ op = ntohs(ah->ar_op);
+ (void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
+ (void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
+
+ if (op == ARPOP_REPLY)
+ ARPSTAT_INC(rxreplies);
+
+ /*
+ * For a bridge, we want to check the address irrespective
+ * of the receive interface. (This will change slightly
+ * when we have clusters of interfaces).
+ * If the interface does not match, but the receiving interface
+ * is part of carp, we call carp_iamatch to see if this is a
+ * request for the virtual host ip.
+ * XXX: This is really ugly!
+ */
+ IN_IFADDR_RLOCK();
+ LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
+ if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
+ ia->ia_ifp == ifp) &&
+ itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
+ ifa_ref(&ia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+ goto match;
+ }
+ if (ifp->if_carp != NULL &&
+ (*carp_iamatch_p)(ifp, ia, &isaddr, &enaddr) &&
+ itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
+ carp_match = 1;
+ ifa_ref(&ia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+ goto match;
+ }
+ }
+ LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
+ if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
+ ia->ia_ifp == ifp) &&
+ isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
+ ifa_ref(&ia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+ goto match;
+ }
+
+#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \
+ (ia->ia_ifp->if_bridge == ifp->if_softc && \
+ !bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) && \
+ addr == ia->ia_addr.sin_addr.s_addr)
+ /*
+ * Check the case when bridge shares its MAC address with
+ * some of its children, so packets are claimed by bridge
+ * itself (bridge_input() does it first), but they are really
+ * meant to be destined to the bridge member.
+ */
+ if (is_bridge) {
+ LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
+ if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) {
+ ifa_ref(&ia->ia_ifa);
+ ifp = ia->ia_ifp;
+ IN_IFADDR_RUNLOCK();
+ goto match;
+ }
+ }
+ }
+#undef BDG_MEMBER_MATCHES_ARP
+ IN_IFADDR_RUNLOCK();
+
+ /*
+ * No match, use the first inet address on the receive interface
+ * as a dummy address for the rest of the function.
+ */
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ ia = ifatoia(ifa);
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto match;
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ /*
+ * If bridging, fall back to using any inet address.
+ */
+ IN_IFADDR_RLOCK();
+ if (!bridged || (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL) {
+ IN_IFADDR_RUNLOCK();
+ goto drop;
+ }
+ ifa_ref(&ia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+match:
+ if (!enaddr)
+ enaddr = (u_int8_t *)IF_LLADDR(ifp);
+ myaddr = ia->ia_addr.sin_addr;
+ ifa_free(&ia->ia_ifa);
+ if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
+ goto drop; /* it's from me, ignore it. */
+ if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
+ log(LOG_ERR,
+ "arp: link address is broadcast for IP address %s!\n",
+ inet_ntoa(isaddr));
+ goto drop;
+ }
+ /*
+ * Warn if another host is using the same IP address, but only if the
+ * IP address isn't 0.0.0.0, which is used for DHCP only, in which
+ * case we suppress the warning to avoid false positive complaints of
+ * potential misconfiguration.
+ */
+ if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
+ log(LOG_ERR,
+ "arp: %*D is using my IP address %s on %s!\n",
+ ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
+ inet_ntoa(isaddr), ifp->if_xname);
+ itaddr = myaddr;
+ ARPSTAT_INC(dupips);
+ goto reply;
+ }
+ if (ifp->if_flags & IFF_STATICARP)
+ goto reply;
+
+ bzero(&sin, sizeof(sin));
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_family = AF_INET;
+ sin.sin_addr = isaddr;
+ flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0;
+ flags |= LLE_EXCLUSIVE;
+ IF_AFDATA_LOCK(ifp);
+ la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin);
+ IF_AFDATA_UNLOCK(ifp);
+ if (la != NULL) {
+ /* the following is not an error when doing bridging */
+ if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) {
+ if (log_arp_wrong_iface)
+ log(LOG_ERR, "arp: %s is on %s "
+ "but got reply from %*D on %s\n",
+ inet_ntoa(isaddr),
+ la->lle_tbl->llt_ifp->if_xname,
+ ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
+ ifp->if_xname);
+ LLE_WUNLOCK(la);
+ goto reply;
+ }
+ if ((la->la_flags & LLE_VALID) &&
+ bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) {
+ if (la->la_flags & LLE_STATIC) {
+ LLE_WUNLOCK(la);
+ log(LOG_ERR,
+ "arp: %*D attempts to modify permanent "
+ "entry for %s on %s\n",
+ ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
+ inet_ntoa(isaddr), ifp->if_xname);
+ goto reply;
+ }
+ if (log_arp_movements) {
+ log(LOG_INFO, "arp: %s moved from %*D "
+ "to %*D on %s\n",
+ inet_ntoa(isaddr),
+ ifp->if_addrlen,
+ (u_char *)&la->ll_addr, ":",
+ ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
+ ifp->if_xname);
+ }
+ }
+
+ if (ifp->if_addrlen != ah->ar_hln) {
+ LLE_WUNLOCK(la);
+ log(LOG_WARNING,
+ "arp from %*D: addr len: new %d, i/f %d (ignored)",
+ ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
+ ah->ar_hln, ifp->if_addrlen);
+ goto reply;
+ }
+ (void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
+ la->la_flags |= LLE_VALID;
+
+ if (!(la->la_flags & LLE_STATIC)) {
+ int canceled;
+
+ LLE_ADDREF(la);
+ la->la_expire = time_second + V_arpt_keep;
+ canceled = callout_reset(&la->la_timer,
+ hz * V_arpt_keep, arptimer, la);
+ if (canceled)
+ LLE_REMREF(la);
+ }
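+		/*
+		 * Worked example (assuming the stock default): arpt_keep,
+		 * exposed as the net.link.ether.inet.max_age sysctl, defaults
+		 * to 20 minutes, so the expiry and callout above push a
+		 * confirmed entry out roughly 1200 seconds from now.
+		 */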
+ la->la_asked = 0;
+ la->la_preempt = V_arp_maxtries;
+ hold = la->la_hold;
+ if (hold != NULL) {
+ la->la_hold = NULL;
+ memcpy(&sa, L3_ADDR(la), sizeof(sa));
+ }
+ LLE_WUNLOCK(la);
+ if (hold != NULL)
+ (*ifp->if_output)(ifp, hold, &sa, NULL);
+ }
+reply:
+ if (op != ARPOP_REQUEST)
+ goto drop;
+ ARPSTAT_INC(rxrequests);
+
+ if (itaddr.s_addr == myaddr.s_addr) {
+ /* Shortcut.. the receiving interface is the target. */
+ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
+ (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
+ } else {
+ struct llentry *lle = NULL;
+
+ sin.sin_addr = itaddr;
+ IF_AFDATA_LOCK(ifp);
+ lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
+ IF_AFDATA_UNLOCK(ifp);
+
+ if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
+ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
+ (void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln);
+ LLE_RUNLOCK(lle);
+ } else {
+
+ if (lle != NULL)
+ LLE_RUNLOCK(lle);
+
+ if (!V_arp_proxyall)
+ goto drop;
+
+ sin.sin_addr = itaddr;
+ /* XXX MRT use table 0 for arp reply */
+ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
+ if (!rt)
+ goto drop;
+
+ /*
+ * Don't send proxies for nodes on the same interface
+ * as this one came out of, or we'll get into a fight
+ * over who claims what Ether address.
+ */
+ if (!rt->rt_ifp || rt->rt_ifp == ifp) {
+ RTFREE_LOCKED(rt);
+ goto drop;
+ }
+ RTFREE_LOCKED(rt);
+
+ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
+ (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
+
+ /*
+ * Also check that the node which sent the ARP packet
+			 * is on the interface we expect it to be on. This
+ * avoids ARP chaos if an interface is connected to the
+ * wrong network.
+ */
+ sin.sin_addr = isaddr;
+
+ /* XXX MRT use table 0 for arp checks */
+ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
+ if (!rt)
+ goto drop;
+ if (rt->rt_ifp != ifp) {
+ log(LOG_INFO, "arp_proxy: ignoring request"
+ " from %s via %s, expecting %s\n",
+ inet_ntoa(isaddr), ifp->if_xname,
+ rt->rt_ifp->if_xname);
+ RTFREE_LOCKED(rt);
+ goto drop;
+ }
+ RTFREE_LOCKED(rt);
+
+#ifdef DEBUG_PROXY
+ printf("arp: proxying for %s\n",
+ inet_ntoa(itaddr));
+#endif
+ }
+ }
+
+ if (itaddr.s_addr == myaddr.s_addr &&
+ IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
+ /* RFC 3927 link-local IPv4; always reply by broadcast. */
+#ifdef DEBUG_LINKLOCAL
+ printf("arp: sending reply for link-local addr %s\n",
+ inet_ntoa(itaddr));
+#endif
+ m->m_flags |= M_BCAST;
+ m->m_flags &= ~M_MCAST;
+ } else {
+ /* default behaviour; never reply by broadcast. */
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ }
+ (void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
+ (void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
+ ah->ar_op = htons(ARPOP_REPLY);
+ ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
+ m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
+ m->m_pkthdr.len = m->m_len;
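+	/*
+	 * For reference, the reply now held in the mbuf carries: ar_sha,
+	 * our (or the proxied) link-layer address; ar_tha, the requester's
+	 * link-layer address; ar_spa, the address being answered for
+	 * (itaddr); and ar_tpa, the requester's protocol address.
+	 */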
+ sa.sa_family = AF_ARP;
+ sa.sa_len = 2;
+ (*ifp->if_output)(ifp, m, &sa, NULL);
+ ARPSTAT_INC(txreplies);
+ return;
+
+drop:
+ m_freem(m);
+}
+#endif
+
+void
+arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
+{
+ struct llentry *lle;
+
+ if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
+ arprequest(ifp, &IA_SIN(ifa)->sin_addr,
+ &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
+		/*
+		 * The interface address is installed as a static entry
+		 * so that the output of the arp(8) utility shows the
+		 * corresponding L2 entry as permanent.
+		 */
+ IF_AFDATA_LOCK(ifp);
+ lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE | LLE_IFADDR | LLE_STATIC),
+ (struct sockaddr *)IA_SIN(ifa));
+ IF_AFDATA_UNLOCK(ifp);
+ if (lle == NULL)
+ log(LOG_INFO, "arp_ifinit: cannot create arp "
+ "entry for interface address\n");
+ else
+ LLE_RUNLOCK(lle);
+ }
+ ifa->ifa_rtrequest = NULL;
+}
+
+void
+arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
+{
+ if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
+ arprequest(ifp, &IA_SIN(ifa)->sin_addr,
+ &IA_SIN(ifa)->sin_addr, enaddr);
+ ifa->ifa_rtrequest = NULL;
+}
+
+static void
+arp_init(void)
+{
+
+ netisr_register(&arp_nh);
+}
+SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
diff --git a/freebsd/sys/netinet/if_ether.h b/freebsd/sys/netinet/if_ether.h
new file mode 100644
index 00000000..e3c8d009
--- /dev/null
+++ b/freebsd/sys/netinet/if_ether.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/if_ether.h>
diff --git a/freebsd/sys/netinet/igmp.c b/freebsd/sys/netinet/igmp.c
new file mode 100644
index 00000000..5f8893d7
--- /dev/null
+++ b/freebsd/sys/netinet/igmp.c
@@ -0,0 +1,3655 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2007-2009 Bruce Simpson.
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)igmp.c 8.1 (Berkeley) 7/19/93
+ */
+
+/*
+ * Internet Group Management Protocol (IGMP) routines.
+ * [RFC1112, RFC2236, RFC3376]
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ * Modified by Rosen Sharma, Stanford, Aug 1994.
+ * Modified by Bill Fenner, Xerox PARC, Feb 1995.
+ * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
+ * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
+ *
+ * MULTICAST Revision: 3.5.1.4
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/condvar.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#include <freebsd/netinet/igmp.h>
+#include <freebsd/netinet/igmp_var.h>
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#ifndef KTR_IGMPV3
+#define KTR_IGMPV3 KTR_INET
+#endif
+
+static struct igmp_ifinfo *
+ igi_alloc_locked(struct ifnet *);
+static void igi_delete_locked(const struct ifnet *);
+static void igmp_dispatch_queue(struct ifqueue *, int, const int);
+static void igmp_fasttimo_vnet(void);
+static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
+static int igmp_handle_state_change(struct in_multi *,
+ struct igmp_ifinfo *);
+static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
+static int igmp_input_v1_query(struct ifnet *, const struct ip *,
+ const struct igmp *);
+static int igmp_input_v2_query(struct ifnet *, const struct ip *,
+ const struct igmp *);
+static int igmp_input_v3_query(struct ifnet *, const struct ip *,
+ /*const*/ struct igmpv3 *);
+static int igmp_input_v3_group_query(struct in_multi *,
+ struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *);
+static int igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
+ /*const*/ struct igmp *);
+static int igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
+ /*const*/ struct igmp *);
+static void igmp_intr(struct mbuf *);
+static int igmp_isgroupreported(const struct in_addr);
+static struct mbuf *
+ igmp_ra_alloc(void);
+#ifdef KTR
+static char * igmp_rec_type_to_str(const int);
+#endif
+static void igmp_set_version(struct igmp_ifinfo *, const int);
+static void igmp_slowtimo_vnet(void);
+static int igmp_v1v2_queue_report(struct in_multi *, const int);
+static void igmp_v1v2_process_group_timer(struct in_multi *, const int);
+static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
+static void igmp_v2_update_group(struct in_multi *, const int);
+static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
+static void igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
+static struct mbuf *
+ igmp_v3_encap_report(struct ifnet *, struct mbuf *);
+static int igmp_v3_enqueue_group_record(struct ifqueue *,
+ struct in_multi *, const int, const int, const int);
+static int igmp_v3_enqueue_filter_change(struct ifqueue *,
+ struct in_multi *);
+static void igmp_v3_process_group_timers(struct igmp_ifinfo *,
+ struct ifqueue *, struct ifqueue *, struct in_multi *,
+ const int);
+static int igmp_v3_merge_state_changes(struct in_multi *,
+ struct ifqueue *);
+static void igmp_v3_suppress_group_record(struct in_multi *);
+static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
+static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
+static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
+
+static const struct netisr_handler igmp_nh = {
+ .nh_name = "igmp",
+ .nh_handler = igmp_intr,
+ .nh_proto = NETISR_IGMP,
+ .nh_policy = NETISR_POLICY_SOURCE,
+};
+
+/*
+ * System-wide globals.
+ *
+ * Unlocked access to these is OK, except for the global IGMP output
+ * queue. The IGMP subsystem lock ends up being system-wide for the moment,
+ * because all VIMAGEs have to share a global output queue, as netisrs
+ * themselves are not virtualized.
+ *
+ * Locking:
+ * * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
+ * Any may be taken independently; if any are held at the same
+ * time, the above lock order must be followed.
+ * * All output is delegated to the netisr.
+ * Now that Giant has been eliminated, the netisr may be inlined.
+ * * IN_MULTI_LOCK covers in_multi.
+ * * IGMP_LOCK covers igmp_ifinfo and any global variables in this file,
+ * including the output queue.
+ * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
+ * per-link state iterators.
+ * * igmp_ifinfo is valid as long as PF_INET is attached to the interface,
+ * therefore it is not refcounted.
+ * We allow unlocked reads of igmp_ifinfo when accessed via in_multi.
+ *
+ * Reference counting
+ * * IGMP acquires its own reference every time an in_multi is passed to
+ * it and the group is being joined for the first time.
+ * * IGMP releases its reference(s) on in_multi in a deferred way,
+ * because the operations which process the release run as part of
+ * a loop whose control variables are directly affected by the release
+ * (that, and not recursing on the IF_ADDR_LOCK).
+ *
+ * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
+ * to a vnet in ifp->if_vnet.
+ *
+ * SMPng: XXX We may potentially race operations on ifma_protospec.
+ * The problem is that we currently lack a clean way of taking the
+ * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing,
+ * as anything which modifies ifma needs to be covered by that lock.
+ * So check for ifma_protospec being NULL before proceeding.
+ */
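+/*
+ * Illustrative sketch only (not part of the original code): a routine
+ * that needed all three locks would take them in the documented order.
+ * The lock macros are the ones used throughout this file; the function
+ * itself is hypothetical and compiled out.
+ */
+#if 0
+static void
+igmp_lock_order_example(struct ifnet *ifp)
+{
+
+	IN_MULTI_LOCK();	/* 1st: in_multi memberships */
+	IGMP_LOCK();		/* 2nd: igmp_ifinfo and the output queue */
+	IF_ADDR_LOCK(ifp);	/* 3rd: ifp->if_multiaddrs */
+	/* ... work that requires all three locks ... */
+	IF_ADDR_UNLOCK(ifp);
+	IGMP_UNLOCK();
+	IN_MULTI_UNLOCK();
+}
+#endif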
+struct mtx igmp_mtx;
+
+struct mbuf *m_raopt; /* Router Alert option */
+MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
+
+/*
+ * VIMAGE-wide globals.
+ *
+ * The IGMPv3 timers themselves need to run per-image, however,
+ * protosw timers run globally (see tcp).
+ * An ifnet can only be in one vimage at a time, and the loopback
+ * ifnet, loif, is itself virtualized.
+ * It would otherwise be possible to seriously hose IGMP state,
+ * and create inconsistencies in upstream multicast routing, if you have
+ * multiple VIMAGEs running on the same link joining different multicast
+ * groups, UNLESS the "primary IP address" is different. This is because
+ * IGMP for IPv4 does not force link-local addresses to be used for each
+ * node, unlike MLD for IPv6.
+ * Obviously the IGMPv3 per-interface state has per-vimage granularity
+ * also as a result.
+ *
+ * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
+ * policy to control the address used by IGMP on the link.
+ */
+static VNET_DEFINE(int, interface_timers_running); /* IGMPv3 general
+ * query response */
+static VNET_DEFINE(int, state_change_timers_running); /* IGMPv3 state-change
+ * retransmit */
+static VNET_DEFINE(int, current_state_timers_running); /* IGMPv1/v2 host
+ * report; IGMPv3 g/sg
+ * query response */
+
+#define V_interface_timers_running VNET(interface_timers_running)
+#define V_state_change_timers_running VNET(state_change_timers_running)
+#define V_current_state_timers_running VNET(current_state_timers_running)
+
+static VNET_DEFINE(LIST_HEAD(, igmp_ifinfo), igi_head);
+static VNET_DEFINE(struct igmpstat, igmpstat) = {
+ .igps_version = IGPS_VERSION_3,
+ .igps_len = sizeof(struct igmpstat),
+};
+static VNET_DEFINE(struct timeval, igmp_gsrdelay) = {10, 0};
+
+#define V_igi_head VNET(igi_head)
+#define V_igmpstat VNET(igmpstat)
+#define V_igmp_gsrdelay VNET(igmp_gsrdelay)
+
+static VNET_DEFINE(int, igmp_recvifkludge) = 1;
+static VNET_DEFINE(int, igmp_sendra) = 1;
+static VNET_DEFINE(int, igmp_sendlocal) = 1;
+static VNET_DEFINE(int, igmp_v1enable) = 1;
+static VNET_DEFINE(int, igmp_v2enable) = 1;
+static VNET_DEFINE(int, igmp_legacysupp);
+static VNET_DEFINE(int, igmp_default_version) = IGMP_VERSION_3;
+
+#define V_igmp_recvifkludge VNET(igmp_recvifkludge)
+#define V_igmp_sendra VNET(igmp_sendra)
+#define V_igmp_sendlocal VNET(igmp_sendlocal)
+#define V_igmp_v1enable VNET(igmp_v1enable)
+#define V_igmp_v2enable VNET(igmp_v2enable)
+#define V_igmp_legacysupp VNET(igmp_legacysupp)
+#define V_igmp_default_version VNET(igmp_default_version)
+
+/*
+ * Virtualized sysctls.
+ */
+SYSCTL_VNET_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RW,
+ &VNET_NAME(igmpstat), igmpstat, "");
+SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW,
+ &VNET_NAME(igmp_recvifkludge), 0,
+ "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
+SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW,
+ &VNET_NAME(igmp_sendra), 0,
+ "Send IP Router Alert option in IGMPv2/v3 messages");
+SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW,
+ &VNET_NAME(igmp_sendlocal), 0,
+ "Send IGMP membership reports for 224.0.0.0/24 groups");
+SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW,
+ &VNET_NAME(igmp_v1enable), 0,
+ "Enable backwards compatibility with IGMPv1");
+SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW,
+ &VNET_NAME(igmp_v2enable), 0,
+ "Enable backwards compatibility with IGMPv2");
+SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW,
+ &VNET_NAME(igmp_legacysupp), 0,
+ "Allow v1/v2 reports to suppress v3 group responses");
+SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, default_version,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ &VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I",
+ "Default version of IGMP to run on each interface");
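+/*
+ * Example (illustrative only): a host behind switches that only do
+ * IGMPv2 snooping could be pinned to the older protocol with
+ * "sysctl net.inet.igmp.default_version=2"; the handler below rejects
+ * values outside the range 1..3.
+ */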
+SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ &VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I",
+ "Rate limit for IGMPv3 Group-and-Source queries in seconds");
+
+/*
+ * Non-virtualized sysctls.
+ */
+SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
+ sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
+
+static __inline void
+igmp_save_context(struct mbuf *m, struct ifnet *ifp)
+{
+
+#ifdef VIMAGE
+ m->m_pkthdr.header = ifp->if_vnet;
+#endif /* VIMAGE */
+ m->m_pkthdr.flowid = ifp->if_index;
+}
+
+static __inline void
+igmp_scrub_context(struct mbuf *m)
+{
+
+ m->m_pkthdr.header = NULL;
+ m->m_pkthdr.flowid = 0;
+}
+
+#ifdef KTR
+static __inline char *
+inet_ntoa_haddr(in_addr_t haddr)
+{
+ struct in_addr ia;
+
+ ia.s_addr = htonl(haddr);
+ return (inet_ntoa(ia));
+}
+#endif
+
+/*
+ * Restore context from a queued IGMP output chain.
+ * Return saved ifindex.
+ *
+ * VIMAGE: The assertion is there to make sure that we
+ * actually called CURVNET_SET() with what's in the mbuf chain.
+ */
+static __inline uint32_t
+igmp_restore_context(struct mbuf *m)
+{
+
+#ifdef notyet
+#if defined(VIMAGE) && defined(INVARIANTS)
+ KASSERT(curvnet == (m->m_pkthdr.header),
+ ("%s: called when curvnet was not restored", __func__));
+#endif
+#endif
+ return (m->m_pkthdr.flowid);
+}
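+/*
+ * Taken together, igmp_save_context() and igmp_restore_context() let a
+ * queued report remember its transmitting interface: the if_index is
+ * stashed in the mbuf packet header on enqueue and read back later so
+ * the handler can look the ifnet up again by index; igmp_scrub_context()
+ * clears that stashed state again.
+ */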
+
+/*
+ * Retrieve or set default IGMP version.
+ *
+ * VIMAGE: Assume curvnet set by caller.
+ * SMPng: NOTE: Serialized by IGMP lock.
+ */
+static int
+sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int new;
+
+ error = sysctl_wire_old_buffer(req, sizeof(int));
+ if (error)
+ return (error);
+
+ IGMP_LOCK();
+
+ new = V_igmp_default_version;
+
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error || !req->newptr)
+ goto out_locked;
+
+ if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
+ error = EINVAL;
+ goto out_locked;
+ }
+
+ CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d",
+ V_igmp_default_version, new);
+
+ V_igmp_default_version = new;
+
+out_locked:
+ IGMP_UNLOCK();
+ return (error);
+}
+
+/*
+ * Retrieve or set threshold between group-source queries in seconds.
+ *
+ * VIMAGE: Assume curvnet set by caller.
+ * SMPng: NOTE: Serialized by IGMP lock.
+ */
+static int
+sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int i;
+
+ error = sysctl_wire_old_buffer(req, sizeof(int));
+ if (error)
+ return (error);
+
+ IGMP_LOCK();
+
+ i = V_igmp_gsrdelay.tv_sec;
+
+ error = sysctl_handle_int(oidp, &i, 0, req);
+ if (error || !req->newptr)
+ goto out_locked;
+
+ if (i < -1 || i >= 60) {
+ error = EINVAL;
+ goto out_locked;
+ }
+
+ CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
+ V_igmp_gsrdelay.tv_sec, i);
+ V_igmp_gsrdelay.tv_sec = i;
+
+out_locked:
+ IGMP_UNLOCK();
+ return (error);
+}
+
+/*
+ * Expose struct igmp_ifinfo to userland, keyed by ifindex.
+ * For use by ifmcstat(8).
+ *
+ * SMPng: NOTE: Does an unlocked ifindex space read.
+ * VIMAGE: Assume curvnet set by caller. The node handler itself
+ * is not directly virtualized.
+ */
+static int
+sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
+{
+ int *name;
+ int error;
+ u_int namelen;
+ struct ifnet *ifp;
+ struct igmp_ifinfo *igi;
+
+ name = (int *)arg1;
+ namelen = arg2;
+
+ if (req->newptr != NULL)
+ return (EPERM);
+
+ if (namelen != 1)
+ return (EINVAL);
+
+ error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
+ if (error)
+ return (error);
+
+ IN_MULTI_LOCK();
+ IGMP_LOCK();
+
+ if (name[0] <= 0 || name[0] > V_if_index) {
+ error = ENOENT;
+ goto out_locked;
+ }
+
+ error = ENOENT;
+
+ ifp = ifnet_byindex(name[0]);
+ if (ifp == NULL)
+ goto out_locked;
+
+ LIST_FOREACH(igi, &V_igi_head, igi_link) {
+ if (ifp == igi->igi_ifp) {
+ error = SYSCTL_OUT(req, igi,
+ sizeof(struct igmp_ifinfo));
+ break;
+ }
+ }
+
+out_locked:
+ IGMP_UNLOCK();
+ IN_MULTI_UNLOCK();
+ return (error);
+}
+
+/*
+ * Dispatch an entire queue of pending packet chains
+ * using the netisr.
+ * VIMAGE: Assumes the vnet pointer has been set.
+ */
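+/*
+ * Note on the limit argument: because of the "if (--limit == 0)" test
+ * below, a limit of 0 never triggers the early break (the counter just
+ * goes negative), so callers such as the fast-timeout path pass 0 to
+ * mean "drain the whole queue".
+ */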
+static void
+igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop)
+{
+ struct mbuf *m;
+
+ for (;;) {
+ _IF_DEQUEUE(ifq, m);
+ if (m == NULL)
+ break;
+ CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, ifq, m);
+ if (loop)
+ m->m_flags |= M_IGMP_LOOP;
+ netisr_dispatch(NETISR_IGMP, m);
+ if (--limit == 0)
+ break;
+ }
+}
+
+/*
+ * Filter outgoing IGMP report state by group.
+ *
+ * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
+ * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
+ * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
+ * this may break certain IGMP snooping switches which rely on the old
+ * report behaviour.
+ *
+ * Return zero if the given group is one for which IGMP reports
+ * should be suppressed, or non-zero if reports should be issued.
+ */
+static __inline int
+igmp_isgroupreported(const struct in_addr addr)
+{
+
+ if (in_allhosts(addr) ||
+ ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))))
+ return (0);
+
+ return (1);
+}
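+/*
+ * For example (illustrative): with the default net.inet.igmp.sendlocal=1,
+ * only 224.0.0.1 is suppressed above; setting sendlocal to 0 also
+ * suppresses reports for the rest of 224.0.0.0/24, e.g. 224.0.0.251
+ * (mDNS).
+ */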
+
+/*
+ * Construct a Router Alert option to use in outgoing packets.
+ */
+static struct mbuf *
+igmp_ra_alloc(void)
+{
+ struct mbuf *m;
+ struct ipoption *p;
+
+ MGET(m, M_DONTWAIT, MT_DATA);
+ p = mtod(m, struct ipoption *);
+ p->ipopt_dst.s_addr = INADDR_ANY;
+ p->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */
+ p->ipopt_list[1] = 0x04; /* 4 bytes long */
+ p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */
+ p->ipopt_list[3] = 0x00; /* pad byte */
+ m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
+
+ return (m);
+}
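+/*
+ * For reference, the option bytes built above are 0x94 0x04 0x00 0x00:
+ * IPOPT_RA (148), a length of 4, and a two-octet value of zero, which is
+ * the Router Alert encoding required by RFC 2113 (the value-zero bytes
+ * here come from IPOPT_EOL and the explicit pad byte).
+ */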
+
+/*
+ * Attach IGMP when PF_INET is attached to an interface.
+ */
+struct igmp_ifinfo *
+igmp_domifattach(struct ifnet *ifp)
+{
+ struct igmp_ifinfo *igi;
+
+ CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
+ __func__, ifp, ifp->if_xname);
+
+ IGMP_LOCK();
+
+ igi = igi_alloc_locked(ifp);
+ if (!(ifp->if_flags & IFF_MULTICAST))
+ igi->igi_flags |= IGIF_SILENT;
+
+ IGMP_UNLOCK();
+
+ return (igi);
+}
+
+/*
+ * VIMAGE: assume curvnet set by caller.
+ */
+static struct igmp_ifinfo *
+igi_alloc_locked(/*const*/ struct ifnet *ifp)
+{
+ struct igmp_ifinfo *igi;
+
+ IGMP_LOCK_ASSERT();
+
+ igi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO);
+ if (igi == NULL)
+ goto out;
+
+ igi->igi_ifp = ifp;
+ igi->igi_version = V_igmp_default_version;
+ igi->igi_flags = 0;
+ igi->igi_rv = IGMP_RV_INIT;
+ igi->igi_qi = IGMP_QI_INIT;
+ igi->igi_qri = IGMP_QRI_INIT;
+ igi->igi_uri = IGMP_URI_INIT;
+
+ SLIST_INIT(&igi->igi_relinmhead);
+
+ /*
+ * Responses to general queries are subject to bounds.
+ */
+ IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
+
+ LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);
+
+ CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)",
+ ifp, ifp->if_xname);
+
+out:
+ return (igi);
+}
+
+/*
+ * Hook for ifdetach.
+ *
+ * NOTE: Some finalization tasks need to run before the protocol domain
+ * is detached, but also before the link layer does its cleanup.
+ *
+ * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
+ * XXX This is also bitten by unlocked ifma_protospec access.
+ */
+void
+igmp_ifdetach(struct ifnet *ifp)
+{
+ struct igmp_ifinfo *igi;
+ struct ifmultiaddr *ifma;
+ struct in_multi *inm, *tinm;
+
+ CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
+ ifp->if_xname);
+
+ IGMP_LOCK();
+
+ igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+ if (igi->igi_version == IGMP_VERSION_3) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+#if 0
+ KASSERT(ifma->ifma_protospec != NULL,
+ ("%s: ifma_protospec is NULL", __func__));
+#endif
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ if (inm->inm_state == IGMP_LEAVING_MEMBER) {
+ SLIST_INSERT_HEAD(&igi->igi_relinmhead,
+ inm, inm_nrele);
+ }
+ inm_clear_recorded(inm);
+ }
+ IF_ADDR_UNLOCK(ifp);
+ /*
+ * Free the in_multi reference(s) for this IGMP lifecycle.
+ */
+ SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele,
+ tinm) {
+ SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
+ inm_release_locked(inm);
+ }
+ }
+
+ IGMP_UNLOCK();
+}
+
+/*
+ * Hook for domifdetach.
+ */
+void
+igmp_domifdetach(struct ifnet *ifp)
+{
+ struct igmp_ifinfo *igi;
+
+ CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
+ __func__, ifp, ifp->if_xname);
+
+ IGMP_LOCK();
+
+ igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+ igi_delete_locked(ifp);
+
+ IGMP_UNLOCK();
+}
+
+static void
+igi_delete_locked(const struct ifnet *ifp)
+{
+ struct igmp_ifinfo *igi, *tigi;
+
+ CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)",
+ __func__, ifp, ifp->if_xname);
+
+ IGMP_LOCK_ASSERT();
+
+ LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) {
+ if (igi->igi_ifp == ifp) {
+ /*
+ * Free deferred General Query responses.
+ */
+ _IF_DRAIN(&igi->igi_gq);
+
+ LIST_REMOVE(igi, igi_link);
+
+ KASSERT(SLIST_EMPTY(&igi->igi_relinmhead),
+ ("%s: there are dangling in_multi references",
+ __func__));
+
+ free(igi, M_IGMP);
+ return;
+ }
+ }
+
+#ifdef INVARIANTS
+ panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp);
+#endif
+}
+
+/*
+ * Process a received IGMPv1 query.
+ * Return non-zero if the message should be dropped.
+ *
+ * VIMAGE: The curvnet pointer is derived from the input ifp.
+ */
+static int
+igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
+ const struct igmp *igmp)
+{
+ struct ifmultiaddr *ifma;
+ struct igmp_ifinfo *igi;
+ struct in_multi *inm;
+
+ /*
+	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
+ * 224.0.0.1. They are always treated as General Queries.
+ * igmp_group is always ignored. Do not drop it as a userland
+ * daemon may wish to see it.
+ * XXX SMPng: unlocked increments in igmpstat assumed atomic.
+ */
+ if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
+ IGMPSTAT_INC(igps_rcv_badqueries);
+ return (0);
+ }
+ IGMPSTAT_INC(igps_rcv_gen_queries);
+
+ IN_MULTI_LOCK();
+ IGMP_LOCK();
+
+ igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+ KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+ if (igi->igi_flags & IGIF_LOOPBACK) {
+ CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
+ ifp, ifp->if_xname);
+ goto out_locked;
+ }
+
+ /*
+ * Switch to IGMPv1 host compatibility mode.
+ */
+ igmp_set_version(igi, IGMP_VERSION_1);
+
+ CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);
+
+ /*
+ * Start the timers in all of our group records
+ * for the interface on which the query arrived,
+ * except those which are already running.
+ */
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ if (inm->inm_timer != 0)
+ continue;
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ break;
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ case IGMP_REPORTING_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ case IGMP_LAZY_MEMBER:
+ case IGMP_SLEEPING_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ inm->inm_state = IGMP_REPORTING_MEMBER;
+ inm->inm_timer = IGMP_RANDOM_DELAY(
+ IGMP_V1V2_MAX_RI * PR_FASTHZ);
+ V_current_state_timers_running = 1;
+ break;
+ case IGMP_LEAVING_MEMBER:
+ break;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+out_locked:
+ IGMP_UNLOCK();
+ IN_MULTI_UNLOCK();
+
+ return (0);
+}
+
+/*
+ * Process a received IGMPv2 general or group-specific query.
+ */
+static int
+igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
+ const struct igmp *igmp)
+{
+ struct ifmultiaddr *ifma;
+ struct igmp_ifinfo *igi;
+ struct in_multi *inm;
+ int is_general_query;
+ uint16_t timer;
+
+ is_general_query = 0;
+
+ /*
+ * Validate address fields upfront.
+ * XXX SMPng: unlocked increments in igmpstat assumed atomic.
+ */
+ if (in_nullhost(igmp->igmp_group)) {
+ /*
+ * IGMPv2 General Query.
+ * If this was not sent to the all-hosts group, ignore it.
+ */
+ if (!in_allhosts(ip->ip_dst))
+ return (0);
+ IGMPSTAT_INC(igps_rcv_gen_queries);
+ is_general_query = 1;
+ } else {
+ /* IGMPv2 Group-Specific Query. */
+ IGMPSTAT_INC(igps_rcv_group_queries);
+ }
+
+ IN_MULTI_LOCK();
+ IGMP_LOCK();
+
+ igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+ KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+ if (igi->igi_flags & IGIF_LOOPBACK) {
+ CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
+ ifp, ifp->if_xname);
+ goto out_locked;
+ }
+
+ /*
+ * Ignore v2 query if in v1 Compatibility Mode.
+ */
+ if (igi->igi_version == IGMP_VERSION_1)
+ goto out_locked;
+
+ igmp_set_version(igi, IGMP_VERSION_2);
+
+ timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
+ if (timer == 0)
+ timer = 1;
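+	/*
+	 * Worked example (assuming the stock PR_FASTHZ of 5 and
+	 * IGMP_TIMER_SCALE of 10): a query advertising a Max Resp Time of
+	 * 10 seconds carries igmp_code == 100, which maps to
+	 * 100 * 5 / 10 == 50 fast-timeout ticks here; the per-group jitter
+	 * is then picked from that range below.
+	 */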
+
+ if (is_general_query) {
+ /*
+ * For each reporting group joined on this
+ * interface, kick the report timer.
+ */
+ CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)",
+ ifp, ifp->if_xname);
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ igmp_v2_update_group(inm, timer);
+ }
+ IF_ADDR_UNLOCK(ifp);
+ } else {
+ /*
+ * Group-specific IGMPv2 query, we need only
+ * look up the single group to process it.
+ */
+ inm = inm_lookup(ifp, igmp->igmp_group);
+ if (inm != NULL) {
+ CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)",
+ inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
+ igmp_v2_update_group(inm, timer);
+ }
+ }
+
+out_locked:
+ IGMP_UNLOCK();
+ IN_MULTI_UNLOCK();
+
+ return (0);
+}
+
+/*
+ * Update the report timer on a group in response to an IGMPv2 query.
+ *
+ * If we are becoming the reporting member for this group, start the timer.
+ * If we already are the reporting member for this group, and timer is
+ * below the threshold, reset it.
+ *
+ * We may be updating the group for the first time since we switched
+ * to IGMPv3. If we are, then we must clear any recorded source lists,
+ * and transition to REPORTING state; the group timer is overloaded
+ * for group and group-source query responses.
+ *
+ * Unlike IGMPv3, the delay per group should be jittered
+ * to avoid bursts of IGMPv2 reports.
+ */
+static void
+igmp_v2_update_group(struct in_multi *inm, const int timer)
+{
+
+ CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__,
+ inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer);
+
+ IN_MULTI_LOCK_ASSERT();
+
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ break;
+ case IGMP_REPORTING_MEMBER:
+ if (inm->inm_timer != 0 &&
+ inm->inm_timer <= timer) {
+ CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
+ "skipping.", __func__);
+ break;
+ }
+ /* FALLTHROUGH */
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ case IGMP_LAZY_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
+ inm->inm_state = IGMP_REPORTING_MEMBER;
+ inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+ V_current_state_timers_running = 1;
+ break;
+ case IGMP_SLEEPING_MEMBER:
+ CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
+ inm->inm_state = IGMP_AWAKENING_MEMBER;
+ break;
+ case IGMP_LEAVING_MEMBER:
+ break;
+ }
+}
+
+/*
+ * Process a received IGMPv3 general, group-specific or
+ * group-and-source-specific query.
+ * Assumes m has already been pulled up to the full IGMP message length.
+ * Return 0 if successful, otherwise an appropriate error code is returned.
+ */
+static int
+igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
+ /*const*/ struct igmpv3 *igmpv3)
+{
+ struct igmp_ifinfo *igi;
+ struct in_multi *inm;
+ int is_general_query;
+ uint32_t maxresp, nsrc, qqi;
+ uint16_t timer;
+ uint8_t qrv;
+
+ is_general_query = 0;
+
+ CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);
+
+ maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */
+ if (maxresp >= 128) {
+ maxresp = IGMP_MANT(igmpv3->igmp_code) <<
+ (IGMP_EXP(igmpv3->igmp_code) + 3);
+ }
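+	/*
+	 * Background (RFC 3376, section 4.1.1): codes of 128 and above are
+	 * floating point, decoded as (mant | 0x10) << (exp + 3).  For
+	 * example, a Max Resp Code of 0x8A (exp 0, mant 0xA) stands for
+	 * 208 tenths of a second, i.e. 20.8 seconds.  The QQIC field a few
+	 * lines below uses the same encoding.
+	 */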
+
+ /*
+ * Robustness must never be less than 2 for on-wire IGMPv3.
+ * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
+ * an exception for interfaces whose IGMPv3 state changes
+ * are redirected to loopback (e.g. MANET).
+ */
+ qrv = IGMP_QRV(igmpv3->igmp_misc);
+ if (qrv < 2) {
+ CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
+ qrv, IGMP_RV_INIT);
+ qrv = IGMP_RV_INIT;
+ }
+
+ qqi = igmpv3->igmp_qqi;
+ if (qqi >= 128) {
+ qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
+ (IGMP_EXP(igmpv3->igmp_qqi) + 3);
+ }
+
+ timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE;
+ if (timer == 0)
+ timer = 1;
+
+ nsrc = ntohs(igmpv3->igmp_numsrc);
+
+ /*
+ * Validate address fields and versions upfront before
+ * accepting v3 query.
+ * XXX SMPng: Unlocked access to igmpstat counters here.
+ */
+ if (in_nullhost(igmpv3->igmp_group)) {
+ /*
+ * IGMPv3 General Query.
+ *
+ * General Queries SHOULD be directed to 224.0.0.1.
+ * A general query with a source list has undefined
+ * behaviour; discard it.
+ */
+ IGMPSTAT_INC(igps_rcv_gen_queries);
+ if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
+ IGMPSTAT_INC(igps_rcv_badqueries);
+ return (0);
+ }
+ is_general_query = 1;
+ } else {
+ /* Group or group-source specific query. */
+ if (nsrc == 0)
+ IGMPSTAT_INC(igps_rcv_group_queries);
+ else
+ IGMPSTAT_INC(igps_rcv_gsr_queries);
+ }
+
+ IN_MULTI_LOCK();
+ IGMP_LOCK();
+
+ igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+ KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+ if (igi->igi_flags & IGIF_LOOPBACK) {
+ CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
+ ifp, ifp->if_xname);
+ goto out_locked;
+ }
+
+ /*
+ * Discard the v3 query if we're in Compatibility Mode.
+	 * The RFC is not clearly worded on whether hosts need to stay in
+ * compatibility mode until the Old Version Querier Present
+ * timer expires.
+ */
+ if (igi->igi_version != IGMP_VERSION_3) {
+ CTR3(KTR_IGMPV3, "ignore v3 query in v%d mode on ifp %p(%s)",
+ igi->igi_version, ifp, ifp->if_xname);
+ goto out_locked;
+ }
+
+ igmp_set_version(igi, IGMP_VERSION_3);
+ igi->igi_rv = qrv;
+ igi->igi_qi = qqi;
+ igi->igi_qri = maxresp;
+
+ CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
+ maxresp);
+
+ if (is_general_query) {
+ /*
+ * Schedule a current-state report on this ifp for
+ * all groups, possibly containing source lists.
+ * If there is a pending General Query response
+ * scheduled earlier than the selected delay, do
+ * not schedule any other reports.
+ * Otherwise, reset the interface timer.
+ */
+ CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)",
+ ifp, ifp->if_xname);
+ if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
+ igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
+ V_interface_timers_running = 1;
+ }
+ } else {
+ /*
+ * Group-source-specific queries are throttled on
+ * a per-group basis to defeat denial-of-service attempts.
+ * Queries for groups we are not a member of on this
+ * link are simply ignored.
+ */
+ inm = inm_lookup(ifp, igmpv3->igmp_group);
+ if (inm == NULL)
+ goto out_locked;
+ if (nsrc > 0) {
+ if (!ratecheck(&inm->inm_lastgsrtv,
+ &V_igmp_gsrdelay)) {
+ CTR1(KTR_IGMPV3, "%s: GS query throttled.",
+ __func__);
+ IGMPSTAT_INC(igps_drop_gsr_queries);
+ goto out_locked;
+ }
+ }
+ CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)",
+ inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname);
+ /*
+ * If there is a pending General Query response
+ * scheduled sooner than the selected delay, no
+ * further report need be scheduled.
+ * Otherwise, prepare to respond to the
+ * group-specific or group-and-source query.
+ */
+ if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer)
+ igmp_input_v3_group_query(inm, igi, timer, igmpv3);
+ }
+
+out_locked:
+ IGMP_UNLOCK();
+ IN_MULTI_UNLOCK();
+
+ return (0);
+}
+
+/*
+ * Process a received IGMPv3 group-specific or group-and-source-specific
+ * query.
+ * Return <0 if any error occurred. Currently this is ignored.
+ */
+static int
+igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi,
+ int timer, /*const*/ struct igmpv3 *igmpv3)
+{
+ int retval;
+ uint16_t nsrc;
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ retval = 0;
+
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ case IGMP_SLEEPING_MEMBER:
+ case IGMP_LAZY_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ case IGMP_LEAVING_MEMBER:
+ return (retval);
+ break;
+ case IGMP_REPORTING_MEMBER:
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ break;
+ }
+
+ nsrc = ntohs(igmpv3->igmp_numsrc);
+
+ /*
+ * Deal with group-specific queries upfront.
+ * If any group query is already pending, purge any recorded
+ * source-list state if it exists, and schedule a query response
+ * for this group-specific query.
+ */
+ if (nsrc == 0) {
+ if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
+ inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
+ inm_clear_recorded(inm);
+ timer = min(inm->inm_timer, timer);
+ }
+ inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
+ inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+ V_current_state_timers_running = 1;
+ return (retval);
+ }
+
+ /*
+ * Deal with the case where a group-and-source-specific query has
+ * been received but a group-specific query is already pending.
+ */
+ if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
+ timer = min(inm->inm_timer, timer);
+ inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+ V_current_state_timers_running = 1;
+ return (retval);
+ }
+
+ /*
+ * Finally, deal with the case where a group-and-source-specific
+ * query has been received, where a response to a previous g-s-r
+ * query exists, or none exists.
+ * In this case, we need to parse the source-list which the Querier
+ * has provided us with and check if we have any source list filter
+ * entries at T1 for these sources. If we do not, there is no need
+	 * to schedule a report and the query may be dropped.
+ * If we do, we must record them and schedule a current-state
+ * report for those sources.
+ * FIXME: Handling source lists larger than 1 mbuf requires that
+ * we pass the mbuf chain pointer down to this function, and use
+ * m_getptr() to walk the chain.
+ */
+ if (inm->inm_nsrc > 0) {
+ const struct in_addr *ap;
+ int i, nrecorded;
+
+ ap = (const struct in_addr *)(igmpv3 + 1);
+ nrecorded = 0;
+ for (i = 0; i < nsrc; i++, ap++) {
+ retval = inm_record_source(inm, ap->s_addr);
+ if (retval < 0)
+ break;
+ nrecorded += retval;
+ }
+ if (nrecorded > 0) {
+ CTR1(KTR_IGMPV3,
+ "%s: schedule response to SG query", __func__);
+ inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
+ inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+ V_current_state_timers_running = 1;
+ }
+ }
+
+ return (retval);
+}
+
+/*
+ * Process a received IGMPv1 host membership report.
+ *
+ * NOTE: 0.0.0.0 workaround breaks const correctness.
+ */
+static int
+igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
+ /*const*/ struct igmp *igmp)
+{
+ struct in_ifaddr *ia;
+ struct in_multi *inm;
+
+ IGMPSTAT_INC(igps_rcv_reports);
+
+ if (ifp->if_flags & IFF_LOOPBACK)
+ return (0);
+
+ if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
+ !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
+ IGMPSTAT_INC(igps_rcv_badreports);
+ return (EINVAL);
+ }
+
+ /*
+ * RFC 3376, Section 4.2.13, 9.2, 9.3:
+ * Booting clients may use the source address 0.0.0.0. Some
+ * IGMP daemons may not know how to use IP_RECVIF to determine
+ * the interface upon which this message was received.
+ * Replace 0.0.0.0 with the subnet address if told to do so.
+ */
+ if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
+ IFP_TO_IA(ifp, ia);
+ if (ia != NULL) {
+ ip->ip_src.s_addr = htonl(ia->ia_subnet);
+ ifa_free(&ia->ia_ifa);
+ }
+ }
+
+ CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)",
+ inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
+
+ /*
+ * IGMPv1 report suppression.
+ * If we are a member of this group, and our membership should be
+ * reported, stop our group timer and transition to the 'lazy' state.
+ */
+ IN_MULTI_LOCK();
+ inm = inm_lookup(ifp, igmp->igmp_group);
+ if (inm != NULL) {
+ struct igmp_ifinfo *igi;
+
+ igi = inm->inm_igi;
+ if (igi == NULL) {
+ KASSERT(igi != NULL,
+ ("%s: no igi for ifp %p", __func__, ifp));
+ goto out_locked;
+ }
+
+ IGMPSTAT_INC(igps_rcv_ourreports);
+
+ /*
+ * If we are in IGMPv3 host mode, do not allow the
+ * other host's IGMPv1 report to suppress our reports
+ * unless explicitly configured to do so.
+ */
+ if (igi->igi_version == IGMP_VERSION_3) {
+ if (V_igmp_legacysupp)
+ igmp_v3_suppress_group_record(inm);
+ goto out_locked;
+ }
+
+ inm->inm_timer = 0;
+
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ break;
+ case IGMP_IDLE_MEMBER:
+ case IGMP_LAZY_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ CTR3(KTR_IGMPV3,
+ "report suppressed for %s on ifp %p(%s)",
+ inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
+ case IGMP_SLEEPING_MEMBER:
+ inm->inm_state = IGMP_SLEEPING_MEMBER;
+ break;
+ case IGMP_REPORTING_MEMBER:
+ CTR3(KTR_IGMPV3,
+ "report suppressed for %s on ifp %p(%s)",
+ inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
+ if (igi->igi_version == IGMP_VERSION_1)
+ inm->inm_state = IGMP_LAZY_MEMBER;
+ else if (igi->igi_version == IGMP_VERSION_2)
+ inm->inm_state = IGMP_SLEEPING_MEMBER;
+ break;
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ case IGMP_LEAVING_MEMBER:
+ break;
+ }
+ }
+
+out_locked:
+ IN_MULTI_UNLOCK();
+
+ return (0);
+}
+
+/*
+ * Process a received IGMPv2 host membership report.
+ *
+ * NOTE: 0.0.0.0 workaround breaks const correctness.
+ */
+static int
+igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
+ /*const*/ struct igmp *igmp)
+{
+ struct in_ifaddr *ia;
+ struct in_multi *inm;
+
+ /*
+ * Make sure we don't hear our own membership report. Fast
+ * leave requires knowing that we are the only member of a
+ * group.
+ */
+ IFP_TO_IA(ifp, ia);
+ if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
+ ifa_free(&ia->ia_ifa);
+ return (0);
+ }
+
+ IGMPSTAT_INC(igps_rcv_reports);
+
+ if (ifp->if_flags & IFF_LOOPBACK) {
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return (0);
+ }
+
+ if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
+ !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ IGMPSTAT_INC(igps_rcv_badreports);
+ return (EINVAL);
+ }
+
+ /*
+ * RFC 3376, Section 4.2.13, 9.2, 9.3:
+ * Booting clients may use the source address 0.0.0.0. Some
+ * IGMP daemons may not know how to use IP_RECVIF to determine
+ * the interface upon which this message was received.
+ * Replace 0.0.0.0 with the subnet address if told to do so.
+ */
+ if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
+ if (ia != NULL)
+ ip->ip_src.s_addr = htonl(ia->ia_subnet);
+ }
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+
+ CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)",
+ inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
+
+ /*
+ * IGMPv2 report suppression.
+ * If we are a member of this group, and our membership should be
+ * reported, and our group timer is pending or about to be reset,
+ * stop our group timer by transitioning to the 'lazy' state.
+ */
+ IN_MULTI_LOCK();
+ inm = inm_lookup(ifp, igmp->igmp_group);
+ if (inm != NULL) {
+ struct igmp_ifinfo *igi;
+
+ igi = inm->inm_igi;
+ KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp));
+
+ IGMPSTAT_INC(igps_rcv_ourreports);
+
+ /*
+ * If we are in IGMPv3 host mode, do not allow the
+		 * other host's IGMPv2 report to suppress our reports
+ * unless explicitly configured to do so.
+ */
+ if (igi->igi_version == IGMP_VERSION_3) {
+ if (V_igmp_legacysupp)
+ igmp_v3_suppress_group_record(inm);
+ goto out_locked;
+ }
+
+ inm->inm_timer = 0;
+
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ case IGMP_SLEEPING_MEMBER:
+ break;
+ case IGMP_REPORTING_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ CTR3(KTR_IGMPV3,
+ "report suppressed for %s on ifp %p(%s)",
+ inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
+ case IGMP_LAZY_MEMBER:
+ inm->inm_state = IGMP_LAZY_MEMBER;
+ break;
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ case IGMP_LEAVING_MEMBER:
+ break;
+ }
+ }
+
+out_locked:
+ IN_MULTI_UNLOCK();
+
+ return (0);
+}
+
+void
+igmp_input(struct mbuf *m, int off)
+{
+ int iphlen;
+ struct ifnet *ifp;
+ struct igmp *igmp;
+ struct ip *ip;
+ int igmplen;
+ int minlen;
+ int queryver;
+
+ CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off);
+
+ ifp = m->m_pkthdr.rcvif;
+
+ IGMPSTAT_INC(igps_rcv_total);
+
+ ip = mtod(m, struct ip *);
+ iphlen = off;
+ igmplen = ip->ip_len;
+
+ /*
+ * Validate lengths.
+ */
+ if (igmplen < IGMP_MINLEN) {
+ IGMPSTAT_INC(igps_rcv_tooshort);
+ m_freem(m);
+ return;
+ }
+
+ /*
+ * Always pullup to the minimum size for v1/v2 or v3
+ * to amortize calls to m_pullup().
+ */
+ minlen = iphlen;
+ if (igmplen >= IGMP_V3_QUERY_MINLEN)
+ minlen += IGMP_V3_QUERY_MINLEN;
+ else
+ minlen += IGMP_MINLEN;
+ if ((m->m_flags & M_EXT || m->m_len < minlen) &&
+ (m = m_pullup(m, minlen)) == 0) {
+ IGMPSTAT_INC(igps_rcv_tooshort);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+
+ /*
+ * Validate checksum.
+ */
+ m->m_data += iphlen;
+ m->m_len -= iphlen;
+ igmp = mtod(m, struct igmp *);
+ if (in_cksum(m, igmplen)) {
+ IGMPSTAT_INC(igps_rcv_badsum);
+ m_freem(m);
+ return;
+ }
+ m->m_data -= iphlen;
+ m->m_len += iphlen;
+
+ /*
+ * IGMP control traffic is link-scope, and must have a TTL of 1.
+ * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
+ * probe packets may come from beyond the LAN.
+ */
+ if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
+ IGMPSTAT_INC(igps_rcv_badttl);
+ m_freem(m);
+ return;
+ }
+
+ switch (igmp->igmp_type) {
+ case IGMP_HOST_MEMBERSHIP_QUERY:
+ if (igmplen == IGMP_MINLEN) {
+ if (igmp->igmp_code == 0)
+ queryver = IGMP_VERSION_1;
+ else
+ queryver = IGMP_VERSION_2;
+ } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
+ queryver = IGMP_VERSION_3;
+ } else {
+ IGMPSTAT_INC(igps_rcv_tooshort);
+ m_freem(m);
+ return;
+ }
+
+ switch (queryver) {
+ case IGMP_VERSION_1:
+ IGMPSTAT_INC(igps_rcv_v1v2_queries);
+ if (!V_igmp_v1enable)
+ break;
+ if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
+ m_freem(m);
+ return;
+ }
+ break;
+
+ case IGMP_VERSION_2:
+ IGMPSTAT_INC(igps_rcv_v1v2_queries);
+ if (!V_igmp_v2enable)
+ break;
+ if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
+ m_freem(m);
+ return;
+ }
+ break;
+
+ case IGMP_VERSION_3: {
+ struct igmpv3 *igmpv3;
+ uint16_t igmpv3len;
+ uint16_t srclen;
+ int nsrc;
+
+ IGMPSTAT_INC(igps_rcv_v3_queries);
+ igmpv3 = (struct igmpv3 *)igmp;
+ /*
+ * Validate length based on source count.
+ */
+ nsrc = ntohs(igmpv3->igmp_numsrc);
+ srclen = sizeof(struct in_addr) * nsrc;
+ if (nsrc * sizeof(in_addr_t) > srclen) {
+ IGMPSTAT_INC(igps_rcv_tooshort);
+ return;
+ }
+ /*
+ * m_pullup() may modify m, so pullup in
+ * this scope.
+ */
+ igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
+ srclen;
+ if ((m->m_flags & M_EXT ||
+ m->m_len < igmpv3len) &&
+ (m = m_pullup(m, igmpv3len)) == NULL) {
+ IGMPSTAT_INC(igps_rcv_tooshort);
+ return;
+ }
+ igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *)
+ + iphlen);
+ if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
+ m_freem(m);
+ return;
+ }
+ }
+ break;
+ }
+ break;
+
+ case IGMP_v1_HOST_MEMBERSHIP_REPORT:
+ if (!V_igmp_v1enable)
+ break;
+ if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
+ m_freem(m);
+ return;
+ }
+ break;
+
+ case IGMP_v2_HOST_MEMBERSHIP_REPORT:
+ if (!V_igmp_v2enable)
+ break;
+ if (!ip_checkrouteralert(m))
+ IGMPSTAT_INC(igps_rcv_nora);
+ if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
+ m_freem(m);
+ return;
+ }
+ break;
+
+ case IGMP_v3_HOST_MEMBERSHIP_REPORT:
+ /*
+ * Hosts do not need to process IGMPv3 membership reports,
+ * as report suppression is no longer required.
+ */
+ if (!ip_checkrouteralert(m))
+ IGMPSTAT_INC(igps_rcv_nora);
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * Pass all valid IGMP packets up to any process(es) listening on a
+ * raw IGMP socket.
+ */
+ rip_input(m, off);
+}
+
+
+/*
+ * Fast timeout handler (global).
+ * VIMAGE: Timeout handlers are expected to service all vimages.
+ */
+void
+igmp_fasttimo(void)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ igmp_fasttimo_vnet();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
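+/*
+ * The protocol fast timeout is expected to run PR_FASTHZ (normally 5)
+ * times per second, so the per-group and per-interface counters
+ * decremented below tick in units of roughly 200 ms.
+ */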
+
+/*
+ * Fast timeout handler (per-vnet).
+ * Sends are shuffled off to a netisr to deal with Giant.
+ *
+ * VIMAGE: Assume caller has set up our curvnet.
+ */
+static void
+igmp_fasttimo_vnet(void)
+{
+ struct ifqueue scq; /* State-change packets */
+ struct ifqueue qrq; /* Query response packets */
+ struct ifnet *ifp;
+ struct igmp_ifinfo *igi;
+ struct ifmultiaddr *ifma, *tifma;
+ struct in_multi *inm;
+ int loop, uri_fasthz;
+
+ loop = 0;
+ uri_fasthz = 0;
+
+ /*
+ * Quick check to see if any work needs to be done, in order to
+ * minimize the overhead of fasttimo processing.
+ * SMPng: XXX Unlocked reads.
+ */
+ if (!V_current_state_timers_running &&
+ !V_interface_timers_running &&
+ !V_state_change_timers_running)
+ return;
+
+ IN_MULTI_LOCK();
+ IGMP_LOCK();
+
+ /*
+ * IGMPv3 General Query response timer processing.
+ */
+ if (V_interface_timers_running) {
+ CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);
+
+ V_interface_timers_running = 0;
+ LIST_FOREACH(igi, &V_igi_head, igi_link) {
+ if (igi->igi_v3_timer == 0) {
+ /* Do nothing. */
+ } else if (--igi->igi_v3_timer == 0) {
+ igmp_v3_dispatch_general_query(igi);
+ } else {
+ V_interface_timers_running = 1;
+ }
+ }
+ }
+
+ if (!V_current_state_timers_running &&
+ !V_state_change_timers_running)
+ goto out_locked;
+
+ V_current_state_timers_running = 0;
+ V_state_change_timers_running = 0;
+
+ CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);
+
+ /*
+ * IGMPv1/v2/v3 host report and state-change timer processing.
+ * Note: Processing a v3 group timer may remove a node.
+ */
+ LIST_FOREACH(igi, &V_igi_head, igi_link) {
+ ifp = igi->igi_ifp;
+
+ if (igi->igi_version == IGMP_VERSION_3) {
+ loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
+ uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
+ PR_FASTHZ);
+
+ memset(&qrq, 0, sizeof(struct ifqueue));
+ IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS);
+
+ memset(&scq, 0, sizeof(struct ifqueue));
+ IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
+ }
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link,
+ tifma) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ switch (igi->igi_version) {
+ case IGMP_VERSION_1:
+ case IGMP_VERSION_2:
+ igmp_v1v2_process_group_timer(inm,
+ igi->igi_version);
+ break;
+ case IGMP_VERSION_3:
+ igmp_v3_process_group_timers(igi, &qrq,
+ &scq, inm, uri_fasthz);
+ break;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ if (igi->igi_version == IGMP_VERSION_3) {
+ struct in_multi *tinm;
+
+ igmp_dispatch_queue(&qrq, 0, loop);
+ igmp_dispatch_queue(&scq, 0, loop);
+
+ /*
+ * Free the in_multi reference(s) for this
+ * IGMP lifecycle.
+ */
+ SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead,
+ inm_nrele, tinm) {
+ SLIST_REMOVE_HEAD(&igi->igi_relinmhead,
+ inm_nrele);
+ inm_release_locked(inm);
+ }
+ }
+ }
+
+out_locked:
+ IGMP_UNLOCK();
+ IN_MULTI_UNLOCK();
+}
+
+/*
+ * Update host report group timer for IGMPv1/v2.
+ * Will update the global pending timer flags.
+ */
+static void
+igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
+{
+ int report_timer_expired;
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ if (inm->inm_timer == 0) {
+ report_timer_expired = 0;
+ } else if (--inm->inm_timer == 0) {
+ report_timer_expired = 1;
+ } else {
+ V_current_state_timers_running = 1;
+ return;
+ }
+
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ case IGMP_LAZY_MEMBER:
+ case IGMP_SLEEPING_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ break;
+ case IGMP_REPORTING_MEMBER:
+ if (report_timer_expired) {
+ inm->inm_state = IGMP_IDLE_MEMBER;
+ (void)igmp_v1v2_queue_report(inm,
+ (version == IGMP_VERSION_2) ?
+ IGMP_v2_HOST_MEMBERSHIP_REPORT :
+ IGMP_v1_HOST_MEMBERSHIP_REPORT);
+ }
+ break;
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ case IGMP_LEAVING_MEMBER:
+ break;
+ }
+}
+
+/*
+ * Update a group's timers for IGMPv3.
+ * Will update the global pending timer flags.
+ * Note: Unlocked read from igi.
+ */
+static void
+igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
+ struct ifqueue *qrq, struct ifqueue *scq,
+ struct in_multi *inm, const int uri_fasthz)
+{
+ int query_response_timer_expired;
+ int state_change_retransmit_timer_expired;
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ query_response_timer_expired = 0;
+ state_change_retransmit_timer_expired = 0;
+
+ /*
+ * During a transition from v1/v2 compatibility mode back to v3,
+ * a group record in REPORTING state may still have its group
+ * timer active. This is a no-op in this function; it is easier
+ * to deal with it here than to complicate the slow-timeout path.
+ */
+ if (inm->inm_timer == 0) {
+ query_response_timer_expired = 0;
+ } else if (--inm->inm_timer == 0) {
+ query_response_timer_expired = 1;
+ } else {
+ V_current_state_timers_running = 1;
+ }
+
+ if (inm->inm_sctimer == 0) {
+ state_change_retransmit_timer_expired = 0;
+ } else if (--inm->inm_sctimer == 0) {
+ state_change_retransmit_timer_expired = 1;
+ } else {
+ V_state_change_timers_running = 1;
+ }
+
+ /* We are in fasttimo, so be quick about it. */
+ if (!state_change_retransmit_timer_expired &&
+ !query_response_timer_expired)
+ return;
+
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ case IGMP_SLEEPING_MEMBER:
+ case IGMP_LAZY_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ break;
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ /*
+ * Respond to a previously pending Group-Specific
+ * or Group-and-Source-Specific query by enqueueing
+ * the appropriate Current-State report for
+ * immediate transmission.
+ */
+ if (query_response_timer_expired) {
+ int retval;
+
+ retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
+ (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
+ CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
+ __func__, retval);
+ inm->inm_state = IGMP_REPORTING_MEMBER;
+ /* XXX Clear recorded sources for next time. */
+ inm_clear_recorded(inm);
+ }
+ /* FALLTHROUGH */
+ case IGMP_REPORTING_MEMBER:
+ case IGMP_LEAVING_MEMBER:
+ if (state_change_retransmit_timer_expired) {
+ /*
+ * State-change retransmission timer fired.
+ * If there are any further pending retransmissions,
+ * set the global pending state-change flag, and
+ * reset the timer.
+ */
+ if (--inm->inm_scrv > 0) {
+ inm->inm_sctimer = uri_fasthz;
+ V_state_change_timers_running = 1;
+ }
+ /*
+ * Retransmit the previously computed state-change
+ * report. If there are no further pending
+ * retransmissions, the mbuf queue will be consumed.
+ * Update T0 state to T1 as we have now sent
+ * a state-change.
+ */
+ (void)igmp_v3_merge_state_changes(inm, scq);
+
+ inm_commit(inm);
+ CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
+ inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
+
+ /*
+ * If we are leaving the group for good, make sure
+ * we release IGMP's reference to it.
+ * This release must be deferred using a SLIST,
+ * as we are called from a loop which traverses
+ * the in_ifmultiaddr TAILQ.
+ */
+ if (inm->inm_state == IGMP_LEAVING_MEMBER &&
+ inm->inm_scrv == 0) {
+ inm->inm_state = IGMP_NOT_MEMBER;
+ SLIST_INSERT_HEAD(&igi->igi_relinmhead,
+ inm, inm_nrele);
+ }
+ }
+ break;
+ }
+}
+
+
+/*
+ * Suppress a group's pending response to a group or source/group query.
+ *
+ * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
+ * Do NOT update ST1/ST0 as this operation merely suppresses
+ * the currently pending group record.
+ * Do NOT suppress the response to a general query. It is possible but
+ * it would require adding another state or flag.
+ */
+static void
+igmp_v3_suppress_group_record(struct in_multi *inm)
+{
+
+ IN_MULTI_LOCK_ASSERT();
+
+ KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3,
+ ("%s: not IGMPv3 mode on link", __func__));
+
+	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
+	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
+ return;
+
+ if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
+ inm_clear_recorded(inm);
+
+ inm->inm_timer = 0;
+ inm->inm_state = IGMP_REPORTING_MEMBER;
+}
+
+/*
+ * Switch to a different IGMP version on the given interface,
+ * as per Section 7.2.1.
+ */
+static void
+igmp_set_version(struct igmp_ifinfo *igi, const int version)
+{
+ int old_version_timer;
+
+ IGMP_LOCK_ASSERT();
+
+ CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__,
+ version, igi->igi_ifp, igi->igi_ifp->if_xname);
+
+ if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) {
+ /*
+ * Compute the "Older Version Querier Present" timer as per
+ * Section 8.12.
+ */
+ old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
+ old_version_timer *= PR_SLOWHZ;
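+		/*
+		 * Worked example, assuming the defaults used elsewhere in
+		 * this file (Robustness Variable 2, Query Interval 125s,
+		 * Query Response Interval 10s): 2 * 125 + 10 = 260 seconds,
+		 * or 520 slowtimo ticks if PR_SLOWHZ is the usual 2 slow
+		 * ticks per second.
+		 */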
+
+ if (version == IGMP_VERSION_1) {
+ igi->igi_v1_timer = old_version_timer;
+ igi->igi_v2_timer = 0;
+ } else if (version == IGMP_VERSION_2) {
+ igi->igi_v1_timer = 0;
+ igi->igi_v2_timer = old_version_timer;
+ }
+ }
+
+ if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
+ if (igi->igi_version != IGMP_VERSION_2) {
+ igi->igi_version = IGMP_VERSION_2;
+ igmp_v3_cancel_link_timers(igi);
+ }
+ } else if (igi->igi_v1_timer > 0) {
+ if (igi->igi_version != IGMP_VERSION_1) {
+ igi->igi_version = IGMP_VERSION_1;
+ igmp_v3_cancel_link_timers(igi);
+ }
+ }
+}
+
+/*
+ * Cancel pending IGMPv3 timers for the given link and all groups
+ * joined on it; state-change, general-query, and group-query timers.
+ *
+ * Only ever called on a transition from v3 to Compatibility mode. Kill
+ * the timers stone dead (this may be expensive for large N groups), they
+ * will be restarted if Compatibility Mode deems that they must be due to
+ * query processing.
+ */
+static void
+igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
+{
+ struct ifmultiaddr *ifma;
+ struct ifnet *ifp;
+ struct in_multi *inm;
+
+ CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
+ igi->igi_ifp, igi->igi_ifp->if_xname);
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ /*
+ * Stop the v3 General Query Response on this link stone dead.
+ * If fasttimo is woken up due to V_interface_timers_running,
+ * the flag will be cleared if there are no pending link timers.
+ */
+ igi->igi_v3_timer = 0;
+
+ /*
+ * Now clear the current-state and state-change report timers
+ * for all memberships scoped to this link.
+ */
+ ifp = igi->igi_ifp;
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ case IGMP_LAZY_MEMBER:
+ case IGMP_SLEEPING_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ /*
+ * These states are either not relevant in v3 mode,
+ * or are unreported. Do nothing.
+ */
+ break;
+ case IGMP_LEAVING_MEMBER:
+ /*
+ * If we are leaving the group and switching to
+ * compatibility mode, we need to release the final
+ * reference held for issuing the INCLUDE {}, and
+ * transition to REPORTING to ensure the host leave
+ * message is sent upstream to the old querier --
+ * transition to NOT would lose the leave and race.
+ *
+ * SMPNG: Must drop and re-acquire IF_ADDR_LOCK
+ * around inm_release_locked(), as it is not
+ * a recursive mutex.
+ */
+ IF_ADDR_UNLOCK(ifp);
+ inm_release_locked(inm);
+ IF_ADDR_LOCK(ifp);
+ /* FALLTHROUGH */
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ inm_clear_recorded(inm);
+ /* FALLTHROUGH */
+ case IGMP_REPORTING_MEMBER:
+ inm->inm_state = IGMP_REPORTING_MEMBER;
+ break;
+ }
+ /*
+ * Always clear state-change and group report timers.
+ * Free any pending IGMPv3 state-change records.
+ */
+ inm->inm_sctimer = 0;
+ inm->inm_timer = 0;
+ _IF_DRAIN(&inm->inm_scq);
+ }
+ IF_ADDR_UNLOCK(ifp);
+}
+
+/*
+ * Update the Older Version Querier Present timers for a link.
+ * See Section 7.2.1 of RFC 3376.
+ */
+static void
+igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
+{
+
+ IGMP_LOCK_ASSERT();
+
+ if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
+ /*
+ * IGMPv1 and IGMPv2 Querier Present timers expired.
+ *
+ * Revert to IGMPv3.
+ */
+ if (igi->igi_version != IGMP_VERSION_3) {
+ CTR5(KTR_IGMPV3,
+ "%s: transition from v%d -> v%d on %p(%s)",
+ __func__, igi->igi_version, IGMP_VERSION_3,
+ igi->igi_ifp, igi->igi_ifp->if_xname);
+ igi->igi_version = IGMP_VERSION_3;
+ }
+ } else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
+ /*
+ * IGMPv1 Querier Present timer expired,
+ * IGMPv2 Querier Present timer running.
+ * If IGMPv2 was disabled since last timeout,
+ * revert to IGMPv3.
+ * If IGMPv2 is enabled, revert to IGMPv2.
+ */
+ if (!V_igmp_v2enable) {
+ CTR5(KTR_IGMPV3,
+ "%s: transition from v%d -> v%d on %p(%s)",
+ __func__, igi->igi_version, IGMP_VERSION_3,
+ igi->igi_ifp, igi->igi_ifp->if_xname);
+ igi->igi_v2_timer = 0;
+ igi->igi_version = IGMP_VERSION_3;
+ } else {
+ --igi->igi_v2_timer;
+ if (igi->igi_version != IGMP_VERSION_2) {
+ CTR5(KTR_IGMPV3,
+ "%s: transition from v%d -> v%d on %p(%s)",
+ __func__, igi->igi_version, IGMP_VERSION_2,
+ igi->igi_ifp, igi->igi_ifp->if_xname);
+ igi->igi_version = IGMP_VERSION_2;
+ }
+ }
+ } else if (igi->igi_v1_timer > 0) {
+ /*
+ * IGMPv1 Querier Present timer running.
+ * Stop IGMPv2 timer if running.
+ *
+ * If IGMPv1 was disabled since last timeout,
+ * revert to IGMPv3.
+ * If IGMPv1 is enabled, reset IGMPv2 timer if running.
+ */
+ if (!V_igmp_v1enable) {
+ CTR5(KTR_IGMPV3,
+ "%s: transition from v%d -> v%d on %p(%s)",
+ __func__, igi->igi_version, IGMP_VERSION_3,
+ igi->igi_ifp, igi->igi_ifp->if_xname);
+ igi->igi_v1_timer = 0;
+ igi->igi_version = IGMP_VERSION_3;
+ } else {
+ --igi->igi_v1_timer;
+ }
+ if (igi->igi_v2_timer > 0) {
+ CTR3(KTR_IGMPV3,
+ "%s: cancel v2 timer on %p(%s)",
+ __func__, igi->igi_ifp, igi->igi_ifp->if_xname);
+ igi->igi_v2_timer = 0;
+ }
+ }
+}
+
+/*
+ * Global slowtimo handler.
+ * VIMAGE: Timeout handlers are expected to service all vimages.
+ */
+void
+igmp_slowtimo(void)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ igmp_slowtimo_vnet();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
+
+/*
+ * Per-vnet slowtimo handler.
+ */
+static void
+igmp_slowtimo_vnet(void)
+{
+ struct igmp_ifinfo *igi;
+
+ IGMP_LOCK();
+
+ LIST_FOREACH(igi, &V_igi_head, igi_link) {
+ igmp_v1v2_process_querier_timers(igi);
+ }
+
+ IGMP_UNLOCK();
+}
+
+/*
+ * Dispatch an IGMPv1/v2 host report or leave message.
+ * These are always small enough to fit inside a single mbuf.
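+ * (The message built here is a fixed 20-byte IPv4 header plus an
+ * 8-byte IGMP message, 28 bytes in total, so one header mbuf suffices.)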
+ */
+static int
+igmp_v1v2_queue_report(struct in_multi *inm, const int type)
+{
+ struct ifnet *ifp;
+ struct igmp *igmp;
+ struct ip *ip;
+ struct mbuf *m;
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ ifp = inm->inm_ifp;
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (ENOMEM);
+ MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
+
+ m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
+
+ m->m_data += sizeof(struct ip);
+ m->m_len = sizeof(struct igmp);
+
+ igmp = mtod(m, struct igmp *);
+ igmp->igmp_type = type;
+ igmp->igmp_code = 0;
+ igmp->igmp_group = inm->inm_addr;
+ igmp->igmp_cksum = 0;
+ igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
+
+ m->m_data -= sizeof(struct ip);
+ m->m_len += sizeof(struct ip);
+
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = 0;
+ ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
+ ip->ip_off = 0;
+ ip->ip_p = IPPROTO_IGMP;
+ ip->ip_src.s_addr = INADDR_ANY;
+
+ if (type == IGMP_HOST_LEAVE_MESSAGE)
+ ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
+ else
+ ip->ip_dst = inm->inm_addr;
+
+ igmp_save_context(m, ifp);
+
+ m->m_flags |= M_IGMPV2;
+ if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
+ m->m_flags |= M_IGMP_LOOP;
+
+ CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m);
+ netisr_dispatch(NETISR_IGMP, m);
+
+ return (0);
+}
+
+/*
+ * Process a state change from the upper layer for the given IPv4 group.
+ *
+ * Each socket holds a reference on the in_multi in its own ip_moptions.
+ * The socket layer will have made the necessary updates to the group
+ * state, it is now up to IGMP to issue a state change report if there
+ * has been any change between T0 (when the last state-change was issued)
+ * and T1 (now).
+ *
+ * We use the IGMPv3 state machine at group level. The IGMP module
+ * however makes the decision as to which IGMP protocol version to speak.
+ * A state change *from* INCLUDE {} always means an initial join.
+ * A state change *to* INCLUDE {} always means a final leave.
+ *
+ * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
+ * save ourselves a bunch of work; any exclusive mode groups need not
+ * compute source filter lists.
+ *
+ * VIMAGE: curvnet should have been set by caller, as this routine
+ * is called from the socket option handlers.
+ */
+int
+igmp_change_state(struct in_multi *inm)
+{
+ struct igmp_ifinfo *igi;
+ struct ifnet *ifp;
+ int error;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ error = 0;
+
+ /*
+ * Try to detect if the upper layer just asked us to change state
+ * for an interface which has now gone away.
+ */
+ KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
+ ifp = inm->inm_ifma->ifma_ifp;
+ if (ifp != NULL) {
+ /*
+ * Sanity check that netinet's notion of ifp is the
+ * same as net's.
+ */
+ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
+ }
+
+ IGMP_LOCK();
+
+ igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+ KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+ /*
+ * If we detect a state transition to or from MCAST_UNDEFINED
+ * for this group, then we are starting or finishing an IGMP
+ * life cycle for this group.
+ */
+ if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
+ CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__,
+ inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode);
+ if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
+ CTR1(KTR_IGMPV3, "%s: initial join", __func__);
+ error = igmp_initial_join(inm, igi);
+ goto out_locked;
+ } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
+ CTR1(KTR_IGMPV3, "%s: final leave", __func__);
+ igmp_final_leave(inm, igi);
+ goto out_locked;
+ }
+ } else {
+ CTR1(KTR_IGMPV3, "%s: filter set change", __func__);
+ }
+
+ error = igmp_handle_state_change(inm, igi);
+
+out_locked:
+ IGMP_UNLOCK();
+ return (error);
+}
+
+/*
+ * Perform the initial join for an IGMP group.
+ *
+ * When joining a group:
+ * If the group should have its IGMP traffic suppressed, do nothing.
+ * IGMPv1 starts sending IGMPv1 host membership reports.
+ * IGMPv2 starts sending IGMPv2 host membership reports.
+ * IGMPv3 will schedule an IGMPv3 state-change report containing the
+ * initial state of the membership.
+ */
+static int
+igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
+{
+ struct ifnet *ifp;
+ struct ifqueue *ifq;
+ int error, retval, syncstates;
+
+ CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)",
+ __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
+ inm->inm_ifp->if_xname);
+
+ error = 0;
+ syncstates = 1;
+
+ ifp = inm->inm_ifp;
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
+
+ /*
+ * Groups joined on loopback or marked as 'not reported',
+ * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
+ * are never reported in any IGMP protocol exchanges.
+ * All other groups enter the appropriate IGMP state machine
+ * for the version in use on this link.
+ * A link marked as IGIF_SILENT causes IGMP to be completely
+ * disabled for the link.
+ */
+ if ((ifp->if_flags & IFF_LOOPBACK) ||
+ (igi->igi_flags & IGIF_SILENT) ||
+ !igmp_isgroupreported(inm->inm_addr)) {
+ CTR1(KTR_IGMPV3,
+"%s: not kicking state machine for silent group", __func__);
+ inm->inm_state = IGMP_SILENT_MEMBER;
+ inm->inm_timer = 0;
+ } else {
+ /*
+ * Deal with overlapping in_multi lifecycle.
+ * If this group was LEAVING, then make sure
+ * we drop the reference we picked up to keep the
+ * group around for the final INCLUDE {} enqueue.
+ */
+ if (igi->igi_version == IGMP_VERSION_3 &&
+ inm->inm_state == IGMP_LEAVING_MEMBER)
+ inm_release_locked(inm);
+
+ inm->inm_state = IGMP_REPORTING_MEMBER;
+
+ switch (igi->igi_version) {
+ case IGMP_VERSION_1:
+ case IGMP_VERSION_2:
+ inm->inm_state = IGMP_IDLE_MEMBER;
+ error = igmp_v1v2_queue_report(inm,
+ (igi->igi_version == IGMP_VERSION_2) ?
+ IGMP_v2_HOST_MEMBERSHIP_REPORT :
+ IGMP_v1_HOST_MEMBERSHIP_REPORT);
+ if (error == 0) {
+ inm->inm_timer = IGMP_RANDOM_DELAY(
+ IGMP_V1V2_MAX_RI * PR_FASTHZ);
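+			/*
+			 * Assuming the customary IGMP_V1V2_MAX_RI of 10
+			 * seconds and PR_FASTHZ of 5 fast ticks per second,
+			 * this schedules the report a random 1..50 ticks
+			 * (up to roughly 10 seconds) from now.
+			 */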
+ V_current_state_timers_running = 1;
+ }
+ break;
+
+ case IGMP_VERSION_3:
+ /*
+ * Defer update of T0 to T1, until the first copy
+ * of the state change has been transmitted.
+ */
+ syncstates = 0;
+
+ /*
+ * Immediately enqueue a State-Change Report for
+ * this interface, freeing any previous reports.
+ * Don't kick the timers if there is nothing to do,
+ * or if an error occurred.
+ */
+ ifq = &inm->inm_scq;
+ _IF_DRAIN(ifq);
+ retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
+ 0, 0);
+ CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
+ __func__, retval);
+ if (retval <= 0) {
+ error = retval * -1;
+ break;
+ }
+
+ /*
+ * Schedule transmission of pending state-change
+ * report up to RV times for this link. The timer
+ * will fire at the next igmp_fasttimo (~200ms),
+ * giving us an opportunity to merge the reports.
+ */
+ if (igi->igi_flags & IGIF_LOOPBACK) {
+ inm->inm_scrv = 1;
+ } else {
+ KASSERT(igi->igi_rv > 1,
+ ("%s: invalid robustness %d", __func__,
+ igi->igi_rv));
+ inm->inm_scrv = igi->igi_rv;
+ }
+ inm->inm_sctimer = 1;
+ V_state_change_timers_running = 1;
+
+ error = 0;
+ break;
+ }
+ }
+
+ /*
+ * Only update the T0 state if state change is atomic,
+ * i.e. we don't need to wait for a timer to fire before we
+ * can consider the state change to have been communicated.
+ */
+ if (syncstates) {
+ inm_commit(inm);
+ CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
+ inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
+ }
+
+ return (error);
+}
+
+/*
+ * Issue an intermediate state change during the IGMP life-cycle.
+ */
+static int
+igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
+{
+ struct ifnet *ifp;
+ int retval;
+
+ CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)",
+ __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
+ inm->inm_ifp->if_xname);
+
+ ifp = inm->inm_ifp;
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
+
+ if ((ifp->if_flags & IFF_LOOPBACK) ||
+ (igi->igi_flags & IGIF_SILENT) ||
+ !igmp_isgroupreported(inm->inm_addr) ||
+ (igi->igi_version != IGMP_VERSION_3)) {
+ if (!igmp_isgroupreported(inm->inm_addr)) {
+ CTR1(KTR_IGMPV3,
+"%s: not kicking state machine for silent group", __func__);
+ }
+ CTR1(KTR_IGMPV3, "%s: nothing to do", __func__);
+ inm_commit(inm);
+ CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
+ inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
+ return (0);
+ }
+
+ _IF_DRAIN(&inm->inm_scq);
+
+ retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
+ CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval);
+ if (retval <= 0)
+ return (-retval);
+
+ /*
+ * If record(s) were enqueued, start the state-change
+ * report timer for this group.
+ */
+ inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv);
+ inm->inm_sctimer = 1;
+ V_state_change_timers_running = 1;
+
+ return (0);
+}
+
+/*
+ * Perform the final leave for an IGMP group.
+ *
+ * When leaving a group:
+ * IGMPv1 does nothing.
+ * IGMPv2 sends a host leave message, if and only if we are the reporter.
+ * IGMPv3 enqueues a state-change report containing a transition
+ * to INCLUDE {} for immediate transmission.
+ */
+static void
+igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
+{
+ int syncstates;
+
+ syncstates = 1;
+
+ CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)",
+ __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
+ inm->inm_ifp->if_xname);
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ case IGMP_LEAVING_MEMBER:
+ /* Already leaving or left; do nothing. */
+ CTR1(KTR_IGMPV3,
+"%s: not kicking state machine for silent group", __func__);
+ break;
+ case IGMP_REPORTING_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ if (igi->igi_version == IGMP_VERSION_2) {
+#ifdef INVARIANTS
+ if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
+ inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
+ panic("%s: IGMPv3 state reached, not IGMPv3 mode",
+ __func__);
+#endif
+ igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
+ inm->inm_state = IGMP_NOT_MEMBER;
+ } else if (igi->igi_version == IGMP_VERSION_3) {
+ /*
+ * Stop group timer and all pending reports.
+ * Immediately enqueue a state-change report
+ * TO_IN {} to be sent on the next fast timeout,
+ * giving us an opportunity to merge reports.
+ */
+ _IF_DRAIN(&inm->inm_scq);
+ inm->inm_timer = 0;
+ if (igi->igi_flags & IGIF_LOOPBACK) {
+ inm->inm_scrv = 1;
+ } else {
+ inm->inm_scrv = igi->igi_rv;
+ }
+ CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d "
+ "pending retransmissions.", __func__,
+ inet_ntoa(inm->inm_addr),
+ inm->inm_ifp->if_xname, inm->inm_scrv);
+ if (inm->inm_scrv == 0) {
+ inm->inm_state = IGMP_NOT_MEMBER;
+ inm->inm_sctimer = 0;
+ } else {
+ int retval;
+
+ inm_acquire_locked(inm);
+
+ retval = igmp_v3_enqueue_group_record(
+ &inm->inm_scq, inm, 1, 0, 0);
+ KASSERT(retval != 0,
+ ("%s: enqueue record = %d", __func__,
+ retval));
+
+ inm->inm_state = IGMP_LEAVING_MEMBER;
+ inm->inm_sctimer = 1;
+ V_state_change_timers_running = 1;
+ syncstates = 0;
+ }
+ break;
+ }
+ break;
+ case IGMP_LAZY_MEMBER:
+ case IGMP_SLEEPING_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ /* Our reports are suppressed; do nothing. */
+ break;
+ }
+
+ if (syncstates) {
+ inm_commit(inm);
+ CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
+ inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
+ inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
+ CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s",
+ __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
+ }
+}
+
+/*
+ * Enqueue an IGMPv3 group record to the given output queue.
+ *
+ * XXX This function could do with having the allocation code
+ * split out, and the multiple-tree-walks coalesced into a single
+ * routine as has been done in igmp_v3_enqueue_filter_change().
+ *
+ * If is_state_change is zero, a current-state record is appended.
+ * If is_state_change is non-zero, a state-change report is appended.
+ *
+ * If is_group_query is non-zero, an mbuf packet chain is allocated.
+ * If is_group_query is zero, and there is a packet with free space
+ * at the tail of the queue, the record will be appended to that
+ * packet, provided there is enough free space.
+ * Otherwise a new mbuf packet chain is allocated.
+ *
+ * If is_source_query is non-zero, each source is checked to see if
+ * it was recorded for a Group-Source query, and will be omitted if
+ * it is not both in-mode and recorded.
+ *
+ * The function will attempt to allocate leading space in the packet
+ * for the IP/IGMP header to be prepended without fragmenting the chain.
+ *
+ * If successful the size of all data appended to the queue is returned,
+ * otherwise an error code less than zero is returned, or zero if
+ * no record(s) were appended.
+ */
+static int
+igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
+ const int is_state_change, const int is_group_query,
+ const int is_source_query)
+{
+ struct igmp_grouprec ig;
+ struct igmp_grouprec *pig;
+ struct ifnet *ifp;
+ struct ip_msource *ims, *nims;
+ struct mbuf *m0, *m, *md;
+ int error, is_filter_list_change;
+ int minrec0len, m0srcs, msrcs, nbytes, off;
+ int record_has_sources;
+ int now;
+ int type;
+ in_addr_t naddr;
+ uint8_t mode;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ error = 0;
+ ifp = inm->inm_ifp;
+ is_filter_list_change = 0;
+ m = NULL;
+ m0 = NULL;
+ m0srcs = 0;
+ msrcs = 0;
+ nbytes = 0;
+ nims = NULL;
+ record_has_sources = 1;
+ pig = NULL;
+ type = IGMP_DO_NOTHING;
+ mode = inm->inm_st[1].iss_fmode;
+
+ /*
+ * If we did not transition out of ASM mode during t0->t1,
+ * and there are no source nodes to process, we can skip
+ * the generation of source records.
+ */
+ if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
+ inm->inm_nsrc == 0)
+ record_has_sources = 0;
+
+ if (is_state_change) {
+ /*
+ * Queue a state change record.
+ * If the mode did not change, and there are non-ASM
+ * listeners or source filters present,
+ * we potentially need to issue two records for the group.
+ * If we are transitioning to MCAST_UNDEFINED, we need
+ * not send any sources.
+ * If there are ASM listeners, and there was no filter
+ * mode transition of any kind, do nothing.
+ */
+ if (mode != inm->inm_st[0].iss_fmode) {
+ if (mode == MCAST_EXCLUDE) {
+ CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
+ __func__);
+ type = IGMP_CHANGE_TO_EXCLUDE_MODE;
+ } else {
+ CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
+ __func__);
+ type = IGMP_CHANGE_TO_INCLUDE_MODE;
+ if (mode == MCAST_UNDEFINED)
+ record_has_sources = 0;
+ }
+ } else {
+ if (record_has_sources) {
+ is_filter_list_change = 1;
+ } else {
+ type = IGMP_DO_NOTHING;
+ }
+ }
+ } else {
+ /*
+ * Queue a current state record.
+ */
+ if (mode == MCAST_EXCLUDE) {
+ type = IGMP_MODE_IS_EXCLUDE;
+ } else if (mode == MCAST_INCLUDE) {
+ type = IGMP_MODE_IS_INCLUDE;
+ KASSERT(inm->inm_st[1].iss_asm == 0,
+ ("%s: inm %p is INCLUDE but ASM count is %d",
+ __func__, inm, inm->inm_st[1].iss_asm));
+ }
+ }
+
+ /*
+ * Generate the filter list changes using a separate function.
+ */
+ if (is_filter_list_change)
+ return (igmp_v3_enqueue_filter_change(ifq, inm));
+
+ if (type == IGMP_DO_NOTHING) {
+ CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s",
+ __func__, inet_ntoa(inm->inm_addr),
+ inm->inm_ifp->if_xname);
+ return (0);
+ }
+
+ /*
+ * If any sources are present, we must be able to fit at least
+ * one in the trailing space of the tail packet's mbuf,
+ * ideally more.
+ */
+ minrec0len = sizeof(struct igmp_grouprec);
+ if (record_has_sources)
+ minrec0len += sizeof(in_addr_t);
+
+ CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__,
+ igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
+ inm->inm_ifp->if_xname);
+
+ /*
+ * Check if we have a packet in the tail of the queue for this
+ * group into which the first group record for this group will fit.
+ * Otherwise allocate a new packet.
+ * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
+ * Note: Group records for G/GSR query responses MUST be sent
+ * in their own packet.
+ */
+ m0 = ifq->ifq_tail;
+ if (!is_group_query &&
+ m0 != NULL &&
+ (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
+ (m0->m_pkthdr.len + minrec0len) <
+ (ifp->if_mtu - IGMP_LEADINGSPACE)) {
+ m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
+ sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
+ m = m0;
+ CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
+ } else {
+ if (_IF_QFULL(ifq)) {
+ CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
+ return (-ENOMEM);
+ }
+ m = NULL;
+ m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
+ sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
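+		/*
+		 * For example, on a standard 1500-byte Ethernet MTU,
+		 * IGMP_LEADINGSPACE is 20 (IP) + 4 (Router Alert) + 8
+		 * (IGMPv3 report header) = 32 bytes and a group record
+		 * header is 8 bytes, so a fresh packet has room for
+		 * (1500 - 32 - 8) / 4 = 365 source addresses.
+		 */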
+ if (!is_state_change && !is_group_query) {
+ m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ if (m)
+ m->m_data += IGMP_LEADINGSPACE;
+ }
+ if (m == NULL) {
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m)
+ MH_ALIGN(m, IGMP_LEADINGSPACE);
+ }
+ if (m == NULL)
+ return (-ENOMEM);
+
+ igmp_save_context(m, ifp);
+
+ CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
+ }
+
+ /*
+ * Append group record.
+ * If we have sources, we don't know how many yet.
+ */
+ ig.ig_type = type;
+ ig.ig_datalen = 0;
+ ig.ig_numsrc = 0;
+ ig.ig_group = inm->inm_addr;
+ if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
+ if (m != m0)
+ m_freem(m);
+ CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
+ return (-ENOMEM);
+ }
+ nbytes += sizeof(struct igmp_grouprec);
+
+ /*
+ * Append as many sources as will fit in the first packet.
+ * If we are appending to a new packet, the chain allocation
+ * may potentially use clusters; use m_getptr() in this case.
+ * If we are appending to an existing packet, we need to obtain
+ * a pointer to the group record after m_append(), in case a new
+ * mbuf was allocated.
+ * Only append sources which are in-mode at t1. If we are
+ * transitioning to MCAST_UNDEFINED state on the group, do not
+ * include source entries.
+ * Only report recorded sources in our filter set when responding
+ * to a group-source query.
+ */
+ if (record_has_sources) {
+ if (m == m0) {
+ md = m_last(m);
+ pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
+ md->m_len - nbytes);
+ } else {
+ md = m_getptr(m, 0, &off);
+ pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
+ off);
+ }
+ msrcs = 0;
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
+ CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
+ inet_ntoa_haddr(ims->ims_haddr));
+ now = ims_get_mode(inm, ims, 1);
+ CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
+ if ((now != mode) ||
+ (now == mode && mode == MCAST_UNDEFINED)) {
+ CTR1(KTR_IGMPV3, "%s: skip node", __func__);
+ continue;
+ }
+ if (is_source_query && ims->ims_stp == 0) {
+ CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
+ __func__);
+ continue;
+ }
+ CTR1(KTR_IGMPV3, "%s: append node", __func__);
+ naddr = htonl(ims->ims_haddr);
+ if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
+ if (m != m0)
+ m_freem(m);
+ CTR1(KTR_IGMPV3, "%s: m_append() failed.",
+ __func__);
+ return (-ENOMEM);
+ }
+ nbytes += sizeof(in_addr_t);
+ ++msrcs;
+ if (msrcs == m0srcs)
+ break;
+ }
+ CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
+ msrcs);
+ pig->ig_numsrc = htons(msrcs);
+ nbytes += (msrcs * sizeof(in_addr_t));
+ }
+
+ if (is_source_query && msrcs == 0) {
+ CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
+ if (m != m0)
+ m_freem(m);
+ return (0);
+ }
+
+ /*
+ * We are good to go with first packet.
+ */
+ if (m != m0) {
+ CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
+ m->m_pkthdr.PH_vt.vt_nrecs = 1;
+ _IF_ENQUEUE(ifq, m);
+ } else
+ m->m_pkthdr.PH_vt.vt_nrecs++;
+
+ /*
+ * No further work needed if no source list in packet(s).
+ */
+ if (!record_has_sources)
+ return (nbytes);
+
+ /*
+ * Whilst sources remain to be announced, we need to allocate
+ * a new packet and fill out as many sources as will fit.
+ * Always try for a cluster first.
+ */
+ while (nims != NULL) {
+ if (_IF_QFULL(ifq)) {
+ CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
+ return (-ENOMEM);
+ }
+ m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ if (m)
+ m->m_data += IGMP_LEADINGSPACE;
+ if (m == NULL) {
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m)
+ MH_ALIGN(m, IGMP_LEADINGSPACE);
+ }
+ if (m == NULL)
+ return (-ENOMEM);
+ igmp_save_context(m, ifp);
+ md = m_getptr(m, 0, &off);
+ pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
+ CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);
+
+ if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
+ if (m != m0)
+ m_freem(m);
+ CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
+ return (-ENOMEM);
+ }
+ m->m_pkthdr.PH_vt.vt_nrecs = 1;
+ nbytes += sizeof(struct igmp_grouprec);
+
+ m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
+ sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
+
+ msrcs = 0;
+ RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
+ CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
+ inet_ntoa_haddr(ims->ims_haddr));
+ now = ims_get_mode(inm, ims, 1);
+ if ((now != mode) ||
+ (now == mode && mode == MCAST_UNDEFINED)) {
+ CTR1(KTR_IGMPV3, "%s: skip node", __func__);
+ continue;
+ }
+ if (is_source_query && ims->ims_stp == 0) {
+ CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
+ __func__);
+ continue;
+ }
+ CTR1(KTR_IGMPV3, "%s: append node", __func__);
+ naddr = htonl(ims->ims_haddr);
+ if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
+ if (m != m0)
+ m_freem(m);
+ CTR1(KTR_IGMPV3, "%s: m_append() failed.",
+ __func__);
+ return (-ENOMEM);
+ }
+ ++msrcs;
+ if (msrcs == m0srcs)
+ break;
+ }
+ pig->ig_numsrc = htons(msrcs);
+ nbytes += (msrcs * sizeof(in_addr_t));
+
+ CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
+ _IF_ENQUEUE(ifq, m);
+ }
+
+ return (nbytes);
+}
+
+/*
+ * Type used to mark record pass completion.
+ * We exploit the fact we can cast to this easily from the
+ * current filter modes on each ip_msource node.
+ */
+typedef enum {
+ REC_NONE = 0x00, /* MCAST_UNDEFINED */
+ REC_ALLOW = 0x01, /* MCAST_INCLUDE */
+ REC_BLOCK = 0x02, /* MCAST_EXCLUDE */
+ REC_FULL = REC_ALLOW | REC_BLOCK
+} rectype_t;
+
+/*
+ * Enqueue an IGMPv3 filter list change to the given output queue.
+ *
+ * Source list filter state is held in an RB-tree. When the filter list
+ * for a group is changed without changing its mode, we need to compute
+ * the deltas between T0 and T1 for each source in the filter set,
+ * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
+ *
+ * As we may potentially queue two record types, and the entire R-B tree
+ * needs to be walked at once, we break this out into its own function
+ * so we can generate a tightly packed queue of packets.
+ *
+ * XXX This could be written to only use one tree walk, although that makes
+ * serializing into the mbuf chains a bit harder. For now we do two walks
+ * which makes things easier on us, and it may or may not be harder on
+ * the L2 cache.
+ *
+ * If successful the size of all data appended to the queue is returned,
+ * otherwise an error code less than zero is returned, or zero if
+ * no record(s) were appended.
+ */
+static int
+igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
+{
+ static const int MINRECLEN =
+ sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
+ struct ifnet *ifp;
+ struct igmp_grouprec ig;
+ struct igmp_grouprec *pig;
+ struct ip_msource *ims, *nims;
+ struct mbuf *m, *m0, *md;
+ in_addr_t naddr;
+ int m0srcs, nbytes, npbytes, off, rsrcs, schanged;
+ int nallow, nblock;
+ uint8_t mode, now, then;
+ rectype_t crt, drt, nrt;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ if (inm->inm_nsrc == 0 ||
+ (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
+ return (0);
+
+ ifp = inm->inm_ifp; /* interface */
+ mode = inm->inm_st[1].iss_fmode; /* filter mode at t1 */
+ crt = REC_NONE; /* current group record type */
+ drt = REC_NONE; /* mask of completed group record types */
+ nrt = REC_NONE; /* record type for current node */
+ m0srcs = 0; /* # source which will fit in current mbuf chain */
+ nbytes = 0; /* # of bytes appended to group's state-change queue */
+ npbytes = 0; /* # of bytes appended this packet */
+ rsrcs = 0; /* # sources encoded in current record */
+ schanged = 0; /* # nodes encoded in overall filter change */
+ nallow = 0; /* # of source entries in ALLOW_NEW */
+ nblock = 0; /* # of source entries in BLOCK_OLD */
+ nims = NULL; /* next tree node pointer */
+
+ /*
+ * For each possible filter record mode.
+ * The first kind of source we encounter tells us which
+ * is the first kind of record we start appending.
+ * If a node transitioned to UNDEFINED at t1, its mode is treated
+ * as the inverse of the group's filter mode.
+ */
+ while (drt != REC_FULL) {
+ do {
+ m0 = ifq->ifq_tail;
+ if (m0 != NULL &&
+ (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
+ IGMP_V3_REPORT_MAXRECS) &&
+ (m0->m_pkthdr.len + MINRECLEN) <
+ (ifp->if_mtu - IGMP_LEADINGSPACE)) {
+ m = m0;
+ m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
+ sizeof(struct igmp_grouprec)) /
+ sizeof(in_addr_t);
+ CTR1(KTR_IGMPV3,
+ "%s: use previous packet", __func__);
+ } else {
+ m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ if (m)
+ m->m_data += IGMP_LEADINGSPACE;
+ if (m == NULL) {
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m)
+ MH_ALIGN(m, IGMP_LEADINGSPACE);
+ }
+ if (m == NULL) {
+ CTR1(KTR_IGMPV3,
+ "%s: m_get*() failed", __func__);
+ return (-ENOMEM);
+ }
+ m->m_pkthdr.PH_vt.vt_nrecs = 0;
+ igmp_save_context(m, ifp);
+ m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
+ sizeof(struct igmp_grouprec)) /
+ sizeof(in_addr_t);
+ npbytes = 0;
+ CTR1(KTR_IGMPV3,
+ "%s: allocated new packet", __func__);
+ }
+ /*
+ * Append the IGMP group record header to the
+ * current packet's data area.
+ * Recalculate pointer to free space for next
+ * group record, in case m_append() allocated
+ * a new mbuf or cluster.
+ */
+ memset(&ig, 0, sizeof(ig));
+ ig.ig_group = inm->inm_addr;
+ if (!m_append(m, sizeof(ig), (void *)&ig)) {
+ if (m != m0)
+ m_freem(m);
+ CTR1(KTR_IGMPV3,
+ "%s: m_append() failed", __func__);
+ return (-ENOMEM);
+ }
+ npbytes += sizeof(struct igmp_grouprec);
+ if (m != m0) {
+ /* new packet; offset in chain */
+ md = m_getptr(m, npbytes -
+ sizeof(struct igmp_grouprec), &off);
+ pig = (struct igmp_grouprec *)(mtod(md,
+ uint8_t *) + off);
+ } else {
+ /* current packet; offset from last append */
+ md = m_last(m);
+ pig = (struct igmp_grouprec *)(mtod(md,
+ uint8_t *) + md->m_len -
+ sizeof(struct igmp_grouprec));
+ }
+ /*
+ * Begin walking the tree for this record type
+ * pass, or continue from where we left off
+ * previously if we had to allocate a new packet.
+ * Only report deltas in-mode at t1.
+ * We need not report included sources as allowed
+ * if we are in inclusive mode on the group,
+ * however the converse is not true.
+ */
+ rsrcs = 0;
+ if (nims == NULL)
+ nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
+ RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
+ CTR2(KTR_IGMPV3, "%s: visit node %s",
+ __func__, inet_ntoa_haddr(ims->ims_haddr));
+ now = ims_get_mode(inm, ims, 1);
+ then = ims_get_mode(inm, ims, 0);
+ CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
+ __func__, then, now);
+ if (now == then) {
+ CTR1(KTR_IGMPV3,
+ "%s: skip unchanged", __func__);
+ continue;
+ }
+ if (mode == MCAST_EXCLUDE &&
+ now == MCAST_INCLUDE) {
+ CTR1(KTR_IGMPV3,
+ "%s: skip IN src on EX group",
+ __func__);
+ continue;
+ }
+ nrt = (rectype_t)now;
+ if (nrt == REC_NONE)
+ nrt = (rectype_t)(~mode & REC_FULL);
+ if (schanged++ == 0) {
+ crt = nrt;
+ } else if (crt != nrt)
+ continue;
+ naddr = htonl(ims->ims_haddr);
+ if (!m_append(m, sizeof(in_addr_t),
+ (void *)&naddr)) {
+ if (m != m0)
+ m_freem(m);
+ CTR1(KTR_IGMPV3,
+ "%s: m_append() failed", __func__);
+ return (-ENOMEM);
+ }
+ nallow += !!(crt == REC_ALLOW);
+ nblock += !!(crt == REC_BLOCK);
+ if (++rsrcs == m0srcs)
+ break;
+ }
+ /*
+ * If we did not append any tree nodes on this
+ * pass, back out of allocations.
+ */
+ if (rsrcs == 0) {
+ npbytes -= sizeof(struct igmp_grouprec);
+ if (m != m0) {
+ CTR1(KTR_IGMPV3,
+ "%s: m_free(m)", __func__);
+ m_freem(m);
+ } else {
+ CTR1(KTR_IGMPV3,
+ "%s: m_adj(m, -ig)", __func__);
+ m_adj(m, -((int)sizeof(
+ struct igmp_grouprec)));
+ }
+ continue;
+ }
+ npbytes += (rsrcs * sizeof(in_addr_t));
+ if (crt == REC_ALLOW)
+ pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
+ else if (crt == REC_BLOCK)
+ pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
+ pig->ig_numsrc = htons(rsrcs);
+ /*
+ * Count the new group record, and enqueue this
+ * packet if it wasn't already queued.
+ */
+ m->m_pkthdr.PH_vt.vt_nrecs++;
+ if (m != m0)
+ _IF_ENQUEUE(ifq, m);
+ nbytes += npbytes;
+ } while (nims != NULL);
+ drt |= crt;
+ crt = (~crt & REC_FULL);
+ }
+
+ CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
+ nallow, nblock);
+
+ return (nbytes);
+}
+
+static int
+igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
+{
+ struct ifqueue *gq;
+ struct mbuf *m; /* pending state-change */
+ struct mbuf *m0; /* copy of pending state-change */
+ struct mbuf *mt; /* last state-change in packet */
+ int docopy, domerge;
+ u_int recslen;
+
+ docopy = 0;
+ domerge = 0;
+ recslen = 0;
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ /*
+ * If there are further pending retransmissions, make a writable
+ * copy of each queued state-change message before merging.
+ */
+ if (inm->inm_scrv > 0)
+ docopy = 1;
+
+ gq = &inm->inm_scq;
+#ifdef KTR
+ if (gq->ifq_head == NULL) {
+ CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
+ __func__, inm);
+ }
+#endif
+
+ m = gq->ifq_head;
+ while (m != NULL) {
+ /*
+ * Only merge the report into the current packet if
+ * there is sufficient space to do so; an IGMPv3 report
+ * packet may only contain 65,535 group records.
+ * Always use a simple mbuf chain concatenation to do this,
+ * as large state changes for single groups may have
+ * allocated clusters.
+ */
+ domerge = 0;
+ mt = ifscq->ifq_tail;
+ if (mt != NULL) {
+ recslen = m_length(m, NULL);
+
+ if ((mt->m_pkthdr.PH_vt.vt_nrecs +
+ m->m_pkthdr.PH_vt.vt_nrecs <=
+ IGMP_V3_REPORT_MAXRECS) &&
+ (mt->m_pkthdr.len + recslen <=
+ (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
+ domerge = 1;
+ }
+
+ if (!domerge && _IF_QFULL(gq)) {
+ CTR2(KTR_IGMPV3,
+ "%s: outbound queue full, skipping whole packet %p",
+ __func__, m);
+ mt = m->m_nextpkt;
+ if (!docopy)
+ m_freem(m);
+ m = mt;
+ continue;
+ }
+
+ if (!docopy) {
+ CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
+ _IF_DEQUEUE(gq, m0);
+ m = m0->m_nextpkt;
+ } else {
+ CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
+ m0 = m_dup(m, M_NOWAIT);
+ if (m0 == NULL)
+ return (ENOMEM);
+ m0->m_nextpkt = NULL;
+ m = m->m_nextpkt;
+ }
+
+ if (!domerge) {
+ CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)",
+ __func__, m0, ifscq);
+ _IF_ENQUEUE(ifscq, m0);
+ } else {
+ struct mbuf *mtl; /* last mbuf of packet mt */
+
+ CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)",
+ __func__, m0, mt);
+
+ mtl = m_last(mt);
+ m0->m_flags &= ~M_PKTHDR;
+ mt->m_pkthdr.len += recslen;
+ mt->m_pkthdr.PH_vt.vt_nrecs +=
+ m0->m_pkthdr.PH_vt.vt_nrecs;
+
+ mtl->m_next = m0;
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Respond to a pending IGMPv3 General Query.
+ */
+static void
+igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
+{
+ struct ifmultiaddr *ifma, *tifma;
+ struct ifnet *ifp;
+ struct in_multi *inm;
+ int retval, loop;
+
+ IN_MULTI_LOCK_ASSERT();
+ IGMP_LOCK_ASSERT();
+
+ KASSERT(igi->igi_version == IGMP_VERSION_3,
+ ("%s: called when version %d", __func__, igi->igi_version));
+
+ ifp = igi->igi_ifp;
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, tifma) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ KASSERT(ifp == inm->inm_ifp,
+ ("%s: inconsistent ifp", __func__));
+
+ switch (inm->inm_state) {
+ case IGMP_NOT_MEMBER:
+ case IGMP_SILENT_MEMBER:
+ break;
+ case IGMP_REPORTING_MEMBER:
+ case IGMP_IDLE_MEMBER:
+ case IGMP_LAZY_MEMBER:
+ case IGMP_SLEEPING_MEMBER:
+ case IGMP_AWAKENING_MEMBER:
+ inm->inm_state = IGMP_REPORTING_MEMBER;
+ retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
+ inm, 0, 0, 0);
+ CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
+ __func__, retval);
+ break;
+ case IGMP_G_QUERY_PENDING_MEMBER:
+ case IGMP_SG_QUERY_PENDING_MEMBER:
+ case IGMP_LEAVING_MEMBER:
+ break;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
+ igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
+
+ /*
+ * Slew transmission of bursts over 500ms intervals.
+ */
+ if (igi->igi_gq.ifq_head != NULL) {
+ igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
+ IGMP_RESPONSE_BURST_INTERVAL);
+ V_interface_timers_running = 1;
+ }
+}
+
+/*
+ * Transmit the next pending IGMP message in the output queue.
+ *
+ * We get called from netisr_processqueue(). A mutex private to igmpoq
+ * will be acquired and released around this routine.
+ *
+ * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
+ * MRT: Nothing needs to be done, as IGMP traffic is always local to
+ * a link and uses a link-scope multicast address.
+ */
+static void
+igmp_intr(struct mbuf *m)
+{
+ struct ip_moptions imo;
+ struct ifnet *ifp;
+ struct mbuf *ipopts, *m0;
+ int error;
+ uint32_t ifindex;
+
+ CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m);
+
+ /*
+ * Set VNET image pointer from enqueued mbuf chain
+ * before doing anything else. Whilst we use interface
+ * indexes to guard against interface detach, they are
+ * unique to each VIMAGE and must be retrieved.
+ */
+ CURVNET_SET((struct vnet *)(m->m_pkthdr.header));
+ ifindex = igmp_restore_context(m);
+
+ /*
+ * Check if the ifnet still exists. This limits the scope of
+ * any race in the absence of a global ifp lock for low cost
+ * (an array lookup).
+ */
+ ifp = ifnet_byindex(ifindex);
+ if (ifp == NULL) {
+ CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.",
+ __func__, m, ifindex);
+ m_freem(m);
+ IPSTAT_INC(ips_noroute);
+ goto out;
+ }
+
+ ipopts = V_igmp_sendra ? m_raopt : NULL;
+
+ imo.imo_multicast_ttl = 1;
+ imo.imo_multicast_vif = -1;
+ imo.imo_multicast_loop = (V_ip_mrouter != NULL);
+
+ /*
+ * If the user requested that IGMP traffic be explicitly
+ * redirected to the loopback interface (e.g. they are running a
+ * MANET interface and the routing protocol needs to see the
+ * updates), handle this now.
+ */
+ if (m->m_flags & M_IGMP_LOOP)
+ imo.imo_multicast_ifp = V_loif;
+ else
+ imo.imo_multicast_ifp = ifp;
+
+ if (m->m_flags & M_IGMPV2) {
+ m0 = m;
+ } else {
+ m0 = igmp_v3_encap_report(ifp, m);
+ if (m0 == NULL) {
+ CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m);
+ m_freem(m);
+ IPSTAT_INC(ips_odropped);
+ goto out;
+ }
+ }
+
+ igmp_scrub_context(m0);
+ m->m_flags &= ~(M_PROTOFLAGS);
+ m0->m_pkthdr.rcvif = V_loif;
+#ifdef MAC
+ mac_netinet_igmp_send(ifp, m0);
+#endif
+ error = ip_output(m0, ipopts, NULL, 0, &imo, NULL);
+ if (error) {
+ CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error);
+ goto out;
+ }
+
+ IGMPSTAT_INC(igps_snd_reports);
+
+out:
+ /*
+ * We must restore the existing vnet pointer before
+ * continuing as we are run from netisr context.
+ */
+ CURVNET_RESTORE();
+}
+
+/*
+ * Encapsulate an IGMPv3 report.
+ *
+ * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
+ * chain has already had its IP/IGMPv3 header prepended. In this case
+ * the function will not attempt to prepend; the lengths and checksums
+ * will however be re-computed.
+ *
+ * Returns a pointer to the new mbuf chain head, or NULL if the
+ * allocation failed.
+ */
+static struct mbuf *
+igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
+{
+ struct igmp_report *igmp;
+ struct ip *ip;
+ int hdrlen, igmpreclen;
+
+ KASSERT((m->m_flags & M_PKTHDR),
+ ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
+
+ igmpreclen = m_length(m, NULL);
+ hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
+
+ if (m->m_flags & M_IGMPV3_HDR) {
+ igmpreclen -= hdrlen;
+ } else {
+ M_PREPEND(m, hdrlen, M_DONTWAIT);
+ if (m == NULL)
+ return (NULL);
+ m->m_flags |= M_IGMPV3_HDR;
+ }
+
+ CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);
+
+ m->m_data += sizeof(struct ip);
+ m->m_len -= sizeof(struct ip);
+
+ igmp = mtod(m, struct igmp_report *);
+ igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
+ igmp->ir_rsv1 = 0;
+ igmp->ir_rsv2 = 0;
+ igmp->ir_numgrps = htons(m->m_pkthdr.PH_vt.vt_nrecs);
+ igmp->ir_cksum = 0;
+ igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
+ m->m_pkthdr.PH_vt.vt_nrecs = 0;
+
+ m->m_data -= sizeof(struct ip);
+ m->m_len += sizeof(struct ip);
+
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
+ ip->ip_len = hdrlen + igmpreclen;
+ ip->ip_off = IP_DF;
+ ip->ip_p = IPPROTO_IGMP;
+ ip->ip_sum = 0;
+
+ ip->ip_src.s_addr = INADDR_ANY;
+
+ if (m->m_flags & M_IGMP_LOOP) {
+ struct in_ifaddr *ia;
+
+ IFP_TO_IA(ifp, ia);
+ if (ia != NULL) {
+ ip->ip_src = ia->ia_addr.sin_addr;
+ ifa_free(&ia->ia_ifa);
+ }
+ }
+
+ ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
+
+ return (m);
+}
+
+#ifdef KTR
+static char *
+igmp_rec_type_to_str(const int type)
+{
+
+ switch (type) {
+ case IGMP_CHANGE_TO_EXCLUDE_MODE:
+ return "TO_EX";
+ break;
+ case IGMP_CHANGE_TO_INCLUDE_MODE:
+ return "TO_IN";
+ break;
+ case IGMP_MODE_IS_EXCLUDE:
+ return "MODE_EX";
+ break;
+ case IGMP_MODE_IS_INCLUDE:
+ return "MODE_IN";
+ break;
+ case IGMP_ALLOW_NEW_SOURCES:
+ return "ALLOW_NEW";
+ break;
+ case IGMP_BLOCK_OLD_SOURCES:
+ return "BLOCK_OLD";
+ break;
+ default:
+ break;
+ }
+ return "unknown";
+}
+#endif
+
+static void
+igmp_init(void *unused __unused)
+{
+
+ CTR1(KTR_IGMPV3, "%s: initializing", __func__);
+
+ IGMP_LOCK_INIT();
+
+ m_raopt = igmp_ra_alloc();
+
+ netisr_register(&igmp_nh);
+}
+SYSINIT(igmp_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_init, NULL);
+
+static void
+igmp_uninit(void *unused __unused)
+{
+
+ CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
+
+ netisr_unregister(&igmp_nh);
+
+ m_free(m_raopt);
+ m_raopt = NULL;
+
+ IGMP_LOCK_DESTROY();
+}
+SYSUNINIT(igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_uninit, NULL);
+
+static void
+vnet_igmp_init(const void *unused __unused)
+{
+
+ CTR1(KTR_IGMPV3, "%s: initializing", __func__);
+
+ LIST_INIT(&V_igi_head);
+}
+VNET_SYSINIT(vnet_igmp_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_igmp_init,
+ NULL);
+
+static void
+vnet_igmp_uninit(const void *unused __unused)
+{
+
+ CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
+
+ KASSERT(LIST_EMPTY(&V_igi_head),
+ ("%s: igi list not empty; ifnets not detached?", __func__));
+}
+VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_igmp_uninit, NULL);
+
+static int
+igmp_modevent(module_t mod, int type, void *unused __unused)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ case MOD_UNLOAD:
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t igmp_mod = {
+ "igmp",
+ igmp_modevent,
+ 0
+};
+DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/freebsd/sys/netinet/igmp.h b/freebsd/sys/netinet/igmp.h
new file mode 100644
index 00000000..f328d21f
--- /dev/null
+++ b/freebsd/sys/netinet/igmp.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/igmp.h>
diff --git a/freebsd/sys/netinet/igmp_var.h b/freebsd/sys/netinet/igmp_var.h
new file mode 100644
index 00000000..e1abe6ab
--- /dev/null
+++ b/freebsd/sys/netinet/igmp_var.h
@@ -0,0 +1,225 @@
+/*-
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)igmp_var.h 8.1 (Berkeley) 7/19/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IGMP_VAR_HH_
+#define _NETINET_IGMP_VAR_HH_
+
+/*
+ * Internet Group Management Protocol (IGMP),
+ * implementation-specific definitions.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ *
+ * MULTICAST Revision: 3.5.1.3
+ */
+
+#ifndef BURN_BRIDGES
+/*
+ * Pre-IGMPV3 igmpstat structure.
+ */
+struct oigmpstat {
+ u_int igps_rcv_total; /* total IGMP messages received */
+ u_int igps_rcv_tooshort; /* received with too few bytes */
+ u_int igps_rcv_badsum; /* received with bad checksum */
+ u_int igps_rcv_queries; /* received membership queries */
+ u_int igps_rcv_badqueries; /* received invalid queries */
+ u_int igps_rcv_reports; /* received membership reports */
+ u_int igps_rcv_badreports; /* received invalid reports */
+ u_int igps_rcv_ourreports; /* received reports for our groups */
+ u_int igps_snd_reports; /* sent membership reports */
+ u_int igps_rcv_toolong; /* received with too many bytes */
+};
+#endif
+
+/*
+ * IGMPv3 protocol statistics.
+ */
+struct igmpstat {
+ /*
+ * Structure header (to insulate ABI changes).
+ */
+ uint32_t igps_version; /* version of this structure */
+ uint32_t igps_len; /* length of this structure */
+ /*
+ * Message statistics.
+ */
+ uint64_t igps_rcv_total; /* total IGMP messages received */
+ uint64_t igps_rcv_tooshort; /* received with too few bytes */
+ uint64_t igps_rcv_badttl; /* received with ttl other than 1 */
+ uint64_t igps_rcv_badsum; /* received with bad checksum */
+ /*
+ * Query statistics.
+ */
+ uint64_t igps_rcv_v1v2_queries; /* received IGMPv1/IGMPv2 queries */
+ uint64_t igps_rcv_v3_queries; /* received IGMPv3 queries */
+ uint64_t igps_rcv_badqueries; /* received invalid queries */
+ uint64_t igps_rcv_gen_queries; /* received general queries */
+ uint64_t igps_rcv_group_queries;/* received group queries */
+ uint64_t igps_rcv_gsr_queries; /* received group-source queries */
+ uint64_t igps_drop_gsr_queries; /* dropped group-source queries */
+ /*
+ * Report statistics.
+ */
+ uint64_t igps_rcv_reports; /* received membership reports */
+ uint64_t igps_rcv_badreports; /* received invalid reports */
+ uint64_t igps_rcv_ourreports; /* received reports for our groups */
+ uint64_t igps_rcv_nora; /* received w/o Router Alert option */
+ uint64_t igps_snd_reports; /* sent membership reports */
+ /*
+ * Padding for future additions.
+ */
+ uint64_t __igps_pad[4];
+};
+#define IGPS_VERSION_3 3 /* as of FreeBSD 8.x */
+#define IGPS_VERSION3_LEN 168
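+/*
+ * 168 bytes = two 32-bit header fields plus twenty 64-bit counters
+ * (sixteen statistics and the four-entry pad).
+ */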
+
+#ifdef _KERNEL
+#define IGMPSTAT_ADD(name, val) V_igmpstat.name += (val)
+#define IGMPSTAT_INC(name) IGMPSTAT_ADD(name, 1)
+#endif
+
+#ifdef CTASSERT
+CTASSERT(sizeof(struct igmpstat) == 168);
+#endif
+
+#ifdef _KERNEL
+#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1)
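+/* IGMP_RANDOM_DELAY(X) picks a pseudo-random delay in the range [1, X] ticks. */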
+
+#define IGMP_MAX_STATE_CHANGES 24 /* Max pending changes per group */
+
+/*
+ * IGMP per-group states.
+ */
+#define IGMP_NOT_MEMBER 0 /* Can garbage collect in_multi */
+#define IGMP_SILENT_MEMBER 1 /* Do not perform IGMP for group */
+#define IGMP_REPORTING_MEMBER 2 /* IGMPv1/2/3 we are reporter */
+#define IGMP_IDLE_MEMBER 3 /* IGMPv1/2 we reported last */
+#define IGMP_LAZY_MEMBER 4 /* IGMPv1/2 other member reporting */
+#define IGMP_SLEEPING_MEMBER 5 /* IGMPv1/2 start query response */
+#define IGMP_AWAKENING_MEMBER 6 /* IGMPv1/2 group timer will start */
+#define IGMP_G_QUERY_PENDING_MEMBER 7 /* IGMPv3 group query pending */
+#define IGMP_SG_QUERY_PENDING_MEMBER 8 /* IGMPv3 source query pending */
+#define IGMP_LEAVING_MEMBER 9 /* IGMPv3 dying gasp (pending last */
+ /* retransmission of INCLUDE {}) */
+
+/*
+ * IGMP version tag.
+ */
+#define IGMP_VERSION_NONE 0 /* Invalid */
+#define IGMP_VERSION_1 1
+#define IGMP_VERSION_2 2
+#define IGMP_VERSION_3 3 /* Default */
+
+/*
+ * IGMPv3 protocol control variables.
+ */
+#define IGMP_RV_INIT 2 /* Robustness Variable */
+#define IGMP_RV_MIN 1
+#define IGMP_RV_MAX 7
+
+#define IGMP_QI_INIT 125 /* Query Interval (s) */
+#define IGMP_QI_MIN 1
+#define IGMP_QI_MAX 255
+
+#define IGMP_QRI_INIT 10 /* Query Response Interval (s) */
+#define IGMP_QRI_MIN 1
+#define IGMP_QRI_MAX 255
+
+#define IGMP_URI_INIT 3 /* Unsolicited Report Interval (s) */
+#define IGMP_URI_MIN 0
+#define IGMP_URI_MAX 10
+
+#define IGMP_MAX_G_GS_PACKETS 8 /* # of packets to answer G/GS */
+#define IGMP_MAX_STATE_CHANGE_PACKETS 8 /* # of packets per state change */
+#define IGMP_MAX_RESPONSE_PACKETS 16 /* # of packets for general query */
+#define IGMP_MAX_RESPONSE_BURST 4 /* # of responses to send at once */
+#define IGMP_RESPONSE_BURST_INTERVAL (PR_FASTHZ / 2) /* 500ms */
+
+/*
+ * IGMP-specific mbuf flags.
+ */
+#define M_IGMPV2 M_PROTO1 /* Packet is IGMPv2 */
+#define M_IGMPV3_HDR M_PROTO2 /* Packet has IGMPv3 headers */
+#define M_GROUPREC M_PROTO3 /* mbuf chain is a group record */
+#define M_IGMP_LOOP M_PROTO4 /* transmit on loif, not real ifp */
+
+/*
+ * Default amount of leading space for IGMPv3 to allocate at the
+ * beginning of its mbuf packet chains, to avoid fragmentation and
+ * unnecessary allocation of leading mbufs.
+ */
+#define RAOPT_LEN 4 /* Length of IP Router Alert option */
+#define IGMP_LEADINGSPACE \
+ (sizeof(struct ip) + RAOPT_LEN + sizeof(struct igmp_report))
+
+/*
+ * Subsystem lock macros.
+ * The IGMP lock is only taken with IGMP. Currently it is system-wide.
+ * VIMAGE: The lock could be pushed to per-VIMAGE granularity in future.
+ */
+#define IGMP_LOCK_INIT() mtx_init(&igmp_mtx, "igmp_mtx", NULL, MTX_DEF)
+#define IGMP_LOCK_DESTROY() mtx_destroy(&igmp_mtx)
+#define IGMP_LOCK() mtx_lock(&igmp_mtx)
+#define IGMP_LOCK_ASSERT() mtx_assert(&igmp_mtx, MA_OWNED)
+#define IGMP_UNLOCK() mtx_unlock(&igmp_mtx)
+#define IGMP_UNLOCK_ASSERT() mtx_assert(&igmp_mtx, MA_NOTOWNED)
+
+struct igmp_ifinfo;
+
+int igmp_change_state(struct in_multi *);
+void igmp_fasttimo(void);
+struct igmp_ifinfo *
+ igmp_domifattach(struct ifnet *);
+void igmp_domifdetach(struct ifnet *);
+void igmp_ifdetach(struct ifnet *);
+void igmp_input(struct mbuf *, int);
+void igmp_slowtimo(void);
+
+SYSCTL_DECL(_net_inet_igmp);
+
+#endif /* _KERNEL */
+
+/*
+ * Names for IGMP sysctl objects
+ */
+#define IGMPCTL_STATS 1 /* statistics (read-only) */
+#define IGMPCTL_MAXID 2
+
+#define IGMPCTL_NAMES { \
+ { 0, 0 }, \
+ { "stats", CTLTYPE_STRUCT } \
+}
+#endif
diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c
new file mode 100644
index 00000000..64e5d329
--- /dev/null
+++ b/freebsd/sys/netinet/in.c
@@ -0,0 +1,1601 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (C) 2001 WIDE Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in.c 8.4 (Berkeley) 1/9/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_mpath.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_llatbl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/igmp_var.h>
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/udp_var.h>
+
+static int in_mask2len(struct in_addr *);
+static void in_len2mask(struct in_addr *, int);
+static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
+ struct ifnet *, struct thread *);
+
+static int in_addprefix(struct in_ifaddr *, int);
+static int in_scrubprefix(struct in_ifaddr *);
+static void in_socktrim(struct sockaddr_in *);
+static int in_ifinit(struct ifnet *,
+ struct in_ifaddr *, struct sockaddr_in *, int);
+static void in_purgemaddrs(struct ifnet *);
+
+static VNET_DEFINE(int, subnetsarelocal);
+#define V_subnetsarelocal VNET(subnetsarelocal)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW,
+ &VNET_NAME(subnetsarelocal), 0,
+ "Treat all subnets as directly connected");
+static VNET_DEFINE(int, sameprefixcarponly);
+#define V_sameprefixcarponly VNET(sameprefixcarponly)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
+ &VNET_NAME(sameprefixcarponly), 0,
+ "Refuse to create same prefixes on different interfaces");
+
+VNET_DECLARE(struct inpcbinfo, ripcbinfo);
+#define V_ripcbinfo VNET(ripcbinfo)
+
+/*
+ * Return 1 if an internet address is for a ``local'' host
+ * (one to which we have a connection). If subnetsarelocal
+ * is true, this includes other subnets of the local net.
+ * Otherwise, it includes only the directly-connected (sub)nets.
+ */
+int
+in_localaddr(struct in_addr in)
+{
+ register u_long i = ntohl(in.s_addr);
+ register struct in_ifaddr *ia;
+
+ IN_IFADDR_RLOCK();
+ if (V_subnetsarelocal) {
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if ((i & ia->ia_netmask) == ia->ia_net) {
+ IN_IFADDR_RUNLOCK();
+ return (1);
+ }
+ }
+ } else {
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
+ IN_IFADDR_RUNLOCK();
+ return (1);
+ }
+ }
+ }
+ IN_IFADDR_RUNLOCK();
+ return (0);
+}
+
+/*
+ * Return 1 if an internet address is for the local host and configured
+ * on one of its interfaces.
+ */
+int
+in_localip(struct in_addr in)
+{
+ struct in_ifaddr *ia;
+
+ IN_IFADDR_RLOCK();
+ LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
+ if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
+ IN_IFADDR_RUNLOCK();
+ return (1);
+ }
+ }
+ IN_IFADDR_RUNLOCK();
+ return (0);
+}
+
+/*
+ * Determine whether an IP address is in a reserved set of addresses
+ * that may not be forwarded, or whether datagrams to that destination
+ * may be forwarded.
+ */
+int
+in_canforward(struct in_addr in)
+{
+ register u_long i = ntohl(in.s_addr);
+ register u_long net;
+
+ if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
+ return (0);
+ if (IN_CLASSA(i)) {
+ net = i & IN_CLASSA_NET;
+ if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
+ return (0);
+ }
+ return (1);
+}
+
+/*
+ * Trim a mask in a sockaddr
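+ * (shorten sin_len so that it covers only the significant, non-zero
+ * bytes of the mask)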
+ */
+static void
+in_socktrim(struct sockaddr_in *ap)
+{
+ register char *cplim = (char *) &ap->sin_addr;
+ register char *cp = (char *) (&ap->sin_addr + 1);
+
+ ap->sin_len = 0;
+ while (--cp >= cplim)
+ if (*cp) {
+ (ap)->sin_len = cp - (char *) (ap) + 1;
+ break;
+ }
+}
+
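+/*
+ * Count the leading one bits of a (contiguous) netmask, i.e. return its
+ * CIDR prefix length.
+ */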
+static int
+in_mask2len(struct in_addr *mask)
+{
+ int x, y;
+ u_char *p;
+
+ p = (u_char *)mask;
+ for (x = 0; x < sizeof(*mask); x++) {
+ if (p[x] != 0xff)
+ break;
+ }
+ y = 0;
+ if (x < sizeof(*mask)) {
+ for (y = 0; y < 8; y++) {
+ if ((p[x] & (0x80 >> y)) == 0)
+ break;
+ }
+ }
+ return (x * 8 + y);
+}
+
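+/*
+ * Build a netmask from a CIDR prefix length; the inverse of in_mask2len().
+ */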
+static void
+in_len2mask(struct in_addr *mask, int len)
+{
+ int i;
+ u_char *p;
+
+ p = (u_char *)mask;
+ bzero(mask, sizeof(*mask));
+ for (i = 0; i < len / 8; i++)
+ p[i] = 0xff;
+ if (len % 8)
+ p[i] = (0xff00 >> (len % 8)) & 0xff;
+}
+
+/*
+ * Generic internet control operations (ioctl's).
+ *
+ * ifp is NULL if not an interface-specific ioctl.
+ */
+/* ARGSUSED */
+int
+in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
+ struct thread *td)
+{
+ register struct ifreq *ifr = (struct ifreq *)data;
+ register struct in_ifaddr *ia, *iap;
+ register struct ifaddr *ifa;
+ struct in_addr allhosts_addr;
+ struct in_addr dst;
+ struct in_ifinfo *ii;
+ struct in_aliasreq *ifra = (struct in_aliasreq *)data;
+ struct sockaddr_in oldaddr;
+ int error, hostIsNew, iaIsNew, maskIsNew;
+ int iaIsFirst;
+
+ ia = NULL;
+ iaIsFirst = 0;
+ iaIsNew = 0;
+ allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
+
+ /*
+ * Filter out ioctls we implement directly; forward the rest on to
+ * in_lifaddr_ioctl() and ifp->if_ioctl().
+ */
+ switch (cmd) {
+ case SIOCAIFADDR:
+ case SIOCDIFADDR:
+ case SIOCGIFADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCGIFDSTADDR:
+ case SIOCGIFNETMASK:
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCSIFNETMASK:
+ break;
+
+ case SIOCALIFADDR:
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
+ }
+ if (ifp == NULL)
+ return (EINVAL);
+ return in_lifaddr_ioctl(so, cmd, data, ifp, td);
+
+ case SIOCDLIFADDR:
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_DELIFADDR);
+ if (error)
+ return (error);
+ }
+ if (ifp == NULL)
+ return (EINVAL);
+ return in_lifaddr_ioctl(so, cmd, data, ifp, td);
+
+ case SIOCGLIFADDR:
+ if (ifp == NULL)
+ return (EINVAL);
+ return in_lifaddr_ioctl(so, cmd, data, ifp, td);
+
+ default:
+ if (ifp == NULL || ifp->if_ioctl == NULL)
+ return (EOPNOTSUPP);
+ return ((*ifp->if_ioctl)(ifp, cmd, data));
+ }
+
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+
+ /*
+ * Security checks before we get involved in any work.
+ */
+ switch (cmd) {
+ case SIOCAIFADDR:
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFNETMASK:
+ case SIOCSIFDSTADDR:
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
+ }
+ break;
+
+ case SIOCDIFADDR:
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_DELIFADDR);
+ if (error)
+ return (error);
+ }
+ break;
+ }
+
+ /*
+ * Find address for this interface, if it exists.
+ *
+ * If an alias address was specified, find that one instead of the
+ * first one on the interface, if possible.
+ */
+ dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
+ IN_IFADDR_RLOCK();
+ LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
+ if (iap->ia_ifp == ifp &&
+ iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
+ if (td == NULL || prison_check_ip4(td->td_ucred,
+ &dst) == 0)
+ ia = iap;
+ break;
+ }
+ }
+ if (ia != NULL)
+ ifa_ref(&ia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+ if (ia == NULL) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ iap = ifatoia(ifa);
+ if (iap->ia_addr.sin_family == AF_INET) {
+ if (td != NULL &&
+ prison_check_ip4(td->td_ucred,
+ &iap->ia_addr.sin_addr) != 0)
+ continue;
+ ia = iap;
+ break;
+ }
+ }
+ if (ia != NULL)
+ ifa_ref(&ia->ia_ifa);
+ IF_ADDR_UNLOCK(ifp);
+ }
+ if (ia == NULL)
+ iaIsFirst = 1;
+
+ error = 0;
+ switch (cmd) {
+ case SIOCAIFADDR:
+ case SIOCDIFADDR:
+ if (ifra->ifra_addr.sin_family == AF_INET) {
+ struct in_ifaddr *oia;
+
+ IN_IFADDR_RLOCK();
+ for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
+ if (ia->ia_ifp == ifp &&
+ ia->ia_addr.sin_addr.s_addr ==
+ ifra->ifra_addr.sin_addr.s_addr)
+ break;
+ }
+ if (ia != NULL && ia != oia)
+ ifa_ref(&ia->ia_ifa);
+ if (oia != NULL && ia != oia)
+ ifa_free(&oia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+ if ((ifp->if_flags & IFF_POINTOPOINT)
+ && (cmd == SIOCAIFADDR)
+ && (ifra->ifra_dstaddr.sin_addr.s_addr
+ == INADDR_ANY)) {
+ error = EDESTADDRREQ;
+ goto out;
+ }
+ }
+ if (cmd == SIOCDIFADDR && ia == NULL) {
+ error = EADDRNOTAVAIL;
+ goto out;
+ }
+ /* FALLTHROUGH */
+ case SIOCSIFADDR:
+ case SIOCSIFNETMASK:
+ case SIOCSIFDSTADDR:
+ if (ia == NULL) {
+ ia = (struct in_ifaddr *)
+ malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
+ M_ZERO);
+ if (ia == NULL) {
+ error = ENOBUFS;
+ goto out;
+ }
+
+ ifa = &ia->ia_ifa;
+ ifa_init(ifa);
+ ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
+ ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
+
+ ia->ia_sockmask.sin_len = 8;
+ ia->ia_sockmask.sin_family = AF_INET;
+ if (ifp->if_flags & IFF_BROADCAST) {
+ ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
+ ia->ia_broadaddr.sin_family = AF_INET;
+ }
+ ia->ia_ifp = ifp;
+
+ ifa_ref(ifa); /* if_addrhead */
+ IF_ADDR_LOCK(ifp);
+ TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
+ IF_ADDR_UNLOCK(ifp);
+ ifa_ref(ifa); /* in_ifaddrhead */
+ IN_IFADDR_WLOCK();
+ TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
+ IN_IFADDR_WUNLOCK();
+ iaIsNew = 1;
+ }
+ break;
+
+ case SIOCSIFBRDADDR:
+ case SIOCGIFADDR:
+ case SIOCGIFNETMASK:
+ case SIOCGIFDSTADDR:
+ case SIOCGIFBRDADDR:
+ if (ia == NULL) {
+ error = EADDRNOTAVAIL;
+ goto out;
+ }
+ break;
+ }
+
+ /*
+ * Most paths in this switch return directly or via out. Only paths
+ * that remove the address break in order to hit common removal code.
+ */
+ switch (cmd) {
+ case SIOCGIFADDR:
+ *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
+ goto out;
+
+ case SIOCGIFBRDADDR:
+ if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+ error = EINVAL;
+ goto out;
+ }
+ *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
+ goto out;
+
+ case SIOCGIFDSTADDR:
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
+ error = EINVAL;
+ goto out;
+ }
+ *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
+ goto out;
+
+ case SIOCGIFNETMASK:
+ *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
+ goto out;
+
+ case SIOCSIFDSTADDR:
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
+ error = EINVAL;
+ goto out;
+ }
+ oldaddr = ia->ia_dstaddr;
+ ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
+ if (ifp->if_ioctl != NULL) {
+ error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
+ (caddr_t)ia);
+ if (error) {
+ ia->ia_dstaddr = oldaddr;
+ goto out;
+ }
+ }
+ if (ia->ia_flags & IFA_ROUTE) {
+ ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
+ rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+ ia->ia_ifa.ifa_dstaddr =
+ (struct sockaddr *)&ia->ia_dstaddr;
+ rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+ }
+ goto out;
+
+ case SIOCSIFBRDADDR:
+ if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+ error = EINVAL;
+ goto out;
+ }
+ ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
+ goto out;
+
+ case SIOCSIFADDR:
+ error = in_ifinit(ifp, ia,
+ (struct sockaddr_in *) &ifr->ifr_addr, 1);
+ if (error != 0 && iaIsNew)
+ break;
+ if (error == 0) {
+ ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
+ if (iaIsFirst &&
+ (ifp->if_flags & IFF_MULTICAST) != 0) {
+ error = in_joingroup(ifp, &allhosts_addr,
+ NULL, &ii->ii_allhosts);
+ }
+ EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ }
+ error = 0;
+ goto out;
+
+ case SIOCSIFNETMASK:
+ ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
+ ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
+ goto out;
+
+ case SIOCAIFADDR:
+ maskIsNew = 0;
+ hostIsNew = 1;
+ error = 0;
+ if (ia->ia_addr.sin_family == AF_INET) {
+ if (ifra->ifra_addr.sin_len == 0) {
+ ifra->ifra_addr = ia->ia_addr;
+ hostIsNew = 0;
+ } else if (ifra->ifra_addr.sin_addr.s_addr ==
+ ia->ia_addr.sin_addr.s_addr)
+ hostIsNew = 0;
+ }
+ if (ifra->ifra_mask.sin_len) {
+ /*
+ * QL: XXX
+ * Need to scrub the prefix here in case
+ * the issued command is SIOCAIFADDR with
+ * the same address, but with a different
+ * prefix length. And if the prefix length
+ * is the same as before, then the call is
+			 * unnecessarily executed here.
+ */
+ in_ifscrub(ifp, ia);
+ ia->ia_sockmask = ifra->ifra_mask;
+ ia->ia_sockmask.sin_family = AF_INET;
+ ia->ia_subnetmask =
+ ntohl(ia->ia_sockmask.sin_addr.s_addr);
+ maskIsNew = 1;
+ }
+ if ((ifp->if_flags & IFF_POINTOPOINT) &&
+ (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+ in_ifscrub(ifp, ia);
+ ia->ia_dstaddr = ifra->ifra_dstaddr;
+ maskIsNew = 1; /* We lie; but the effect's the same */
+ }
+ if (ifra->ifra_addr.sin_family == AF_INET &&
+ (hostIsNew || maskIsNew))
+ error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+ if (error != 0 && iaIsNew)
+ goto out;
+
+ if ((ifp->if_flags & IFF_BROADCAST) &&
+ (ifra->ifra_broadaddr.sin_family == AF_INET))
+ ia->ia_broadaddr = ifra->ifra_broadaddr;
+ if (error == 0) {
+ ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
+ if (iaIsFirst &&
+ (ifp->if_flags & IFF_MULTICAST) != 0) {
+ error = in_joingroup(ifp, &allhosts_addr,
+ NULL, &ii->ii_allhosts);
+ }
+ EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ }
+ goto out;
+
+ case SIOCDIFADDR:
+ /*
+ * in_ifscrub kills the interface route.
+ */
+ in_ifscrub(ifp, ia);
+
+ /*
+ * in_ifadown gets rid of all the rest of
+ * the routes. This is not quite the right
+ * thing to do, but at least if we are running
+ * a routing process they will come back.
+ */
+ in_ifadown(&ia->ia_ifa, 1);
+ EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ error = 0;
+ break;
+
+ default:
+ panic("in_control: unsupported ioctl");
+ }
+
+ IF_ADDR_LOCK(ifp);
+ /* Re-check that ia is still part of the list. */
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa == &ia->ia_ifa)
+ break;
+ }
+ if (ifa == NULL) {
+ /*
+ * If we lost the race with another thread, there is no need to
+ * try it again for the next loop as there is no other exit
+ * path between here and out.
+ */
+ IF_ADDR_UNLOCK(ifp);
+ error = EADDRNOTAVAIL;
+ goto out;
+ }
+ TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
+ IF_ADDR_UNLOCK(ifp);
+ ifa_free(&ia->ia_ifa); /* if_addrhead */
+
+ IN_IFADDR_WLOCK();
+ TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
+ if (ia->ia_addr.sin_family == AF_INET) {
+ struct in_ifaddr *if_ia;
+
+ LIST_REMOVE(ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
+ /*
+ * If this is the last IPv4 address configured on this
+ * interface, leave the all-hosts group.
+ * No state-change report need be transmitted.
+ */
+ if_ia = NULL;
+ IFP_TO_IA(ifp, if_ia);
+ if (if_ia == NULL) {
+ ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
+ IN_MULTI_LOCK();
+ if (ii->ii_allhosts) {
+ (void)in_leavegroup_locked(ii->ii_allhosts,
+ NULL);
+ ii->ii_allhosts = NULL;
+ }
+ IN_MULTI_UNLOCK();
+ } else
+ ifa_free(&if_ia->ia_ifa);
+ } else
+ IN_IFADDR_WUNLOCK();
+ ifa_free(&ia->ia_ifa); /* in_ifaddrhead */
+out:
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return (error);
+}
+
+/*
+ * SIOC[GAD]LIFADDR.
+ * SIOCGLIFADDR: get first address. (?!?)
+ * SIOCGLIFADDR with IFLR_PREFIX:
+ * get first address that matches the specified prefix.
+ * SIOCALIFADDR: add the specified address.
+ * SIOCALIFADDR with IFLR_PREFIX:
+ * EINVAL since we can't deduce hostid part of the address.
+ * SIOCDLIFADDR: delete the specified address.
+ * SIOCDLIFADDR with IFLR_PREFIX:
+ * delete the first address that matches the specified prefix.
+ * return values:
+ * EINVAL on invalid parameters
+ * EADDRNOTAVAIL on prefix match failed/specified address not found
+ * other values may be returned from in_ioctl()
+ */
+static int
+in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
+ struct ifnet *ifp, struct thread *td)
+{
+ struct if_laddrreq *iflr = (struct if_laddrreq *)data;
+ struct ifaddr *ifa;
+
+ /* sanity checks */
+ if (data == NULL || ifp == NULL) {
+ panic("invalid argument to in_lifaddr_ioctl");
+		/*NOTREACHED*/
+ }
+
+ switch (cmd) {
+ case SIOCGLIFADDR:
+ /* address must be specified on GET with IFLR_PREFIX */
+ if ((iflr->flags & IFLR_PREFIX) == 0)
+ break;
+ /*FALLTHROUGH*/
+ case SIOCALIFADDR:
+ case SIOCDLIFADDR:
+ /* address must be specified on ADD and DELETE */
+ if (iflr->addr.ss_family != AF_INET)
+ return (EINVAL);
+ if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ /* XXX need improvement */
+ if (iflr->dstaddr.ss_family
+ && iflr->dstaddr.ss_family != AF_INET)
+ return (EINVAL);
+ if (iflr->dstaddr.ss_family
+ && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ break;
+ default: /*shouldn't happen*/
+ return (EOPNOTSUPP);
+ }
+ if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
+ return (EINVAL);
+
+ switch (cmd) {
+ case SIOCALIFADDR:
+ {
+ struct in_aliasreq ifra;
+
+ if (iflr->flags & IFLR_PREFIX)
+ return (EINVAL);
+
+ /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
+ bzero(&ifra, sizeof(ifra));
+ bcopy(iflr->iflr_name, ifra.ifra_name,
+ sizeof(ifra.ifra_name));
+
+ bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);
+
+ if (iflr->dstaddr.ss_family) { /*XXX*/
+ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
+ iflr->dstaddr.ss_len);
+ }
+
+ ifra.ifra_mask.sin_family = AF_INET;
+ ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
+ in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
+
+ return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
+ }
+ case SIOCGLIFADDR:
+ case SIOCDLIFADDR:
+ {
+ struct in_ifaddr *ia;
+ struct in_addr mask, candidate, match;
+ struct sockaddr_in *sin;
+
+ bzero(&mask, sizeof(mask));
+ bzero(&match, sizeof(match));
+ if (iflr->flags & IFLR_PREFIX) {
+ /* lookup a prefix rather than address. */
+ in_len2mask(&mask, iflr->prefixlen);
+
+ sin = (struct sockaddr_in *)&iflr->addr;
+ match.s_addr = sin->sin_addr.s_addr;
+ match.s_addr &= mask.s_addr;
+
+ /* if you set extra bits, that's wrong */
+ if (match.s_addr != sin->sin_addr.s_addr)
+ return (EINVAL);
+
+ } else {
+ /* on getting an address, take the 1st match */
+ /* on deleting an address, do exact match */
+ if (cmd != SIOCGLIFADDR) {
+ in_len2mask(&mask, 32);
+ sin = (struct sockaddr_in *)&iflr->addr;
+ match.s_addr = sin->sin_addr.s_addr;
+ }
+ }
+
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ if (match.s_addr == 0)
+ break;
+			candidate.s_addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
+ candidate.s_addr &= mask.s_addr;
+ if (candidate.s_addr == match.s_addr)
+ break;
+ }
+ if (ifa == NULL)
+ return (EADDRNOTAVAIL);
+ ia = (struct in_ifaddr *)ifa;
+
+ if (cmd == SIOCGLIFADDR) {
+ /* fill in the if_laddrreq structure */
+ bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
+
+ if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
+ bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
+ ia->ia_dstaddr.sin_len);
+ } else
+ bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
+
+ iflr->prefixlen =
+ in_mask2len(&ia->ia_sockmask.sin_addr);
+
+ iflr->flags = 0; /*XXX*/
+
+ return (0);
+ } else {
+ struct in_aliasreq ifra;
+
+ /* fill in_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
+ bzero(&ifra, sizeof(ifra));
+ bcopy(iflr->iflr_name, ifra.ifra_name,
+ sizeof(ifra.ifra_name));
+
+ bcopy(&ia->ia_addr, &ifra.ifra_addr,
+ ia->ia_addr.sin_len);
+ if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
+ bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
+ ia->ia_dstaddr.sin_len);
+ }
+			bcopy(&ia->ia_sockmask, &ifra.ifra_mask,
+ ia->ia_sockmask.sin_len);
+
+ return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
+ ifp, td));
+ }
+ }
+ }
+
+ return (EOPNOTSUPP); /*just for safety*/
+}
+
+/*
+ * Delete any existing route for an interface.
+ */
+void
+in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
+{
+
+ in_scrubprefix(ia);
+}
+
+/*
+ * Initialize an interface's internet address
+ * and routing table entry.
+ */
+static int
+in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
+ int scrub)
+{
+ register u_long i = ntohl(sin->sin_addr.s_addr);
+ struct sockaddr_in oldaddr;
+ int s = splimp(), flags = RTF_UP, error = 0;
+
+ oldaddr = ia->ia_addr;
+ if (oldaddr.sin_family == AF_INET)
+ LIST_REMOVE(ia, ia_hash);
+ ia->ia_addr = *sin;
+ if (ia->ia_addr.sin_family == AF_INET) {
+ IN_IFADDR_WLOCK();
+ LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
+ ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
+ }
+ /*
+ * Give the interface a chance to initialize
+ * if this is its first address,
+ * and to validate the address if necessary.
+ */
+ if (ifp->if_ioctl != NULL) {
+ error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
+ if (error) {
+ splx(s);
+ /* LIST_REMOVE(ia, ia_hash) is done in in_control */
+ ia->ia_addr = oldaddr;
+ IN_IFADDR_WLOCK();
+ if (ia->ia_addr.sin_family == AF_INET)
+ LIST_INSERT_HEAD(INADDR_HASH(
+ ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
+ else
+ /*
+ * If oldaddr family is not AF_INET (e.g.
+			 * the interface has just been created), in_control
+			 * does not call LIST_REMOVE, and we end up
+			 * with bogus ia entries in the hash.
+ */
+ LIST_REMOVE(ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
+ return (error);
+ }
+ }
+ splx(s);
+ if (scrub) {
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+ in_ifscrub(ifp, ia);
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ }
+ if (IN_CLASSA(i))
+ ia->ia_netmask = IN_CLASSA_NET;
+ else if (IN_CLASSB(i))
+ ia->ia_netmask = IN_CLASSB_NET;
+ else
+ ia->ia_netmask = IN_CLASSC_NET;
+ /*
+ * The subnet mask usually includes at least the standard network part,
+	 * but may be smaller in the case of supernetting.
+ * If it is set, we believe it.
+ */
+ if (ia->ia_subnetmask == 0) {
+ ia->ia_subnetmask = ia->ia_netmask;
+ ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
+ } else
+ ia->ia_netmask &= ia->ia_subnetmask;
+ ia->ia_net = i & ia->ia_netmask;
+ ia->ia_subnet = i & ia->ia_subnetmask;
+ in_socktrim(&ia->ia_sockmask);
+ /*
+ * XXX: carp(4) does not have interface route
+ */
+ if (ifp->if_type == IFT_CARP)
+ return (0);
+ /*
+ * Add route for the network.
+ */
+ ia->ia_ifa.ifa_metric = ifp->if_metric;
+ if (ifp->if_flags & IFF_BROADCAST) {
+ ia->ia_broadaddr.sin_addr.s_addr =
+ htonl(ia->ia_subnet | ~ia->ia_subnetmask);
+ ia->ia_netbroadcast.s_addr =
+ htonl(ia->ia_net | ~ ia->ia_netmask);
+ } else if (ifp->if_flags & IFF_LOOPBACK) {
+ ia->ia_dstaddr = ia->ia_addr;
+ flags |= RTF_HOST;
+ } else if (ifp->if_flags & IFF_POINTOPOINT) {
+ if (ia->ia_dstaddr.sin_family != AF_INET)
+ return (0);
+ flags |= RTF_HOST;
+ }
+ if ((error = in_addprefix(ia, flags)) != 0)
+ return (error);
+
+ if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
+ return (0);
+
+ if (ifp->if_flags & IFF_POINTOPOINT) {
+ if (ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
+ return (0);
+ }
+
+ /*
+ * add a loopback route to self
+ */
+ if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
+ struct route ia_ro;
+
+ bzero(&ia_ro, sizeof(ia_ro));
+ *((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr;
+ rtalloc_ign_fib(&ia_ro, 0, 0);
+ if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
+ (ia_ro.ro_rt->rt_ifp == V_loif)) {
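+			/*
+			 * A route via the loopback interface already
+			 * covers this address; keep it and just mark
+			 * the address with IFA_RTSELF below.
+			 */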
+ RT_LOCK(ia_ro.ro_rt);
+ RT_ADDREF(ia_ro.ro_rt);
+ RTFREE_LOCKED(ia_ro.ro_rt);
+ } else
+ error = ifa_add_loopback_route((struct ifaddr *)ia,
+ (struct sockaddr *)&ia->ia_addr);
+ if (error == 0)
+ ia->ia_flags |= IFA_RTSELF;
+ if (ia_ro.ro_rt != NULL)
+ RTFREE(ia_ro.ro_rt);
+ }
+
+ return (error);
+}
+
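+/*
+ * Loopback and point-to-point interfaces get host routes (RTF_HOST)
+ * instead of prefix routes when their addresses are installed.
+ */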
+#define rtinitflags(x) \
+ ((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
+ ? RTF_HOST : 0)
+
+/*
+ * Generate a routing message when inserting or deleting
+ * an interface address alias.
+ */
+static void
+in_addralias_rtmsg(int cmd, struct in_addr *prefix, struct in_ifaddr *target)
+{
+ struct route pfx_ro;
+ struct sockaddr_in *pfx_addr;
+ struct rtentry msg_rt;
+
+ /* QL: XXX
+ * This is a bit questionable because there is no
+ * additional route entry added/deleted for an address
+ * alias. Therefore this route report is inaccurate.
+ */
+ bzero(&pfx_ro, sizeof(pfx_ro));
+ pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
+ pfx_addr->sin_len = sizeof(*pfx_addr);
+ pfx_addr->sin_family = AF_INET;
+ pfx_addr->sin_addr = *prefix;
+ rtalloc_ign_fib(&pfx_ro, 0, 0);
+ if (pfx_ro.ro_rt != NULL) {
+ msg_rt = *pfx_ro.ro_rt;
+
+ /* QL: XXX
+ * Point the gateway to the new interface
+ * address as if a new prefix route entry has
+ * been added through the new address alias.
+		 * All other parts of the rtentry are accurate,
+ * e.g., rt_key, rt_mask, rt_ifp etc.
+ */
+ msg_rt.rt_gateway =
+ (struct sockaddr *)&target->ia_addr;
+ rt_newaddrmsg(cmd,
+ (struct ifaddr *)target,
+ 0, &msg_rt);
+ RTFREE(pfx_ro.ro_rt);
+ }
+ return;
+}
+
+/*
+ * Check if we have a route for the given prefix already or add one accordingly.
+ */
+static int
+in_addprefix(struct in_ifaddr *target, int flags)
+{
+ struct in_ifaddr *ia;
+ struct in_addr prefix, mask, p, m;
+ int error;
+
+ if ((flags & RTF_HOST) != 0) {
+ prefix = target->ia_dstaddr.sin_addr;
+ mask.s_addr = 0;
+ } else {
+ prefix = target->ia_addr.sin_addr;
+ mask = target->ia_sockmask.sin_addr;
+ prefix.s_addr &= mask.s_addr;
+ }
+
+ IN_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if (rtinitflags(ia)) {
+ p = ia->ia_addr.sin_addr;
+
+ if (prefix.s_addr != p.s_addr)
+ continue;
+ } else {
+ p = ia->ia_addr.sin_addr;
+ m = ia->ia_sockmask.sin_addr;
+ p.s_addr &= m.s_addr;
+
+ if (prefix.s_addr != p.s_addr ||
+ mask.s_addr != m.s_addr)
+ continue;
+ }
+
+ /*
+ * If we got a matching prefix route inserted by other
+ * interface address, we are done here.
+ */
+ if (ia->ia_flags & IFA_ROUTE) {
+#ifdef RADIX_MPATH
+ if (ia->ia_addr.sin_addr.s_addr ==
+ target->ia_addr.sin_addr.s_addr) {
+ IN_IFADDR_RUNLOCK();
+ return (EEXIST);
+ } else
+ break;
+#endif
+ if (V_sameprefixcarponly &&
+ target->ia_ifp->if_type != IFT_CARP &&
+ ia->ia_ifp->if_type != IFT_CARP) {
+ IN_IFADDR_RUNLOCK();
+ return (EEXIST);
+ } else {
+ in_addralias_rtmsg(RTM_ADD, &prefix, target);
+ IN_IFADDR_RUNLOCK();
+ return (0);
+ }
+ }
+ }
+ IN_IFADDR_RUNLOCK();
+
+ /*
+	 * No one seems to have this prefix route, so we try to insert it.
+ */
+ error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
+ if (!error)
+ target->ia_flags |= IFA_ROUTE;
+ return (error);
+}
+
+extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
+
+/*
+ * If there is no other address in the system that can serve a route to the
+ * same prefix, remove the route. Hand over the route to the new address
+ * otherwise.
+ */
+static int
+in_scrubprefix(struct in_ifaddr *target)
+{
+ struct in_ifaddr *ia;
+ struct in_addr prefix, mask, p;
+ int error = 0;
+ struct sockaddr_in prefix0, mask0;
+
+ /*
+ * Remove the loopback route to the interface address.
+ * The "useloopback" setting is not consulted because if the
+ * user configures an interface address, turns off this
+ * setting, and then tries to delete that interface address,
+ * checking the current setting of "useloopback" would leave
+ * that interface address loopback route untouched, which
+ * would be wrong. Therefore the interface address loopback route
+ * deletion is unconditional.
+ */
+ if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
+ !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
+ (target->ia_flags & IFA_RTSELF)) {
+ struct route ia_ro;
+ int freeit = 0;
+
+ bzero(&ia_ro, sizeof(ia_ro));
+ *((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
+ rtalloc_ign_fib(&ia_ro, 0, 0);
+ if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
+ (ia_ro.ro_rt->rt_ifp == V_loif)) {
+ RT_LOCK(ia_ro.ro_rt);
+ if (ia_ro.ro_rt->rt_refcnt <= 1)
+ freeit = 1;
+ else
+ RT_REMREF(ia_ro.ro_rt);
+ RTFREE_LOCKED(ia_ro.ro_rt);
+ }
+ if (freeit)
+ error = ifa_del_loopback_route((struct ifaddr *)target,
+ (struct sockaddr *)&target->ia_addr);
+ if (error == 0)
+ target->ia_flags &= ~IFA_RTSELF;
+ /* remove arp cache */
+ arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
+ }
+
+ if (rtinitflags(target))
+ prefix = target->ia_dstaddr.sin_addr;
+ else {
+ prefix = target->ia_addr.sin_addr;
+ mask = target->ia_sockmask.sin_addr;
+ prefix.s_addr &= mask.s_addr;
+ }
+
+ if ((target->ia_flags & IFA_ROUTE) == 0) {
+ in_addralias_rtmsg(RTM_DELETE, &prefix, target);
+ return (0);
+ }
+
+ IN_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if (rtinitflags(ia))
+ p = ia->ia_dstaddr.sin_addr;
+ else {
+ p = ia->ia_addr.sin_addr;
+ p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
+ }
+
+ if (prefix.s_addr != p.s_addr)
+ continue;
+
+ /*
+ * If we got a matching prefix address, move IFA_ROUTE and
+ * the route itself to it. Make sure that routing daemons
+ * get a heads-up.
+ *
+ * XXX: a special case for carp(4) interface - this should
+ * be more generally specified as an interface that
+ * doesn't support such action.
+ */
+ if ((ia->ia_flags & IFA_ROUTE) == 0
+ && (ia->ia_ifp->if_type != IFT_CARP)
+ ) {
+ IN_IFADDR_RUNLOCK();
+ rtinit(&(target->ia_ifa), (int)RTM_DELETE,
+ rtinitflags(target));
+ target->ia_flags &= ~IFA_ROUTE;
+
+ error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
+ rtinitflags(ia) | RTF_UP);
+ if (error == 0)
+ ia->ia_flags |= IFA_ROUTE;
+ return (error);
+ }
+ }
+ IN_IFADDR_RUNLOCK();
+
+ /*
+ * remove all L2 entries on the given prefix
+ */
+ bzero(&prefix0, sizeof(prefix0));
+ prefix0.sin_len = sizeof(prefix0);
+ prefix0.sin_family = AF_INET;
+ prefix0.sin_addr.s_addr = target->ia_subnet;
+ bzero(&mask0, sizeof(mask0));
+ mask0.sin_len = sizeof(mask0);
+ mask0.sin_family = AF_INET;
+ mask0.sin_addr.s_addr = target->ia_subnetmask;
+ lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
+ (struct sockaddr *)&mask0);
+
+ /*
+	 * As no one seems to have this prefix, we can remove the route.
+ */
+ rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
+ target->ia_flags &= ~IFA_ROUTE;
+ return (0);
+}
+
+#undef rtinitflags
+
+/*
+ * Return 1 if the address might be a local broadcast address.
+ */
+int
+in_broadcast(struct in_addr in, struct ifnet *ifp)
+{
+ register struct ifaddr *ifa;
+ u_long t;
+
+ if (in.s_addr == INADDR_BROADCAST ||
+ in.s_addr == INADDR_ANY)
+ return (1);
+ if ((ifp->if_flags & IFF_BROADCAST) == 0)
+ return (0);
+ t = ntohl(in.s_addr);
+ /*
+ * Look through the list of addresses for a match
+ * with a broadcast address.
+ */
+#define ia ((struct in_ifaddr *)ifa)
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+ if (ifa->ifa_addr->sa_family == AF_INET &&
+ (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
+ in.s_addr == ia->ia_netbroadcast.s_addr ||
+ /*
+ * Check for old-style (host 0) broadcast.
+ */
+ t == ia->ia_subnet || t == ia->ia_net) &&
+ /*
+ * Check for an all one subnetmask. These
+ * only exist when an interface gets a secondary
+ * address.
+ */
+ ia->ia_subnetmask != (u_long)0xffffffff)
+ return (1);
+ return (0);
+#undef ia
+}
+
+/*
+ * On interface removal, clean up IPv4 data structures hung off of the ifnet.
+ */
+void
+in_ifdetach(struct ifnet *ifp)
+{
+
+ in_pcbpurgeif0(&V_ripcbinfo, ifp);
+ in_pcbpurgeif0(&V_udbinfo, ifp);
+ in_purgemaddrs(ifp);
+}
+
+/*
+ * Delete all IPv4 multicast address records, and associated link-layer
+ * multicast address records, associated with ifp.
+ * XXX It looks like domifdetach runs AFTER the link layer cleanup.
+ * XXX This should not race with ifma_protospec being set during
+ * a new allocation, if it does, we have bigger problems.
+ */
+static void
+in_purgemaddrs(struct ifnet *ifp)
+{
+ LIST_HEAD(,in_multi) purgeinms;
+ struct in_multi *inm, *tinm;
+ struct ifmultiaddr *ifma;
+
+ LIST_INIT(&purgeinms);
+ IN_MULTI_LOCK();
+
+ /*
+ * Extract list of in_multi associated with the detaching ifp
+ * which the PF_INET layer is about to release.
+ * We need to do this as IF_ADDR_LOCK() may be re-acquired
+ * by code further down.
+ */
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+#if 0
+ KASSERT(ifma->ifma_protospec != NULL,
+ ("%s: ifma_protospec is NULL", __func__));
+#endif
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
+ LIST_REMOVE(inm, inm_link);
+ inm_release_locked(inm);
+ }
+ igmp_ifdetach(ifp);
+
+ IN_MULTI_UNLOCK();
+}
+
+#include <freebsd/net/if_dl.h>
+#include <freebsd/netinet/if_ether.h>
+
+struct in_llentry {
+ struct llentry base;
+ struct sockaddr_in l3_addr4;
+};
+
+static struct llentry *
+in_lltable_new(const struct sockaddr *l3addr, u_int flags)
+{
+ struct in_llentry *lle;
+
+ lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
+ if (lle == NULL) /* NB: caller generates msg */
+ return NULL;
+
+ callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
+ /*
+ * For IPv4 this will trigger "arpresolve" to generate
+ * an ARP request.
+ */
+ lle->base.la_expire = time_second; /* mark expired */
+ lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
+ lle->base.lle_refcnt = 1;
+ LLE_LOCK_INIT(&lle->base);
+ return &lle->base;
+}
+
+/*
+ * Deletes an address from the address table.
+ * This function is called by the timer functions
+ * such as arptimer() and nd6_llinfo_timer(), and
+ * the caller does the locking.
+ */
+static void
+in_lltable_free(struct lltable *llt, struct llentry *lle)
+{
+ LLE_WUNLOCK(lle);
+ LLE_LOCK_DESTROY(lle);
+ free(lle, M_LLTABLE);
+}
+
+
+#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \
+ (((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )
+
+static void
+in_lltable_prefix_free(struct lltable *llt,
+ const struct sockaddr *prefix,
+ const struct sockaddr *mask)
+{
+ const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
+ const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
+ struct llentry *lle, *next;
+ register int i;
+
+	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
+ LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
+
+ if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle),
+ pfx, msk)) {
+ int canceled;
+
+ canceled = callout_drain(&lle->la_timer);
+ LLE_WLOCK(lle);
+ if (canceled)
+ LLE_REMREF(lle);
+ llentry_free(lle);
+ }
+ }
+ }
+}
+
+
+static int
+in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
+{
+ struct rtentry *rt;
+
+ KASSERT(l3addr->sa_family == AF_INET,
+ ("sin_family %d", l3addr->sa_family));
+
+ /* XXX rtalloc1 should take a const param */
+ rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
+ if (rt == NULL || (!(flags & LLE_PUB) &&
+ ((rt->rt_flags & RTF_GATEWAY) ||
+ (rt->rt_ifp != ifp)))) {
+#ifdef DIAGNOSTIC
+ log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
+ inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
+#endif
+ if (rt != NULL)
+ RTFREE_LOCKED(rt);
+ return (EINVAL);
+ }
+ RTFREE_LOCKED(rt);
+ return 0;
+}
+
+/*
+ * Return NULL if not found or marked for deletion.
+ * If found return lle read locked.
+ */
+static struct llentry *
+in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
+{
+ const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
+ struct ifnet *ifp = llt->llt_ifp;
+ struct llentry *lle;
+ struct llentries *lleh;
+ u_int hashkey;
+
+ IF_AFDATA_LOCK_ASSERT(ifp);
+ KASSERT(l3addr->sa_family == AF_INET,
+ ("sin_family %d", l3addr->sa_family));
+
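+	/*
+	 * Hash on the IPv4 address to select a bucket; collisions are
+	 * resolved by walking the bucket's list below.
+	 */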
+ hashkey = sin->sin_addr.s_addr;
+ lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
+ LIST_FOREACH(lle, lleh, lle_next) {
+ struct sockaddr_in *sa2 = (struct sockaddr_in *)L3_ADDR(lle);
+ if (lle->la_flags & LLE_DELETED)
+ continue;
+ if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
+ break;
+ }
+ if (lle == NULL) {
+#ifdef DIAGNOSTIC
+ if (flags & LLE_DELETE)
+ log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle);
+#endif
+ if (!(flags & LLE_CREATE))
+ return (NULL);
+ /*
+		 * A route that covers the given address must have been
+		 * installed first, because we are doing a resolution;
+		 * verify this.
+ */
+ if (!(flags & LLE_IFADDR) &&
+ in_lltable_rtcheck(ifp, flags, l3addr) != 0)
+ goto done;
+
+ lle = in_lltable_new(l3addr, flags);
+ if (lle == NULL) {
+ log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
+ goto done;
+ }
+ lle->la_flags = flags & ~LLE_CREATE;
+ if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
+ bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
+ lle->la_flags |= (LLE_VALID | LLE_STATIC);
+ }
+
+ lle->lle_tbl = llt;
+ lle->lle_head = lleh;
+ LIST_INSERT_HEAD(lleh, lle, lle_next);
+ } else if (flags & LLE_DELETE) {
+ if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
+ LLE_WLOCK(lle);
+ lle->la_flags = LLE_DELETED;
+ LLE_WUNLOCK(lle);
+#ifdef DIAGNOSTIC
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+#endif
+ }
+ lle = (void *)-1;
+
+ }
+ if (LLE_IS_VALID(lle)) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WLOCK(lle);
+ else
+ LLE_RLOCK(lle);
+ }
+done:
+ return (lle);
+}
+
+static int
+in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
+{
+#define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle))
+ struct ifnet *ifp = llt->llt_ifp;
+ struct llentry *lle;
+ /* XXX stack use */
+ struct {
+ struct rt_msghdr rtm;
+ struct sockaddr_inarp sin;
+ struct sockaddr_dl sdl;
+ } arpc;
+ int error, i;
+
+ LLTABLE_LOCK_ASSERT();
+
+ error = 0;
+ for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
+ LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
+ struct sockaddr_dl *sdl;
+
+ /* skip deleted entries */
+ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
+ continue;
+ /* Skip if jailed and not a valid IP of the prison. */
+ if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
+ continue;
+ /*
+ * produce a msg made of:
+ * struct rt_msghdr;
+ * struct sockaddr_inarp; (IPv4)
+ * struct sockaddr_dl;
+ */
+ bzero(&arpc, sizeof(arpc));
+ arpc.rtm.rtm_msglen = sizeof(arpc);
+ arpc.rtm.rtm_version = RTM_VERSION;
+ arpc.rtm.rtm_type = RTM_GET;
+ arpc.rtm.rtm_flags = RTF_UP;
+ arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
+ arpc.sin.sin_family = AF_INET;
+ arpc.sin.sin_len = sizeof(arpc.sin);
+ arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
+
+ /* publish */
+ if (lle->la_flags & LLE_PUB) {
+ arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
+ /* proxy only */
+ if (lle->la_flags & LLE_PROXY)
+ arpc.sin.sin_other = SIN_PROXY;
+ }
+
+ sdl = &arpc.sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_len = sizeof(*sdl);
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = ifp->if_type;
+ if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
+ sdl->sdl_alen = ifp->if_addrlen;
+ bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
+ } else {
+ sdl->sdl_alen = 0;
+ bzero(LLADDR(sdl), ifp->if_addrlen);
+ }
+
+ arpc.rtm.rtm_rmx.rmx_expire =
+ lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
+ arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
+ if (lle->la_flags & LLE_STATIC)
+ arpc.rtm.rtm_flags |= RTF_STATIC;
+ arpc.rtm.rtm_index = ifp->if_index;
+ error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
+ if (error)
+ break;
+ }
+ }
+ return error;
+#undef SIN
+}
+
+void *
+in_domifattach(struct ifnet *ifp)
+{
+ struct in_ifinfo *ii;
+ struct lltable *llt;
+
+ ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
+
+ llt = lltable_init(ifp, AF_INET);
+ if (llt != NULL) {
+ llt->llt_new = in_lltable_new;
+ llt->llt_free = in_lltable_free;
+ llt->llt_prefix_free = in_lltable_prefix_free;
+ llt->llt_rtcheck = in_lltable_rtcheck;
+ llt->llt_lookup = in_lltable_lookup;
+ llt->llt_dump = in_lltable_dump;
+ }
+ ii->ii_llt = llt;
+
+ ii->ii_igmp = igmp_domifattach(ifp);
+
+ return ii;
+}
+
+void
+in_domifdetach(struct ifnet *ifp, void *aux)
+{
+ struct in_ifinfo *ii = (struct in_ifinfo *)aux;
+
+ igmp_domifdetach(ifp);
+ lltable_free(ii->ii_llt);
+ free(ii, M_IFADDR);
+}
diff --git a/freebsd/sys/netinet/in.h b/freebsd/sys/netinet/in.h
new file mode 100644
index 00000000..73c7ca1a
--- /dev/null
+++ b/freebsd/sys/netinet/in.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/in.h>
diff --git a/freebsd/sys/netinet/in_gif.c b/freebsd/sys/netinet/in_gif.c
new file mode 100644
index 00000000..3613e214
--- /dev/null
+++ b/freebsd/sys/netinet/in_gif.c
@@ -0,0 +1,469 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $ */
+
+/*-
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_mrouting.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/malloc.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/in_gif.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_encap.h>
+#include <freebsd/netinet/ip_ecn.h>
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+
+#ifdef MROUTING
+#include <freebsd/netinet/ip_mroute.h>
+#endif /* MROUTING */
+
+#include <freebsd/net/if_gif.h>
+
+static int gif_validate4(const struct ip *, struct gif_softc *,
+ struct ifnet *);
+
+extern struct domain inetdomain;
+struct protosw in_gif_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = 0/* IPPROTO_IPV[46] */,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = in_gif_input,
+ .pr_output = (pr_output_t*)rip_output,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+
+VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
+#define V_ip_gif_ttl VNET(ip_gif_ttl)
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW,
+ &VNET_NAME(ip_gif_ttl), 0, "");
+
+int
+in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
+{
+ struct gif_softc *sc = ifp->if_softc;
+ struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
+ struct sockaddr_in *sin_src = (struct sockaddr_in *)sc->gif_psrc;
+ struct sockaddr_in *sin_dst = (struct sockaddr_in *)sc->gif_pdst;
+ struct ip iphdr; /* capsule IP header, host byte ordered */
+ struct etherip_header eiphdr;
+ int error, len, proto;
+ u_int8_t tos;
+
+ GIF_LOCK_ASSERT(sc);
+
+ if (sin_src == NULL || sin_dst == NULL ||
+ sin_src->sin_family != AF_INET ||
+ sin_dst->sin_family != AF_INET) {
+ m_freem(m);
+ return EAFNOSUPPORT;
+ }
+
+ switch (family) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct ip *ip;
+
+ proto = IPPROTO_IPV4;
+ if (m->m_len < sizeof(*ip)) {
+ m = m_pullup(m, sizeof(*ip));
+ if (!m)
+ return ENOBUFS;
+ }
+ ip = mtod(m, struct ip *);
+ tos = ip->ip_tos;
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct ip6_hdr *ip6;
+ proto = IPPROTO_IPV6;
+ if (m->m_len < sizeof(*ip6)) {
+ m = m_pullup(m, sizeof(*ip6));
+ if (!m)
+ return ENOBUFS;
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
+ tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ break;
+ }
+#endif /* INET6 */
+ case AF_LINK:
+ proto = IPPROTO_ETHERIP;
+
+ /*
+ * GIF_SEND_REVETHIP (disabled by default) intentionally
+		 * sends an EtherIP packet with a reversed version field in
+ * the header. This is a knob for backward compatibility
+ * with FreeBSD 7.2R or prior.
+ */
+ if ((sc->gif_options & GIF_SEND_REVETHIP)) {
+ eiphdr.eip_ver = 0;
+ eiphdr.eip_resvl = ETHERIP_VERSION;
+ eiphdr.eip_resvh = 0;
+ } else {
+ eiphdr.eip_ver = ETHERIP_VERSION;
+ eiphdr.eip_resvl = 0;
+ eiphdr.eip_resvh = 0;
+ }
+ /* prepend Ethernet-in-IP header */
+ M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
+ if (m && m->m_len < sizeof(struct etherip_header))
+ m = m_pullup(m, sizeof(struct etherip_header));
+ if (m == NULL)
+ return ENOBUFS;
+ bcopy(&eiphdr, mtod(m, struct etherip_header *),
+ sizeof(struct etherip_header));
+ break;
+
+ default:
+#ifdef DEBUG
+ printf("in_gif_output: warning: unknown family %d passed\n",
+ family);
+#endif
+ m_freem(m);
+ return EAFNOSUPPORT;
+ }
+
+ bzero(&iphdr, sizeof(iphdr));
+ iphdr.ip_src = sin_src->sin_addr;
+ /* bidirectional configured tunnel mode */
+ if (sin_dst->sin_addr.s_addr != INADDR_ANY)
+ iphdr.ip_dst = sin_dst->sin_addr;
+ else {
+ m_freem(m);
+ return ENETUNREACH;
+ }
+ iphdr.ip_p = proto;
+ /* version will be set in ip_output() */
+ iphdr.ip_ttl = V_ip_gif_ttl;
+ iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip);
+ ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
+ &iphdr.ip_tos, &tos);
+
+ /* prepend new IP header */
+ len = sizeof(struct ip);
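+	/*
+	 * On strict-alignment machines, reserve ETHERIP_ALIGN extra bytes
+	 * so that the encapsulated Ethernet frame can be re-aligned; the
+	 * slack is trimmed again after the headers are prepended.
+	 */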
+#ifndef __NO_STRICT_ALIGNMENT
+ if (family == AF_LINK)
+ len += ETHERIP_ALIGN;
+#endif
+ M_PREPEND(m, len, M_DONTWAIT);
+ if (m != NULL && m->m_len < len)
+ m = m_pullup(m, len);
+ if (m == NULL) {
+ printf("ENOBUFS in in_gif_output %d\n", __LINE__);
+ return ENOBUFS;
+ }
+#ifndef __NO_STRICT_ALIGNMENT
+ if (family == AF_LINK) {
+ len = mtod(m, vm_offset_t) & 3;
+ KASSERT(len == 0 || len == ETHERIP_ALIGN,
+ ("in_gif_output: unexpected misalignment"));
+ m->m_data += len;
+ m->m_len -= ETHERIP_ALIGN;
+ }
+#endif
+ bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip));
+
+ M_SETFIB(m, sc->gif_fibnum);
+
+ if (dst->sin_family != sin_dst->sin_family ||
+ dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) {
+ /* cache route doesn't match */
+ bzero(dst, sizeof(*dst));
+ dst->sin_family = sin_dst->sin_family;
+ dst->sin_len = sizeof(struct sockaddr_in);
+ dst->sin_addr = sin_dst->sin_addr;
+ if (sc->gif_ro.ro_rt) {
+ RTFREE(sc->gif_ro.ro_rt);
+ sc->gif_ro.ro_rt = NULL;
+ }
+#if 0
+ GIF2IFP(sc)->if_mtu = GIF_MTU;
+#endif
+ }
+
+ if (sc->gif_ro.ro_rt == NULL) {
+ in_rtalloc_ign(&sc->gif_ro, 0, sc->gif_fibnum);
+ if (sc->gif_ro.ro_rt == NULL) {
+ m_freem(m);
+ return ENETUNREACH;
+ }
+
+ /* if it constitutes infinite encapsulation, punt. */
+ if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
+ m_freem(m);
+ return ENETUNREACH; /* XXX */
+ }
+#if 0
+ ifp->if_mtu = sc->gif_ro.ro_rt->rt_ifp->if_mtu
+ - sizeof(struct ip);
+#endif
+ }
+
+ error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL);
+
+ if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
+ sc->gif_ro.ro_rt != NULL) {
+ RTFREE(sc->gif_ro.ro_rt);
+ sc->gif_ro.ro_rt = NULL;
+ }
+
+ return (error);
+}
+
+void
+in_gif_input(struct mbuf *m, int off)
+{
+ struct ifnet *gifp = NULL;
+ struct gif_softc *sc;
+ struct ip *ip;
+ int af;
+ u_int8_t otos;
+ int proto;
+
+ ip = mtod(m, struct ip *);
+ proto = ip->ip_p;
+
+ sc = (struct gif_softc *)encap_getarg(m);
+ if (sc == NULL) {
+ m_freem(m);
+ KMOD_IPSTAT_INC(ips_nogif);
+ return;
+ }
+
+ gifp = GIF2IFP(sc);
+ if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ KMOD_IPSTAT_INC(ips_nogif);
+ return;
+ }
+
+ otos = ip->ip_tos;
+ m_adj(m, off);
+
+ switch (proto) {
+#ifdef INET
+ case IPPROTO_IPV4:
+ {
+ struct ip *ip;
+ af = AF_INET;
+ if (m->m_len < sizeof(*ip)) {
+ m = m_pullup(m, sizeof(*ip));
+ if (!m)
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
+ ECN_ALLOWED : ECN_NOCARE,
+ &otos, &ip->ip_tos) == 0) {
+ m_freem(m);
+ return;
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case IPPROTO_IPV6:
+ {
+ struct ip6_hdr *ip6;
+ u_int8_t itos, oitos;
+
+ af = AF_INET6;
+ if (m->m_len < sizeof(*ip6)) {
+ m = m_pullup(m, sizeof(*ip6));
+ if (!m)
+ return;
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
+ itos = oitos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
+ ECN_ALLOWED : ECN_NOCARE,
+ &otos, &itos) == 0) {
+ m_freem(m);
+ return;
+ }
+ if (itos != oitos) {
+ ip6->ip6_flow &= ~htonl(0xff << 20);
+ ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
+ }
+ break;
+ }
+#endif /* INET6 */
+ case IPPROTO_ETHERIP:
+ af = AF_LINK;
+ break;
+
+ default:
+ KMOD_IPSTAT_INC(ips_nogif);
+ m_freem(m);
+ return;
+ }
+ gif_input(m, af, gifp);
+ return;
+}
+
+/*
+ * validate outer address.
+ */
+static int
+gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
+{
+ struct sockaddr_in *src, *dst;
+ struct in_ifaddr *ia4;
+
+ src = (struct sockaddr_in *)sc->gif_psrc;
+ dst = (struct sockaddr_in *)sc->gif_pdst;
+
+ /* check for address match */
+ if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
+ dst->sin_addr.s_addr != ip->ip_src.s_addr)
+ return 0;
+
+ /* martian filters on outer source - NOT done in ip_input! */
+ if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)))
+ return 0;
+ switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
+ case 0: case 127: case 255:
+ return 0;
+ }
+
+ /* reject packets with broadcast on source */
+ /* XXXRW: should use hash lists? */
+ IN_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
+ if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
+ continue;
+ if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
+ IN_IFADDR_RUNLOCK();
+ return 0;
+ }
+ }
+ IN_IFADDR_RUNLOCK();
+
+ /* ingress filters on outer source */
+ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) {
+ struct sockaddr_in sin;
+ struct rtentry *rt;
+
+ bzero(&sin, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr = ip->ip_src;
+ /* XXX MRT check for the interface we would use on output */
+ rt = in_rtalloc1((struct sockaddr *)&sin, 0,
+ 0UL, sc->gif_fibnum);
+ if (!rt || rt->rt_ifp != ifp) {
+#if 0
+ log(LOG_WARNING, "%s: packet from 0x%x dropped "
+ "due to ingress filter\n", if_name(GIF2IFP(sc)),
+ (u_int32_t)ntohl(sin.sin_addr.s_addr));
+#endif
+ if (rt)
+ RTFREE_LOCKED(rt);
+ return 0;
+ }
+ RTFREE_LOCKED(rt);
+ }
+
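+	/*
+	 * Both outer addresses matched exactly; report 32 bits for each
+	 * so that the encapsulation framework prefers this tunnel over
+	 * less specific matches (see gif_encapcheck()).
+	 */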
+ return 32 * 2;
+}
+
+/*
+ * we know that we are in IFF_UP, outer address available, and outer family
+ * matched the physical addr family. see gif_encapcheck().
+ */
+int
+gif_encapcheck4(const struct mbuf *m, int off, int proto, void *arg)
+{
+ struct ip ip;
+ struct gif_softc *sc;
+ struct ifnet *ifp;
+
+ /* sanity check done in caller */
+ sc = (struct gif_softc *)arg;
+
+ /* LINTED const cast */
+ m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
+ ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
+
+ return gif_validate4(&ip, sc, ifp);
+}
+
+int
+in_gif_attach(struct gif_softc *sc)
+{
+ sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
+ &in_gif_protosw, sc);
+ if (sc->encap_cookie4 == NULL)
+ return EEXIST;
+ return 0;
+}
+
+int
+in_gif_detach(struct gif_softc *sc)
+{
+ int error;
+
+ error = encap_detach(sc->encap_cookie4);
+ if (error == 0)
+ sc->encap_cookie4 = NULL;
+ return error;
+}
diff --git a/freebsd/sys/netinet/in_gif.h b/freebsd/sys/netinet/in_gif.h
new file mode 100644
index 00000000..1e42b01f
--- /dev/null
+++ b/freebsd/sys/netinet/in_gif.h
@@ -0,0 +1,45 @@
+/* $FreeBSD$ */
+/* $KAME: in_gif.h,v 1.5 2000/04/14 08:36:02 itojun Exp $ */
+
+/*-
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETINET_IN_GIF_HH_
+#define _NETINET_IN_GIF_HH_
+
+#define GIF_TTL 30
+
+struct gif_softc;
+void in_gif_input(struct mbuf *, int);
+int in_gif_output(struct ifnet *, int, struct mbuf *);
+int gif_encapcheck4(const struct mbuf *, int, int, void *);
+int in_gif_attach(struct gif_softc *);
+int in_gif_detach(struct gif_softc *);
+
+#endif /*_NETINET_IN_GIF_HH_*/
diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c
new file mode 100644
index 00000000..ed2bcc12
--- /dev/null
+++ b/freebsd/sys/netinet/in_mcast.c
@@ -0,0 +1,2902 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2007-2009 Bruce Simpson.
+ * Copyright (c) 2005 Robert N. M. Watson.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * IPv4 multicast socket, group, and socket option processing module.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/tree.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/igmp_var.h>
+
+#ifndef KTR_IGMPV3
+#define KTR_IGMPV3 KTR_INET
+#endif
+
+#ifndef __SOCKUNION_DECLARED
+union sockunion {
+ struct sockaddr_storage ss;
+ struct sockaddr sa;
+ struct sockaddr_dl sdl;
+ struct sockaddr_in sin;
+};
+typedef union sockunion sockunion_t;
+#define __SOCKUNION_DECLARED
+#endif /* __SOCKUNION_DECLARED */
+
+static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
+ "IPv4 multicast PCB-layer source filter");
+static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
+static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
+static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
+ "IPv4 multicast IGMP-layer source filter");
+
+/*
+ * Locking:
+ * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
+ * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
+ * it can be taken by code in net/if.c also.
+ * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
+ *
+ * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
+ * any need for in_multi itself to be virtualized -- it is bound to an ifp
+ * anyway no matter what happens.
+ */
+struct mtx in_multi_mtx;
+MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
+
+/*
+ * Functions with non-static linkage defined in this file should be
+ * declared in in_var.h:
+ * imo_multi_filter()
+ * in_addmulti()
+ * in_delmulti()
+ * in_joingroup()
+ * in_joingroup_locked()
+ * in_leavegroup()
+ * in_leavegroup_locked()
+ * and ip_var.h:
+ * inp_freemoptions()
+ * inp_getmoptions()
+ * inp_setmoptions()
+ *
+ * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
+ * and in_delmulti().
+ */
+static void imf_commit(struct in_mfilter *);
+static int imf_get_source(struct in_mfilter *imf,
+ const struct sockaddr_in *psin,
+ struct in_msource **);
+static struct in_msource *
+ imf_graft(struct in_mfilter *, const uint8_t,
+ const struct sockaddr_in *);
+static void imf_leave(struct in_mfilter *);
+static int imf_prune(struct in_mfilter *, const struct sockaddr_in *);
+static void imf_purge(struct in_mfilter *);
+static void imf_rollback(struct in_mfilter *);
+static void imf_reap(struct in_mfilter *);
+static int imo_grow(struct ip_moptions *);
+static size_t imo_match_group(const struct ip_moptions *,
+ const struct ifnet *, const struct sockaddr *);
+static struct in_msource *
+ imo_match_source(const struct ip_moptions *, const size_t,
+ const struct sockaddr *);
+static void ims_merge(struct ip_msource *ims,
+ const struct in_msource *lims, const int rollback);
+static int in_getmulti(struct ifnet *, const struct in_addr *,
+ struct in_multi **);
+static int inm_get_source(struct in_multi *inm, const in_addr_t haddr,
+ const int noalloc, struct ip_msource **pims);
+static int inm_is_ifp_detached(const struct in_multi *);
+static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
+static void inm_purge(struct in_multi *);
+static void inm_reap(struct in_multi *);
+static struct ip_moptions *
+ inp_findmoptions(struct inpcb *);
+static int inp_get_source_filters(struct inpcb *, struct sockopt *);
+static int inp_join_group(struct inpcb *, struct sockopt *);
+static int inp_leave_group(struct inpcb *, struct sockopt *);
+static struct ifnet *
+ inp_lookup_mcast_ifp(const struct inpcb *,
+ const struct sockaddr_in *, const struct in_addr);
+static int inp_block_unblock_source(struct inpcb *, struct sockopt *);
+static int inp_set_multicast_if(struct inpcb *, struct sockopt *);
+static int inp_set_source_filters(struct inpcb *, struct sockopt *);
+static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast");
+
+static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
+SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
+ CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
+ "Max source filters per group");
+TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
+
+static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
+SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
+ CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
+ "Max source filters per socket");
+TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
+
+int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
+SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
+ &in_mcast_loop, 0, "Loopback multicast datagrams by default");
+TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
+
+SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
+ CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
+ "Per-interface stack-wide source filters");
+
+/*
+ * Inline function which wraps assertions for a valid ifp.
+ * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
+ * is detached.
+ */
+static int __inline
+inm_is_ifp_detached(const struct in_multi *inm)
+{
+ struct ifnet *ifp;
+
+ KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
+ ifp = inm->inm_ifma->ifma_ifp;
+ if (ifp != NULL) {
+ /*
+ * Sanity check that netinet's notion of ifp is the
+ * same as net's.
+ */
+ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
+ }
+
+ return (ifp == NULL);
+}
+
+/*
+ * Initialize an in_mfilter structure to a known state at t0, t1
+ * with an empty source filter list.
+ */
+static __inline void
+imf_init(struct in_mfilter *imf, const int st0, const int st1)
+{
+ memset(imf, 0, sizeof(struct in_mfilter));
+ RB_INIT(&imf->imf_sources);
+ imf->imf_st[0] = st0;
+ imf->imf_st[1] = st1;
+}
+
+/*
+ * Resize the ip_moptions vector to the next power-of-two minus 1.
+ * May be called with locks held; do not sleep.
+ */
+static int
+imo_grow(struct ip_moptions *imo)
+{
+ struct in_multi **nmships;
+ struct in_multi **omships;
+ struct in_mfilter *nmfilters;
+ struct in_mfilter *omfilters;
+ size_t idx;
+ size_t newmax;
+ size_t oldmax;
+
+ nmships = NULL;
+ nmfilters = NULL;
+ omships = imo->imo_membership;
+ omfilters = imo->imo_mfilters;
+ oldmax = imo->imo_max_memberships;
+ newmax = ((oldmax + 1) * 2) - 1;
+
+ if (newmax <= IP_MAX_MEMBERSHIPS) {
+ nmships = (struct in_multi **)realloc(omships,
+ sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
+ nmfilters = (struct in_mfilter *)realloc(omfilters,
+ sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
+ if (nmships != NULL && nmfilters != NULL) {
+ /* Initialize newly allocated source filter heads. */
+ for (idx = oldmax; idx < newmax; idx++) {
+ imf_init(&nmfilters[idx], MCAST_UNDEFINED,
+ MCAST_EXCLUDE);
+ }
+ imo->imo_max_memberships = newmax;
+ imo->imo_membership = nmships;
+ imo->imo_mfilters = nmfilters;
+ }
+ }
+
+ if (nmships == NULL || nmfilters == NULL) {
+ if (nmships != NULL)
+ free(nmships, M_IPMOPTS);
+ if (nmfilters != NULL)
+ free(nmfilters, M_INMFILTER);
+ return (ETOOMANYREFS);
+ }
+
+ return (0);
+}
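+
+/*
+ * The resize above keeps the vector length one less than a power of two;
+ * starting from IP_MIN_MEMBERSHIPS slots, successive calls follow
+ *
+ *	newmax = ((oldmax + 1) * 2) - 1		(e.g. 31 -> 63 -> 127 -> ...)
+ *
+ * until newmax would exceed IP_MAX_MEMBERSHIPS, at which point
+ * ETOOMANYREFS is returned and the old vectors are left in place.
+ * (The example numbers assume IP_MIN_MEMBERSHIPS is 31.)
+ */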
+
+/*
+ * Find an IPv4 multicast group entry for this ip_moptions instance
+ * which matches the specified group, and optionally an interface.
+ * Return its index into the array, or -1 if not found.
+ */
+static size_t
+imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
+ const struct sockaddr *group)
+{
+ const struct sockaddr_in *gsin;
+ struct in_multi **pinm;
+ int idx;
+ int nmships;
+
+ gsin = (const struct sockaddr_in *)group;
+
+ /* The imo_membership array may be lazy allocated. */
+ if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
+ return (-1);
+
+ nmships = imo->imo_num_memberships;
+ pinm = &imo->imo_membership[0];
+ for (idx = 0; idx < nmships; idx++, pinm++) {
+ if (*pinm == NULL)
+ continue;
+ if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
+ in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
+ break;
+ }
+ }
+ if (idx >= nmships)
+ idx = -1;
+
+ return (idx);
+}
+
+/*
+ * Find an IPv4 multicast source entry for this imo which matches
+ * the given group index for this socket, and source address.
+ *
+ * NOTE: This does not check if the entry is in-mode, merely if
+ * it exists, which may not be the desired behaviour.
+ */
+static struct in_msource *
+imo_match_source(const struct ip_moptions *imo, const size_t gidx,
+ const struct sockaddr *src)
+{
+ struct ip_msource find;
+ struct in_mfilter *imf;
+ struct ip_msource *ims;
+ const sockunion_t *psa;
+
+ KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
+ KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
+ ("%s: invalid index %d\n", __func__, (int)gidx));
+
+ /* The imo_mfilters array may be lazy allocated. */
+ if (imo->imo_mfilters == NULL)
+ return (NULL);
+ imf = &imo->imo_mfilters[gidx];
+
+ /* Source trees are keyed in host byte order. */
+ psa = (const sockunion_t *)src;
+ find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
+ ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+
+ return ((struct in_msource *)ims);
+}
+
+/*
+ * Perform filtering for multicast datagrams on a socket by group and source.
+ *
+ * Returns 0 if a datagram should be allowed through, or various error codes
+ * if the socket was not a member of the group, or the source was muted, etc.
+ */
+int
+imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
+ const struct sockaddr *group, const struct sockaddr *src)
+{
+ size_t gidx;
+ struct in_msource *ims;
+ int mode;
+
+ KASSERT(ifp != NULL, ("%s: null ifp", __func__));
+
+ gidx = imo_match_group(imo, ifp, group);
+ if (gidx == -1)
+ return (MCAST_NOTGMEMBER);
+
+ /*
+ * Check if the source was included in an (S,G) join.
+ * Allow reception on exclusive memberships by default,
+ * reject reception on inclusive memberships by default.
+ * Exclude source only if an in-mode exclude filter exists.
+ * Include source only if an in-mode include filter exists.
+ * NOTE: We are comparing group state here at IGMP t1 (now)
+ * with socket-layer t0 (since last downcall).
+ */
+ mode = imo->imo_mfilters[gidx].imf_st[1];
+ ims = imo_match_source(imo, gidx, src);
+
+ if ((ims == NULL && mode == MCAST_INCLUDE) ||
+ (ims != NULL && ims->imsl_st[0] != mode))
+ return (MCAST_NOTSMEMBER);
+
+ return (MCAST_PASS);
+}
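+
+/*
+ * A minimal sketch of a consumer, assuming a hypothetical transport input
+ * path that already holds the matching inpcb and its ip_moptions:
+ *
+ *	struct sockaddr_in group, src;	// filled from the IP header
+ *	...
+ *	if (imo != NULL &&
+ *	    imo_multi_filter(imo, ifp, (struct sockaddr *)&group,
+ *	    (struct sockaddr *)&src) != MCAST_PASS) {
+ *		m_freem(m);		// group/source filtered for this socket
+ *		return;
+ *	}
+ */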
+
+/*
+ * Find and return a reference to an in_multi record for (ifp, group),
+ * and bump its reference count.
+ * If one does not exist, try to allocate it, and update link-layer multicast
+ * filters on ifp to listen for group.
+ * Assumes the IN_MULTI lock is held across the call.
+ * Return 0 if successful, otherwise return an appropriate error code.
+ */
+static int
+in_getmulti(struct ifnet *ifp, const struct in_addr *group,
+ struct in_multi **pinm)
+{
+ struct sockaddr_in gsin;
+ struct ifmultiaddr *ifma;
+ struct in_ifinfo *ii;
+ struct in_multi *inm;
+ int error;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
+
+ inm = inm_lookup(ifp, *group);
+ if (inm != NULL) {
+ /*
+ * If we already joined this group, just bump the
+ * refcount and return it.
+ */
+ KASSERT(inm->inm_refcount >= 1,
+ ("%s: bad refcount %d", __func__, inm->inm_refcount));
+ ++inm->inm_refcount;
+ *pinm = inm;
+ return (0);
+ }
+
+ memset(&gsin, 0, sizeof(gsin));
+ gsin.sin_family = AF_INET;
+ gsin.sin_len = sizeof(struct sockaddr_in);
+ gsin.sin_addr = *group;
+
+ /*
+ * Check if a link-layer group is already associated
+ * with this network-layer group on the given ifnet.
+ */
+ error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
+ if (error != 0)
+ return (error);
+
+ /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
+ IF_ADDR_LOCK(ifp);
+
+ /*
+ * If something other than netinet is occupying the link-layer
+ * group, print a meaningful error message and back out of
+ * the allocation.
+ * Otherwise, bump the refcount on the existing network-layer
+ * group association and return it.
+ */
+ if (ifma->ifma_protospec != NULL) {
+ inm = (struct in_multi *)ifma->ifma_protospec;
+#ifdef INVARIANTS
+ KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
+ __func__));
+ KASSERT(ifma->ifma_addr->sa_family == AF_INET,
+ ("%s: ifma not AF_INET", __func__));
+ KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
+ if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
+ !in_hosteq(inm->inm_addr, *group))
+ panic("%s: ifma %p is inconsistent with %p (%s)",
+ __func__, ifma, inm, inet_ntoa(*group));
+#endif
+ ++inm->inm_refcount;
+ *pinm = inm;
+ IF_ADDR_UNLOCK(ifp);
+ return (0);
+ }
+
+ IF_ADDR_LOCK_ASSERT(ifp);
+
+ /*
+ * A new in_multi record is needed; allocate and initialize it.
+ * We DO NOT perform an IGMP join as the in_ layer may need to
+ * push an initial source list down to IGMP to support SSM.
+ *
+ * The initial source filter state is INCLUDE, {} as per the RFC.
+ */
+ inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
+ if (inm == NULL) {
+ if_delmulti_ifma(ifma);
+ IF_ADDR_UNLOCK(ifp);
+ return (ENOMEM);
+ }
+ inm->inm_addr = *group;
+ inm->inm_ifp = ifp;
+ inm->inm_igi = ii->ii_igmp;
+ inm->inm_ifma = ifma;
+ inm->inm_refcount = 1;
+ inm->inm_state = IGMP_NOT_MEMBER;
+
+ /*
+ * Pending state-changes per group are subject to a bounds check.
+ */
+ IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
+
+ inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
+ inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
+ RB_INIT(&inm->inm_srcs);
+
+ ifma->ifma_protospec = inm;
+
+ *pinm = inm;
+
+ IF_ADDR_UNLOCK(ifp);
+ return (0);
+}
+
+/*
+ * Drop a reference to an in_multi record.
+ *
+ * If the refcount drops to 0, free the in_multi record and
+ * delete the underlying link-layer membership.
+ */
+void
+inm_release_locked(struct in_multi *inm)
+{
+ struct ifmultiaddr *ifma;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
+
+ if (--inm->inm_refcount > 0) {
+ CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
+ inm->inm_refcount);
+ return;
+ }
+
+ CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
+
+ ifma = inm->inm_ifma;
+
+ /* XXX this access is not covered by IF_ADDR_LOCK */
+ CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
+ KASSERT(ifma->ifma_protospec == inm,
+ ("%s: ifma_protospec != inm", __func__));
+ ifma->ifma_protospec = NULL;
+
+ inm_purge(inm);
+
+ free(inm, M_IPMADDR);
+
+ if_delmulti_ifma(ifma);
+}
+
+/*
+ * Clear recorded source entries for a group.
+ * Used by the IGMP code. Caller must hold the IN_MULTI lock.
+ * FIXME: Should reap.
+ */
+void
+inm_clear_recorded(struct in_multi *inm)
+{
+ struct ip_msource *ims;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+ if (ims->ims_stp) {
+ ims->ims_stp = 0;
+ --inm->inm_st[1].iss_rec;
+ }
+ }
+ KASSERT(inm->inm_st[1].iss_rec == 0,
+ ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
+}
+
+/*
+ * Record a source as pending for a Source-Group IGMPv3 query.
+ * This lives here as it modifies the shared tree.
+ *
+ * inm is the group descriptor.
+ * naddr is the address of the source to record in network-byte order.
+ *
+ * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
+ * lazy-allocate a source node in response to an SG query.
+ * Otherwise, no allocation is performed. This saves some memory
+ * with the trade-off that the source will not be reported to the
+ * router if joined in the window between the query response and
+ * the group actually being joined on the local host.
+ *
+ * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
+ * This turns off the allocation of a recorded source entry if
+ * the group has not been joined.
+ *
+ * Return 0 if the source didn't exist or was already marked as recorded.
+ * Return 1 if the source was marked as recorded by this function.
+ * Return <0 if any error occurred (negated errno code).
+ */
+int
+inm_record_source(struct in_multi *inm, const in_addr_t naddr)
+{
+ struct ip_msource find;
+ struct ip_msource *ims, *nims;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ find.ims_haddr = ntohl(naddr);
+ ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
+ if (ims && ims->ims_stp)
+ return (0);
+ if (ims == NULL) {
+ if (inm->inm_nsrc == in_mcast_maxgrpsrc)
+ return (-ENOSPC);
+ nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
+ M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (-ENOMEM);
+ nims->ims_haddr = find.ims_haddr;
+ RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
+ ++inm->inm_nsrc;
+ ims = nims;
+ }
+
+ /*
+ * Mark the source as recorded and update the recorded
+ * source count.
+ */
+ ++ims->ims_stp;
+ ++inm->inm_st[1].iss_rec;
+
+ return (1);
+}
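+
+/*
+ * A minimal sketch of a caller, assuming a hypothetical group-and-source
+ * query handler that walks the address list of an IGMPv3 query:
+ *
+ *	retval = inm_record_source(inm, ap->s_addr);
+ *	if (retval < 0)
+ *		break;		// -ENOSPC or -ENOMEM: stop recording sources
+ *	// retval == 0: already recorded or not allocated; 1: newly recorded
+ */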
+
+/*
+ * Return a pointer to an in_msource owned by an in_mfilter,
+ * given its source address.
+ * Lazy-allocate if needed. If this is a new entry its filter state is
+ * undefined at t0.
+ *
+ * imf is the filter set being modified.
+ * psin is the source address; the filter tree is keyed by its host
+ * byte-order form.
+ *
+ * SMPng: May be called with locks held; malloc must not block.
+ */
+static int
+imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
+ struct in_msource **plims)
+{
+ struct ip_msource find;
+ struct ip_msource *ims, *nims;
+ struct in_msource *lims;
+ int error;
+
+ error = 0;
+ ims = NULL;
+ lims = NULL;
+
+ /* key is host byte order */
+ find.ims_haddr = ntohl(psin->sin_addr.s_addr);
+ ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+ lims = (struct in_msource *)ims;
+ if (lims == NULL) {
+ if (imf->imf_nsrc == in_mcast_maxsocksrc)
+ return (ENOSPC);
+ nims = malloc(sizeof(struct in_msource), M_INMFILTER,
+ M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (ENOMEM);
+ lims = (struct in_msource *)nims;
+ lims->ims_haddr = find.ims_haddr;
+ lims->imsl_st[0] = MCAST_UNDEFINED;
+ RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
+ ++imf->imf_nsrc;
+ }
+
+ *plims = lims;
+
+ return (error);
+}
+
+/*
+ * Graft a source entry into an existing socket-layer filter set,
+ * maintaining any required invariants and checking allocations.
+ *
+ * The source is marked as being in the new filter mode at t1.
+ *
+ * Return the pointer to the new node, otherwise return NULL.
+ */
+static struct in_msource *
+imf_graft(struct in_mfilter *imf, const uint8_t st1,
+ const struct sockaddr_in *psin)
+{
+ struct ip_msource *nims;
+ struct in_msource *lims;
+
+ nims = malloc(sizeof(struct in_msource), M_INMFILTER,
+ M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (NULL);
+ lims = (struct in_msource *)nims;
+ lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
+ lims->imsl_st[0] = MCAST_UNDEFINED;
+ lims->imsl_st[1] = st1;
+ RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
+ ++imf->imf_nsrc;
+
+ return (lims);
+}
+
+/*
+ * Prune a source entry from an existing socket-layer filter set,
+ * maintaining any required invariants and checking allocations.
+ *
+ * The source is marked as being left at t1, it is not freed.
+ *
+ * Return 0 if no error occurred, otherwise return an errno value.
+ */
+static int
+imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
+{
+ struct ip_msource find;
+ struct ip_msource *ims;
+ struct in_msource *lims;
+
+ /* key is host byte order */
+ find.ims_haddr = ntohl(psin->sin_addr.s_addr);
+ ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+ if (ims == NULL)
+ return (ENOENT);
+ lims = (struct in_msource *)ims;
+ lims->imsl_st[1] = MCAST_UNDEFINED;
+ return (0);
+}
+
+/*
+ * Revert socket-layer filter set deltas at t1 to t0 state.
+ */
+static void
+imf_rollback(struct in_mfilter *imf)
+{
+ struct ip_msource *ims, *tims;
+ struct in_msource *lims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+ lims = (struct in_msource *)ims;
+ if (lims->imsl_st[0] == lims->imsl_st[1]) {
+ /* no change at t1 */
+ continue;
+ } else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
+ /* revert change to existing source at t1 */
+ lims->imsl_st[1] = lims->imsl_st[0];
+ } else {
+ /* revert source added t1 */
+ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+ free(ims, M_INMFILTER);
+ imf->imf_nsrc--;
+ }
+ }
+ imf->imf_st[1] = imf->imf_st[0];
+}
+
+/*
+ * Mark socket-layer filter set as INCLUDE {} at t1.
+ */
+static void
+imf_leave(struct in_mfilter *imf)
+{
+ struct ip_msource *ims;
+ struct in_msource *lims;
+
+ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+ lims = (struct in_msource *)ims;
+ lims->imsl_st[1] = MCAST_UNDEFINED;
+ }
+ imf->imf_st[1] = MCAST_INCLUDE;
+}
+
+/*
+ * Mark socket-layer filter set deltas as committed.
+ */
+static void
+imf_commit(struct in_mfilter *imf)
+{
+ struct ip_msource *ims;
+ struct in_msource *lims;
+
+ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+ lims = (struct in_msource *)ims;
+ lims->imsl_st[0] = lims->imsl_st[1];
+ }
+ imf->imf_st[0] = imf->imf_st[1];
+}
+
+/*
+ * Reap unreferenced sources from socket-layer filter set.
+ */
+static void
+imf_reap(struct in_mfilter *imf)
+{
+ struct ip_msource *ims, *tims;
+ struct in_msource *lims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+ lims = (struct in_msource *)ims;
+ if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
+ (lims->imsl_st[1] == MCAST_UNDEFINED)) {
+ CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+ free(ims, M_INMFILTER);
+ imf->imf_nsrc--;
+ }
+ }
+}
+
+/*
+ * Purge socket-layer filter set.
+ */
+static void
+imf_purge(struct in_mfilter *imf)
+{
+ struct ip_msource *ims, *tims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+ free(ims, M_INMFILTER);
+ imf->imf_nsrc--;
+ }
+ imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
+ KASSERT(RB_EMPTY(&imf->imf_sources),
+ ("%s: imf_sources not empty", __func__));
+}
+
+/*
+ * Look up a source filter entry for a multicast group.
+ *
+ * inm is the group descriptor to work with.
+ * haddr is the host-byte-order IPv4 address to look up.
+ * noalloc may be non-zero to suppress allocation of sources.
+ * *pims will be set to the address of the retrieved or allocated source.
+ *
+ * SMPng: NOTE: may be called with locks held.
+ * Return 0 if successful, otherwise return a non-zero error code.
+ */
+static int
+inm_get_source(struct in_multi *inm, const in_addr_t haddr,
+ const int noalloc, struct ip_msource **pims)
+{
+ struct ip_msource find;
+ struct ip_msource *ims, *nims;
+#ifdef KTR
+ struct in_addr ia;
+#endif
+
+ find.ims_haddr = haddr;
+ ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
+ if (ims == NULL && !noalloc) {
+ if (inm->inm_nsrc == in_mcast_maxgrpsrc)
+ return (ENOSPC);
+ nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
+ M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (ENOMEM);
+ nims->ims_haddr = haddr;
+ RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
+ ++inm->inm_nsrc;
+ ims = nims;
+#ifdef KTR
+ ia.s_addr = htonl(haddr);
+ CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
+ inet_ntoa(ia), ims);
+#endif
+ }
+
+ *pims = ims;
+ return (0);
+}
+
+/*
+ * Merge socket-layer source into IGMP-layer source.
+ * If rollback is non-zero, perform the inverse of the merge.
+ */
+static void
+ims_merge(struct ip_msource *ims, const struct in_msource *lims,
+ const int rollback)
+{
+ int n = rollback ? -1 : 1;
+#ifdef KTR
+ struct in_addr ia;
+
+ ia.s_addr = htonl(ims->ims_haddr);
+#endif
+
+ if (lims->imsl_st[0] == MCAST_EXCLUDE) {
+ CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
+ __func__, n, inet_ntoa(ia));
+ ims->ims_st[1].ex -= n;
+ } else if (lims->imsl_st[0] == MCAST_INCLUDE) {
+ CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
+ __func__, n, inet_ntoa(ia));
+ ims->ims_st[1].in -= n;
+ }
+
+ if (lims->imsl_st[1] == MCAST_EXCLUDE) {
+ CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
+ __func__, n, inet_ntoa(ia));
+ ims->ims_st[1].ex += n;
+ } else if (lims->imsl_st[1] == MCAST_INCLUDE) {
+ CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
+ __func__, n, inet_ntoa(ia));
+ ims->ims_st[1].in += n;
+ }
+}
+
+/*
+ * Atomically update the global in_multi state, when a membership's
+ * filter list is being updated in any way.
+ *
+ * imf is the per-inpcb-membership group filter pointer.
+ * A fake imf may be passed for in-kernel consumers.
+ *
+ * XXX This is a candidate for a set-symmetric-difference style loop
+ * which would eliminate the repeated lookup from root of ims nodes,
+ * as they share the same key space.
+ *
+ * If any error occurred this function will back out of refcounts
+ * and return a non-zero value.
+ */
+static int
+inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
+{
+ struct ip_msource *ims, *nims;
+ struct in_msource *lims;
+ int schanged, error;
+ int nsrc0, nsrc1;
+
+ schanged = 0;
+ error = 0;
+ nsrc1 = nsrc0 = 0;
+
+ /*
+ * Update the source filters first, as this may fail.
+ * Maintain count of in-mode filters at t0, t1. These are
+ * used to work out if we transition into ASM mode or not.
+ * Maintain a count of source filters whose state was
+ * actually modified by this operation.
+ */
+ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+ lims = (struct in_msource *)ims;
+ if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
+ if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
+ if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
+ error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
+ ++schanged;
+ if (error)
+ break;
+ ims_merge(nims, lims, 0);
+ }
+ if (error) {
+ struct ip_msource *bims;
+
+ RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
+ lims = (struct in_msource *)ims;
+ if (lims->imsl_st[0] == lims->imsl_st[1])
+ continue;
+ (void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
+ if (bims == NULL)
+ continue;
+ ims_merge(bims, lims, 1);
+ }
+ goto out_reap;
+ }
+
+ CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
+ __func__, nsrc0, nsrc1);
+
+ /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
+ if (imf->imf_st[0] == imf->imf_st[1] &&
+ imf->imf_st[1] == MCAST_INCLUDE) {
+ if (nsrc1 == 0) {
+ CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
+ --inm->inm_st[1].iss_in;
+ }
+ }
+
+ /* Handle filter mode transition on socket. */
+ if (imf->imf_st[0] != imf->imf_st[1]) {
+ CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
+ __func__, imf->imf_st[0], imf->imf_st[1]);
+
+ if (imf->imf_st[0] == MCAST_EXCLUDE) {
+ CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
+ --inm->inm_st[1].iss_ex;
+ } else if (imf->imf_st[0] == MCAST_INCLUDE) {
+ CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
+ --inm->inm_st[1].iss_in;
+ }
+
+ if (imf->imf_st[1] == MCAST_EXCLUDE) {
+ CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
+ inm->inm_st[1].iss_ex++;
+ } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
+ CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
+ inm->inm_st[1].iss_in++;
+ }
+ }
+
+ /*
+ * Track inm filter state in terms of listener counts.
+ * If there are any exclusive listeners, stack-wide
+ * membership is exclusive.
+ * Otherwise, if only inclusive listeners, stack-wide is inclusive.
+ * If no listeners remain, state is undefined at t1,
+ * and the IGMP lifecycle for this group should finish.
+ */
+ if (inm->inm_st[1].iss_ex > 0) {
+ CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
+ inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
+ } else if (inm->inm_st[1].iss_in > 0) {
+ CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
+ inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
+ } else {
+ CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
+ inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
+ }
+
+ /* Decrement ASM listener count on transition out of ASM mode. */
+ if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
+ if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
+ (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
+ CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
+ --inm->inm_st[1].iss_asm;
+ }
+
+ /* Increment ASM listener count on transition to ASM mode. */
+ if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
+ CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
+ inm->inm_st[1].iss_asm++;
+ }
+
+ CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
+ inm_print(inm);
+
+out_reap:
+ if (schanged > 0) {
+ CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
+ inm_reap(inm);
+ }
+ return (error);
+}
+
+/*
+ * Mark an in_multi's filter set deltas as committed.
+ * Called by IGMP after a state change has been enqueued.
+ */
+void
+inm_commit(struct in_multi *inm)
+{
+ struct ip_msource *ims;
+
+ CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
+ CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
+ inm_print(inm);
+
+ RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+ ims->ims_st[0] = ims->ims_st[1];
+ }
+ inm->inm_st[0] = inm->inm_st[1];
+}
+
+/*
+ * Reap unreferenced nodes from an in_multi's filter set.
+ */
+static void
+inm_reap(struct in_multi *inm)
+{
+ struct ip_msource *ims, *tims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
+ if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
+ ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
+ ims->ims_stp != 0)
+ continue;
+ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
+ free(ims, M_IPMSOURCE);
+ inm->inm_nsrc--;
+ }
+}
+
+/*
+ * Purge all source nodes from an in_multi's filter set.
+ */
+static void
+inm_purge(struct in_multi *inm)
+{
+ struct ip_msource *ims, *tims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
+ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
+ free(ims, M_IPMSOURCE);
+ inm->inm_nsrc--;
+ }
+}
+
+/*
+ * Join a multicast group; unlocked entry point.
+ *
+ * SMPng: XXX: in_joingroup() is called from in_control() when Giant
+ * is not held. Fortunately, ifp is unlikely to have been detached
+ * at this point, so we assume it's OK to recurse.
+ */
+int
+in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
+ /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
+{
+ int error;
+
+ IN_MULTI_LOCK();
+ error = in_joingroup_locked(ifp, gina, imf, pinm);
+ IN_MULTI_UNLOCK();
+
+ return (error);
+}
+
+/*
+ * Join a multicast group; real entry point.
+ *
+ * Only preserves atomicity at inm level.
+ * NOTE: imf argument cannot be const due to sys/tree.h limitations.
+ *
+ * If the IGMP downcall fails, the group is not joined, and an error
+ * code is returned.
+ */
+int
+in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
+ /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
+{
+ struct in_mfilter timf;
+ struct in_multi *inm;
+ int error;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
+ inet_ntoa(*gina), ifp, ifp->if_xname);
+
+ error = 0;
+ inm = NULL;
+
+ /*
+ * If no imf was specified (i.e. kernel consumer),
+ * fake one up and assume it is an ASM join.
+ */
+ if (imf == NULL) {
+ imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+ imf = &timf;
+ }
+
+ error = in_getmulti(ifp, gina, &inm);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
+ return (error);
+ }
+
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
+ goto out_inm_release;
+ }
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
+ goto out_inm_release;
+ }
+
+out_inm_release:
+ if (error) {
+ CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
+ inm_release_locked(inm);
+ } else {
+ *pinm = inm;
+ }
+
+ return (error);
+}
+
+/*
+ * Leave a multicast group; unlocked entry point.
+ */
+int
+in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
+{
+ struct ifnet *ifp;
+ int error;
+
+ ifp = inm->inm_ifp;
+
+ IN_MULTI_LOCK();
+ error = in_leavegroup_locked(inm, imf);
+ IN_MULTI_UNLOCK();
+
+ return (error);
+}
+
+/*
+ * Leave a multicast group; real entry point.
+ * All source filters will be expunged.
+ *
+ * Only preserves atomicity at inm level.
+ *
+ * Holding the write lock for the INP which contains imf
+ * is highly advisable. We can't assert for it as imf does not
+ * contain a back-pointer to the owning inp.
+ *
+ * Note: This is not the same as inm_release(*) as this function also
+ * makes a state change downcall into IGMP.
+ */
+int
+in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
+{
+ struct in_mfilter timf;
+ int error;
+
+ error = 0;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
+ inm, inet_ntoa(inm->inm_addr),
+ (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
+ imf);
+
+ /*
+ * If no imf was specified (i.e. kernel consumer),
+	 * fake one up and assume the membership being left was an ASM join.
+ */
+ if (imf == NULL) {
+ imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
+ imf = &timf;
+ }
+
+ /*
+ * Begin state merge transaction at IGMP layer.
+ *
+ * As this particular invocation should not cause any memory
+ * to be allocated, and there is no opportunity to roll back
+ * the transaction, it MUST NOT fail.
+ */
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error)
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+
+ CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
+ inm_release_locked(inm);
+
+ return (error);
+}
+
+/*#ifndef BURN_BRIDGES*/
+/*
+ * Join an IPv4 multicast group in (*,G) exclusive mode.
+ * The group must be a 224.0.0.0/24 link-scope group.
+ * This KPI is for legacy kernel consumers only.
+ */
+struct in_multi *
+in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+{
+ struct in_multi *pinm;
+ int error;
+
+ KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
+ ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));
+
+ error = in_joingroup(ifp, ap, NULL, &pinm);
+ if (error != 0)
+ pinm = NULL;
+
+ return (pinm);
+}
+
+/*
+ * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
+ * This KPI is for legacy kernel consumers only.
+ */
+void
+in_delmulti(struct in_multi *inm)
+{
+
+ (void)in_leavegroup(inm, NULL);
+}
+/*#endif*/
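+
+/*
+ * A minimal sketch of a legacy (*,G) consumer, assuming a hypothetical
+ * in-kernel caller joining a link-scope group on ifp:
+ *
+ *	struct in_addr allhosts;
+ *	struct in_multi *inm;
+ *
+ *	allhosts.s_addr = htonl(INADDR_ALLHOSTS_GROUP);	// 224.0.0.1
+ *	inm = in_addmulti(&allhosts, ifp);
+ *	if (inm == NULL)
+ *		return (ENOBUFS);
+ *	...
+ *	in_delmulti(inm);
+ */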
+
+/*
+ * Block or unblock an ASM multicast source on an inpcb.
+ * This implements the delta-based API described in RFC 3678.
+ *
+ * The delta-based API applies only to exclusive-mode memberships.
+ * An IGMP downcall will be performed.
+ *
+ * SMPng: NOTE: Must take Giant as a join may create a new ifma.
+ *
+ * Return 0 if successful, otherwise return an appropriate error code.
+ */
+static int
+inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_msource *ims;
+ struct in_multi *inm;
+ size_t idx;
+ uint16_t fmode;
+ int error, doblock;
+
+ ifp = NULL;
+ error = 0;
+ doblock = 0;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+
+ switch (sopt->sopt_name) {
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE: {
+ struct ip_mreq_source mreqs;
+
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+
+ if (!in_nullhost(mreqs.imr_interface))
+ INADDR_TO_IFP(mreqs.imr_interface, ifp);
+
+ if (sopt->sopt_name == IP_BLOCK_SOURCE)
+ doblock = 1;
+
+ CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+ break;
+ }
+
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(gsr.gsr_interface);
+
+ if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
+ doblock = 1;
+ break;
+
+ default:
+ CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
+ __func__, sopt->sopt_name);
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ /*
+ * Check if we are actually a member of this group.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imo_mfilters not allocated", __func__));
+ imf = &imo->imo_mfilters[idx];
+ inm = imo->imo_membership[idx];
+
+ /*
+ * Attempting to use the delta-based API on an
+ * non exclusive-mode membership is an error.
+ */
+ fmode = imf->imf_st[0];
+ if (fmode != MCAST_EXCLUDE) {
+ error = EINVAL;
+ goto out_inp_locked;
+ }
+
+ /*
+ * Deal with error cases up-front:
+ * Asked to block, but already blocked; or
+ * Asked to unblock, but nothing to unblock.
+ * If adding a new block entry, allocate it.
+ */
+ ims = imo_match_source(imo, idx, &ssa->sa);
+ if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
+ CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
+ inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ");
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+
+ INP_WLOCK_ASSERT(inp);
+
+ /*
+ * Begin state merge transaction at socket layer.
+ */
+ if (doblock) {
+ CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
+ ims = imf_graft(imf, fmode, &ssa->sin);
+ if (ims == NULL)
+ error = ENOMEM;
+ } else {
+ CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
+ error = imf_prune(imf, &ssa->sin);
+ }
+
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
+ goto out_imf_rollback;
+ }
+
+ /*
+ * Begin state merge transaction at IGMP layer.
+ */
+ IN_MULTI_LOCK();
+
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
+ goto out_imf_rollback;
+ }
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error)
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+
+ IN_MULTI_UNLOCK();
+
+out_imf_rollback:
+ if (error)
+ imf_rollback(imf);
+ else
+ imf_commit(imf);
+
+ imf_reap(imf);
+
+out_inp_locked:
+ INP_WUNLOCK(inp);
+ return (error);
+}
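+
+/*
+ * A minimal userland sketch of the delta-based API handled above, assuming
+ * a UDP socket s that has already joined 239.1.1.1 in exclusive mode
+ * (addresses are placeholders):
+ *
+ *	#include <sys/socket.h>
+ *	#include <netinet/in.h>
+ *	#include <arpa/inet.h>
+ *	#include <stdio.h>
+ *
+ *	struct ip_mreq_source mr;
+ *
+ *	mr.imr_multiaddr.s_addr = inet_addr("239.1.1.1");
+ *	mr.imr_sourceaddr.s_addr = inet_addr("192.0.2.1");
+ *	mr.imr_interface.s_addr = INADDR_ANY;
+ *	if (setsockopt(s, IPPROTO_IP, IP_BLOCK_SOURCE, &mr, sizeof(mr)) == -1)
+ *		perror("IP_BLOCK_SOURCE");
+ */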
+
+/*
+ * Given an inpcb, return its multicast options structure pointer. Accepts
+ * an unlocked inpcb pointer, but will return it locked. May sleep.
+ *
+ * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
+ * SMPng: NOTE: Returns with the INP write lock held.
+ */
+static struct ip_moptions *
+inp_findmoptions(struct inpcb *inp)
+{
+ struct ip_moptions *imo;
+ struct in_multi **immp;
+ struct in_mfilter *imfp;
+ size_t idx;
+
+ INP_WLOCK(inp);
+ if (inp->inp_moptions != NULL)
+ return (inp->inp_moptions);
+
+ INP_WUNLOCK(inp);
+
+ imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
+ immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
+ M_WAITOK | M_ZERO);
+ imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
+ M_INMFILTER, M_WAITOK);
+
+ imo->imo_multicast_ifp = NULL;
+ imo->imo_multicast_addr.s_addr = INADDR_ANY;
+ imo->imo_multicast_vif = -1;
+ imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ imo->imo_multicast_loop = in_mcast_loop;
+ imo->imo_num_memberships = 0;
+ imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ imo->imo_membership = immp;
+
+ /* Initialize per-group source filters. */
+ for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
+ imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
+ imo->imo_mfilters = imfp;
+
+ INP_WLOCK(inp);
+ if (inp->inp_moptions != NULL) {
+ free(imfp, M_INMFILTER);
+ free(immp, M_IPMOPTS);
+ free(imo, M_IPMOPTS);
+ return (inp->inp_moptions);
+ }
+ inp->inp_moptions = imo;
+ return (imo);
+}
+
+/*
+ * Discard the IP multicast options (and source filters).
+ *
+ * SMPng: NOTE: assumes INP write lock is held.
+ */
+void
+inp_freemoptions(struct ip_moptions *imo)
+{
+ struct in_mfilter *imf;
+ size_t idx, nmships;
+
+ KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
+
+ nmships = imo->imo_num_memberships;
+ for (idx = 0; idx < nmships; ++idx) {
+ imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
+ if (imf)
+ imf_leave(imf);
+ (void)in_leavegroup(imo->imo_membership[idx], imf);
+ if (imf)
+ imf_purge(imf);
+ }
+
+ if (imo->imo_mfilters)
+ free(imo->imo_mfilters, M_INMFILTER);
+ free(imo->imo_membership, M_IPMOPTS);
+ free(imo, M_IPMOPTS);
+}
+
+/*
+ * Atomically get source filters on a socket for an IPv4 multicast group.
+ * Called with INP lock held; returns with lock released.
+ */
+static int
+inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct __msfilterreq msfr;
+ sockunion_t *gsa;
+ struct ifnet *ifp;
+ struct ip_moptions *imo;
+ struct in_mfilter *imf;
+ struct ip_msource *ims;
+ struct in_msource *lims;
+ struct sockaddr_in *psin;
+ struct sockaddr_storage *ptss;
+ struct sockaddr_storage *tss;
+ int error;
+ size_t idx, nsrcs, ncsrcs;
+
+ INP_WLOCK_ASSERT(inp);
+
+ imo = inp->inp_moptions;
+ KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
+
+ INP_WUNLOCK(inp);
+
+ error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
+ sizeof(struct __msfilterreq));
+ if (error)
+ return (error);
+
+ if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
+ return (EINVAL);
+
+ ifp = ifnet_byindex(msfr.msfr_ifindex);
+ if (ifp == NULL)
+ return (EINVAL);
+
+ INP_WLOCK(inp);
+
+ /*
+ * Lookup group on the socket.
+ */
+ gsa = (sockunion_t *)&msfr.msfr_group;
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ INP_WUNLOCK(inp);
+ return (EADDRNOTAVAIL);
+ }
+ imf = &imo->imo_mfilters[idx];
+
+ /*
+ * Ignore memberships which are in limbo.
+ */
+ if (imf->imf_st[1] == MCAST_UNDEFINED) {
+ INP_WUNLOCK(inp);
+ return (EAGAIN);
+ }
+ msfr.msfr_fmode = imf->imf_st[1];
+
+ /*
+ * If the user specified a buffer, copy out the source filter
+ * entries to userland gracefully.
+ * We only copy out the number of entries which userland
+ * has asked for, but we always tell userland how big the
+ * buffer really needs to be.
+ */
+ tss = NULL;
+ if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
+ tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+ M_TEMP, M_NOWAIT | M_ZERO);
+ if (tss == NULL) {
+ INP_WUNLOCK(inp);
+ return (ENOBUFS);
+ }
+ }
+
+ /*
+ * Count number of sources in-mode at t0.
+ * If buffer space exists and remains, copy out source entries.
+ */
+ nsrcs = msfr.msfr_nsrcs;
+ ncsrcs = 0;
+ ptss = tss;
+ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+ lims = (struct in_msource *)ims;
+ if (lims->imsl_st[0] == MCAST_UNDEFINED ||
+ lims->imsl_st[0] != imf->imf_st[0])
+ continue;
+ ++ncsrcs;
+ if (tss != NULL && nsrcs > 0) {
+ psin = (struct sockaddr_in *)ptss;
+ psin->sin_family = AF_INET;
+ psin->sin_len = sizeof(struct sockaddr_in);
+ psin->sin_addr.s_addr = htonl(lims->ims_haddr);
+ psin->sin_port = 0;
+ ++ptss;
+ --nsrcs;
+ }
+ }
+
+ INP_WUNLOCK(inp);
+
+ if (tss != NULL) {
+ error = copyout(tss, msfr.msfr_srcs,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+ free(tss, M_TEMP);
+ if (error)
+ return (error);
+ }
+
+ msfr.msfr_nsrcs = ncsrcs;
+ error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
+
+ return (error);
+}
+
+/*
+ * Return the IP multicast options in response to user getsockopt().
+ */
+int
+inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct ip_mreqn mreqn;
+ struct ip_moptions *imo;
+ struct ifnet *ifp;
+ struct in_ifaddr *ia;
+ int error, optval;
+ u_char coptval;
+
+ INP_WLOCK(inp);
+ imo = inp->inp_moptions;
+ /*
+	 * If socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+ * or is a divert socket, reject it.
+ */
+ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+ (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+ inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
+ INP_WUNLOCK(inp);
+ return (EOPNOTSUPP);
+ }
+
+ error = 0;
+ switch (sopt->sopt_name) {
+ case IP_MULTICAST_VIF:
+ if (imo != NULL)
+ optval = imo->imo_multicast_vif;
+ else
+ optval = -1;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MULTICAST_IF:
+ memset(&mreqn, 0, sizeof(struct ip_mreqn));
+ if (imo != NULL) {
+ ifp = imo->imo_multicast_ifp;
+ if (!in_nullhost(imo->imo_multicast_addr)) {
+ mreqn.imr_address = imo->imo_multicast_addr;
+ } else if (ifp != NULL) {
+ mreqn.imr_ifindex = ifp->if_index;
+ IFP_TO_IA(ifp, ia);
+ if (ia != NULL) {
+ mreqn.imr_address =
+ IA_SIN(ia)->sin_addr;
+ ifa_free(&ia->ia_ifa);
+ }
+ }
+ }
+ INP_WUNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
+ error = sooptcopyout(sopt, &mreqn,
+ sizeof(struct ip_mreqn));
+ } else {
+ error = sooptcopyout(sopt, &mreqn.imr_address,
+ sizeof(struct in_addr));
+ }
+ break;
+
+ case IP_MULTICAST_TTL:
+ if (imo == 0)
+ optval = coptval = IP_DEFAULT_MULTICAST_TTL;
+ else
+ optval = coptval = imo->imo_multicast_ttl;
+ INP_WUNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(u_char))
+ error = sooptcopyout(sopt, &coptval, sizeof(u_char));
+ else
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MULTICAST_LOOP:
+ if (imo == 0)
+ optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
+ else
+ optval = coptval = imo->imo_multicast_loop;
+ INP_WUNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(u_char))
+ error = sooptcopyout(sopt, &coptval, sizeof(u_char));
+ else
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MSFILTER:
+ if (imo == NULL) {
+ error = EADDRNOTAVAIL;
+ INP_WUNLOCK(inp);
+ } else {
+ error = inp_get_source_filters(inp, sopt);
+ }
+ break;
+
+ default:
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+
+ INP_UNLOCK_ASSERT(inp);
+
+ return (error);
+}
+
+/*
+ * Look up the ifnet to use for a multicast group membership,
+ * given the IPv4 address of an interface, and the IPv4 group address.
+ *
+ * This routine exists to support legacy multicast applications
+ * which do not understand that multicast memberships are scoped to
+ * specific physical links in the networking stack, or which need
+ * to join link-scope groups before IPv4 addresses are configured.
+ *
+ * If inp is non-NULL, use this socket's current FIB number for any
+ * required FIB lookup.
+ * If ina is INADDR_ANY, look up the group address in the unicast FIB,
+ * and use its ifp; usually, this points to the default next-hop.
+ *
+ * If the FIB lookup fails, attempt to use the first non-loopback
+ * interface with multicast capability in the system as a
+ * last resort. The legacy IPv4 ASM API requires that we do
+ * this in order to allow groups to be joined when the routing
+ * table has not yet been populated during boot.
+ *
+ * Returns NULL if no ifp could be found.
+ *
+ * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
+ * FUTURE: Implement IPv4 source-address selection.
+ */
+static struct ifnet *
+inp_lookup_mcast_ifp(const struct inpcb *inp,
+ const struct sockaddr_in *gsin, const struct in_addr ina)
+{
+ struct ifnet *ifp;
+
+ KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
+ KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
+ ("%s: not multicast", __func__));
+
+ ifp = NULL;
+ if (!in_nullhost(ina)) {
+ INADDR_TO_IFP(ina, ifp);
+ } else {
+ struct route ro;
+
+ ro.ro_rt = NULL;
+ memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
+ in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
+ if (ro.ro_rt != NULL) {
+ ifp = ro.ro_rt->rt_ifp;
+ KASSERT(ifp != NULL, ("%s: null ifp", __func__));
+ RTFREE(ro.ro_rt);
+ } else {
+ struct in_ifaddr *ia;
+ struct ifnet *mifp;
+
+ mifp = NULL;
+ IN_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ mifp = ia->ia_ifp;
+ if (!(mifp->if_flags & IFF_LOOPBACK) &&
+ (mifp->if_flags & IFF_MULTICAST)) {
+ ifp = mifp;
+ break;
+ }
+ }
+ IN_IFADDR_RUNLOCK();
+ }
+ }
+
+ return (ifp);
+}
+
+/*
+ * Join an IPv4 multicast group, possibly with a source.
+ */
+static int
+inp_join_group(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_multi *inm;
+ struct in_msource *lims;
+ size_t idx;
+ int error, is_new;
+
+ ifp = NULL;
+ imf = NULL;
+ error = 0;
+ is_new = 0;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ gsa->ss.ss_family = AF_UNSPEC;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+ ssa->ss.ss_family = AF_UNSPEC;
+
+ switch (sopt->sopt_name) {
+ case IP_ADD_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP: {
+ struct ip_mreq_source mreqs;
+
+ if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq),
+ sizeof(struct ip_mreq));
+ /*
+ * Do argument switcharoo from ip_mreq into
+ * ip_mreq_source to avoid using two instances.
+ */
+ mreqs.imr_interface = mreqs.imr_sourceaddr;
+ mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
+ } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ }
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+ }
+
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
+ mreqs.imr_interface);
+ CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+ break;
+ }
+
+ case MCAST_JOIN_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ if (sopt->sopt_name == MCAST_JOIN_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_req),
+ sizeof(struct group_req));
+ } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ }
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ /*
+ * Overwrite the port field if present, as the sockaddr
+ * being copied in may be matched with a binary comparison.
+ */
+ gsa->sin.sin_port = 0;
+ if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ ssa->sin.sin_port = 0;
+ }
+
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+ ifp = ifnet_byindex(gsr.gsr_interface);
+ break;
+
+ default:
+ CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
+ __func__, sopt->sopt_name);
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EADDRNOTAVAIL);
+
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1) {
+ is_new = 1;
+ } else {
+ inm = imo->imo_membership[idx];
+ imf = &imo->imo_mfilters[idx];
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ /*
+ * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
+ * is an error. On an existing inclusive membership,
+ * it just adds the source to the filter list.
+ */
+ if (imf->imf_st[1] != MCAST_INCLUDE) {
+ error = EINVAL;
+ goto out_inp_locked;
+ }
+ /* Throw out duplicates. */
+ lims = imo_match_source(imo, idx, &ssa->sa);
+ if (lims != NULL) {
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+ } else {
+ /*
+ * MCAST_JOIN_GROUP on an existing inclusive
+ * membership is an error; if you want to change
+ * filter mode, you must use the userland API
+ * setsourcefilter().
+ */
+ if (imf->imf_st[1] == MCAST_INCLUDE) {
+ error = EINVAL;
+ goto out_inp_locked;
+ }
+ /*
+ * MCAST_JOIN_GROUP on an existing exclusive
+ * membership is an error; return EADDRINUSE
+ * to preserve 4.4BSD API idempotence, and
+ * avoid tedious detour to code below.
+ * NOTE: This is bending RFC 3678 a bit.
+ */
+ if (imf->imf_st[1] == MCAST_EXCLUDE) {
+ error = EADDRINUSE;
+ goto out_inp_locked;
+ }
+ }
+ }
+
+ /*
+ * Begin state merge transaction at socket layer.
+ */
+ INP_WLOCK_ASSERT(inp);
+
+ if (is_new) {
+ if (imo->imo_num_memberships == imo->imo_max_memberships) {
+ error = imo_grow(imo);
+ if (error)
+ goto out_inp_locked;
+ }
+ /*
+ * Allocate the new slot upfront so we can deal with
+ * grafting the new source filter in same code path
+ * as for join-source on existing membership.
+ */
+ idx = imo->imo_num_memberships;
+ imo->imo_membership[idx] = NULL;
+ imo->imo_num_memberships++;
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imf_mfilters vector was not allocated", __func__));
+ imf = &imo->imo_mfilters[idx];
+ KASSERT(RB_EMPTY(&imf->imf_sources),
+ ("%s: imf_sources not empty", __func__));
+ }
+
+ /*
+ * Graft new source into filter list for this inpcb's
+ * membership of the group. The in_multi may not have
+ * been allocated yet if this is a new membership, however,
+ * the in_mfilter slot will be allocated and must be initialized.
+ */
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ /* Membership starts in IN mode */
+ if (is_new) {
+ CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
+ imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
+ } else {
+ CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
+ }
+ lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
+ if (lims == NULL) {
+ CTR1(KTR_IGMPV3, "%s: merge imf state failed",
+ __func__);
+ error = ENOMEM;
+ goto out_imo_free;
+ }
+ } else {
+ /* No address specified; Membership starts in EX mode */
+ if (is_new) {
+ CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
+ imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+ }
+ }
+
+ /*
+ * Begin state merge transaction at IGMP layer.
+ */
+ IN_MULTI_LOCK();
+
+ if (is_new) {
+ error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
+ &inm);
+ if (error)
+ goto out_imo_free;
+ imo->imo_membership[idx] = inm;
+ } else {
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
+ __func__);
+ goto out_imf_rollback;
+ }
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
+ __func__);
+ goto out_imf_rollback;
+ }
+ }
+
+ IN_MULTI_UNLOCK();
+
+out_imf_rollback:
+ INP_WLOCK_ASSERT(inp);
+ if (error) {
+ imf_rollback(imf);
+ if (is_new)
+ imf_purge(imf);
+ else
+ imf_reap(imf);
+ } else {
+ imf_commit(imf);
+ }
+
+out_imo_free:
+ if (error && is_new) {
+ imo->imo_membership[idx] = NULL;
+ --imo->imo_num_memberships;
+ }
+
+out_inp_locked:
+ INP_WUNLOCK(inp);
+ return (error);
+}
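+
+/*
+ * Illustrative userland sketch (not part of the original code): joining an
+ * IPv4 group through the RFC 3678 MCAST_JOIN_GROUP option handled above.
+ * The interface name "em0" and the group 239.1.1.1 are placeholders.
+ */
+#if 0
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <string.h>
+
+static int
+join_group_example(int s)
+{
+	struct group_req gr;
+	struct sockaddr_in *sin;
+
+	memset(&gr, 0, sizeof(gr));
+	gr.gr_interface = if_nametoindex("em0");
+	sin = (struct sockaddr_in *)&gr.gr_group;
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_addr.s_addr = inet_addr("239.1.1.1");
+
+	/* EADDRINUSE here means the exclusive membership already exists. */
+	return (setsockopt(s, IPPROTO_IP, MCAST_JOIN_GROUP, &gr, sizeof(gr)));
+}
+#endif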
+
+/*
+ * Leave an IPv4 multicast group on an inpcb, possibly with a source.
+ */
+static int
+inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ struct ip_mreq_source mreqs;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_msource *ims;
+ struct in_multi *inm;
+ size_t idx;
+ int error, is_final;
+
+ ifp = NULL;
+ error = 0;
+ is_final = 1;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ gsa->ss.ss_family = AF_UNSPEC;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+ ssa->ss.ss_family = AF_UNSPEC;
+
+ switch (sopt->sopt_name) {
+ case IP_DROP_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq),
+ sizeof(struct ip_mreq));
+ /*
+ * Swap interface and sourceaddr arguments,
+ * as ip_mreq and ip_mreq_source are laid
+ * out differently.
+ */
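+			/*
+			 * (struct ip_mreq is { imr_multiaddr, imr_interface };
+			 * struct ip_mreq_source is { imr_multiaddr,
+			 * imr_sourceaddr, imr_interface }, so the second
+			 * in_addr of the ip_mreq has landed in
+			 * imr_sourceaddr here.)
+			 */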
+ mreqs.imr_interface = mreqs.imr_sourceaddr;
+ mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
+ } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ }
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+ }
+
+ /*
+ * Attempt to look up hinted ifp from interface address.
+ * Fallthrough with null ifp iff lookup fails, to
+ * preserve 4.4BSD mcast API idempotence.
+ * XXX NOTE WELL: The RFC 3678 API is preferred because
+ * using an IPv4 address as a key is racy.
+ */
+ if (!in_nullhost(mreqs.imr_interface))
+ INADDR_TO_IFP(mreqs.imr_interface, ifp);
+
+ CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+
+ break;
+
+ case MCAST_LEAVE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_req),
+ sizeof(struct group_req));
+ } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ }
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ }
+
+ if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(gsr.gsr_interface);
+
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+ break;
+
+ default:
+ CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
+ __func__, sopt->sopt_name);
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ /*
+ * Find the membership in the membership array.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1) {
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+ inm = imo->imo_membership[idx];
+ imf = &imo->imo_mfilters[idx];
+
+ if (ssa->ss.ss_family != AF_UNSPEC)
+ is_final = 0;
+
+ /*
+ * Begin state merge transaction at socket layer.
+ */
+ INP_WLOCK_ASSERT(inp);
+
+ /*
+ * If we were instructed only to leave a given source, do so.
+ * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
+ */
+ if (is_final) {
+ imf_leave(imf);
+ } else {
+ if (imf->imf_st[0] == MCAST_EXCLUDE) {
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+ ims = imo_match_source(imo, idx, &ssa->sa);
+ if (ims == NULL) {
+ CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
+ inet_ntoa(ssa->sin.sin_addr), "not ");
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+ CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
+ error = imf_prune(imf, &ssa->sin);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: merge imf state failed",
+ __func__);
+ goto out_inp_locked;
+ }
+ }
+
+ /*
+ * Begin state merge transaction at IGMP layer.
+ */
+ IN_MULTI_LOCK();
+
+ if (is_final) {
+ /*
+ * Give up the multicast address record to which
+ * the membership points.
+ */
+ (void)in_leavegroup_locked(inm, imf);
+ } else {
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
+ __func__);
+ goto out_imf_rollback;
+ }
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
+ __func__);
+ }
+ }
+
+ IN_MULTI_UNLOCK();
+
+out_imf_rollback:
+ if (error)
+ imf_rollback(imf);
+ else
+ imf_commit(imf);
+
+ imf_reap(imf);
+
+ if (is_final) {
+ /* Remove the gap in the membership and filter array. */
+ for (++idx; idx < imo->imo_num_memberships; ++idx) {
+ imo->imo_membership[idx-1] = imo->imo_membership[idx];
+ imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx];
+ }
+ imo->imo_num_memberships--;
+ }
+
+out_inp_locked:
+ INP_WUNLOCK(inp);
+ return (error);
+}
+
+/*
+ * Select the interface for transmitting IPv4 multicast datagrams.
+ *
+ * Either an instance of struct in_addr or an instance of struct ip_mreqn
+ * may be passed to this socket option. An address of INADDR_ANY or an
+ * interface index of 0 is used to remove a previous selection.
+ * When no interface is selected, one is chosen for every send.
+ */
+static int
+inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct in_addr addr;
+ struct ip_mreqn mreqn;
+ struct ifnet *ifp;
+ struct ip_moptions *imo;
+ int error;
+
+ if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
+ /*
+ * An interface index was specified using the
+ * Linux-derived ip_mreqn structure.
+ */
+ error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
+ sizeof(struct ip_mreqn));
+ if (error)
+ return (error);
+
+ if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
+ return (EINVAL);
+
+ if (mreqn.imr_ifindex == 0) {
+ ifp = NULL;
+ } else {
+ ifp = ifnet_byindex(mreqn.imr_ifindex);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+ }
+ } else {
+ /*
+ * An interface was specified by IPv4 address.
+ * This is the traditional BSD usage.
+ */
+ error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
+ sizeof(struct in_addr));
+ if (error)
+ return (error);
+ if (in_nullhost(addr)) {
+ ifp = NULL;
+ } else {
+ INADDR_TO_IFP(addr, ifp);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+ }
+ CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp,
+ inet_ntoa(addr));
+ }
+
+ /* Reject interfaces which do not support multicast. */
+ if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EOPNOTSUPP);
+
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_ifp = ifp;
+ imo->imo_multicast_addr.s_addr = INADDR_ANY;
+ INP_WUNLOCK(inp);
+
+ return (0);
+}
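+
+/*
+ * Illustrative sketch (not part of the original code): the two payload
+ * shapes accepted by IP_MULTICAST_IF above. The interface name "em0" and
+ * the address 192.0.2.1 are placeholders.
+ */
+#if 0
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <string.h>
+
+static int
+select_multicast_if_example(int s)
+{
+	struct ip_mreqn mreqn;
+	struct in_addr addr;
+
+	/* Preferred form: select the interface by index via ip_mreqn. */
+	memset(&mreqn, 0, sizeof(mreqn));
+	mreqn.imr_ifindex = if_nametoindex("em0");
+	if (setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &mreqn,
+	    sizeof(mreqn)) == 0)
+		return (0);
+
+	/* Traditional BSD form: select the interface by local address. */
+	addr.s_addr = inet_addr("192.0.2.1");
+	return (setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &addr,
+	    sizeof(addr)));
+}
+#endif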
+
+/*
+ * Atomically set source filters on a socket for an IPv4 multicast group.
+ *
+ * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
+ */
+static int
+inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct __msfilterreq msfr;
+ sockunion_t *gsa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_multi *inm;
+ size_t idx;
+ int error;
+
+ error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
+ sizeof(struct __msfilterreq));
+ if (error)
+ return (error);
+
+ if (msfr.msfr_nsrcs > in_mcast_maxsocksrc ||
+ (msfr.msfr_fmode != MCAST_EXCLUDE &&
+ msfr.msfr_fmode != MCAST_INCLUDE))
+ return (EINVAL);
+
+ if (msfr.msfr_group.ss_family != AF_INET ||
+ msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ gsa = (sockunion_t *)&msfr.msfr_group;
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ gsa->sin.sin_port = 0; /* ignore port */
+
+ if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(msfr.msfr_ifindex);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+
+ /*
+ * Take the INP write lock.
+ * Check if this socket is a member of this group.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+ inm = imo->imo_membership[idx];
+ imf = &imo->imo_mfilters[idx];
+
+ /*
+ * Begin state merge transaction at socket layer.
+ */
+ INP_WLOCK_ASSERT(inp);
+
+ imf->imf_st[1] = msfr.msfr_fmode;
+
+ /*
+ * Apply any new source filters, if present.
+ * Make a copy of the user-space source vector so
+ * that we may copy them with a single copyin. This
+ * allows us to deal with page faults up-front.
+ */
+ if (msfr.msfr_nsrcs > 0) {
+ struct in_msource *lims;
+ struct sockaddr_in *psin;
+ struct sockaddr_storage *kss, *pkss;
+ int i;
+
+ INP_WUNLOCK(inp);
+
+ CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
+ __func__, (unsigned long)msfr.msfr_nsrcs);
+ kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+ M_TEMP, M_WAITOK);
+ error = copyin(msfr.msfr_srcs, kss,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+ if (error) {
+ free(kss, M_TEMP);
+ return (error);
+ }
+
+ INP_WLOCK(inp);
+
+ /*
+ * Mark all source filters as UNDEFINED at t1.
+ * Restore new group filter mode, as imf_leave()
+ * will set it to INCLUDE.
+ */
+ imf_leave(imf);
+ imf->imf_st[1] = msfr.msfr_fmode;
+
+ /*
+ * Update socket layer filters at t1, lazy-allocating
+ * new entries. This saves a bunch of memory at the
+ * cost of one RB_FIND() per source entry; duplicate
+ * entries in the msfr_nsrcs vector are ignored.
+ * If we encounter an error, rollback transaction.
+ *
+ * XXX This too could be replaced with a set-symmetric
+ * difference like loop to avoid walking from root
+ * every time, as the key space is common.
+ */
+ for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
+ psin = (struct sockaddr_in *)pkss;
+ if (psin->sin_family != AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ if (psin->sin_len != sizeof(struct sockaddr_in)) {
+ error = EINVAL;
+ break;
+ }
+ error = imf_get_source(imf, psin, &lims);
+ if (error)
+ break;
+ lims->imsl_st[1] = imf->imf_st[1];
+ }
+ free(kss, M_TEMP);
+ }
+
+ if (error)
+ goto out_imf_rollback;
+
+ INP_WLOCK_ASSERT(inp);
+ IN_MULTI_LOCK();
+
+ /*
+ * Begin state merge transaction at IGMP layer.
+ */
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
+ goto out_imf_rollback;
+ }
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error)
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+
+ IN_MULTI_UNLOCK();
+
+out_imf_rollback:
+ if (error)
+ imf_rollback(imf);
+ else
+ imf_commit(imf);
+
+ imf_reap(imf);
+
+out_inp_locked:
+ INP_WUNLOCK(inp);
+ return (error);
+}
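+
+/*
+ * Illustrative userland sketch (not part of the original code): the
+ * setsourcefilter(3) wrapper builds the struct __msfilterreq consumed
+ * above. The interface "em0", group 239.1.1.1 and source 192.0.2.10 are
+ * placeholders.
+ */
+#if 0
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <string.h>
+
+static int
+set_filter_example(int s)
+{
+	struct sockaddr_in grp;
+	struct sockaddr_storage src;
+	struct sockaddr_in *psin;
+
+	memset(&grp, 0, sizeof(grp));
+	grp.sin_family = AF_INET;
+	grp.sin_len = sizeof(grp);
+	grp.sin_addr.s_addr = inet_addr("239.1.1.1");
+
+	memset(&src, 0, sizeof(src));
+	psin = (struct sockaddr_in *)&src;
+	psin->sin_family = AF_INET;
+	psin->sin_len = sizeof(*psin);
+	psin->sin_addr.s_addr = inet_addr("192.0.2.10");
+
+	/* Accept only the listed source on the given group and interface. */
+	return (setsourcefilter(s, if_nametoindex("em0"),
+	    (struct sockaddr *)&grp, sizeof(grp), MCAST_INCLUDE, 1, &src));
+}
+#endif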
+
+/*
+ * Set the IP multicast options in response to user setsockopt().
+ *
+ * Many of the socket options handled in this function duplicate the
+ * functionality of socket options in the regular unicast API. However,
+ * it is not possible to merge the duplicate code, because the idempotence
+ * of the IPv4 multicast part of the BSD Sockets API must be preserved;
+ * the effects of these options must be treated as separate and distinct.
+ *
+ * SMPng: XXX: Unlocked read of inp_socket believed OK.
+ * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
+ * is refactored to no longer use vifs.
+ */
+int
+inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct ip_moptions *imo;
+ int error;
+
+ error = 0;
+
+ /*
+	 * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+ * or is a divert socket, reject it.
+ */
+ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+ (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+ inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
+ return (EOPNOTSUPP);
+
+ switch (sopt->sopt_name) {
+ case IP_MULTICAST_VIF: {
+ int vifi;
+ /*
+ * Select a multicast VIF for transmission.
+ * Only useful if multicast forwarding is active.
+ */
+ if (legal_vif_num == NULL) {
+ error = EOPNOTSUPP;
+ break;
+ }
+ error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
+ if (error)
+ break;
+ if (!legal_vif_num(vifi) && (vifi != -1)) {
+ error = EINVAL;
+ break;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_vif = vifi;
+ INP_WUNLOCK(inp);
+ break;
+ }
+
+ case IP_MULTICAST_IF:
+ error = inp_set_multicast_if(inp, sopt);
+ break;
+
+ case IP_MULTICAST_TTL: {
+ u_char ttl;
+
+ /*
+ * Set the IP time-to-live for outgoing multicast packets.
+ * The original multicast API required a char argument,
+ * which is inconsistent with the rest of the socket API.
+ * We allow either a char or an int.
+ */
+ if (sopt->sopt_valsize == sizeof(u_char)) {
+ error = sooptcopyin(sopt, &ttl, sizeof(u_char),
+ sizeof(u_char));
+ if (error)
+ break;
+ } else {
+ u_int ittl;
+
+ error = sooptcopyin(sopt, &ittl, sizeof(u_int),
+ sizeof(u_int));
+ if (error)
+ break;
+ if (ittl > 255) {
+ error = EINVAL;
+ break;
+ }
+ ttl = (u_char)ittl;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_ttl = ttl;
+ INP_WUNLOCK(inp);
+ break;
+ }
+
+ case IP_MULTICAST_LOOP: {
+ u_char loop;
+
+ /*
+ * Set the loopback flag for outgoing multicast packets.
+ * Must be zero or one. The original multicast API required a
+ * char argument, which is inconsistent with the rest
+ * of the socket API. We allow either a char or an int.
+ */
+ if (sopt->sopt_valsize == sizeof(u_char)) {
+ error = sooptcopyin(sopt, &loop, sizeof(u_char),
+ sizeof(u_char));
+ if (error)
+ break;
+ } else {
+ u_int iloop;
+
+ error = sooptcopyin(sopt, &iloop, sizeof(u_int),
+ sizeof(u_int));
+ if (error)
+ break;
+ loop = (u_char)iloop;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_loop = !!loop;
+ INP_WUNLOCK(inp);
+ break;
+ }
+
+ case IP_ADD_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case MCAST_JOIN_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ error = inp_join_group(inp, sopt);
+ break;
+
+ case IP_DROP_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ error = inp_leave_group(inp, sopt);
+ break;
+
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = inp_block_unblock_source(inp, sopt);
+ break;
+
+ case IP_MSFILTER:
+ error = inp_set_source_filters(inp, sopt);
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ INP_UNLOCK_ASSERT(inp);
+
+ return (error);
+}
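+
+/*
+ * Illustrative sketch (not part of the original code): IP_MULTICAST_TTL
+ * accepts either payload width, as noted in the option handler above.
+ */
+#if 0
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+static int
+set_mcast_ttl_example(int s)
+{
+	u_char ttl8 = 32;
+	u_int ttl32 = 32;
+
+	/* The original char-sized form... */
+	if (setsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &ttl8,
+	    sizeof(ttl8)) == 0)
+		return (0);
+	/* ...and the int-sized form are both accepted. */
+	return (setsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &ttl32,
+	    sizeof(ttl32)));
+}
+#endif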
+
+/*
+ * Expose IGMP's multicast filter mode and source list(s) to userland,
+ * keyed by (ifindex, group).
+ * The filter mode is written out as a uint32_t, followed by
+ * 0..n of struct in_addr.
+ * For use by ifmcstat(8).
+ * SMPng: NOTE: unlocked read of ifindex space.
+ */
+static int
+sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
+{
+ struct in_addr src, group;
+ struct ifnet *ifp;
+ struct ifmultiaddr *ifma;
+ struct in_multi *inm;
+ struct ip_msource *ims;
+ int *name;
+ int retval;
+ u_int namelen;
+ uint32_t fmode, ifindex;
+
+ name = (int *)arg1;
+ namelen = arg2;
+
+ if (req->newptr != NULL)
+ return (EPERM);
+
+ if (namelen != 2)
+ return (EINVAL);
+
+ ifindex = name[0];
+ if (ifindex <= 0 || ifindex > V_if_index) {
+ CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
+ __func__, ifindex);
+ return (ENOENT);
+ }
+
+ group.s_addr = name[1];
+ if (!IN_MULTICAST(ntohl(group.s_addr))) {
+ CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
+ __func__, inet_ntoa(group));
+ return (EINVAL);
+ }
+
+ ifp = ifnet_byindex(ifindex);
+ if (ifp == NULL) {
+ CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
+ __func__, ifindex);
+ return (ENOENT);
+ }
+
+ retval = sysctl_wire_old_buffer(req,
+ sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
+ if (retval)
+ return (retval);
+
+ IN_MULTI_LOCK();
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ if (!in_hosteq(inm->inm_addr, group))
+ continue;
+ fmode = inm->inm_st[1].iss_fmode;
+ retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
+ if (retval != 0)
+ break;
+ RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+#ifdef KTR
+ struct in_addr ina;
+ ina.s_addr = htonl(ims->ims_haddr);
+ CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
+ inet_ntoa(ina));
+#endif
+ /*
+ * Only copy-out sources which are in-mode.
+ */
+ if (fmode != ims_get_mode(inm, ims, 1)) {
+ CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
+ __func__);
+ continue;
+ }
+ src.s_addr = htonl(ims->ims_haddr);
+ retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
+ if (retval != 0)
+ break;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ IN_MULTI_UNLOCK();
+
+ return (retval);
+}
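+
+/*
+ * Illustrative sketch (not part of the original code): decoding the buffer
+ * produced above as a consumer such as ifmcstat(8) would, i.e. a uint32_t
+ * filter mode followed by the in-mode sources. "buf" and "len" are assumed
+ * to hold the raw sysctl output for one (ifindex, group) pair.
+ */
+#if 0
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+static void
+decode_filters_example(const char *buf, size_t len)
+{
+	uint32_t fmode;
+	struct in_addr src;
+	size_t off;
+
+	if (len < sizeof(fmode))
+		return;
+	memcpy(&fmode, buf, sizeof(fmode));
+	printf("mode %s\n", fmode == MCAST_EXCLUDE ? "exclude" : "include");
+	for (off = sizeof(fmode); off + sizeof(src) <= len;
+	    off += sizeof(src)) {
+		memcpy(&src, buf + off, sizeof(src));
+		printf("  source %s\n", inet_ntoa(src));
+	}
+}
+#endif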
+
+#ifdef KTR
+
+static const char *inm_modestrs[] = { "un", "in", "ex" };
+
+static const char *
+inm_mode_str(const int mode)
+{
+
+ if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
+ return (inm_modestrs[mode]);
+ return ("??");
+}
+
+static const char *inm_statestrs[] = {
+ "not-member",
+ "silent",
+ "idle",
+ "lazy",
+ "sleeping",
+ "awakening",
+ "query-pending",
+ "sg-query-pending",
+ "leaving"
+};
+
+static const char *
+inm_state_str(const int state)
+{
+
+ if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
+ return (inm_statestrs[state]);
+ return ("??");
+}
+
+/*
+ * Dump an in_multi structure to the console.
+ */
+void
+inm_print(const struct in_multi *inm)
+{
+ int t;
+
+ if ((ktr_mask & KTR_IGMPV3) == 0)
+ return;
+
+ printf("%s: --- begin inm %p ---\n", __func__, inm);
+ printf("addr %s ifp %p(%s) ifma %p\n",
+ inet_ntoa(inm->inm_addr),
+ inm->inm_ifp,
+ inm->inm_ifp->if_xname,
+ inm->inm_ifma);
+ printf("timer %u state %s refcount %u scq.len %u\n",
+ inm->inm_timer,
+ inm_state_str(inm->inm_state),
+ inm->inm_refcount,
+ inm->inm_scq.ifq_len);
+ printf("igi %p nsrc %lu sctimer %u scrv %u\n",
+ inm->inm_igi,
+ inm->inm_nsrc,
+ inm->inm_sctimer,
+ inm->inm_scrv);
+ for (t = 0; t < 2; t++) {
+ printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
+ inm_mode_str(inm->inm_st[t].iss_fmode),
+ inm->inm_st[t].iss_asm,
+ inm->inm_st[t].iss_ex,
+ inm->inm_st[t].iss_in,
+ inm->inm_st[t].iss_rec);
+ }
+ printf("%s: --- end inm %p ---\n", __func__, inm);
+}
+
+#else /* !KTR */
+
+void
+inm_print(const struct in_multi *inm)
+{
+
+}
+
+#endif /* KTR */
+
+RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c
new file mode 100644
index 00000000..186a0f0a
--- /dev/null
+++ b/freebsd/sys/netinet/in_pcb.c
@@ -0,0 +1,1958 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993, 1995
+ * The Regents of the University of California.
+ * Copyright (c) 2007-2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ddb.h>
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+
+#ifdef DDB
+#include <freebsd/ddb/ddb.h>
+#endif
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/udp_var.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#endif /* INET6 */
+
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#include <freebsd/netipsec/key.h>
+#endif /* IPSEC */
+
+#include <freebsd/security/mac/mac_framework.h>
+
+/*
+ * These configure the range of local port addresses assigned to
+ * "unspecified" outgoing connections/packets/whatever.
+ */
+VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */
+VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */
+VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */
+VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */
+VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */
+VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */
+
+/*
+ * Reserved ports accessible only to root. There are significant
+ * security considerations that must be accounted for when changing these,
+ * but the security benefits can be great. Please be careful.
+ */
+VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */
+VNET_DEFINE(int, ipport_reservedlow);
+
+/* Variables dealing with random ephemeral port allocation. */
+VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */
+VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */
+VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */
+VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */
+VNET_DEFINE(int, ipport_tcpallocs);
+static VNET_DEFINE(int, ipport_tcplastcount);
+
+#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
+
+#define RANGECHK(var, min, max) \
+ if ((var) < (min)) { (var) = (min); } \
+ else if ((var) > (max)) { (var) = (max); }
+
+static void in_pcbremlists(struct inpcb *inp);
+
+static int
+sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+
+#ifdef VIMAGE
+ error = vnet_sysctl_handle_int(oidp, arg1, arg2, req);
+#else
+ error = sysctl_handle_int(oidp, arg1, arg2, req);
+#endif
+ if (error == 0) {
+ RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
+ RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
+ RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
+ RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
+ RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
+ RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
+ }
+ return (error);
+}
+
+#undef RANGECHK
+
+SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
+
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0,
+ &sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0,
+ &sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, first,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0,
+ &sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, last,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0,
+ &sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0,
+ &sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0,
+ &sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
+ CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, "");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
+ CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, "");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
+ &VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW,
+ &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
+	"allocations before switching to a sequential one");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
+ &VNET_NAME(ipport_randomtime), 0,
+	"Minimum time to keep sequential port "
+ "allocation before switching to a random one");
+
+/*
+ * in_pcb.c: manage the Protocol Control Blocks.
+ *
+ * NOTE: It is assumed that most of these functions will be called with
+ * the pcbinfo lock held, and often, the inpcb lock held, as these utility
+ * functions often modify hash chains or addresses in pcbs.
+ */
+
+/*
+ * Allocate a PCB and associate it with the socket.
+ * On success return with the PCB locked.
+ */
+int
+in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
+{
+ struct inpcb *inp;
+ int error;
+
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ error = 0;
+ inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
+ if (inp == NULL)
+ return (ENOBUFS);
+ bzero(inp, inp_zero_size);
+ inp->inp_pcbinfo = pcbinfo;
+ inp->inp_socket = so;
+ inp->inp_cred = crhold(so->so_cred);
+ inp->inp_inc.inc_fibnum = so->so_fibnum;
+#ifdef MAC
+ error = mac_inpcb_init(inp, M_NOWAIT);
+ if (error != 0)
+ goto out;
+ mac_inpcb_create(so, inp);
+#endif
+#ifdef IPSEC
+ error = ipsec_init_policy(so, &inp->inp_sp);
+ if (error != 0) {
+#ifdef MAC
+ mac_inpcb_destroy(inp);
+#endif
+ goto out;
+ }
+#endif /*IPSEC*/
+#ifdef INET6
+ if (INP_SOCKAF(so) == AF_INET6) {
+ inp->inp_vflag |= INP_IPV6PROTO;
+ if (V_ip6_v6only)
+ inp->inp_flags |= IN6P_IPV6_V6ONLY;
+ }
+#endif
+ LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
+ pcbinfo->ipi_count++;
+ so->so_pcb = (caddr_t)inp;
+#ifdef INET6
+ if (V_ip6_auto_flowlabel)
+ inp->inp_flags |= IN6P_AUTOFLOWLABEL;
+#endif
+ INP_WLOCK(inp);
+ inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+ inp->inp_refcount = 1; /* Reference from the inpcbinfo */
+#if defined(IPSEC) || defined(MAC)
+out:
+ if (error != 0) {
+ crfree(inp->inp_cred);
+ uma_zfree(pcbinfo->ipi_zone, inp);
+ }
+#endif
+ return (error);
+}
+
+int
+in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
+{
+ int anonport, error;
+
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
+ return (EINVAL);
+ anonport = inp->inp_lport == 0 && (nam == NULL ||
+ ((struct sockaddr_in *)nam)->sin_port == 0);
+ error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
+ &inp->inp_lport, cred);
+ if (error)
+ return (error);
+ if (in_pcbinshash(inp) != 0) {
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ inp->inp_lport = 0;
+ return (EAGAIN);
+ }
+ if (anonport)
+ inp->inp_flags |= INP_ANONPORT;
+ return (0);
+}
+
+/*
+ * Set up a bind operation on a PCB, performing port allocation
+ * as required, but do not actually modify the PCB. Callers can
+ * either complete the bind by setting inp_laddr/inp_lport and
+ * calling in_pcbinshash(), or they can just use the resulting
+ * port and address to authorise the sending of a once-off packet.
+ *
+ * On error, the values of *laddrp and *lportp are not changed.
+ */
+int
+in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
+ u_short *lportp, struct ucred *cred)
+{
+ struct socket *so = inp->inp_socket;
+ unsigned short *lastport;
+ struct sockaddr_in *sin;
+ struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+ struct in_addr laddr;
+ u_short lport = 0;
+ int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+ int error;
+ int dorandom;
+
+ /*
+ * Because no actual state changes occur here, a global write lock on
+ * the pcbinfo isn't required.
+ */
+ INP_INFO_LOCK_ASSERT(pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
+ if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
+ return (EADDRNOTAVAIL);
+ laddr.s_addr = *laddrp;
+ if (nam != NULL && laddr.s_addr != INADDR_ANY)
+ return (EINVAL);
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+ wild = INPLOOKUP_WILDCARD;
+ if (nam == NULL) {
+ if ((error = prison_local_ip4(cred, &laddr)) != 0)
+ return (error);
+ } else {
+ sin = (struct sockaddr_in *)nam;
+ if (nam->sa_len != sizeof (*sin))
+ return (EINVAL);
+#ifdef notdef
+ /*
+ * We should check the family, but old programs
+ * incorrectly fail to initialize it.
+ */
+ if (sin->sin_family != AF_INET)
+ return (EAFNOSUPPORT);
+#endif
+ error = prison_local_ip4(cred, &sin->sin_addr);
+ if (error)
+ return (error);
+ if (sin->sin_port != *lportp) {
+ /* Don't allow the port to change. */
+ if (*lportp != 0)
+ return (EINVAL);
+ lport = sin->sin_port;
+ }
+ /* NB: lport is left as 0 if the port isn't being changed. */
+ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+ /*
+ * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
+ * allow complete duplication of binding if
+ * SO_REUSEPORT is set, or if SO_REUSEADDR is set
+ * and a multicast address is bound on both
+ * new and duplicated sockets.
+ */
+ if (so->so_options & SO_REUSEADDR)
+ reuseport = SO_REUSEADDR|SO_REUSEPORT;
+ } else if (sin->sin_addr.s_addr != INADDR_ANY) {
+ sin->sin_port = 0; /* yech... */
+ bzero(&sin->sin_zero, sizeof(sin->sin_zero));
+ /*
+ * Is the address a local IP address?
+ * If INP_BINDANY is set, then the socket may be bound
+ * to any endpoint address, local or not.
+ */
+ if ((inp->inp_flags & INP_BINDANY) == 0 &&
+ ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
+ return (EADDRNOTAVAIL);
+ }
+ laddr = sin->sin_addr;
+ if (lport) {
+ struct inpcb *t;
+ struct tcptw *tw;
+
+ /* GROSS */
+ if (ntohs(lport) <= V_ipport_reservedhigh &&
+ ntohs(lport) >= V_ipport_reservedlow &&
+ priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
+ 0))
+ return (EACCES);
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
+ priv_check_cred(inp->inp_cred,
+ PRIV_NETINET_REUSEPORT, 0) != 0) {
+ t = in_pcblookup_local(pcbinfo, sin->sin_addr,
+ lport, INPLOOKUP_WILDCARD, cred);
+ /*
+ * XXX
+ * This entire block sorely needs a rewrite.
+ */
+ if (t &&
+ ((t->inp_flags & INP_TIMEWAIT) == 0) &&
+ (so->so_type != SOCK_STREAM ||
+ ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
+ (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
+ ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
+ (t->inp_socket->so_options &
+ SO_REUSEPORT) == 0) &&
+ (inp->inp_cred->cr_uid !=
+ t->inp_cred->cr_uid))
+ return (EADDRINUSE);
+ }
+ t = in_pcblookup_local(pcbinfo, sin->sin_addr,
+ lport, wild, cred);
+ if (t && (t->inp_flags & INP_TIMEWAIT)) {
+ /*
+				 * XXXRW: If an inpcb has had its timewait
+ * state recycled, we treat the address as
+ * being in use (for now). This is better
+ * than a panic, but not desirable.
+ */
+ tw = intotw(inp);
+ if (tw == NULL ||
+ (reuseport & tw->tw_so_options) == 0)
+ return (EADDRINUSE);
+ } else if (t &&
+ (reuseport & t->inp_socket->so_options) == 0) {
+#ifdef INET6
+ if (ntohl(sin->sin_addr.s_addr) !=
+ INADDR_ANY ||
+ ntohl(t->inp_laddr.s_addr) !=
+ INADDR_ANY ||
+ INP_SOCKAF(so) ==
+ INP_SOCKAF(t->inp_socket))
+#endif
+ return (EADDRINUSE);
+ }
+ }
+ }
+ if (*lportp != 0)
+ lport = *lportp;
+ if (lport == 0) {
+ u_short first, last, aux;
+ int count;
+
+ if (inp->inp_flags & INP_HIGHPORT) {
+ first = V_ipport_hifirstauto; /* sysctl */
+ last = V_ipport_hilastauto;
+ lastport = &pcbinfo->ipi_lasthi;
+ } else if (inp->inp_flags & INP_LOWPORT) {
+ error = priv_check_cred(cred,
+ PRIV_NETINET_RESERVEDPORT, 0);
+ if (error)
+ return error;
+ first = V_ipport_lowfirstauto; /* 1023 */
+ last = V_ipport_lowlastauto; /* 600 */
+ lastport = &pcbinfo->ipi_lastlow;
+ } else {
+ first = V_ipport_firstauto; /* sysctl */
+ last = V_ipport_lastauto;
+ lastport = &pcbinfo->ipi_lastport;
+ }
+ /*
+ * For UDP, use random port allocation as long as the user
+ * allows it. For TCP (and as of yet unknown) connections,
+ * use random port allocation only if the user allows it AND
+ * ipport_tick() allows it.
+ */
+ if (V_ipport_randomized &&
+ (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
+ dorandom = 1;
+ else
+ dorandom = 0;
+ /*
+ * It makes no sense to do random port allocation if
+ * we have the only port available.
+ */
+ if (first == last)
+ dorandom = 0;
+ /* Make sure to not include UDP packets in the count. */
+ if (pcbinfo != &V_udbinfo)
+ V_ipport_tcpallocs++;
+ /*
+ * Instead of having two loops further down counting up or down
+ * make sure that first is always <= last and go with only one
+ * code path implementing all logic.
+ */
+ if (first > last) {
+ aux = first;
+ first = last;
+ last = aux;
+ }
+
+ if (dorandom)
+ *lastport = first +
+ (arc4random() % (last - first));
+
+ count = last - first;
+
+ do {
+ if (count-- < 0) /* completely used? */
+ return (EADDRNOTAVAIL);
+ ++*lastport;
+ if (*lastport < first || *lastport > last)
+ *lastport = first;
+ lport = htons(*lastport);
+ } while (in_pcblookup_local(pcbinfo, laddr,
+ lport, wild, cred));
+ }
+ *laddrp = laddr.s_addr;
+ *lportp = lport;
+ return (0);
+}
+
+/*
+ * Connect from a socket to a specified address.
+ * Both address and port must be specified in argument sin.
+ * If we don't have a local address for this socket yet,
+ * then pick one.
+ */
+int
+in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
+{
+ u_short lport, fport;
+ in_addr_t laddr, faddr;
+ int anonport, error;
+
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ lport = inp->inp_lport;
+ laddr = inp->inp_laddr.s_addr;
+ anonport = (lport == 0);
+ error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
+ NULL, cred);
+ if (error)
+ return (error);
+
+ /* Do the initial binding of the local address if required. */
+ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
+ inp->inp_lport = lport;
+ inp->inp_laddr.s_addr = laddr;
+ if (in_pcbinshash(inp) != 0) {
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ inp->inp_lport = 0;
+ return (EAGAIN);
+ }
+ }
+
+ /* Commit the remaining changes. */
+ inp->inp_lport = lport;
+ inp->inp_laddr.s_addr = laddr;
+ inp->inp_faddr.s_addr = faddr;
+ inp->inp_fport = fport;
+ in_pcbrehash(inp);
+
+ if (anonport)
+ inp->inp_flags |= INP_ANONPORT;
+ return (0);
+}
+
+/*
+ * Do proper source address selection on an unbound socket in case
+ * of connect. Take jails into account as well.
+ */
+static int
+in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
+ struct ucred *cred)
+{
+ struct ifaddr *ifa;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ struct route sro;
+ int error;
+
+ KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
+
+ /*
+ * Bypass source address selection and use the primary jail IP
+ * if requested.
+ */
+ if (cred != NULL && !prison_saddrsel_ip4(cred, laddr))
+ return (0);
+
+ error = 0;
+ bzero(&sro, sizeof(sro));
+
+ sin = (struct sockaddr_in *)&sro.ro_dst;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_addr.s_addr = faddr->s_addr;
+
+ /*
+ * If route is known our src addr is taken from the i/f,
+ * else punt.
+ *
+ * Find out route to destination.
+ */
+ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
+ in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum);
+
+ /*
+ * If we found a route, use the address corresponding to
+ * the outgoing interface.
+ *
+ * Otherwise assume faddr is reachable on a directly connected
+ * network and try to find a corresponding interface to take
+ * the source address from.
+ */
+ if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) {
+ struct in_ifaddr *ia;
+ struct ifnet *ifp;
+
+ ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin));
+ if (ia == NULL)
+ ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0));
+ if (ia == NULL) {
+ error = ENETUNREACH;
+ goto done;
+ }
+
+ if (cred == NULL || !prison_flag(cred, PR_IP4)) {
+ laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
+ ifa_free(&ia->ia_ifa);
+ goto done;
+ }
+
+ ifp = ia->ia_ifp;
+ ifa_free(&ia->ia_ifa);
+ ia = NULL;
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+
+ sa = ifa->ifa_addr;
+ if (sa->sa_family != AF_INET)
+ continue;
+ sin = (struct sockaddr_in *)sa;
+ if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
+ ia = (struct in_ifaddr *)ifa;
+ break;
+ }
+ }
+ if (ia != NULL) {
+ laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ /* 3. As a last resort return the 'default' jail address. */
+ error = prison_get_ip4(cred, laddr);
+ goto done;
+ }
+
+ /*
+ * If the outgoing interface on the route found is not
+ * a loopback interface, use the address from that interface.
+ * In case of jails do those three steps:
+ * 1. check if the interface address belongs to the jail. If so use it.
+ * 2. check if we have any address on the outgoing interface
+ * belonging to this jail. If so use it.
+ * 3. as a last resort return the 'default' jail address.
+ */
+ if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
+ struct in_ifaddr *ia;
+ struct ifnet *ifp;
+
+ /* If not jailed, use the default returned. */
+ if (cred == NULL || !prison_flag(cred, PR_IP4)) {
+ ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
+ laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
+ goto done;
+ }
+
+ /* Jailed. */
+ /* 1. Check if the iface address belongs to the jail. */
+ sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr;
+ if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
+ ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
+ laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
+ goto done;
+ }
+
+ /*
+ * 2. Check if we have any address on the outgoing interface
+ * belonging to this jail.
+ */
+ ia = NULL;
+ ifp = sro.ro_rt->rt_ifp;
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ sa = ifa->ifa_addr;
+ if (sa->sa_family != AF_INET)
+ continue;
+ sin = (struct sockaddr_in *)sa;
+ if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
+ ia = (struct in_ifaddr *)ifa;
+ break;
+ }
+ }
+ if (ia != NULL) {
+ laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ /* 3. As a last resort return the 'default' jail address. */
+ error = prison_get_ip4(cred, laddr);
+ goto done;
+ }
+
+ /*
+ * The outgoing interface is marked with 'loopback net', so a route
+ * to ourselves is here.
+ * Try to find the interface of the destination address and then
+ * take the address from there. That interface is not necessarily
+ * a loopback interface.
+ * In case of jails, check that it is an address of the jail
+ * and if we cannot find, fall back to the 'default' jail address.
+ */
+ if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
+ struct sockaddr_in sain;
+ struct in_ifaddr *ia;
+
+ bzero(&sain, sizeof(struct sockaddr_in));
+ sain.sin_family = AF_INET;
+ sain.sin_len = sizeof(struct sockaddr_in);
+ sain.sin_addr.s_addr = faddr->s_addr;
+
+ ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain)));
+ if (ia == NULL)
+ ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0));
+ if (ia == NULL)
+ ia = ifatoia(ifa_ifwithaddr(sintosa(&sain)));
+
+ if (cred == NULL || !prison_flag(cred, PR_IP4)) {
+ if (ia == NULL) {
+ error = ENETUNREACH;
+ goto done;
+ }
+ laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
+ ifa_free(&ia->ia_ifa);
+ goto done;
+ }
+
+ /* Jailed. */
+ if (ia != NULL) {
+ struct ifnet *ifp;
+
+ ifp = ia->ia_ifp;
+ ifa_free(&ia->ia_ifa);
+ ia = NULL;
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+
+ sa = ifa->ifa_addr;
+ if (sa->sa_family != AF_INET)
+ continue;
+ sin = (struct sockaddr_in *)sa;
+ if (prison_check_ip4(cred,
+ &sin->sin_addr) == 0) {
+ ia = (struct in_ifaddr *)ifa;
+ break;
+ }
+ }
+ if (ia != NULL) {
+ laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
+ IF_ADDR_UNLOCK(ifp);
+ goto done;
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+
+ /* 3. As a last resort return the 'default' jail address. */
+ error = prison_get_ip4(cred, laddr);
+ goto done;
+ }
+
+done:
+ if (sro.ro_rt != NULL)
+ RTFREE(sro.ro_rt);
+ return (error);
+}
+
+/*
+ * Set up for a connect from a socket to the specified address.
+ * On entry, *laddrp and *lportp should contain the current local
+ * address and port for the PCB; these are updated to the values
+ * that should be placed in inp_laddr and inp_lport to complete
+ * the connect.
+ *
+ * On success, *faddrp and *fportp will be set to the remote address
+ * and port. These are not updated in the error case.
+ *
+ * If the operation fails because the connection already exists,
+ * *oinpp will be set to the PCB of that connection so that the
+ * caller can decide to override it. In all other cases, *oinpp
+ * is set to NULL.
+ */
+int
+in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
+ in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
+ struct inpcb **oinpp, struct ucred *cred)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+ struct in_ifaddr *ia;
+ struct inpcb *oinp;
+ struct in_addr laddr, faddr;
+ u_short lport, fport;
+ int error;
+
+ /*
+ * Because a global state change doesn't actually occur here, a read
+ * lock is sufficient.
+ */
+ INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
+ if (oinpp != NULL)
+ *oinpp = NULL;
+ if (nam->sa_len != sizeof (*sin))
+ return (EINVAL);
+ if (sin->sin_family != AF_INET)
+ return (EAFNOSUPPORT);
+ if (sin->sin_port == 0)
+ return (EADDRNOTAVAIL);
+ laddr.s_addr = *laddrp;
+ lport = *lportp;
+ faddr = sin->sin_addr;
+ fport = sin->sin_port;
+
+ if (!TAILQ_EMPTY(&V_in_ifaddrhead)) {
+ /*
+ * If the destination address is INADDR_ANY,
+ * use the primary local address.
+ * If the supplied address is INADDR_BROADCAST,
+ * and the primary interface supports broadcast,
+ * choose the broadcast address for that interface.
+ */
+ if (faddr.s_addr == INADDR_ANY) {
+ IN_IFADDR_RLOCK();
+ faddr =
+ IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
+ IN_IFADDR_RUNLOCK();
+ if (cred != NULL &&
+ (error = prison_get_ip4(cred, &faddr)) != 0)
+ return (error);
+ } else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
+ IN_IFADDR_RLOCK();
+ if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
+ IFF_BROADCAST)
+ faddr = satosin(&TAILQ_FIRST(
+ &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
+ IN_IFADDR_RUNLOCK();
+ }
+ }
+ if (laddr.s_addr == INADDR_ANY) {
+ error = in_pcbladdr(inp, &faddr, &laddr, cred);
+ /*
+ * If the destination address is multicast and an outgoing
+ * interface has been set as a multicast option, prefer the
+ * address of that interface as our source address.
+ */
+ if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
+ inp->inp_moptions != NULL) {
+ struct ip_moptions *imo;
+ struct ifnet *ifp;
+
+ imo = inp->inp_moptions;
+ if (imo->imo_multicast_ifp != NULL) {
+ ifp = imo->imo_multicast_ifp;
+ IN_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
+ if (ia->ia_ifp == ifp)
+ break;
+ if (ia == NULL) {
+ IN_IFADDR_RUNLOCK();
+ error = EADDRNOTAVAIL;
+ } else {
+ laddr = ia->ia_addr.sin_addr;
+ IN_IFADDR_RUNLOCK();
+ error = 0;
+ }
+ }
+ }
+ if (error)
+ return (error);
+ }
+ oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
+ 0, NULL);
+ if (oinp != NULL) {
+ if (oinpp != NULL)
+ *oinpp = oinp;
+ return (EADDRINUSE);
+ }
+ if (lport == 0) {
+ error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
+ cred);
+ if (error)
+ return (error);
+ }
+ *laddrp = laddr.s_addr;
+ *lportp = lport;
+ *faddrp = faddr.s_addr;
+ *fportp = fport;
+ return (0);
+}
+
+void
+in_pcbdisconnect(struct inpcb *inp)
+{
+
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ inp->inp_faddr.s_addr = INADDR_ANY;
+ inp->inp_fport = 0;
+ in_pcbrehash(inp);
+}
+
+/*
+ * in_pcbdetach() is responsible for disassociating a socket from an inpcb.
+ * For most protocols, this will be invoked immediately prior to calling
+ * in_pcbfree(). However, with TCP the inpcb may significantly outlive the
+ * socket, in which case in_pcbfree() is deferred.
+ */
+void
+in_pcbdetach(struct inpcb *inp)
+{
+
+ KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
+
+ inp->inp_socket->so_pcb = NULL;
+ inp->inp_socket = NULL;
+}
+
+/*
+ * in_pcbfree_internal() frees an inpcb that has been detached from its
+ * socket, and whose reference count has reached 0. It will also remove the
+ * inpcb from any global lists it might remain on.
+ */
+static void
+in_pcbfree_internal(struct inpcb *inp)
+{
+ struct inpcbinfo *ipi = inp->inp_pcbinfo;
+
+ KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
+ KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__));
+
+ INP_INFO_WLOCK_ASSERT(ipi);
+ INP_WLOCK_ASSERT(inp);
+
+#ifdef IPSEC
+ if (inp->inp_sp != NULL)
+ ipsec_delete_pcbpolicy(inp);
+#endif /* IPSEC */
+ inp->inp_gencnt = ++ipi->ipi_gencnt;
+ in_pcbremlists(inp);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6PROTO) {
+ ip6_freepcbopts(inp->in6p_outputopts);
+ if (inp->in6p_moptions != NULL)
+ ip6_freemoptions(inp->in6p_moptions);
+ }
+#endif
+ if (inp->inp_options)
+ (void)m_free(inp->inp_options);
+ if (inp->inp_moptions != NULL)
+ inp_freemoptions(inp->inp_moptions);
+ inp->inp_vflag = 0;
+ crfree(inp->inp_cred);
+
+#ifdef MAC
+ mac_inpcb_destroy(inp);
+#endif
+ INP_WUNLOCK(inp);
+ uma_zfree(ipi->ipi_zone, inp);
+}
+
+/*
+ * in_pcbref() bumps the reference count on an inpcb in order to maintain
+ * stability of an inpcb pointer despite the inpcb lock being released. This
+ * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
+ * but where the inpcb lock is already held.
+ *
+ * While the inpcb will not be freed, releasing the inpcb lock means that the
+ * connection's state may change, so the caller should be careful to
+ * revalidate any cached state on reacquiring the lock. Drop the reference
+ * using in_pcbrele().
+ */
+void
+in_pcbref(struct inpcb *inp)
+{
+
+ INP_WLOCK_ASSERT(inp);
+
+ KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
+
+ inp->inp_refcount++;
+}
+
+/*
+ * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
+ * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
+ * return a flag indicating whether or not the inpcb remains valid. If it is
+ * valid, we return with the inpcb lock held.
+ */
+int
+in_pcbrele(struct inpcb *inp)
+{
+#ifdef INVARIANTS
+ struct inpcbinfo *ipi = inp->inp_pcbinfo;
+#endif
+
+ KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
+
+ INP_INFO_WLOCK_ASSERT(ipi);
+ INP_WLOCK_ASSERT(inp);
+
+ inp->inp_refcount--;
+ if (inp->inp_refcount > 0)
+ return (0);
+ in_pcbfree_internal(inp);
+ return (1);
+}
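+
+/*
+ * Sketch of the usage pattern described above (the caller context is
+ * hypothetical): take a reference, drop the inpcb lock so the pcbinfo lock
+ * can be acquired first, then relock and check whether the inpcb survived.
+ *
+ *	in_pcbref(inp);
+ *	INP_WUNLOCK(inp);
+ *	INP_INFO_WLOCK(inp->inp_pcbinfo);
+ *	INP_WLOCK(inp);
+ *	if (in_pcbrele(inp))
+ *		return;			(the inpcb has been freed)
+ *	(otherwise revalidate any cached connection state)
+ */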
+
+/*
+ * Unconditionally schedule an inpcb to be freed by decrementing its
+ * reference count, which should occur only after the inpcb has been detached
+ * from its socket. If another thread holds a temporary reference (acquired
+ * using in_pcbref()) then the free is deferred until that reference is
+ * released using in_pcbrele(), but the inpcb is still unlocked.
+ */
+void
+in_pcbfree(struct inpcb *inp)
+{
+#ifdef INVARIANTS
+ struct inpcbinfo *ipi = inp->inp_pcbinfo;
+#endif
+
+ KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL",
+ __func__));
+
+ INP_INFO_WLOCK_ASSERT(ipi);
+ INP_WLOCK_ASSERT(inp);
+
+ if (!in_pcbrele(inp))
+ INP_WUNLOCK(inp);
+}
+
+/*
+ * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
+ * port reservation, and preventing it from being returned by inpcb lookups.
+ *
+ * It is used by TCP to mark an inpcb as unused and avoid future packet
+ * delivery or event notification when a socket remains open but TCP has
+ * closed. This might occur as a result of a shutdown()-initiated TCP close
+ * or a RST on the wire, and allows the port binding to be reused while still
+ * maintaining the invariant that so_pcb always points to a valid inpcb until
+ * in_pcbdetach().
+ *
+ * XXXRW: An inp_lport of 0 is used to indicate that the inpcb is not on hash
+ * lists, but can lead to confusing netstat output, as open sockets with
+ * closed TCP connections will no longer appear to have their bound port
+ * number. An explicit flag would be better, as it would allow us to leave
+ * the port number intact after the connection is dropped.
+ *
+ * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
+ * in_pcbnotifyall() and in_pcbpurgeif0()?
+ */
+void
+in_pcbdrop(struct inpcb *inp)
+{
+
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ inp->inp_flags |= INP_DROPPED;
+ if (inp->inp_flags & INP_INHASHLIST) {
+ struct inpcbport *phd = inp->inp_phd;
+
+ LIST_REMOVE(inp, inp_hash);
+ LIST_REMOVE(inp, inp_portlist);
+ if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
+ LIST_REMOVE(phd, phd_hash);
+ free(phd, M_PCB);
+ }
+ inp->inp_flags &= ~INP_INHASHLIST;
+ }
+}
+
+/*
+ * Common routines to return the socket addresses associated with inpcbs.
+ */
+struct sockaddr *
+in_sockaddr(in_port_t port, struct in_addr *addr_p)
+{
+ struct sockaddr_in *sin;
+
+ sin = malloc(sizeof *sin, M_SONAME,
+ M_WAITOK | M_ZERO);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = *addr_p;
+ sin->sin_port = port;
+
+ return (struct sockaddr *)sin;
+}
+
+int
+in_getsockaddr(struct socket *so, struct sockaddr **nam)
+{
+ struct inpcb *inp;
+ struct in_addr addr;
+ in_port_t port;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
+
+ INP_RLOCK(inp);
+ port = inp->inp_lport;
+ addr = inp->inp_laddr;
+ INP_RUNLOCK(inp);
+
+ *nam = in_sockaddr(port, &addr);
+ return 0;
+}
+
+int
+in_getpeeraddr(struct socket *so, struct sockaddr **nam)
+{
+ struct inpcb *inp;
+ struct in_addr addr;
+ in_port_t port;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
+
+ INP_RLOCK(inp);
+ port = inp->inp_fport;
+ addr = inp->inp_faddr;
+ INP_RUNLOCK(inp);
+
+ *nam = in_sockaddr(port, &addr);
+ return 0;
+}
+
+void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
+ struct inpcb *(*notify)(struct inpcb *, int))
+{
+ struct inpcb *inp, *inp_temp;
+
+ INP_INFO_WLOCK(pcbinfo);
+ LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
+ INP_WLOCK(inp);
+#ifdef INET6
+ if ((inp->inp_vflag & INP_IPV4) == 0) {
+ INP_WUNLOCK(inp);
+ continue;
+ }
+#endif
+ if (inp->inp_faddr.s_addr != faddr.s_addr ||
+ inp->inp_socket == NULL) {
+ INP_WUNLOCK(inp);
+ continue;
+ }
+ if ((*notify)(inp, errno))
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(pcbinfo);
+}
+
+void
+in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
+{
+ struct inpcb *inp;
+ struct ip_moptions *imo;
+ int i, gap;
+
+ INP_INFO_RLOCK(pcbinfo);
+ LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
+ INP_WLOCK(inp);
+ imo = inp->inp_moptions;
+ if ((inp->inp_vflag & INP_IPV4) &&
+ imo != NULL) {
+ /*
+ * Unselect the outgoing interface if it is being
+ * detached.
+ */
+ if (imo->imo_multicast_ifp == ifp)
+ imo->imo_multicast_ifp = NULL;
+
+ /*
+ * Drop multicast group membership if we joined
+ * through the interface being detached.
+ */
+ for (i = 0, gap = 0; i < imo->imo_num_memberships;
+ i++) {
+ if (imo->imo_membership[i]->inm_ifp == ifp) {
+ in_delmulti(imo->imo_membership[i]);
+ gap++;
+ } else if (gap != 0)
+ imo->imo_membership[i - gap] =
+ imo->imo_membership[i];
+ }
+ imo->imo_num_memberships -= gap;
+ }
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(pcbinfo);
+}
+
+/*
+ * Lookup a PCB based on the local address and port.
+ */
+#define INP_LOOKUP_MAPPED_PCB_COST 3
+struct inpcb *
+in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
+ u_short lport, int wild_okay, struct ucred *cred)
+{
+ struct inpcb *inp;
+#ifdef INET6
+ int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
+#else
+ int matchwild = 3;
+#endif
+ int wildcard;
+
+ INP_INFO_LOCK_ASSERT(pcbinfo);
+
+ if (!wild_okay) {
+ struct inpcbhead *head;
+ /*
+ * Look for an unconnected (wildcard foreign addr) PCB that
+ * matches the local address and port we're looking for.
+ */
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
+ LIST_FOREACH(inp, head, inp_hash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr == INADDR_ANY &&
+ inp->inp_laddr.s_addr == laddr.s_addr &&
+ inp->inp_lport == lport) {
+ /*
+ * Found?
+ */
+ if (cred == NULL ||
+ prison_equal_ip4(cred->cr_prison,
+ inp->inp_cred->cr_prison))
+ return (inp);
+ }
+ }
+ /*
+ * Not found.
+ */
+ return (NULL);
+ } else {
+ struct inpcbporthead *porthash;
+ struct inpcbport *phd;
+ struct inpcb *match = NULL;
+ /*
+ * Best fit PCB lookup.
+ *
+ * First see if this local port is in use by looking on the
+ * port hash list.
+ */
+ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
+ pcbinfo->ipi_porthashmask)];
+ LIST_FOREACH(phd, porthash, phd_hash) {
+ if (phd->phd_port == lport)
+ break;
+ }
+ if (phd != NULL) {
+ /*
+ * Port is in use by one or more PCBs. Look for best
+ * fit.
+ */
+ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
+ wildcard = 0;
+ if (cred != NULL &&
+ !prison_equal_ip4(inp->inp_cred->cr_prison,
+ cred->cr_prison))
+ continue;
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+ /*
+ * We never select the PCB that has
+ * INP_IPV6 flag and is bound to :: if
+ * we have another PCB which is bound
+ * to 0.0.0.0. If a PCB has the
+ * INP_IPV6 flag, then we set its cost
+ * higher than IPv4 only PCBs.
+ *
+ * Note that the case only happens
+ * when a socket is bound to ::, under
+ * the condition that the use of the
+ * mapped address is allowed.
+ */
+ if ((inp->inp_vflag & INP_IPV6) != 0)
+ wildcard += INP_LOOKUP_MAPPED_PCB_COST;
+#endif
+ if (inp->inp_faddr.s_addr != INADDR_ANY)
+ wildcard++;
+ if (inp->inp_laddr.s_addr != INADDR_ANY) {
+ if (laddr.s_addr == INADDR_ANY)
+ wildcard++;
+ else if (inp->inp_laddr.s_addr != laddr.s_addr)
+ continue;
+ } else {
+ if (laddr.s_addr != INADDR_ANY)
+ wildcard++;
+ }
+ if (wildcard < matchwild) {
+ match = inp;
+ matchwild = wildcard;
+ if (matchwild == 0)
+ break;
+ }
+ }
+ }
+ return (match);
+ }
+}
+#undef INP_LOOKUP_MAPPED_PCB_COST
+
+/*
+ * Lookup PCB in hash list.
+ */
+struct inpcb *
+in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+ u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
+ struct ifnet *ifp)
+{
+ struct inpcbhead *head;
+ struct inpcb *inp, *tmpinp;
+ u_short fport = fport_arg, lport = lport_arg;
+
+ INP_INFO_LOCK_ASSERT(pcbinfo);
+
+ /*
+ * First look for an exact match.
+ */
+ tmpinp = NULL;
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ pcbinfo->ipi_hashmask)];
+ LIST_FOREACH(inp, head, inp_hash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr == faddr.s_addr &&
+ inp->inp_laddr.s_addr == laddr.s_addr &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport) {
+ /*
+ * XXX We should be able to directly return
+ * the inp here, without any checks.
+ * Well unless both bound with SO_REUSEPORT?
+ */
+ if (prison_flag(inp->inp_cred, PR_IP4))
+ return (inp);
+ if (tmpinp == NULL)
+ tmpinp = inp;
+ }
+ }
+ if (tmpinp != NULL)
+ return (tmpinp);
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if (wildcard == INPLOOKUP_WILDCARD) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef INET6
+ struct inpcb *local_wild_mapped = NULL;
+#endif
+ struct inpcb *jail_wild = NULL;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
+ LIST_FOREACH(inp, head, inp_hash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr != INADDR_ANY ||
+ inp->inp_lport != lport)
+ continue;
+
+ /* XXX inp locking */
+ if (ifp && ifp->if_type == IFT_FAITH &&
+ (inp->inp_flags & INP_FAITH) == 0)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP4);
+ if (injail) {
+ if (prison_check_ip4(inp->inp_cred,
+ &laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (inp->inp_laddr.s_addr == laddr.s_addr) {
+ if (injail)
+ return (inp);
+ else
+ local_exact = inp;
+ } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#ifdef INET6
+ /* XXX inp locking, NULL check */
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ local_wild_mapped = inp;
+ else
+#endif /* INET6 */
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+ if (jail_wild != NULL)
+ return (jail_wild);
+ if (local_exact != NULL)
+ return (local_exact);
+ if (local_wild != NULL)
+ return (local_wild);
+#ifdef INET6
+ if (local_wild_mapped != NULL)
+ return (local_wild_mapped);
+#endif /* defined(INET6) */
+ } /* if (wildcard == INPLOOKUP_WILDCARD) */
+
+ return (NULL);
+}
+
+/*
+ * Insert PCB onto various hash lists.
+ */
+int
+in_pcbinshash(struct inpcb *inp)
+{
+ struct inpcbhead *pcbhash;
+ struct inpcbporthead *pcbporthash;
+ struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+ struct inpcbport *phd;
+ u_int32_t hashkey_faddr;
+
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+ KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
+ ("in_pcbinshash: INP_INHASHLIST"));
+
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
+ else
+#endif /* INET6 */
+ hashkey_faddr = inp->inp_faddr.s_addr;
+
+ pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
+ inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
+
+ pcbporthash = &pcbinfo->ipi_porthashbase[
+ INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
+
+ /*
+ * Go through port list and look for a head for this lport.
+ */
+ LIST_FOREACH(phd, pcbporthash, phd_hash) {
+ if (phd->phd_port == inp->inp_lport)
+ break;
+ }
+ /*
+ * If none exists, malloc one and tack it on.
+ */
+ if (phd == NULL) {
+ phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT);
+ if (phd == NULL) {
+ return (ENOBUFS); /* XXX */
+ }
+ phd->phd_port = inp->inp_lport;
+ LIST_INIT(&phd->phd_pcblist);
+ LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
+ }
+ inp->inp_phd = phd;
+ LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
+ LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
+ inp->inp_flags |= INP_INHASHLIST;
+ return (0);
+}
+
+/*
+ * Move PCB to the proper hash bucket when { faddr, fport } have been
+ * changed. NOTE: This does not handle the case of the lport changing (the
+ * hashed port list would have to be updated as well), so the lport must
+ * not change after in_pcbinshash() has been called.
+ */
+void
+in_pcbrehash(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+ struct inpcbhead *head;
+ u_int32_t hashkey_faddr;
+
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(inp->inp_flags & INP_INHASHLIST,
+ ("in_pcbrehash: !INP_INHASHLIST"));
+
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
+ else
+#endif /* INET6 */
+ hashkey_faddr = inp->inp_faddr.s_addr;
+
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
+ inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
+
+ LIST_REMOVE(inp, inp_hash);
+ LIST_INSERT_HEAD(head, inp, inp_hash);
+}
+
+/*
+ * Remove PCB from various lists.
+ */
+static void
+in_pcbremlists(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+ if (inp->inp_flags & INP_INHASHLIST) {
+ struct inpcbport *phd = inp->inp_phd;
+
+ LIST_REMOVE(inp, inp_hash);
+ LIST_REMOVE(inp, inp_portlist);
+ if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
+ LIST_REMOVE(phd, phd_hash);
+ free(phd, M_PCB);
+ }
+ inp->inp_flags &= ~INP_INHASHLIST;
+ }
+ LIST_REMOVE(inp, inp_list);
+ pcbinfo->ipi_count--;
+}
+
+/*
+ * A set label operation has occurred at the socket layer, propagate the
+ * label change into the in_pcb for the socket.
+ */
+void
+in_pcbsosetlabel(struct socket *so)
+{
+#ifdef MAC
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));
+
+ INP_WLOCK(inp);
+ SOCK_LOCK(so);
+ mac_inpcb_sosetlabel(so, inp);
+ SOCK_UNLOCK(so);
+ INP_WUNLOCK(inp);
+#endif
+}
+
+/*
+ * ipport_tick runs once per second, determining if random port allocation
+ * should be continued. If more than ipport_randomcps ports have been
+ * allocated in the last second, then we return to sequential port
+ * allocation. We return to random allocation only once we drop below
+ * ipport_randomcps for at least ipport_randomtime seconds.
+ */
+void
+ipport_tick(void *xtp)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */
+ if (V_ipport_tcpallocs <=
+ V_ipport_tcplastcount + V_ipport_randomcps) {
+ if (V_ipport_stoprandom > 0)
+ V_ipport_stoprandom--;
+ } else
+ V_ipport_stoprandom = V_ipport_randomtime;
+ V_ipport_tcplastcount = V_ipport_tcpallocs;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+ callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
+}
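+
+/*
+ * Worked example of the throttle above (a sketch only; the figures assume
+ * stock defaults on the order of ipport_randomcps = 10 and
+ * ipport_randomtime = 45): if a burst allocates, say, 500 ephemeral TCP
+ * ports within one second, the next tick sees V_ipport_tcpallocs exceed
+ * V_ipport_tcplastcount + V_ipport_randomcps and sets V_ipport_stoprandom
+ * to V_ipport_randomtime, so ports are handed out sequentially; only after
+ * roughly 45 consecutive below-threshold seconds does stoprandom decay back
+ * to zero and random allocation resume.
+ */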
+
+void
+inp_wlock(struct inpcb *inp)
+{
+
+ INP_WLOCK(inp);
+}
+
+void
+inp_wunlock(struct inpcb *inp)
+{
+
+ INP_WUNLOCK(inp);
+}
+
+void
+inp_rlock(struct inpcb *inp)
+{
+
+ INP_RLOCK(inp);
+}
+
+void
+inp_runlock(struct inpcb *inp)
+{
+
+ INP_RUNLOCK(inp);
+}
+
+#ifdef INVARIANTS
+void
+inp_lock_assert(struct inpcb *inp)
+{
+
+ INP_WLOCK_ASSERT(inp);
+}
+
+void
+inp_unlock_assert(struct inpcb *inp)
+{
+
+ INP_UNLOCK_ASSERT(inp);
+}
+#endif
+
+void
+inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
+{
+ struct inpcb *inp;
+
+ INP_INFO_RLOCK(&V_tcbinfo);
+ LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
+ INP_WLOCK(inp);
+ func(inp, arg);
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+}
+
+struct socket *
+inp_inpcbtosocket(struct inpcb *inp)
+{
+
+ INP_WLOCK_ASSERT(inp);
+ return (inp->inp_socket);
+}
+
+struct tcpcb *
+inp_inpcbtotcpcb(struct inpcb *inp)
+{
+
+ INP_WLOCK_ASSERT(inp);
+ return ((struct tcpcb *)inp->inp_ppcb);
+}
+
+int
+inp_ip_tos_get(const struct inpcb *inp)
+{
+
+ return (inp->inp_ip_tos);
+}
+
+void
+inp_ip_tos_set(struct inpcb *inp, int val)
+{
+
+ inp->inp_ip_tos = val;
+}
+
+void
+inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
+ uint32_t *faddr, uint16_t *fp)
+{
+
+ INP_LOCK_ASSERT(inp);
+ *laddr = inp->inp_laddr.s_addr;
+ *faddr = inp->inp_faddr.s_addr;
+ *lp = inp->inp_lport;
+ *fp = inp->inp_fport;
+}
+
+struct inpcb *
+so_sotoinpcb(struct socket *so)
+{
+
+ return (sotoinpcb(so));
+}
+
+struct tcpcb *
+so_sototcpcb(struct socket *so)
+{
+
+ return (sototcpcb(so));
+}
+
+#ifdef DDB
+static void
+db_print_indent(int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ db_printf(" ");
+}
+
+static void
+db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
+{
+ char faddr_str[48], laddr_str[48];
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, inc);
+
+ indent += 2;
+
+#ifdef INET6
+ if (inc->inc_flags & INC_ISIPV6) {
+ /* IPv6. */
+ ip6_sprintf(laddr_str, &inc->inc6_laddr);
+ ip6_sprintf(faddr_str, &inc->inc6_faddr);
+ } else {
+#endif
+ /* IPv4. */
+ inet_ntoa_r(inc->inc_laddr, laddr_str);
+ inet_ntoa_r(inc->inc_faddr, faddr_str);
+#ifdef INET6
+ }
+#endif
+ db_print_indent(indent);
+ db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
+ ntohs(inc->inc_lport));
+ db_print_indent(indent);
+ db_printf("inc_faddr %s inc_fport %u\n", faddr_str,
+ ntohs(inc->inc_fport));
+}
+
+static void
+db_print_inpflags(int inp_flags)
+{
+ int comma;
+
+ comma = 0;
+ if (inp_flags & INP_RECVOPTS) {
+ db_printf("%sINP_RECVOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVRETOPTS) {
+ db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVDSTADDR) {
+ db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_HDRINCL) {
+ db_printf("%sINP_HDRINCL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_HIGHPORT) {
+ db_printf("%sINP_HIGHPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_LOWPORT) {
+ db_printf("%sINP_LOWPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_ANONPORT) {
+ db_printf("%sINP_ANONPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVIF) {
+ db_printf("%sINP_RECVIF", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_MTUDISC) {
+ db_printf("%sINP_MTUDISC", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_FAITH) {
+ db_printf("%sINP_FAITH", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVTTL) {
+ db_printf("%sINP_RECVTTL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_DONTFRAG) {
+ db_printf("%sINP_DONTFRAG", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_IPV6_V6ONLY) {
+ db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_PKTINFO) {
+ db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_HOPLIMIT) {
+ db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_HOPOPTS) {
+ db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_DSTOPTS) {
+ db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RTHDR) {
+ db_printf("%sIN6P_RTHDR", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RTHDRDSTOPTS) {
+ db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_TCLASS) {
+ db_printf("%sIN6P_TCLASS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_AUTOFLOWLABEL) {
+ db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_TIMEWAIT) {
+ db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_ONESBCAST) {
+ db_printf("%sINP_ONESBCAST", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_DROPPED) {
+ db_printf("%sINP_DROPPED", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_SOCKREF) {
+ db_printf("%sINP_SOCKREF", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RFC2292) {
+ db_printf("%sIN6P_RFC2292", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_MTU) {
+ db_printf("%sIN6P_MTU", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_inpvflag(u_char inp_vflag)
+{
+ int comma;
+
+ comma = 0;
+ if (inp_vflag & INP_IPV4) {
+ db_printf("%sINP_IPV4", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_IPV6) {
+ db_printf("%sINP_IPV6", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_IPV6PROTO) {
+ db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_inpcb(struct inpcb *inp, const char *name, int indent)
+{
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, inp);
+
+ indent += 2;
+
+ db_print_indent(indent);
+ db_printf("inp_flow: 0x%x\n", inp->inp_flow);
+
+ db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
+
+ db_print_indent(indent);
+ db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n",
+ inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
+
+ db_print_indent(indent);
+ db_printf("inp_label: %p inp_flags: 0x%x (",
+ inp->inp_label, inp->inp_flags);
+ db_print_inpflags(inp->inp_flags);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
+ inp->inp_vflag);
+ db_print_inpvflag(inp->inp_vflag);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
+ inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
+
+ db_print_indent(indent);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ db_printf("in6p_options: %p in6p_outputopts: %p "
+ "in6p_moptions: %p\n", inp->in6p_options,
+ inp->in6p_outputopts, inp->in6p_moptions);
+ db_printf("in6p_icmp6filt: %p in6p_cksum %d "
+ "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
+ inp->in6p_hops);
+ } else
+#endif
+ {
+ db_printf("inp_ip_tos: %d inp_ip_options: %p "
+ "inp_ip_moptions: %p\n", inp->inp_ip_tos,
+ inp->inp_options, inp->inp_moptions);
+ }
+
+ db_print_indent(indent);
+ db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd,
+ (uintmax_t)inp->inp_gencnt);
+}
+
+DB_SHOW_COMMAND(inpcb, db_show_inpcb)
+{
+ struct inpcb *inp;
+
+ if (!have_addr) {
+ db_printf("usage: show inpcb <addr>\n");
+ return;
+ }
+ inp = (struct inpcb *)addr;
+
+ db_print_inpcb(inp, "inpcb", 0);
+}
+#endif
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h
new file mode 100644
index 00000000..8cd4a5f8
--- /dev/null
+++ b/freebsd/sys/netinet/in_pcb.h
@@ -0,0 +1,525 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_PCB_HH_
+#define _NETINET_IN_PCB_HH_
+
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/_lock.h>
+#include <freebsd/sys/_mutex.h>
+#include <freebsd/sys/_rwlock.h>
+
+#ifdef _KERNEL
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/net/vnet.h>
+#endif
+
+#define in6pcb inpcb /* for KAME src sync over BSD*'s */
+#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */
+struct inpcbpolicy;
+
+/*
+ * struct inpcb is the common protocol control block structure used in most
+ * IP transport protocols.
+ *
+ * Pointers to local and foreign host table entries, local and foreign socket
+ * numbers, and pointers up (to a socket structure) and down (to a
+ * protocol-specific control block) are stored here.
+ */
+LIST_HEAD(inpcbhead, inpcb);
+LIST_HEAD(inpcbporthead, inpcbport);
+typedef u_quad_t inp_gen_t;
+
+/*
+ * A PCB bound to the AF_INET6 null (unspecified) laddr can also receive
+ * AF_INET input packets, so the AF_INET6 null laddr doubles as the AF_INET
+ * null laddr, by way of the following structure.
+ */
+struct in_addr_4in6 {
+ u_int32_t ia46_pad32[3];
+ struct in_addr ia46_addr4;
+};
+
+/*
+ * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
+ * some extra padding to accomplish this.
+ */
+struct in_endpoints {
+ u_int16_t ie_fport; /* foreign port */
+ u_int16_t ie_lport; /* local port */
+ /* protocol dependent part, local and foreign addr */
+ union {
+ /* foreign host table entry */
+ struct in_addr_4in6 ie46_foreign;
+ struct in6_addr ie6_foreign;
+ } ie_dependfaddr;
+ union {
+ /* local host table entry */
+ struct in_addr_4in6 ie46_local;
+ struct in6_addr ie6_local;
+ } ie_dependladdr;
+};
+#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4
+#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4
+#define ie6_faddr ie_dependfaddr.ie6_foreign
+#define ie6_laddr ie_dependladdr.ie6_local
+
+/*
+ * XXX The defines for inc_* are hacks and should be changed to direct
+ * references.
+ */
+struct in_conninfo {
+ u_int8_t inc_flags;
+ u_int8_t inc_len;
+ u_int16_t inc_fibnum; /* XXX was pad, 16 bits is plenty */
+ /* protocol dependent part */
+ struct in_endpoints inc_ie;
+};
+
+/*
+ * Flags for inc_flags.
+ */
+#define INC_ISIPV6 0x01
+
+#define inc_isipv6 inc_flags /* temp compatibility */
+#define inc_fport inc_ie.ie_fport
+#define inc_lport inc_ie.ie_lport
+#define inc_faddr inc_ie.ie_faddr
+#define inc_laddr inc_ie.ie_laddr
+#define inc6_faddr inc_ie.ie6_faddr
+#define inc6_laddr inc_ie.ie6_laddr
+
+struct icmp6_filter;
+
+/*-
+ * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
+ * and IPv6 sockets. In the case of TCP, further per-connection state is
+ * hung off of inp_ppcb most of the time. Almost all fields of struct inpcb
+ * are static after creation or protected by a per-inpcb rwlock, inp_lock. A
+ * few fields also require the global pcbinfo lock for the inpcb to be held,
+ * when modified, such as the global connection lists and hashes, as well as
+ * binding information (which affects which hash a connection is on). This
+ * model means that connections can be looked up without holding the
+ * per-connection lock, which is important for performance when attempting to
+ * find the connection for a packet given its IP and port tuple. Writing to
+ * these fields requires that write locks be held on both the inpcb and the
+ * global locks.
+ *
+ * Key:
+ * (c) - Constant after initialization
+ * (i) - Protected by the inpcb lock
+ * (p) - Protected by the pcbinfo lock for the inpcb
+ * (s) - Protected by another subsystem's locks
+ * (x) - Undefined locking
+ *
+ * A few other notes:
+ *
+ * When a read lock is held, stability of the field is guaranteed; to write
+ * to a field, a write lock must generally be held.
+ *
+ * netinet/netinet6-layer code should not assume that the inp_socket pointer
+ * is safe to dereference without inp_lock being held, even for protocols
+ * other than TCP (where the inpcb persists during TIMEWAIT even after the
+ * socket has been freed), or there may be close(2)-related races.
+ *
+ * The inp_vflag field is overloaded, and would otherwise ideally be (c).
+ */
+struct inpcb {
+ LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
+ void *inp_ppcb; /* (i) pointer to per-protocol pcb */
+ struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
+ struct socket *inp_socket; /* (i) back pointer to socket */
+ struct ucred *inp_cred; /* (c) cache of socket cred */
+ u_int32_t inp_flow; /* (i) IPv6 flow information */
+ int inp_flags; /* (i) generic IP/datagram flags */
+ int inp_flags2; /* (i) generic IP/datagram flags #2*/
+ u_char inp_vflag; /* (i) IP version flag (v4/v6) */
+ u_char inp_ip_ttl; /* (i) time to live proto */
+ u_char inp_ip_p; /* (c) protocol proto */
+ u_char inp_ip_minttl; /* (i) minimum TTL or drop */
+ uint32_t inp_flowid; /* (x) flow id / queue id */
+ u_int inp_refcount; /* (i) refcount */
+ void *inp_pspare[4]; /* (x) rtentry / general use */
+ u_int inp_ispare[4]; /* general use */
+
+ /* Local and foreign ports, local and foreign addr. */
+ struct in_conninfo inp_inc; /* (i/p) list for PCB's local port */
+
+ /* MAC and IPSEC policy information. */
+ struct label *inp_label; /* (i) MAC label */
+ struct inpcbpolicy *inp_sp; /* (s) for IPSEC */
+
+ /* Protocol-dependent part; options. */
+ struct {
+ u_char inp4_ip_tos; /* (i) type of service proto */
+ struct mbuf *inp4_options; /* (i) IP options */
+ struct ip_moptions *inp4_moptions; /* (i) IP mcast options */
+ } inp_depend4;
+ struct {
+ /* (i) IP options */
+ struct mbuf *inp6_options;
+ /* (i) IP6 options for outgoing packets */
+ struct ip6_pktopts *inp6_outputopts;
+ /* (i) IP multicast options */
+ struct ip6_moptions *inp6_moptions;
+ /* (i) ICMPv6 code type filter */
+ struct icmp6_filter *inp6_icmp6filt;
+ /* (i) IPV6_CHECKSUM setsockopt */
+ int inp6_cksum;
+ short inp6_hops;
+ } inp_depend6;
+ LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */
+ struct inpcbport *inp_phd; /* (i/p) head of this list */
+#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
+ inp_gen_t inp_gencnt; /* (c) generation count */
+ struct llentry *inp_lle; /* cached L2 information */
+ struct rtentry *inp_rt; /* cached L3 information */
+ struct rwlock inp_lock;
+};
+#define inp_fport inp_inc.inc_fport
+#define inp_lport inp_inc.inc_lport
+#define inp_faddr inp_inc.inc_faddr
+#define inp_laddr inp_inc.inc_laddr
+#define inp_ip_tos inp_depend4.inp4_ip_tos
+#define inp_options inp_depend4.inp4_options
+#define inp_moptions inp_depend4.inp4_moptions
+
+#define in6p_faddr inp_inc.inc6_faddr
+#define in6p_laddr inp_inc.inc6_laddr
+#define in6p_hops inp_depend6.inp6_hops /* default hop limit */
+#define in6p_flowinfo inp_flow
+#define in6p_options inp_depend6.inp6_options
+#define in6p_outputopts inp_depend6.inp6_outputopts
+#define in6p_moptions inp_depend6.inp6_moptions
+#define in6p_icmp6filt inp_depend6.inp6_icmp6filt
+#define in6p_cksum inp_depend6.inp6_cksum
+
+#define inp_vnet inp_pcbinfo->ipi_vnet
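+
+/*
+ * Illustrative use of the locking key above (a minimal sketch, modelled on
+ * in_getpeeraddr() in in_pcb.c): fields marked (i) are stable under a read
+ * lock, so snapshotting the foreign endpoint looks like
+ *
+ *	INP_RLOCK(inp);
+ *	port = inp->inp_fport;
+ *	addr = inp->inp_faddr;
+ *	INP_RUNLOCK(inp);
+ *
+ * whereas operations that rebind or rehash the PCB additionally take
+ * INP_WLOCK() together with the pcbinfo write lock (see in_pcbinshash() and
+ * in_pcbrehash() in in_pcb.c).
+ */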
+
+/*
+ * The range of the generation count, as used in this implementation, is 9e19.
+ * We would have to create 300 billion connections per second for this number
+ * to roll over in a year. This seems sufficiently unlikely that we simply
+ * don't concern ourselves with that possibility.
+ */
+
+/*
+ * Interface exported to userland by various protocols which use inpcbs. Hack
+ * alert -- only define if struct xsocket is in scope.
+ */
+#ifdef _SYS_SOCKETVAR_HH_
+struct xinpcb {
+ size_t xi_len; /* length of this structure */
+ struct inpcb xi_inp;
+ struct xsocket xi_socket;
+ u_quad_t xi_alignment_hack;
+};
+
+struct xinpgen {
+ size_t xig_len; /* length of this structure */
+ u_int xig_count; /* number of PCBs at this time */
+ inp_gen_t xig_gen; /* generation count at this time */
+ so_gen_t xig_sogen; /* socket generation count at this time */
+};
+#endif /* _SYS_SOCKETVAR_HH_ */
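+
+/*
+ * A hedged sketch of how userland consumes these records (netstat does
+ * something similar through the per-protocol "pcblist" sysctls, e.g.
+ * net.inet.udp.pcblist): the returned buffer starts with a struct xinpgen,
+ * is followed by one struct xinpcb per PCB, and ends with a trailing
+ * struct xinpgen whose xig_gen can be compared against the leading one to
+ * detect changes made while the list was being copied out, roughly:
+ *
+ *	sysctlbyname("net.inet.udp.pcblist", buf, &len, NULL, 0);
+ */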
+
+struct inpcbport {
+ LIST_ENTRY(inpcbport) phd_hash;
+ struct inpcbhead phd_pcblist;
+ u_short phd_port;
+};
+
+/*
+ * Global data structure for each high-level protocol (UDP, TCP, ...) in both
+ * IPv4 and IPv6. Holds inpcb lists and information for managing them.
+ */
+struct inpcbinfo {
+ /*
+ * Global list of inpcbs on the protocol.
+ */
+ struct inpcbhead *ipi_listhead;
+ u_int ipi_count;
+
+ /*
+ * Global hash of inpcbs, hashed by local and foreign addresses and
+ * port numbers.
+ */
+ struct inpcbhead *ipi_hashbase;
+ u_long ipi_hashmask;
+
+ /*
+ * Global hash of inpcbs, hashed by only local port number.
+ */
+ struct inpcbporthead *ipi_porthashbase;
+ u_long ipi_porthashmask;
+
+ /*
+ * Fields associated with port lookup and allocation.
+ */
+ u_short ipi_lastport;
+ u_short ipi_lastlow;
+ u_short ipi_lasthi;
+
+ /*
+ * UMA zone from which inpcbs are allocated for this protocol.
+ */
+ struct uma_zone *ipi_zone;
+
+ /*
+ * Generation count--incremented each time a connection is allocated
+ * or freed.
+ */
+ u_quad_t ipi_gencnt;
+ struct rwlock ipi_lock;
+
+ /*
+ * Pointer to network stack instance
+ */
+ struct vnet *ipi_vnet;
+
+ /*
+ * general use 2
+ */
+ void *ipi_pspare[2];
+};
+
+#define INP_LOCK_INIT(inp, d, t) \
+ rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
+#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
+#define INP_RLOCK(inp) rw_rlock(&(inp)->inp_lock)
+#define INP_WLOCK(inp) rw_wlock(&(inp)->inp_lock)
+#define INP_TRY_RLOCK(inp) rw_try_rlock(&(inp)->inp_lock)
+#define INP_TRY_WLOCK(inp) rw_try_wlock(&(inp)->inp_lock)
+#define INP_RUNLOCK(inp) rw_runlock(&(inp)->inp_lock)
+#define INP_WUNLOCK(inp) rw_wunlock(&(inp)->inp_lock)
+#define INP_TRY_UPGRADE(inp) rw_try_upgrade(&(inp)->inp_lock)
+#define INP_DOWNGRADE(inp) rw_downgrade(&(inp)->inp_lock)
+#define INP_WLOCKED(inp) rw_wowned(&(inp)->inp_lock)
+#define INP_LOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_LOCKED)
+#define INP_RLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_RLOCKED)
+#define INP_WLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_WLOCKED)
+#define INP_UNLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_UNLOCKED)
+
+#ifdef _KERNEL
+/*
+ * These locking functions are for inpcb consumers outside of sys/netinet,
+ * more specifically, they were added for the benefit of TOE drivers. The
+ * macros are reserved for use by the stack.
+ */
+void inp_wlock(struct inpcb *);
+void inp_wunlock(struct inpcb *);
+void inp_rlock(struct inpcb *);
+void inp_runlock(struct inpcb *);
+
+#ifdef INVARIANTS
+void inp_lock_assert(struct inpcb *);
+void inp_unlock_assert(struct inpcb *);
+#else
+static __inline void
+inp_lock_assert(struct inpcb *inp __unused)
+{
+}
+
+static __inline void
+inp_unlock_assert(struct inpcb *inp __unused)
+{
+}
+
+#endif
+
+void inp_apply_all(void (*func)(struct inpcb *, void *), void *arg);
+int inp_ip_tos_get(const struct inpcb *inp);
+void inp_ip_tos_set(struct inpcb *inp, int val);
+struct socket *
+ inp_inpcbtosocket(struct inpcb *inp);
+struct tcpcb *
+ inp_inpcbtotcpcb(struct inpcb *inp);
+void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
+ uint32_t *faddr, uint16_t *fp);
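+
+/*
+ * Typical use by such an out-of-stack consumer (illustrative only; the
+ * local variable names are hypothetical):
+ *
+ *	inp = so_sotoinpcb(so);
+ *	inp_wlock(inp);
+ *	tp = inp_inpcbtotcpcb(inp);
+ *	... operate on the connection ...
+ *	inp_wunlock(inp);
+ */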
+
+#endif /* _KERNEL */
+
+#define INP_INFO_LOCK_INIT(ipi, d) \
+ rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
+#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock)
+#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock)
+#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock)
+#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock)
+#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock)
+#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock)
+#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock)
+#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
+#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED)
+#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
+#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+
+#define INP_PCBHASH(faddr, lport, fport, mask) \
+ (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
+#define INP_PCBPORTHASH(lport, mask) \
+ (ntohs((lport)) & (mask))
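+
+/*
+ * Usage sketch (mirrors in_pcblookup_hash() in in_pcb.c): the connection
+ * hash chain for a given 4-tuple is selected as
+ *
+ *	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ *	    pcbinfo->ipi_hashmask)];
+ *
+ * while the port-only table used by in_pcblookup_local() and in_pcbinshash()
+ * is indexed with INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask).
+ */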
+
+/*
+ * Flags for inp_vflag -- historically version flags only
+ */
+#define INP_IPV4 0x1
+#define INP_IPV6 0x2
+#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
+
+/*
+ * Flags for inp_flags.
+ */
+#define INP_RECVOPTS 0x00000001 /* receive incoming IP options */
+#define INP_RECVRETOPTS 0x00000002 /* receive IP options for reply */
+#define INP_RECVDSTADDR 0x00000004 /* receive IP dst address */
+#define INP_HDRINCL 0x00000008 /* user supplies entire IP header */
+#define INP_HIGHPORT 0x00000010 /* user wants "high" port binding */
+#define INP_LOWPORT 0x00000020 /* user wants "low" port binding */
+#define INP_ANONPORT 0x00000040 /* port chosen for user */
+#define INP_RECVIF 0x00000080 /* receive incoming interface */
+#define INP_MTUDISC 0x00000100 /* user can do MTU discovery */
+#define INP_FAITH 0x00000200 /* accept FAITH'ed connections */
+#define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */
+#define INP_DONTFRAG 0x00000800 /* don't fragment packet */
+#define INP_BINDANY 0x00001000 /* allow bind to any address */
+#define INP_INHASHLIST 0x00002000 /* in_pcbinshash() has been called */
+#define IN6P_IPV6_V6ONLY 0x00008000 /* restrict AF_INET6 socket for v6 */
+#define IN6P_PKTINFO 0x00010000 /* receive IP6 dst and I/F */
+#define IN6P_HOPLIMIT 0x00020000 /* receive hoplimit */
+#define IN6P_HOPOPTS 0x00040000 /* receive hop-by-hop options */
+#define IN6P_DSTOPTS 0x00080000 /* receive dst options after rthdr */
+#define IN6P_RTHDR 0x00100000 /* receive routing header */
+#define IN6P_RTHDRDSTOPTS 0x00200000 /* receive dstoptions before rthdr */
+#define IN6P_TCLASS 0x00400000 /* receive traffic class value */
+#define IN6P_AUTOFLOWLABEL 0x00800000 /* attach flowlabel automatically */
+#define INP_TIMEWAIT 0x01000000 /* in TIMEWAIT, ppcb is tcptw */
+#define INP_ONESBCAST 0x02000000 /* send all-ones broadcast */
+#define INP_DROPPED 0x04000000 /* protocol drop flag */
+#define INP_SOCKREF 0x08000000 /* strong socket reference */
+#define INP_SW_FLOWID 0x10000000 /* software generated flow id */
+#define INP_HW_FLOWID 0x20000000 /* hardware generated flow id */
+#define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */
+#define IN6P_MTU 0x80000000 /* receive path MTU */
+
+#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
+ INP_RECVIF|INP_RECVTTL|\
+ IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
+ IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\
+ IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
+ IN6P_MTU)
+
+/*
+ * Flags for inp_flags2.
+ */
+#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
+#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
+
+#define INPLOOKUP_WILDCARD 1
+#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
+#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */
+
+#define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family
+
+#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
+
+#ifdef _KERNEL
+VNET_DECLARE(int, ipport_reservedhigh);
+VNET_DECLARE(int, ipport_reservedlow);
+VNET_DECLARE(int, ipport_lowfirstauto);
+VNET_DECLARE(int, ipport_lowlastauto);
+VNET_DECLARE(int, ipport_firstauto);
+VNET_DECLARE(int, ipport_lastauto);
+VNET_DECLARE(int, ipport_hifirstauto);
+VNET_DECLARE(int, ipport_hilastauto);
+VNET_DECLARE(int, ipport_randomized);
+VNET_DECLARE(int, ipport_randomcps);
+VNET_DECLARE(int, ipport_randomtime);
+VNET_DECLARE(int, ipport_stoprandom);
+VNET_DECLARE(int, ipport_tcpallocs);
+
+#define V_ipport_reservedhigh VNET(ipport_reservedhigh)
+#define V_ipport_reservedlow VNET(ipport_reservedlow)
+#define V_ipport_lowfirstauto VNET(ipport_lowfirstauto)
+#define V_ipport_lowlastauto VNET(ipport_lowlastauto)
+#define V_ipport_firstauto VNET(ipport_firstauto)
+#define V_ipport_lastauto VNET(ipport_lastauto)
+#define V_ipport_hifirstauto VNET(ipport_hifirstauto)
+#define V_ipport_hilastauto VNET(ipport_hilastauto)
+#define V_ipport_randomized VNET(ipport_randomized)
+#define V_ipport_randomcps VNET(ipport_randomcps)
+#define V_ipport_randomtime VNET(ipport_randomtime)
+#define V_ipport_stoprandom VNET(ipport_stoprandom)
+#define V_ipport_tcpallocs VNET(ipport_tcpallocs)
+
+extern struct callout ipport_tick_callout;
+
+void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
+int in_pcballoc(struct socket *, struct inpcbinfo *);
+int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
+int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
+ u_short *, struct ucred *);
+int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
+int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
+ u_short *, in_addr_t *, u_short *, struct inpcb **,
+ struct ucred *);
+void in_pcbdetach(struct inpcb *);
+void in_pcbdisconnect(struct inpcb *);
+void in_pcbdrop(struct inpcb *);
+void in_pcbfree(struct inpcb *);
+int in_pcbinshash(struct inpcb *);
+struct inpcb *
+ in_pcblookup_local(struct inpcbinfo *,
+ struct in_addr, u_short, int, struct ucred *);
+struct inpcb *
+ in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int,
+ struct in_addr, u_int, int, struct ifnet *);
+#ifndef __rtems__
+void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
+ int, struct inpcb *(*)(struct inpcb *, int));
+#else
+void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
+ struct inpcb *(*notify)(struct inpcb *, int));
+#endif
+void in_pcbref(struct inpcb *);
+void in_pcbrehash(struct inpcb *);
+int in_pcbrele(struct inpcb *);
+void in_pcbsetsolabel(struct socket *so);
+int in_getpeeraddr(struct socket *so, struct sockaddr **nam);
+int in_getsockaddr(struct socket *so, struct sockaddr **nam);
+struct sockaddr *
+ in_sockaddr(in_port_t port, struct in_addr *addr);
+void in_pcbsosetlabel(struct socket *so);
+void ipport_tick(void *xtp);
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_IN_PCB_HH_ */
diff --git a/freebsd/sys/netinet/in_proto.c b/freebsd/sys/netinet/in_proto.c
new file mode 100644
index 00000000..9be0b626
--- /dev/null
+++ b/freebsd/sys/netinet/in_proto.c
@@ -0,0 +1,400 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_proto.c 8.2 (Berkeley) 2/9/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ipx.h>
+#include <freebsd/local/opt_mrouting.h>
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_pf.h>
+#include <freebsd/local/opt_sctp.h>
+#include <freebsd/local/opt_mpath.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#ifdef RADIX_MPATH
+#include <freebsd/net/radix_mpath.h>
+#endif
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/igmp_var.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/udp_var.h>
+#include <freebsd/netinet/ip_encap.h>
+
+/*
+ * TCP/IP protocol family: IP, ICMP, UDP, TCP.
+ */
+
+static struct pr_usrreqs nousrreqs;
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#endif /* IPSEC */
+
+#ifdef SCTP
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_var.h>
+#endif /* SCTP */
+
+#ifdef DEV_PFSYNC
+#include <freebsd/net/pfvar.h>
+#include <freebsd/net/if_pfsync.h>
+#endif
+
+extern struct domain inetdomain;
+
+/* Spacer for loadable protocols. */
+#define IPPROTOSPACER \
+{ \
+ .pr_domain = &inetdomain, \
+ .pr_protocol = PROTO_SPACER, \
+ .pr_usrreqs = &nousrreqs \
+}
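+
+/*
+ * A spacer slot is claimed at run time when a loadable protocol registers
+ * itself, roughly (my_protosw below is a hypothetical, fully initialized
+ * struct protosw supplied by the module):
+ *
+ *	error = pf_proto_register(PF_INET, &my_protosw);
+ */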
+
+struct protosw inetsw[] = {
+{
+ .pr_type = 0,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_IP,
+ .pr_init = ip_init,
+#ifdef VIMAGE
+ .pr_destroy = ip_destroy,
+#endif
+ .pr_slowtimo = ip_slowtimo,
+ .pr_drain = ip_drain,
+ .pr_usrreqs = &nousrreqs
+},
+{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_UDP,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = udp_input,
+ .pr_ctlinput = udp_ctlinput,
+ .pr_ctloutput = udp_ctloutput,
+ .pr_init = udp_init,
+#ifdef VIMAGE
+ .pr_destroy = udp_destroy,
+#endif
+ .pr_usrreqs = &udp_usrreqs
+},
+{
+ .pr_type = SOCK_STREAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_TCP,
+ .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
+ .pr_input = tcp_input,
+ .pr_ctlinput = tcp_ctlinput,
+ .pr_ctloutput = tcp_ctloutput,
+ .pr_init = tcp_init,
+#ifdef VIMAGE
+ .pr_destroy = tcp_destroy,
+#endif
+ .pr_slowtimo = tcp_slowtimo,
+ .pr_drain = tcp_drain,
+ .pr_usrreqs = &tcp_usrreqs
+},
+#ifdef SCTP
+{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_init = sctp_init,
+#ifdef VIMAGE
+ .pr_destroy = sctp_finish,
+#endif
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+{
+ .pr_type = SOCK_SEQPACKET,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+
+{
+ .pr_type = SOCK_STREAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+#endif /* SCTP */
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_RAW,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = rip_input,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_ICMP,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = icmp_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_IGMP,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = igmp_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_fasttimo = igmp_fasttimo,
+ .pr_slowtimo = igmp_slowtimo,
+ .pr_usrreqs = &rip_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_RSVP,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = rsvp_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+},
+#ifdef IPSEC
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_AH,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = ah4_input,
+ .pr_ctlinput = ah4_ctlinput,
+ .pr_usrreqs = &nousrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_ESP,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = esp4_input,
+ .pr_ctlinput = esp4_ctlinput,
+ .pr_usrreqs = &nousrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_IPCOMP,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = ipcomp4_input,
+ .pr_usrreqs = &nousrreqs
+},
+#endif /* IPSEC */
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_IPV4,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap4_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_init = encap_init,
+ .pr_usrreqs = &rip_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_MOBILE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap4_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_init = encap_init,
+ .pr_usrreqs = &rip_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_ETHERIP,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap4_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_init = encap_init,
+ .pr_usrreqs = &rip_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_GRE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap4_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_init = encap_init,
+ .pr_usrreqs = &rip_usrreqs
+},
+# ifdef INET6
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_IPV6,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap4_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_init = encap_init,
+ .pr_usrreqs = &rip_usrreqs
+},
+#endif
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_PIM,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap4_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+},
+#ifdef DEV_PFSYNC
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_PFSYNC,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = pfsync_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+},
+#endif /* DEV_PFSYNC */
+/* Spacer n-times for loadable protocols. */
+IPPROTOSPACER,
+IPPROTOSPACER,
+IPPROTOSPACER,
+IPPROTOSPACER,
+IPPROTOSPACER,
+IPPROTOSPACER,
+IPPROTOSPACER,
+IPPROTOSPACER,
+/* raw wildcard */
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = rip_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_init = rip_init,
+#ifdef VIMAGE
+ .pr_destroy = rip_destroy,
+#endif
+ .pr_usrreqs = &rip_usrreqs
+},
+};
+
+extern int in_inithead(void **, int);
+extern int in_detachhead(void **, int);
+
+struct domain inetdomain = {
+ .dom_family = AF_INET,
+ .dom_name = "internet",
+ .dom_protosw = inetsw,
+ .dom_protoswNPROTOSW = &inetsw[sizeof(inetsw)/sizeof(inetsw[0])],
+#ifdef RADIX_MPATH
+ .dom_rtattach = rn4_mpath_inithead,
+#else
+ .dom_rtattach = in_inithead,
+#endif
+#ifdef VIMAGE
+ .dom_rtdetach = in_detachhead,
+#endif
+ .dom_rtoffset = 32,
+ .dom_maxrtkey = sizeof(struct sockaddr_in),
+ .dom_ifattach = in_domifattach,
+ .dom_ifdetach = in_domifdetach
+};
+
+VNET_DOMAIN_SET(inet);
+
+SYSCTL_NODE(_net, PF_INET, inet, CTLFLAG_RW, 0,
+ "Internet Family");
+
+SYSCTL_NODE(_net_inet, IPPROTO_IP, ip, CTLFLAG_RW, 0, "IP");
+SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW, 0, "ICMP");
+SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW, 0, "UDP");
+SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW, 0, "TCP");
+#ifdef SCTP
+SYSCTL_NODE(_net_inet, IPPROTO_SCTP, sctp, CTLFLAG_RW, 0, "SCTP");
+#endif
+SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, CTLFLAG_RW, 0, "IGMP");
+#ifdef IPSEC
+/* XXX no protocol # to use, pick something "reserved" */
+SYSCTL_NODE(_net_inet, 253, ipsec, CTLFLAG_RW, 0, "IPSEC");
+SYSCTL_NODE(_net_inet, IPPROTO_AH, ah, CTLFLAG_RW, 0, "AH");
+SYSCTL_NODE(_net_inet, IPPROTO_ESP, esp, CTLFLAG_RW, 0, "ESP");
+SYSCTL_NODE(_net_inet, IPPROTO_IPCOMP, ipcomp, CTLFLAG_RW, 0, "IPCOMP");
+SYSCTL_NODE(_net_inet, IPPROTO_IPIP, ipip, CTLFLAG_RW, 0, "IPIP");
+#endif /* IPSEC */
+SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW, 0, "RAW");
+#ifdef DEV_PFSYNC
+SYSCTL_NODE(_net_inet, IPPROTO_PFSYNC, pfsync, CTLFLAG_RW, 0, "PFSYNC");
+#endif
diff --git a/freebsd/sys/netinet/in_rmx.c b/freebsd/sys/netinet/in_rmx.c
new file mode 100644
index 00000000..25f99ea0
--- /dev/null
+++ b/freebsd/sys/netinet/in_rmx.c
@@ -0,0 +1,516 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright 1994, 1995 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This code does two things necessary for the enhanced TCP metrics to
+ * function in a useful manner:
+ * 1) It marks all non-host routes as `cloning', thus ensuring that
+ * every actual reference to such a route actually gets turned
+ * into a reference to a host route to the specific destination
+ * requested.
+ * 2) When such routes lose all their references, it arranges for them
+ * to be deleted in some random collection of circumstances, so that
+ * a large quantity of stale routing data is not kept in kernel memory
+ * indefinitely. See in_rtqtimo() below for the exact mechanism.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/callout.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_var.h>
+
+extern int in_inithead(void **head, int off);
+#ifdef VIMAGE
+extern int in_detachhead(void **head, int off);
+#endif
+
+#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */
+
+/*
+ * Do what we need to do when inserting a route.
+ */
+static struct radix_node *
+in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+ struct radix_node *treenodes)
+{
+ struct rtentry *rt = (struct rtentry *)treenodes;
+ struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
+
+ RADIX_NODE_HEAD_WLOCK_ASSERT(head);
+ /*
+ * A little bit of help for both IP output and input:
+ * For host routes, we make sure that RTF_BROADCAST
+ * is set for anything that looks like a broadcast address.
+ * This way, we can avoid an expensive call to in_broadcast()
+ * in ip_output() most of the time (because the route passed
+ * to ip_output() is almost always a host route).
+ *
+ * We also do the same for local addresses, with the thought
+ * that this might one day be used to speed up ip_input().
+ *
+ * We also mark routes to multicast addresses as such, because
+ * it's easy to do and might be useful (but this is much more
+ * dubious since it's so easy to inspect the address).
+ */
+ if (rt->rt_flags & RTF_HOST) {
+ if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
+ rt->rt_flags |= RTF_BROADCAST;
+ } else if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
+ sin->sin_addr.s_addr) {
+ rt->rt_flags |= RTF_LOCAL;
+ }
+ }
+ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ rt->rt_flags |= RTF_MULTICAST;
+
+ if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
+ rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+
+ return (rn_addroute(v_arg, n_arg, head, treenodes));
+}
+
+/*
+ * This code is the inverse of in_clsroute: on first reference, if we
+ * were managing the route, stop doing so and set the expiration timer
+ * back off again.
+ */
+static struct radix_node *
+in_matroute(void *v_arg, struct radix_node_head *head)
+{
+ struct radix_node *rn = rn_match(v_arg, head);
+ struct rtentry *rt = (struct rtentry *)rn;
+
+ if (rt) {
+ RT_LOCK(rt);
+ if (rt->rt_flags & RTPRF_OURS) {
+ rt->rt_flags &= ~RTPRF_OURS;
+ rt->rt_rmx.rmx_expire = 0;
+ }
+ RT_UNLOCK(rt);
+ }
+ return rn;
+}
+
+static VNET_DEFINE(int, rtq_reallyold) = 60*60; /* one hour is "really old" */
+#define V_rtq_reallyold VNET(rtq_reallyold)
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
+ &VNET_NAME(rtq_reallyold), 0,
+ "Default expiration time on dynamically learned routes");
+
+/* never automatically crank down to less */
+static VNET_DEFINE(int, rtq_minreallyold) = 10;
+#define V_rtq_minreallyold VNET(rtq_minreallyold)
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
+ &VNET_NAME(rtq_minreallyold), 0,
+ "Minimum time to attempt to hold onto dynamically learned routes");
+
+/* 128 cached routes is "too many" */
+static VNET_DEFINE(int, rtq_toomany) = 128;
+#define V_rtq_toomany VNET(rtq_toomany)
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
+ &VNET_NAME(rtq_toomany), 0,
+ "Upper limit on dynamically learned routes");
+
+/*
+ * On last reference drop, mark the route as belonging to us so that it can
+ * be timed out.
+ */
+static void
+in_clsroute(struct radix_node *rn, struct radix_node_head *head)
+{
+ struct rtentry *rt = (struct rtentry *)rn;
+
+ RT_LOCK_ASSERT(rt);
+
+ if (!(rt->rt_flags & RTF_UP))
+ return; /* prophylactic measures */
+
+ if (rt->rt_flags & RTPRF_OURS)
+ return;
+
+ if (!(rt->rt_flags & RTF_DYNAMIC))
+ return;
+
+ /*
+ * If rtq_reallyold is 0, just delete the route without
+ * waiting for a timeout cycle to kill it.
+ */
+ if (V_rtq_reallyold != 0) {
+ rt->rt_flags |= RTPRF_OURS;
+ rt->rt_rmx.rmx_expire = time_uptime + V_rtq_reallyold;
+ } else {
+ rtexpunge(rt);
+ }
+}
+
+struct rtqk_arg {
+ struct radix_node_head *rnh;
+ int draining;
+ int killed;
+ int found;
+ int updating;
+ time_t nextstop;
+};
+
+/*
+ * Get rid of old routes. When draining, this deletes everything, even when
+ * the timeout is not expired yet. When updating, this makes sure that
+ * nothing has a timeout longer than the current value of rtq_reallyold.
+ */
+static int
+in_rtqkill(struct radix_node *rn, void *rock)
+{
+ struct rtqk_arg *ap = rock;
+ struct rtentry *rt = (struct rtentry *)rn;
+ int err;
+
+ RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh);
+
+ if (rt->rt_flags & RTPRF_OURS) {
+ ap->found++;
+
+ if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
+ if (rt->rt_refcnt > 0)
+ panic("rtqkill route really not free");
+
+ err = in_rtrequest(RTM_DELETE,
+ (struct sockaddr *)rt_key(rt),
+ rt->rt_gateway, rt_mask(rt),
+ rt->rt_flags | RTF_RNH_LOCKED, 0,
+ rt->rt_fibnum);
+ if (err) {
+ log(LOG_WARNING, "in_rtqkill: error %d\n", err);
+ } else {
+ ap->killed++;
+ }
+ } else {
+ if (ap->updating &&
+ (rt->rt_rmx.rmx_expire - time_uptime >
+ V_rtq_reallyold)) {
+ rt->rt_rmx.rmx_expire =
+ time_uptime + V_rtq_reallyold;
+ }
+ ap->nextstop = lmin(ap->nextstop,
+ rt->rt_rmx.rmx_expire);
+ }
+ }
+
+ return 0;
+}
+
+#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */
+static VNET_DEFINE(int, rtq_timeout) = RTQ_TIMEOUT;
+static VNET_DEFINE(struct callout, rtq_timer);
+
+#define V_rtq_timeout VNET(rtq_timeout)
+#define V_rtq_timer VNET(rtq_timer)
+
+static void in_rtqtimo_one(void *rock);
+
+static void
+in_rtqtimo(void *rock)
+{
+ CURVNET_SET((struct vnet *) rock);
+ int fibnum;
+ void *newrock;
+ struct timeval atv;
+
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ newrock = rt_tables_get_rnh(fibnum, AF_INET);
+ if (newrock != NULL)
+ in_rtqtimo_one(newrock);
+ }
+ atv.tv_usec = 0;
+ atv.tv_sec = V_rtq_timeout;
+ callout_reset(&V_rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
+ CURVNET_RESTORE();
+}
+
+static void
+in_rtqtimo_one(void *rock)
+{
+ struct radix_node_head *rnh = rock;
+ struct rtqk_arg arg;
+ static time_t last_adjusted_timeout = 0;
+
+ arg.found = arg.killed = 0;
+ arg.rnh = rnh;
+ arg.nextstop = time_uptime + V_rtq_timeout;
+ arg.draining = arg.updating = 0;
+ RADIX_NODE_HEAD_LOCK(rnh);
+ rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+
+ /*
+ * Attempt to be somewhat dynamic about this:
+ * If there are ``too many'' routes sitting around taking up space,
+ * then crank down the timeout, and see if we can't make some more
+ * go away. However, we make sure that we will never adjust more
+ * than once in rtq_timeout seconds, to keep from cranking down too
+ * hard.
+ */
+ if ((arg.found - arg.killed > V_rtq_toomany) &&
+ (time_uptime - last_adjusted_timeout >= V_rtq_timeout) &&
+ V_rtq_reallyold > V_rtq_minreallyold) {
+ V_rtq_reallyold = 2 * V_rtq_reallyold / 3;
+ if (V_rtq_reallyold < V_rtq_minreallyold) {
+ V_rtq_reallyold = V_rtq_minreallyold;
+ }
+
+ last_adjusted_timeout = time_uptime;
+#ifdef DIAGNOSTIC
+ log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
+ V_rtq_reallyold);
+#endif
+ arg.found = arg.killed = 0;
+ arg.updating = 1;
+ RADIX_NODE_HEAD_LOCK(rnh);
+ rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ }
+
+}
+
+void
+in_rtqdrain(void)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ struct radix_node_head *rnh;
+ struct rtqk_arg arg;
+ int fibnum;
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+
+ for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ rnh = rt_tables_get_rnh(fibnum, AF_INET);
+ arg.found = arg.killed = 0;
+ arg.rnh = rnh;
+ arg.nextstop = 0;
+ arg.draining = 1;
+ arg.updating = 0;
+ RADIX_NODE_HEAD_LOCK(rnh);
+ rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ }
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
+
+static int _in_rt_was_here;
+/*
+ * Initialize our routing tree.
+ */
+int
+in_inithead(void **head, int off)
+{
+ struct radix_node_head *rnh;
+
+ /* XXX MRT
+ * This can be called from vfs_export.c too in which case 'off'
+ * will be 0. We know the correct value so just use that and
+ * return directly if it was 0.
+ * This is a hack that replaces an even worse hack on a bad hack
+ * on a bad design. After RELENG_7 this should be fixed but that
+ * will change the ABI, so for now do it this way.
+ */
+ if (!rn_inithead(head, 32))
+ return 0;
+
+ if (off == 0) /* XXX MRT see above */
+ return 1; /* only do the rest for a real routing table */
+
+ rnh = *head;
+ rnh->rnh_addaddr = in_addroute;
+ rnh->rnh_matchaddr = in_matroute;
+ rnh->rnh_close = in_clsroute;
+ if (_in_rt_was_here == 0 ) {
+ callout_init(&V_rtq_timer, CALLOUT_MPSAFE);
+ callout_reset(&V_rtq_timer, 1, in_rtqtimo, curvnet);
+ _in_rt_was_here = 1;
+ }
+ return 1;
+}
+
+#ifdef VIMAGE
+int
+in_detachhead(void **head, int off)
+{
+
+ callout_drain(&V_rtq_timer);
+ return (1);
+}
+#endif
+
+/*
+ * This zaps old routes when the interface goes down or interface
+ * address is deleted. In the latter case, it deletes static routes
+ * that point to this address. If we don't do this, we may end up
+ * using the old address in the future. The ones we always want to
+ * get rid of are things like ARP entries, since the user might down
+ * the interface, walk over to a completely different network, and
+ * plug back in.
+ */
+struct in_ifadown_arg {
+ struct ifaddr *ifa;
+ int del;
+};
+
+static int
+in_ifadownkill(struct radix_node *rn, void *xap)
+{
+ struct in_ifadown_arg *ap = xap;
+ struct rtentry *rt = (struct rtentry *)rn;
+
+ RT_LOCK(rt);
+ if (rt->rt_ifa == ap->ifa &&
+ (ap->del || !(rt->rt_flags & RTF_STATIC))) {
+ /*
+ * Acquire a reference so that the entry can later be
+ * freed; the refcount may already be 0 here, at least
+ * in the ap->del case.
+ */
+ RT_ADDREF(rt);
+ /*
+ * Disconnect it from the tree and permit protocols
+ * to cleanup.
+ */
+ rtexpunge(rt);
+ /*
+ * At this point it is an rttrash node, and if the
+ * reference taken above is the only one left, we must
+ * free it; otherwise no one will hold a pointer to it
+ * and the rtentry will be leaked forever.
+ * If someone else still holds a reference we are fine,
+ * since we only drop our own reference. If that other
+ * entity merely calls RT_REMREF, the entry still leaks,
+ * but at least we tried.
+ */
+ RTFREE_LOCKED(rt);
+ return (0);
+ }
+ RT_UNLOCK(rt);
+ return 0;
+}
+
+int
+in_ifadown(struct ifaddr *ifa, int delete)
+{
+ struct in_ifadown_arg arg;
+ struct radix_node_head *rnh;
+ int fibnum;
+
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ return 1;
+
+ for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ rnh = rt_tables_get_rnh(fibnum, AF_INET);
+ arg.ifa = ifa;
+ arg.del = delete;
+ RADIX_NODE_HEAD_LOCK(rnh);
+ rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */
+ }
+ return 0;
+}
+
+/*
+ * inet versions of the rt functions. These take a FIB argument and,
+ * for now, simply call the _fib variants; eventually this order
+ * will be reversed.
+ */
+void
+in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum)
+{
+ rtalloc_ign_fib(ro, ignflags, fibnum);
+}
+
+int
+in_rtrequest( int req,
+ struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct rtentry **ret_nrt,
+ u_int fibnum)
+{
+ return (rtrequest_fib(req, dst, gateway, netmask,
+ flags, ret_nrt, fibnum));
+}
+
+struct rtentry *
+in_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum)
+{
+ return (rtalloc1_fib(dst, report, ignflags, fibnum));
+}
+
+void
+in_rtredirect(struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct sockaddr *src,
+ u_int fibnum)
+{
+ rtredirect_fib(dst, gateway, netmask, flags, src, fibnum);
+}
+
+void
+in_rtalloc(struct route *ro, u_int fibnum)
+{
+ rtalloc_ign_fib(ro, 0UL, fibnum);
+}
+
+#if 0
+int in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
+int in_rtioctl(u_long, caddr_t, u_int);
+int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
+#endif
+
+
diff --git a/freebsd/sys/netinet/in_systm.h b/freebsd/sys/netinet/in_systm.h
new file mode 100644
index 00000000..68bb190e
--- /dev/null
+++ b/freebsd/sys/netinet/in_systm.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/in_systm.h>
diff --git a/freebsd/sys/netinet/in_var.h b/freebsd/sys/netinet/in_var.h
new file mode 100644
index 00000000..c921ad31
--- /dev/null
+++ b/freebsd/sys/netinet/in_var.h
@@ -0,0 +1,475 @@
+/*-
+ * Copyright (c) 1985, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_var.h 8.2 (Berkeley) 1/9/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_VAR_HH_
+#define _NETINET_IN_VAR_HH_
+
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/fnv_hash.h>
+#include <freebsd/sys/tree.h>
+
+struct igmp_ifinfo;
+struct in_multi;
+struct lltable;
+
+/*
+ * IPv4 per-interface state.
+ */
+struct in_ifinfo {
+ struct lltable *ii_llt; /* ARP state */
+ struct igmp_ifinfo *ii_igmp; /* IGMP state */
+ struct in_multi *ii_allhosts; /* 224.0.0.1 membership */
+};
+
+/*
+ * Interface address, Internet version. One of these structures
+ * is allocated for each Internet address on an interface.
+ * The ifaddr structure contains the protocol-independent part
+ * of the structure and is assumed to be first.
+ */
+struct in_ifaddr {
+ struct ifaddr ia_ifa; /* protocol-independent info */
+#define ia_ifp ia_ifa.ifa_ifp
+#define ia_flags ia_ifa.ifa_flags
+ /* ia_{,sub}net{,mask} in host order */
+ u_long ia_net; /* network number of interface */
+ u_long ia_netmask; /* mask of net part */
+ u_long ia_subnet; /* subnet number, including net */
+ u_long ia_subnetmask; /* mask of subnet part */
+ struct in_addr ia_netbroadcast; /* to recognize net broadcasts */
+ LIST_ENTRY(in_ifaddr) ia_hash; /* entry in bucket of inet addresses */
+ TAILQ_ENTRY(in_ifaddr) ia_link; /* list of internet addresses */
+ struct sockaddr_in ia_addr; /* reserve space for interface name */
+ struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
+#define ia_broadaddr ia_dstaddr
+ struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
+};
+
+struct in_aliasreq {
+ char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct sockaddr_in ifra_addr;
+ struct sockaddr_in ifra_broadaddr;
+#define ifra_dstaddr ifra_broadaddr
+ struct sockaddr_in ifra_mask;
+};
+/*
+ * Given a pointer to an in_ifaddr (ifaddr),
+ * return a pointer to the addr as a sockaddr_in.
+ */
+#define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
+#define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
+
+#define IN_LNAOF(in, ifa) \
+ (ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa))->ia_subnetmask)
+
+
+#ifdef _KERNEL
+extern u_char inetctlerrmap[];
+
+#define LLTABLE(ifp) \
+ ((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_llt
+/*
+ * Hash table for IP addresses.
+ */
+TAILQ_HEAD(in_ifaddrhead, in_ifaddr);
+LIST_HEAD(in_ifaddrhashhead, in_ifaddr);
+
+VNET_DECLARE(struct in_ifaddrhashhead *, in_ifaddrhashtbl);
+VNET_DECLARE(struct in_ifaddrhead, in_ifaddrhead);
+VNET_DECLARE(u_long, in_ifaddrhmask); /* mask for hash table */
+
+#define V_in_ifaddrhashtbl VNET(in_ifaddrhashtbl)
+#define V_in_ifaddrhead VNET(in_ifaddrhead)
+#define V_in_ifaddrhmask VNET(in_ifaddrhmask)
+
+#define INADDR_NHASH_LOG2 9
+#define INADDR_NHASH (1 << INADDR_NHASH_LOG2)
+#define INADDR_HASHVAL(x) fnv_32_buf((&(x)), sizeof(x), FNV1_32_INIT)
+#define INADDR_HASH(x) \
+ (&V_in_ifaddrhashtbl[INADDR_HASHVAL(x) & V_in_ifaddrhmask])
+
+extern struct rwlock in_ifaddr_lock;
+
+#define IN_IFADDR_LOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_LOCKED)
+#define IN_IFADDR_RLOCK() rw_rlock(&in_ifaddr_lock)
+#define IN_IFADDR_RLOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_RLOCKED)
+#define IN_IFADDR_RUNLOCK() rw_runlock(&in_ifaddr_lock)
+#define IN_IFADDR_WLOCK() rw_wlock(&in_ifaddr_lock)
+#define IN_IFADDR_WLOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_WLOCKED)
+#define IN_IFADDR_WUNLOCK() rw_wunlock(&in_ifaddr_lock)
+
+/*
+ * Macro for finding the internet address structure (in_ifaddr)
+ * corresponding to one of our IP addresses (in_addr).
+ */
+#define INADDR_TO_IFADDR(addr, ia) \
+ /* struct in_addr addr; */ \
+ /* struct in_ifaddr *ia; */ \
+do { \
+\
+ LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \
+ if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
+ break; \
+} while (0)
+
+/*
+ * Macro for finding the interface (ifnet structure) corresponding to one
+ * of our IP addresses.
+ */
+#define INADDR_TO_IFP(addr, ifp) \
+ /* struct in_addr addr; */ \
+ /* struct ifnet *ifp; */ \
+{ \
+ struct in_ifaddr *ia; \
+\
+ INADDR_TO_IFADDR(addr, ia); \
+ (ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
+}
+
+/*
+ * Macro for finding the internet address structure (in_ifaddr) corresponding
+ * to a given interface (ifnet structure).
+ */
+#define IFP_TO_IA(ifp, ia) \
+ /* struct ifnet *ifp; */ \
+ /* struct in_ifaddr *ia; */ \
+{ \
+ for ((ia) = TAILQ_FIRST(&V_in_ifaddrhead); \
+ (ia) != NULL && (ia)->ia_ifp != (ifp); \
+ (ia) = TAILQ_NEXT((ia), ia_link)) \
+ continue; \
+ if ((ia) != NULL) \
+ ifa_ref(&(ia)->ia_ifa); \
+}
+#endif
+
+/*
+ * IP datagram reassembly.
+ */
+#define IPREASS_NHASH_LOG2 6
+#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
+#define IPREASS_HMASK (IPREASS_NHASH - 1)
+#define IPREASS_HASH(x,y) \
+ (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
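
The bucket selection above mixes the low bits of the address with the IP id and masks into a power-of-two bucket count. A small standalone illustration of the same expression (the sample address and id are arbitrary):

#include <stdio.h>
#include <stdint.h>

/* Same definitions as above, repeated so this compiles on its own. */
#define IPREASS_NHASH_LOG2	6
#define IPREASS_NHASH		(1 << IPREASS_NHASH_LOG2)
#define IPREASS_HMASK		(IPREASS_NHASH - 1)
#define IPREASS_HASH(x, y) \
	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)

int
main(void)
{
	uint32_t src = 0xc0000201;	/* 192.0.2.1 in host order, for illustration */
	uint16_t ip_id = 0x1234;	/* arbitrary datagram id */

	printf("fragment bucket %u of %d\n",
	    (unsigned)IPREASS_HASH(src, ip_id), IPREASS_NHASH);
	return (0);
}
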
+
+/*
+ * Legacy IPv4 IGMP per-link structure.
+ */
+struct router_info {
+ struct ifnet *rti_ifp;
+ int rti_type; /* type of router which is querier on this interface */
+ int rti_time; /* # of slow timeouts since last old query */
+ SLIST_ENTRY(router_info) rti_list;
+};
+
+/*
+ * Per-interface IGMP router version information.
+ */
+struct igmp_ifinfo {
+ LIST_ENTRY(igmp_ifinfo) igi_link;
+ struct ifnet *igi_ifp; /* interface this instance belongs to */
+ uint32_t igi_version; /* IGMPv3 Host Compatibility Mode */
+ uint32_t igi_v1_timer; /* IGMPv1 Querier Present timer (s) */
+ uint32_t igi_v2_timer; /* IGMPv2 Querier Present timer (s) */
+ uint32_t igi_v3_timer; /* IGMPv3 General Query (interface) timer (s)*/
+ uint32_t igi_flags; /* IGMP per-interface flags */
+ uint32_t igi_rv; /* IGMPv3 Robustness Variable */
+ uint32_t igi_qi; /* IGMPv3 Query Interval (s) */
+ uint32_t igi_qri; /* IGMPv3 Query Response Interval (s) */
+ uint32_t igi_uri; /* IGMPv3 Unsolicited Report Interval (s) */
+ SLIST_HEAD(,in_multi) igi_relinmhead; /* released groups */
+ struct ifqueue igi_gq; /* queue of general query responses */
+};
+
+#define IGIF_SILENT 0x00000001 /* Do not use IGMP on this ifp */
+#define IGIF_LOOPBACK 0x00000002 /* Send IGMP reports to loopback */
+
+/*
+ * IPv4 multicast IGMP-layer source entry.
+ */
+struct ip_msource {
+ RB_ENTRY(ip_msource) ims_link; /* RB tree links */
+ in_addr_t ims_haddr; /* host byte order */
+ struct ims_st {
+ uint16_t ex; /* # of exclusive members */
+ uint16_t in; /* # of inclusive members */
+ } ims_st[2]; /* state at t0, t1 */
+ uint8_t ims_stp; /* pending query */
+};
+
+/*
+ * IPv4 multicast PCB-layer source entry.
+ */
+struct in_msource {
+ RB_ENTRY(ip_msource) ims_link; /* RB tree links */
+ in_addr_t ims_haddr; /* host byte order */
+ uint8_t imsl_st[2]; /* state before/at commit */
+};
+
+RB_HEAD(ip_msource_tree, ip_msource); /* define struct ip_msource_tree */
+
+static __inline int
+ip_msource_cmp(const struct ip_msource *a, const struct ip_msource *b)
+{
+
+ if (a->ims_haddr < b->ims_haddr)
+ return (-1);
+ if (a->ims_haddr == b->ims_haddr)
+ return (0);
+ return (1);
+}
+RB_PROTOTYPE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
+
+/*
+ * IPv4 multicast PCB-layer group filter descriptor.
+ */
+struct in_mfilter {
+ struct ip_msource_tree imf_sources; /* source list for (S,G) */
+ u_long imf_nsrc; /* # of source entries */
+ uint8_t imf_st[2]; /* state before/at commit */
+};
+
+/*
+ * IPv4 group descriptor.
+ *
+ * For every entry on an ifnet's if_multiaddrs list which represents
+ * an IP multicast group, there is one of these structures.
+ *
+ * If any source filters are present, then a node will exist in the RB-tree
+ * to permit fast lookup by source whenever an operation takes place.
+ * This permits pre-order traversal when we issue reports.
+ * Source filter trees are kept separately from the socket layer to
+ * greatly simplify locking.
+ *
+ * When IGMPv3 is active, inm_timer is the response to group query timer.
+ * The state-change timer inm_sctimer is separate; whenever state changes
+ * for the group the state change record is generated and transmitted,
+ * and kept if retransmissions are necessary.
+ *
+ * FUTURE: inm_link is now only used when groups are being purged
+ * on a detaching ifnet. It could be demoted to a SLIST_ENTRY, but
+ * because it is at the very start of the struct, we can't do this
+ * w/o breaking the ABI for ifmcstat.
+ */
+struct in_multi {
+ LIST_ENTRY(in_multi) inm_link; /* to-be-released by in_ifdetach */
+ struct in_addr inm_addr; /* IP multicast address, convenience */
+ struct ifnet *inm_ifp; /* back pointer to ifnet */
+ struct ifmultiaddr *inm_ifma; /* back pointer to ifmultiaddr */
+ u_int inm_timer; /* IGMPv1/v2 group / v3 query timer */
+ u_int inm_state; /* state of the membership */
+ void *inm_rti; /* unused, legacy field */
+ u_int inm_refcount; /* reference count */
+
+ /* New fields for IGMPv3 follow. */
+ struct igmp_ifinfo *inm_igi; /* IGMP info */
+ SLIST_ENTRY(in_multi) inm_nrele; /* to-be-released by IGMP */
+ struct ip_msource_tree inm_srcs; /* tree of sources */
+ u_long inm_nsrc; /* # of tree entries */
+
+ struct ifqueue inm_scq; /* queue of pending
+ * state-change packets */
+ struct timeval inm_lastgsrtv; /* Time of last G-S-R query */
+ uint16_t inm_sctimer; /* state-change timer */
+ uint16_t inm_scrv; /* state-change rexmit count */
+
+ /*
+ * SSM state counters which track state at T0 (the time the last
+ * state-change report's RV timer went to zero) and T1
+ * (time of pending report, i.e. now).
+ * Used for computing IGMPv3 state-change reports. Several refcounts
+ * are maintained here to optimize for common use-cases.
+ */
+ struct inm_st {
+ uint16_t iss_fmode; /* IGMP filter mode */
+ uint16_t iss_asm; /* # of ASM listeners */
+ uint16_t iss_ex; /* # of exclusive members */
+ uint16_t iss_in; /* # of inclusive members */
+ uint16_t iss_rec; /* # of recorded sources */
+ } inm_st[2]; /* state at t0, t1 */
+};
+
+/*
+ * Helper function to derive the filter mode on a source entry
+ * from its internal counters. Predicates are:
+ * A source is only excluded if all listeners exclude it.
+ * A source is only included if no listeners exclude it,
+ * and at least one listener includes it.
+ * May be used by ifmcstat(8).
+ */
+static __inline uint8_t
+ims_get_mode(const struct in_multi *inm, const struct ip_msource *ims,
+ uint8_t t)
+{
+
+ t = !!t;
+ if (inm->inm_st[t].iss_ex > 0 &&
+ inm->inm_st[t].iss_ex == ims->ims_st[t].ex)
+ return (MCAST_EXCLUDE);
+ else if (ims->ims_st[t].in > 0 && ims->ims_st[t].ex == 0)
+ return (MCAST_INCLUDE);
+ return (MCAST_UNDEFINED);
+}
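
A simplified, self-contained illustration of the same predicates, using stand-in counters and mode values rather than the kernel structures and MCAST_* constants:

#include <stdio.h>
#include <stdint.h>

/*
 * group_ex mirrors inm_st[t].iss_ex and struct src_counts mirrors
 * ims_st[t]; the MODE_* values are illustrative only.
 */
enum { MODE_UNDEFINED, MODE_INCLUDE, MODE_EXCLUDE };

struct src_counts {
	uint16_t ex;	/* listeners excluding this source */
	uint16_t in;	/* listeners including this source */
};

static int
filter_mode(uint16_t group_ex, struct src_counts s)
{
	/* Excluded only if every excluding listener excludes this source. */
	if (group_ex > 0 && group_ex == s.ex)
		return (MODE_EXCLUDE);
	/* Included only if nobody excludes it and at least one includes it. */
	if (s.in > 0 && s.ex == 0)
		return (MODE_INCLUDE);
	return (MODE_UNDEFINED);
}

int
main(void)
{
	struct src_counts both = { 2, 0 };	/* both listeners exclude */
	struct src_counts one  = { 1, 0 };	/* only one of two excludes */
	struct src_counts incl = { 0, 1 };	/* one includes, none exclude */

	printf("%d %d %d\n", filter_mode(2, both), filter_mode(2, one),
	    filter_mode(2, incl));		/* prints: 2 0 1 */
	return (0);
}
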
+
+#ifdef _KERNEL
+
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_raw);
+#endif
+
+/*
+ * Lock macros for IPv4 layer multicast address lists. IPv4 lock goes
+ * before link layer multicast locks in the lock order. In most cases,
+ * consumers of IN_*_MULTI() macros should acquire the locks before
+ * calling them; users of the in_{add,del}multi() functions should not.
+ */
+extern struct mtx in_multi_mtx;
+#define IN_MULTI_LOCK() mtx_lock(&in_multi_mtx)
+#define IN_MULTI_UNLOCK() mtx_unlock(&in_multi_mtx)
+#define IN_MULTI_LOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_OWNED)
+#define IN_MULTI_UNLOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_NOTOWNED)
+
+/*
+ * Function for looking up an in_multi record for an IPv4 multicast address
+ * on a given interface. ifp must be valid. If no record found, return NULL.
+ * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held.
+ */
+static __inline struct in_multi *
+inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
+{
+ struct ifmultiaddr *ifma;
+ struct in_multi *inm;
+
+ IN_MULTI_LOCK_ASSERT();
+ IF_ADDR_LOCK_ASSERT(ifp);
+
+ inm = NULL;
+ TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
+ if (ifma->ifma_addr->sa_family == AF_INET) {
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ if (inm->inm_addr.s_addr == ina.s_addr)
+ break;
+ inm = NULL;
+ }
+ }
+ return (inm);
+}
+
+/*
+ * Wrapper for inm_lookup_locked().
+ * The IF_ADDR_LOCK will be taken on ifp and released on return.
+ */
+static __inline struct in_multi *
+inm_lookup(struct ifnet *ifp, const struct in_addr ina)
+{
+ struct in_multi *inm;
+
+ IN_MULTI_LOCK_ASSERT();
+ IF_ADDR_LOCK(ifp);
+ inm = inm_lookup_locked(ifp, ina);
+ IF_ADDR_UNLOCK(ifp);
+
+ return (inm);
+}
+
+/* Acquire an in_multi record. */
+static __inline void
+inm_acquire_locked(struct in_multi *inm)
+{
+
+ IN_MULTI_LOCK_ASSERT();
+ ++inm->inm_refcount;
+}
+
+/*
+ * Return values for imo_multi_filter().
+ */
+#define MCAST_PASS 0 /* Pass */
+#define MCAST_NOTGMEMBER 1 /* This host not a member of group */
+#define MCAST_NOTSMEMBER 2 /* This host excluded source */
+#define MCAST_MUTED 3 /* [deprecated] */
+
+struct rtentry;
+struct route;
+struct ip_moptions;
+
+int imo_multi_filter(const struct ip_moptions *, const struct ifnet *,
+ const struct sockaddr *, const struct sockaddr *);
+void inm_commit(struct in_multi *);
+void inm_clear_recorded(struct in_multi *);
+void inm_print(const struct in_multi *);
+int inm_record_source(struct in_multi *inm, const in_addr_t);
+void inm_release(struct in_multi *);
+void inm_release_locked(struct in_multi *);
+struct in_multi *
+ in_addmulti(struct in_addr *, struct ifnet *);
+void in_delmulti(struct in_multi *);
+int in_joingroup(struct ifnet *, const struct in_addr *,
+ /*const*/ struct in_mfilter *, struct in_multi **);
+int in_joingroup_locked(struct ifnet *, const struct in_addr *,
+ /*const*/ struct in_mfilter *, struct in_multi **);
+int in_leavegroup(struct in_multi *, /*const*/ struct in_mfilter *);
+int in_leavegroup_locked(struct in_multi *,
+ /*const*/ struct in_mfilter *);
+int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
+ struct thread *);
+void in_rtqdrain(void);
+void ip_input(struct mbuf *);
+int in_ifadown(struct ifaddr *ifa, int);
+void in_ifscrub(struct ifnet *, struct in_ifaddr *);
+struct mbuf *ip_fastforward(struct mbuf *);
+void *in_domifattach(struct ifnet *);
+void in_domifdetach(struct ifnet *, void *);
+
+
+/* XXX */
+void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum);
+void in_rtalloc(struct route *ro, u_int fibnum);
+struct rtentry *in_rtalloc1(struct sockaddr *, int, u_long, u_int);
+void in_rtredirect(struct sockaddr *, struct sockaddr *,
+ struct sockaddr *, int, struct sockaddr *, u_int);
+int in_rtrequest(int, struct sockaddr *,
+ struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
+
+#if 0
+int in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
+int in_rtioctl(u_long, caddr_t, u_int);
+int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
+#endif
+#endif /* _KERNEL */
+
+/* INET6 stuff */
+#include <freebsd/netinet6/in6_var.h>
+
+#endif /* _NETINET_IN_VAR_HH_ */
diff --git a/freebsd/sys/netinet/ip.h b/freebsd/sys/netinet/ip.h
new file mode 100644
index 00000000..9d5d8a9c
--- /dev/null
+++ b/freebsd/sys/netinet/ip.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/ip.h>
diff --git a/freebsd/sys/netinet/ip6.h b/freebsd/sys/netinet/ip6.h
new file mode 100644
index 00000000..f30da6d1
--- /dev/null
+++ b/freebsd/sys/netinet/ip6.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/ip6.h>
diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c
new file mode 100644
index 00000000..25b20895
--- /dev/null
+++ b/freebsd/sys/netinet/ip_carp.c
@@ -0,0 +1,2427 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_bpf.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/sockio.h>
+
+#include <freebsd/sys/socket.h>
+#ifndef __rtems__
+#include <freebsd/sys/vnode.h>
+#endif
+
+#include <freebsd/machine/stdarg.h>
+
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/fddi.h>
+#include <freebsd/net/iso88025.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_clone.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/if_ether.h>
+#include <freebsd/machine/in_cksum.h>
+#endif
+
+#ifdef INET6
+#include <freebsd/netinet/icmp6.h>
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6protosw.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/scope6_var.h>
+#include <freebsd/netinet6/nd6.h>
+#endif
+
+#include <freebsd/crypto/sha1.h>
+#include <freebsd/netinet/ip_carp.h>
+
+#define CARP_IFNAME "carp"
+static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
+SYSCTL_DECL(_net_inet_carp);
+
+struct carp_softc {
+ struct ifnet *sc_ifp; /* Interface clue */
+ struct ifnet *sc_carpdev; /* Pointer to parent interface */
+ struct in_ifaddr *sc_ia; /* primary iface address */
+ struct ip_moptions sc_imo;
+#ifdef INET6
+ struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
+ struct ip6_moptions sc_im6o;
+#endif /* INET6 */
+ TAILQ_ENTRY(carp_softc) sc_list;
+
+ enum { INIT = 0, BACKUP, MASTER } sc_state;
+
+ int sc_flags_backup;
+ int sc_suppress;
+
+ int sc_sendad_errors;
+#define CARP_SENDAD_MAX_ERRORS 3
+ int sc_sendad_success;
+#define CARP_SENDAD_MIN_SUCCESS 3
+
+ int sc_vhid;
+ int sc_advskew;
+ int sc_naddrs;
+ int sc_naddrs6;
+ int sc_advbase; /* seconds */
+ int sc_init_counter;
+ u_int64_t sc_counter;
+
+ /* authentication */
+#define CARP_HMAC_PAD 64
+ unsigned char sc_key[CARP_KEY_LEN];
+ unsigned char sc_pad[CARP_HMAC_PAD];
+ SHA1_CTX sc_sha1;
+
+ struct callout sc_ad_tmo; /* advertisement timeout */
+ struct callout sc_md_tmo; /* master down timeout */
+ struct callout sc_md6_tmo; /* master down timeout */
+
+ LIST_ENTRY(carp_softc) sc_next; /* Interface clue */
+};
+#define SC2IFP(sc) ((sc)->sc_ifp)
+
+int carp_suppress_preempt = 0;
+int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
+SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
+SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
+ &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
+SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
+ &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
+SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
+ &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
+SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
+ &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
+ &carp_suppress_preempt, 0, "Preemption is suppressed");
+
+struct carpstats carpstats;
+SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
+ &carpstats, carpstats,
+ "CARP statistics (struct carpstats, netinet/ip_carp.h)");
+
+struct carp_if {
+ TAILQ_HEAD(, carp_softc) vhif_vrs;
+ int vhif_nvrs;
+
+ struct ifnet *vhif_ifp;
+ struct mtx vhif_mtx;
+};
+
+#define CARP_INET 0
+#define CARP_INET6 1
+static int proto_reg[] = {-1, -1};
+
+/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
+#define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
+
+/* lock per carp_if queue */
+#define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \
+ NULL, MTX_DEF)
+#define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx)
+#define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
+#define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx)
+#define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx)
+
+#define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx)
+#define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx)
+#define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
+
+#define CARP_LOG(...) do { \
+ if (carp_opts[CARPCTL_LOG] > 0) \
+ log(LOG_INFO, __VA_ARGS__); \
+} while (0)
+
+#define CARP_DEBUG(...) do { \
+ if (carp_opts[CARPCTL_LOG] > 1) \
+ log(LOG_DEBUG, __VA_ARGS__); \
+} while (0)
+
+static void carp_hmac_prepare(struct carp_softc *);
+static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
+ unsigned char *);
+static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
+ unsigned char *);
+static void carp_setroute(struct carp_softc *, int);
+static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
+static int carp_clone_create(struct if_clone *, int, caddr_t);
+static void carp_clone_destroy(struct ifnet *);
+static void carpdetach(struct carp_softc *, int);
+static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
+ struct carp_header *);
+static void carp_send_ad_all(void);
+static void carp_send_ad(void *);
+static void carp_send_ad_locked(struct carp_softc *);
+static void carp_send_arp(struct carp_softc *);
+static void carp_master_down(void *);
+static void carp_master_down_locked(struct carp_softc *);
+static int carp_ioctl(struct ifnet *, u_long, caddr_t);
+static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct route *);
+static void carp_start(struct ifnet *);
+static void carp_setrun(struct carp_softc *, sa_family_t);
+static void carp_set_state(struct carp_softc *, int);
+static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
+enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+
+static void carp_multicast_cleanup(struct carp_softc *);
+static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
+static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
+static void carp_carpdev_state_locked(struct carp_if *);
+static void carp_sc_state_locked(struct carp_softc *);
+#ifdef INET6
+static void carp_send_na(struct carp_softc *);
+static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
+static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
+static void carp_multicast6_cleanup(struct carp_softc *);
+#endif
+
+static LIST_HEAD(, carp_softc) carpif_list;
+static struct mtx carp_mtx;
+IFC_SIMPLE_DECLARE(carp, 0);
+
+static eventhandler_tag if_detach_event_tag;
+
+static __inline u_int16_t
+carp_cksum(struct mbuf *m, int len)
+{
+ return (in_cksum(m, len));
+}
+
+static void
+carp_hmac_prepare(struct carp_softc *sc)
+{
+ u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
+ u_int8_t vhid = sc->sc_vhid & 0xff;
+ struct ifaddr *ifa;
+ int i, found;
+#ifdef INET
+ struct in_addr last, cur, in;
+#endif
+#ifdef INET6
+ struct in6_addr last6, cur6, in6;
+#endif
+
+ if (sc->sc_carpdev)
+ CARP_SCLOCK(sc);
+
+ /* XXX: possible race here */
+
+ /* compute ipad from key */
+ bzero(sc->sc_pad, sizeof(sc->sc_pad));
+ bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
+ for (i = 0; i < sizeof(sc->sc_pad); i++)
+ sc->sc_pad[i] ^= 0x36;
+
+ /* precompute first part of inner hash */
+ SHA1Init(&sc->sc_sha1);
+ SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
+ SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
+ SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
+ SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
+#ifdef INET
+ cur.s_addr = 0;
+ do {
+ found = 0;
+ last = cur;
+ cur.s_addr = 0xffffffff;
+ IF_ADDR_LOCK(SC2IFP(sc));
+ TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
+ if (ifa->ifa_addr->sa_family == AF_INET &&
+ ntohl(in.s_addr) > ntohl(last.s_addr) &&
+ ntohl(in.s_addr) < ntohl(cur.s_addr)) {
+ cur.s_addr = in.s_addr;
+ found++;
+ }
+ }
+ IF_ADDR_UNLOCK(SC2IFP(sc));
+ if (found)
+ SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
+ } while (found);
+#endif /* INET */
+#ifdef INET6
+ memset(&cur6, 0, sizeof(cur6));
+ do {
+ found = 0;
+ last6 = cur6;
+ memset(&cur6, 0xff, sizeof(cur6));
+ IF_ADDR_LOCK(SC2IFP(sc));
+ TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
+ if (IN6_IS_SCOPE_EMBED(&in6))
+ in6.s6_addr16[1] = 0;
+ if (ifa->ifa_addr->sa_family == AF_INET6 &&
+ memcmp(&in6, &last6, sizeof(in6)) > 0 &&
+ memcmp(&in6, &cur6, sizeof(in6)) < 0) {
+ cur6 = in6;
+ found++;
+ }
+ }
+ IF_ADDR_UNLOCK(SC2IFP(sc));
+ if (found)
+ SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
+ } while (found);
+#endif /* INET6 */
+
+ /* convert ipad to opad */
+ for (i = 0; i < sizeof(sc->sc_pad); i++)
+ sc->sc_pad[i] ^= 0x36 ^ 0x5c;
+
+ if (sc->sc_carpdev)
+ CARP_SCUNLOCK(sc);
+}
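
The `^= 0x36 ^ 0x5c` step above relies on XOR cancellation to turn the inner pad into the outer pad in place. A one-liner sketch of that identity (the key byte is chosen arbitrarily):

#include <assert.h>
#include <stdio.h>

int
main(void)
{
	unsigned char key_byte = 0xab;			/* arbitrary key byte */
	unsigned char ipad = key_byte ^ 0x36;		/* inner pad, as built above */
	unsigned char opad = ipad ^ 0x36 ^ 0x5c;	/* "convert ipad to opad" */

	/* The in-place conversion yields the standard HMAC outer pad. */
	assert(opad == (key_byte ^ 0x5c));
	printf("ipad=%#x opad=%#x\n", ipad, opad);
	return (0);
}
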
+
+static void
+carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
+ unsigned char md[20])
+{
+ SHA1_CTX sha1ctx;
+
+ /* fetch first half of inner hash */
+ bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
+
+ SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
+ SHA1Final(md, &sha1ctx);
+
+ /* outer hash */
+ SHA1Init(&sha1ctx);
+ SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
+ SHA1Update(&sha1ctx, md, 20);
+ SHA1Final(md, &sha1ctx);
+}
+
+static int
+carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
+ unsigned char md[20])
+{
+ unsigned char md2[20];
+
+ CARP_SCLOCK_ASSERT(sc);
+
+ carp_hmac_generate(sc, counter, md2);
+
+ return (bcmp(md, md2, sizeof(md2)));
+}
+
+static void
+carp_setroute(struct carp_softc *sc, int cmd)
+{
+ struct ifaddr *ifa;
+ int s;
+
+ if (sc->sc_carpdev)
+ CARP_SCLOCK_ASSERT(sc);
+
+ s = splnet();
+ TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ if (ifa->ifa_addr->sa_family == AF_INET &&
+ sc->sc_carpdev != NULL) {
+ int count = carp_addrcount(
+ (struct carp_if *)sc->sc_carpdev->if_carp,
+ ifatoia(ifa), CARP_COUNT_MASTER);
+
+ if ((cmd == RTM_ADD && count == 1) ||
+ (cmd == RTM_DELETE && count == 0))
+ rtinit(ifa, cmd, RTF_UP | RTF_HOST);
+ }
+ }
+ splx(s);
+}
+
+static int
+carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+
+ struct carp_softc *sc;
+ struct ifnet *ifp;
+
+ sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
+ ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ free(sc, M_CARP);
+ return (ENOSPC);
+ }
+
+ sc->sc_flags_backup = 0;
+ sc->sc_suppress = 0;
+ sc->sc_advbase = CARP_DFLTINTV;
+ sc->sc_vhid = -1; /* required setting */
+ sc->sc_advskew = 0;
+ sc->sc_init_counter = 1;
+ sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
+ sc->sc_imo.imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
+ M_WAITOK);
+ sc->sc_imo.imo_mfilters = NULL;
+ sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ sc->sc_imo.imo_multicast_vif = -1;
+#ifdef INET6
+ sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
+ (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
+ M_WAITOK);
+ sc->sc_im6o.im6o_mfilters = NULL;
+ sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
+ sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
+#endif
+
+ callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
+ callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
+ callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
+
+ ifp->if_softc = sc;
+ if_initname(ifp, CARP_IFNAME, unit);
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_flags = IFF_LOOPBACK;
+ ifp->if_ioctl = carp_ioctl;
+ ifp->if_output = carp_looutput;
+ ifp->if_start = carp_start;
+ ifp->if_type = IFT_CARP;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_hdrlen = 0;
+ if_attach(ifp);
+ bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
+ mtx_lock(&carp_mtx);
+ LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
+ mtx_unlock(&carp_mtx);
+ return (0);
+}
+
+static void
+carp_clone_destroy(struct ifnet *ifp)
+{
+ struct carp_softc *sc = ifp->if_softc;
+
+ if (sc->sc_carpdev)
+ CARP_SCLOCK(sc);
+ carpdetach(sc, 1); /* Returns unlocked. */
+
+ mtx_lock(&carp_mtx);
+ LIST_REMOVE(sc, sc_next);
+ mtx_unlock(&carp_mtx);
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free_type(ifp, IFT_ETHER);
+ free(sc->sc_imo.imo_membership, M_CARP);
+#ifdef INET6
+ free(sc->sc_im6o.im6o_membership, M_CARP);
+#endif
+ free(sc, M_CARP);
+}
+
+/*
+ * This function can be called on CARP interface destroy path,
+ * and in case of the removal of the underlying interface as
+ * well. We differentiate these two cases. In the latter case
+ * we do not cleanup our multicast memberships, since they
+ * are already freed. Also, in the latter case we do not
+ * release the lock on return, because the function will be
+ * called once more, for another CARP instance on the same
+ * interface.
+ */
+static void
+carpdetach(struct carp_softc *sc, int unlock)
+{
+ struct carp_if *cif;
+
+ callout_stop(&sc->sc_ad_tmo);
+ callout_stop(&sc->sc_md_tmo);
+ callout_stop(&sc->sc_md6_tmo);
+
+ if (sc->sc_suppress)
+ carp_suppress_preempt--;
+ sc->sc_suppress = 0;
+
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
+ carp_suppress_preempt--;
+ sc->sc_sendad_errors = 0;
+
+ carp_set_state(sc, INIT);
+ SC2IFP(sc)->if_flags &= ~IFF_UP;
+ carp_setrun(sc, 0);
+ if (unlock)
+ carp_multicast_cleanup(sc);
+#ifdef INET6
+ carp_multicast6_cleanup(sc);
+#endif
+
+ if (sc->sc_carpdev != NULL) {
+ cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+ CARP_LOCK_ASSERT(cif);
+ TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+ if (!--cif->vhif_nvrs) {
+ ifpromisc(sc->sc_carpdev, 0);
+ sc->sc_carpdev->if_carp = NULL;
+ CARP_LOCK_DESTROY(cif);
+ free(cif, M_CARP);
+ } else if (unlock)
+ CARP_UNLOCK(cif);
+ sc->sc_carpdev = NULL;
+ }
+}
+
+/* Detach an interface from the carp. */
+static void
+carp_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+ struct carp_if *cif = (struct carp_if *)ifp->if_carp;
+ struct carp_softc *sc, *nextsc;
+
+ if (cif == NULL)
+ return;
+
+ /*
+ * XXX: At the end of for() cycle the lock will be destroyed.
+ */
+ CARP_LOCK(cif);
+ for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
+ nextsc = TAILQ_NEXT(sc, sc_list);
+ carpdetach(sc, 0);
+ }
+}
+
+/*
+ * Process an input packet.
+ * The order of the checks has been rearranged compared to the RFC;
+ * it is either more efficient this way or not possible otherwise.
+ */
+void
+carp_input(struct mbuf *m, int hlen)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ struct carp_header *ch;
+ int iplen, len;
+
+ CARPSTATS_INC(carps_ipackets);
+
+ if (!carp_opts[CARPCTL_ALLOW]) {
+ m_freem(m);
+ return;
+ }
+
+ /* check if received on a valid carp interface */
+ if (m->m_pkthdr.rcvif->if_carp == NULL) {
+ CARPSTATS_INC(carps_badif);
+ CARP_DEBUG("carp_input: packet received on non-carp "
+ "interface: %s\n",
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return;
+ }
+
+ /* verify that the IP TTL is 255. */
+ if (ip->ip_ttl != CARP_DFLTTL) {
+ CARPSTATS_INC(carps_badttl);
+ CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n",
+ ip->ip_ttl,
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return;
+ }
+
+ iplen = ip->ip_hl << 2;
+
+ if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
+ CARPSTATS_INC(carps_badlen);
+ CARP_DEBUG("carp_input: received len %zd < "
+ "sizeof(struct carp_header) on %s\n",
+ m->m_len - sizeof(struct ip),
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return;
+ }
+
+ if (iplen + sizeof(*ch) < m->m_len) {
+ if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
+ CARPSTATS_INC(carps_hdrops);
+ CARP_DEBUG("carp_input: pullup failed\n");
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ ch = (struct carp_header *)((char *)ip + iplen);
+
+ /*
+ * Verify that the received packet is long enough
+ * to hold a complete CARP header.
+ */
+ len = iplen + sizeof(*ch);
+ if (len > m->m_pkthdr.len) {
+ CARPSTATS_INC(carps_badlen);
+ CARP_DEBUG("carp_input: packet too short %d on %s\n",
+ m->m_pkthdr.len,
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return;
+ }
+
+ if ((m = m_pullup(m, len)) == NULL) {
+ CARPSTATS_INC(carps_hdrops);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ ch = (struct carp_header *)((char *)ip + iplen);
+
+ /* verify the CARP checksum */
+ m->m_data += iplen;
+ if (carp_cksum(m, len - iplen)) {
+ CARPSTATS_INC(carps_badsum);
+ CARP_DEBUG("carp_input: checksum failed on %s\n",
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return;
+ }
+ m->m_data -= iplen;
+
+ carp_input_c(m, ch, AF_INET);
+}
+
+#ifdef INET6
+int
+carp6_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct mbuf *m = *mp;
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct carp_header *ch;
+ u_int len;
+
+ CARPSTATS_INC(carps_ipackets6);
+
+ if (!carp_opts[CARPCTL_ALLOW]) {
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+
+ /* check if received on a valid carp interface */
+ if (m->m_pkthdr.rcvif->if_carp == NULL) {
+ CARPSTATS_INC(carps_badif);
+ CARP_DEBUG("carp6_input: packet received on non-carp "
+ "interface: %s\n",
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+
+ /* verify that the IP TTL is 255 */
+ if (ip6->ip6_hlim != CARP_DFLTTL) {
+ CARPSTATS_INC(carps_badttl);
+ CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n",
+ ip6->ip6_hlim,
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+
+ /* verify that we have a complete carp packet */
+ len = m->m_len;
+ IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
+ if (ch == NULL) {
+ CARPSTATS_INC(carps_badlen);
+ CARP_DEBUG("carp6_input: packet size %u too small\n", len);
+ return (IPPROTO_DONE);
+ }
+
+
+ /* verify the CARP checksum */
+ m->m_data += *offp;
+ if (carp_cksum(m, sizeof(*ch))) {
+ CARPSTATS_INC(carps_badsum);
+ CARP_DEBUG("carp6_input: checksum failed, on %s\n",
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+ m->m_data -= *offp;
+
+ carp_input_c(m, ch, AF_INET6);
+ return (IPPROTO_DONE);
+}
+#endif /* INET6 */
+
+static void
+carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
+{
+ struct ifnet *ifp = m->m_pkthdr.rcvif;
+ struct carp_softc *sc;
+ u_int64_t tmp_counter;
+ struct timeval sc_tv, ch_tv;
+
+ /* verify that the VHID is valid on the receiving interface */
+ CARP_LOCK(ifp->if_carp);
+ TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
+ if (sc->sc_vhid == ch->carp_vhid)
+ break;
+
+ if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
+ (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
+ CARPSTATS_INC(carps_badvhid);
+ CARP_UNLOCK(ifp->if_carp);
+ m_freem(m);
+ return;
+ }
+
+ getmicrotime(&SC2IFP(sc)->if_lastchange);
+ SC2IFP(sc)->if_ipackets++;
+ SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
+
+ if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
+ struct ip *ip = mtod(m, struct ip *);
+ uint32_t af1 = af;
+
+ /* BPF wants net byte order */
+ ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
+ ip->ip_off = htons(ip->ip_off);
+ bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
+ }
+
+ /* verify the CARP version. */
+ if (ch->carp_version != CARP_VERSION) {
+ CARPSTATS_INC(carps_badver);
+ SC2IFP(sc)->if_ierrors++;
+ CARP_UNLOCK(ifp->if_carp);
+ CARP_DEBUG("%s; invalid version %d\n",
+ SC2IFP(sc)->if_xname,
+ ch->carp_version);
+ m_freem(m);
+ return;
+ }
+
+ /* verify the hash */
+ if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
+ CARPSTATS_INC(carps_badauth);
+ SC2IFP(sc)->if_ierrors++;
+ CARP_UNLOCK(ifp->if_carp);
+ CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
+ m_freem(m);
+ return;
+ }
+
+ tmp_counter = ntohl(ch->carp_counter[0]);
+ tmp_counter = tmp_counter<<32;
+ tmp_counter += ntohl(ch->carp_counter[1]);
+
+ /* XXX Replay protection goes here */
+
+ sc->sc_init_counter = 0;
+ sc->sc_counter = tmp_counter;
+
+ sc_tv.tv_sec = sc->sc_advbase;
+ if (carp_suppress_preempt && sc->sc_advskew < 240)
+ sc_tv.tv_usec = 240 * 1000000 / 256;
+ else
+ sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ ch_tv.tv_sec = ch->carp_advbase;
+ ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
+
+ switch (sc->sc_state) {
+ case INIT:
+ break;
+ case MASTER:
+ /*
+ * If we receive an advertisement from a master who's going to
+ * be more frequent than us, go into BACKUP state.
+ */
+ if (timevalcmp(&sc_tv, &ch_tv, >) ||
+ timevalcmp(&sc_tv, &ch_tv, ==)) {
+ callout_stop(&sc->sc_ad_tmo);
+ CARP_LOG("%s: MASTER -> BACKUP "
+ "(more frequent advertisement received)\n",
+ SC2IFP(sc)->if_xname);
+ carp_set_state(sc, BACKUP);
+ carp_setrun(sc, 0);
+ carp_setroute(sc, RTM_DELETE);
+ }
+ break;
+ case BACKUP:
+ /*
+ * If we're pre-empting masters who advertise slower than us,
+ * and this one claims to be slower, treat him as down.
+ */
+ if (carp_opts[CARPCTL_PREEMPT] &&
+ timevalcmp(&sc_tv, &ch_tv, <)) {
+ CARP_LOG("%s: BACKUP -> MASTER "
+ "(preempting a slower master)\n",
+ SC2IFP(sc)->if_xname);
+ carp_master_down_locked(sc);
+ break;
+ }
+
+ /*
+ * If the master is going to advertise at such a low frequency
+ * that he is guaranteed to time out, we might as well just
+ * treat him as timed out now.
+ */
+ sc_tv.tv_sec = sc->sc_advbase * 3;
+ if (timevalcmp(&sc_tv, &ch_tv, <)) {
+ CARP_LOG("%s: BACKUP -> MASTER "
+ "(master timed out)\n",
+ SC2IFP(sc)->if_xname);
+ carp_master_down_locked(sc);
+ break;
+ }
+
+ /*
+ * Otherwise, we reset the counter and wait for the next
+ * advertisement.
+ */
+ carp_setrun(sc, af);
+ break;
+ }
+
+ CARP_UNLOCK(ifp->if_carp);
+
+ m_freem(m);
+ return;
+}
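
The sc_tv/ch_tv comparison above boils down to comparing effective advertisement intervals of advbase seconds plus advskew/256 of a second. A standalone sketch of that arithmetic (the advbase/advskew values are made up):

#include <stdio.h>

/*
 * Effective advertisement interval in microseconds, mirroring the
 * sc_tv/ch_tv computation above.
 */
static long long
adv_interval_us(int advbase, int advskew)
{
	return ((long long)advbase * 1000000 + advskew * 1000000LL / 256);
}

int
main(void)
{
	/* Hypothetical peer with skew 0 versus a local vhid with skew 100. */
	long long peer = adv_interval_us(1, 0);
	long long mine = adv_interval_us(1, 100);

	printf("peer %lld us, mine %lld us -> %s\n", peer, mine,
	    peer <= mine ? "peer advertises at least as often" :
	    "we advertise more often");
	return (0);
}
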
+
+static int
+carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+{
+ struct m_tag *mtag;
+ struct ifnet *ifp = SC2IFP(sc);
+
+ if (sc->sc_init_counter) {
+ /* this could also be seconds since unix epoch */
+ sc->sc_counter = arc4random();
+ sc->sc_counter = sc->sc_counter << 32;
+ sc->sc_counter += arc4random();
+ } else
+ sc->sc_counter++;
+
+ ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
+ ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
+
+ carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
+
+ /* Tag packet for carp_output */
+ mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ SC2IFP(sc)->if_oerrors++;
+ return (ENOMEM);
+ }
+ bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+ m_tag_prepend(m, mtag);
+
+ return (0);
+}
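
A userland sketch of how the 64-bit counter is split into the two network-order words above and reassembled on input (the counter value is arbitrary):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>		/* htonl()/ntohl() */

int
main(void)
{
	uint64_t counter = 0x0123456789abcdefULL;	/* arbitrary counter value */
	uint32_t wire[2];
	uint64_t back;

	/* Split into two 32-bit network-order words, as above. */
	wire[0] = htonl((uint32_t)((counter >> 32) & 0xffffffff));
	wire[1] = htonl((uint32_t)(counter & 0xffffffff));

	/* Reassemble the way carp_input_c() rebuilds tmp_counter. */
	back = (uint64_t)ntohl(wire[0]) << 32;
	back += ntohl(wire[1]);

	printf("%s\n", back == counter ? "round-trips" : "mismatch");
	return (0);
}
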
+
+static void
+carp_send_ad_all(void)
+{
+ struct carp_softc *sc;
+
+ mtx_lock(&carp_mtx);
+ LIST_FOREACH(sc, &carpif_list, sc_next) {
+ if (sc->sc_carpdev == NULL)
+ continue;
+ CARP_SCLOCK(sc);
+ if ((SC2IFP(sc)->if_flags & IFF_UP) &&
+ (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
+ sc->sc_state == MASTER)
+ carp_send_ad_locked(sc);
+ CARP_SCUNLOCK(sc);
+ }
+ mtx_unlock(&carp_mtx);
+}
+
+static void
+carp_send_ad(void *v)
+{
+ struct carp_softc *sc = v;
+
+ CARP_SCLOCK(sc);
+ carp_send_ad_locked(sc);
+ CARP_SCUNLOCK(sc);
+}
+
+static void
+carp_send_ad_locked(struct carp_softc *sc)
+{
+ struct carp_header ch;
+ struct timeval tv;
+ struct carp_header *ch_ptr;
+ struct mbuf *m;
+ int len, advbase, advskew;
+
+ CARP_SCLOCK_ASSERT(sc);
+
+ /* bow out if we've lost our UPness or RUNNINGuiness */
+ if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
+ (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
+ advbase = 255;
+ advskew = 255;
+ } else {
+ advbase = sc->sc_advbase;
+ if (!carp_suppress_preempt || sc->sc_advskew > 240)
+ advskew = sc->sc_advskew;
+ else
+ advskew = 240;
+ tv.tv_sec = advbase;
+ tv.tv_usec = advskew * 1000000 / 256;
+ }
+
+ ch.carp_version = CARP_VERSION;
+ ch.carp_type = CARP_ADVERTISEMENT;
+ ch.carp_vhid = sc->sc_vhid;
+ ch.carp_advbase = advbase;
+ ch.carp_advskew = advskew;
+ ch.carp_authlen = 7; /* XXX DEFINE */
+ ch.carp_pad1 = 0; /* must be zero */
+ ch.carp_cksum = 0;
+
+#ifdef INET
+ if (sc->sc_ia) {
+ struct ip *ip;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL) {
+ SC2IFP(sc)->if_oerrors++;
+ CARPSTATS_INC(carps_onomem);
+ /* XXX maybe less ? */
+ if (advbase != 255 || advskew != 255)
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
+ return;
+ }
+ len = sizeof(*ip) + sizeof(ch);
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_len = len;
+ MH_ALIGN(m, m->m_len);
+ m->m_flags |= M_MCAST;
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(*ip) >> 2;
+ ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_len = len;
+ ip->ip_id = ip_newid();
+ ip->ip_off = IP_DF;
+ ip->ip_ttl = CARP_DFLTTL;
+ ip->ip_p = IPPROTO_CARP;
+ ip->ip_sum = 0;
+ ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+ ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
+
+ ch_ptr = (struct carp_header *)(&ip[1]);
+ bcopy(&ch, ch_ptr, sizeof(ch));
+ if (carp_prepare_ad(m, sc, ch_ptr))
+ return;
+
+ m->m_data += sizeof(*ip);
+ ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
+ m->m_data -= sizeof(*ip);
+
+ getmicrotime(&SC2IFP(sc)->if_lastchange);
+ SC2IFP(sc)->if_opackets++;
+ SC2IFP(sc)->if_obytes += len;
+ CARPSTATS_INC(carps_opackets);
+
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
+ SC2IFP(sc)->if_oerrors++;
+ if (sc->sc_sendad_errors < INT_MAX)
+ sc->sc_sendad_errors++;
+ if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+ carp_suppress_preempt++;
+ if (carp_suppress_preempt == 1) {
+ CARP_SCUNLOCK(sc);
+ carp_send_ad_all();
+ CARP_SCLOCK(sc);
+ }
+ }
+ sc->sc_sendad_success = 0;
+ } else {
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
+ if (++sc->sc_sendad_success >=
+ CARP_SENDAD_MIN_SUCCESS) {
+ carp_suppress_preempt--;
+ sc->sc_sendad_errors = 0;
+ }
+ } else
+ sc->sc_sendad_errors = 0;
+ }
+ }
+#endif /* INET */
+#ifdef INET6
+ if (sc->sc_ia6) {
+ struct ip6_hdr *ip6;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL) {
+ SC2IFP(sc)->if_oerrors++;
+ CARPSTATS_INC(carps_onomem);
+ /* XXX maybe less ? */
+ if (advbase != 255 || advskew != 255)
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
+ return;
+ }
+ len = sizeof(*ip6) + sizeof(ch);
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_len = len;
+ MH_ALIGN(m, m->m_len);
+ m->m_flags |= M_MCAST;
+ ip6 = mtod(m, struct ip6_hdr *);
+ bzero(ip6, sizeof(*ip6));
+ ip6->ip6_vfc |= IPV6_VERSION;
+ ip6->ip6_hlim = CARP_DFLTTL;
+ ip6->ip6_nxt = IPPROTO_CARP;
+ bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
+ sizeof(struct in6_addr));
+ /* set the multicast destination */
+
+ ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
+ ip6->ip6_dst.s6_addr8[15] = 0x12;
+ if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
+ SC2IFP(sc)->if_oerrors++;
+ m_freem(m);
+ CARP_DEBUG("%s: in6_setscope failed\n", __func__);
+ return;
+ }
+
+ ch_ptr = (struct carp_header *)(&ip6[1]);
+ bcopy(&ch, ch_ptr, sizeof(ch));
+ if (carp_prepare_ad(m, sc, ch_ptr))
+ return;
+
+ m->m_data += sizeof(*ip6);
+ ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
+ m->m_data -= sizeof(*ip6);
+
+ getmicrotime(&SC2IFP(sc)->if_lastchange);
+ SC2IFP(sc)->if_opackets++;
+ SC2IFP(sc)->if_obytes += len;
+ CARPSTATS_INC(carps_opackets6);
+
+ if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
+ SC2IFP(sc)->if_oerrors++;
+ if (sc->sc_sendad_errors < INT_MAX)
+ sc->sc_sendad_errors++;
+ if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+ carp_suppress_preempt++;
+ if (carp_suppress_preempt == 1) {
+ CARP_SCUNLOCK(sc);
+ carp_send_ad_all();
+ CARP_SCLOCK(sc);
+ }
+ }
+ sc->sc_sendad_success = 0;
+ } else {
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
+ if (++sc->sc_sendad_success >=
+ CARP_SENDAD_MIN_SUCCESS) {
+ carp_suppress_preempt--;
+ sc->sc_sendad_errors = 0;
+ }
+ } else
+ sc->sc_sendad_errors = 0;
+ }
+ }
+#endif /* INET6 */
+
+ if (advbase != 255 || advskew != 255)
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
+
+}
+
+/*
+ * Broadcast a gratuitous ARP request containing
+ * the virtual router MAC address for each IP address
+ * associated with the virtual router.
+ */
+static void
+carp_send_arp(struct carp_softc *sc)
+{
+ struct ifaddr *ifa;
+
+ TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+
+/* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
+ arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
+
+ DELAY(1000); /* XXX */
+ }
+}
+
+#ifdef INET6
+static void
+carp_send_na(struct carp_softc *sc)
+{
+ struct ifaddr *ifa;
+ struct in6_addr *in6;
+ static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+
+ TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+
+ in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
+ nd6_na_output(sc->sc_carpdev, &mcast, in6,
+ ND_NA_FLAG_OVERRIDE, 1, NULL);
+ DELAY(1000); /* XXX */
+ }
+}
+#endif /* INET6 */
+
+static int
+carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
+{
+ struct carp_softc *vh;
+ struct ifaddr *ifa;
+ int count = 0;
+
+ CARP_LOCK_ASSERT(cif);
+
+ TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+ if ((type == CARP_COUNT_RUNNING &&
+ (SC2IFP(vh)->if_flags & IFF_UP) &&
+ (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
+ (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
+ IF_ADDR_LOCK(SC2IFP(vh));
+ TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
+ ifa_list) {
+ if (ifa->ifa_addr->sa_family == AF_INET &&
+ ia->ia_addr.sin_addr.s_addr ==
+ ifatoia(ifa)->ia_addr.sin_addr.s_addr)
+ count++;
+ }
+ IF_ADDR_UNLOCK(SC2IFP(vh));
+ }
+ }
+ return (count);
+}
+
+int
+carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
+ struct in_addr *isaddr, u_int8_t **enaddr)
+{
+ struct carp_if *cif;
+ struct carp_softc *vh;
+ int index, count = 0;
+ struct ifaddr *ifa;
+
+ cif = ifp->if_carp;
+ CARP_LOCK(cif);
+
+ if (carp_opts[CARPCTL_ARPBALANCE]) {
+ /*
+ * XXX proof of concept implementation.
+ * We use the source ip to decide which virtual host should
+ * handle the request. If we're master of that virtual host,
+ * then we respond, otherwise, just drop the arp packet on
+ * the floor.
+ */
+ count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
+ if (count == 0) {
+ /* should never reach this */
+ CARP_UNLOCK(cif);
+ return (0);
+ }
+
+ /* this should be a hash, like pf_hash() */
+ index = ntohl(isaddr->s_addr) % count;
+ count = 0;
+
+ TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+ if ((SC2IFP(vh)->if_flags & IFF_UP) &&
+ (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
+ IF_ADDR_LOCK(SC2IFP(vh));
+ TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
+ ifa_list) {
+ if (ifa->ifa_addr->sa_family ==
+ AF_INET &&
+ ia->ia_addr.sin_addr.s_addr ==
+ ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
+ if (count == index) {
+ if (vh->sc_state ==
+ MASTER) {
+ *enaddr = IF_LLADDR(vh->sc_ifp);
+ IF_ADDR_UNLOCK(SC2IFP(vh));
+ CARP_UNLOCK(cif);
+ return (1);
+ } else {
+ IF_ADDR_UNLOCK(SC2IFP(vh));
+ CARP_UNLOCK(cif);
+ return (0);
+ }
+ }
+ count++;
+ }
+ }
+ IF_ADDR_UNLOCK(SC2IFP(vh));
+ }
+ }
+ } else {
+ TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+ if ((SC2IFP(vh)->if_flags & IFF_UP) &&
+ (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
+ ia->ia_ifp == SC2IFP(vh) &&
+ vh->sc_state == MASTER) {
+ *enaddr = IF_LLADDR(vh->sc_ifp);
+ CARP_UNLOCK(cif);
+ return (1);
+ }
+ }
+ }
+ CARP_UNLOCK(cif);
+ return (0);
+}
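
When arpbalance is enabled, the code above selects the answering vhost with a plain modulo of the requester's source address over the number of running vhosts. A minimal sketch of that selection (the address and vhost count are hypothetical):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>		/* inet_pton(), ntohl() */

int
main(void)
{
	int count = 3;			/* hypothetical number of running vhosts */
	struct in_addr src;

	/* Example requester address; the kernel uses the ARP source IP. */
	inet_pton(AF_INET, "192.0.2.55", &src);
	printf("vhost index %u answers\n",
	    (unsigned)(ntohl(src.s_addr) % (unsigned)count));
	return (0);
}
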
+
+#ifdef INET6
+struct ifaddr *
+carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
+{
+ struct carp_if *cif;
+ struct carp_softc *vh;
+ struct ifaddr *ifa;
+
+ cif = ifp->if_carp;
+ CARP_LOCK(cif);
+ TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+ IF_ADDR_LOCK(SC2IFP(vh));
+ TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
+ if (IN6_ARE_ADDR_EQUAL(taddr,
+ &ifatoia6(ifa)->ia_addr.sin6_addr) &&
+ (SC2IFP(vh)->if_flags & IFF_UP) &&
+ (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
+ vh->sc_state == MASTER) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(SC2IFP(vh));
+ CARP_UNLOCK(cif);
+ return (ifa);
+ }
+ }
+ IF_ADDR_UNLOCK(SC2IFP(vh));
+ }
+ CARP_UNLOCK(cif);
+
+ return (NULL);
+}
+
+caddr_t
+carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
+{
+ struct m_tag *mtag;
+ struct carp_if *cif;
+ struct carp_softc *sc;
+ struct ifaddr *ifa;
+
+ cif = ifp->if_carp;
+ CARP_LOCK(cif);
+ TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
+ IF_ADDR_LOCK(SC2IFP(sc));
+ TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ if (IN6_ARE_ADDR_EQUAL(taddr,
+ &ifatoia6(ifa)->ia_addr.sin6_addr) &&
+ (SC2IFP(sc)->if_flags & IFF_UP) &&
+ (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) {
+ struct ifnet *ifp = SC2IFP(sc);
+ mtag = m_tag_get(PACKET_TAG_CARP,
+ sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL) {
+ /* better a bit than nothing */
+ IF_ADDR_UNLOCK(SC2IFP(sc));
+ CARP_UNLOCK(cif);
+ return (IF_LLADDR(sc->sc_ifp));
+ }
+ bcopy(&ifp, (caddr_t)(mtag + 1),
+ sizeof(struct ifnet *));
+ m_tag_prepend(m, mtag);
+
+ IF_ADDR_UNLOCK(SC2IFP(sc));
+ CARP_UNLOCK(cif);
+ return (IF_LLADDR(sc->sc_ifp));
+ }
+ }
+ IF_ADDR_UNLOCK(SC2IFP(sc));
+ }
+ CARP_UNLOCK(cif);
+
+ return (NULL);
+}
+#endif
+
+struct ifnet *
+carp_forus(struct ifnet *ifp, u_char *dhost)
+{
+ struct carp_if *cif;
+ struct carp_softc *vh;
+ u_int8_t *ena = dhost;
+
+ if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
+ return (NULL);
+
+ cif = ifp->if_carp;
+ CARP_LOCK(cif);
+ TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
+ if ((SC2IFP(vh)->if_flags & IFF_UP) &&
+ (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
+ vh->sc_state == MASTER &&
+ !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
+ CARP_UNLOCK(cif);
+ return (SC2IFP(vh));
+ }
+
+ CARP_UNLOCK(cif);
+ return (NULL);
+}
+
+static void
+carp_master_down(void *v)
+{
+ struct carp_softc *sc = v;
+
+ CARP_SCLOCK(sc);
+ carp_master_down_locked(sc);
+ CARP_SCUNLOCK(sc);
+}
+
+static void
+carp_master_down_locked(struct carp_softc *sc)
+{
+ if (sc->sc_carpdev)
+ CARP_SCLOCK_ASSERT(sc);
+
+ switch (sc->sc_state) {
+ case INIT:
+ printf("%s: master_down event in INIT state\n",
+ SC2IFP(sc)->if_xname);
+ break;
+ case MASTER:
+ break;
+ case BACKUP:
+ carp_set_state(sc, MASTER);
+ carp_send_ad_locked(sc);
+ carp_send_arp(sc);
+#ifdef INET6
+ carp_send_na(sc);
+#endif /* INET6 */
+ carp_setrun(sc, 0);
+ carp_setroute(sc, RTM_ADD);
+ break;
+ }
+}
+
+/*
+ * When in backup state, af indicates whether to reset the master down timer
+ * for v4 or v6. If it's set to zero, reset the ones which are already pending.
+ */
+static void
+carp_setrun(struct carp_softc *sc, sa_family_t af)
+{
+ struct timeval tv;
+
+ if (sc->sc_carpdev == NULL) {
+ SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ carp_set_state(sc, INIT);
+ return;
+ } else
+ CARP_SCLOCK_ASSERT(sc);
+
+ if (SC2IFP(sc)->if_flags & IFF_UP &&
+ sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) &&
+ sc->sc_carpdev->if_link_state == LINK_STATE_UP)
+ SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
+ else {
+ SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ carp_setroute(sc, RTM_DELETE);
+ return;
+ }
+
+ switch (sc->sc_state) {
+ case INIT:
+ if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
+ carp_send_ad_locked(sc);
+ carp_send_arp(sc);
+#ifdef INET6
+ carp_send_na(sc);
+#endif /* INET6 */
+ CARP_LOG("%s: INIT -> MASTER (preempting)\n",
+ SC2IFP(sc)->if_xname);
+ carp_set_state(sc, MASTER);
+ carp_setroute(sc, RTM_ADD);
+ } else {
+ CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
+ carp_set_state(sc, BACKUP);
+ carp_setroute(sc, RTM_DELETE);
+ carp_setrun(sc, 0);
+ }
+ break;
+ case BACKUP:
+ callout_stop(&sc->sc_ad_tmo);
+ tv.tv_sec = 3 * sc->sc_advbase;
+ tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
+ carp_master_down, sc);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
+ carp_master_down, sc);
+ break;
+#endif /* INET6 */
+ default:
+ if (sc->sc_naddrs)
+ callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
+ carp_master_down, sc);
+ if (sc->sc_naddrs6)
+ callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
+ carp_master_down, sc);
+ break;
+ }
+ break;
+ case MASTER:
+ tv.tv_sec = sc->sc_advbase;
+ tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
+ break;
+ }
+}
+
+static void
+carp_multicast_cleanup(struct carp_softc *sc)
+{
+ struct ip_moptions *imo = &sc->sc_imo;
+ u_int16_t n = imo->imo_num_memberships;
+
+ /* Clean up our own multicast memberships */
+ while (n-- > 0) {
+ if (imo->imo_membership[n] != NULL) {
+ in_delmulti(imo->imo_membership[n]);
+ imo->imo_membership[n] = NULL;
+ }
+ }
+ KASSERT(imo->imo_mfilters == NULL,
+ ("%s: imo_mfilters != NULL", __func__));
+ imo->imo_num_memberships = 0;
+ imo->imo_multicast_ifp = NULL;
+}
+
+#ifdef INET6
+static void
+carp_multicast6_cleanup(struct carp_softc *sc)
+{
+ struct ip6_moptions *im6o = &sc->sc_im6o;
+ u_int16_t n = im6o->im6o_num_memberships;
+
+ while (n-- > 0) {
+ if (im6o->im6o_membership[n] != NULL) {
+ in6_mc_leave(im6o->im6o_membership[n], NULL);
+ im6o->im6o_membership[n] = NULL;
+ }
+ }
+ KASSERT(im6o->im6o_mfilters == NULL,
+ ("%s: im6o_mfilters != NULL", __func__));
+ im6o->im6o_num_memberships = 0;
+ im6o->im6o_multicast_ifp = NULL;
+}
+#endif
+
+static int
+carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+ struct ifnet *ifp;
+ struct carp_if *cif;
+ struct in_ifaddr *ia, *ia_if;
+ struct ip_moptions *imo = &sc->sc_imo;
+ struct in_addr addr;
+ u_long iaddr = htonl(sin->sin_addr.s_addr);
+ int own, error;
+
+ if (sin->sin_addr.s_addr == 0) {
+ if (!(SC2IFP(sc)->if_flags & IFF_UP))
+ carp_set_state(sc, INIT);
+ if (sc->sc_naddrs)
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ if (sc->sc_carpdev)
+ CARP_SCLOCK(sc);
+ carp_setrun(sc, 0);
+ if (sc->sc_carpdev)
+ CARP_SCUNLOCK(sc);
+ return (0);
+ }
+
+ /* we have to do it by hand to make sure we won't match on ourselves */
+ ia_if = NULL; own = 0;
+ IN_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ /* and, yeah, we need a multicast-capable iface too */
+ if (ia->ia_ifp != SC2IFP(sc) &&
+ (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
+ (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
+ if (!ia_if)
+ ia_if = ia;
+ if (sin->sin_addr.s_addr ==
+ ia->ia_addr.sin_addr.s_addr)
+ own++;
+ }
+ }
+
+ if (!ia_if) {
+ IN_IFADDR_RUNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+
+ ia = ia_if;
+ ifa_ref(&ia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+
+ ifp = ia->ia_ifp;
+
+ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
+ (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) {
+ ifa_free(&ia->ia_ifa);
+ return (EADDRNOTAVAIL);
+ }
+
+ if (imo->imo_num_memberships == 0) {
+ addr.s_addr = htonl(INADDR_CARP_GROUP);
+ if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) ==
+ NULL) {
+ ifa_free(&ia->ia_ifa);
+ return (ENOBUFS);
+ }
+ imo->imo_num_memberships++;
+ imo->imo_multicast_ifp = ifp;
+ imo->imo_multicast_ttl = CARP_DFLTTL;
+ imo->imo_multicast_loop = 0;
+ }
+
+ if (!ifp->if_carp) {
+
+ cif = malloc(sizeof(*cif), M_CARP,
+ M_WAITOK|M_ZERO);
+ if (!cif) {
+ error = ENOBUFS;
+ goto cleanup;
+ }
+ if ((error = ifpromisc(ifp, 1))) {
+ free(cif, M_CARP);
+ goto cleanup;
+ }
+
+ CARP_LOCK_INIT(cif);
+ CARP_LOCK(cif);
+ cif->vhif_ifp = ifp;
+ TAILQ_INIT(&cif->vhif_vrs);
+ ifp->if_carp = cif;
+
+ } else {
+ struct carp_softc *vr;
+
+ cif = (struct carp_if *)ifp->if_carp;
+ CARP_LOCK(cif);
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+ if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
+ CARP_UNLOCK(cif);
+ error = EEXIST;
+ goto cleanup;
+ }
+ }
+ sc->sc_ia = ia;
+ sc->sc_carpdev = ifp;
+
+ { /* XXX prevent endless loop if already in queue */
+ struct carp_softc *vr, *after = NULL;
+ int myself = 0;
+ cif = (struct carp_if *)ifp->if_carp;
+
+ /* XXX: cif should not change, right? So we still hold the lock */
+ CARP_LOCK_ASSERT(cif);
+
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+ if (vr == sc)
+ myself = 1;
+ if (vr->sc_vhid < sc->sc_vhid)
+ after = vr;
+ }
+
+ if (!myself) {
+ /* We're trying to keep things in order */
+ if (after == NULL) {
+ TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+ } else {
+ TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+ }
+ cif->vhif_nvrs++;
+ }
+ }
+
+ sc->sc_naddrs++;
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ if (own)
+ sc->sc_advskew = 0;
+ carp_sc_state_locked(sc);
+ carp_setrun(sc, 0);
+
+ CARP_UNLOCK(cif);
+ ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */
+
+ return (0);
+
+cleanup:
+ in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+ ifa_free(&ia->ia_ifa);
+ return (error);
+}
+
+static int
+carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+ int error = 0;
+
+ if (!--sc->sc_naddrs) {
+ struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+ struct ip_moptions *imo = &sc->sc_imo;
+
+ CARP_LOCK(cif);
+ callout_stop(&sc->sc_ad_tmo);
+ SC2IFP(sc)->if_flags &= ~IFF_UP;
+ SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ sc->sc_vhid = -1;
+ in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+ imo->imo_multicast_ifp = NULL;
+ TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+ if (!--cif->vhif_nvrs) {
+ sc->sc_carpdev->if_carp = NULL;
+ CARP_LOCK_DESTROY(cif);
+ free(cif, M_CARP);
+ } else {
+ CARP_UNLOCK(cif);
+ }
+ }
+
+ return (error);
+}
+
+#ifdef INET6
+static int
+carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+ struct ifnet *ifp;
+ struct carp_if *cif;
+ struct in6_ifaddr *ia, *ia_if;
+ struct ip6_moptions *im6o = &sc->sc_im6o;
+ struct in6_addr in6;
+ int own, error;
+
+ error = 0;
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ if (!(SC2IFP(sc)->if_flags & IFF_UP))
+ carp_set_state(sc, INIT);
+ if (sc->sc_naddrs6)
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ if (sc->sc_carpdev)
+ CARP_SCLOCK(sc);
+ carp_setrun(sc, 0);
+ if (sc->sc_carpdev)
+ CARP_SCUNLOCK(sc);
+ return (0);
+ }
+
+ /* we have to do it by hand to make sure we won't match on ourselves */
+ ia_if = NULL; own = 0;
+ IN6_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if ((sin6->sin6_addr.s6_addr32[i] &
+ ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
+ (ia->ia_addr.sin6_addr.s6_addr32[i] &
+ ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
+ break;
+ }
+ /* and, yeah, we need a multicast-capable iface too */
+ if (ia->ia_ifp != SC2IFP(sc) &&
+ (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
+ (i == 4)) {
+ if (!ia_if)
+ ia_if = ia;
+ if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &ia->ia_addr.sin6_addr))
+ own++;
+ }
+ }
+
+ if (!ia_if) {
+ IN6_IFADDR_RUNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+ ia = ia_if;
+ ifa_ref(&ia->ia_ifa);
+ IN6_IFADDR_RUNLOCK();
+ ifp = ia->ia_ifp;
+
+ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
+ (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) {
+ ifa_free(&ia->ia_ifa);
+ return (EADDRNOTAVAIL);
+ }
+
+ if (!sc->sc_naddrs6) {
+ struct in6_multi *in6m;
+
+ im6o->im6o_multicast_ifp = ifp;
+
+ /* join CARP multicast address */
+ bzero(&in6, sizeof(in6));
+ in6.s6_addr16[0] = htons(0xff02);
+ in6.s6_addr8[15] = 0x12;
+ if (in6_setscope(&in6, ifp, NULL) != 0)
+ goto cleanup;
+ in6m = NULL;
+ error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
+ if (error)
+ goto cleanup;
+ im6o->im6o_membership[0] = in6m;
+ im6o->im6o_num_memberships++;
+
+ /* join solicited multicast address */
+ bzero(&in6, sizeof(in6));
+ in6.s6_addr16[0] = htons(0xff02);
+ in6.s6_addr32[1] = 0;
+ in6.s6_addr32[2] = htonl(1);
+ in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
+ in6.s6_addr8[12] = 0xff;
+ if (in6_setscope(&in6, ifp, NULL) != 0)
+ goto cleanup;
+ in6m = NULL;
+ error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
+ if (error)
+ goto cleanup;
+ im6o->im6o_membership[1] = in6m;
+ im6o->im6o_num_memberships++;
+ }
+
+ if (!ifp->if_carp) {
+ cif = malloc(sizeof(*cif), M_CARP,
+ M_WAITOK|M_ZERO);
+ if (!cif) {
+ error = ENOBUFS;
+ goto cleanup;
+ }
+ if ((error = ifpromisc(ifp, 1))) {
+ free(cif, M_CARP);
+ goto cleanup;
+ }
+
+ CARP_LOCK_INIT(cif);
+ CARP_LOCK(cif);
+ cif->vhif_ifp = ifp;
+ TAILQ_INIT(&cif->vhif_vrs);
+ ifp->if_carp = cif;
+
+ } else {
+ struct carp_softc *vr;
+
+ cif = (struct carp_if *)ifp->if_carp;
+ CARP_LOCK(cif);
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+ if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
+ CARP_UNLOCK(cif);
+ error = EINVAL;
+ goto cleanup;
+ }
+ }
+ sc->sc_ia6 = ia;
+ sc->sc_carpdev = ifp;
+
+ { /* XXX prevent endless loop if already in queue */
+ struct carp_softc *vr, *after = NULL;
+ int myself = 0;
+ cif = (struct carp_if *)ifp->if_carp;
+ CARP_LOCK_ASSERT(cif);
+
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+ if (vr == sc)
+ myself = 1;
+ if (vr->sc_vhid < sc->sc_vhid)
+ after = vr;
+ }
+
+ if (!myself) {
+ /* We're trying to keep things in order */
+ if (after == NULL) {
+ TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+ } else {
+ TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+ }
+ cif->vhif_nvrs++;
+ }
+ }
+
+ sc->sc_naddrs6++;
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ if (own)
+ sc->sc_advskew = 0;
+ carp_sc_state_locked(sc);
+ carp_setrun(sc, 0);
+
+ CARP_UNLOCK(cif);
+ ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */
+
+ return (0);
+
+cleanup:
+ if (!sc->sc_naddrs6)
+ carp_multicast6_cleanup(sc);
+ ifa_free(&ia->ia_ifa);
+ return (error);
+}
+
+static int
+carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+ int error = 0;
+
+ if (!--sc->sc_naddrs6) {
+ struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+
+ CARP_LOCK(cif);
+ callout_stop(&sc->sc_ad_tmo);
+ SC2IFP(sc)->if_flags &= ~IFF_UP;
+ SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ sc->sc_vhid = -1;
+ carp_multicast6_cleanup(sc);
+ TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+ if (!--cif->vhif_nvrs) {
+ CARP_LOCK_DESTROY(cif);
+ sc->sc_carpdev->if_carp = NULL;
+ free(cif, M_CARP);
+ } else
+ CARP_UNLOCK(cif);
+ }
+
+ return (error);
+}
+#endif /* INET6 */
+
+static int
+carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+ struct carp_softc *sc = ifp->if_softc, *vr;
+ struct carpreq carpr;
+ struct ifaddr *ifa;
+ struct ifreq *ifr;
+ struct ifaliasreq *ifra;
+ int locked = 0, error = 0;
+
+ ifa = (struct ifaddr *)addr;
+ ifra = (struct ifaliasreq *)addr;
+ ifr = (struct ifreq *)addr;
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+ sizeof(struct sockaddr));
+ error = carp_set_addr(sc, satosin(ifa->ifa_addr));
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
+ break;
+#endif /* INET6 */
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ case SIOCAIFADDR:
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+ sizeof(struct sockaddr));
+ error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
+ break;
+#endif /* INET6 */
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ case SIOCDIFADDR:
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
+ break;
+#endif /* INET6 */
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ case SIOCSIFFLAGS:
+ if (sc->sc_carpdev) {
+ locked = 1;
+ CARP_SCLOCK(sc);
+ }
+ if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
+ callout_stop(&sc->sc_ad_tmo);
+ callout_stop(&sc->sc_md_tmo);
+ callout_stop(&sc->sc_md6_tmo);
+ if (sc->sc_state == MASTER)
+ carp_send_ad_locked(sc);
+ carp_set_state(sc, INIT);
+ carp_setrun(sc, 0);
+ } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
+ SC2IFP(sc)->if_flags |= IFF_UP;
+ carp_setrun(sc, 0);
+ }
+ break;
+
+ case SIOCSVH:
+ error = priv_check(curthread, PRIV_NETINET_CARP);
+ if (error)
+ break;
+ if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+ break;
+ error = 1;
+ if (sc->sc_carpdev) {
+ locked = 1;
+ CARP_SCLOCK(sc);
+ }
+ if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
+ switch (carpr.carpr_state) {
+ case BACKUP:
+ callout_stop(&sc->sc_ad_tmo);
+ carp_set_state(sc, BACKUP);
+ carp_setrun(sc, 0);
+ carp_setroute(sc, RTM_DELETE);
+ break;
+ case MASTER:
+ carp_master_down_locked(sc);
+ break;
+ default:
+ break;
+ }
+ }
+ if (carpr.carpr_vhid > 0) {
+ if (carpr.carpr_vhid > 255) {
+ error = EINVAL;
+ break;
+ }
+ if (sc->sc_carpdev) {
+ struct carp_if *cif;
+ cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+ if (vr != sc &&
+ vr->sc_vhid == carpr.carpr_vhid) {
+ error = EEXIST;
+ break;
+ }
+ if (error == EEXIST)
+ break;
+ }
+ sc->sc_vhid = carpr.carpr_vhid;
+ IF_LLADDR(sc->sc_ifp)[0] = 0;
+ IF_LLADDR(sc->sc_ifp)[1] = 0;
+ IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
+ IF_LLADDR(sc->sc_ifp)[3] = 0;
+ IF_LLADDR(sc->sc_ifp)[4] = 1;
+ IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
+ error--;
+ }
+ if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
+ if (carpr.carpr_advskew >= 255) {
+ error = EINVAL;
+ break;
+ }
+ if (carpr.carpr_advbase > 255) {
+ error = EINVAL;
+ break;
+ }
+ sc->sc_advbase = carpr.carpr_advbase;
+ sc->sc_advskew = carpr.carpr_advskew;
+ error--;
+ }
+ bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
+ if (error > 0)
+ error = EINVAL;
+ else {
+ error = 0;
+ carp_setrun(sc, 0);
+ }
+ break;
+
+ case SIOCGVH:
+ /* XXX: lockless read */
+ bzero(&carpr, sizeof(carpr));
+ carpr.carpr_state = sc->sc_state;
+ carpr.carpr_vhid = sc->sc_vhid;
+ carpr.carpr_advbase = sc->sc_advbase;
+ carpr.carpr_advskew = sc->sc_advskew;
+ error = priv_check(curthread, PRIV_NETINET_CARP);
+ if (error == 0)
+ bcopy(sc->sc_key, carpr.carpr_key,
+ sizeof(carpr.carpr_key));
+ error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ if (locked)
+ CARP_SCUNLOCK(sc);
+
+ carp_hmac_prepare(sc);
+
+ return (error);
+}
+
+/*
+ * XXX: this is looutput. We should eventually use it from there.
+ */
+static int
+carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct route *ro)
+{
+ u_int32_t af;
+ struct rtentry *rt = NULL;
+
+ M_ASSERTPKTHDR(m); /* check if we have the packet header */
+
+ if (ro != NULL)
+ rt = ro->ro_rt;
+ if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ m_freem(m);
+ return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
+ rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ }
+
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af));
+ dst->sa_family = af;
+ }
+
+#if 1 /* XXX */
+ switch (dst->sa_family) {
+ case AF_INET:
+ case AF_INET6:
+ case AF_IPX:
+ case AF_APPLETALK:
+ break;
+ default:
+ printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+#endif
+ return(if_simloop(ifp, m, dst->sa_family, 0));
+}
+
+/*
+ * Start output on carp interface. This function should never be called.
+ */
+static void
+carp_start(struct ifnet *ifp)
+{
+#ifdef DEBUG
+ printf("%s: start called\n", ifp->if_xname);
+#endif
+}
+
+int
+carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+ struct rtentry *rt)
+{
+ struct m_tag *mtag;
+ struct carp_softc *sc;
+ struct ifnet *carp_ifp;
+
+ if (!sa)
+ return (0);
+
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif /* INET6 */
+ default:
+ return (0);
+ }
+
+ mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
+ if (mtag == NULL)
+ return (0);
+
+ bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
+ sc = carp_ifp->if_softc;
+
+ /* Set the source MAC address to Virtual Router MAC Address */
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN: {
+ struct ether_header *eh;
+
+ eh = mtod(m, struct ether_header *);
+ eh->ether_shost[0] = 0;
+ eh->ether_shost[1] = 0;
+ eh->ether_shost[2] = 0x5e;
+ eh->ether_shost[3] = 0;
+ eh->ether_shost[4] = 1;
+ eh->ether_shost[5] = sc->sc_vhid;
+ }
+ break;
+ case IFT_FDDI: {
+ struct fddi_header *fh;
+
+ fh = mtod(m, struct fddi_header *);
+ fh->fddi_shost[0] = 0;
+ fh->fddi_shost[1] = 0;
+ fh->fddi_shost[2] = 0x5e;
+ fh->fddi_shost[3] = 0;
+ fh->fddi_shost[4] = 1;
+ fh->fddi_shost[5] = sc->sc_vhid;
+ }
+ break;
+ case IFT_ISO88025: {
+ struct iso88025_header *th;
+ th = mtod(m, struct iso88025_header *);
+ th->iso88025_shost[0] = 3;
+ th->iso88025_shost[1] = 0;
+ th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
+ th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
+ th->iso88025_shost[4] = 0;
+ th->iso88025_shost[5] = 0;
+ }
+ break;
+ default:
+ printf("%s: carp is not supported for this interface type\n",
+ ifp->if_xname);
+ return (EOPNOTSUPP);
+ }
+
+ return (0);
+}
+
+static void
+carp_set_state(struct carp_softc *sc, int state)
+{
+ int link_state;
+
+ if (sc->sc_carpdev)
+ CARP_SCLOCK_ASSERT(sc);
+
+ if (sc->sc_state == state)
+ return;
+
+ sc->sc_state = state;
+ switch (state) {
+ case BACKUP:
+ link_state = LINK_STATE_DOWN;
+ break;
+ case MASTER:
+ link_state = LINK_STATE_UP;
+ break;
+ default:
+ link_state = LINK_STATE_UNKNOWN;
+ break;
+ }
+ if_link_state_change(SC2IFP(sc), link_state);
+}
+
+void
+carp_carpdev_state(struct ifnet *ifp)
+{
+ struct carp_if *cif;
+
+ cif = ifp->if_carp;
+ CARP_LOCK(cif);
+ carp_carpdev_state_locked(cif);
+ CARP_UNLOCK(cif);
+}
+
+static void
+carp_carpdev_state_locked(struct carp_if *cif)
+{
+ struct carp_softc *sc;
+
+ TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
+ carp_sc_state_locked(sc);
+}
+
+static void
+carp_sc_state_locked(struct carp_softc *sc)
+{
+ CARP_SCLOCK_ASSERT(sc);
+
+ if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
+ !(sc->sc_carpdev->if_flags & IFF_UP)) {
+ sc->sc_flags_backup = SC2IFP(sc)->if_flags;
+ SC2IFP(sc)->if_flags &= ~IFF_UP;
+ SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ callout_stop(&sc->sc_ad_tmo);
+ callout_stop(&sc->sc_md_tmo);
+ callout_stop(&sc->sc_md6_tmo);
+ carp_set_state(sc, INIT);
+ carp_setrun(sc, 0);
+ if (!sc->sc_suppress) {
+ carp_suppress_preempt++;
+ if (carp_suppress_preempt == 1) {
+ CARP_SCUNLOCK(sc);
+ carp_send_ad_all();
+ CARP_SCLOCK(sc);
+ }
+ }
+ sc->sc_suppress = 1;
+ } else {
+ SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
+ carp_set_state(sc, INIT);
+ carp_setrun(sc, 0);
+ if (sc->sc_suppress)
+ carp_suppress_preempt--;
+ sc->sc_suppress = 0;
+ }
+
+ return;
+}
+
+#ifdef INET
+extern struct domain inetdomain;
+static struct protosw in_carp_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_CARP,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = carp_input,
+ .pr_output = (pr_output_t *)rip_output,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+#endif
+
+#ifdef INET6
+extern struct domain inet6domain;
+static struct ip6protosw in6_carp_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_CARP,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = carp6_input,
+ .pr_output = rip6_output,
+ .pr_ctloutput = rip6_ctloutput,
+ .pr_usrreqs = &rip6_usrreqs
+};
+#endif
+
+static void
+carp_mod_cleanup(void)
+{
+
+ if (if_detach_event_tag == NULL)
+ return;
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
+ if_clone_detach(&carp_cloner);
+#ifdef INET
+ if (proto_reg[CARP_INET] == 0) {
+ (void)ipproto_unregister(IPPROTO_CARP);
+ pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
+ proto_reg[CARP_INET] = -1;
+ }
+ carp_iamatch_p = NULL;
+#endif
+#ifdef INET6
+ if (proto_reg[CARP_INET6] == 0) {
+ (void)ip6proto_unregister(IPPROTO_CARP);
+ pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
+ proto_reg[CARP_INET6] = -1;
+ }
+ carp_iamatch6_p = NULL;
+ carp_macmatch6_p = NULL;
+#endif
+ carp_linkstate_p = NULL;
+ carp_forus_p = NULL;
+ carp_output_p = NULL;
+ mtx_destroy(&carp_mtx);
+}
+
+static int
+carp_mod_load(void)
+{
+ int err;
+
+ if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
+ if (if_detach_event_tag == NULL)
+ return (ENOMEM);
+ mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
+ LIST_INIT(&carpif_list);
+ if_clone_attach(&carp_cloner);
+ carp_linkstate_p = carp_carpdev_state;
+ carp_forus_p = carp_forus;
+ carp_output_p = carp_output;
+#ifdef INET6
+ carp_iamatch6_p = carp_iamatch6;
+ carp_macmatch6_p = carp_macmatch6;
+ proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
+ (struct protosw *)&in6_carp_protosw);
+ if (proto_reg[CARP_INET6] != 0) {
+ printf("carp: error %d attaching to PF_INET6\n",
+ proto_reg[CARP_INET6]);
+ carp_mod_cleanup();
+ return (EINVAL);
+ }
+ err = ip6proto_register(IPPROTO_CARP);
+ if (err) {
+ printf("carp: error %d registering with INET6\n", err);
+ carp_mod_cleanup();
+ return (EINVAL);
+ }
+#endif
+#ifdef INET
+ carp_iamatch_p = carp_iamatch;
+ proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
+ if (proto_reg[CARP_INET] != 0) {
+ printf("carp: error %d attaching to PF_INET\n",
+ proto_reg[CARP_INET]);
+ carp_mod_cleanup();
+ return (EINVAL);
+ }
+ err = ipproto_register(IPPROTO_CARP);
+ if (err) {
+ printf("carp: error %d registering with INET\n", err);
+ carp_mod_cleanup();
+ return (EINVAL);
+ }
+#endif
+ return 0;
+}
+
+static int
+carp_modevent(module_t mod, int type, void *data)
+{
+ switch (type) {
+ case MOD_LOAD:
+ return carp_mod_load();
+ /* NOTREACHED */
+ case MOD_UNLOAD:
+ /*
+ * XXX: For now, disallow module unloading by default due to
+ * a race condition where a thread may dereference one of the
+ * function pointer hooks after the module has been
+ * unloaded, during processing of a packet, causing a panic.
+ */
+#ifdef CARPMOD_CAN_UNLOAD
+ carp_mod_cleanup();
+#else
+ return (EBUSY);
+#endif
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static moduledata_t carp_mod = {
+ "carp",
+ carp_modevent,
+ 0
+};
+
+DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
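The scheduling above reduces to simple arithmetic on the advbase and advskew values carried in the advertisements. The standalone sketch below is illustrative only (the helper names are not part of the sources): it reproduces the intervals carp_setrun() programs through tvtohz(), i.e. a MASTER advertises every advbase + advskew/256 seconds and a BACKUP waits three such skewed intervals before carp_master_down() promotes it.

#include <stdio.h>

/* Illustrative helper, not part of the FreeBSD sources. */
static void
carp_intervals(int advbase, int advskew, double *advert, double *master_down)
{
	double skew = (double)advskew / 256.0;	/* advskew is an 8-bit fraction of a second */

	*advert = advbase + skew;		/* MASTER case in carp_setrun() */
	*master_down = 3 * advbase + skew;	/* BACKUP case in carp_setrun() */
}

int
main(void)
{
	double advert, down;

	/* with advbase = CARP_DFLTINTV (1) and advskew = 100 */
	carp_intervals(1, 100, &advert, &down);
	printf("advertise every %.3fs, declare master down after %.3fs\n",
	    advert, down);
	return (0);
}

With the defaults (advbase 1, advskew 0) a backup therefore takes over roughly three seconds after the last advertisement it hears.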
diff --git a/freebsd/sys/netinet/ip_carp.h b/freebsd/sys/netinet/ip_carp.h
new file mode 100644
index 00000000..2f2b4f28
--- /dev/null
+++ b/freebsd/sys/netinet/ip_carp.h
@@ -0,0 +1,191 @@
+/* $FreeBSD$ */
+/* $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_CARP_H
+#define _IP_CARP_H
+
+/*
+ * The CARP header layout is as follows:
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Version| Type | VirtualHostID | AdvSkew | Auth Len |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | AdvBase | Checksum |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Counter (1) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Counter (2) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (1) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (2) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (3) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (4) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (5) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ */
+
+struct carp_header {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_int8_t carp_type:4,
+ carp_version:4;
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_int8_t carp_version:4,
+ carp_type:4;
+#endif
+ u_int8_t carp_vhid; /* virtual host id */
+ u_int8_t carp_advskew; /* advertisement skew */
+ u_int8_t carp_authlen; /* size of counter+md, 32bit chunks */
+ u_int8_t carp_pad1; /* reserved */
+ u_int8_t carp_advbase; /* advertisement interval */
+ u_int16_t carp_cksum;
+ u_int32_t carp_counter[2];
+ unsigned char carp_md[20]; /* SHA1 HMAC */
+} __packed;
+
+#ifdef CTASSERT
+CTASSERT(sizeof(struct carp_header) == 36);
+#endif
+
+#define CARP_DFLTTL 255
+
+/* carp_version */
+#define CARP_VERSION 2
+
+/* carp_type */
+#define CARP_ADVERTISEMENT 0x01
+
+#define CARP_KEY_LEN 20 /* a sha1 hash of a passphrase */
+
+/* carp_advbase */
+#define CARP_DFLTINTV 1
+
+/*
+ * Statistics.
+ */
+struct carpstats {
+ uint64_t carps_ipackets; /* total input packets, IPv4 */
+ uint64_t carps_ipackets6; /* total input packets, IPv6 */
+ uint64_t carps_badif; /* wrong interface */
+ uint64_t carps_badttl; /* TTL is not CARP_DFLTTL */
+ uint64_t carps_hdrops; /* packets shorter than hdr */
+ uint64_t carps_badsum; /* bad checksum */
+ uint64_t carps_badver; /* bad (incl unsupp) version */
+ uint64_t carps_badlen; /* data length does not match */
+ uint64_t carps_badauth; /* bad authentication */
+ uint64_t carps_badvhid; /* bad VHID */
+ uint64_t carps_badaddrs; /* bad address list */
+
+ uint64_t carps_opackets; /* total output packets, IPv4 */
+ uint64_t carps_opackets6; /* total output packets, IPv6 */
+ uint64_t carps_onomem; /* no memory for an mbuf */
+ uint64_t carps_ostates; /* total state updates sent */
+
+ uint64_t carps_preempt; /* if enabled, preemptions */
+};
+
+#ifdef _KERNEL
+#define CARPSTATS_ADD(name, val) carpstats.name += (val)
+#define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1)
+#endif
+
+/*
+ * Configuration structure for SIOCSVH and SIOCGVH
+ */
+struct carpreq {
+ int carpr_state;
+#define CARP_STATES "INIT", "BACKUP", "MASTER"
+#define CARP_MAXSTATE 2
+ int carpr_vhid;
+ int carpr_advskew;
+ int carpr_advbase;
+ unsigned char carpr_key[CARP_KEY_LEN];
+};
+#define SIOCSVH _IOWR('i', 245, struct ifreq)
+#define SIOCGVH _IOWR('i', 246, struct ifreq)
+
+/*
+ * Names for CARP sysctl objects
+ */
+#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */
+#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
+#define CARPCTL_LOG 3 /* log bad packets */
+#define CARPCTL_STATS 4 /* statistics (read-only) */
+#define CARPCTL_ARPBALANCE 5 /* balance arp responses */
+#define CARPCTL_MAXID 6
+
+#define CARPCTL_NAMES { \
+ { 0, 0 }, \
+ { "allow", CTLTYPE_INT }, \
+ { "preempt", CTLTYPE_INT }, \
+ { "log", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "arpbalance", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL
+void carp_carpdev_state(struct ifnet *);
+void carp_input (struct mbuf *, int);
+int carp6_input (struct mbuf **, int *, int);
+int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+int carp_iamatch (struct ifnet *, struct in_ifaddr *, struct in_addr *,
+ u_int8_t **);
+struct ifaddr *carp_iamatch6(struct ifnet *, struct in6_addr *);
+caddr_t carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *);
+struct ifnet *carp_forus (struct ifnet *, u_char *);
+
+/* These are external networking stack hooks for CARP */
+/* net/if.c */
+extern void (*carp_linkstate_p)(struct ifnet *);
+/* net/if_bridge.c net/if_ethersubr.c */
+extern struct ifnet *(*carp_forus_p)(struct ifnet *, u_char *);
+/* net/if_ethersubr.c */
+extern int (*carp_output_p)(struct ifnet *, struct mbuf *,
+ struct sockaddr *, struct rtentry *);
+#ifdef INET
+/* netinet/if_ether.c */
+extern int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *,
+ struct in_addr *, u_int8_t **);
+#endif
+#ifdef INET6
+/* netinet6/nd6_nbr.c */
+extern struct ifaddr *(*carp_iamatch6_p)(struct ifnet *, struct in6_addr *);
+extern caddr_t (*carp_macmatch6_p)(struct ifnet *, struct mbuf *,
+ const struct in6_addr *);
+#endif
+#endif
+#endif /* _IP_CARP_H */
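The header above is also the userland-facing API: a carpreq is passed through ifr_data with the SIOCSVH/SIOCGVH ioctls that carp_ioctl() handles. The sketch below is a hedged illustration of that path, roughly what ifconfig(8) does; it assumes a carp0 interface already exists, SIOCSVH requires the PRIV_NETINET_CARP privilege, and the key and skew values are arbitrary examples.

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>

#include <net/if.h>
#include <netinet/ip_carp.h>

#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct carpreq carpr;
	struct ifreq ifr;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		err(1, "socket");

	memset(&ifr, 0, sizeof(ifr));
	memset(&carpr, 0, sizeof(carpr));
	strlcpy(ifr.ifr_name, "carp0", sizeof(ifr.ifr_name));
	ifr.ifr_data = (caddr_t)&carpr;

	/* Fetch the current settings; the key is only copied out with privilege. */
	if (ioctl(s, SIOCGVH, &ifr) < 0)
		err(1, "SIOCGVH");

	carpr.carpr_vhid = 1;			/* virtual host ID, 1..255 */
	carpr.carpr_advbase = CARP_DFLTINTV;	/* advertise every second */
	carpr.carpr_advskew = 100;		/* bias this host toward BACKUP */
	strlcpy((char *)carpr.carpr_key, "examplekey", sizeof(carpr.carpr_key));

	if (ioctl(s, SIOCSVH, &ifr) < 0)	/* needs PRIV_NETINET_CARP */
		err(1, "SIOCSVH");

	close(s);
	return (0);
}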
diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c
new file mode 100644
index 00000000..13999825
--- /dev/null
+++ b/freebsd/sys/netinet/ip_divert.c
@@ -0,0 +1,818 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#if !defined(KLD_MODULE)
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_sctp.h>
+#ifndef INET
+#error "IPDIVERT requires INET."
+#endif
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#ifdef SCTP
+#include <freebsd/netinet/sctp_crc32.h>
+#endif
+
+#include <freebsd/security/mac/mac_framework.h>
+
+/*
+ * Divert sockets
+ */
+
+/*
+ * Allocate enough space to hold a full IP packet
+ */
+#define DIVSNDQ (65536 + 100)
+#define DIVRCVQ (65536 + 100)
+
+/*
+ * Divert sockets work in conjunction with ipfw or other packet filters;
+ * see the divert(4) manpage for features.
+ * Packets are selected by the packet filter and tagged with an
+ * MTAG_IPFW_RULE tag carrying the 'divert port' number (as set by
+ * the packet filter) and information on the matching filter rule for
+ * subsequent reinjection. The divert_port is used to put the packet
+ * on the corresponding divert socket, while the rule number is passed
+ * up (at least partially) as the sin_port in the struct sockaddr.
+ *
+ * Packets written to the divert socket carry in sin_addr a
+ * destination address, and in sin_port the number of the filter rule
+ * after which to continue processing.
+ * If the destination address is INADDR_ANY, the packet is treated
+ * as outgoing and sent to ip_output(); otherwise it is treated as
+ * incoming and sent to ip_input().
+ * Further, sin_zero carries some information on the interface,
+ * which can be used in the reinject -- see comments in the code.
+ *
+ * On reinjection, processing in ip_input() and ip_output()
+ * will be exactly the same as for the original packet, except that
+ * packet filter processing will start at the rule number after the one
+ * written in the sin_port (ipfw does not allow a rule #0, so sin_port=0
+ * will apply the entire ruleset to the packet).
+ */
+
+/* Internal variables. */
+static VNET_DEFINE(struct inpcbhead, divcb);
+static VNET_DEFINE(struct inpcbinfo, divcbinfo);
+
+#define V_divcb VNET(divcb)
+#define V_divcbinfo VNET(divcbinfo)
+
+static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */
+static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */
+
+static eventhandler_tag ip_divert_event_tag;
+
+/*
+ * Initialize divert connection block queue.
+ */
+static void
+div_zone_change(void *tag)
+{
+
+ uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
+}
+
+static int
+div_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "divinp");
+ return (0);
+}
+
+static void
+div_inpcb_fini(void *mem, int size)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_DESTROY(inp);
+}
+
+static void
+div_init(void)
+{
+
+ INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
+ LIST_INIT(&V_divcb);
+ V_divcbinfo.ipi_listhead = &V_divcb;
+#ifdef VIMAGE
+ V_divcbinfo.ipi_vnet = curvnet;
+#endif
+ /*
+ * XXX We don't use the hash list for divert IP, but it's easier
+ * to allocate a one entry hash list than it is to check all
+ * over the place for hashbase == NULL.
+ */
+ V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask);
+ V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
+ &V_divcbinfo.ipi_porthashmask);
+ V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
+ NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
+ UMA_ZONE_NOFREE);
+ uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
+}
+
+static void
+div_destroy(void)
+{
+
+ INP_INFO_LOCK_DESTROY(&V_divcbinfo);
+ uma_zdestroy(V_divcbinfo.ipi_zone);
+ hashdestroy(V_divcbinfo.ipi_hashbase, M_PCB, V_divcbinfo.ipi_hashmask);
+ hashdestroy(V_divcbinfo.ipi_porthashbase, M_PCB,
+ V_divcbinfo.ipi_porthashmask);
+}
+
+/*
+ * IPPROTO_DIVERT is not in the real IP protocol number space; this
+ * function should never be called. Just in case, drop any packets.
+ */
+static void
+div_input(struct mbuf *m, int off)
+{
+
+ KMOD_IPSTAT_INC(ips_noproto);
+ m_freem(m);
+}
+
+/*
+ * Divert a packet by passing it up to the divert socket at port 'port'.
+ *
+ * Setup generic address and protocol structures for div_input routine,
+ * then pass them along with mbuf chain.
+ */
+static void
+divert_packet(struct mbuf *m, int incoming)
+{
+ struct ip *ip;
+ struct inpcb *inp;
+ struct socket *sa;
+ u_int16_t nport;
+ struct sockaddr_in divsrc;
+ struct m_tag *mtag;
+
+ mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
+ if (mtag == NULL) {
+ m_freem(m);
+ return;
+ }
+ /* Assure header */
+ if (m->m_len < sizeof(struct ip) &&
+ (m = m_pullup(m, sizeof(struct ip))) == 0)
+ return;
+ ip = mtod(m, struct ip *);
+
+ /* Delayed checksums are currently not compatible with divert. */
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ ip->ip_len = ntohs(ip->ip_len);
+ in_delayed_cksum(m);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ ip->ip_len = htons(ip->ip_len);
+ }
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP) {
+ ip->ip_len = ntohs(ip->ip_len);
+ sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
+ m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
+ ip->ip_len = htons(ip->ip_len);
+ }
+#endif
+ bzero(&divsrc, sizeof(divsrc));
+ divsrc.sin_len = sizeof(divsrc);
+ divsrc.sin_family = AF_INET;
+ /* record matching rule, in host format */
+ divsrc.sin_port = ((struct ipfw_rule_ref *)(mtag+1))->rulenum;
+ /*
+ * Record receive interface address, if any.
+ * But only for incoming packets.
+ */
+ if (incoming) {
+ struct ifaddr *ifa;
+ struct ifnet *ifp;
+
+ /* Sanity check */
+ M_ASSERTPKTHDR(m);
+
+ /* Find IP address for receive interface */
+ ifp = m->m_pkthdr.rcvif;
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ divsrc.sin_addr =
+ ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr;
+ break;
+ }
+ if_addr_runlock(ifp);
+ }
+ /*
+ * Record the incoming interface name whenever we have one.
+ */
+ if (m->m_pkthdr.rcvif) {
+ /*
+ * Hide the actual interface name in the
+ * sin_zero array. XXX This needs to be moved to a
+ * different sockaddr type for divert, e.g.
+ * sockaddr_div with multiple fields like
+ * sockaddr_dl. Presently we have only 7 bytes
+ * but that will do for now as most interfaces
+ * are 4 or less + 2 or less bytes for unit.
+ * There is probably a faster way of doing this,
+ * possibly taking it from the sockaddr_dl on the iface.
+ * This solves the problem of a P2P link and a LAN interface
+ * having the same address, which can result in the wrong
+ * interface being assigned to the packet when fed back
+ * into the divert socket. Theoretically if the daemon saves
+ * and re-uses the sockaddr_in as suggested in the man pages,
+ * this iface name will come along for the ride.
+ * (see div_output for the other half of this.)
+ */
+ strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname,
+ sizeof(divsrc.sin_zero));
+ }
+
+ /* Put packet on socket queue, if any */
+ sa = NULL;
+ nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info));
+ INP_INFO_RLOCK(&V_divcbinfo);
+ LIST_FOREACH(inp, &V_divcb, inp_list) {
+ /* XXX why does only one socket match? */
+ if (inp->inp_lport == nport) {
+ INP_RLOCK(inp);
+ sa = inp->inp_socket;
+ SOCKBUF_LOCK(&sa->so_rcv);
+ if (sbappendaddr_locked(&sa->so_rcv,
+ (struct sockaddr *)&divsrc, m,
+ (struct mbuf *)0) == 0) {
+ SOCKBUF_UNLOCK(&sa->so_rcv);
+ sa = NULL; /* force mbuf reclaim below */
+ } else
+ sorwakeup_locked(sa);
+ INP_RUNLOCK(inp);
+ break;
+ }
+ }
+ INP_INFO_RUNLOCK(&V_divcbinfo);
+ if (sa == NULL) {
+ m_freem(m);
+ KMOD_IPSTAT_INC(ips_noproto);
+ KMOD_IPSTAT_DEC(ips_delivered);
+ }
+}
+
+/*
+ * Deliver packet back into the IP processing machinery.
+ *
+ * If no address specified, or address is 0.0.0.0, send to ip_output();
+ * otherwise, send to ip_input() and mark as having been received on
+ * the interface with that address.
+ */
+static int
+div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
+ struct mbuf *control)
+{
+ struct m_tag *mtag;
+ struct ipfw_rule_ref *dt;
+ int error = 0;
+ struct mbuf *options;
+
+ /*
+ * An mbuf may not have come from userland, but we pretend
+ * that it has.
+ */
+ m->m_pkthdr.rcvif = NULL;
+ m->m_nextpkt = NULL;
+ M_SETFIB(m, so->so_fibnum);
+
+ if (control)
+ m_freem(control); /* XXX */
+
+ mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
+ if (mtag == NULL) {
+ /* this should be normal */
+ mtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
+ sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
+ if (mtag == NULL) {
+ error = ENOBUFS;
+ goto cantsend;
+ }
+ m_tag_prepend(m, mtag);
+ }
+ dt = (struct ipfw_rule_ref *)(mtag+1);
+
+ /* Loopback avoidance and state recovery */
+ if (sin) {
+ int i;
+
+ /* set the starting point. We provide a non-zero slot,
+ * but a non-matching chain_id to skip that info and use
+ * the rulenum/rule_id.
+ */
+ dt->slot = 1; /* dummy, chain_id is invalid */
+ dt->chain_id = 0;
+ dt->rulenum = sin->sin_port+1; /* host format ? */
+ dt->rule_id = 0;
+ /*
+ * Find receive interface with the given name, stuffed
+ * (if it exists) in the sin_zero[] field.
+ * The name is user supplied data so don't trust its size
+ * or that it is zero terminated.
+ */
+ for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++)
+ ;
+ if ( i > 0 && i < sizeof(sin->sin_zero))
+ m->m_pkthdr.rcvif = ifunit(sin->sin_zero);
+ }
+
+ /* Reinject packet into the system as incoming or outgoing */
+ if (!sin || sin->sin_addr.s_addr == 0) {
+ struct ip *const ip = mtod(m, struct ip *);
+ struct inpcb *inp;
+
+ dt->info |= IPFW_IS_DIVERT | IPFW_INFO_OUT;
+ inp = sotoinpcb(so);
+ INP_RLOCK(inp);
+ /*
+ * Don't allow both user specified and setsockopt options,
+ * and don't allow packet length sizes that will crash
+ */
+ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) ||
+ ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
+ error = EINVAL;
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ } else {
+ /* Convert fields to host order for ip_output() */
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+
+ /* Send packet to output processing */
+ KMOD_IPSTAT_INC(ips_rawout); /* XXX */
+
+#ifdef MAC
+ mac_inpcb_create_mbuf(inp, m);
+#endif
+ /*
+ * Get ready to inject the packet into ip_output().
+ * Just in case socket options were specified on the
+ * divert socket, we duplicate them. This is done
+ * to avoid having to hold the PCB locks over the call
+ * to ip_output(), as doing this results in a number of
+ * lock ordering complexities.
+ *
+ * Note that we set the multicast options argument for
+ * ip_output() to NULL since it should be invariant that
+ * they are not present.
+ */
+ KASSERT(inp->inp_moptions == NULL,
+ ("multicast options set on a divert socket"));
+ options = NULL;
+ /*
+ * XXXCSJP: It is unclear to me whether or not it makes
+ * sense for divert sockets to have options. However,
+ * for now we will duplicate them with the INP locks
+ * held so we can use them in ip_output() without
+ * requiring a reference to the pcb.
+ */
+ if (inp->inp_options != NULL) {
+ options = m_dup(inp->inp_options, M_DONTWAIT);
+ if (options == NULL)
+ error = ENOBUFS;
+ }
+ INP_RUNLOCK(inp);
+ if (error == ENOBUFS) {
+ m_freem(m);
+ return (error);
+ }
+ error = ip_output(m, options, NULL,
+ ((so->so_options & SO_DONTROUTE) ?
+ IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST |
+ IP_RAWOUTPUT, NULL, NULL);
+ if (options != NULL)
+ m_freem(options);
+ }
+ } else {
+ dt->info |= IPFW_IS_DIVERT | IPFW_INFO_IN;
+ if (m->m_pkthdr.rcvif == NULL) {
+ /*
+ * No luck with the name, check by IP address.
+ * Clear the port and the ifname to make sure
+ * there are no distractions for ifa_ifwithaddr.
+ */
+ struct ifaddr *ifa;
+
+ bzero(sin->sin_zero, sizeof(sin->sin_zero));
+ sin->sin_port = 0;
+ ifa = ifa_ifwithaddr((struct sockaddr *) sin);
+ if (ifa == NULL) {
+ error = EADDRNOTAVAIL;
+ goto cantsend;
+ }
+ m->m_pkthdr.rcvif = ifa->ifa_ifp;
+ ifa_free(ifa);
+ }
+#ifdef MAC
+ mac_socket_create_mbuf(so, m);
+#endif
+ /* Send packet to input processing via netisr */
+ netisr_queue_src(NETISR_IP, (uintptr_t)so, m);
+ }
+
+ return error;
+
+cantsend:
+ m_freem(m);
+ return error;
+}
+
+static int
+div_attach(struct socket *so, int proto, struct thread *td)
+{
+ struct inpcb *inp;
+ int error;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp == NULL, ("div_attach: inp != NULL"));
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_DIVERT);
+ if (error)
+ return (error);
+ }
+ error = soreserve(so, div_sendspace, div_recvspace);
+ if (error)
+ return error;
+ INP_INFO_WLOCK(&V_divcbinfo);
+ error = in_pcballoc(so, &V_divcbinfo);
+ if (error) {
+ INP_INFO_WUNLOCK(&V_divcbinfo);
+ return error;
+ }
+ inp = (struct inpcb *)so->so_pcb;
+ INP_INFO_WUNLOCK(&V_divcbinfo);
+ inp->inp_ip_p = proto;
+ inp->inp_vflag |= INP_IPV4;
+ inp->inp_flags |= INP_HDRINCL;
+ INP_WUNLOCK(inp);
+ return 0;
+}
+
+static void
+div_detach(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("div_detach: inp == NULL"));
+ INP_INFO_WLOCK(&V_divcbinfo);
+ INP_WLOCK(inp);
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+ INP_INFO_WUNLOCK(&V_divcbinfo);
+}
+
+static int
+div_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct inpcb *inp;
+ int error;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("div_bind: inp == NULL"));
+ /* in_pcbbind assumes that nam is a sockaddr_in
+ * and in_pcbbind requires a valid address. Since divert
+ * sockets don't supply one, we need to make sure the address
+ * is filled in properly.
+ * XXX -- divert should not be abusing in_pcbbind
+ * and should probably have its own family.
+ */
+ if (nam->sa_family != AF_INET)
+ return EAFNOSUPPORT;
+ ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
+ INP_INFO_WLOCK(&V_divcbinfo);
+ INP_WLOCK(inp);
+ error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_divcbinfo);
+ return error;
+}
+
+static int
+div_shutdown(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("div_shutdown: inp == NULL"));
+ INP_WLOCK(inp);
+ socantsendmore(so);
+ INP_WUNLOCK(inp);
+ return 0;
+}
+
+static int
+div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
+ struct mbuf *control, struct thread *td)
+{
+
+ /* Packet must have a header (but that's about it) */
+ if (m->m_len < sizeof (struct ip) &&
+ (m = m_pullup(m, sizeof (struct ip))) == 0) {
+ KMOD_IPSTAT_INC(ips_toosmall);
+ m_freem(m);
+ return EINVAL;
+ }
+
+ /* Send packet */
+ return div_output(so, m, (struct sockaddr_in *)nam, control);
+}
+
+static void
+div_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+ struct in_addr faddr;
+
+ faddr = ((struct sockaddr_in *)sa)->sin_addr;
+ if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
+ return;
+ if (PRC_IS_REDIRECT(cmd))
+ return;
+}
+
+static int
+div_pcblist(SYSCTL_HANDLER_ARGS)
+{
+ int error, i, n;
+ struct inpcb *inp, **inp_list;
+ inp_gen_t gencnt;
+ struct xinpgen xig;
+
+ /*
+ * The process of preparing the TCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == 0) {
+ n = V_divcbinfo.ipi_count;
+ n += imax(n / 8, 10);
+ req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
+ return 0;
+ }
+
+ if (req->newptr != 0)
+ return EPERM;
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ INP_INFO_RLOCK(&V_divcbinfo);
+ gencnt = V_divcbinfo.ipi_gencnt;
+ n = V_divcbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&V_divcbinfo);
+
+ error = sysctl_wire_old_buffer(req,
+ 2 * sizeof(xig) + n*sizeof(struct xinpcb));
+ if (error != 0)
+ return (error);
+
+ xig.xig_len = sizeof xig;
+ xig.xig_count = n;
+ xig.xig_gen = gencnt;
+ xig.xig_sogen = so_gencnt;
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ if (error)
+ return error;
+
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == 0)
+ return ENOMEM;
+
+ INP_INFO_RLOCK(&V_divcbinfo);
+ for (inp = LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = LIST_NEXT(inp, inp_list)) {
+ INP_WLOCK(inp);
+ if (inp->inp_gencnt <= gencnt &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
+ in_pcbref(inp);
+ inp_list[i++] = inp;
+ }
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(&V_divcbinfo);
+ n = i;
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (inp->inp_gencnt <= gencnt) {
+ struct xinpcb xi;
+ bzero(&xi, sizeof(xi));
+ xi.xi_len = sizeof xi;
+ /* XXX should avoid extra copy */
+ bcopy(inp, &xi.xi_inp, sizeof *inp);
+ if (inp->inp_socket)
+ sotoxsocket(inp->inp_socket, &xi.xi_socket);
+ INP_RUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xi, sizeof xi);
+ } else
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WLOCK(&V_divcbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_WLOCK(inp);
+ if (!in_pcbrele(inp))
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_divcbinfo);
+
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state.
+ * If the generation differs from what we told
+ * her before, she knows that something happened
+ * while we were processing this request, and it
+ * might be necessary to retry.
+ */
+ INP_INFO_RLOCK(&V_divcbinfo);
+ xig.xig_gen = V_divcbinfo.ipi_gencnt;
+ xig.xig_sogen = so_gencnt;
+ xig.xig_count = V_divcbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&V_divcbinfo);
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ }
+ free(inp_list, M_TEMP);
+ return error;
+}
+
+#ifdef SYSCTL_NODE
+SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0, "IPDIVERT");
+SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0,
+ div_pcblist, "S,xinpcb", "List of active divert sockets");
+#endif
+
+struct pr_usrreqs div_usrreqs = {
+ .pru_attach = div_attach,
+ .pru_bind = div_bind,
+ .pru_control = in_control,
+ .pru_detach = div_detach,
+ .pru_peeraddr = in_getpeeraddr,
+ .pru_send = div_send,
+ .pru_shutdown = div_shutdown,
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel
+};
+
+struct protosw div_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_protocol = IPPROTO_DIVERT,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = div_input,
+ .pr_ctlinput = div_ctlinput,
+ .pr_ctloutput = ip_ctloutput,
+ .pr_init = div_init,
+#ifdef VIMAGE
+ .pr_destroy = div_destroy,
+#endif
+ .pr_usrreqs = &div_usrreqs
+};
+
+static int
+div_modevent(module_t mod, int type, void *unused)
+{
+ int err = 0;
+#ifndef VIMAGE
+ int n;
+#endif
+
+ switch (type) {
+ case MOD_LOAD:
+ /*
+ * Protocol will be initialized by pf_proto_register().
+ * We don't have to register ip_protox because we are not
+ * a true IP protocol that goes over the wire.
+ */
+ err = pf_proto_register(PF_INET, &div_protosw);
+ if (err != 0)
+ return (err);
+ ip_divert_ptr = divert_packet;
+ ip_divert_event_tag = EVENTHANDLER_REGISTER(maxsockets_change,
+ div_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+ break;
+ case MOD_QUIESCE:
+ /*
+ * IPDIVERT may normally not be unloaded because of the
+ * potential race conditions. Tell kldunload we can't be
+ * unloaded unless the unload is forced.
+ */
+ err = EPERM;
+ break;
+ case MOD_UNLOAD:
+#ifdef VIMAGE
+ err = EPERM;
+ break;
+#else
+ /*
+ * Forced unload.
+ *
+ * Module ipdivert can only be unloaded if no sockets are
+ * connected. Maybe this can be changed later to forcefully
+ * disconnect any open sockets.
+ *
+ * XXXRW: Note that there is a slight race here, as a new
+ * socket open request could be spinning on the lock and then
+ * we destroy the lock.
+ */
+ INP_INFO_WLOCK(&V_divcbinfo);
+ n = V_divcbinfo.ipi_count;
+ if (n != 0) {
+ err = EBUSY;
+ INP_INFO_WUNLOCK(&V_divcbinfo);
+ break;
+ }
+ ip_divert_ptr = NULL;
+ err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
+ INP_INFO_WUNLOCK(&V_divcbinfo);
+ div_destroy();
+ EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag);
+ break;
+#endif /* !VIMAGE */
+ default:
+ err = EOPNOTSUPP;
+ break;
+ }
+ return err;
+}
+
+static moduledata_t ipdivertmod = {
+ "ipdivert",
+ div_modevent,
+ 0
+};
+
+DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+MODULE_DEPEND(ipdivert, ipfw, 2, 2, 2);
+MODULE_VERSION(ipdivert, 1);
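The long comment near the top of ip_divert.c describes the divert socket contract from the kernel side; the hedged sketch below shows the matching userland loop. It assumes an ipfw rule such as "ipfw add divert 2000 ip from any to any" is installed, opening the socket requires privilege (div_attach() checks PRIV_NETINET_DIVERT), and the port number and buffer size are arbitrary. Packets read from the socket are written straight back so that, per div_output(), processing resumes at the rule after the one recorded in sin_port.

#include <sys/types.h>
#include <sys/socket.h>

#include <arpa/inet.h>
#include <netinet/in.h>

#include <err.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct sockaddr_in bindaddr, tag;
	socklen_t taglen;
	char packet[65535];
	ssize_t n;
	int fd;

	fd = socket(PF_INET, SOCK_RAW, IPPROTO_DIVERT);
	if (fd < 0)
		err(1, "socket");

	memset(&bindaddr, 0, sizeof(bindaddr));
	bindaddr.sin_len = sizeof(bindaddr);
	bindaddr.sin_family = AF_INET;
	bindaddr.sin_port = htons(2000);	/* the divert port used in the ipfw rule */
	bindaddr.sin_addr.s_addr = INADDR_ANY;	/* div_bind() forces this anyway */
	if (bind(fd, (struct sockaddr *)&bindaddr, sizeof(bindaddr)) < 0)
		err(1, "bind");

	for (;;) {
		taglen = sizeof(tag);
		n = recvfrom(fd, packet, sizeof(packet), 0,
		    (struct sockaddr *)&tag, &taglen);
		if (n < 0)
			err(1, "recvfrom");

		/*
		 * tag.sin_port holds the matching rule number (host order),
		 * tag.sin_addr an interface address for incoming packets or
		 * INADDR_ANY for outgoing ones, and tag.sin_zero the receive
		 * interface name if it fits -- see divert_packet() above.
		 */
		printf("%zd bytes diverted by rule %u\n", n,
		    (unsigned)tag.sin_port);

		/* Reinject unchanged; ipfw resumes after rule tag.sin_port. */
		if (sendto(fd, packet, (size_t)n, 0,
		    (struct sockaddr *)&tag, sizeof(tag)) < 0)
			warn("sendto");
	}
	/* NOTREACHED */
}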
diff --git a/freebsd/sys/netinet/ip_divert.h b/freebsd/sys/netinet/ip_divert.h
new file mode 100644
index 00000000..eb9b33d4
--- /dev/null
+++ b/freebsd/sys/netinet/ip_divert.h
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2003 Sam Leffler, Errno Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
+ * redistribution must be conditioned upon including a substantially
+ * similar Disclaimer requirement for further binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGES.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_DIVERT_HH_
+#define _NETINET_IP_DIVERT_HH_
+
+/*
+ * divert has no custom kernel-userland API.
+ *
+ * All communication occurs through a raw divert socket, with metadata
+ * carried in the sockaddr_in, where
+ *
+ * kernel-->userland
+ * sin_port = matching rule, host format;
+ * sin_addr = IN: first address of the incoming interface;
+ * OUT: INADDR_ANY
+ * sin_zero = if fits, the interface name (max 7 bytes + NUL)
+ *
+ * userland->kernel
+ * sin_port = restart-rule - 1, host order
+ * (we restart at sin_port + 1)
+ * sin_addr = IN: address of the incoming interface;
+ * OUT: INADDR_ANY
+ */
+#endif /* _NETINET_IP_DIVERT_HH_ */
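For reference, a minimal userland sketch of the interface described in the
header above (not part of the imported sources): it binds a divert socket,
reads one diverted packet, and reinjects it unchanged. The divert port 8668
is an assumption and must match an ipfw "divert" rule; root privileges are
required.

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	unsigned char buf[65535];
	struct sockaddr_in sin;
	socklen_t slen = sizeof(sin);
	ssize_t n;
	int fd;

	fd = socket(PF_INET, SOCK_RAW, IPPROTO_DIVERT);
	if (fd < 0)
		return (1);
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(8668);		/* divert port from the ipfw rule */
	sin.sin_addr.s_addr = INADDR_ANY;
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
		return (1);

	/* sin now reports the matching rule (sin_port) and interface info. */
	n = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&sin, &slen);
	if (n < 0)
		return (1);

	/* Reinject unchanged; processing restarts after the matching rule. */
	if (sendto(fd, buf, (size_t)n, 0, (struct sockaddr *)&sin, slen) < 0)
		return (1);
	close(fd);
	return (0);
}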
diff --git a/freebsd/sys/netinet/ip_dummynet.h b/freebsd/sys/netinet/ip_dummynet.h
new file mode 100644
index 00000000..0bbc3263
--- /dev/null
+++ b/freebsd/sys/netinet/ip_dummynet.h
@@ -0,0 +1,263 @@
+/*-
+ * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
+ * Portions Copyright (c) 2000 Akamba Corp.
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_DUMMYNET_H
+#define _IP_DUMMYNET_H
+
+/*
+ * Definition of the kernel-userland API for dummynet.
+ *
+ * Setsockopt() and getsockopt() pass a batch of objects, each
+ * of them starting with a "struct dn_id" which should fully identify
+ * the object and its relation with others in the sequence.
+ * The first object in each request should have
+ * type= DN_CMD_*, id = DN_API_VERSION.
+ * For other objects, type and subtype specify the object, len indicates
+ * the total length including the header, and 'id' identifies the specific
+ * object.
+ *
+ * Most objects are numbered with an identifier in the range 1..65535.
+ * DN_MAX_ID indicates the first value outside the range.
+ */
+
+#define DN_API_VERSION 12500000
+#define DN_MAX_ID 0x10000
+
+struct dn_id {
+ uint16_t len; /* total obj len including this header */
+ uint8_t type;
+ uint8_t subtype;
+ uint32_t id; /* generic id */
+};
+
+/*
+ * These values are in the type field of struct dn_id.
+ * To preserve the ABI, never rearrange the list or delete
+ * entries with the exception of DN_LAST
+ */
+enum {
+ DN_NONE = 0,
+ DN_LINK = 1,
+ DN_FS,
+ DN_SCH,
+ DN_SCH_I,
+ DN_QUEUE,
+ DN_DELAY_LINE,
+ DN_PROFILE,
+ DN_FLOW, /* struct dn_flow */
+ DN_TEXT, /* opaque text is the object */
+
+ DN_CMD_CONFIG = 0x80, /* objects follow */
+ DN_CMD_DELETE, /* subtype + list of entries */
+ DN_CMD_GET, /* subtype + list of entries */
+ DN_CMD_FLUSH,
+ /* for compatibility with FreeBSD 7.2/8 */
+ DN_COMPAT_PIPE,
+ DN_COMPAT_QUEUE,
+ DN_GET_COMPAT,
+
+ /* special commands for emulation of sysctl variables */
+ DN_SYSCTL_GET,
+ DN_SYSCTL_SET,
+
+ DN_LAST,
+} ;
+
+enum { /* subtype for schedulers, flowset and the like */
+ DN_SCHED_UNKNOWN = 0,
+ DN_SCHED_FIFO = 1,
+ DN_SCHED_WF2QP = 2,
+ /* others are in individual modules */
+} ;
+
+enum { /* user flags */
+ DN_HAVE_MASK = 0x0001, /* fs or sched has a mask */
+ DN_NOERROR = 0x0002, /* do not report errors */
+ DN_QHT_HASH = 0x0004, /* qht is a hash table */
+ DN_QSIZE_BYTES = 0x0008, /* queue size is in bytes */
+ DN_HAS_PROFILE = 0x0010, /* a link has a profile */
+ DN_IS_RED = 0x0020,
+ DN_IS_GENTLE_RED= 0x0040,
+ DN_PIPE_CMD = 0x1000, /* pipe config... */
+};
+
+/*
+ * link template.
+ */
+struct dn_link {
+ struct dn_id oid;
+
+ /*
+ * Userland sets bw and delay in bits/s and milliseconds.
+ * The kernel converts this back and forth to bits/tick and ticks.
+ * XXX what about burst ?
+ */
+ int32_t link_nr;
+ int bandwidth; /* bit/s or bits/tick. */
+ int delay; /* ms and ticks */
+ uint64_t burst; /* scaled. bits*Hz XXX */
+} ;
+
+/*
+ * A flowset, which is a template for flows. Contains parameters
+ * from the command line: id, target scheduler, queue sizes, plr,
+ * flow masks, buckets for the flow hash, and possibly scheduler-
+ * specific parameters (weight, quantum and so on).
+ */
+struct dn_fs {
+ struct dn_id oid;
+ uint32_t fs_nr; /* the flowset number */
+ uint32_t flags; /* userland flags */
+ int qsize ; /* queue size in slots or bytes */
+ int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */
+ uint32_t buckets; /* buckets used for the queue hash table */
+
+ struct ipfw_flow_id flow_mask ;
+ uint32_t sched_nr; /* the scheduler we attach to */
+ /* generic scheduler parameters. Leave them at -1 if unset.
+ * Now we use 0: weight, 1: lmax, 2: priority
+ */
+ int par[4];
+
+ /* RED/GRED parameters.
+ * weight and probabilities are in the range 0..1 represented
+ * in fixed point arithmetic with SCALE_RED decimal bits.
+ */
+#define SCALE_RED 16
+#define SCALE(x) ( (x) << SCALE_RED )
+#define SCALE_VAL(x) ( (x) >> SCALE_RED )
+#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
+ int w_q ; /* queue weight (scaled) */
+ int max_th ; /* maximum threshold for queue (scaled) */
+ int min_th ; /* minimum threshold for queue (scaled) */
+ int max_p ; /* maximum value for p_b (scaled) */
+
+};
+
+/*
+ * dn_flow collects flow_id and stats for queues and scheduler
+ * instances, and is used to pass this information to userland.
+ * oid.type/oid.subtype describe the object, oid.id is the number
+ * of the parent object.
+ */
+struct dn_flow {
+ struct dn_id oid;
+ struct ipfw_flow_id fid;
+ uint64_t tot_pkts; /* statistics counters */
+ uint64_t tot_bytes;
+	uint32_t length;	/* Queue length, in packets */
+	uint32_t len_bytes;	/* Queue length, in bytes */
+ uint32_t drops;
+};
+
+
+ /*
+ * Scheduler template, mostly indicating the name, number,
+ * sched_mask and buckets.
+ */
+struct dn_sch {
+ struct dn_id oid;
+ uint32_t sched_nr; /* N, scheduler number */
+ uint32_t buckets; /* number of buckets for the instances */
+ uint32_t flags; /* have_mask, ... */
+
+ char name[16]; /* null terminated */
+ /* mask to select the appropriate scheduler instance */
+ struct ipfw_flow_id sched_mask; /* M */
+};
+
+
+/* A delay profile is attached to a link.
+ * Note that a profile, like any other object, cannot be longer than 2^16 bytes.
+ */
+#define ED_MAX_SAMPLES_NO 1024
+struct dn_profile {
+ struct dn_id oid;
+ /* fields to simulate a delay profile */
+#define ED_MAX_NAME_LEN 32
+ char name[ED_MAX_NAME_LEN];
+ int link_nr;
+ int loss_level;
+ int bandwidth; // XXX use link bandwidth?
+ int samples_no; /* actual length of samples[] */
+ int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
+};
+
+
+
+/*
+ * Overall structure of dummynet
+
+In dummynet, packets are selected with the firewall rules, and passed
+to two different objects: PIPE or QUEUE (bad name).
+
+A QUEUE defines a classifier, which groups packets into flows
+according to a 'mask', puts them into independent queues (one
+per flow) with configurable size and queue management policy,
+and passes flows to a scheduler:
+
+ (flow_mask|sched_mask) sched_mask
+ +---------+ weight Wx +-------------+
+ | |->-[flow]-->--| |-+
+ -->--| QUEUE x | ... | | |
+ | |->-[flow]-->--| SCHEDuler N | |
+ +---------+ | | |
+ ... | +--[LINK N]-->--
+ +---------+ weight Wy | | +--[LINK N]-->--
+ | |->-[flow]-->--| | |
+ -->--| QUEUE y | ... | | |
+ | |->-[flow]-->--| | |
+ +---------+ +-------------+ |
+ +-------------+
+
+Many QUEUE objects can connect to the same scheduler; each
+QUEUE object can have its own set of parameters.
+
+In turn, the SCHEDuler 'forks' multiple instances according
+to a 'sched_mask', each instance manages its own set of queues
+and transmits on a private instance of a configurable LINK.
+
+A PIPE is a simplified version of the above, where there
+is no flow_mask, and each scheduler instance handles a single queue.
+
+The following data structures (visible from userland) describe
+the objects used by dummynet:
+
+ + dn_link, contains the main configuration parameters related
+ to delay and bandwidth;
+ + dn_profile describes a delay profile;
+ + dn_flow describes the flow status (flow id, statistics)
+
+ + dn_sch describes a scheduler
+ + dn_fs describes a flowset (mask, weight, queue parameters)
+
+ *
+ */
+
+#endif /* _IP_DUMMYNET_H */
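A hedged userland sketch of the request format described at the top of this
header: it builds a dn_id header for a DN_CMD_GET request and retrieves the
configured scheduler objects via getsockopt(). The IP_DUMMYNET3 socket
option, the raw socket (root required), and the "list everything under
DN_SCH" interpretation are assumptions based on how ipfw(8) normally talks
to dummynet, not guarantees made by this header.

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char buf[8192];
	struct dn_id *oid = (struct dn_id *)buf;
	socklen_t len = sizeof(buf);
	int s;

	s = socket(PF_INET, SOCK_RAW, IPPROTO_RAW);
	if (s < 0)
		return (1);

	memset(buf, 0, sizeof(buf));
	oid->len = sizeof(*oid);	/* header-only request */
	oid->type = DN_CMD_GET;
	oid->subtype = DN_SCH;		/* ask for scheduler objects */
	oid->id = DN_API_VERSION;	/* first object carries the version */

	/* The control handler reads the request from the buffer, then fills it. */
	if (getsockopt(s, IPPROTO_IP, IP_DUMMYNET3, buf, &len) < 0)
		return (1);

	/* Walk the returned chain of dn_id-prefixed objects. */
	for (oid = (struct dn_id *)buf;
	    (char *)oid < buf + len && oid->len >= sizeof(*oid);
	    oid = (struct dn_id *)((char *)oid + oid->len))
		printf("type %u subtype %u len %u\n",
		    oid->type, oid->subtype, oid->len);
	return (0);
}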
diff --git a/freebsd/sys/netinet/ip_ecn.c b/freebsd/sys/netinet/ip_ecn.c
new file mode 100644
index 00000000..97b32b2c
--- /dev/null
+++ b/freebsd/sys/netinet/ip_ecn.c
@@ -0,0 +1,194 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $KAME: ip_ecn.c,v 1.12 2002/01/07 11:34:47 kjc Exp $ */
+
+/*-
+ * Copyright (C) 1999 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * ECN consideration on tunnel ingress/egress operation.
+ * http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/errno.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+
+#include <freebsd/netinet/ip_ecn.h>
+#ifdef INET6
+#include <freebsd/netinet6/ip6_ecn.h>
+#endif
+
+/*
+ * ECN and TOS (or TCLASS) processing rules at tunnel encapsulation and
+ * decapsulation from RFC3168:
+ *
+ * Outer Hdr at Inner Hdr at
+ * Encapsulator Decapsulator
+ * Header fields: -------------------- ------------
+ * DS Field copied from inner hdr no change
+ * ECN Field constructed by (I) constructed by (E)
+ *
+ * ECN_ALLOWED (full functionality):
+ * (I) if the ECN field in the inner header is set to CE, then set the
+ * ECN field in the outer header to ECT(0).
+ * otherwise, copy the ECN field to the outer header.
+ *
+ * (E) if the ECN field in the outer header is set to CE and the ECN
+ * field of the inner header is not-ECT, drop the packet.
+ * if the ECN field in the inner header is set to ECT(0) or ECT(1)
+ * and the ECN field in the outer header is set to CE, then copy CE to
+ * the inner header. otherwise, make no change to the inner header.
+ *
+ * ECN_FORBIDDEN (limited functionality):
+ * (I) set the ECN field to not-ECT in the outer header.
+ *
+ * (E) if the ECN field in the outer header is set to CE, drop the packet.
+ * otherwise, make no change to the ECN field in the inner header.
+ *
+ * the drop rule is for backward compatibility and protection against
+ * erasure of CE.
+ */
+
+/*
+ * modify outer ECN (TOS) field on ingress operation (tunnel encapsulation).
+ */
+void
+ip_ecn_ingress(int mode, u_int8_t *outer, const u_int8_t *inner)
+{
+
+ if (!outer || !inner)
+ panic("NULL pointer passed to ip_ecn_ingress");
+
+ *outer = *inner;
+ switch (mode) {
+ case ECN_ALLOWED: /* ECN allowed */
+ /*
+ * full-functionality: if the inner is CE, set ECT(0)
+ * to the outer. otherwise, copy the ECN field.
+ */
+ if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ *outer &= ~IPTOS_ECN_ECT1;
+ break;
+ case ECN_FORBIDDEN: /* ECN forbidden */
+ /*
+ * limited-functionality: set not-ECT to the outer
+ */
+ *outer &= ~IPTOS_ECN_MASK;
+ break;
+ case ECN_NOCARE: /* no consideration to ECN */
+ break;
+ }
+}
+
+/*
+ * modify inner ECN (TOS) field on egress operation (tunnel decapsulation).
+ * the caller should drop the packet if the return value is 0.
+ */
+int
+ip_ecn_egress(int mode, const u_int8_t *outer, u_int8_t *inner)
+{
+
+ if (!outer || !inner)
+ panic("NULL pointer passed to ip_ecn_egress");
+
+ switch (mode) {
+ case ECN_ALLOWED:
+ /*
+ * full-functionality: if the outer is CE and the inner is
+ * not-ECT, should drop it. otherwise, copy CE.
+ */
+ if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
+ if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+ return (0);
+ *inner |= IPTOS_ECN_CE;
+ }
+ break;
+ case ECN_FORBIDDEN: /* ECN forbidden */
+ /*
+ * limited-functionality: if the outer is CE, should drop it.
+ * otherwise, leave the inner.
+ */
+ if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ return (0);
+ break;
+ case ECN_NOCARE: /* no consideration to ECN */
+ break;
+ }
+ return (1);
+}
+
+#ifdef INET6
+void
+ip6_ecn_ingress(int mode, u_int32_t *outer, const u_int32_t *inner)
+{
+ u_int8_t outer8, inner8;
+
+ if (!outer || !inner)
+ panic("NULL pointer passed to ip6_ecn_ingress");
+
+ inner8 = (ntohl(*inner) >> 20) & 0xff;
+ ip_ecn_ingress(mode, &outer8, &inner8);
+ *outer &= ~htonl(0xff << 20);
+ *outer |= htonl((u_int32_t)outer8 << 20);
+}
+
+int
+ip6_ecn_egress(int mode, const u_int32_t *outer, u_int32_t *inner)
+{
+ u_int8_t outer8, inner8, oinner8;
+
+ if (!outer || !inner)
+ panic("NULL pointer passed to ip6_ecn_egress");
+
+ outer8 = (ntohl(*outer) >> 20) & 0xff;
+ inner8 = oinner8 = (ntohl(*inner) >> 20) & 0xff;
+ if (ip_ecn_egress(mode, &outer8, &inner8) == 0)
+ return (0);
+ if (inner8 != oinner8) {
+ *inner &= ~htonl(0xff << 20);
+ *inner |= htonl((u_int32_t)inner8 << 20);
+ }
+ return (1);
+}
+#endif
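Schematically, a tunnel driver would use the two functions above as follows.
This is a sketch only; the tun_encapsulate/tun_decapsulate names and the
kernel context are assumptions, mirroring how in_gif-style encapsulation
drives this API.

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_ecn.h>

/* Build the outer TOS/ECN byte from the inner header (rule (I) above). */
static void
tun_encapsulate(struct ip *outer, const struct ip *inner, int ecn_mode)
{
	ip_ecn_ingress(ecn_mode, &outer->ip_tos, &inner->ip_tos);
}

/*
 * Propagate CE to the inner header (rule (E) above); a return value of 0
 * means the caller must drop the packet.
 */
static int
tun_decapsulate(const struct ip *outer, struct ip *inner, int ecn_mode)
{
	return (ip_ecn_egress(ecn_mode, &outer->ip_tos, &inner->ip_tos));
}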
diff --git a/freebsd/sys/netinet/ip_ecn.h b/freebsd/sys/netinet/ip_ecn.h
new file mode 100644
index 00000000..271c8a47
--- /dev/null
+++ b/freebsd/sys/netinet/ip_ecn.h
@@ -0,0 +1,53 @@
+/* $FreeBSD$ */
+/* $KAME: ip_ecn.h,v 1.8 2002/01/07 11:34:47 kjc Exp $ */
+
+/*-
+ * Copyright (C) 1999 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * ECN consideration on tunnel ingress/egress operation.
+ * http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt
+ */
+
+#ifndef _NETINET_IP_ECN_HH_
+#define _NETINET_IP_ECN_HH_
+
+#if defined(_KERNEL) && !defined(_LKM)
+#include <freebsd/local/opt_inet.h>
+#endif
+
+#define ECN_ALLOWED 1 /* ECN allowed */
+#define ECN_FORBIDDEN 0 /* ECN forbidden */
+#define ECN_NOCARE (-1) /* no consideration to ECN */
+
+#ifdef _KERNEL
+extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *);
+extern int ip_ecn_egress(int, const u_int8_t *, u_int8_t *);
+#endif
+#endif
diff --git a/freebsd/sys/netinet/ip_encap.c b/freebsd/sys/netinet/ip_encap.c
new file mode 100644
index 00000000..45b0593c
--- /dev/null
+++ b/freebsd/sys/netinet/ip_encap.c
@@ -0,0 +1,465 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */
+
+/*-
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * My grandfather said that there's a devil inside tunnelling technology...
+ *
+ * We have surprisingly many protocols that want packets with IP protocol
+ * #4 or #41. Here's a list of protocols that want protocol #41:
+ * RFC1933 configured tunnel
+ * RFC1933 automatic tunnel
+ * RFC2401 IPsec tunnel
+ * RFC2473 IPv6 generic packet tunnelling
+ * RFC2529 6over4 tunnel
+ * mobile-ip6 (uses RFC2473)
+ * RFC3056 6to4 tunnel
+ * isatap tunnel
+ * Here's a list of protocols that want protocol #4:
+ * RFC1853 IPv4-in-IPv4 tunnelling
+ * RFC2003 IPv4 encapsulation within IPv4
+ * RFC2344 reverse tunnelling for mobile-ip4
+ * RFC2401 IPsec tunnel
+ * Well, what can I say. They each impose a different en/decapsulation
+ * mechanism, so they need separate protocol handlers. The only one
+ * we can easily determine by protocol # is IPsec, which always has
+ * AH/ESP/IPComp header right after outer IP header.
+ *
+ * So, clearly good old protosw does not work for protocol #4 and #41.
+ * The code will let you match protocol via src/dst address pair.
+ */
+/* XXX is M_NETADDR correct? */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_mrouting.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/queue.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_encap.h>
+
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/ip6protosw.h>
+#endif
+
+#include <freebsd/machine/stdarg.h>
+
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure");
+
+static void encap_add(struct encaptab *);
+static int mask_match(const struct encaptab *, const struct sockaddr *,
+ const struct sockaddr *);
+static void encap_fillarg(struct mbuf *, const struct encaptab *);
+
+/*
+ * All global variables in ip_encap.c are locked using encapmtx.
+ */
+static struct mtx encapmtx;
+MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF);
+LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab);
+
+/*
+ * We currently keep encap_init() for source code compatibility reasons --
+ * it's referenced by KAME pieces in netinet6.
+ */
+void
+encap_init(void)
+{
+}
+
+#ifdef INET
+void
+encap4_input(struct mbuf *m, int off)
+{
+ struct ip *ip;
+ int proto;
+ struct sockaddr_in s, d;
+ const struct protosw *psw;
+ struct encaptab *ep, *match;
+ int prio, matchprio;
+
+ ip = mtod(m, struct ip *);
+ proto = ip->ip_p;
+
+ bzero(&s, sizeof(s));
+ s.sin_family = AF_INET;
+ s.sin_len = sizeof(struct sockaddr_in);
+ s.sin_addr = ip->ip_src;
+ bzero(&d, sizeof(d));
+ d.sin_family = AF_INET;
+ d.sin_len = sizeof(struct sockaddr_in);
+ d.sin_addr = ip->ip_dst;
+
+ match = NULL;
+ matchprio = 0;
+ mtx_lock(&encapmtx);
+ LIST_FOREACH(ep, &encaptab, chain) {
+ if (ep->af != AF_INET)
+ continue;
+ if (ep->proto >= 0 && ep->proto != proto)
+ continue;
+ if (ep->func)
+ prio = (*ep->func)(m, off, proto, ep->arg);
+ else {
+ /*
+ * it's inbound traffic, we need to match in reverse
+ * order
+ */
+ prio = mask_match(ep, (struct sockaddr *)&d,
+ (struct sockaddr *)&s);
+ }
+
+ /*
+ * We prioritize the matches by using bit length of the
+ * matches. mask_match() and user-supplied matching function
+ * should return the bit length of the matches (for example,
+ * if both src/dst are matched for IPv4, 64 should be returned).
+ * 0 or negative return value means "it did not match".
+ *
+ * The question is, since we have two "mask" portions, we
+ * cannot really define a total order between entries.
+ * For example, which of these should be preferred?
+ * mask_match() returns 48 (32 + 16) for both of them.
+ * src=3ffe::/16, dst=3ffe:501::/32
+ * src=3ffe:501::/32, dst=3ffe::/16
+ *
+ * We need to loop through all the possible candidates
+ * to get the best match - the search takes O(n) for
+ * n attachments (i.e. interfaces).
+ */
+ if (prio <= 0)
+ continue;
+ if (prio > matchprio) {
+ matchprio = prio;
+ match = ep;
+ }
+ }
+ mtx_unlock(&encapmtx);
+
+ if (match) {
+ /* found a match, "match" has the best one */
+ psw = match->psw;
+ if (psw && psw->pr_input) {
+ encap_fillarg(m, match);
+ (*psw->pr_input)(m, off);
+ } else
+ m_freem(m);
+ return;
+ }
+
+ /* last resort: inject to raw socket */
+ rip_input(m, off);
+}
+#endif
+
+#ifdef INET6
+int
+encap6_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct mbuf *m = *mp;
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 s, d;
+ const struct ip6protosw *psw;
+ struct encaptab *ep, *match;
+ int prio, matchprio;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+
+ bzero(&s, sizeof(s));
+ s.sin6_family = AF_INET6;
+ s.sin6_len = sizeof(struct sockaddr_in6);
+ s.sin6_addr = ip6->ip6_src;
+ bzero(&d, sizeof(d));
+ d.sin6_family = AF_INET6;
+ d.sin6_len = sizeof(struct sockaddr_in6);
+ d.sin6_addr = ip6->ip6_dst;
+
+ match = NULL;
+ matchprio = 0;
+ mtx_lock(&encapmtx);
+ LIST_FOREACH(ep, &encaptab, chain) {
+ if (ep->af != AF_INET6)
+ continue;
+ if (ep->proto >= 0 && ep->proto != proto)
+ continue;
+ if (ep->func)
+ prio = (*ep->func)(m, *offp, proto, ep->arg);
+ else {
+ /*
+ * it's inbound traffic, we need to match in reverse
+ * order
+ */
+ prio = mask_match(ep, (struct sockaddr *)&d,
+ (struct sockaddr *)&s);
+ }
+
+ /* see encap4_input() for issues here */
+ if (prio <= 0)
+ continue;
+ if (prio > matchprio) {
+ matchprio = prio;
+ match = ep;
+ }
+ }
+ mtx_unlock(&encapmtx);
+
+ if (match) {
+ /* found a match */
+ psw = (const struct ip6protosw *)match->psw;
+ if (psw && psw->pr_input) {
+ encap_fillarg(m, match);
+ return (*psw->pr_input)(mp, offp, proto);
+ } else {
+ m_freem(m);
+ return IPPROTO_DONE;
+ }
+ }
+
+ /* last resort: inject to raw socket */
+ return rip6_input(mp, offp, proto);
+}
+#endif
+
+/*lint -sem(encap_add, custodial(1)) */
+static void
+encap_add(struct encaptab *ep)
+{
+
+ mtx_assert(&encapmtx, MA_OWNED);
+ LIST_INSERT_HEAD(&encaptab, ep, chain);
+}
+
+/*
+ * sp (src ptr) is always my side, and dp (dst ptr) is always the remote side.
+ * The length of the masks (sm and dm) is assumed to be the same as sp/dp.
+ * Return value will be necessary as input (cookie) for encap_detach().
+ */
+const struct encaptab *
+encap_attach(int af, int proto, const struct sockaddr *sp,
+ const struct sockaddr *sm, const struct sockaddr *dp,
+ const struct sockaddr *dm, const struct protosw *psw, void *arg)
+{
+ struct encaptab *ep;
+
+ /* sanity check on args */
+ if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst))
+ return (NULL);
+ if (sp->sa_len != dp->sa_len)
+ return (NULL);
+ if (af != sp->sa_family || af != dp->sa_family)
+ return (NULL);
+
+	/* check if anyone has already attached with exactly the same config */
+ mtx_lock(&encapmtx);
+ LIST_FOREACH(ep, &encaptab, chain) {
+ if (ep->af != af)
+ continue;
+ if (ep->proto != proto)
+ continue;
+ if (ep->src.ss_len != sp->sa_len ||
+ bcmp(&ep->src, sp, sp->sa_len) != 0 ||
+ bcmp(&ep->srcmask, sm, sp->sa_len) != 0)
+ continue;
+ if (ep->dst.ss_len != dp->sa_len ||
+ bcmp(&ep->dst, dp, dp->sa_len) != 0 ||
+ bcmp(&ep->dstmask, dm, dp->sa_len) != 0)
+ continue;
+
+ mtx_unlock(&encapmtx);
+ return (NULL);
+ }
+
+ ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/
+ if (ep == NULL) {
+ mtx_unlock(&encapmtx);
+ return (NULL);
+ }
+ bzero(ep, sizeof(*ep));
+
+ ep->af = af;
+ ep->proto = proto;
+ bcopy(sp, &ep->src, sp->sa_len);
+ bcopy(sm, &ep->srcmask, sp->sa_len);
+ bcopy(dp, &ep->dst, dp->sa_len);
+ bcopy(dm, &ep->dstmask, dp->sa_len);
+ ep->psw = psw;
+ ep->arg = arg;
+
+ encap_add(ep);
+ mtx_unlock(&encapmtx);
+ return (ep);
+}
+
+const struct encaptab *
+encap_attach_func(int af, int proto,
+ int (*func)(const struct mbuf *, int, int, void *),
+ const struct protosw *psw, void *arg)
+{
+ struct encaptab *ep;
+
+ /* sanity check on args */
+ if (!func)
+ return (NULL);
+
+ ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/
+ if (ep == NULL)
+ return (NULL);
+ bzero(ep, sizeof(*ep));
+
+ ep->af = af;
+ ep->proto = proto;
+ ep->func = func;
+ ep->psw = psw;
+ ep->arg = arg;
+
+ mtx_lock(&encapmtx);
+ encap_add(ep);
+ mtx_unlock(&encapmtx);
+ return (ep);
+}
+
+int
+encap_detach(const struct encaptab *cookie)
+{
+ const struct encaptab *ep = cookie;
+ struct encaptab *p;
+
+ mtx_lock(&encapmtx);
+ LIST_FOREACH(p, &encaptab, chain) {
+ if (p == ep) {
+ LIST_REMOVE(p, chain);
+ mtx_unlock(&encapmtx);
+ free(p, M_NETADDR); /*XXX*/
+ return 0;
+ }
+ }
+ mtx_unlock(&encapmtx);
+
+ return EINVAL;
+}
+
+static int
+mask_match(const struct encaptab *ep, const struct sockaddr *sp,
+ const struct sockaddr *dp)
+{
+ struct sockaddr_storage s;
+ struct sockaddr_storage d;
+ int i;
+ const u_int8_t *p, *q;
+ u_int8_t *r;
+ int matchlen;
+
+ if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
+ return 0;
+ if (sp->sa_family != ep->af || dp->sa_family != ep->af)
+ return 0;
+ if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len)
+ return 0;
+
+ matchlen = 0;
+
+ p = (const u_int8_t *)sp;
+ q = (const u_int8_t *)&ep->srcmask;
+ r = (u_int8_t *)&s;
+ for (i = 0 ; i < sp->sa_len; i++) {
+ r[i] = p[i] & q[i];
+ /* XXX estimate */
+ matchlen += (q[i] ? 8 : 0);
+ }
+
+ p = (const u_int8_t *)dp;
+ q = (const u_int8_t *)&ep->dstmask;
+ r = (u_int8_t *)&d;
+ for (i = 0 ; i < dp->sa_len; i++) {
+ r[i] = p[i] & q[i];
+ /* XXX rough estimate */
+ matchlen += (q[i] ? 8 : 0);
+ }
+
+ /* need to overwrite len/family portion as we don't compare them */
+ s.ss_len = sp->sa_len;
+ s.ss_family = sp->sa_family;
+ d.ss_len = dp->sa_len;
+ d.ss_family = dp->sa_family;
+
+ if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 &&
+ bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) {
+ return matchlen;
+ } else
+ return 0;
+}
+
+static void
+encap_fillarg(struct mbuf *m, const struct encaptab *ep)
+{
+ struct m_tag *tag;
+
+ tag = m_tag_get(PACKET_TAG_ENCAP, sizeof (void*), M_NOWAIT);
+ if (tag) {
+ *(void**)(tag+1) = ep->arg;
+ m_tag_prepend(m, tag);
+ }
+}
+
+void *
+encap_getarg(struct mbuf *m)
+{
+ void *p = NULL;
+ struct m_tag *tag;
+
+ tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL);
+ if (tag) {
+ p = *(void**)(tag+1);
+ m_tag_delete(m, tag);
+ }
+ return p;
+}
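As a usage sketch, a hypothetical tunnel module could register a fixed
address pair for IPv4-in-IPv4 traffic as shown below. "mytun_protosw" (with
a valid pr_input handler), the helper names, and the error handling are
assumptions for illustration, not part of the imported code.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/errno.h>
#include <netinet/in.h>
#include <netinet/ip_encap.h>

extern struct protosw mytun_protosw;	/* assumed: pr_input handles the tunnel */

static const struct encaptab *mytun_cookie;

static int
mytun_attach(struct sockaddr_in *local, struct sockaddr_in *remote)
{
	struct sockaddr_in mask;

	bzero(&mask, sizeof(mask));
	mask.sin_family = AF_INET;
	mask.sin_len = sizeof(mask);
	mask.sin_addr.s_addr = htonl(0xffffffff);	/* exact match, /32 */

	/* "local" is my side (sp), "remote" is the far end (dp). */
	mytun_cookie = encap_attach(AF_INET, IPPROTO_IPV4,
	    (struct sockaddr *)local, (struct sockaddr *)&mask,
	    (struct sockaddr *)remote, (struct sockaddr *)&mask,
	    &mytun_protosw, NULL);
	return (mytun_cookie == NULL ? EEXIST : 0);
}

static void
mytun_detach(void)
{
	if (mytun_cookie != NULL)
		encap_detach(mytun_cookie);
}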
diff --git a/freebsd/sys/netinet/ip_encap.h b/freebsd/sys/netinet/ip_encap.h
new file mode 100644
index 00000000..44dd1a0d
--- /dev/null
+++ b/freebsd/sys/netinet/ip_encap.h
@@ -0,0 +1,64 @@
+/* $FreeBSD$ */
+/* $KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $ */
+
+/*-
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETINET_IP_ENCAP_HH_
+#define _NETINET_IP_ENCAP_HH_
+
+#ifdef _KERNEL
+
+struct encaptab {
+ LIST_ENTRY(encaptab) chain;
+ int af;
+ int proto; /* -1: don't care, I'll check myself */
+ struct sockaddr_storage src; /* my addr */
+ struct sockaddr_storage srcmask;
+ struct sockaddr_storage dst; /* remote addr */
+ struct sockaddr_storage dstmask;
+ int (*func)(const struct mbuf *, int, int, void *);
+ const struct protosw *psw; /* only pr_input will be used */
+ void *arg; /* passed via m->m_pkthdr.aux */
+};
+
+void encap_init(void);
+void encap4_input(struct mbuf *, int);
+int encap6_input(struct mbuf **, int *, int);
+const struct encaptab *encap_attach(int, int, const struct sockaddr *,
+ const struct sockaddr *, const struct sockaddr *,
+ const struct sockaddr *, const struct protosw *, void *);
+const struct encaptab *encap_attach_func(int, int,
+ int (*)(const struct mbuf *, int, int, void *),
+ const struct protosw *, void *);
+int encap_detach(const struct encaptab *);
+void *encap_getarg(struct mbuf *);
+#endif
+
+#endif /*_NETINET_IP_ENCAP_HH_*/
diff --git a/freebsd/sys/netinet/ip_fastfwd.c b/freebsd/sys/netinet/ip_fastfwd.c
new file mode 100644
index 00000000..6d406b2b
--- /dev/null
+++ b/freebsd/sys/netinet/ip_fastfwd.c
@@ -0,0 +1,619 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * ip_fastforward gets its speed from processing the forwarded packet to
+ * completion (if_output on the other side) without any queues or netisr's.
+ * The receiving interface DMAs the packet into memory, the upper half of
+ * the driver calls ip_fastforward, we do our routing table lookup and directly
+ * send it off to the outgoing interface, which DMAs the packet to the
+ * network card. The only part of the packet we touch with the CPU is the
+ * IP header (unless there are complex firewall rules touching other parts
+ * of the packet, but that is up to you). We are essentially limited by bus
+ * bandwidth and how fast the network card/driver can set up receives and
+ * transmits.
+ *
+ * We handle basic errors, IP header errors, checksum errors,
+ * destination unreachable, fragmentation and fragmentation needed and
+ * report them via ICMP to the sender.
+ *
+ * Else if something is not pure IPv4 unicast forwarding we fall back to
+ * the normal ip_input processing path. We should only be called from
+ * interfaces connected to the outside world.
+ *
+ * Firewalling is fully supported, including divert, ipfw fwd, ipfilter
+ * ipnat, and address rewriting.
+ *
+ * IPSEC is not supported if this host is a tunnel broker. IPSEC is
+ * supported for connections to/from local host.
+ *
+ * We try to do the least expensive (in CPU ops) checks and operations
+ * first to catch junk with as little overhead as possible.
+ *
+ * We take full advantage of hardware support for IP checksum and
+ * fragmentation offloading.
+ *
+ * We don't do ICMP redirect in the fast forwarding path. I have had my own
+ * cases where two core routers with the Zebra routing suite would send millions
+ * of ICMP redirects to connected hosts if the destination router was not the
+ * default gateway. In one case it was filling the routing table of a host
+ * with approximately 300,000 cloned redirect entries until it ran out of
+ * kernel memory. However, the networking code proved very robust and it didn't
+ * crash or fail in other ways.
+ */
+
+/*
+ * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which
+ * is being followed here.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_ipstealth.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/pfil.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/ip_options.h>
+
+#include <freebsd/machine/in_cksum.h>
+
+static VNET_DEFINE(int, ipfastforward_active);
+#define V_ipfastforward_active VNET(ipfastforward_active)
+
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW,
+ &VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding");
+
+static struct sockaddr_in *
+ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
+{
+ struct sockaddr_in *dst;
+ struct rtentry *rt;
+
+ /*
+ * Find route to destination.
+ */
+ bzero(ro, sizeof(*ro));
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr.s_addr = dest.s_addr;
+ in_rtalloc_ign(ro, 0, M_GETFIB(m));
+
+ /*
+ * Route there and interface still up?
+ */
+ rt = ro->ro_rt;
+ if (rt && (rt->rt_flags & RTF_UP) &&
+ (rt->rt_ifp->if_flags & IFF_UP) &&
+ (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if (rt->rt_flags & RTF_GATEWAY)
+ dst = (struct sockaddr_in *)rt->rt_gateway;
+ } else {
+ IPSTAT_INC(ips_noroute);
+ IPSTAT_INC(ips_cantforward);
+ if (rt)
+ RTFREE(rt);
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
+ return NULL;
+ }
+ return dst;
+}
+
+/*
+ * Try to forward a packet based on the destination address.
+ * This is a fast path optimized for the plain forwarding case.
+ * If the packet is handled (and consumed) here then NULL is returned;
+ * otherwise the mbuf is returned and the packet should be delivered
+ * to ip_input for full processing.
+ */
+struct mbuf *
+ip_fastforward(struct mbuf *m)
+{
+ struct ip *ip;
+ struct mbuf *m0 = NULL;
+ struct route ro;
+ struct sockaddr_in *dst = NULL;
+ struct ifnet *ifp;
+ struct in_addr odest, dest;
+ u_short sum, ip_len;
+ int error = 0;
+ int hlen, mtu;
+#ifdef IPFIREWALL_FORWARD
+ struct m_tag *fwd_tag;
+#endif
+
+ /*
+ * Are we active and forwarding packets?
+ */
+ if (!V_ipfastforward_active || !V_ipforwarding)
+ return m;
+
+ M_ASSERTVALID(m);
+ M_ASSERTPKTHDR(m);
+
+ bzero(&ro, sizeof(ro));
+
+ /*
+ * Step 1: check for packet drop conditions (and sanity checks)
+ */
+
+ /*
+ * Is entire packet big enough?
+ */
+ if (m->m_pkthdr.len < sizeof(struct ip)) {
+ IPSTAT_INC(ips_tooshort);
+ goto drop;
+ }
+
+ /*
+ * Is first mbuf large enough for ip header and is header present?
+ */
+ if (m->m_len < sizeof (struct ip) &&
+ (m = m_pullup(m, sizeof (struct ip))) == NULL) {
+ IPSTAT_INC(ips_toosmall);
+ return NULL; /* mbuf already free'd */
+ }
+
+ ip = mtod(m, struct ip *);
+
+ /*
+ * Is it IPv4?
+ */
+ if (ip->ip_v != IPVERSION) {
+ IPSTAT_INC(ips_badvers);
+ goto drop;
+ }
+
+ /*
+ * Is IP header length correct and is it in first mbuf?
+ */
+ hlen = ip->ip_hl << 2;
+ if (hlen < sizeof(struct ip)) { /* minimum header length */
+ IPSTAT_INC(ips_badhlen);
+ goto drop;
+ }
+ if (hlen > m->m_len) {
+ if ((m = m_pullup(m, hlen)) == NULL) {
+ IPSTAT_INC(ips_badhlen);
+ return NULL; /* mbuf already free'd */
+ }
+ ip = mtod(m, struct ip *);
+ }
+
+ /*
+ * Checksum correct?
+ */
+ if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)
+ sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
+ else {
+ if (hlen == sizeof(struct ip))
+ sum = in_cksum_hdr(ip);
+ else
+ sum = in_cksum(m, hlen);
+ }
+ if (sum) {
+ IPSTAT_INC(ips_badsum);
+ goto drop;
+ }
+
+ /*
+ * Remember that we have checked the IP header and found it valid.
+ */
+ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
+
+ ip_len = ntohs(ip->ip_len);
+
+ /*
+ * Is IP length longer than packet we have got?
+ */
+ if (m->m_pkthdr.len < ip_len) {
+ IPSTAT_INC(ips_tooshort);
+ goto drop;
+ }
+
+ /*
+ * Is packet longer than IP header tells us? If yes, truncate packet.
+ */
+ if (m->m_pkthdr.len > ip_len) {
+ if (m->m_len == m->m_pkthdr.len) {
+ m->m_len = ip_len;
+ m->m_pkthdr.len = ip_len;
+ } else
+ m_adj(m, ip_len - m->m_pkthdr.len);
+ }
+
+ /*
+ * Is packet from or to 127/8?
+ */
+ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+ (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
+ IPSTAT_INC(ips_badaddr);
+ goto drop;
+ }
+
+#ifdef ALTQ
+ /*
+ * Is packet dropped by traffic conditioner?
+ */
+ if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
+ goto drop;
+#endif
+
+ /*
+ * Step 2: fallback conditions to normal ip_input path processing
+ */
+
+ /*
+ * Only IP packets without options
+ */
+ if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
+ if (ip_doopts == 1)
+ return m;
+ else if (ip_doopts == 2) {
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
+ 0, 0);
+ return NULL; /* mbuf already free'd */
+ }
+ /* else ignore IP options and continue */
+ }
+
+ /*
+ * Only unicast IP, not from loopback, no L2 or IP broadcast,
+ * no multicast, no INADDR_ANY
+ *
+ * XXX: Probably some of these checks could be direct drop
+ * conditions. However it is not clear whether there are some
+ * hacks or obscure behaviours which make it necessary to
+ * let ip_input handle it. We play safe here and let ip_input
+ * deal with it until it is proven that we can directly drop it.
+ */
+ if ((m->m_flags & (M_BCAST|M_MCAST)) ||
+ (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
+ ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST ||
+ ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
+ IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+ IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) ||
+ IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
+ ip->ip_src.s_addr == INADDR_ANY ||
+ ip->ip_dst.s_addr == INADDR_ANY )
+ return m;
+
+ /*
+ * Is it for a local address on this host?
+ */
+ if (in_localip(ip->ip_dst))
+ return m;
+
+ IPSTAT_INC(ips_total);
+
+ /*
+ * Step 3: incoming packet firewall processing
+ */
+
+ /*
+ * Convert to host representation
+ */
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+
+ odest.s_addr = dest.s_addr = ip->ip_dst.s_addr;
+
+ /*
+ * Run through list of ipfilter hooks for input packets
+ */
+ if (!PFIL_HOOKED(&V_inet_pfil_hook))
+ goto passin;
+
+ if (pfil_run_hooks(
+ &V_inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) ||
+ m == NULL)
+ goto drop;
+
+ M_ASSERTVALID(m);
+ M_ASSERTPKTHDR(m);
+
+ ip = mtod(m, struct ip *); /* m may have changed by pfil hook */
+ dest.s_addr = ip->ip_dst.s_addr;
+
+ /*
+ * Destination address changed?
+ */
+ if (odest.s_addr != dest.s_addr) {
+ /*
+ * Is it now for a local address on this host?
+ */
+ if (in_localip(dest))
+ goto forwardlocal;
+ /*
+ * Go on with new destination address
+ */
+ }
+#ifdef IPFIREWALL_FORWARD
+ if (m->m_flags & M_FASTFWD_OURS) {
+ /*
+ * ipfw changed it for a local address on this host.
+ */
+ goto forwardlocal;
+ }
+#endif /* IPFIREWALL_FORWARD */
+
+passin:
+ /*
+ * Step 4: decrement TTL and look up route
+ */
+
+ /*
+ * Check TTL
+ */
+#ifdef IPSTEALTH
+ if (!V_ipstealth) {
+#endif
+ if (ip->ip_ttl <= IPTTLDEC) {
+ icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
+ return NULL; /* mbuf already free'd */
+ }
+
+ /*
+ * Decrement the TTL and incrementally change the IP header checksum.
+ * Don't bother doing this with hw checksum offloading, it's faster
+ * doing it right here.
+ */
+ ip->ip_ttl -= IPTTLDEC;
+ if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
+ ip->ip_sum -= ~htons(IPTTLDEC << 8);
+ else
+ ip->ip_sum += htons(IPTTLDEC << 8);
+#ifdef IPSTEALTH
+ }
+#endif
+
+ /*
+ * Find route to destination.
+ */
+ if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+ return NULL; /* icmp unreach already sent */
+ ifp = ro.ro_rt->rt_ifp;
+
+ /*
+ * Immediately drop blackholed traffic, and directed broadcasts
+ * for either the all-ones or all-zero subnet addresses on
+ * locally attached networks.
+ */
+ if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0)
+ goto drop;
+
+ /*
+ * Step 5: outgoing firewall packet processing
+ */
+
+ /*
+ * Run through list of hooks for output packets.
+ */
+ if (!PFIL_HOOKED(&V_inet_pfil_hook))
+ goto passout;
+
+ if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, NULL) || m == NULL) {
+ goto drop;
+ }
+
+ M_ASSERTVALID(m);
+ M_ASSERTPKTHDR(m);
+
+ ip = mtod(m, struct ip *);
+ dest.s_addr = ip->ip_dst.s_addr;
+
+ /*
+ * Destination address changed?
+ */
+#ifndef IPFIREWALL_FORWARD
+ if (odest.s_addr != dest.s_addr) {
+#else
+ fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+ if (odest.s_addr != dest.s_addr || fwd_tag != NULL) {
+#endif /* IPFIREWALL_FORWARD */
+ /*
+ * Is it now for a local address on this host?
+ */
+#ifndef IPFIREWALL_FORWARD
+ if (in_localip(dest)) {
+#else
+ if (m->m_flags & M_FASTFWD_OURS || in_localip(dest)) {
+#endif /* IPFIREWALL_FORWARD */
+forwardlocal:
+ /*
+ * Return packet for processing by ip_input().
+ * Keep host byte order as expected at ip_input's
+ * "ours"-label.
+ */
+ m->m_flags |= M_FASTFWD_OURS;
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ return m;
+ }
+ /*
+ * Redo route lookup with new destination address
+ */
+#ifdef IPFIREWALL_FORWARD
+ if (fwd_tag) {
+ dest.s_addr = ((struct sockaddr_in *)
+ (fwd_tag + 1))->sin_addr.s_addr;
+ m_tag_delete(m, fwd_tag);
+ }
+#endif /* IPFIREWALL_FORWARD */
+ RTFREE(ro.ro_rt);
+ if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+ return NULL; /* icmp unreach already sent */
+ ifp = ro.ro_rt->rt_ifp;
+ }
+
+passout:
+ /*
+ * Step 6: send off the packet
+ */
+
+ /*
+ * Check if route is dampened (when ARP is unable to resolve)
+ */
+ if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
+ (ro.ro_rt->rt_rmx.rmx_expire == 0 ||
+ time_uptime < ro.ro_rt->rt_rmx.rmx_expire)) {
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
+ goto consumed;
+ }
+
+#ifndef ALTQ
+ /*
+ * Check if there is enough space in the interface queue
+ */
+ if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+ ifp->if_snd.ifq_maxlen) {
+ IPSTAT_INC(ips_odropped);
+		/* would send source quench here but that is deprecated */
+ goto drop;
+ }
+#endif
+
+ /*
+ * Check if media link state of interface is not down
+ */
+ if (ifp->if_link_state == LINK_STATE_DOWN) {
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
+ goto consumed;
+ }
+
+ /*
+ * Check if packet fits MTU or if hardware will fragment for us
+ */
+ if (ro.ro_rt->rt_rmx.rmx_mtu)
+ mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+
+ if (ip->ip_len <= mtu ||
+ (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
+ /*
+ * Restore packet header fields to original values
+ */
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ /*
+ * Send off the packet via outgoing interface
+ */
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, &ro);
+ } else {
+ /*
+ * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
+ */
+ if (ip->ip_off & IP_DF) {
+ IPSTAT_INC(ips_cantfrag);
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+ 0, mtu);
+ goto consumed;
+ } else {
+ /*
+ * We have to fragment the packet
+ */
+ m->m_pkthdr.csum_flags |= CSUM_IP;
+ /*
+ * ip_fragment expects ip_len and ip_off in host byte
+ * order but returns all packets in network byte order
+ */
+ if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
+ (~ifp->if_hwassist & CSUM_DELAY_IP))) {
+ goto drop;
+ }
+ KASSERT(m != NULL, ("null mbuf and no error"));
+ /*
+ * Send off the fragments via outgoing interface
+ */
+ error = 0;
+ do {
+ m0 = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, &ro);
+ if (error)
+ break;
+ } while ((m = m0) != NULL);
+ if (error) {
+ /* Reclaim remaining fragments */
+ for (m = m0; m; m = m0) {
+ m0 = m->m_nextpkt;
+ m_freem(m);
+ }
+ } else
+ IPSTAT_INC(ips_fragmented);
+ }
+ }
+
+ if (error != 0)
+ IPSTAT_INC(ips_odropped);
+ else {
+ ro.ro_rt->rt_rmx.rmx_pksent++;
+ IPSTAT_INC(ips_forward);
+ IPSTAT_INC(ips_fastforward);
+ }
+consumed:
+ RTFREE(ro.ro_rt);
+ return NULL;
+drop:
+ if (m)
+ m_freem(m);
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ return NULL;
+}
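The TTL/checksum trick in step 4 above can be checked in isolation; the
standalone sketch below (userland, purely illustrative, with a sample header
chosen for the test) applies the same incremental update and verifies it
against a full header checksum recomputation.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define IPTTLDEC 1

static uint16_t
cksum(const uint8_t *p, size_t len)
{
	uint32_t sum = 0;
	uint16_t w;

	for (; len > 1; len -= 2, p += 2) {
		memcpy(&w, p, sizeof(w));
		sum += w;
	}
	if (len)
		sum += *p;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}

int
main(void)
{
	/* 20-byte IPv4 header, checksum field (bytes 10-11) still zero. */
	uint8_t hdr[20] = {
		0x45, 0x00, 0x00, 0x73, 0x00, 0x00, 0x40, 0x00,
		0x40, 0x11, 0x00, 0x00, 0xc0, 0xa8, 0x00, 0x01,
		0xc0, 0xa8, 0x00, 0xc7
	};
	uint16_t sum, inc;

	sum = cksum(hdr, sizeof(hdr));		/* original checksum */
	memcpy(&hdr[10], &sum, sizeof(sum));

	hdr[8] -= IPTTLDEC;			/* ip_ttl -= IPTTLDEC */
	if (sum >= (uint16_t)~htons(IPTTLDEC << 8))
		inc = sum - (uint16_t)~htons(IPTTLDEC << 8);
	else
		inc = sum + htons(IPTTLDEC << 8);
	memcpy(&hdr[10], &inc, sizeof(inc));

	/* A full recomputation over the updated header must now yield 0. */
	printf("incremental update %s\n",
	    cksum(hdr, sizeof(hdr)) == 0 ? "matches" : "does not match");
	return (0);
}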
diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h
new file mode 100644
index 00000000..cf5d8d03
--- /dev/null
+++ b/freebsd/sys/netinet/ip_fw.h
@@ -0,0 +1,579 @@
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IPFW2_H
+#define _IPFW2_H
+
+/*
+ * The default rule number. By the design of ip_fw, the default rule
+ * is the last one, so its number can also serve as the highest number
+ * allowed for a rule. The ip_fw code relies on both meanings of this
+ * constant.
+ */
+#define IPFW_DEFAULT_RULE 65535
+
+/*
+ * The number of ipfw tables. The maximum allowed table number is the
+ * (IPFW_TABLES_MAX - 1).
+ */
+#define IPFW_TABLES_MAX 128
+
+/*
+ * Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit
+ * argument between 1 and 65534. The value 0 is unused, the value
+ * 65535 (IP_FW_TABLEARG) is used to represent 'tablearg', i.e. the
+ * result of the most recent table() lookup.
+ * Note that 16bit is only a historical limit, resulting from
+ * the use of a 16-bit field for that value. In reality, we can have
+ * 2^32 pipes, queues, tag values and so on, and use 0 as a tablearg.
+ */
+#define IPFW_ARG_MIN 1
+#define IPFW_ARG_MAX 65534
+#define IP_FW_TABLEARG 65535 /* XXX should use 0 */
+
+/*
+ * The kernel representation of ipfw rules is made of a list of
+ * 'instructions' (for all practical purposes equivalent to BPF
+ * instructions), which specify which fields of the packet
+ * (or its metadata) should be analysed.
+ *
+ * Each instruction is stored in a structure which begins with
+ * "ipfw_insn", and can contain extra fields depending on the
+ * instruction type (listed below).
+ * Note that the code is written so that individual instructions
+ * have a size which is a multiple of 32 bits. This means that, if
+ * such structures contain pointers or other 64-bit entities
+ * (there is just one instance now), they may end up unaligned on
+ * 64-bit architectures, so they must be handled with care.
+ *
+ * "enum ipfw_opcodes" are the opcodes supported. We can have up
+ * to 256 different opcodes. When adding new opcodes, they should
+ * be appended to the end of the opcode list before O_LAST_OPCODE;
+ * this will prevent the ABI from being broken. Otherwise users
+ * will have to recompile ipfw(8) when they update the kernel.
+ */
+
+enum ipfw_opcodes { /* arguments (4 byte each) */
+ O_NOP,
+
+ O_IP_SRC, /* u32 = IP */
+ O_IP_SRC_MASK, /* ip = IP/mask */
+ O_IP_SRC_ME, /* none */
+ O_IP_SRC_SET, /* u32=base, arg1=len, bitmap */
+
+ O_IP_DST, /* u32 = IP */
+ O_IP_DST_MASK, /* ip = IP/mask */
+ O_IP_DST_ME, /* none */
+ O_IP_DST_SET, /* u32=base, arg1=len, bitmap */
+
+ O_IP_SRCPORT, /* (n)port list:mask 4 byte ea */
+ O_IP_DSTPORT, /* (n)port list:mask 4 byte ea */
+ O_PROTO, /* arg1=protocol */
+
+ O_MACADDR2, /* 2 mac addr:mask */
+ O_MAC_TYPE, /* same as srcport */
+
+ O_LAYER2, /* none */
+ O_IN, /* none */
+ O_FRAG, /* none */
+
+ O_RECV, /* none */
+ O_XMIT, /* none */
+ O_VIA, /* none */
+
+ O_IPOPT, /* arg1 = 2*u8 bitmap */
+ O_IPLEN, /* arg1 = len */
+ O_IPID, /* arg1 = id */
+
+	O_IPTOS,		/* arg1 = tos */
+ O_IPPRECEDENCE, /* arg1 = precedence << 5 */
+ O_IPTTL, /* arg1 = TTL */
+
+ O_IPVER, /* arg1 = version */
+ O_UID, /* u32 = id */
+ O_GID, /* u32 = id */
+ O_ESTAB, /* none (tcp established) */
+ O_TCPFLAGS, /* arg1 = 2*u8 bitmap */
+ O_TCPWIN, /* arg1 = desired win */
+ O_TCPSEQ, /* u32 = desired seq. */
+ O_TCPACK, /* u32 = desired seq. */
+ O_ICMPTYPE, /* u32 = icmp bitmap */
+ O_TCPOPTS, /* arg1 = 2*u8 bitmap */
+
+ O_VERREVPATH, /* none */
+ O_VERSRCREACH, /* none */
+
+ O_PROBE_STATE, /* none */
+ O_KEEP_STATE, /* none */
+ O_LIMIT, /* ipfw_insn_limit */
+ O_LIMIT_PARENT, /* dyn_type, not an opcode. */
+
+ /*
+ * These are really 'actions'.
+ */
+
+ O_LOG, /* ipfw_insn_log */
+ O_PROB, /* u32 = match probability */
+
+ O_CHECK_STATE, /* none */
+ O_ACCEPT, /* none */
+ O_DENY, /* none */
+ O_REJECT, /* arg1=icmp arg (same as deny) */
+ O_COUNT, /* none */
+ O_SKIPTO, /* arg1=next rule number */
+ O_PIPE, /* arg1=pipe number */
+ O_QUEUE, /* arg1=queue number */
+ O_DIVERT, /* arg1=port number */
+ O_TEE, /* arg1=port number */
+ O_FORWARD_IP, /* fwd sockaddr */
+ O_FORWARD_MAC, /* fwd mac */
+ O_NAT, /* nope */
+ O_REASS, /* none */
+
+ /*
+ * More opcodes.
+ */
+ O_IPSEC, /* has ipsec history */
+ O_IP_SRC_LOOKUP, /* arg1=table number, u32=value */
+ O_IP_DST_LOOKUP, /* arg1=table number, u32=value */
+ O_ANTISPOOF, /* none */
+ O_JAIL, /* u32 = id */
+ O_ALTQ, /* u32 = altq classif. qid */
+ O_DIVERTED, /* arg1=bitmap (1:loop, 2:out) */
+ O_TCPDATALEN, /* arg1 = tcp data len */
+ O_IP6_SRC, /* address without mask */
+ O_IP6_SRC_ME, /* my addresses */
+ O_IP6_SRC_MASK, /* address with the mask */
+ O_IP6_DST,
+ O_IP6_DST_ME,
+ O_IP6_DST_MASK,
+ O_FLOW6ID, /* for flow id tag in the ipv6 pkt */
+ O_ICMP6TYPE, /* icmp6 packet type filtering */
+ O_EXT_HDR, /* filtering for ipv6 extension header */
+ O_IP6,
+
+ /*
+ * actions for ng_ipfw
+ */
+ O_NETGRAPH, /* send to ng_ipfw */
+ O_NGTEE, /* copy to ng_ipfw */
+
+ O_IP4,
+
+ O_UNREACH6, /* arg1=icmpv6 code arg (deny) */
+
+ O_TAG, /* arg1=tag number */
+ O_TAGGED, /* arg1=tag number */
+
+ O_SETFIB, /* arg1=FIB number */
+ O_FIB, /* arg1=FIB desired fib number */
+
+ O_LAST_OPCODE /* not an opcode! */
+};
+
+/*
+ * The extension headers are filtered only for presence using a bit
+ * vector with a flag for each header.
+ */
+#define EXT_FRAGMENT 0x1
+#define EXT_HOPOPTS 0x2
+#define EXT_ROUTING 0x4
+#define EXT_AH 0x8
+#define EXT_ESP 0x10
+#define EXT_DSTOPTS 0x20
+#define EXT_RTHDR0 0x40
+#define EXT_RTHDR2 0x80
+
+/*
+ * Template for instructions.
+ *
+ * ipfw_insn is used for all instructions which require no operands,
+ * a single 16-bit value (arg1), or a couple of 8-bit values.
+ *
+ * For other instructions which require different/larger arguments
+ * we have derived structures, ipfw_insn_*.
+ *
+ * The size of the instruction (in 32-bit words) is in the low
+ * 6 bits of "len". The 2 remaining bits are used to implement
+ * NOT and OR on individual instructions. Given a type, you can
+ * compute the length to be put in "len" using F_INSN_SIZE(t)
+ *
+ * F_NOT negates the match result of the instruction.
+ *
+ * F_OR is used to build or blocks. By default, instructions
+ * are evaluated as part of a logical AND. An "or" block
+ * { X or Y or Z } contains F_OR set in all but the last
+ * instruction of the block. A match will cause the code
+ * to skip past the last instruction of the block.
+ *
+ * NOTA BENE: in a couple of places we assume that
+ * sizeof(ipfw_insn) == sizeof(u_int32_t)
+ * this needs to be fixed.
+ *
+ */
+typedef struct _ipfw_insn { /* template for instructions */
+ u_int8_t opcode;
+ u_int8_t len; /* number of 32-bit words */
+#define F_NOT 0x80
+#define F_OR 0x40
+#define F_LEN_MASK 0x3f
+#define F_LEN(cmd) ((cmd)->len & F_LEN_MASK)
+
+ u_int16_t arg1;
+} ipfw_insn;
+
+/*
+ * The F_INSN_SIZE(type) computes the size, in 4-byte words, of
+ * a given type.
+ */
+#define F_INSN_SIZE(t) ((sizeof (t))/sizeof(u_int32_t))
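+
+/*
+ * Illustrative sketch only (not part of the original header): how a
+ * match instruction could be assembled with the encoding described
+ * above.  The helper name build_proto_match() is hypothetical; the
+ * point is just how F_INSN_SIZE(), F_LEN_MASK and F_NOT combine.
+ */
+#if 0
+static void
+build_proto_match(ipfw_insn *cmd, u_int8_t proto, int negate)
+{
+	cmd->opcode = O_PROTO;
+	/* instruction length in 32-bit words, kept in the low 6 bits of "len" */
+	cmd->len = F_INSN_SIZE(ipfw_insn) & F_LEN_MASK;
+	if (negate)
+		cmd->len |= F_NOT;	/* invert the match result */
+	cmd->arg1 = proto;		/* e.g. IPPROTO_TCP */
+}
+#endif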
+
+/*
+ * This is used to store an array of 16-bit entries (ports etc.)
+ */
+typedef struct _ipfw_insn_u16 {
+ ipfw_insn o;
+ u_int16_t ports[2]; /* there may be more */
+} ipfw_insn_u16;
+
+/*
+ * This is used to store an array of 32-bit entries
+ * (uid, single IPv4 addresses etc.)
+ */
+typedef struct _ipfw_insn_u32 {
+ ipfw_insn o;
+ u_int32_t d[1]; /* one or more */
+} ipfw_insn_u32;
+
+/*
+ * This is used to store IP addr-mask pairs.
+ */
+typedef struct _ipfw_insn_ip {
+ ipfw_insn o;
+ struct in_addr addr;
+ struct in_addr mask;
+} ipfw_insn_ip;
+
+/*
+ * This is used to forward to a given address (ip).
+ */
+typedef struct _ipfw_insn_sa {
+ ipfw_insn o;
+ struct sockaddr_in sa;
+} ipfw_insn_sa;
+
+/*
+ * This is used for MAC addr-mask pairs.
+ */
+typedef struct _ipfw_insn_mac {
+ ipfw_insn o;
+ u_char addr[12]; /* dst[6] + src[6] */
+ u_char mask[12]; /* dst[6] + src[6] */
+} ipfw_insn_mac;
+
+/*
+ * This is used for interface match rules (recv xx, xmit xx).
+ */
+typedef struct _ipfw_insn_if {
+ ipfw_insn o;
+ union {
+ struct in_addr ip;
+ int glob;
+ } p;
+ char name[IFNAMSIZ];
+} ipfw_insn_if;
+
+/*
+ * This is used for storing an altq queue id number.
+ */
+typedef struct _ipfw_insn_altq {
+ ipfw_insn o;
+ u_int32_t qid;
+} ipfw_insn_altq;
+
+/*
+ * This is used for limit rules.
+ */
+typedef struct _ipfw_insn_limit {
+ ipfw_insn o;
+ u_int8_t _pad;
+ u_int8_t limit_mask; /* combination of DYN_* below */
+#define DYN_SRC_ADDR 0x1
+#define DYN_SRC_PORT 0x2
+#define DYN_DST_ADDR 0x4
+#define DYN_DST_PORT 0x8
+
+ u_int16_t conn_limit;
+} ipfw_insn_limit;
+
+/*
+ * This is used for log instructions.
+ */
+typedef struct _ipfw_insn_log {
+ ipfw_insn o;
+ u_int32_t max_log; /* how many do we log -- 0 = all */
+ u_int32_t log_left; /* how many left to log */
+} ipfw_insn_log;
+
+/*
+ * Data structures required by both ipfw(8) and ipfw(4) but not part of the
+ * management API are protected by IPFW_INTERNAL.
+ */
+#ifdef IPFW_INTERNAL
+/* Server pool support (LSNAT). */
+struct cfg_spool {
+ LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */
+ struct in_addr addr;
+ u_short port;
+};
+#endif
+
+/* Redirect modes id. */
+#define REDIR_ADDR 0x01
+#define REDIR_PORT 0x02
+#define REDIR_PROTO 0x04
+
+#ifdef IPFW_INTERNAL
+/* Nat redirect configuration. */
+struct cfg_redir {
+ LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */
+ u_int16_t mode; /* type of redirect mode */
+ struct in_addr laddr; /* local ip address */
+ struct in_addr paddr; /* public ip address */
+ struct in_addr raddr; /* remote ip address */
+ u_short lport; /* local port */
+ u_short pport; /* public port */
+ u_short rport; /* remote port */
+ u_short pport_cnt; /* number of public ports */
+ u_short rport_cnt; /* number of remote ports */
+ int proto; /* protocol: tcp/udp */
+ struct alias_link **alink;
+ /* number of entries in spool chain */
+ u_int16_t spool_cnt;
+ /* chain of spool instances */
+ LIST_HEAD(spool_chain, cfg_spool) spool_chain;
+};
+#endif
+
+#define NAT_BUF_LEN 1024
+
+#ifdef IPFW_INTERNAL
+/* Nat configuration data struct. */
+struct cfg_nat {
+ /* chain of nat instances */
+ LIST_ENTRY(cfg_nat) _next;
+ int id; /* nat id */
+ struct in_addr ip; /* nat ip address */
+ char if_name[IF_NAMESIZE]; /* interface name */
+ int mode; /* aliasing mode */
+ struct libalias *lib; /* libalias instance */
+ /* number of entries in redir chain */
+ int redir_cnt;
+ /* chain of redir instances */
+ LIST_HEAD(redir_chain, cfg_redir) redir_chain;
+};
+#endif
+
+#define SOF_NAT sizeof(struct cfg_nat)
+#define SOF_REDIR sizeof(struct cfg_redir)
+#define SOF_SPOOL sizeof(struct cfg_spool)
+
+/* Nat command. */
+typedef struct _ipfw_insn_nat {
+ ipfw_insn o;
+ struct cfg_nat *nat;
+} ipfw_insn_nat;
+
+/* Apply ipv6 mask on ipv6 addr */
+#define APPLY_MASK(addr,mask) \
+ (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \
+ (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \
+ (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \
+ (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3];
+
+/* Structure for ipv6 */
+typedef struct _ipfw_insn_ip6 {
+ ipfw_insn o;
+ struct in6_addr addr6;
+ struct in6_addr mask6;
+} ipfw_insn_ip6;
+
+/* Used to support icmp6 types */
+typedef struct _ipfw_insn_icmp6 {
+ ipfw_insn o;
+ uint32_t d[7]; /* XXX This number is related to the netinet/icmp6.h
+ * define ICMP6_MAXTYPE
+ * as follows: n = ICMP6_MAXTYPE/32 + 1
+ * It is actually 203.
+ */
+} ipfw_insn_icmp6;
+
+/*
+ * Here we have the structure representing an ipfw rule.
+ *
+ * It starts with a general area (with link fields and counters)
+ * followed by an array of one or more instructions, which the code
+ * accesses as an array of 32-bit values.
+ *
+ * Given a rule pointer r:
+ *
+ * r->cmd is the start of the first instruction.
+ * ACTION_PTR(r) is the start of the first action (things to do
+ * once a rule matched).
+ *
+ * When assembling instruction, remember the following:
+ *
+ * + if a rule has a "keep-state" (or "limit") option, then the
+ * first instruction (at r->cmd) MUST BE an O_PROBE_STATE
+ * + if a rule has a "log" option, then the first action
+ * (at ACTION_PTR(r)) MUST be O_LOG
+ * + if a rule has an "altq" option, it comes after "log"
+ * + if a rule has an O_TAG option, it comes after "log" and "altq"
+ *
+ * NOTE: we use a simple linked list of rules because we never need
+ * to delete a rule without scanning the list. We do not use
+ * queue(3) macros for portability and readability.
+ */
+
+struct ip_fw {
+ struct ip_fw *x_next; /* linked list of rules */
+ struct ip_fw *next_rule; /* ptr to next [skipto] rule */
+ /* 'next_rule' is used to pass up 'set_disable' status */
+
+ uint16_t act_ofs; /* offset of action in 32-bit units */
+ uint16_t cmd_len; /* # of 32-bit words in cmd */
+ uint16_t rulenum; /* rule number */
+ uint8_t set; /* rule set (0..31) */
+#define RESVD_SET 31 /* set for default and persistent rules */
+ uint8_t _pad; /* padding */
+ uint32_t id; /* rule id */
+
+ /* These fields are present in all rules. */
+ uint64_t pcnt; /* Packet counter */
+ uint64_t bcnt; /* Byte counter */
+ uint32_t timestamp; /* tv_sec of last match */
+
+ ipfw_insn cmd[1]; /* storage for commands */
+};
+
+#define ACTION_PTR(rule) \
+ (ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) )
+
+#define RULESIZE(rule) (sizeof(struct ip_fw) + \
+ ((struct ip_fw *)(rule))->cmd_len * 4 - 4)
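+
+/*
+ * Illustrative sketch only (not part of the original header): walking
+ * a rule's microinstructions using the layout described above.  The
+ * function name and the visit() callback are hypothetical; the loops
+ * show how act_ofs, cmd_len, F_LEN() and ACTION_PTR() fit together.
+ */
+#if 0
+static void
+walk_rule(struct ip_fw *rule, void (*visit)(ipfw_insn *))
+{
+	ipfw_insn *cmd;
+	int l;
+
+	/* The match instructions start at rule->cmd... */
+	for (l = rule->act_ofs, cmd = rule->cmd; l > 0;
+	    l -= F_LEN(cmd), cmd += F_LEN(cmd))
+		visit(cmd);
+	/* ...and the action instructions start at ACTION_PTR(rule). */
+	for (l = rule->cmd_len - rule->act_ofs, cmd = ACTION_PTR(rule);
+	    l > 0; l -= F_LEN(cmd), cmd += F_LEN(cmd))
+		visit(cmd);
+}
+#endif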
+
+#if 1 // should be moved to in.h
+/*
+ * This structure is used as a flow mask and a flow id for various
+ * parts of the code.
+ * addr_type is used in userland and kernel to mark the address type.
+ * fib is used in the kernel to record the fib in use.
+ * _flags is used in the kernel to store tcp flags for dynamic rules.
+ */
+struct ipfw_flow_id {
+ uint32_t dst_ip;
+ uint32_t src_ip;
+ uint16_t dst_port;
+ uint16_t src_port;
+ uint8_t fib;
+ uint8_t proto;
+ uint8_t _flags; /* protocol-specific flags */
+ uint8_t addr_type; /* 4=ip4, 6=ip6, 1=ether ? */
+ struct in6_addr dst_ip6;
+ struct in6_addr src_ip6;
+ uint32_t flow_id6;
+ uint32_t extra; /* queue/pipe or frag_id */
+};
+#endif
+
+#define IS_IP6_FLOW_ID(id) ((id)->addr_type == 6)
+
+/*
+ * Dynamic ipfw rule.
+ */
+typedef struct _ipfw_dyn_rule ipfw_dyn_rule;
+
+struct _ipfw_dyn_rule {
+ ipfw_dyn_rule *next; /* linked list of rules. */
+ struct ip_fw *rule; /* pointer to rule */
+ /* 'rule' is used to pass up the rule number (from the parent) */
+
+ ipfw_dyn_rule *parent; /* pointer to parent rule */
+ u_int64_t pcnt; /* packet match counter */
+ u_int64_t bcnt; /* byte match counter */
+ struct ipfw_flow_id id; /* (masked) flow id */
+ u_int32_t expire; /* expire time */
+ u_int32_t bucket; /* which bucket in hash table */
+ u_int32_t state; /* state of this rule (typically a
+ * combination of TCP flags)
+ */
+ u_int32_t ack_fwd; /* most recent ACKs in forward */
+ u_int32_t ack_rev; /* and reverse directions (used */
+ /* to generate keepalives) */
+ u_int16_t dyn_type; /* rule type */
+ u_int16_t count; /* refcount */
+};
+
+/*
+ * Definitions for IP option names.
+ */
+#define IP_FW_IPOPT_LSRR 0x01
+#define IP_FW_IPOPT_SSRR 0x02
+#define IP_FW_IPOPT_RR 0x04
+#define IP_FW_IPOPT_TS 0x08
+
+/*
+ * Definitions for TCP option names.
+ */
+#define IP_FW_TCPOPT_MSS 0x01
+#define IP_FW_TCPOPT_WINDOW 0x02
+#define IP_FW_TCPOPT_SACK 0x04
+#define IP_FW_TCPOPT_TS 0x08
+#define IP_FW_TCPOPT_CC 0x10
+
+#define ICMP_REJECT_RST 0x100 /* fake ICMP code (send a TCP RST) */
+#define ICMP6_UNREACH_RST 0x100 /* fake ICMPv6 code (send a TCP RST) */
+
+/*
+ * These are used for lookup tables.
+ */
+typedef struct _ipfw_table_entry {
+ in_addr_t addr; /* network address */
+ u_int32_t value; /* value */
+ u_int16_t tbl; /* table number */
+ u_int8_t masklen; /* mask length */
+} ipfw_table_entry;
+
+typedef struct _ipfw_table {
+ u_int32_t size; /* size of entries in bytes */
+ u_int32_t cnt; /* # of entries */
+ u_int16_t tbl; /* table number */
+ ipfw_table_entry ent[0]; /* entries */
+} ipfw_table;
+
+#endif /* _IPFW2_H */
diff --git a/freebsd/sys/netinet/ip_gre.c b/freebsd/sys/netinet/ip_gre.c
new file mode 100644
index 00000000..253376de
--- /dev/null
+++ b/freebsd/sys/netinet/ip_gre.c
@@ -0,0 +1,336 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */
+
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Heiko W.Rupp <hwr@pilhuhn.de>
+ *
+ * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * De-encapsulate tunneled packets and send them on.
+ * The output half is in net/if_gre.[ch].
+ * This currently handles IPPROTO_GRE and IPPROTO_MOBILE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_atalk.h>
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/net/bpf.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/raw_cb.h>
+
+#ifdef INET
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_gre.h>
+#include <freebsd/machine/in_cksum.h>
+#else
+#error ip_gre input without IP?
+#endif
+
+#ifdef NETATALK
+#include <freebsd/netatalk/at.h>
+#include <freebsd/netatalk/at_var.h>
+#include <freebsd/netatalk/at_extern.h>
+#endif
+
+/* Needs IP headers. */
+#include <freebsd/net/if_gre.h>
+
+#include <freebsd/machine/stdarg.h>
+
+#if 1
+void gre_inet_ntoa(struct in_addr in); /* XXX */
+#endif
+
+static struct gre_softc *gre_lookup(struct mbuf *, u_int8_t);
+
+static struct mbuf *gre_input2(struct mbuf *, int, u_char);
+
+/*
+ * De-encapsulate a packet and feed it back through ip input (this
+ * routine is called whenever IP gets a packet with proto type
+ * IPPROTO_GRE and a local destination address).
+ * This really is simple
+ */
+void
+gre_input(struct mbuf *m, int off)
+{
+ int proto;
+
+ proto = (mtod(m, struct ip *))->ip_p;
+
+ m = gre_input2(m, off, proto);
+
+ /*
+	 * If no matching tunnel that is up is found, we inject
+	 * the mbuf into the raw IP socket to see if anyone picks it up.
+ */
+ if (m != NULL)
+ rip_input(m, off);
+}
+
+/*
+ * Decapsulate. Does the real work and is called from gre_input()
+ * (above). Returns an mbuf back if packet is not yet processed,
+ * and NULL if it needs no further processing. proto is the protocol
+ * number of the "calling" foo_input() routine.
+ */
+static struct mbuf *
+gre_input2(struct mbuf *m, int hlen, u_char proto)
+{
+ struct greip *gip;
+ int isr;
+ struct gre_softc *sc;
+ u_int16_t flags;
+ u_int32_t af;
+
+ if ((sc = gre_lookup(m, proto)) == NULL) {
+ /* No matching tunnel or tunnel is down. */
+ return (m);
+ }
+
+ if (m->m_len < sizeof(*gip)) {
+ m = m_pullup(m, sizeof(*gip));
+ if (m == NULL)
+ return (NULL);
+ }
+ gip = mtod(m, struct greip *);
+
+ GRE2IFP(sc)->if_ipackets++;
+ GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
+
+ switch (proto) {
+ case IPPROTO_GRE:
+ hlen += sizeof(struct gre_h);
+
+ /* process GRE flags as packet can be of variable len */
+ flags = ntohs(gip->gi_flags);
+
+ /* Checksum & Offset are present */
+ if ((flags & GRE_CP) | (flags & GRE_RP))
+ hlen += 4;
+ /* We don't support routing fields (variable length) */
+ if (flags & GRE_RP)
+ return (m);
+ if (flags & GRE_KP)
+ hlen += 4;
+ if (flags & GRE_SP)
+ hlen += 4;
+
+ switch (ntohs(gip->gi_ptype)) { /* ethertypes */
+ case WCCP_PROTOCOL_TYPE:
+ if (sc->wccp_ver == WCCP_V2)
+ hlen += 4;
+ /* FALLTHROUGH */
+ case ETHERTYPE_IP: /* shouldn't need a schednetisr(), */
+ isr = NETISR_IP;/* as we are in ip_input */
+ af = AF_INET;
+ break;
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ af = AF_INET6;
+ break;
+#endif
+#ifdef NETATALK
+ case ETHERTYPE_ATALK:
+ isr = NETISR_ATALK1;
+ af = AF_APPLETALK;
+ break;
+#endif
+ default:
+ /* Others not yet supported. */
+ return (m);
+ }
+ break;
+ default:
+ /* Others not yet supported. */
+ return (m);
+ }
+
+ if (hlen > m->m_pkthdr.len) {
+ m_freem(m);
+ return (NULL);
+ }
+ /* Unlike NetBSD, in FreeBSD m_adj() adjusts m->m_pkthdr.len as well */
+ m_adj(m, hlen);
+
+ if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
+ bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
+ }
+
+ m->m_pkthdr.rcvif = GRE2IFP(sc);
+
+ netisr_queue(isr, m);
+
+ /* Packet is done, no further processing needed. */
+ return (NULL);
+}
+
+/*
+ * Input routine for IPPROTO_MOBILE.
+ * This is a little bit different from the other modes, as the
+ * encapsulating header was not prepended, but instead inserted
+ * between the IP header and the payload.
+ */
+
+void
+gre_mobile_input(struct mbuf *m, int hlen)
+{
+ struct ip *ip;
+ struct mobip_h *mip;
+ struct gre_softc *sc;
+ int msiz;
+
+ if ((sc = gre_lookup(m, IPPROTO_MOBILE)) == NULL) {
+ /* No matching tunnel or tunnel is down. */
+ m_freem(m);
+ return;
+ }
+
+ if (m->m_len < sizeof(*mip)) {
+ m = m_pullup(m, sizeof(*mip));
+ if (m == NULL)
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ mip = mtod(m, struct mobip_h *);
+
+ GRE2IFP(sc)->if_ipackets++;
+ GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
+
+ if (ntohs(mip->mh.proto) & MOB_HH_SBIT) {
+ msiz = MOB_HH_SIZ_L;
+ mip->mi.ip_src.s_addr = mip->mh.osrc;
+ } else
+ msiz = MOB_HH_SIZ_S;
+
+ if (m->m_len < (ip->ip_hl << 2) + msiz) {
+ m = m_pullup(m, (ip->ip_hl << 2) + msiz);
+ if (m == NULL)
+ return;
+ ip = mtod(m, struct ip *);
+ mip = mtod(m, struct mobip_h *);
+ }
+
+ mip->mi.ip_dst.s_addr = mip->mh.odst;
+ mip->mi.ip_p = (ntohs(mip->mh.proto) >> 8);
+
+ if (gre_in_cksum((u_int16_t *)&mip->mh, msiz) != 0) {
+ m_freem(m);
+ return;
+ }
+
+ bcopy((caddr_t)(ip) + (ip->ip_hl << 2) + msiz, (caddr_t)(ip) +
+ (ip->ip_hl << 2), m->m_len - msiz - (ip->ip_hl << 2));
+ m->m_len -= msiz;
+ m->m_pkthdr.len -= msiz;
+
+ /*
+	 * On FreeBSD, rip_input() supplies us with ip->ip_len
+	 * already converted into host byte order and also decreased
+	 * by the length of the IP header; however, ip_input() expects
+	 * this field in the original format (network byte order
+	 * and the full size of the IP packet), so adjust it accordingly.
+ */
+ ip->ip_len = htons(ip->ip_len + sizeof(struct ip) - msiz);
+
+ ip->ip_sum = 0;
+ ip->ip_sum = in_cksum(m, (ip->ip_hl << 2));
+
+ if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
+ u_int32_t af = AF_INET;
+ bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
+ }
+
+ m->m_pkthdr.rcvif = GRE2IFP(sc);
+
+ netisr_queue(NETISR_IP, m);
+}
+
+/*
+ * Find the gre interface associated with our src/dst/proto set.
+ *
+ * XXXRW: Need some sort of drain/refcount mechanism so that the softc
+ * reference remains valid after it's returned from gre_lookup(). Right
+ * now, I'm thinking it should be reference-counted with a gre_dropref()
+ * when the caller is done with the softc. This is complicated by how
+ * to handle destroying the gre softc; probably using a gre_drain() in
+ * in_gre.c during destroy.
+ */
+static struct gre_softc *
+gre_lookup(struct mbuf *m, u_int8_t proto)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ struct gre_softc *sc;
+
+ mtx_lock(&gre_mtx);
+ for (sc = LIST_FIRST(&gre_softc_list); sc != NULL;
+ sc = LIST_NEXT(sc, sc_list)) {
+ if ((sc->g_dst.s_addr == ip->ip_src.s_addr) &&
+ (sc->g_src.s_addr == ip->ip_dst.s_addr) &&
+ (sc->g_proto == proto) &&
+ ((GRE2IFP(sc)->if_flags & IFF_UP) != 0)) {
+ mtx_unlock(&gre_mtx);
+ return (sc);
+ }
+ }
+ mtx_unlock(&gre_mtx);
+
+ return (NULL);
+}
diff --git a/freebsd/sys/netinet/ip_gre.h b/freebsd/sys/netinet/ip_gre.h
new file mode 100644
index 00000000..1fb67d93
--- /dev/null
+++ b/freebsd/sys/netinet/ip_gre.h
@@ -0,0 +1,43 @@
+/* $NetBSD: ip_gre.h,v 1.5 2002/06/09 16:33:40 itojun Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Heiko W.Rupp <hwr@pilhuhn.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+void gre_input(struct mbuf *, int);
+void gre_mobile_input(struct mbuf *, int);
+#endif /* _KERNEL */
diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c
new file mode 100644
index 00000000..b7a83128
--- /dev/null
+++ b/freebsd/sys/netinet/ip_icmp.c
@@ -0,0 +1,986 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ipsec.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcpip.h>
+#include <freebsd/netinet/icmp_var.h>
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#include <freebsd/netipsec/key.h>
+#endif
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+/*
+ * ICMP routines: error generation, receive packet processing,
+ * routines to turn packets around back to the originator, and
+ * host table maintenance routines.
+ */
+VNET_DEFINE(struct icmpstat, icmpstat);
+SYSCTL_VNET_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
+ &VNET_NAME(icmpstat), icmpstat, "");
+
+static VNET_DEFINE(int, icmpmaskrepl) = 0;
+#define V_icmpmaskrepl VNET(icmpmaskrepl)
+SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
+ &VNET_NAME(icmpmaskrepl), 0,
+ "Reply to ICMP Address Mask Request packets.");
+
+static VNET_DEFINE(u_int, icmpmaskfake) = 0;
+#define V_icmpmaskfake VNET(icmpmaskfake)
+SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW,
+ &VNET_NAME(icmpmaskfake), 0,
+ "Fake reply to ICMP Address Mask Request packets.");
+
+static VNET_DEFINE(int, drop_redirect) = 0;
+#define V_drop_redirect VNET(drop_redirect)
+SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
+ &VNET_NAME(drop_redirect), 0,
+ "Ignore ICMP redirects");
+
+static VNET_DEFINE(int, log_redirect) = 0;
+#define V_log_redirect VNET(log_redirect)
+SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
+ &VNET_NAME(log_redirect), 0,
+ "Log ICMP redirects to the console");
+
+static VNET_DEFINE(int, icmplim) = 200;
+#define V_icmplim VNET(icmplim)
+SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
+ &VNET_NAME(icmplim), 0,
+ "Maximum number of ICMP responses per second");
+
+static VNET_DEFINE(int, icmplim_output) = 1;
+#define V_icmplim_output VNET(icmplim_output)
+SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
+ &VNET_NAME(icmplim_output), 0,
+ "Enable rate limiting of ICMP responses");
+
+static VNET_DEFINE(char, reply_src[IFNAMSIZ]);
+#define V_reply_src VNET(reply_src)
+SYSCTL_VNET_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
+ &VNET_NAME(reply_src), IFNAMSIZ,
+ "icmp reply source for non-local packets.");
+
+static VNET_DEFINE(int, icmp_rfi) = 0;
+#define V_icmp_rfi VNET(icmp_rfi)
+SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_RW,
+ &VNET_NAME(icmp_rfi), 0,
+ "ICMP reply from incoming interface for non-local packets");
+
+static VNET_DEFINE(int, icmp_quotelen) = 8;
+#define V_icmp_quotelen VNET(icmp_quotelen)
+SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_RW,
+ &VNET_NAME(icmp_quotelen), 0,
+ "Number of bytes from original packet to quote in ICMP reply");
+
+/*
+ * ICMP broadcast echo sysctl
+ */
+static VNET_DEFINE(int, icmpbmcastecho) = 0;
+#define V_icmpbmcastecho VNET(icmpbmcastecho)
+SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
+ &VNET_NAME(icmpbmcastecho), 0,
+ "");
+
+
+#ifdef ICMPPRINTFS
+int icmpprintfs = 0;
+#endif
+
+static void icmp_reflect(struct mbuf *);
+static void icmp_send(struct mbuf *, struct mbuf *);
+
+extern struct protosw inetsw[];
+
+/*
+ * Kernel module interface for updating icmpstat. The argument is an index
+ * into icmpstat treated as an array of u_long. While this encodes the
+ * general layout of icmpstat into the caller, it doesn't encode its
+ * location, so that future changes to add, for example, per-CPU stats
+ * support won't cause binary compatibility problems for kernel modules.
+ */
+void
+kmod_icmpstat_inc(int statnum)
+{
+
+ (*((u_long *)&V_icmpstat + statnum))++;
+}
+
+/*
+ * Generate an error packet of the given type
+ * in response to the bad packet ip.
+ */
+void
+icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu)
+{
+ register struct ip *oip = mtod(n, struct ip *), *nip;
+ register unsigned oiphlen = oip->ip_hl << 2;
+ register struct icmp *icp;
+ register struct mbuf *m;
+ unsigned icmplen, icmpelen, nlen;
+
+ KASSERT((u_int)type <= ICMP_MAXTYPE, ("%s: illegal ICMP type", __func__));
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_error(%p, %x, %d)\n", oip, type, code);
+#endif
+ if (type != ICMP_REDIRECT)
+ ICMPSTAT_INC(icps_error);
+ /*
+ * Don't send error:
+ * if the original packet was encrypted.
+ * if not the first fragment of message.
+ * in response to a multicast or broadcast packet.
+ * if the old packet protocol was an ICMP error message.
+ */
+ if (n->m_flags & M_DECRYPTED)
+ goto freeit;
+ if (oip->ip_off & ~(IP_MF|IP_DF))
+ goto freeit;
+ if (n->m_flags & (M_BCAST|M_MCAST))
+ goto freeit;
+ if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
+ n->m_len >= oiphlen + ICMP_MINLEN &&
+ !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiphlen))->icmp_type)) {
+ ICMPSTAT_INC(icps_oldicmp);
+ goto freeit;
+ }
+	/* Drop if the IP header plus 8 bytes is not contiguous in the first mbuf. */
+ if (oiphlen + 8 > n->m_len)
+ goto freeit;
+ /*
+ * Calculate length to quote from original packet and
+ * prevent the ICMP mbuf from overflowing.
+ * Unfortunatly this is non-trivial since ip_forward()
+	 * Unfortunately this is non-trivial since ip_forward()
+ */
+ nlen = m_length(n, NULL);
+ if (oip->ip_p == IPPROTO_TCP) {
+ struct tcphdr *th;
+ int tcphlen;
+
+ if (oiphlen + sizeof(struct tcphdr) > n->m_len &&
+ n->m_next == NULL)
+ goto stdreply;
+ if (n->m_len < oiphlen + sizeof(struct tcphdr) &&
+ ((n = m_pullup(n, oiphlen + sizeof(struct tcphdr))) == NULL))
+ goto freeit;
+ th = (struct tcphdr *)((caddr_t)oip + oiphlen);
+ tcphlen = th->th_off << 2;
+ if (tcphlen < sizeof(struct tcphdr))
+ goto freeit;
+ if (oip->ip_len < oiphlen + tcphlen)
+ goto freeit;
+ if (oiphlen + tcphlen > n->m_len && n->m_next == NULL)
+ goto stdreply;
+ if (n->m_len < oiphlen + tcphlen &&
+ ((n = m_pullup(n, oiphlen + tcphlen)) == NULL))
+ goto freeit;
+ icmpelen = max(tcphlen, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+ } else
+stdreply: icmpelen = max(8, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+
+ icmplen = min(oiphlen + icmpelen, nlen);
+ if (icmplen < sizeof(struct ip))
+ goto freeit;
+
+ if (MHLEN > sizeof(struct ip) + ICMP_MINLEN + icmplen)
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ else
+ m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ if (m == NULL)
+ goto freeit;
+#ifdef MAC
+ mac_netinet_icmp_reply(n, m);
+#endif
+ icmplen = min(icmplen, M_TRAILINGSPACE(m) - sizeof(struct ip) - ICMP_MINLEN);
+ m_align(m, ICMP_MINLEN + icmplen);
+ m->m_len = ICMP_MINLEN + icmplen;
+
+ /* XXX MRT make the outgoing packet use the same FIB
+ * that was associated with the incoming packet
+ */
+ M_SETFIB(m, M_GETFIB(n));
+ icp = mtod(m, struct icmp *);
+ ICMPSTAT_INC(icps_outhist[type]);
+ icp->icmp_type = type;
+ if (type == ICMP_REDIRECT)
+ icp->icmp_gwaddr.s_addr = dest;
+ else {
+ icp->icmp_void = 0;
+ /*
+ * The following assignments assume an overlay with the
+ * just zeroed icmp_void field.
+ */
+ if (type == ICMP_PARAMPROB) {
+ icp->icmp_pptr = code;
+ code = 0;
+ } else if (type == ICMP_UNREACH &&
+ code == ICMP_UNREACH_NEEDFRAG && mtu) {
+ icp->icmp_nextmtu = htons(mtu);
+ }
+ }
+ icp->icmp_code = code;
+
+ /*
+ * Copy the quotation into ICMP message and
+ * convert quoted IP header back to network representation.
+ */
+ m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
+ nip = &icp->icmp_ip;
+ nip->ip_len = htons(nip->ip_len);
+ nip->ip_off = htons(nip->ip_off);
+
+ /*
+	 * Set up ICMP message mbuf and copy old IP header (without options)
+	 * in front of ICMP message.
+ * If the original mbuf was meant to bypass the firewall, the error
+ * reply should bypass as well.
+ */
+ m->m_flags |= n->m_flags & M_SKIP_FIREWALL;
+ m->m_data -= sizeof(struct ip);
+ m->m_len += sizeof(struct ip);
+ m->m_pkthdr.len = m->m_len;
+ m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
+ nip = mtod(m, struct ip *);
+ bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
+ nip->ip_len = m->m_len;
+ nip->ip_v = IPVERSION;
+ nip->ip_hl = 5;
+ nip->ip_p = IPPROTO_ICMP;
+ nip->ip_tos = 0;
+ icmp_reflect(m);
+
+freeit:
+ m_freem(n);
+}
+
+/*
+ * Process a received ICMP message.
+ */
+void
+icmp_input(struct mbuf *m, int off)
+{
+ struct icmp *icp;
+ struct in_ifaddr *ia;
+ struct ip *ip = mtod(m, struct ip *);
+ struct sockaddr_in icmpsrc, icmpdst, icmpgw;
+ int hlen = off;
+ int icmplen = ip->ip_len;
+ int i, code;
+ void (*ctlfunc)(int, struct sockaddr *, void *);
+ int fibnum;
+
+ /*
+	 * Locate icmp structure in mbuf, and check
+	 * that it is not corrupted and is of at least minimum length.
+ */
+#ifdef ICMPPRINTFS
+ if (icmpprintfs) {
+ char buf[4 * sizeof "123"];
+ strcpy(buf, inet_ntoa(ip->ip_src));
+ printf("icmp_input from %s to %s, len %d\n",
+ buf, inet_ntoa(ip->ip_dst), icmplen);
+ }
+#endif
+ if (icmplen < ICMP_MINLEN) {
+ ICMPSTAT_INC(icps_tooshort);
+ goto freeit;
+ }
+ i = hlen + min(icmplen, ICMP_ADVLENMIN);
+ if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
+ ICMPSTAT_INC(icps_tooshort);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ m->m_len -= hlen;
+ m->m_data += hlen;
+ icp = mtod(m, struct icmp *);
+ if (in_cksum(m, icmplen)) {
+ ICMPSTAT_INC(icps_checksum);
+ goto freeit;
+ }
+ m->m_len += hlen;
+ m->m_data -= hlen;
+
+ if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
+ /*
+ * Deliver very specific ICMP type only.
+ */
+ switch (icp->icmp_type) {
+ case ICMP_UNREACH:
+ case ICMP_TIMXCEED:
+ break;
+ default:
+ goto freeit;
+ }
+ }
+
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_input, type %d code %d\n", icp->icmp_type,
+ icp->icmp_code);
+#endif
+
+ /*
+ * Message type specific processing.
+ */
+ if (icp->icmp_type > ICMP_MAXTYPE)
+ goto raw;
+
+ /* Initialize */
+ bzero(&icmpsrc, sizeof(icmpsrc));
+ icmpsrc.sin_len = sizeof(struct sockaddr_in);
+ icmpsrc.sin_family = AF_INET;
+ bzero(&icmpdst, sizeof(icmpdst));
+ icmpdst.sin_len = sizeof(struct sockaddr_in);
+ icmpdst.sin_family = AF_INET;
+ bzero(&icmpgw, sizeof(icmpgw));
+ icmpgw.sin_len = sizeof(struct sockaddr_in);
+ icmpgw.sin_family = AF_INET;
+
+ ICMPSTAT_INC(icps_inhist[icp->icmp_type]);
+ code = icp->icmp_code;
+ switch (icp->icmp_type) {
+
+ case ICMP_UNREACH:
+ switch (code) {
+ case ICMP_UNREACH_NET:
+ case ICMP_UNREACH_HOST:
+ case ICMP_UNREACH_SRCFAIL:
+ case ICMP_UNREACH_NET_UNKNOWN:
+ case ICMP_UNREACH_HOST_UNKNOWN:
+ case ICMP_UNREACH_ISOLATED:
+ case ICMP_UNREACH_TOSNET:
+ case ICMP_UNREACH_TOSHOST:
+ case ICMP_UNREACH_HOST_PRECEDENCE:
+ case ICMP_UNREACH_PRECEDENCE_CUTOFF:
+ code = PRC_UNREACH_NET;
+ break;
+
+ case ICMP_UNREACH_NEEDFRAG:
+ code = PRC_MSGSIZE;
+ break;
+
+ /*
+ * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
+ * Treat subcodes 2,3 as immediate RST
+ */
+ case ICMP_UNREACH_PROTOCOL:
+ case ICMP_UNREACH_PORT:
+ code = PRC_UNREACH_PORT;
+ break;
+
+ case ICMP_UNREACH_NET_PROHIB:
+ case ICMP_UNREACH_HOST_PROHIB:
+ case ICMP_UNREACH_FILTER_PROHIB:
+ code = PRC_UNREACH_ADMIN_PROHIB;
+ break;
+
+ default:
+ goto badcode;
+ }
+ goto deliver;
+
+ case ICMP_TIMXCEED:
+ if (code > 1)
+ goto badcode;
+ code += PRC_TIMXCEED_INTRANS;
+ goto deliver;
+
+ case ICMP_PARAMPROB:
+ if (code > 1)
+ goto badcode;
+ code = PRC_PARAMPROB;
+ goto deliver;
+
+ case ICMP_SOURCEQUENCH:
+ if (code)
+ goto badcode;
+ code = PRC_QUENCH;
+ deliver:
+ /*
+ * Problem with datagram; advise higher level routines.
+ */
+ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+ icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
+ ICMPSTAT_INC(icps_badlen);
+ goto freeit;
+ }
+ icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
+ /* Discard ICMP's in response to multicast packets */
+ if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
+ goto badcode;
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
+#endif
+ icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+ /*
+ * XXX if the packet contains [IPv4 AH TCP], we can't make a
+ * notification to TCP layer.
+ */
+ ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
+ if (ctlfunc)
+ (*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
+ (void *)&icp->icmp_ip);
+ break;
+
+ badcode:
+ ICMPSTAT_INC(icps_badcode);
+ break;
+
+ case ICMP_ECHO:
+ if (!V_icmpbmcastecho
+ && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
+ ICMPSTAT_INC(icps_bmcastecho);
+ break;
+ }
+ icp->icmp_type = ICMP_ECHOREPLY;
+ if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
+ goto freeit;
+ else
+ goto reflect;
+
+ case ICMP_TSTAMP:
+ if (!V_icmpbmcastecho
+ && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
+ ICMPSTAT_INC(icps_bmcasttstamp);
+ break;
+ }
+ if (icmplen < ICMP_TSLEN) {
+ ICMPSTAT_INC(icps_badlen);
+ break;
+ }
+ icp->icmp_type = ICMP_TSTAMPREPLY;
+ icp->icmp_rtime = iptime();
+ icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
+ if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
+ goto freeit;
+ else
+ goto reflect;
+
+ case ICMP_MASKREQ:
+ if (V_icmpmaskrepl == 0)
+ break;
+ /*
+ * We are not able to respond with all ones broadcast
+ * unless we receive it over a point-to-point interface.
+ */
+ if (icmplen < ICMP_MASKLEN)
+ break;
+ switch (ip->ip_dst.s_addr) {
+
+ case INADDR_BROADCAST:
+ case INADDR_ANY:
+ icmpdst.sin_addr = ip->ip_src;
+ break;
+
+ default:
+ icmpdst.sin_addr = ip->ip_dst;
+ }
+ ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+ (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+ if (ia == NULL)
+ break;
+ if (ia->ia_ifp == NULL) {
+ ifa_free(&ia->ia_ifa);
+ break;
+ }
+ icp->icmp_type = ICMP_MASKREPLY;
+ if (V_icmpmaskfake == 0)
+ icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
+ else
+ icp->icmp_mask = V_icmpmaskfake;
+ if (ip->ip_src.s_addr == 0) {
+ if (ia->ia_ifp->if_flags & IFF_BROADCAST)
+ ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
+ else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
+ ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
+ }
+ ifa_free(&ia->ia_ifa);
+reflect:
+ ip->ip_len += hlen; /* since ip_input deducts this */
+ ICMPSTAT_INC(icps_reflect);
+ ICMPSTAT_INC(icps_outhist[icp->icmp_type]);
+ icmp_reflect(m);
+ return;
+
+ case ICMP_REDIRECT:
+ if (V_log_redirect) {
+ u_long src, dst, gw;
+
+ src = ntohl(ip->ip_src.s_addr);
+ dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
+ gw = ntohl(icp->icmp_gwaddr.s_addr);
+ printf("icmp redirect from %d.%d.%d.%d: "
+ "%d.%d.%d.%d => %d.%d.%d.%d\n",
+ (int)(src >> 24), (int)((src >> 16) & 0xff),
+ (int)((src >> 8) & 0xff), (int)(src & 0xff),
+ (int)(dst >> 24), (int)((dst >> 16) & 0xff),
+ (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
+ (int)(gw >> 24), (int)((gw >> 16) & 0xff),
+ (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
+ }
+ /*
+ * RFC1812 says we must ignore ICMP redirects if we
+ * are acting as router.
+ */
+ if (V_drop_redirect || V_ipforwarding)
+ break;
+ if (code > 3)
+ goto badcode;
+ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+ icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
+ ICMPSTAT_INC(icps_badlen);
+ break;
+ }
+ /*
+ * Short circuit routing redirects to force
+ * immediate change in the kernel's routing
+ * tables. The message is also handed to anyone
+ * listening on a raw socket (e.g. the routing
+ * daemon for use in updating its tables).
+ */
+ icmpgw.sin_addr = ip->ip_src;
+ icmpdst.sin_addr = icp->icmp_gwaddr;
+#ifdef ICMPPRINTFS
+ if (icmpprintfs) {
+ char buf[4 * sizeof "123"];
+ strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
+
+ printf("redirect dst %s to %s\n",
+ buf, inet_ntoa(icp->icmp_gwaddr));
+ }
+#endif
+ icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+ for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ in_rtredirect((struct sockaddr *)&icmpsrc,
+ (struct sockaddr *)&icmpdst,
+ (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
+ (struct sockaddr *)&icmpgw, fibnum);
+ }
+ pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
+#ifdef IPSEC
+ key_sa_routechange((struct sockaddr *)&icmpsrc);
+#endif
+ break;
+
+ /*
+ * No kernel processing for the following;
+ * just fall through to send to raw listener.
+ */
+ case ICMP_ECHOREPLY:
+ case ICMP_ROUTERADVERT:
+ case ICMP_ROUTERSOLICIT:
+ case ICMP_TSTAMPREPLY:
+ case ICMP_IREQREPLY:
+ case ICMP_MASKREPLY:
+ default:
+ break;
+ }
+
+raw:
+ rip_input(m, off);
+ return;
+
+freeit:
+ m_freem(m);
+}
+
+/*
+ * Reflect the ip packet back to the source
+ */
+static void
+icmp_reflect(struct mbuf *m)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ struct ifaddr *ifa;
+ struct ifnet *ifp;
+ struct in_ifaddr *ia;
+ struct in_addr t;
+ struct mbuf *opts = 0;
+ int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
+
+ if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+ IN_EXPERIMENTAL(ntohl(ip->ip_src.s_addr)) ||
+ IN_ZERONET(ntohl(ip->ip_src.s_addr)) ) {
+ m_freem(m); /* Bad return address */
+ ICMPSTAT_INC(icps_badaddr);
+ goto done; /* Ip_output() will check for broadcast */
+ }
+
+ t = ip->ip_dst;
+ ip->ip_dst = ip->ip_src;
+
+ /*
+ * Source selection for ICMP replies:
+ *
+ * If the incoming packet was addressed directly to one of our
+ * own addresses, use dst as the src for the reply.
+ */
+ IN_IFADDR_RLOCK();
+ LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) {
+ if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) {
+ t = IA_SIN(ia)->sin_addr;
+ IN_IFADDR_RUNLOCK();
+ goto match;
+ }
+ }
+ IN_IFADDR_RUNLOCK();
+
+ /*
+ * If the incoming packet was addressed to one of our broadcast
+ * addresses, use the first non-broadcast address which corresponds
+ * to the incoming interface.
+ */
+ ifp = m->m_pkthdr.rcvif;
+ if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ia = ifatoia(ifa);
+ if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
+ t.s_addr) {
+ t = IA_SIN(ia)->sin_addr;
+ IF_ADDR_UNLOCK(ifp);
+ goto match;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+ /*
+ * If the packet was transiting through us, use the address of
+	 * the interface the packet came in on. If that interface
+ * doesn't have a suitable IP address, the normal selection
+ * criteria apply.
+ */
+ if (V_icmp_rfi && ifp != NULL) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ia = ifatoia(ifa);
+ t = IA_SIN(ia)->sin_addr;
+ IF_ADDR_UNLOCK(ifp);
+ goto match;
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+ /*
+ * If the incoming packet was not addressed directly to us, use
+ * designated interface for icmp replies specified by sysctl
+ * net.inet.icmp.reply_src (default not set). Otherwise continue
+ * with normal source selection.
+ */
+ if (V_reply_src[0] != '\0' && (ifp = ifunit(V_reply_src))) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ia = ifatoia(ifa);
+ t = IA_SIN(ia)->sin_addr;
+ IF_ADDR_UNLOCK(ifp);
+ goto match;
+ }
+ IF_ADDR_UNLOCK(ifp);
+ }
+ /*
+ * If the packet was transiting through us, use the address of
+ * the interface that is the closest to the packet source.
+ * When we don't have a route back to the packet source, stop here
+ * and drop the packet.
+ */
+ ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
+ if (ia == NULL) {
+ m_freem(m);
+ ICMPSTAT_INC(icps_noroute);
+ goto done;
+ }
+ t = IA_SIN(ia)->sin_addr;
+ ifa_free(&ia->ia_ifa);
+match:
+#ifdef MAC
+ mac_netinet_icmp_replyinplace(m);
+#endif
+ ip->ip_src = t;
+ ip->ip_ttl = V_ip_defttl;
+
+ if (optlen > 0) {
+ register u_char *cp;
+ int opt, cnt;
+ u_int len;
+
+ /*
+ * Retrieve any source routing from the incoming packet;
+ * add on any record-route or timestamp options.
+ */
+ cp = (u_char *) (ip + 1);
+ if ((opts = ip_srcroute(m)) == 0 &&
+ (opts = m_gethdr(M_DONTWAIT, MT_DATA))) {
+ opts->m_len = sizeof(struct in_addr);
+ mtod(opts, struct in_addr *)->s_addr = 0;
+ }
+ if (opts) {
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_reflect optlen %d rt %d => ",
+ optlen, opts->m_len);
+#endif
+ for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ len = 1;
+ else {
+ if (cnt < IPOPT_OLEN + sizeof(*cp))
+ break;
+ len = cp[IPOPT_OLEN];
+ if (len < IPOPT_OLEN + sizeof(*cp) ||
+ len > cnt)
+ break;
+ }
+ /*
+ * Should check for overflow, but it "can't happen"
+ */
+ if (opt == IPOPT_RR || opt == IPOPT_TS ||
+ opt == IPOPT_SECURITY) {
+ bcopy((caddr_t)cp,
+ mtod(opts, caddr_t) + opts->m_len, len);
+ opts->m_len += len;
+ }
+ }
+ /* Terminate & pad, if necessary */
+ cnt = opts->m_len % 4;
+ if (cnt) {
+ for (; cnt < 4; cnt++) {
+ *(mtod(opts, caddr_t) + opts->m_len) =
+ IPOPT_EOL;
+ opts->m_len++;
+ }
+ }
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("%d\n", opts->m_len);
+#endif
+ }
+ /*
+ * Now strip out original options by copying rest of first
+ * mbuf's data back, and adjust the IP length.
+ */
+ ip->ip_len -= optlen;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = 5;
+ m->m_len -= optlen;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len -= optlen;
+ optlen += sizeof(struct ip);
+ bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
+ (unsigned)(m->m_len - sizeof(struct ip)));
+ }
+ m_tag_delete_nonpersistent(m);
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ icmp_send(m, opts);
+done:
+ if (opts)
+ (void)m_free(opts);
+}
+
+/*
+ * Send an icmp packet back to the ip level,
+ * after supplying a checksum.
+ */
+static void
+icmp_send(struct mbuf *m, struct mbuf *opts)
+{
+ register struct ip *ip = mtod(m, struct ip *);
+ register int hlen;
+ register struct icmp *icp;
+
+ hlen = ip->ip_hl << 2;
+ m->m_data += hlen;
+ m->m_len -= hlen;
+ icp = mtod(m, struct icmp *);
+ icp->icmp_cksum = 0;
+ icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
+ m->m_data -= hlen;
+ m->m_len += hlen;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+#ifdef ICMPPRINTFS
+ if (icmpprintfs) {
+ char buf[4 * sizeof "123"];
+ strcpy(buf, inet_ntoa(ip->ip_dst));
+ printf("icmp_send dst %s src %s\n",
+ buf, inet_ntoa(ip->ip_src));
+ }
+#endif
+ (void) ip_output(m, opts, NULL, 0, NULL, NULL);
+}
+
+/*
+ * Return milliseconds since 00:00 GMT in network format.
+ */
+uint32_t
+iptime(void)
+{
+ struct timeval atv;
+ u_long t;
+
+ getmicrotime(&atv);
+ t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
+ return (htonl(t));
+}
+
+/*
+ * Return the next larger or smaller MTU plateau (table from RFC 1191)
+ * given current value MTU. If DIR is less than zero, a larger plateau
+ * is returned; otherwise, a smaller value is returned.
+ */
+int
+ip_next_mtu(int mtu, int dir)
+{
+ static int mtutab[] = {
+ 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508,
+ 296, 68, 0
+ };
+ int i, size;
+
+ size = (sizeof mtutab) / (sizeof mtutab[0]);
+ if (dir >= 0) {
+ for (i = 0; i < size; i++)
+ if (mtu > mtutab[i])
+ return mtutab[i];
+ } else {
+ for (i = size - 1; i >= 0; i--)
+ if (mtu < mtutab[i])
+ return mtutab[i];
+ if (mtu == mtutab[0])
+ return mtutab[0];
+ }
+ return 0;
+}
+
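+/*
+ * Illustrative sketch only (not part of the original file): expected
+ * plateau stepping for ip_next_mtu() per the RFC 1191 table above.
+ * The function name check_plateaus() is hypothetical.
+ */
+#if 0
+static void
+check_plateaus(void)
+{
+	/* The next smaller plateau below a 1500-byte MTU is 1492. */
+	KASSERT(ip_next_mtu(1500, 1) == 1492, ("smaller plateau"));
+	/* The next larger plateau above a 1500-byte MTU is 2002. */
+	KASSERT(ip_next_mtu(1500, -1) == 2002, ("larger plateau"));
+	/* 68 is the last nonzero plateau; below it 0 is returned. */
+	KASSERT(ip_next_mtu(68, 1) == 0, ("bottom of table"));
+}
+#endif
+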
+
+/*
+ * badport_bandlim() - check for ICMP bandwidth limit
+ *
+ * Return 0 if it is ok to send an ICMP error response, -1 if we have
+ * hit our bandwidth limit and it is not ok.
+ *
+ * If icmplim is <= 0, the feature is disabled and 0 is returned.
+ *
+ * For now we separate the TCP and UDP subsystems w/ different 'which'
+ * values. We may eventually remove this separation (and simplify the
+ * code further).
+ *
+ * Note that the printing of the error message is delayed so we can
+ * properly print the icmp error rate that the system was trying to do
+ * (i.e. 22000/100 pps, etc...). This can cause long delays in printing
+ * the 'final' error, but it doesn't make sense to solve the printing
+ * delay with more complex code.
+ */
+
+int
+badport_bandlim(int which)
+{
+
+#define N(a) (sizeof (a) / sizeof (a[0]))
+ static struct rate {
+ const char *type;
+ struct timeval lasttime;
+ int curpps;
+ } rates[BANDLIM_MAX+1] = {
+ { "icmp unreach response" },
+ { "icmp ping response" },
+ { "icmp tstamp response" },
+ { "closed port RST response" },
+ { "open port RST response" },
+ { "icmp6 unreach response" }
+ };
+
+ /*
+ * Return ok status if feature disabled or argument out of range.
+ */
+ if (V_icmplim > 0 && (u_int) which < N(rates)) {
+ struct rate *r = &rates[which];
+ int opps = r->curpps;
+
+ if (!ppsratecheck(&r->lasttime, &r->curpps, V_icmplim))
+ return -1; /* discard packet */
+ /*
+ * If we've dropped below the threshold after having
+ * rate-limited traffic print the message. This preserves
+ * the previous behaviour at the expense of added complexity.
+ */
+ if (V_icmplim_output && opps > V_icmplim)
+ log(LOG_NOTICE, "Limiting %s from %d to %d packets/sec\n",
+ r->type, opps, V_icmplim);
+ }
+ return 0; /* okay to send packet */
+#undef N
+}
diff --git a/freebsd/sys/netinet/ip_icmp.h b/freebsd/sys/netinet/ip_icmp.h
new file mode 100644
index 00000000..903f033d
--- /dev/null
+++ b/freebsd/sys/netinet/ip_icmp.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/ip_icmp.h>
diff --git a/freebsd/sys/netinet/ip_id.c b/freebsd/sys/netinet/ip_id.c
new file mode 100644
index 00000000..ba99cdbb
--- /dev/null
+++ b/freebsd/sys/netinet/ip_id.c
@@ -0,0 +1,211 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+
+/*-
+ * Copyright (c) 2008 Michael J. Silbersack.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * IP ID generation is a fascinating topic.
+ *
+ * In order to avoid ID collisions during packet reassembly, common sense
+ * dictates that the period between reuse of IDs be as large as possible.
+ * This leads to the classic implementation of a system-wide counter, thereby
+ * ensuring that IDs repeat only once every 2^16 packets.
+ *
+ * Subsequent security researchers have pointed out that using a global
+ * counter makes ID values predictable. This predictability allows traffic
+ * analysis, idle scanning, and even packet injection in specific cases.
+ * These results suggest that IP IDs should be as random as possible.
+ *
+ * The "searchable queues" algorithm used in this IP ID implementation was
+ * proposed by Amit Klein. It is a compromise between the above two
+ * viewpoints that has provable behavior that can be tuned to the user's
+ * requirements.
+ *
+ * The basic concept is that we supplement a standard random number generator
+ * with a queue of the last L IDs that we have handed out to ensure that all
+ * IDs have a period of at least L.
+ *
+ * To efficiently implement this idea, we keep two data structures: a
+ * circular array of IDs of size L and a bitstring of 65536 bits.
+ *
+ * To start, we ask the RNG for a new ID. A quick index into the bitstring
+ * is used to determine if this is a recently used value. The process is
+ * repeated until a value is returned that is not in the bitstring.
+ *
+ * Having found a usable ID, we remove the ID stored at the current position
+ * in the queue from the bitstring and replace it with our new ID. Our new
+ * ID is then added to the bitstring and the queue pointer is incremented.
+ *
+ * The lower limit of 512 was chosen because there doesn't seem to be much
+ * point to having a smaller value. The upper limit of 32768 was chosen for
+ * two reasons. First, every step above 32768 decreases the entropy. Taken
+ * to an extreme, 65533 would offer 1 bit of entropy. Second, the number of
+ * attempts it takes the algorithm to find an unused ID drastically
+ * increases, killing performance. The default value of 8192 was chosen
+ * because it provides a good tradeoff between randomness and non-repetition.
+ *
+ * With L=8192, the queue will use 16K of memory. The bitstring always
+ * uses 8K of memory. No memory is allocated until the use of random ids is
+ * enabled.
+ */
+
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/libkern.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/random.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/sys/bitstring.h>
+
+static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
+
+static u_int16_t *id_array = NULL;
+static bitstr_t *id_bits = NULL;
+static int array_ptr = 0;
+static int array_size = 8192;
+static int random_id_collisions = 0;
+static int random_id_total = 0;
+static struct mtx ip_id_mtx;
+
+static void ip_initid(void);
+static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);
+
+MTX_SYSINIT(ip_id_mtx, &ip_id_mtx, "ip_id_mtx", MTX_DEF);
+
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period, CTLTYPE_INT|CTLFLAG_RW,
+ &array_size, 0, sysctl_ip_id_change, "IU", "IP ID Array size");
+SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions, CTLFLAG_RD,
+ &random_id_collisions, 0, "Count of IP ID collisions");
+SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD,
+ &random_id_total, 0, "Count of IP IDs created");
+
+static int
+sysctl_ip_id_change(SYSCTL_HANDLER_ARGS)
+{
+ int error, new;
+
+ new = array_size;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (new >= 512 && new <= 32768) {
+ mtx_lock(&ip_id_mtx);
+ array_size = new;
+ ip_initid();
+ mtx_unlock(&ip_id_mtx);
+ } else
+ error = EINVAL;
+ }
+ return (error);
+}
+
+/*
+ * ip_initid() runs with a mutex held and may execute in a network context.
+ * As a result, it uses M_NOWAIT. Ideally, we would always do this
+ * allocation from the sysctl context and have it be an invariant that if
+ * this random ID allocation mode is selected, the buffers are present. This
+ * would also avoid potential network context failures of IP ID generation.
+ */
+static void
+ip_initid(void)
+{
+
+ mtx_assert(&ip_id_mtx, MA_OWNED);
+
+ if (id_array != NULL) {
+ free(id_array, M_IPID);
+ free(id_bits, M_IPID);
+ }
+ random_id_collisions = 0;
+ random_id_total = 0;
+ array_ptr = 0;
+ id_array = (u_int16_t *) malloc(array_size * sizeof(u_int16_t),
+ M_IPID, M_NOWAIT | M_ZERO);
+ id_bits = (bitstr_t *) malloc(bitstr_size(65536), M_IPID,
+ M_NOWAIT | M_ZERO);
+ if (id_array == NULL || id_bits == NULL) {
+ /* Neither or both. */
+ if (id_array != NULL) {
+ free(id_array, M_IPID);
+ id_array = NULL;
+ }
+ if (id_bits != NULL) {
+ free(id_bits, M_IPID);
+ id_bits = NULL;
+ }
+ }
+}
+
+u_int16_t
+ip_randomid(void)
+{
+ u_int16_t new_id;
+
+ mtx_lock(&ip_id_mtx);
+ if (id_array == NULL)
+ ip_initid();
+
+ /*
+ * Fail gracefully; return a fixed id if memory allocation failed;
+ * ideally we wouldn't do allocation in this context in order to
+ * avoid the possibility of this failure mode.
+ */
+ if (id_array == NULL) {
+ mtx_unlock(&ip_id_mtx);
+ return (1);
+ }
+
+ /*
+ * To avoid a conflict with the zeros that the array is initially
+ * filled with, we never hand out an id of zero.
+ */
+ new_id = 0;
+ do {
+ if (new_id != 0)
+ random_id_collisions++;
+ arc4rand(&new_id, sizeof(new_id), 0);
+ } while (bit_test(id_bits, new_id) || new_id == 0);
+ bit_clear(id_bits, id_array[array_ptr]);
+ bit_set(id_bits, new_id);
+ id_array[array_ptr] = new_id;
+ array_ptr++;
+ if (array_ptr == array_size)
+ array_ptr = 0;
+ random_id_total++;
+ mtx_unlock(&ip_id_mtx);
+ return (new_id);
+}
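
Editorial aside (not part of the diffed FreeBSD sources): ip_randomid() above keeps the last net.inet.ip.random_id_period IDs in a FIFO and mirrors them in a 65536-bit bitmap, so a freshly issued ID cannot repeat until it ages out of the window. The standalone userland sketch below illustrates the same sliding-window technique; the names WINDOW, recent[] and pick_id() are invented for illustration, and random() stands in for the kernel's arc4rand().

/* Illustrative userland sketch of the sliding-window ID generator above. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define WINDOW 8192                      /* like net.inet.ip.random_id_period */

static uint16_t recent[WINDOW];          /* FIFO of recently issued IDs */
static uint8_t  in_use[65536 / 8];       /* bitmap over the full 16-bit space */
static int      head;

static int  bit_is_set(uint16_t id) { return (in_use[id >> 3] & (1 << (id & 7))); }
static void bit_mark(uint16_t id)   { in_use[id >> 3] |= (1 << (id & 7)); }
static void bit_unmark(uint16_t id) { in_use[id >> 3] &= ~(1 << (id & 7)); }

/* Pick an ID that is non-zero and not among the last WINDOW IDs issued. */
static uint16_t
pick_id(void)
{
	uint16_t id;

	do {
		id = (uint16_t)(random() & 0xffff);
	} while (id == 0 || bit_is_set(id));

	bit_unmark(recent[head]);        /* the oldest ID becomes reusable */
	bit_mark(id);
	recent[head] = id;
	head = (head + 1) % WINDOW;
	return (id);
}

int
main(void)
{
	for (int i = 0; i < 5; i++)
		printf("%u\n", pick_id());
	return (0);
}
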
diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c
new file mode 100644
index 00000000..3964e886
--- /dev/null
+++ b/freebsd/sys/netinet/ip_input.c
@@ -0,0 +1,1794 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_bootp.h>
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_ipstealth.h>
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_route.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/callout.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/pfil.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/vnet.h>
+#include <freebsd/net/flowtable.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/ip_options.h>
+#include <freebsd/machine/in_cksum.h>
+#include <freebsd/netinet/ip_carp.h>
+#ifdef IPSEC
+#include <freebsd/netinet/ip_ipsec.h>
+#endif /* IPSEC */
+
+#include <freebsd/sys/socketvar.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#ifdef CTASSERT
+CTASSERT(sizeof(struct ip) == 20);
+#endif
+
+struct rwlock in_ifaddr_lock;
+RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
+
+VNET_DEFINE(int, rsvp_on);
+
+VNET_DEFINE(int, ipforwarding);
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
+ &VNET_NAME(ipforwarding), 0,
+ "Enable IP forwarding between interfaces");
+
+static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */
+#define V_ipsendredirects VNET(ipsendredirects)
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
+ &VNET_NAME(ipsendredirects), 0,
+ "Enable sending IP redirects");
+
+VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
+ &VNET_NAME(ip_defttl), 0,
+ "Maximum TTL on IP packets");
+
+static VNET_DEFINE(int, ip_keepfaith);
+#define V_ip_keepfaith VNET(ip_keepfaith)
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
+ &VNET_NAME(ip_keepfaith), 0,
+ "Enable packet capture for FAITH IPv4->IPv6 translator daemon");
+
+static VNET_DEFINE(int, ip_sendsourcequench);
+#define V_ip_sendsourcequench VNET(ip_sendsourcequench)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
+ &VNET_NAME(ip_sendsourcequench), 0,
+ "Enable the transmission of source quench packets");
+
+VNET_DEFINE(int, ip_do_randomid);
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
+ &VNET_NAME(ip_do_randomid), 0,
+ "Assign random ip_id values");
+
+/*
+ * XXX - Setting ip_checkinterface mostly implements the receive side of
+ * the Strong ES model described in RFC 1122, but since the routing table
+ * and transmit implementation do not implement the Strong ES model,
+ * setting this to 1 results in an odd hybrid.
+ *
+ * XXX - ip_checkinterface currently must be disabled if you use ipnat
+ * to translate the destination address to another local interface.
+ *
+ * XXX - ip_checkinterface must be disabled if you add IP aliases
+ * to the loopback interface instead of the interface where the
+ * packets for those addresses are received.
+ */
+static VNET_DEFINE(int, ip_checkinterface);
+#define V_ip_checkinterface VNET(ip_checkinterface)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
+ &VNET_NAME(ip_checkinterface), 0,
+ "Verify packet arrives on correct interface");
+
+VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */
+
+static struct netisr_handler ip_nh = {
+ .nh_name = "ip",
+ .nh_handler = ip_input,
+ .nh_proto = NETISR_IP,
+ .nh_policy = NETISR_POLICY_FLOW,
+};
+
+extern struct domain inetdomain;
+extern struct protosw inetsw[];
+u_char ip_protox[IPPROTO_MAX];
+VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */
+VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */
+VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */
+
+VNET_DEFINE(struct ipstat, ipstat);
+SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
+ &VNET_NAME(ipstat), ipstat,
+ "IP statistics (struct ipstat, netinet/ip_var.h)");
+
+static VNET_DEFINE(uma_zone_t, ipq_zone);
+static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
+static struct mtx ipqlock;
+
+#define V_ipq_zone VNET(ipq_zone)
+#define V_ipq VNET(ipq)
+
+#define IPQ_LOCK() mtx_lock(&ipqlock)
+#define IPQ_UNLOCK() mtx_unlock(&ipqlock)
+#define IPQ_LOCK_INIT() mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
+#define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED)
+
+static void maxnipq_update(void);
+static void ipq_zone_change(void *);
+static void ip_drain_locked(void);
+
+static VNET_DEFINE(int, maxnipq); /* Administrative limit on # reass queues. */
+static VNET_DEFINE(int, nipq); /* Total # of reass queues */
+#define V_maxnipq VNET(maxnipq)
+#define V_nipq VNET(nipq)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD,
+ &VNET_NAME(nipq), 0,
+ "Current number of IPv4 fragment reassembly queue entries");
+
+static VNET_DEFINE(int, maxfragsperpacket);
+#define V_maxfragsperpacket VNET(maxfragsperpacket)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
+ &VNET_NAME(maxfragsperpacket), 0,
+ "Maximum number of IPv4 fragments allowed per packet");
+
+struct callout ipport_tick_callout;
+
+#ifdef IPCTL_DEFMTU
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
+ &ip_mtu, 0, "Default MTU");
+#endif
+
+#ifdef IPSTEALTH
+VNET_DEFINE(int, ipstealth);
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
+ &VNET_NAME(ipstealth), 0,
+ "IP stealth mode, no TTL decrementation on forwarding");
+#endif
+
+#ifdef FLOWTABLE
+static VNET_DEFINE(int, ip_output_flowtable_size) = 2048;
+VNET_DEFINE(struct flowtable *, ip_ft);
+#define V_ip_output_flowtable_size VNET(ip_output_flowtable_size)
+
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
+ &VNET_NAME(ip_output_flowtable_size), 2048,
+ "number of entries in the per-cpu output flow caches");
+#endif
+
+VNET_DEFINE(int, fw_one_pass) = 1;
+
+static void ip_freef(struct ipqhead *, struct ipq *);
+
+/*
+ * Kernel module interface for updating ipstat. The argument is an index
+ * into ipstat treated as an array of u_long. While this encodes the general
+ * layout of ipstat into the caller, it doesn't encode its location, so that
+ * future changes to add, for example, per-CPU stats support won't cause
+ * binary compatibility problems for kernel modules.
+ */
+void
+kmod_ipstat_inc(int statnum)
+{
+
+ (*((u_long *)&V_ipstat + statnum))++;
+}
+
+void
+kmod_ipstat_dec(int statnum)
+{
+
+ (*((u_long *)&V_ipstat + statnum))--;
+}
+
+static int
+sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
+{
+ int error, qlimit;
+
+ netisr_getqlimit(&ip_nh, &qlimit);
+ error = sysctl_handle_int(oidp, &qlimit, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qlimit < 1)
+ return (EINVAL);
+ return (netisr_setqlimit(&ip_nh, qlimit));
+}
+SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
+ CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
+ "Maximum size of the IP input queue");
+
+static int
+sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
+{
+ u_int64_t qdrops_long;
+ int error, qdrops;
+
+ netisr_getqdrops(&ip_nh, &qdrops_long);
+ qdrops = qdrops_long;
+ error = sysctl_handle_int(oidp, &qdrops, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qdrops != 0)
+ return (EINVAL);
+ netisr_clearqdrops(&ip_nh);
+ return (0);
+}
+
+SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
+ CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
+ "Number of packets dropped from the IP input queue");
+
+/*
+ * IP initialization: fill in IP protocol switch table.
+ * All protocols not implemented in kernel go to raw IP protocol handler.
+ */
+void
+ip_init(void)
+{
+ struct protosw *pr;
+ int i;
+
+ V_ip_id = time_second & 0xffff;
+
+ TAILQ_INIT(&V_in_ifaddrhead);
+ V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
+
+ /* Initialize IP reassembly queue. */
+ for (i = 0; i < IPREASS_NHASH; i++)
+ TAILQ_INIT(&V_ipq[i]);
+ V_maxnipq = nmbclusters / 32;
+ V_maxfragsperpacket = 16;
+ V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
+ NULL, UMA_ALIGN_PTR, 0);
+ maxnipq_update();
+
+ /* Initialize packet filter hooks. */
+ V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
+ V_inet_pfil_hook.ph_af = AF_INET;
+ if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to register pfil hook, "
+ "error %d\n", __func__, i);
+
+#ifdef FLOWTABLE
+ if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
+ &V_ip_output_flowtable_size)) {
+ if (V_ip_output_flowtable_size < 256)
+ V_ip_output_flowtable_size = 256;
+ if (!powerof2(V_ip_output_flowtable_size)) {
+ printf("flowtable must be power of 2 size\n");
+ V_ip_output_flowtable_size = 2048;
+ }
+ } else {
+ /*
+ * round up to the next power of 2
+ */
+ V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
+ }
+ V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
+#endif
+
+ /* Skip initialization of globals for non-default instances. */
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+
+ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
+ if (pr == NULL)
+ panic("ip_init: PF_INET not found");
+
+ /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
+ for (i = 0; i < IPPROTO_MAX; i++)
+ ip_protox[i] = pr - inetsw;
+ /*
+ * Cycle through IP protocols and put them into the appropriate place
+ * in ip_protox[].
+ */
+ for (pr = inetdomain.dom_protosw;
+ pr < inetdomain.dom_protoswNPROTOSW; pr++)
+ if (pr->pr_domain->dom_family == PF_INET &&
+ pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
+ /* Be careful to only index valid IP protocols. */
+ if (pr->pr_protocol < IPPROTO_MAX)
+ ip_protox[pr->pr_protocol] = pr - inetsw;
+ }
+
+ /* Start ipport_tick. */
+ callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
+ callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
+ EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
+ SHUTDOWN_PRI_DEFAULT);
+ EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
+ NULL, EVENTHANDLER_PRI_ANY);
+
+ /* Initialize various other remaining things. */
+ IPQ_LOCK_INIT();
+ netisr_register(&ip_nh);
+}
+
+#ifdef VIMAGE
+void
+ip_destroy(void)
+{
+
+ /* Cleanup in_ifaddr hash table; should be empty. */
+ hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
+
+ IPQ_LOCK();
+ ip_drain_locked();
+ IPQ_UNLOCK();
+
+ uma_zdestroy(V_ipq_zone);
+}
+#endif
+
+void
+ip_fini(void *xtp)
+{
+
+ callout_stop(&ipport_tick_callout);
+}
+
+/*
+ * IP input routine. Checksum and byte-swap the header. If fragmented,
+ * try to reassemble. Process options. Pass to the next level.
+ */
+void
+ip_input(struct mbuf *m)
+{
+ struct ip *ip = NULL;
+ struct in_ifaddr *ia = NULL;
+ struct ifaddr *ifa;
+ struct ifnet *ifp;
+ int checkif, hlen = 0;
+ u_short sum;
+ int dchg = 0; /* dest changed after fw */
+ struct in_addr odst; /* original dst address */
+
+ M_ASSERTPKTHDR(m);
+
+ if (m->m_flags & M_FASTFWD_OURS) {
+ /*
+ * Firewall or NAT changed destination to local.
+ * We expect ip_len and ip_off to be in host byte order.
+ */
+ m->m_flags &= ~M_FASTFWD_OURS;
+ /* Set up some basics that will be used later. */
+ ip = mtod(m, struct ip *);
+ hlen = ip->ip_hl << 2;
+ goto ours;
+ }
+
+ IPSTAT_INC(ips_total);
+
+ if (m->m_pkthdr.len < sizeof(struct ip))
+ goto tooshort;
+
+ if (m->m_len < sizeof (struct ip) &&
+ (m = m_pullup(m, sizeof (struct ip))) == NULL) {
+ IPSTAT_INC(ips_toosmall);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+
+ if (ip->ip_v != IPVERSION) {
+ IPSTAT_INC(ips_badvers);
+ goto bad;
+ }
+
+ hlen = ip->ip_hl << 2;
+ if (hlen < sizeof(struct ip)) { /* minimum header length */
+ IPSTAT_INC(ips_badhlen);
+ goto bad;
+ }
+ if (hlen > m->m_len) {
+ if ((m = m_pullup(m, hlen)) == NULL) {
+ IPSTAT_INC(ips_badhlen);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+
+ /* 127/8 must not appear on wire - RFC1122 */
+ ifp = m->m_pkthdr.rcvif;
+ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+ (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
+ if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
+ IPSTAT_INC(ips_badaddr);
+ goto bad;
+ }
+ }
+
+ if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
+ sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
+ } else {
+ if (hlen == sizeof(struct ip)) {
+ sum = in_cksum_hdr(ip);
+ } else {
+ sum = in_cksum(m, hlen);
+ }
+ }
+ if (sum) {
+ IPSTAT_INC(ips_badsum);
+ goto bad;
+ }
+
+#ifdef ALTQ
+ if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
+ /* packet is dropped by traffic conditioner */
+ return;
+#endif
+
+ /*
+ * Convert fields to host representation.
+ */
+ ip->ip_len = ntohs(ip->ip_len);
+ if (ip->ip_len < hlen) {
+ IPSTAT_INC(ips_badlen);
+ goto bad;
+ }
+ ip->ip_off = ntohs(ip->ip_off);
+
+ /*
+ * Check that the amount of data in the buffers
+ * is at least as much as the IP header would have us expect.
+ * Trim mbufs if longer than we expect.
+ * Drop packet if shorter than we expect.
+ */
+ if (m->m_pkthdr.len < ip->ip_len) {
+tooshort:
+ IPSTAT_INC(ips_tooshort);
+ goto bad;
+ }
+ if (m->m_pkthdr.len > ip->ip_len) {
+ if (m->m_len == m->m_pkthdr.len) {
+ m->m_len = ip->ip_len;
+ m->m_pkthdr.len = ip->ip_len;
+ } else
+ m_adj(m, ip->ip_len - m->m_pkthdr.len);
+ }
+#ifdef IPSEC
+ /*
+ * Bypass packet filtering for packets from a tunnel (gif).
+ */
+ if (ip_ipsec_filtertunnel(m))
+ goto passin;
+#endif /* IPSEC */
+
+ /*
+ * Run through list of hooks for input packets.
+ *
+ * NB: Beware of the destination address changing (e.g.
+ * by NAT rewriting). When this happens, tell
+ * ip_forward to do the right thing.
+ */
+
+ /* Jump over all PFIL processing if hooks are not active. */
+ if (!PFIL_HOOKED(&V_inet_pfil_hook))
+ goto passin;
+
+ odst = ip->ip_dst;
+ if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
+ return;
+ if (m == NULL) /* consumed by filter */
+ return;
+
+ ip = mtod(m, struct ip *);
+ dchg = (odst.s_addr != ip->ip_dst.s_addr);
+ ifp = m->m_pkthdr.rcvif;
+
+#ifdef IPFIREWALL_FORWARD
+ if (m->m_flags & M_FASTFWD_OURS) {
+ m->m_flags &= ~M_FASTFWD_OURS;
+ goto ours;
+ }
+ if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
+ /*
+ * Directly ship the packet on. This allows forwarding
+ * packets originally destined to us to some other directly
+ * connected host.
+ */
+ ip_forward(m, dchg);
+ return;
+ }
+#endif /* IPFIREWALL_FORWARD */
+
+passin:
+ /*
+ * Process options and, if not destined for us,
+ * ship it on. ip_dooptions returns 1 when an
+ * error was detected (causing an icmp message
+ * to be sent and the original packet to be freed).
+ */
+ if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
+ return;
+
+ /* Greedy RSVP: snatch any PATH packet of the RSVP protocol, no
+ * matter whether it is destined for another node or is a multicast
+ * one; RSVP wants it and prevents it from being forwarded anywhere
+ * else. Also check that the RSVP daemon is running before grabbing
+ * the packet.
+ */
+ if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
+ goto ours;
+
+ /*
+ * Check our list of addresses, to see if the packet is for us.
+ * If we don't have any addresses, assume any unicast packet
+ * we receive might be for us (and let the upper layers deal
+ * with it).
+ */
+ if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
+ (m->m_flags & (M_MCAST|M_BCAST)) == 0)
+ goto ours;
+
+ /*
+ * Enable a consistency check between the destination address
+ * and the arrival interface for a unicast packet (the RFC 1122
+ * strong ES model) if IP forwarding is disabled and the packet
+ * is not locally generated and the packet is not subject to
+ * 'ipfw fwd'.
+ *
+ * XXX - Checking also should be disabled if the destination
+ * address is ipnat'ed to a different interface.
+ *
+ * XXX - Checking is incompatible with IP aliases added
+ * to the loopback interface instead of the interface where
+ * the packets are received.
+ *
+ * XXX - This is the case for carp vhost IPs as well so we
+ * insert a workaround. If the packet got here, we already
+ * checked with carp_iamatch() and carp_forus().
+ */
+ checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
+ ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
+ ifp->if_carp == NULL && (dchg == 0);
+
+ /*
+ * Check for exact addresses in the hash bucket.
+ */
+ /* IN_IFADDR_RLOCK(); */
+ LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
+ /*
+ * If the address matches, verify that the packet
+ * arrived via the correct interface if checking is
+ * enabled.
+ */
+ if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
+ (!checkif || ia->ia_ifp == ifp)) {
+ ifa_ref(&ia->ia_ifa);
+ /* IN_IFADDR_RUNLOCK(); */
+ goto ours;
+ }
+ }
+ /* IN_IFADDR_RUNLOCK(); */
+
+ /*
+ * Check for broadcast addresses.
+ *
+ * Only accept broadcast packets that arrive via the matching
+ * interface. Reception of forwarded directed broadcasts would
+ * be handled via ip_forward() and ether_output() with the loopback
+ * into the stack for SIMPLEX interfaces handled by ether_output().
+ */
+ if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ia = ifatoia(ifa);
+ if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
+ ip->ip_dst.s_addr) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto ours;
+ }
+ if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto ours;
+ }
+#ifdef BOOTP_COMPAT
+ if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ goto ours;
+ }
+#endif
+ }
+ IF_ADDR_UNLOCK(ifp);
+ ia = NULL;
+ }
+ /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
+ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
+ IPSTAT_INC(ips_cantforward);
+ m_freem(m);
+ return;
+ }
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ if (V_ip_mrouter) {
+ /*
+ * If we are acting as a multicast router, all
+ * incoming multicast packets are passed to the
+ * kernel-level multicast forwarding function.
+ * The packet is returned (relatively) intact; if
+ * ip_mforward() returns a non-zero value, the packet
+ * must be discarded, else it may be accepted below.
+ */
+ if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
+ IPSTAT_INC(ips_cantforward);
+ m_freem(m);
+ return;
+ }
+
+ /*
+ * The process-level routing daemon needs to receive
+ * all multicast IGMP packets, whether or not this
+ * host belongs to their destination groups.
+ */
+ if (ip->ip_p == IPPROTO_IGMP)
+ goto ours;
+ IPSTAT_INC(ips_forward);
+ }
+ /*
+ * Assume the packet is for us, to avoid prematurely taking
+ * a lock on the in_multi hash. Protocols must perform
+ * their own filtering and update statistics accordingly.
+ */
+ goto ours;
+ }
+ if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
+ goto ours;
+ if (ip->ip_dst.s_addr == INADDR_ANY)
+ goto ours;
+
+ /*
+ * FAITH (Firewall Aided Internet Translator)
+ */
+ if (ifp && ifp->if_type == IFT_FAITH) {
+ if (V_ip_keepfaith) {
+ if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
+ goto ours;
+ }
+ m_freem(m);
+ return;
+ }
+
+ /*
+ * Not for us; forward if possible and desirable.
+ */
+ if (V_ipforwarding == 0) {
+ IPSTAT_INC(ips_cantforward);
+ m_freem(m);
+ } else {
+#ifdef IPSEC
+ if (ip_ipsec_fwd(m))
+ goto bad;
+#endif /* IPSEC */
+ ip_forward(m, dchg);
+ }
+ return;
+
+ours:
+#ifdef IPSTEALTH
+ /*
+ * IPSTEALTH: Process non-routing options only
+ * if the packet is destined for us.
+ */
+ if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) {
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return;
+ }
+#endif /* IPSTEALTH */
+
+ /* Count the packet in the ip address stats */
+ if (ia != NULL) {
+ ia->ia_ifa.if_ipackets++;
+ ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
+ ifa_free(&ia->ia_ifa);
+ }
+
+ /*
+ * Attempt reassembly; if it succeeds, proceed.
+ * ip_reass() will return a different mbuf.
+ */
+ if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
+ m = ip_reass(m);
+ if (m == NULL)
+ return;
+ ip = mtod(m, struct ip *);
+ /* Get the header length of the reassembled packet */
+ hlen = ip->ip_hl << 2;
+ }
+
+ /*
+ * Further protocols expect the packet length to be w/o the
+ * IP header.
+ */
+ ip->ip_len -= hlen;
+
+#ifdef IPSEC
+ /*
+ * Enforce IPsec policy checking if we are seeing the last header.
+ * Note that we do not visit this with protocols that have their own
+ * pcb-layer code, like udp/tcp/raw ip.
+ */
+ if (ip_ipsec_input(m))
+ goto bad;
+#endif /* IPSEC */
+
+ /*
+ * Switch out to protocol's input routine.
+ */
+ IPSTAT_INC(ips_delivered);
+
+ (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
+ return;
+bad:
+ m_freem(m);
+}
+
+/*
+ * After maxnipq has been updated, propagate the change to UMA. The UMA zone
+ * max has slightly different semantics than the sysctl, for historical
+ * reasons.
+ */
+static void
+maxnipq_update(void)
+{
+
+ /*
+ * -1 for unlimited allocation.
+ */
+ if (V_maxnipq < 0)
+ uma_zone_set_max(V_ipq_zone, 0);
+ /*
+ * Positive number for specific bound.
+ */
+ if (V_maxnipq > 0)
+ uma_zone_set_max(V_ipq_zone, V_maxnipq);
+ /*
+ * Zero specifies no further fragment queue allocation -- set the
+ * bound very low, but rely on implementation elsewhere to actually
+ * prevent allocation and reclaim current queues.
+ */
+ if (V_maxnipq == 0)
+ uma_zone_set_max(V_ipq_zone, 1);
+}
+
+static void
+ipq_zone_change(void *tag)
+{
+
+ if (V_maxnipq > 0 && V_maxnipq < (nmbclusters / 32)) {
+ V_maxnipq = nmbclusters / 32;
+ maxnipq_update();
+ }
+}
+
+static int
+sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
+{
+ int error, i;
+
+ i = V_maxnipq;
+ error = sysctl_handle_int(oidp, &i, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ /*
+ * XXXRW: Might be a good idea to sanity check the argument and place
+ * an extreme upper bound.
+ */
+ if (i < -1)
+ return (EINVAL);
+ V_maxnipq = i;
+ maxnipq_update();
+ return (0);
+}
+
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
+ NULL, 0, sysctl_maxnipq, "I",
+ "Maximum number of IPv4 fragment reassembly queue entries");
+
+/*
+ * Take an incoming datagram fragment and try to reassemble it into a
+ * whole datagram. If the argument is the first fragment or one in
+ * between, the function will return NULL and store the mbuf in the
+ * fragment chain. If the argument is the last fragment,
+ * the packet will be reassembled and the pointer to the new
+ * mbuf returned for further processing. Only m_tags attached
+ * to the first packet/fragment are preserved.
+ * The IP header is *NOT* adjusted out of iplen.
+ */
+struct mbuf *
+ip_reass(struct mbuf *m)
+{
+ struct ip *ip;
+ struct mbuf *p, *q, *nq, *t;
+ struct ipq *fp = NULL;
+ struct ipqhead *head;
+ int i, hlen, next;
+ u_int8_t ecn, ecn0;
+ u_short hash;
+
+ /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
+ if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
+ IPSTAT_INC(ips_fragments);
+ IPSTAT_INC(ips_fragdropped);
+ m_freem(m);
+ return (NULL);
+ }
+
+ ip = mtod(m, struct ip *);
+ hlen = ip->ip_hl << 2;
+
+ hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
+ head = &V_ipq[hash];
+ IPQ_LOCK();
+
+ /*
+ * Look for queue of fragments
+ * of this datagram.
+ */
+ TAILQ_FOREACH(fp, head, ipq_list)
+ if (ip->ip_id == fp->ipq_id &&
+ ip->ip_src.s_addr == fp->ipq_src.s_addr &&
+ ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
+#ifdef MAC
+ mac_ipq_match(m, fp) &&
+#endif
+ ip->ip_p == fp->ipq_p)
+ goto found;
+
+ fp = NULL;
+
+ /*
+ * Attempt to trim the number of allocated fragment queues if it
+ * exceeds the administrative limit.
+ */
+ if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) {
+ /*
+ * drop something from the tail of the current queue
+ * before proceeding further
+ */
+ struct ipq *q = TAILQ_LAST(head, ipqhead);
+ if (q == NULL) { /* gak */
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead);
+ if (r) {
+ IPSTAT_ADD(ips_fragtimeout,
+ r->ipq_nfrags);
+ ip_freef(&V_ipq[i], r);
+ break;
+ }
+ }
+ } else {
+ IPSTAT_ADD(ips_fragtimeout, q->ipq_nfrags);
+ ip_freef(head, q);
+ }
+ }
+
+found:
+ /*
+ * Adjust ip_len to not reflect header,
+ * convert offset of this to bytes.
+ */
+ ip->ip_len -= hlen;
+ if (ip->ip_off & IP_MF) {
+ /*
+ * Make sure that fragments have a data length
+ * that's a non-zero multiple of 8 bytes.
+ */
+ if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
+ IPSTAT_INC(ips_toosmall); /* XXX */
+ goto dropfrag;
+ }
+ m->m_flags |= M_FRAG;
+ } else
+ m->m_flags &= ~M_FRAG;
+ ip->ip_off <<= 3;
+
+
+ /*
+ * Attempt reassembly; if it succeeds, proceed.
+ * ip_reass() will return a different mbuf.
+ */
+ IPSTAT_INC(ips_fragments);
+ m->m_pkthdr.header = ip;
+
+ /* Previous ip_reass() started here. */
+ /*
+ * Presence of header sizes in mbufs
+ * would confuse code below.
+ */
+ m->m_data += hlen;
+ m->m_len -= hlen;
+
+ /*
+ * If first fragment to arrive, create a reassembly queue.
+ */
+ if (fp == NULL) {
+ fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
+ if (fp == NULL)
+ goto dropfrag;
+#ifdef MAC
+ if (mac_ipq_init(fp, M_NOWAIT) != 0) {
+ uma_zfree(V_ipq_zone, fp);
+ fp = NULL;
+ goto dropfrag;
+ }
+ mac_ipq_create(m, fp);
+#endif
+ TAILQ_INSERT_HEAD(head, fp, ipq_list);
+ V_nipq++;
+ fp->ipq_nfrags = 1;
+ fp->ipq_ttl = IPFRAGTTL;
+ fp->ipq_p = ip->ip_p;
+ fp->ipq_id = ip->ip_id;
+ fp->ipq_src = ip->ip_src;
+ fp->ipq_dst = ip->ip_dst;
+ fp->ipq_frags = m;
+ m->m_nextpkt = NULL;
+ goto done;
+ } else {
+ fp->ipq_nfrags++;
+#ifdef MAC
+ mac_ipq_update(m, fp);
+#endif
+ }
+
+#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
+
+ /*
+ * Handle ECN by comparing this segment with the first one;
+ * if CE is set, do not lose CE.
+ * drop if CE and not-ECT are mixed for the same packet.
+ */
+ ecn = ip->ip_tos & IPTOS_ECN_MASK;
+ ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
+ if (ecn == IPTOS_ECN_CE) {
+ if (ecn0 == IPTOS_ECN_NOTECT)
+ goto dropfrag;
+ if (ecn0 != IPTOS_ECN_CE)
+ GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
+ }
+ if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
+ goto dropfrag;
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
+ if (GETIP(q)->ip_off > ip->ip_off)
+ break;
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us, otherwise
+ * stick new segment in the proper place.
+ *
+ * If some of the data is dropped from the preceding
+ * segment, then its checksum is invalidated.
+ */
+ if (p) {
+ i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
+ if (i > 0) {
+ if (i >= ip->ip_len)
+ goto dropfrag;
+ m_adj(m, i);
+ m->m_pkthdr.csum_flags = 0;
+ ip->ip_off += i;
+ ip->ip_len -= i;
+ }
+ m->m_nextpkt = p->m_nextpkt;
+ p->m_nextpkt = m;
+ } else {
+ m->m_nextpkt = fp->ipq_frags;
+ fp->ipq_frags = m;
+ }
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
+ q = nq) {
+ i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
+ if (i < GETIP(q)->ip_len) {
+ GETIP(q)->ip_len -= i;
+ GETIP(q)->ip_off += i;
+ m_adj(q, i);
+ q->m_pkthdr.csum_flags = 0;
+ break;
+ }
+ nq = q->m_nextpkt;
+ m->m_nextpkt = nq;
+ IPSTAT_INC(ips_fragdropped);
+ fp->ipq_nfrags--;
+ m_freem(q);
+ }
+
+ /*
+ * Check for complete reassembly and perform frag per packet
+ * limiting.
+ *
+ * Frag limiting is performed here so that the nth frag has
+ * a chance to complete the packet before we drop the packet.
+ * As a result, n+1 frags are actually allowed per packet, but
+ * only n will ever be stored. (n = maxfragsperpacket.)
+ *
+ */
+ next = 0;
+ for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
+ if (GETIP(q)->ip_off != next) {
+ if (fp->ipq_nfrags > V_maxfragsperpacket) {
+ IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
+ ip_freef(head, fp);
+ }
+ goto done;
+ }
+ next += GETIP(q)->ip_len;
+ }
+ /* Make sure the last packet didn't have the IP_MF flag */
+ if (p->m_flags & M_FRAG) {
+ if (fp->ipq_nfrags > V_maxfragsperpacket) {
+ IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
+ ip_freef(head, fp);
+ }
+ goto done;
+ }
+
+ /*
+ * Reassembly is complete. Make sure the packet is a sane size.
+ */
+ q = fp->ipq_frags;
+ ip = GETIP(q);
+ if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
+ IPSTAT_INC(ips_toolong);
+ IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
+ ip_freef(head, fp);
+ goto done;
+ }
+
+ /*
+ * Concatenate fragments.
+ */
+ m = q;
+ t = m->m_next;
+ m->m_next = NULL;
+ m_cat(m, t);
+ nq = q->m_nextpkt;
+ q->m_nextpkt = NULL;
+ for (q = nq; q != NULL; q = nq) {
+ nq = q->m_nextpkt;
+ q->m_nextpkt = NULL;
+ m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
+ m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
+ m_cat(m, q);
+ }
+ /*
+ * In order to do checksumming faster we do 'end-around carry' here
+ * (and not in for{} loop), though it implies we are not going to
+ * reassemble more than 64k fragments.
+ */
+ m->m_pkthdr.csum_data =
+ (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16);
+#ifdef MAC
+ mac_ipq_reassemble(fp, m);
+ mac_ipq_destroy(fp);
+#endif
+
+ /*
+ * Create header for new ip packet by modifying header of first
+ * packet; dequeue and discard fragment reassembly header.
+ * Make header visible.
+ */
+ ip->ip_len = (ip->ip_hl << 2) + next;
+ ip->ip_src = fp->ipq_src;
+ ip->ip_dst = fp->ipq_dst;
+ TAILQ_REMOVE(head, fp, ipq_list);
+ V_nipq--;
+ uma_zfree(V_ipq_zone, fp);
+ m->m_len += (ip->ip_hl << 2);
+ m->m_data -= (ip->ip_hl << 2);
+ /* some debugging cruft by sklower, below, will go away soon */
+ if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */
+ m_fixhdr(m);
+ IPSTAT_INC(ips_reassembled);
+ IPQ_UNLOCK();
+ return (m);
+
+dropfrag:
+ IPSTAT_INC(ips_fragdropped);
+ if (fp != NULL)
+ fp->ipq_nfrags--;
+ m_freem(m);
+done:
+ IPQ_UNLOCK();
+ return (NULL);
+
+#undef GETIP
+}
+
+/*
+ * Free a fragment reassembly header and all
+ * associated datagrams.
+ */
+static void
+ip_freef(struct ipqhead *fhp, struct ipq *fp)
+{
+ struct mbuf *q;
+
+ IPQ_LOCK_ASSERT();
+
+ while (fp->ipq_frags) {
+ q = fp->ipq_frags;
+ fp->ipq_frags = q->m_nextpkt;
+ m_freem(q);
+ }
+ TAILQ_REMOVE(fhp, fp, ipq_list);
+ uma_zfree(V_ipq_zone, fp);
+ V_nipq--;
+}
+
+/*
+ * IP timer processing;
+ * if a timer expires on a reassembly
+ * queue, discard it.
+ */
+void
+ip_slowtimo(void)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ struct ipq *fp;
+ int i;
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ IPQ_LOCK();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ for(fp = TAILQ_FIRST(&V_ipq[i]); fp;) {
+ struct ipq *fpp;
+
+ fpp = fp;
+ fp = TAILQ_NEXT(fp, ipq_list);
+ if(--fpp->ipq_ttl == 0) {
+ IPSTAT_ADD(ips_fragtimeout,
+ fpp->ipq_nfrags);
+ ip_freef(&V_ipq[i], fpp);
+ }
+ }
+ }
+ /*
+ * If we are over the maximum number of fragments
+ * (due to the limit being lowered), drain off
+ * enough to get down to the new limit.
+ */
+ if (V_maxnipq >= 0 && V_nipq > V_maxnipq) {
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ while (V_nipq > V_maxnipq &&
+ !TAILQ_EMPTY(&V_ipq[i])) {
+ IPSTAT_ADD(ips_fragdropped,
+ TAILQ_FIRST(&V_ipq[i])->ipq_nfrags);
+ ip_freef(&V_ipq[i],
+ TAILQ_FIRST(&V_ipq[i]));
+ }
+ }
+ }
+ CURVNET_RESTORE();
+ }
+ IPQ_UNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
+
+/*
+ * Drain off all datagram fragments.
+ */
+static void
+ip_drain_locked(void)
+{
+ int i;
+
+ IPQ_LOCK_ASSERT();
+
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ while(!TAILQ_EMPTY(&V_ipq[i])) {
+ IPSTAT_ADD(ips_fragdropped,
+ TAILQ_FIRST(&V_ipq[i])->ipq_nfrags);
+ ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
+ }
+ }
+}
+
+void
+ip_drain(void)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ IPQ_LOCK();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ ip_drain_locked();
+ CURVNET_RESTORE();
+ }
+ IPQ_UNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
+ in_rtqdrain();
+}
+
+/*
+ * The protocol to be inserted into ip_protox[] must be already registered
+ * in inetsw[], either statically or through pf_proto_register().
+ */
+int
+ipproto_register(short ipproto)
+{
+ struct protosw *pr;
+
+ /* Sanity checks. */
+ if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
+ return (EPROTONOSUPPORT);
+
+ /*
+ * The protocol slot must not be occupied by another protocol
+ * already. An index pointing to IPPROTO_RAW is unused.
+ */
+ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
+ if (pr == NULL)
+ return (EPFNOSUPPORT);
+ if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */
+ return (EEXIST);
+
+ /* Find the protocol position in inetsw[] and set the index. */
+ for (pr = inetdomain.dom_protosw;
+ pr < inetdomain.dom_protoswNPROTOSW; pr++) {
+ if (pr->pr_domain->dom_family == PF_INET &&
+ pr->pr_protocol && pr->pr_protocol == ipproto) {
+ ip_protox[pr->pr_protocol] = pr - inetsw;
+ return (0);
+ }
+ }
+ return (EPROTONOSUPPORT);
+}
+
+int
+ipproto_unregister(short ipproto)
+{
+ struct protosw *pr;
+
+ /* Sanity checks. */
+ if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
+ return (EPROTONOSUPPORT);
+
+ /* Check if the protocol was indeed registered. */
+ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
+ if (pr == NULL)
+ return (EPFNOSUPPORT);
+ if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */
+ return (ENOENT);
+
+ /* Reset the protocol slot to IPPROTO_RAW. */
+ ip_protox[ipproto] = pr - inetsw;
+ return (0);
+}
+
+/*
+ * Given address of next destination (final or next hop), return (referenced)
+ * internet address info of interface to be used to get there.
+ */
+struct in_ifaddr *
+ip_rtaddr(struct in_addr dst, u_int fibnum)
+{
+ struct route sro;
+ struct sockaddr_in *sin;
+ struct in_ifaddr *ia;
+
+ bzero(&sro, sizeof(sro));
+ sin = (struct sockaddr_in *)&sro.ro_dst;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = dst;
+ in_rtalloc_ign(&sro, 0, fibnum);
+
+ if (sro.ro_rt == NULL)
+ return (NULL);
+
+ ia = ifatoia(sro.ro_rt->rt_ifa);
+ ifa_ref(&ia->ia_ifa);
+ RTFREE(sro.ro_rt);
+ return (ia);
+}
+
+u_char inetctlerrmap[PRC_NCMDS] = {
+ 0, 0, 0, 0,
+ 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
+ EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
+ EMSGSIZE, EHOSTUNREACH, 0, 0,
+ 0, 0, EHOSTUNREACH, 0,
+ ENOPROTOOPT, ECONNREFUSED
+};
+
+/*
+ * Forward a packet. If some error occurs return the sender
+ * an icmp packet. Note we can't always generate a meaningful
+ * icmp message because icmp doesn't have a large enough repertoire
+ * of codes and types.
+ *
+ * If not forwarding, just drop the packet. This could be confusing
+ * if ipforwarding was zero but some routing protocol was advancing
+ * us as a gateway to somewhere. However, we must let the routing
+ * protocol deal with that.
+ *
+ * The srcrt parameter indicates whether the packet is being forwarded
+ * via a source route.
+ */
+void
+ip_forward(struct mbuf *m, int srcrt)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ struct in_ifaddr *ia;
+ struct mbuf *mcopy;
+ struct in_addr dest;
+ struct route ro;
+ int error, type = 0, code = 0, mtu = 0;
+
+ if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
+ IPSTAT_INC(ips_cantforward);
+ m_freem(m);
+ return;
+ }
+#ifdef IPSTEALTH
+ if (!V_ipstealth) {
+#endif
+ if (ip->ip_ttl <= IPTTLDEC) {
+ icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
+ 0, 0);
+ return;
+ }
+#ifdef IPSTEALTH
+ }
+#endif
+
+ ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
+#ifndef IPSEC
+ /*
+ * 'ia' may be NULL if there is no route for this destination.
+ * In case of IPsec, don't discard it just yet, but pass it to
+ * ip_output in case of outgoing IPsec policy.
+ */
+ if (!srcrt && ia == NULL) {
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
+ return;
+ }
+#endif
+
+ /*
+ * Save the IP header and at most 8 bytes of the payload,
+ * in case we need to generate an ICMP message to the src.
+ *
+ * XXX this can be optimized a lot by saving the data in a local
+ * buffer on the stack (72 bytes at most), and only allocating the
+ * mbuf if really necessary. The vast majority of the packets
+ * are forwarded without having to send an ICMP back (either
+ * because unnecessary, or because rate limited), so we are
+ * really wasting a lot of work here.
+ *
+ * We don't use m_copy() because it might return a reference
+ * to a shared cluster. Both this function and ip_output()
+ * assume exclusive access to the IP header in `m', so any
+ * data in a cluster may change before we reach icmp_error().
+ */
+ MGETHDR(mcopy, M_DONTWAIT, m->m_type);
+ if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
+ /*
+ * It's probably ok if the pkthdr dup fails (because
+ * the deep copy of the tag chain failed), but for now
+ * be conservative and just discard the copy since
+ * code below may some day want the tags.
+ */
+ m_free(mcopy);
+ mcopy = NULL;
+ }
+ if (mcopy != NULL) {
+ mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
+ mcopy->m_pkthdr.len = mcopy->m_len;
+ m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
+ }
+
+#ifdef IPSTEALTH
+ if (!V_ipstealth) {
+#endif
+ ip->ip_ttl -= IPTTLDEC;
+#ifdef IPSTEALTH
+ }
+#endif
+
+ /*
+ * If forwarding packet using same interface that it came in on,
+ * perhaps should send a redirect to sender to shortcut a hop.
+ * Only send redirect if source is sending directly to us,
+ * and if packet was not source routed (or has any options).
+ * Also, don't send redirect if forwarding using a default route
+ * or a route modified by a redirect.
+ */
+ dest.s_addr = 0;
+ if (!srcrt && V_ipsendredirects &&
+ ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
+ struct sockaddr_in *sin;
+ struct rtentry *rt;
+
+ bzero(&ro, sizeof(ro));
+ sin = (struct sockaddr_in *)&ro.ro_dst;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ip->ip_dst;
+ in_rtalloc_ign(&ro, 0, M_GETFIB(m));
+
+ rt = ro.ro_rt;
+
+ if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
+ satosin(rt_key(rt))->sin_addr.s_addr != 0) {
+#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
+ u_long src = ntohl(ip->ip_src.s_addr);
+
+ if (RTA(rt) &&
+ (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
+ if (rt->rt_flags & RTF_GATEWAY)
+ dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
+ else
+ dest.s_addr = ip->ip_dst.s_addr;
+ /* Router requirements say to send only host redirects */
+ type = ICMP_REDIRECT;
+ code = ICMP_REDIRECT_HOST;
+ }
+ }
+ if (rt)
+ RTFREE(rt);
+ }
+
+ /*
+ * Try to cache the route MTU from ip_output so we can consider it for
+ * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
+ */
+ bzero(&ro, sizeof(ro));
+
+ error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
+
+ if (error == EMSGSIZE && ro.ro_rt)
+ mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+
+ if (error)
+ IPSTAT_INC(ips_cantforward);
+ else {
+ IPSTAT_INC(ips_forward);
+ if (type)
+ IPSTAT_INC(ips_redirectsent);
+ else {
+ if (mcopy)
+ m_freem(mcopy);
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return;
+ }
+ }
+ if (mcopy == NULL) {
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return;
+ }
+
+ switch (error) {
+
+ case 0: /* forwarded, but need redirect */
+ /* type, code set above */
+ break;
+
+ case ENETUNREACH:
+ case EHOSTUNREACH:
+ case ENETDOWN:
+ case EHOSTDOWN:
+ default:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_HOST;
+ break;
+
+ case EMSGSIZE:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_NEEDFRAG;
+
+#ifdef IPSEC
+ /*
+ * If IPsec is configured for this path,
+ * override any MTU value possibly set by ip_output.
+ */
+ mtu = ip_ipsec_mtu(mcopy, mtu);
+#endif /* IPSEC */
+ /*
+ * If the MTU was set before make sure we are below the
+ * interface MTU.
+ * If the MTU wasn't set before use the interface mtu or
+ * fall back to the next smaller mtu step compared to the
+ * current packet size.
+ */
+ if (mtu != 0) {
+ if (ia != NULL)
+ mtu = min(mtu, ia->ia_ifp->if_mtu);
+ } else {
+ if (ia != NULL)
+ mtu = ia->ia_ifp->if_mtu;
+ else
+ mtu = ip_next_mtu(ip->ip_len, 0);
+ }
+ IPSTAT_INC(ips_cantfrag);
+ break;
+
+ case ENOBUFS:
+ /*
+ * A router should not generate ICMP_SOURCEQUENCH as
+ * required in RFC1812 Requirements for IP Version 4 Routers.
+ * Source quench could be a big problem under DoS attacks,
+ * or if the underlying interface is rate-limited.
+ * Those who need source quench packets may re-enable them
+ * via the net.inet.ip.sendsourcequench sysctl.
+ */
+ if (V_ip_sendsourcequench == 0) {
+ m_freem(mcopy);
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return;
+ } else {
+ type = ICMP_SOURCEQUENCH;
+ code = 0;
+ }
+ break;
+
+ case EACCES: /* ipfw denied packet */
+ m_freem(mcopy);
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return;
+ }
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ icmp_error(mcopy, type, code, dest.s_addr, mtu);
+}
+
+void
+ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
+ struct mbuf *m)
+{
+
+ if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
+ struct bintime bt;
+
+ bintime(&bt);
+ if (inp->inp_socket->so_options & SO_BINTIME) {
+ *mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
+ SCM_BINTIME, SOL_SOCKET);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ if (inp->inp_socket->so_options & SO_TIMESTAMP) {
+ struct timeval tv;
+
+ bintime2timeval(&bt, &tv);
+ *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
+ SCM_TIMESTAMP, SOL_SOCKET);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ }
+ if (inp->inp_flags & INP_RECVDSTADDR) {
+ *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
+ sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ if (inp->inp_flags & INP_RECVTTL) {
+ *mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
+ sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+#ifdef notyet
+ /* XXX
+ * Moving these out of udp_input() made them even more broken
+ * than they already were.
+ */
+ /* options were tossed already */
+ if (inp->inp_flags & INP_RECVOPTS) {
+ *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
+ sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ /* ip_srcroute doesn't do what we want here, need to fix */
+ if (inp->inp_flags & INP_RECVRETOPTS) {
+ *mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
+ sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+#endif
+ if (inp->inp_flags & INP_RECVIF) {
+ struct ifnet *ifp;
+ struct sdlbuf {
+ struct sockaddr_dl sdl;
+ u_char pad[32];
+ } sdlbuf;
+ struct sockaddr_dl *sdp;
+ struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
+
+ if (((ifp = m->m_pkthdr.rcvif))
+ && ( ifp->if_index && (ifp->if_index <= V_if_index))) {
+ sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
+ /*
+ * Change our mind and don't try to copy.
+ */
+ if ((sdp->sdl_family != AF_LINK)
+ || (sdp->sdl_len > sizeof(sdlbuf))) {
+ goto makedummy;
+ }
+ bcopy(sdp, sdl2, sdp->sdl_len);
+ } else {
+makedummy:
+ sdl2->sdl_len
+ = offsetof(struct sockaddr_dl, sdl_data[0]);
+ sdl2->sdl_family = AF_LINK;
+ sdl2->sdl_index = 0;
+ sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
+ }
+ *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
+ IP_RECVIF, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+}
+
+/*
+ * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
+ * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
+ * locking. This code remains in ip_input.c as ip_mroute.c is optionally
+ * compiled.
+ */
+static VNET_DEFINE(int, ip_rsvp_on);
+VNET_DEFINE(struct socket *, ip_rsvpd);
+
+#define V_ip_rsvp_on VNET(ip_rsvp_on)
+
+int
+ip_rsvp_init(struct socket *so)
+{
+
+ if (so->so_type != SOCK_RAW ||
+ so->so_proto->pr_protocol != IPPROTO_RSVP)
+ return EOPNOTSUPP;
+
+ if (V_ip_rsvpd != NULL)
+ return EADDRINUSE;
+
+ V_ip_rsvpd = so;
+ /*
+ * This may seem silly, but we need to be sure we don't over-increment
+ * the RSVP counter, in case something slips up.
+ */
+ if (!V_ip_rsvp_on) {
+ V_ip_rsvp_on = 1;
+ V_rsvp_on++;
+ }
+
+ return 0;
+}
+
+int
+ip_rsvp_done(void)
+{
+
+ V_ip_rsvpd = NULL;
+ /*
+ * This may seem silly, but we need to be sure we don't over-decrement
+ * the RSVP counter, in case something slips up.
+ */
+ if (V_ip_rsvp_on) {
+ V_ip_rsvp_on = 0;
+ V_rsvp_on--;
+ }
+ return 0;
+}
+
+void
+rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */
+{
+
+ if (rsvp_input_p) { /* call the real one if loaded */
+ rsvp_input_p(m, off);
+ return;
+ }
+
+ /* Can still get packets with rsvp_on = 0 if there is a local member
+ * of the group to which the RSVP packet is addressed. But in this
+ * case we want to throw the packet away.
+ */
+
+ if (!V_rsvp_on) {
+ m_freem(m);
+ return;
+ }
+
+ if (V_ip_rsvpd != NULL) {
+ rip_input(m, off);
+ return;
+ }
+ /* Drop the packet */
+ m_freem(m);
+}
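
Editorial aside (not part of the diffed FreeBSD sources): a detail of ip_reass() above that is easy to miss is the single "end-around carry" fold applied to m_pkthdr.csum_data after the per-fragment hardware checksums have been summed; the code comments note that doing the fold only once relies on never combining more than 64k worth of fragments. The hedged userland sketch below (csum_fold() and the sample inputs are invented for illustration) shows the general two-pass form of that fold for 16-bit one's-complement sums.

#include <stdint.h>
#include <stdio.h>

/*
 * Fold a 32-bit accumulation of 16-bit one's-complement words back into
 * 16 bits.  ip_reass() performs the first line once; a second pass
 * catches any carry produced by the first.
 */
static uint16_t
csum_fold(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* end-around carry */
	sum = (sum & 0xffff) + (sum >> 16);	/* fold the final carry, if any */
	return ((uint16_t)sum);
}

int
main(void)
{
	/* Pretend three fragments contributed these partial sums. */
	uint32_t acc = 0xfff0u + 0xffeeu + 0x1234u;

	printf("folded: 0x%04x\n", csum_fold(acc));
	return (0);
}
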
diff --git a/freebsd/sys/netinet/ip_ipsec.c b/freebsd/sys/netinet/ip_ipsec.c
new file mode 100644
index 00000000..f19d5e0e
--- /dev/null
+++ b/freebsd/sys/netinet/ip_ipsec.c
@@ -0,0 +1,424 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_sctp.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#include <freebsd/netinet/ip_ipsec.h>
+#ifdef SCTP
+#include <freebsd/netinet/sctp_crc32.h>
+#endif
+
+#include <freebsd/machine/in_cksum.h>
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#include <freebsd/netipsec/xform.h>
+#include <freebsd/netipsec/key.h>
+#endif /*IPSEC*/
+
+extern struct protosw inetsw[];
+
+#ifdef IPSEC
+#ifdef IPSEC_FILTERTUNNEL
+static VNET_DEFINE(int, ip4_ipsec_filtertunnel) = 1;
+#else
+static VNET_DEFINE(int, ip4_ipsec_filtertunnel) = 0;
+#endif
+#define V_ip4_ipsec_filtertunnel VNET(ip4_ipsec_filtertunnel)
+
+SYSCTL_DECL(_net_inet_ipsec);
+SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, filtertunnel,
+ CTLFLAG_RW, &VNET_NAME(ip4_ipsec_filtertunnel), 0,
+ "If set filter packets from an IPsec tunnel.");
+#endif /* IPSEC */
+
+/*
+ * Check if we have to jump over firewall processing for this packet.
+ * Called from ip_input().
+ * 1 = jump over firewall, 0 = packet goes through firewall.
+ */
+int
+ip_ipsec_filtertunnel(struct mbuf *m)
+{
+#if defined(IPSEC)
+
+ /*
+ * Bypass packet filtering for packets from a tunnel.
+ */
+ if (!V_ip4_ipsec_filtertunnel &&
+ m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
+ return 1;
+#endif
+ return 0;
+}
+
+/*
+ * Check if this packet has an active SA and needs to be dropped instead
+ * of forwarded.
+ * Called from ip_input().
+ * 1 = drop packet, 0 = forward packet.
+ */
+int
+ip_ipsec_fwd(struct mbuf *m)
+{
+#ifdef IPSEC
+ struct m_tag *mtag;
+ struct tdb_ident *tdbi;
+ struct secpolicy *sp;
+ int s, error;
+
+ mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
+ } else {
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
+ IP_FORWARDING, &error);
+ }
+ if (sp == NULL) { /* NB: can happen if error */
+ splx(s);
+ /*XXX error stat???*/
+ DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
+ return 1;
+ }
+
+ /*
+ * Check security policy against packet attributes.
+ */
+ error = ipsec_in_reject(sp, m);
+ KEY_FREESP(&sp);
+ splx(s);
+ if (error) {
+ IPSTAT_INC(ips_cantforward);
+ return 1;
+ }
+#endif /* IPSEC */
+ return 0;
+}
+
+/*
+ * Check if protocol type doesn't have a further header and do IPSEC
+ * decryption or reject right now. Protocols with further headers get
+ * their IPSEC treatment within the protocol-specific processing.
+ * Called from ip_input().
+ * 1 = drop packet, 0 = continue processing packet.
+ */
+int
+ip_ipsec_input(struct mbuf *m)
+{
+#ifdef IPSEC
+ struct ip *ip = mtod(m, struct ip *);
+ struct m_tag *mtag;
+ struct tdb_ident *tdbi;
+ struct secpolicy *sp;
+ int s, error;
+ /*
+ * Enforce IPsec policy checking if we are seeing the last header.
+ * Note that we do not visit this with protocols that have their own
+ * pcb-layer code, like udp/tcp/raw ip.
+ */
+ if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
+ /*
+ * Check if the packet has already had IPsec processing
+ * done. If so, then just pass it along. This tag gets
+ * set during AH, ESP, etc. input handling, before the
+ * packet is returned to the ip input queue for delivery.
+ */
+ mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
+ } else {
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
+ IP_FORWARDING, &error);
+ }
+ if (sp != NULL) {
+ /*
+ * Check security policy against packet attributes.
+ */
+ error = ipsec_in_reject(sp, m);
+ KEY_FREESP(&sp);
+ } else {
+ /* XXX error stat??? */
+ error = EINVAL;
+ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
+ return 1;
+ }
+ splx(s);
+ if (error)
+ return 1;
+ }
+#endif /* IPSEC */
+ return 0;
+}
+
+/*
+ * Compute the MTU for a forwarded packet that gets IPSEC encapsulated.
+ * Called from ip_forward().
+ * Returns MTU suggestion for ICMP needfrag reply.
+ */
+int
+ip_ipsec_mtu(struct mbuf *m, int mtu)
+{
+ /*
+ * If the packet is routed over an IPsec tunnel, tell the
+ * originator the tunnel MTU.
+ * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
+ * XXX quickhack!!!
+ */
+ struct secpolicy *sp = NULL;
+ int ipsecerror;
+ int ipsechdr;
+ struct route *ro;
+ sp = ipsec_getpolicybyaddr(m,
+ IPSEC_DIR_OUTBOUND,
+ IP_FORWARDING,
+ &ipsecerror);
+ if (sp != NULL) {
+ /* count IPsec header size */
+ ipsechdr = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL);
+
+ /*
+ * find the correct route for outer IPv4
+ * header, compute tunnel MTU.
+ */
+ if (sp->req != NULL &&
+ sp->req->sav != NULL &&
+ sp->req->sav->sah != NULL) {
+ ro = &sp->req->sav->sah->route_cache.sa_route;
+ if (ro->ro_rt && ro->ro_rt->rt_ifp) {
+ mtu =
+ ro->ro_rt->rt_rmx.rmx_mtu ?
+ ro->ro_rt->rt_rmx.rmx_mtu :
+ ro->ro_rt->rt_ifp->if_mtu;
+ mtu -= ipsechdr;
+ }
+ }
+ KEY_FREESP(&sp);
+ }
+ return mtu;
+}
+
+/*
+ * Check the packet's outbound security policy and, if required, perform
+ * IPsec processing on it.
+ * Called from ip_output().
+ * 1 = drop packet, 0 = continue processing packet,
+ * -1 = packet was reinjected; stop processing the packet.
+ */
+int
+ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
+ struct ifnet **ifp)
+{
+#ifdef IPSEC
+ struct secpolicy *sp = NULL;
+ struct ip *ip = mtod(*m, struct ip *);
+ struct tdb_ident *tdbi;
+ struct m_tag *mtag;
+ int s;
+ /*
+ * Check the security policy (SP) for the packet and, if
+ * required, do IPsec-related processing. There are two
+ * cases here; the first time a packet is sent through
+ * it will be untagged and handled by ipsec4_checkpolicy.
+ * If the packet is resubmitted to ip_output (e.g. after
+ * AH, ESP, etc. processing), there will be a tag to bypass
+ * the lookup and related policy checking.
+ */
+ mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
+ if (sp == NULL)
+ *error = -EINVAL; /* force silent drop */
+ m_tag_delete(*m, mtag);
+ } else {
+ sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
+ error, inp);
+ }
+ /*
+ * There are four return cases:
+ * sp != NULL apply IPsec policy
+ * sp == NULL, error == 0 no IPsec handling needed
+ * sp == NULL, error == -EINVAL discard packet w/o error
+ * sp == NULL, error != 0 discard packet, report error
+ */
+ if (sp != NULL) {
+ /* Loop detection, check if ipsec processing already done */
+ KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
+ for (mtag = m_tag_first(*m); mtag != NULL;
+ mtag = m_tag_next(*m, mtag)) {
+ if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
+ continue;
+ if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
+ mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
+ continue;
+ /*
+ * Check if policy has an SA associated with it.
+ * This can happen when an SP has yet to acquire
+ * an SA; e.g. on first reference. If it occurs,
+ * then we let ipsec4_process_packet do its thing.
+ */
+ if (sp->req->sav == NULL)
+ break;
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ if (tdbi->spi == sp->req->sav->spi &&
+ tdbi->proto == sp->req->sav->sah->saidx.proto &&
+ bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
+ sizeof (union sockaddr_union)) == 0) {
+ /*
+ * No IPsec processing is needed, free
+ * reference to SP.
+ *
+ * NB: null pointer to avoid free at
+ * done: below.
+ */
+ KEY_FREESP(&sp), sp = NULL;
+ splx(s);
+ goto done;
+ }
+ }
+
+ /*
+ * Do delayed checksums now because we send before
+ * this is done in the normal processing path.
+ */
+ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(*m);
+ (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+#ifdef SCTP
+ if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP) {
+ sctp_delayed_cksum(*m, (uint32_t)(ip->ip_hl << 2));
+ (*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP;
+ }
+#endif
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+
+ /* NB: callee frees mbuf */
+ *error = ipsec4_process_packet(*m, sp->req, *flags, 0);
+ if (*error == EJUSTRETURN) {
+ /*
+ * We had a SP with a level of 'use' and no SA. We
+ * will just continue to process the packet without
+ * IPsec processing and return without error.
+ */
+ *error = 0;
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+ goto done;
+ }
+ /*
+ * Preserve KAME behaviour: ENOENT can be returned
+ * when an SA acquire is in progress. Don't propagate
+ * this to user-level; it confuses applications.
+ *
+ * XXX this will go away when the SADB is redone.
+ */
+ if (*error == ENOENT)
+ *error = 0;
+ splx(s);
+ goto reinjected;
+ } else { /* sp == NULL */
+ splx(s);
+
+ if (*error != 0) {
+ /*
+ * Hack: -EINVAL is used to signal that a packet
+ * should be silently discarded. This is typically
+ * because we asked key management for an SA and
+ * it was delayed (e.g. kicked up to IKE).
+ */
+ if (*error == -EINVAL)
+ *error = 0;
+ goto bad;
+ } else {
+ /* No IPsec processing for this packet. */
+ }
+#ifdef notyet
+ /*
+ * If deferred crypto processing is needed, check that
+ * the interface supports it.
+ */
+ mtag = m_tag_find(*m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
+ if (mtag != NULL && ifp != NULL &&
+ ((*ifp)->if_capenable & IFCAP_IPSEC) == 0) {
+ /* notify IPsec to do its own crypto */
+ ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
+ *error = EHOSTUNREACH;
+ goto bad;
+ }
+#endif
+ }
+done:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return 0;
+reinjected:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return -1;
+bad:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return 1;
+#endif /* IPSEC */
+ return 0;
+}
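A minimal caller sketch of the 1 / 0 / -1 return convention documented above; this is not the actual ip_output() code, and the wrapper and variable names are assumptions:

    static int
    example_send(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
        struct ifnet **ifp)
    {
            switch (ip_ipsec_output(m, inp, flags, error, ifp)) {
            case 1:         /* policy rejected the packet: drop it */
                    m_freem(*m);
                    return (*error);
            case -1:        /* mbuf was consumed and reinjected by IPsec: stop */
                    return (0);
            default:        /* 0: continue with normal output processing */
                    break;
            }
            /* ... the normal IP output path would continue here ... */
            return (0);
    }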
diff --git a/freebsd/sys/netinet/ip_ipsec.h b/freebsd/sys/netinet/ip_ipsec.h
new file mode 100644
index 00000000..c4de1652
--- /dev/null
+++ b/freebsd/sys/netinet/ip_ipsec.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_IPSEC_HH_
+#define _NETINET_IP_IPSEC_HH_
+
+int ip_ipsec_filtertunnel(struct mbuf *);
+int ip_ipsec_fwd(struct mbuf *);
+int ip_ipsec_input(struct mbuf *);
+int ip_ipsec_mtu(struct mbuf *, int);
+int ip_ipsec_output(struct mbuf **, struct inpcb *, int *, int *,
+ struct ifnet **);
+#endif
diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c
new file mode 100644
index 00000000..2f7676ad
--- /dev/null
+++ b/freebsd/sys/netinet/ip_mroute.c
@@ -0,0 +1,2952 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1989 Stephen Deering
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
+ */
+
+/*
+ * IP multicast forwarding procedures
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ * Modified by Mark J. Steiglitz, Stanford, May, 1991
+ * Modified by Van Jacobson, LBL, January 1993
+ * Modified by Ajit Thyagarajan, PARC, August 1993
+ * Modified by Bill Fenner, PARC, April 1995
+ * Modified by Ahmed Helmy, SGI, June 1996
+ * Modified by George Edmond Eddy (Rusty), ISI, February 1998
+ * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
+ * Modified by Hitoshi Asaeda, WIDE, August 2000
+ * Modified by Pavlin Radoslavov, ICSI, October 2002
+ *
+ * MROUTING Revision: 3.5
+ * and PIM-SMv2 and PIM-DM support, advanced API support,
+ * bandwidth metering and signaling
+ */
+
+/*
+ * TODO: Prefix functions with ipmf_.
+ * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol
+ * domain attachment (if_afdata) so we can track consumers of that service.
+ * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT,
+ * move it to socket options.
+ * TODO: Cleanup LSRR removal further.
+ * TODO: Push RSVP stubs into raw_ip.c.
+ * TODO: Use bitstring.h for vif set.
+ * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded.
+ * TODO: Sync ip6_mroute.c with this file.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_mrouting.h>
+
+#define _PIM_VT 1
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/stddef.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sockio.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/time.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/igmp.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_encap.h>
+#include <freebsd/netinet/ip_mroute.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#include <freebsd/netinet/pim.h>
+#include <freebsd/netinet/pim_var.h>
+#include <freebsd/netinet/udp.h>
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#ifndef KTR_IPMF
+#define KTR_IPMF KTR_INET
+#endif
+
+#define VIFI_INVALID ((vifi_t) -1)
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+
+static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */
+#define V_last_tv_sec VNET(last_tv_sec)
+
+static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
+
+/*
+ * Locking. We use two locks: one for the virtual interface table and
+ * one for the forwarding table. These locks may be nested in which case
+ * the VIF lock must always be taken first. Note that each lock is used
+ * to cover not only the specific data structure but also related data
+ * structures.
+ */
+
+static struct mtx mrouter_mtx;
+#define MROUTER_LOCK() mtx_lock(&mrouter_mtx)
+#define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx)
+#define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED)
+#define MROUTER_LOCK_INIT() \
+ mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
+#define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx)
+
+static int ip_mrouter_cnt; /* # of vnets with active mrouters */
+static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
+
+static VNET_DEFINE(struct mrtstat, mrtstat);
+#define V_mrtstat VNET(mrtstat)
+SYSCTL_VNET_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
+ &VNET_NAME(mrtstat), mrtstat,
+ "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
+ "netinet/ip_mroute.h)");
+
+static VNET_DEFINE(u_long, mfchash);
+#define V_mfchash VNET(mfchash)
+#define MFCHASH(a, g) \
+ ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
+ ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash)
+#define MFCHASHSIZE 256
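An illustrative fragment showing how MFCHASH() is meant to be used to pick a bucket; the concrete addresses are assumptions, and the lookup loop simply mirrors mfc_find() below. It assumes the caller holds MFC_LOCK():

    struct in_addr origin, group;
    struct mfc *rt;
    u_long bucket;

    origin.s_addr = htonl(0xc0a80101);      /* 192.168.1.1, example only */
    group.s_addr  = htonl(0xe0000116);      /* 224.0.1.22,  example only */
    bucket = MFCHASH(origin, group);        /* 0 .. V_mfchash (the hash mask) */

    LIST_FOREACH(rt, &V_mfchashtbl[bucket], mfc_hash) {
            if (in_hosteq(rt->mfc_origin, origin) &&
                in_hosteq(rt->mfc_mcastgrp, group))
                    break;                  /* found the (S,G) entry */
    }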
+
+static u_long mfchashsize; /* Hash size */
+static VNET_DEFINE(u_char *, nexpire); /* 0..mfchashsize-1 */
+#define V_nexpire VNET(nexpire)
+static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
+#define V_mfchashtbl VNET(mfchashtbl)
+
+static struct mtx mfc_mtx;
+#define MFC_LOCK() mtx_lock(&mfc_mtx)
+#define MFC_UNLOCK() mtx_unlock(&mfc_mtx)
+#define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED)
+#define MFC_LOCK_INIT() \
+ mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
+#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx)
+
+static VNET_DEFINE(vifi_t, numvifs);
+#define V_numvifs VNET(numvifs)
+static VNET_DEFINE(struct vif, viftable[MAXVIFS]);
+#define V_viftable VNET(viftable)
+SYSCTL_VNET_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD,
+ &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
+ "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
+
+static struct mtx vif_mtx;
+#define VIF_LOCK() mtx_lock(&vif_mtx)
+#define VIF_UNLOCK() mtx_unlock(&vif_mtx)
+#define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED)
+#define VIF_LOCK_INIT() \
+ mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF)
+#define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx)
+
+static eventhandler_tag if_detach_event_tag = NULL;
+
+static VNET_DEFINE(struct callout, expire_upcalls_ch);
+#define V_expire_upcalls_ch VNET(expire_upcalls_ch)
+
+#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
+#define UPCALL_EXPIRE 6 /* number of timeouts */
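Taken together, the two constants above mean that the upcall expiry callout runs every hz/4 ticks (four times a second) and an unresolved upcall entry survives UPCALL_EXPIRE = 6 such passes, i.e. roughly 6 x 250 ms = 1.5 s before expire_upcalls() tears it down.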
+
+/*
+ * Bandwidth meter variables and constants
+ */
+static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
+/*
+ * Pending timeouts are stored in a hash table, the key being the
+ * expiration time. Periodically, the entries are analysed and processed.
+ */
+#define BW_METER_BUCKETS 1024
+static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
+#define V_bw_meter_timers VNET(bw_meter_timers)
+static VNET_DEFINE(struct callout, bw_meter_ch);
+#define V_bw_meter_ch VNET(bw_meter_ch)
+#define BW_METER_PERIOD (hz) /* periodical handling of bw meters */
+
+/*
+ * Pending upcalls are stored in a vector which is flushed when it is
+ * full, or periodically.
+ */
+static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
+#define V_bw_upcalls VNET(bw_upcalls)
+static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */
+#define V_bw_upcalls_n VNET(bw_upcalls_n)
+static VNET_DEFINE(struct callout, bw_upcalls_ch);
+#define V_bw_upcalls_ch VNET(bw_upcalls_ch)
+
+#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */
+
+static VNET_DEFINE(struct pimstat, pimstat);
+#define V_pimstat VNET(pimstat)
+
+SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
+SYSCTL_VNET_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD,
+ &VNET_NAME(pimstat), pimstat,
+ "PIM Statistics (struct pimstat, netinet/pim_var.h)");
+
+static u_long pim_squelch_wholepkt = 0;
+SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
+ &pim_squelch_wholepkt, 0,
+ "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
+
+extern struct domain inetdomain;
+static const struct protosw in_pim_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_PIM,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = pim_input,
+ .pr_output = (pr_output_t*)rip_output,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+static const struct encaptab *pim_encap_cookie;
+
+static int pim_encapcheck(const struct mbuf *, int, int, void *);
+
+/*
+ * Note: the PIM Register encapsulation adds the following in front of a
+ * data packet:
+ *
+ * struct pim_encap_hdr {
+ * struct ip ip;
+ * struct pim_encap_pimhdr pim;
+ * }
+ *
+ */
+
+struct pim_encap_pimhdr {
+ struct pim pim;
+ uint32_t flags;
+};
+#define PIM_ENCAP_TTL 64
+
+static struct ip pim_encap_iphdr = {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ sizeof(struct ip) >> 2,
+ IPVERSION,
+#else
+ IPVERSION,
+ sizeof(struct ip) >> 2,
+#endif
+ 0, /* tos */
+ sizeof(struct ip), /* total length */
+ 0, /* id */
+ 0, /* frag offset */
+ PIM_ENCAP_TTL,
+ IPPROTO_PIM,
+ 0, /* checksum */
+};
+
+static struct pim_encap_pimhdr pim_encap_pimhdr = {
+ {
+ PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */
+ 0, /* reserved */
+ 0, /* checksum */
+ },
+ 0 /* flags */
+};
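Per the layout sketched in the comment above, a PIM Register therefore prepends sizeof(struct ip) + sizeof(struct pim_encap_pimhdr) = 20 + 8 = 28 bytes of encapsulation (assuming the usual 4-byte struct pim) in front of the original multicast datagram.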
+
+static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID;
+#define V_reg_vif_num VNET(reg_vif_num)
+static VNET_DEFINE(struct ifnet, multicast_register_if);
+#define V_multicast_register_if VNET(multicast_register_if)
+
+/*
+ * Private (static) function declarations.
+ */
+
+static u_long X_ip_mcast_src(int);
+static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *);
+static int X_ip_mrouter_done(void);
+static int X_ip_mrouter_get(struct socket *, struct sockopt *);
+static int X_ip_mrouter_set(struct socket *, struct sockopt *);
+static int X_legal_vif_num(int);
+static int X_mrt_ioctl(u_long, caddr_t, int);
+
+static int add_bw_upcall(struct bw_upcall *);
+static int add_mfc(struct mfcctl2 *);
+static int add_vif(struct vifctl *);
+static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
+static void bw_meter_process(void);
+static void bw_meter_receive_packet(struct bw_meter *, int,
+ struct timeval *);
+static void bw_upcalls_send(void);
+static int del_bw_upcall(struct bw_upcall *);
+static int del_mfc(struct mfcctl2 *);
+static int del_vif(vifi_t);
+static int del_vif_locked(vifi_t);
+static void expire_bw_meter_process(void *);
+static void expire_bw_upcalls_send(void *);
+static void expire_mfc(struct mfc *);
+static void expire_upcalls(void *);
+static void free_bw_list(struct bw_meter *);
+static int get_sg_cnt(struct sioc_sg_req *);
+static int get_vif_cnt(struct sioc_vif_req *);
+static void if_detached_event(void *, struct ifnet *);
+static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
+static int ip_mrouter_init(struct socket *, int);
+static __inline struct mfc *
+ mfc_find(struct in_addr *, struct in_addr *);
+static void phyint_send(struct ip *, struct vif *, struct mbuf *);
+static struct mbuf *
+ pim_register_prepare(struct ip *, struct mbuf *);
+static int pim_register_send(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static int pim_register_send_rp(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static int pim_register_send_upcall(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static void schedule_bw_meter(struct bw_meter *, struct timeval *);
+static void send_packet(struct vif *, struct mbuf *);
+static int set_api_config(uint32_t *);
+static int set_assert(int);
+static int socket_send(struct socket *, struct mbuf *,
+ struct sockaddr_in *);
+static void unschedule_bw_meter(struct bw_meter *);
+
+/*
+ * Kernel multicast forwarding API capabilities and setup.
+ * If more API capabilities are added to the kernel, they should be
+ * recorded in `mrt_api_support'.
+ */
+#define MRT_API_VERSION 0x0305
+
+static const int mrt_api_version = MRT_API_VERSION;
+static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
+ MRT_MFC_FLAGS_BORDER_VIF |
+ MRT_MFC_RP |
+ MRT_MFC_BW_UPCALL);
+static VNET_DEFINE(uint32_t, mrt_api_config);
+#define V_mrt_api_config VNET(mrt_api_config)
+static VNET_DEFINE(int, pim_assert_enabled);
+#define V_pim_assert_enabled VNET(pim_assert_enabled)
+static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */
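A hedged userland sketch of how a routing daemon could negotiate these capabilities through the MRT_API_CONFIG handling shown later in X_ip_mrouter_set(); the socket is assumed to be the raw IGMP socket used for MRT_INIT, the function name is made up, and error reporting is trimmed:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <stdint.h>
    #include <netinet/in.h>
    #include <netinet/ip_mroute.h>

    /* mrouter_fd: a socket(AF_INET, SOCK_RAW, IPPROTO_IGMP) descriptor */
    static int
    negotiate_bw_upcalls(int mrouter_fd)
    {
            int v = 1;                              /* only version 1 is accepted */
            uint32_t want = MRT_MFC_BW_UPCALL;      /* subset of mrt_api_support */

            if (setsockopt(mrouter_fd, IPPROTO_IP, MRT_INIT, &v, sizeof(v)) == -1)
                    return (-1);
            /*
             * The kernel masks the request with mrt_api_support and copies the
             * accepted value back into the buffer (see the MRT_API_CONFIG case
             * in X_ip_mrouter_set() below).
             */
            if (setsockopt(mrouter_fd, IPPROTO_IP, MRT_API_CONFIG, &want,
                sizeof(want)) == -1)
                    return (-1);
            return ((want & MRT_MFC_BW_UPCALL) ? 0 : -1);
    }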
+
+/*
+ * Find a route for a given origin IP address and multicast group address.
+ * Statistics must be updated by the caller.
+ */
+static __inline struct mfc *
+mfc_find(struct in_addr *o, struct in_addr *g)
+{
+ struct mfc *rt;
+
+ MFC_LOCK_ASSERT();
+
+ LIST_FOREACH(rt, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, *o) &&
+ in_hosteq(rt->mfc_mcastgrp, *g) &&
+ TAILQ_EMPTY(&rt->mfc_stall))
+ break;
+ }
+
+ return (rt);
+}
+
+/*
+ * Handle MRT setsockopt commands to modify the multicast forwarding tables.
+ */
+static int
+X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
+{
+ int error, optval;
+ vifi_t vifi;
+ struct vifctl vifc;
+ struct mfcctl2 mfc;
+ struct bw_upcall bw_upcall;
+ uint32_t i;
+
+ if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT)
+ return EPERM;
+
+ error = 0;
+ switch (sopt->sopt_name) {
+ case MRT_INIT:
+ error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
+ if (error)
+ break;
+ error = ip_mrouter_init(so, optval);
+ break;
+
+ case MRT_DONE:
+ error = ip_mrouter_done();
+ break;
+
+ case MRT_ADD_VIF:
+ error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
+ if (error)
+ break;
+ error = add_vif(&vifc);
+ break;
+
+ case MRT_DEL_VIF:
+ error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
+ if (error)
+ break;
+ error = del_vif(vifi);
+ break;
+
+ case MRT_ADD_MFC:
+ case MRT_DEL_MFC:
+ /*
+ * select data size depending on API version.
+ */
+ if (sopt->sopt_name == MRT_ADD_MFC &&
+ V_mrt_api_config & MRT_API_FLAGS_ALL) {
+ error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2),
+ sizeof(struct mfcctl2));
+ } else {
+ error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl),
+ sizeof(struct mfcctl));
+ bzero((caddr_t)&mfc + sizeof(struct mfcctl),
+ sizeof(mfc) - sizeof(struct mfcctl));
+ }
+ if (error)
+ break;
+ if (sopt->sopt_name == MRT_ADD_MFC)
+ error = add_mfc(&mfc);
+ else
+ error = del_mfc(&mfc);
+ break;
+
+ case MRT_ASSERT:
+ error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
+ if (error)
+ break;
+ set_assert(optval);
+ break;
+
+ case MRT_API_CONFIG:
+ error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ if (!error)
+ error = set_api_config(&i);
+ if (!error)
+ error = sooptcopyout(sopt, &i, sizeof i);
+ break;
+
+ case MRT_ADD_BW_UPCALL:
+ case MRT_DEL_BW_UPCALL:
+ error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall,
+ sizeof bw_upcall);
+ if (error)
+ break;
+ if (sopt->sopt_name == MRT_ADD_BW_UPCALL)
+ error = add_bw_upcall(&bw_upcall);
+ else
+ error = del_bw_upcall(&bw_upcall);
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+ return error;
+}
+
+/*
+ * Handle MRT getsockopt commands
+ */
+static int
+X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
+{
+ int error;
+
+ switch (sopt->sopt_name) {
+ case MRT_VERSION:
+ error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version);
+ break;
+
+ case MRT_ASSERT:
+ error = sooptcopyout(sopt, &V_pim_assert_enabled,
+ sizeof V_pim_assert_enabled);
+ break;
+
+ case MRT_API_SUPPORT:
+ error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support);
+ break;
+
+ case MRT_API_CONFIG:
+ error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config);
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+ return error;
+}
+
+/*
+ * Handle ioctl commands to obtain information from the cache
+ */
+static int
+X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused)
+{
+ int error = 0;
+
+ /*
+ * Currently the only function calling this ioctl routine is rtioctl().
+ * Typically, only root can create the raw socket in order to execute
+ * this ioctl method; however, the request might be coming from a prison.
+ */
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
+ if (error)
+ return (error);
+ switch (cmd) {
+ case (SIOCGETVIFCNT):
+ error = get_vif_cnt((struct sioc_vif_req *)data);
+ break;
+
+ case (SIOCGETSGCNT):
+ error = get_sg_cnt((struct sioc_sg_req *)data);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+/*
+ * Returns the packet, byte, and rpf-failure counts for the source/group pair provided.
+ */
+static int
+get_sg_cnt(struct sioc_sg_req *req)
+{
+ struct mfc *rt;
+
+ MFC_LOCK();
+ rt = mfc_find(&req->src, &req->grp);
+ if (rt == NULL) {
+ MFC_UNLOCK();
+ req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
+ return EADDRNOTAVAIL;
+ }
+ req->pktcnt = rt->mfc_pkt_cnt;
+ req->bytecnt = rt->mfc_byte_cnt;
+ req->wrong_if = rt->mfc_wrong_if;
+ MFC_UNLOCK();
+ return 0;
+}
+
+/*
+ * returns the input and output packet and byte counts on the vif provided
+ */
+static int
+get_vif_cnt(struct sioc_vif_req *req)
+{
+ vifi_t vifi = req->vifi;
+
+ VIF_LOCK();
+ if (vifi >= V_numvifs) {
+ VIF_UNLOCK();
+ return EINVAL;
+ }
+
+ req->icount = V_viftable[vifi].v_pkt_in;
+ req->ocount = V_viftable[vifi].v_pkt_out;
+ req->ibytes = V_viftable[vifi].v_bytes_in;
+ req->obytes = V_viftable[vifi].v_bytes_out;
+ VIF_UNLOCK();
+
+ return 0;
+}
+
+static void
+if_detached_event(void *arg __unused, struct ifnet *ifp)
+{
+ vifi_t vifi;
+ int i;
+
+ MROUTER_LOCK();
+
+ if (V_ip_mrouter == NULL) {
+ MROUTER_UNLOCK();
+ return;
+ }
+
+ VIF_LOCK();
+ MFC_LOCK();
+
+ /*
+ * Tear down multicast forwarder state associated with this ifnet.
+ * 1. Walk the vif list, matching vifs against this ifnet.
+ * 2. Walk the multicast forwarding cache (mfc) looking for
+ * inner matches with this vif's index.
+ * 3. Expire any matching multicast forwarding cache entries.
+ * 4. Free vif state. This should disable ALLMULTI on the interface.
+ */
+ for (vifi = 0; vifi < V_numvifs; vifi++) {
+ if (V_viftable[vifi].v_ifp != ifp)
+ continue;
+ for (i = 0; i < mfchashsize; i++) {
+ struct mfc *rt, *nrt;
+ for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
+ nrt = LIST_NEXT(rt, mfc_hash);
+ if (rt->mfc_parent == vifi) {
+ expire_mfc(rt);
+ }
+ }
+ }
+ del_vif_locked(vifi);
+ }
+
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+
+ MROUTER_UNLOCK();
+}
+
+/*
+ * Enable multicast forwarding.
+ */
+static int
+ip_mrouter_init(struct socket *so, int version)
+{
+
+ CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__,
+ so->so_type, so->so_proto->pr_protocol);
+
+ if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP)
+ return EOPNOTSUPP;
+
+ if (version != 1)
+ return ENOPROTOOPT;
+
+ MROUTER_LOCK();
+
+ if (ip_mrouter_unloading) {
+ MROUTER_UNLOCK();
+ return ENOPROTOOPT;
+ }
+
+ if (V_ip_mrouter != NULL) {
+ MROUTER_UNLOCK();
+ return EADDRINUSE;
+ }
+
+ V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash,
+ HASH_NOWAIT);
+
+ callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
+ curvnet);
+ callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
+ curvnet);
+ callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
+ curvnet);
+
+ V_ip_mrouter = so;
+ ip_mrouter_cnt++;
+
+ MROUTER_UNLOCK();
+
+ CTR1(KTR_IPMF, "%s: done", __func__);
+
+ return 0;
+}
+
+/*
+ * Disable multicast forwarding.
+ */
+static int
+X_ip_mrouter_done(void)
+{
+ vifi_t vifi;
+ int i;
+ struct ifnet *ifp;
+ struct ifreq ifr;
+
+ MROUTER_LOCK();
+
+ if (V_ip_mrouter == NULL) {
+ MROUTER_UNLOCK();
+ return EINVAL;
+ }
+
+ /*
+ * Detach/disable hooks to the rest of the system.
+ */
+ V_ip_mrouter = NULL;
+ ip_mrouter_cnt--;
+ V_mrt_api_config = 0;
+
+ VIF_LOCK();
+
+ /*
+ * For each phyint in use, disable promiscuous reception of all IP
+ * multicasts.
+ */
+ for (vifi = 0; vifi < V_numvifs; vifi++) {
+ if (!in_nullhost(V_viftable[vifi].v_lcl_addr) &&
+ !(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
+ struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr);
+
+ so->sin_len = sizeof(struct sockaddr_in);
+ so->sin_family = AF_INET;
+ so->sin_addr.s_addr = INADDR_ANY;
+ ifp = V_viftable[vifi].v_ifp;
+ if_allmulti(ifp, 0);
+ }
+ }
+ bzero((caddr_t)V_viftable, sizeof(V_viftable));
+ V_numvifs = 0;
+ V_pim_assert_enabled = 0;
+
+ VIF_UNLOCK();
+
+ callout_stop(&V_expire_upcalls_ch);
+ callout_stop(&V_bw_upcalls_ch);
+ callout_stop(&V_bw_meter_ch);
+
+ MFC_LOCK();
+
+ /*
+ * Free all multicast forwarding cache entries.
+ * Do not use hashdestroy(), as we must perform other cleanup.
+ */
+ for (i = 0; i < mfchashsize; i++) {
+ struct mfc *rt, *nrt;
+ for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
+ nrt = LIST_NEXT(rt, mfc_hash);
+ expire_mfc(rt);
+ }
+ }
+ free(V_mfchashtbl, M_MRTABLE);
+ V_mfchashtbl = NULL;
+
+ bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize);
+
+ V_bw_upcalls_n = 0;
+ bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
+
+ MFC_UNLOCK();
+
+ V_reg_vif_num = VIFI_INVALID;
+
+ MROUTER_UNLOCK();
+
+ CTR1(KTR_IPMF, "%s: done", __func__);
+
+ return 0;
+}
+
+/*
+ * Set PIM assert processing global
+ */
+static int
+set_assert(int i)
+{
+ if ((i != 1) && (i != 0))
+ return EINVAL;
+
+ V_pim_assert_enabled = i;
+
+ return 0;
+}
+
+/*
+ * Configure API capabilities
+ */
+int
+set_api_config(uint32_t *apival)
+{
+ int i;
+
+ /*
+ * We can set the API capabilities only if it is the first operation
+ * after MRT_INIT. I.e.:
+ * - there are no vifs installed
+ * - pim_assert is not enabled
+ * - the MFC table is empty
+ */
+ if (V_numvifs > 0) {
+ *apival = 0;
+ return EPERM;
+ }
+ if (V_pim_assert_enabled) {
+ *apival = 0;
+ return EPERM;
+ }
+
+ MFC_LOCK();
+
+ for (i = 0; i < mfchashsize; i++) {
+ if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) {
+ MFC_UNLOCK();
+ *apival = 0;
+ return EPERM;
+ }
+ }
+
+ MFC_UNLOCK();
+
+ V_mrt_api_config = *apival & mrt_api_support;
+ *apival = V_mrt_api_config;
+
+ return 0;
+}
+
+/*
+ * Add a vif to the vif table
+ */
+static int
+add_vif(struct vifctl *vifcp)
+{
+ struct vif *vifp = V_viftable + vifcp->vifc_vifi;
+ struct sockaddr_in sin = {sizeof sin, AF_INET};
+ struct ifaddr *ifa;
+ struct ifnet *ifp;
+ int error;
+
+ VIF_LOCK();
+ if (vifcp->vifc_vifi >= MAXVIFS) {
+ VIF_UNLOCK();
+ return EINVAL;
+ }
+ /* rate limiting is no longer supported by this code */
+ if (vifcp->vifc_rate_limit != 0) {
+ log(LOG_ERR, "rate limiting is no longer supported\n");
+ VIF_UNLOCK();
+ return EINVAL;
+ }
+ if (!in_nullhost(vifp->v_lcl_addr)) {
+ VIF_UNLOCK();
+ return EADDRINUSE;
+ }
+ if (in_nullhost(vifcp->vifc_lcl_addr)) {
+ VIF_UNLOCK();
+ return EADDRNOTAVAIL;
+ }
+
+ /* Find the interface with an address in AF_INET family */
+ if (vifcp->vifc_flags & VIFF_REGISTER) {
+ /*
+ * XXX: Because VIFF_REGISTER does not really need a valid
+ * local interface (e.g. it could be 127.0.0.2), we don't
+ * check its address.
+ */
+ ifp = NULL;
+ } else {
+ sin.sin_addr = vifcp->vifc_lcl_addr;
+ ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
+ if (ifa == NULL) {
+ VIF_UNLOCK();
+ return EADDRNOTAVAIL;
+ }
+ ifp = ifa->ifa_ifp;
+ ifa_free(ifa);
+ }
+
+ if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
+ CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__);
+ VIF_UNLOCK();
+ return EOPNOTSUPP;
+ } else if (vifcp->vifc_flags & VIFF_REGISTER) {
+ ifp = &V_multicast_register_if;
+ CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp);
+ if (V_reg_vif_num == VIFI_INVALID) {
+ if_initname(&V_multicast_register_if, "register_vif", 0);
+ V_multicast_register_if.if_flags = IFF_LOOPBACK;
+ V_reg_vif_num = vifcp->vifc_vifi;
+ }
+ } else { /* Make sure the interface supports multicast */
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ VIF_UNLOCK();
+ return EOPNOTSUPP;
+ }
+
+ /* Enable promiscuous reception of all IP multicasts from the if */
+ error = if_allmulti(ifp, 1);
+ if (error) {
+ VIF_UNLOCK();
+ return error;
+ }
+ }
+
+ vifp->v_flags = vifcp->vifc_flags;
+ vifp->v_threshold = vifcp->vifc_threshold;
+ vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
+ vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
+ vifp->v_ifp = ifp;
+ /* initialize per vif pkt counters */
+ vifp->v_pkt_in = 0;
+ vifp->v_pkt_out = 0;
+ vifp->v_bytes_in = 0;
+ vifp->v_bytes_out = 0;
+ bzero(&vifp->v_route, sizeof(vifp->v_route));
+
+ /* Adjust numvifs up if the vifi is higher than numvifs */
+ if (V_numvifs <= vifcp->vifc_vifi)
+ V_numvifs = vifcp->vifc_vifi + 1;
+
+ VIF_UNLOCK();
+
+ CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__,
+ (int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr),
+ (int)vifcp->vifc_threshold);
+
+ return 0;
+}
+
+/*
+ * Delete a vif from the vif table
+ */
+static int
+del_vif_locked(vifi_t vifi)
+{
+ struct vif *vifp;
+
+ VIF_LOCK_ASSERT();
+
+ if (vifi >= V_numvifs) {
+ return EINVAL;
+ }
+ vifp = &V_viftable[vifi];
+ if (in_nullhost(vifp->v_lcl_addr)) {
+ return EADDRNOTAVAIL;
+ }
+
+ if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)))
+ if_allmulti(vifp->v_ifp, 0);
+
+ if (vifp->v_flags & VIFF_REGISTER)
+ V_reg_vif_num = VIFI_INVALID;
+
+ bzero((caddr_t)vifp, sizeof (*vifp));
+
+ CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi);
+
+ /* Adjust numvifs down */
+ for (vifi = V_numvifs; vifi > 0; vifi--)
+ if (!in_nullhost(V_viftable[vifi-1].v_lcl_addr))
+ break;
+ V_numvifs = vifi;
+
+ return 0;
+}
+
+static int
+del_vif(vifi_t vifi)
+{
+ int cc;
+
+ VIF_LOCK();
+ cc = del_vif_locked(vifi);
+ VIF_UNLOCK();
+
+ return cc;
+}
+
+/*
+ * update an mfc entry without resetting counters and S,G addresses.
+ */
+static void
+update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
+{
+ int i;
+
+ rt->mfc_parent = mfccp->mfcc_parent;
+ for (i = 0; i < V_numvifs; i++) {
+ rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+ rt->mfc_flags[i] = mfccp->mfcc_flags[i] & V_mrt_api_config &
+ MRT_MFC_FLAGS_ALL;
+ }
+ /* set the RP address */
+ if (V_mrt_api_config & MRT_MFC_RP)
+ rt->mfc_rp = mfccp->mfcc_rp;
+ else
+ rt->mfc_rp.s_addr = INADDR_ANY;
+}
+
+/*
+ * fully initialize an mfc entry from the parameter.
+ */
+static void
+init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
+{
+ rt->mfc_origin = mfccp->mfcc_origin;
+ rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
+
+ update_mfc_params(rt, mfccp);
+
+ /* initialize pkt counters per src-grp */
+ rt->mfc_pkt_cnt = 0;
+ rt->mfc_byte_cnt = 0;
+ rt->mfc_wrong_if = 0;
+ timevalclear(&rt->mfc_last_assert);
+}
+
+static void
+expire_mfc(struct mfc *rt)
+{
+ struct rtdetq *rte, *nrte;
+
+ free_bw_list(rt->mfc_bw_meter);
+
+ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
+ m_freem(rte->m);
+ TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
+ free(rte, M_MRTABLE);
+ }
+
+ LIST_REMOVE(rt, mfc_hash);
+ free(rt, M_MRTABLE);
+}
+
+/*
+ * Add an mfc entry
+ */
+static int
+add_mfc(struct mfcctl2 *mfccp)
+{
+ struct mfc *rt;
+ struct rtdetq *rte, *nrte;
+ u_long hash = 0;
+ u_short nstl;
+
+ VIF_LOCK();
+ MFC_LOCK();
+
+ rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);
+
+ /* If an entry already exists, just update the fields */
+ if (rt) {
+ CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x",
+ __func__, inet_ntoa(mfccp->mfcc_origin),
+ (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent);
+ update_mfc_params(rt, mfccp);
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return (0);
+ }
+
+ /*
+ * Find the entry for which the upcall was made and update it.
+ */
+ nstl = 0;
+ hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
+ LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
+ in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
+ !TAILQ_EMPTY(&rt->mfc_stall)) {
+ CTR5(KTR_IPMF,
+ "%s: add mfc orig %s group %lx parent %x qh %p",
+ __func__, inet_ntoa(mfccp->mfcc_origin),
+ (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent,
+ TAILQ_FIRST(&rt->mfc_stall));
+ if (nstl++)
+ CTR1(KTR_IPMF, "%s: multiple matches", __func__);
+
+ init_mfc_params(rt, mfccp);
+ rt->mfc_expire = 0; /* Don't clean this guy up */
+ V_nexpire[hash]--;
+
+ /* Free queued packets, but attempt to forward them first. */
+ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
+ if (rte->ifp != NULL)
+ ip_mdq(rte->m, rte->ifp, rt, -1);
+ m_freem(rte->m);
+ TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
+ rt->mfc_nstall--;
+ free(rte, M_MRTABLE);
+ }
+ }
+ }
+
+ /*
+ * It is possible that an entry is being inserted without an upcall
+ */
+ if (nstl == 0) {
+ CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__);
+ LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
+ in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) {
+ init_mfc_params(rt, mfccp);
+ if (rt->mfc_expire)
+ V_nexpire[hash]--;
+ rt->mfc_expire = 0;
+ break; /* XXX */
+ }
+ }
+
+ if (rt == NULL) { /* no upcall, so make a new entry */
+ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
+ if (rt == NULL) {
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return (ENOBUFS);
+ }
+
+ init_mfc_params(rt, mfccp);
+ TAILQ_INIT(&rt->mfc_stall);
+ rt->mfc_nstall = 0;
+
+ rt->mfc_expire = 0;
+ rt->mfc_bw_meter = NULL;
+
+ /* insert new entry at head of hash chain */
+ LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
+ }
+ }
+
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+
+ return (0);
+}
+
+/*
+ * Delete an mfc entry
+ */
+static int
+del_mfc(struct mfcctl2 *mfccp)
+{
+ struct in_addr origin;
+ struct in_addr mcastgrp;
+ struct mfc *rt;
+
+ origin = mfccp->mfcc_origin;
+ mcastgrp = mfccp->mfcc_mcastgrp;
+
+ CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__,
+ inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr));
+
+ MFC_LOCK();
+
+ rt = mfc_find(&origin, &mcastgrp);
+ if (rt == NULL) {
+ MFC_UNLOCK();
+ return EADDRNOTAVAIL;
+ }
+
+ /*
+ * free the bw_meter entries
+ */
+ free_bw_list(rt->mfc_bw_meter);
+ rt->mfc_bw_meter = NULL;
+
+ LIST_REMOVE(rt, mfc_hash);
+ free(rt, M_MRTABLE);
+
+ MFC_UNLOCK();
+
+ return (0);
+}
+
+/*
+ * Send a message to the routing daemon on the multicast routing socket.
+ */
+static int
+socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
+{
+ if (s) {
+ SOCKBUF_LOCK(&s->so_rcv);
+ if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm,
+ NULL) != 0) {
+ sorwakeup_locked(s);
+ return 0;
+ }
+ SOCKBUF_UNLOCK(&s->so_rcv);
+ }
+ m_freem(mm);
+ return -1;
+}
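A hedged sketch of the receiving side of socket_send(): upcalls arrive on the daemon's raw IGMP socket as a struct igmpmsg laid over an IP header, and im_mbz (which overlays the IP protocol field) is zero only for kernel upcalls, which is how mrouted-style daemons tell them apart from real IGMP traffic. The function name and buffer size are illustrative:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <netinet/ip_mroute.h>

    static void
    read_one_upcall(int mrouter_fd)
    {
            char buf[2048];
            struct igmpmsg im;
            ssize_t n;

            n = recv(mrouter_fd, buf, sizeof(buf), 0);
            if (n < (ssize_t)sizeof(im))
                    return;
            memcpy(&im, buf, sizeof(im));
            if (im.im_mbz != 0)
                    return;                 /* a real IGMP packet, not an upcall */
            if (im.im_msgtype == IGMPMSG_NOCACHE) {
                    /*
                     * Ask the routing protocol for a route for the flow
                     * (im.im_src, im.im_dst) and install the result with the
                     * MRT_ADD_MFC socket option.
                     */
            }
    }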
+
+/*
+ * IP multicast forwarding function. This function assumes that the packet
+ * pointed to by "ip" has arrived on (or is about to be sent to) the interface
+ * pointed to by "ifp", and the packet is to be relayed to other networks
+ * that have members of the packet's destination IP multicast group.
+ *
+ * The packet is returned unscathed to the caller, unless it is
+ * erroneous, in which case a non-zero return value tells the caller to
+ * discard it.
+ */
+
+#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
+
+static int
+X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
+ struct ip_moptions *imo)
+{
+ struct mfc *rt;
+ int error;
+ vifi_t vifi;
+
+ CTR3(KTR_IPMF, "ip_mforward: delete mfc orig %s group %lx ifp %p",
+ inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp);
+
+ if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 ||
+ ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
+ /*
+ * Packet arrived via a physical interface or
+ * an encapsulated tunnel or a register_vif.
+ */
+ } else {
+ /*
+ * Packet arrived through a source-route tunnel.
+ * Source-route tunnels are no longer supported.
+ */
+ return (1);
+ }
+
+ VIF_LOCK();
+ MFC_LOCK();
+ if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) {
+ if (ip->ip_ttl < MAXTTL)
+ ip->ip_ttl++; /* compensate for -1 in *_send routines */
+ error = ip_mdq(m, ifp, NULL, vifi);
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return error;
+ }
+
+ /*
+ * Don't forward a packet with time-to-live of zero or one,
+ * or a packet destined to a local-only group.
+ */
+ if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) {
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return 0;
+ }
+
+ /*
+ * Determine forwarding vifs from the forwarding cache table
+ */
+ MRTSTAT_INC(mrts_mfc_lookups);
+ rt = mfc_find(&ip->ip_src, &ip->ip_dst);
+
+ /* Entry exists, so forward if necessary */
+ if (rt != NULL) {
+ error = ip_mdq(m, ifp, rt, -1);
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return error;
+ } else {
+ /*
+ * If we don't have a route for the packet's origin, make a copy
+ * of the packet and send a message to the routing daemon.
+ */
+
+ struct mbuf *mb0;
+ struct rtdetq *rte;
+ u_long hash;
+ int hlen = ip->ip_hl << 2;
+
+ MRTSTAT_INC(mrts_mfc_misses);
+ MRTSTAT_INC(mrts_no_route);
+ CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)",
+ inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr));
+
+ /*
+ * Allocate mbufs early so that we don't do extra work if we are
+ * just going to fail anyway. Make sure to pullup the header so
+ * that other people can't step on it.
+ */
+ rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE,
+ M_NOWAIT|M_ZERO);
+ if (rte == NULL) {
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return ENOBUFS;
+ }
+
+ mb0 = m_copypacket(m, M_DONTWAIT);
+ if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
+ mb0 = m_pullup(mb0, hlen);
+ if (mb0 == NULL) {
+ free(rte, M_MRTABLE);
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return ENOBUFS;
+ }
+
+ /* is there an upcall waiting for this flow ? */
+ hash = MFCHASH(ip->ip_src, ip->ip_dst);
+ LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
+ if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
+ in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
+ !TAILQ_EMPTY(&rt->mfc_stall))
+ break;
+ }
+
+ if (rt == NULL) {
+ int i;
+ struct igmpmsg *im;
+ struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
+ struct mbuf *mm;
+
+ /*
+ * Locate the vifi for the incoming interface for this packet.
+ * If none found, drop packet.
+ */
+ for (vifi = 0; vifi < V_numvifs &&
+ V_viftable[vifi].v_ifp != ifp; vifi++)
+ ;
+ if (vifi >= V_numvifs) /* vif not found, drop packet */
+ goto non_fatal;
+
+ /* no upcall, so make a new entry */
+ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
+ if (rt == NULL)
+ goto fail;
+
+ /* Make a copy of the header to send to the user level process */
+ mm = m_copy(mb0, 0, hlen);
+ if (mm == NULL)
+ goto fail1;
+
+ /*
+ * Send message to routing daemon to install
+ * a route into the kernel table
+ */
+
+ im = mtod(mm, struct igmpmsg *);
+ im->im_msgtype = IGMPMSG_NOCACHE;
+ im->im_mbz = 0;
+ im->im_vif = vifi;
+
+ MRTSTAT_INC(mrts_upcalls);
+
+ k_igmpsrc.sin_addr = ip->ip_src;
+ if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
+ CTR0(KTR_IPMF, "ip_mforward: socket queue full");
+ MRTSTAT_INC(mrts_upq_sockfull);
+fail1:
+ free(rt, M_MRTABLE);
+fail:
+ free(rte, M_MRTABLE);
+ m_freem(mb0);
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return ENOBUFS;
+ }
+
+ /* insert new entry at head of hash chain */
+ rt->mfc_origin.s_addr = ip->ip_src.s_addr;
+ rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr;
+ rt->mfc_expire = UPCALL_EXPIRE;
+ V_nexpire[hash]++;
+ for (i = 0; i < V_numvifs; i++) {
+ rt->mfc_ttls[i] = 0;
+ rt->mfc_flags[i] = 0;
+ }
+ rt->mfc_parent = -1;
+
+ /* clear the RP address */
+ rt->mfc_rp.s_addr = INADDR_ANY;
+ rt->mfc_bw_meter = NULL;
+
+ /* initialize pkt counters per src-grp */
+ rt->mfc_pkt_cnt = 0;
+ rt->mfc_byte_cnt = 0;
+ rt->mfc_wrong_if = 0;
+ timevalclear(&rt->mfc_last_assert);
+
+ TAILQ_INIT(&rt->mfc_stall);
+ rt->mfc_nstall = 0;
+
+ /* link into table */
+ LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
+ TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link);
+ rt->mfc_nstall++;
+
+ } else {
+ /* determine if queue has overflowed */
+ if (rt->mfc_nstall > MAX_UPQ) {
+ MRTSTAT_INC(mrts_upq_ovflw);
+non_fatal:
+ free(rte, M_MRTABLE);
+ m_freem(mb0);
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+ return (0);
+ }
+ TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link);
+ rt->mfc_nstall++;
+ }
+
+ rte->m = mb0;
+ rte->ifp = ifp;
+
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+
+ return 0;
+ }
+}
+
+/*
+ * Clean up the cache entry if upcall is not serviced
+ */
+static void
+expire_upcalls(void *arg)
+{
+ int i;
+
+ CURVNET_SET((struct vnet *) arg);
+
+ MFC_LOCK();
+
+ for (i = 0; i < mfchashsize; i++) {
+ struct mfc *rt, *nrt;
+
+ if (V_nexpire[i] == 0)
+ continue;
+
+ for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
+ nrt = LIST_NEXT(rt, mfc_hash);
+
+ if (TAILQ_EMPTY(&rt->mfc_stall))
+ continue;
+
+ if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
+ continue;
+
+ /*
+ * free the bw_meter entries
+ */
+ while (rt->mfc_bw_meter != NULL) {
+ struct bw_meter *x = rt->mfc_bw_meter;
+
+ rt->mfc_bw_meter = x->bm_mfc_next;
+ free(x, M_BWMETER);
+ }
+
+ MRTSTAT_INC(mrts_cache_cleanups);
+ CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__,
+ (u_long)ntohl(rt->mfc_origin.s_addr),
+ (u_long)ntohl(rt->mfc_mcastgrp.s_addr));
+
+ expire_mfc(rt);
+ }
+ }
+
+ MFC_UNLOCK();
+
+ callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
+ curvnet);
+
+ CURVNET_RESTORE();
+}
+
+/*
+ * Packet forwarding routine once entry in the cache is made
+ */
+static int
+ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ vifi_t vifi;
+ int plen = ip->ip_len;
+
+ VIF_LOCK_ASSERT();
+
+ /*
+ * If xmt_vif is not -1, send on only the requested vif.
+ *
+ * (Since vifi_t is u_short, -1 becomes MAXUSHORT, which is > numvifs.)
+ */
+ if (xmt_vif < V_numvifs) {
+ if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER)
+ pim_register_send(ip, V_viftable + xmt_vif, m, rt);
+ else
+ phyint_send(ip, V_viftable + xmt_vif, m);
+ return 1;
+ }
+
+ /*
+ * Don't forward if it didn't arrive from the parent vif for its origin.
+ */
+ vifi = rt->mfc_parent;
+ if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) {
+ CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)",
+ __func__, ifp, (int)vifi, V_viftable[vifi].v_ifp);
+ MRTSTAT_INC(mrts_wrong_if);
+ ++rt->mfc_wrong_if;
+ /*
+ * If we are doing PIM assert processing, send a message
+ * to the routing daemon.
+ *
+ * XXX: A PIM-SM router needs the WRONGVIF detection so it
+ * can complete the SPT switch, regardless of the type
+ * of the iif (broadcast media, GRE tunnel, etc).
+ */
+ if (V_pim_assert_enabled && (vifi < V_numvifs) &&
+ V_viftable[vifi].v_ifp) {
+
+ if (ifp == &V_multicast_register_if)
+ PIMSTAT_INC(pims_rcv_registers_wrongiif);
+
+ /* Get vifi for the incoming packet */
+ for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp;
+ vifi++)
+ ;
+ if (vifi >= V_numvifs)
+ return 0; /* The iif is not found: ignore the packet. */
+
+ if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF)
+ return 0; /* WRONGVIF disabled: ignore the packet */
+
+ if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) {
+ struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
+ struct igmpmsg *im;
+ int hlen = ip->ip_hl << 2;
+ struct mbuf *mm = m_copy(m, 0, hlen);
+
+ if (mm && (M_HASCL(mm) || mm->m_len < hlen))
+ mm = m_pullup(mm, hlen);
+ if (mm == NULL)
+ return ENOBUFS;
+
+ im = mtod(mm, struct igmpmsg *);
+ im->im_msgtype = IGMPMSG_WRONGVIF;
+ im->im_mbz = 0;
+ im->im_vif = vifi;
+
+ MRTSTAT_INC(mrts_upcalls);
+
+ k_igmpsrc.sin_addr = im->im_src;
+ if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
+ CTR1(KTR_IPMF, "%s: socket queue full", __func__);
+ MRTSTAT_INC(mrts_upq_sockfull);
+ return ENOBUFS;
+ }
+ }
+ }
+ return 0;
+ }
+
+
+ /* If I sourced this packet, it counts as output, else it was input. */
+ if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) {
+ V_viftable[vifi].v_pkt_out++;
+ V_viftable[vifi].v_bytes_out += plen;
+ } else {
+ V_viftable[vifi].v_pkt_in++;
+ V_viftable[vifi].v_bytes_in += plen;
+ }
+ rt->mfc_pkt_cnt++;
+ rt->mfc_byte_cnt += plen;
+
+ /*
+ * For each vif, decide if a copy of the packet should be forwarded.
+ * Forward if:
+ * - the ttl exceeds the vif's threshold
+ * - there are group members downstream on the interface
+ */
+ for (vifi = 0; vifi < V_numvifs; vifi++)
+ if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
+ V_viftable[vifi].v_pkt_out++;
+ V_viftable[vifi].v_bytes_out += plen;
+ if (V_viftable[vifi].v_flags & VIFF_REGISTER)
+ pim_register_send(ip, V_viftable + vifi, m, rt);
+ else
+ phyint_send(ip, V_viftable + vifi, m);
+ }
+
+ /*
+ * Perform upcall-related bw measuring.
+ */
+ if (rt->mfc_bw_meter != NULL) {
+ struct bw_meter *x;
+ struct timeval now;
+
+ microtime(&now);
+ MFC_LOCK_ASSERT();
+ for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next)
+ bw_meter_receive_packet(x, plen, &now);
+ }
+
+ return 0;
+}
+
+/*
+ * Check if a vif number is legal/ok. This is used by in_mcast.c.
+ */
+static int
+X_legal_vif_num(int vif)
+{
+ int ret;
+
+ ret = 0;
+ if (vif < 0)
+ return (ret);
+
+ VIF_LOCK();
+ if (vif < V_numvifs)
+ ret = 1;
+ VIF_UNLOCK();
+
+ return (ret);
+}
+
+/*
+ * Return the local address used by this vif
+ */
+static u_long
+X_ip_mcast_src(int vifi)
+{
+ in_addr_t addr;
+
+ addr = INADDR_ANY;
+ if (vifi < 0)
+ return (addr);
+
+ VIF_LOCK();
+ if (vifi < V_numvifs)
+ addr = V_viftable[vifi].v_lcl_addr.s_addr;
+ VIF_UNLOCK();
+
+ return (addr);
+}
+
+static void
+phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
+{
+ struct mbuf *mb_copy;
+ int hlen = ip->ip_hl << 2;
+
+ VIF_LOCK_ASSERT();
+
+ /*
+ * Make a new reference to the packet; make sure that
+ * the IP header is actually copied, not just referenced,
+ * so that ip_output() only scribbles on the copy.
+ */
+ mb_copy = m_copypacket(m, M_DONTWAIT);
+ if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
+ mb_copy = m_pullup(mb_copy, hlen);
+ if (mb_copy == NULL)
+ return;
+
+ send_packet(vifp, mb_copy);
+}
+
+static void
+send_packet(struct vif *vifp, struct mbuf *m)
+{
+ struct ip_moptions imo;
+ struct in_multi *imm[2];
+ int error;
+
+ VIF_LOCK_ASSERT();
+
+ imo.imo_multicast_ifp = vifp->v_ifp;
+ imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
+ imo.imo_multicast_loop = 1;
+ imo.imo_multicast_vif = -1;
+ imo.imo_num_memberships = 0;
+ imo.imo_max_memberships = 2;
+ imo.imo_membership = &imm[0];
+
+ /*
+ * Re-entrancy should not be a problem here, because
+ * the packets that we send out and are looped back at us
+ * should get rejected because they appear to come from
+ * the loopback interface, thus preventing looping.
+ */
+ error = ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, &imo, NULL);
+ CTR3(KTR_IPMF, "%s: vif %td err %d", __func__,
+ (ptrdiff_t)(vifp - V_viftable), error);
+}
+
+/*
+ * Stubs for old RSVP socket shim implementation.
+ */
+
+static int
+X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused)
+{
+
+ return (EOPNOTSUPP);
+}
+
+static void
+X_ip_rsvp_force_done(struct socket *so __unused)
+{
+
+}
+
+static void
+X_rsvp_input(struct mbuf *m, int off __unused)
+{
+
+ if (!V_rsvp_on)
+ m_freem(m);
+}
+
+/*
+ * Code for bandwidth monitors
+ */
+
+/*
+ * Define common interface for timeval-related methods
+ */
+#define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp)
+#define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp))
+#define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp))
+
+static uint32_t
+compute_bw_meter_flags(struct bw_upcall *req)
+{
+ uint32_t flags = 0;
+
+ if (req->bu_flags & BW_UPCALL_UNIT_PACKETS)
+ flags |= BW_METER_UNIT_PACKETS;
+ if (req->bu_flags & BW_UPCALL_UNIT_BYTES)
+ flags |= BW_METER_UNIT_BYTES;
+ if (req->bu_flags & BW_UPCALL_GEQ)
+ flags |= BW_METER_GEQ;
+ if (req->bu_flags & BW_UPCALL_LEQ)
+ flags |= BW_METER_LEQ;
+
+ return flags;
+}
+
+/*
+ * Add a bw_meter entry
+ */
+static int
+add_bw_upcall(struct bw_upcall *req)
+{
+ struct mfc *mfc;
+ struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
+ BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
+ struct timeval now;
+ struct bw_meter *x;
+ uint32_t flags;
+
+ if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
+ return EOPNOTSUPP;
+
+ /* Test if the flags are valid */
+ if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES)))
+ return EINVAL;
+ if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)))
+ return EINVAL;
+ if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
+ == (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
+ return EINVAL;
+
+ /* Test if the threshold time interval is valid */
+ if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <))
+ return EINVAL;
+
+ flags = compute_bw_meter_flags(req);
+
+ /*
+ * Check whether the same bw_meter entry has already been installed.
+ */
+ MFC_LOCK();
+ mfc = mfc_find(&req->bu_src, &req->bu_dst);
+ if (mfc == NULL) {
+ MFC_UNLOCK();
+ return EADDRNOTAVAIL;
+ }
+ for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) {
+ if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
+ &req->bu_threshold.b_time, ==)) &&
+ (x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
+ (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
+ (x->bm_flags & BW_METER_USER_FLAGS) == flags) {
+ MFC_UNLOCK();
+ return 0; /* XXX Already installed */
+ }
+ }
+
+ /* Allocate the new bw_meter entry */
+ x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT);
+ if (x == NULL) {
+ MFC_UNLOCK();
+ return ENOBUFS;
+ }
+
+ /* Set the new bw_meter entry */
+ x->bm_threshold.b_time = req->bu_threshold.b_time;
+ microtime(&now);
+ x->bm_start_time = now;
+ x->bm_threshold.b_packets = req->bu_threshold.b_packets;
+ x->bm_threshold.b_bytes = req->bu_threshold.b_bytes;
+ x->bm_measured.b_packets = 0;
+ x->bm_measured.b_bytes = 0;
+ x->bm_flags = flags;
+ x->bm_time_next = NULL;
+ x->bm_time_hash = BW_METER_BUCKETS;
+
+ /* Add the new bw_meter entry to the front of entries for this MFC */
+ x->bm_mfc = mfc;
+ x->bm_mfc_next = mfc->mfc_bw_meter;
+ mfc->mfc_bw_meter = x;
+ schedule_bw_meter(x, &now);
+ MFC_UNLOCK();
+
+ return 0;
+}
+
+static void
+free_bw_list(struct bw_meter *list)
+{
+ while (list != NULL) {
+ struct bw_meter *x = list;
+
+ list = list->bm_mfc_next;
+ unschedule_bw_meter(x);
+ free(x, M_BWMETER);
+ }
+}
+
+/*
+ * Delete one or multiple bw_meter entries
+ */
+static int
+del_bw_upcall(struct bw_upcall *req)
+{
+ struct mfc *mfc;
+ struct bw_meter *x;
+
+ if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
+ return EOPNOTSUPP;
+
+ MFC_LOCK();
+
+ /* Find the corresponding MFC entry */
+ mfc = mfc_find(&req->bu_src, &req->bu_dst);
+ if (mfc == NULL) {
+ MFC_UNLOCK();
+ return EADDRNOTAVAIL;
+ } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) {
+ /*
+ * Delete all bw_meter entries for this mfc
+ */
+ struct bw_meter *list;
+
+ list = mfc->mfc_bw_meter;
+ mfc->mfc_bw_meter = NULL;
+ free_bw_list(list);
+ MFC_UNLOCK();
+ return 0;
+ } else { /* Delete a single bw_meter entry */
+ struct bw_meter *prev;
+ uint32_t flags = 0;
+
+ flags = compute_bw_meter_flags(req);
+
+ /* Find the bw_meter entry to delete */
+ for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL;
+ prev = x, x = x->bm_mfc_next) {
+ if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
+ &req->bu_threshold.b_time, ==)) &&
+ (x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
+ (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
+ (x->bm_flags & BW_METER_USER_FLAGS) == flags)
+ break;
+ }
+ if (x != NULL) { /* Delete entry from the list for this MFC */
+ if (prev != NULL)
+ prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/
+ else
+ x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */
+
+ unschedule_bw_meter(x);
+ MFC_UNLOCK();
+ /* Free the bw_meter entry */
+ free(x, M_BWMETER);
+ return 0;
+ } else {
+ MFC_UNLOCK();
+ return EINVAL;
+ }
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Perform bandwidth measurement processing that may result in an upcall
+ */
+static void
+bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
+{
+ struct timeval delta;
+
+ MFC_LOCK_ASSERT();
+
+ delta = *nowp;
+ BW_TIMEVALDECR(&delta, &x->bm_start_time);
+
+ if (x->bm_flags & BW_METER_GEQ) {
+ /*
+ * Processing for ">=" type of bw_meter entry
+ */
+ if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
+ /* Reset the bw_meter entry */
+ x->bm_start_time = *nowp;
+ x->bm_measured.b_packets = 0;
+ x->bm_measured.b_bytes = 0;
+ x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
+ }
+
+ /* Record that a packet is received */
+ x->bm_measured.b_packets++;
+ x->bm_measured.b_bytes += plen;
+
+ /*
+ * Test if we should deliver an upcall
+ */
+ if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) {
+ if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
+ (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) ||
+ ((x->bm_flags & BW_METER_UNIT_BYTES) &&
+ (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) {
+ /* Prepare an upcall for delivery */
+ bw_meter_prepare_upcall(x, nowp);
+ x->bm_flags |= BW_METER_UPCALL_DELIVERED;
+ }
+ }
+ } else if (x->bm_flags & BW_METER_LEQ) {
+ /*
+ * Processing for "<=" type of bw_meter entry
+ */
+ if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
+ /*
+ * We are behind time with the multicast forwarding table
+ * scanning for "<=" type of bw_meter entries, so test now
+ * if we should deliver an upcall.
+ */
+ if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
+ (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
+ ((x->bm_flags & BW_METER_UNIT_BYTES) &&
+ (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
+ /* Prepare an upcall for delivery */
+ bw_meter_prepare_upcall(x, nowp);
+ }
+ /* Reschedule the bw_meter entry */
+ unschedule_bw_meter(x);
+ schedule_bw_meter(x, nowp);
+ }
+
+ /* Record that a packet is received */
+ x->bm_measured.b_packets++;
+ x->bm_measured.b_bytes += plen;
+
+ /*
+ * Test if we should restart the measuring interval
+ */
+ if ((x->bm_flags & BW_METER_UNIT_PACKETS &&
+ x->bm_measured.b_packets <= x->bm_threshold.b_packets) ||
+ (x->bm_flags & BW_METER_UNIT_BYTES &&
+ x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) {
+ /* Don't restart the measuring interval */
+ } else {
+ /* Do restart the measuring interval */
+ /*
+ * XXX: note that we don't unschedule and schedule, because this
+ * might be too much overhead per packet. Instead, when we process
+ * all entries for a given timer hash bin, we check whether it is
+ * really a timeout. If not, we reschedule at that time.
+ */
+ x->bm_start_time = *nowp;
+ x->bm_measured.b_packets = 0;
+ x->bm_measured.b_bytes = 0;
+ x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
+ }
+ }
+}
+
+/*
+ * Prepare a bandwidth-related upcall
+ */
+static void
+bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
+{
+ struct timeval delta;
+ struct bw_upcall *u;
+
+ MFC_LOCK_ASSERT();
+
+ /*
+ * Compute the measured time interval
+ */
+ delta = *nowp;
+ BW_TIMEVALDECR(&delta, &x->bm_start_time);
+
+ /*
+ * If there are too many pending upcalls, deliver them now
+ */
+ if (V_bw_upcalls_n >= BW_UPCALLS_MAX)
+ bw_upcalls_send();
+
+ /*
+ * Set the bw_upcall entry
+ */
+ u = &V_bw_upcalls[V_bw_upcalls_n++];
+ u->bu_src = x->bm_mfc->mfc_origin;
+ u->bu_dst = x->bm_mfc->mfc_mcastgrp;
+ u->bu_threshold.b_time = x->bm_threshold.b_time;
+ u->bu_threshold.b_packets = x->bm_threshold.b_packets;
+ u->bu_threshold.b_bytes = x->bm_threshold.b_bytes;
+ u->bu_measured.b_time = delta;
+ u->bu_measured.b_packets = x->bm_measured.b_packets;
+ u->bu_measured.b_bytes = x->bm_measured.b_bytes;
+ u->bu_flags = 0;
+ if (x->bm_flags & BW_METER_UNIT_PACKETS)
+ u->bu_flags |= BW_UPCALL_UNIT_PACKETS;
+ if (x->bm_flags & BW_METER_UNIT_BYTES)
+ u->bu_flags |= BW_UPCALL_UNIT_BYTES;
+ if (x->bm_flags & BW_METER_GEQ)
+ u->bu_flags |= BW_UPCALL_GEQ;
+ if (x->bm_flags & BW_METER_LEQ)
+ u->bu_flags |= BW_UPCALL_LEQ;
+}
+
+/*
+ * Send the pending bandwidth-related upcalls
+ */
+static void
+bw_upcalls_send(void)
+{
+ struct mbuf *m;
+ int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]);
+ struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
+ static struct igmpmsg igmpmsg = { 0, /* unused1 */
+ 0, /* unused2 */
+ IGMPMSG_BW_UPCALL,/* im_msgtype */
+ 0, /* im_mbz */
+ 0, /* im_vif */
+ 0, /* unused3 */
+ { 0 }, /* im_src */
+ { 0 } }; /* im_dst */
+
+ MFC_LOCK_ASSERT();
+
+ if (V_bw_upcalls_n == 0)
+ return; /* No pending upcalls */
+
+ V_bw_upcalls_n = 0;
+
+ /*
+ * Allocate a new mbuf, initialize it with the header and
+ * the payload for the pending calls.
+ */
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
+ return;
+ }
+
+ m->m_len = m->m_pkthdr.len = 0;
+ m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
+ m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]);
+
+ /*
+ * Send the upcalls
+ * XXX do we need to set the address in k_igmpsrc ?
+ */
+ MRTSTAT_INC(mrts_upcalls);
+ if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) {
+ log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n");
+ MRTSTAT_INC(mrts_upq_sockfull);
+ }
+}
+
+/*
+ * Compute the timeout hash value for the bw_meter entries
+ */
+#define BW_METER_TIMEHASH(bw_meter, hash) \
+ do { \
+ struct timeval next_timeval = (bw_meter)->bm_start_time; \
+ \
+ BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \
+ (hash) = next_timeval.tv_sec; \
+ if (next_timeval.tv_usec) \
+ (hash)++; /* XXX: make sure we don't timeout early */ \
+ (hash) %= BW_METER_BUCKETS; \
+ } while (0)
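+
+/*
+ * Illustrative worked example (not part of the original sources): with
+ * bm_start_time = 10.5s and a threshold b_time of 3s, the interval ends at
+ * 13.5s; the non-zero tv_usec is rounded up to 14 so the entry cannot time
+ * out early, and the bucket index becomes 14 % BW_METER_BUCKETS.
+ */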
+
+/*
+ * Schedule a timer to process periodically bw_meter entry of type "<="
+ * by linking the entry in the proper hash bucket.
+ */
+static void
+schedule_bw_meter(struct bw_meter *x, struct timeval *nowp)
+{
+ int time_hash;
+
+ MFC_LOCK_ASSERT();
+
+ if (!(x->bm_flags & BW_METER_LEQ))
+ return; /* XXX: we schedule timers only for "<=" entries */
+
+ /*
+ * Reset the bw_meter entry
+ */
+ x->bm_start_time = *nowp;
+ x->bm_measured.b_packets = 0;
+ x->bm_measured.b_bytes = 0;
+ x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
+
+ /*
+ * Compute the timeout hash value and insert the entry
+ */
+ BW_METER_TIMEHASH(x, time_hash);
+ x->bm_time_next = V_bw_meter_timers[time_hash];
+ V_bw_meter_timers[time_hash] = x;
+ x->bm_time_hash = time_hash;
+}
+
+/*
+ * Unschedule the periodic timer that processes a bw_meter entry of type "<="
+ * by removing the entry from the proper hash bucket.
+ */
+static void
+unschedule_bw_meter(struct bw_meter *x)
+{
+ int time_hash;
+ struct bw_meter *prev, *tmp;
+
+ MFC_LOCK_ASSERT();
+
+ if (!(x->bm_flags & BW_METER_LEQ))
+ return; /* XXX: we schedule timers only for "<=" entries */
+
+ /*
+ * Compute the timeout hash value and delete the entry
+ */
+ time_hash = x->bm_time_hash;
+ if (time_hash >= BW_METER_BUCKETS)
+ return; /* Entry was not scheduled */
+
+ for (prev = NULL, tmp = V_bw_meter_timers[time_hash];
+ tmp != NULL; prev = tmp, tmp = tmp->bm_time_next)
+ if (tmp == x)
+ break;
+
+ if (tmp == NULL)
+ panic("unschedule_bw_meter: bw_meter entry not found");
+
+ if (prev != NULL)
+ prev->bm_time_next = x->bm_time_next;
+ else
+ V_bw_meter_timers[time_hash] = x->bm_time_next;
+
+ x->bm_time_next = NULL;
+ x->bm_time_hash = BW_METER_BUCKETS;
+}
+
+
+/*
+ * Process all "<=" type bw_meter entries that should be processed now,
+ * and for each entry prepare an upcall if necessary. Each processed
+ * entry is rescheduled again for the (periodic) processing.
+ *
+ * This is run periodically (once per second normally). On each round,
+ * all the potentially matching entries are in the hash slot that we are
+ * looking at.
+ */
+static void
+bw_meter_process()
+{
+ uint32_t loops;
+ int i;
+ struct timeval now, process_endtime;
+
+ microtime(&now);
+ if (V_last_tv_sec == now.tv_sec)
+ return; /* nothing to do */
+
+ loops = now.tv_sec - V_last_tv_sec;
+ V_last_tv_sec = now.tv_sec;
+ if (loops > BW_METER_BUCKETS)
+ loops = BW_METER_BUCKETS;
+
+ MFC_LOCK();
+ /*
+ * Process all bins of bw_meter entries from the one after the last
+ * processed to the current one. On entry, i points to the last bucket
+ * visited, so we need to increment i at the beginning of the loop.
+ */
+ for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) {
+ struct bw_meter *x, *tmp_list;
+
+ if (++i >= BW_METER_BUCKETS)
+ i = 0;
+
+ /* Disconnect the list of bw_meter entries from the bin */
+ tmp_list = V_bw_meter_timers[i];
+ V_bw_meter_timers[i] = NULL;
+
+ /* Process the list of bw_meter entries */
+ while (tmp_list != NULL) {
+ x = tmp_list;
+ tmp_list = tmp_list->bm_time_next;
+
+ /* Test if the time interval is over */
+ process_endtime = x->bm_start_time;
+ BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time);
+ if (BW_TIMEVALCMP(&process_endtime, &now, >)) {
+ /* Not yet: reschedule, but don't reset */
+ int time_hash;
+
+ BW_METER_TIMEHASH(x, time_hash);
+ if (time_hash == i && process_endtime.tv_sec == now.tv_sec) {
+ /*
+ * XXX: somehow the bin processing is a bit ahead of time.
+ * Put the entry in the next bin.
+ */
+ if (++time_hash >= BW_METER_BUCKETS)
+ time_hash = 0;
+ }
+ x->bm_time_next = V_bw_meter_timers[time_hash];
+ V_bw_meter_timers[time_hash] = x;
+ x->bm_time_hash = time_hash;
+
+ continue;
+ }
+
+ /*
+ * Test if we should deliver an upcall
+ */
+ if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
+ (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
+ ((x->bm_flags & BW_METER_UNIT_BYTES) &&
+ (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
+ /* Prepare an upcall for delivery */
+ bw_meter_prepare_upcall(x, &now);
+ }
+
+ /*
+ * Reschedule for next processing
+ */
+ schedule_bw_meter(x, &now);
+ }
+ }
+
+ /* Send all upcalls that are pending delivery */
+ bw_upcalls_send();
+
+ MFC_UNLOCK();
+}
+
+/*
+ * A periodic function for sending all upcalls that are pending delivery
+ */
+static void
+expire_bw_upcalls_send(void *arg)
+{
+ CURVNET_SET((struct vnet *) arg);
+
+ MFC_LOCK();
+ bw_upcalls_send();
+ MFC_UNLOCK();
+
+ callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
+ curvnet);
+ CURVNET_RESTORE();
+}
+
+/*
+ * A periodic function for periodic scanning of the multicast forwarding
+ * table for processing all "<=" bw_meter entries.
+ */
+static void
+expire_bw_meter_process(void *arg)
+{
+ CURVNET_SET((struct vnet *) arg);
+
+ if (V_mrt_api_config & MRT_MFC_BW_UPCALL)
+ bw_meter_process();
+
+ callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
+ curvnet);
+ CURVNET_RESTORE();
+}
+
+/*
+ * End of bandwidth monitoring code
+ */
+
+/*
+ * Send the packet up to the user-level daemon or, if a rendezvous point is
+ * configured, do the PIM Register encapsulation in the kernel.
+ */
+static int
+pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
+ struct mfc *rt)
+{
+ struct mbuf *mb_copy, *mm;
+
+ /*
+ * Do not send IGMP_WHOLEPKT notifications to userland if the
+ * rendezvous point was unspecified and we were told not to.
+ */
+ if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) &&
+ in_nullhost(rt->mfc_rp))
+ return 0;
+
+ mb_copy = pim_register_prepare(ip, m);
+ if (mb_copy == NULL)
+ return ENOBUFS;
+
+ /*
+ * Send all the fragments. Note that the mbuf for each fragment
+ * is freed by the sending machinery.
+ */
+ for (mm = mb_copy; mm; mm = mb_copy) {
+ mb_copy = mm->m_nextpkt;
+ mm->m_nextpkt = 0;
+ mm = m_pullup(mm, sizeof(struct ip));
+ if (mm != NULL) {
+ ip = mtod(mm, struct ip *);
+ if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) {
+ pim_register_send_rp(ip, vifp, mm, rt);
+ } else {
+ pim_register_send_upcall(ip, vifp, mm, rt);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Return a copy of the data packet that is ready for PIM Register
+ * encapsulation.
+ * XXX: Note that in the returned copy the IP header is a valid one.
+ */
+static struct mbuf *
+pim_register_prepare(struct ip *ip, struct mbuf *m)
+{
+ struct mbuf *mb_copy = NULL;
+ int mtu;
+
+ /* Take care of delayed checksums */
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(m);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+
+ /*
+ * Copy the old packet & pullup its IP header into the
+ * new mbuf so we can modify it.
+ */
+ mb_copy = m_copypacket(m, M_DONTWAIT);
+ if (mb_copy == NULL)
+ return NULL;
+ mb_copy = m_pullup(mb_copy, ip->ip_hl << 2);
+ if (mb_copy == NULL)
+ return NULL;
+
+ /* take care of the TTL */
+ ip = mtod(mb_copy, struct ip *);
+ --ip->ip_ttl;
+
+ /* Compute the MTU after the PIM Register encapsulation */
+ mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
+
+ if (ip->ip_len <= mtu) {
+ /* Turn the IP header into a valid one */
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+ ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
+ } else {
+ /* Fragment the packet */
+ if (ip_fragment(ip, &mb_copy, mtu, 0, CSUM_DELAY_IP) != 0) {
+ m_freem(mb_copy);
+ return NULL;
+ }
+ }
+ return mb_copy;
+}
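+
+/*
+ * Illustrative note (not part of the original sources): assuming the usual
+ * 20-byte struct ip for pim_encap_iphdr and an 8-byte pim_encap_pimhdr, the
+ * MTU computed above is 0xffff - 20 - 8 = 65507 bytes; larger packets are
+ * handed to ip_fragment() before encapsulation.
+ */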
+
+/*
+ * Send an upcall with the data packet to the user-level process.
+ */
+static int
+pim_register_send_upcall(struct ip *ip, struct vif *vifp,
+ struct mbuf *mb_copy, struct mfc *rt)
+{
+ struct mbuf *mb_first;
+ int len = ntohs(ip->ip_len);
+ struct igmpmsg *im;
+ struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
+
+ VIF_LOCK_ASSERT();
+
+ /*
+ * Add a new mbuf with an upcall header
+ */
+ MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
+ if (mb_first == NULL) {
+ m_freem(mb_copy);
+ return ENOBUFS;
+ }
+ mb_first->m_data += max_linkhdr;
+ mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg);
+ mb_first->m_len = sizeof(struct igmpmsg);
+ mb_first->m_next = mb_copy;
+
+ /* Send message to routing daemon */
+ im = mtod(mb_first, struct igmpmsg *);
+ im->im_msgtype = IGMPMSG_WHOLEPKT;
+ im->im_mbz = 0;
+ im->im_vif = vifp - V_viftable;
+ im->im_src = ip->ip_src;
+ im->im_dst = ip->ip_dst;
+
+ k_igmpsrc.sin_addr = ip->ip_src;
+
+ MRTSTAT_INC(mrts_upcalls);
+
+ if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) {
+ CTR1(KTR_IPMF, "%s: socket queue full", __func__);
+ MRTSTAT_INC(mrts_upq_sockfull);
+ return ENOBUFS;
+ }
+
+ /* Keep statistics */
+ PIMSTAT_INC(pims_snd_registers_msgs);
+ PIMSTAT_ADD(pims_snd_registers_bytes, len);
+
+ return 0;
+}
+
+/*
+ * Encapsulate the data packet in PIM Register message and send it to the RP.
+ */
+static int
+pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
+ struct mfc *rt)
+{
+ struct mbuf *mb_first;
+ struct ip *ip_outer;
+ struct pim_encap_pimhdr *pimhdr;
+ int len = ntohs(ip->ip_len);
+ vifi_t vifi = rt->mfc_parent;
+
+ VIF_LOCK_ASSERT();
+
+ if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) {
+ m_freem(mb_copy);
+ return EADDRNOTAVAIL; /* The iif vif is invalid */
+ }
+
+ /*
+ * Add a new mbuf with the encapsulating header
+ */
+ MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
+ if (mb_first == NULL) {
+ m_freem(mb_copy);
+ return ENOBUFS;
+ }
+ mb_first->m_data += max_linkhdr;
+ mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
+ mb_first->m_next = mb_copy;
+
+ mb_first->m_pkthdr.len = len + mb_first->m_len;
+
+ /*
+ * Fill in the encapsulating IP and PIM header
+ */
+ ip_outer = mtod(mb_first, struct ip *);
+ *ip_outer = pim_encap_iphdr;
+ ip_outer->ip_id = ip_newid();
+ ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
+ ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
+ ip_outer->ip_dst = rt->mfc_rp;
+ /*
+ * Copy the inner header TOS to the outer header, and take care of the
+ * IP_DF bit.
+ */
+ ip_outer->ip_tos = ip->ip_tos;
+ if (ntohs(ip->ip_off) & IP_DF)
+ ip_outer->ip_off |= IP_DF;
+ pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
+ + sizeof(pim_encap_iphdr));
+ *pimhdr = pim_encap_pimhdr;
+ /* If the iif crosses a border, set the Border-bit */
+ if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config)
+ pimhdr->flags |= htonl(PIM_BORDER_REGISTER);
+
+ mb_first->m_data += sizeof(pim_encap_iphdr);
+ pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr));
+ mb_first->m_data -= sizeof(pim_encap_iphdr);
+
+ send_packet(vifp, mb_first);
+
+ /* Keep statistics */
+ PIMSTAT_INC(pims_snd_registers_msgs);
+ PIMSTAT_ADD(pims_snd_registers_bytes, len);
+
+ return 0;
+}
+
+/*
+ * pim_encapcheck() is called by the encap4_input() path at runtime to
+ * determine if a packet is for PIM, allowing PIM to be dynamically loaded
+ * into the kernel.
+ */
+static int
+pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+
+#ifdef DIAGNOSTIC
+ KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
+#endif
+ if (proto != IPPROTO_PIM)
+ return 0; /* not for us; reject the datagram. */
+
+ return 64; /* claim the datagram. */
+}
+
+/*
+ * PIM-SMv2 and PIM-DM messages processing.
+ * Receives and verifies the PIM control messages, and passes them
+ * up to the listening socket, using rip_input().
+ * The only message with special processing is the PIM_REGISTER message
+ * (used by PIM-SM): the PIM header is stripped off, and the inner packet
+ * is passed to if_simloop().
+ */
+void
+pim_input(struct mbuf *m, int off)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ struct pim *pim;
+ int minlen;
+ int datalen = ip->ip_len;
+ int ip_tos;
+ int iphlen = off;
+
+ /* Keep statistics */
+ PIMSTAT_INC(pims_rcv_total_msgs);
+ PIMSTAT_ADD(pims_rcv_total_bytes, datalen);
+
+ /*
+ * Validate lengths
+ */
+ if (datalen < PIM_MINLEN) {
+ PIMSTAT_INC(pims_rcv_tooshort);
+ CTR3(KTR_IPMF, "%s: short packet (%d) from %s",
+ __func__, datalen, inet_ntoa(ip->ip_src));
+ m_freem(m);
+ return;
+ }
+
+ /*
+ * If the packet is at least as big as a REGISTER, go ahead
+ * and grab the PIM REGISTER header size, to avoid another
+ * possible m_pullup() later.
+ *
+ * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8
+ * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28
+ */
+ minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN);
+ /*
+ * Get the IP and PIM headers in contiguous memory, and
+ * possibly the PIM REGISTER header.
+ */
+ if ((m->m_flags & M_EXT || m->m_len < minlen) &&
+ (m = m_pullup(m, minlen)) == 0) {
+ CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__);
+ return;
+ }
+
+ /* m_pullup() may have given us a new mbuf so reset ip. */
+ ip = mtod(m, struct ip *);
+ ip_tos = ip->ip_tos;
+
+ /* adjust mbuf to point to the PIM header */
+ m->m_data += iphlen;
+ m->m_len -= iphlen;
+ pim = mtod(m, struct pim *);
+
+ /*
+ * Validate checksum. If PIM REGISTER, exclude the data packet.
+ *
+ * XXX: some older PIMv2 implementations don't make this distinction,
+ * so for compatibility reasons perform the checksum over part of the
+ * message, and if that fails, then over the whole message.
+ */
+ if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) {
+ /* do nothing, checksum okay */
+ } else if (in_cksum(m, datalen)) {
+ PIMSTAT_INC(pims_rcv_badsum);
+ CTR1(KTR_IPMF, "%s: invalid checksum", __func__);
+ m_freem(m);
+ return;
+ }
+
+ /* PIM version check */
+ if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) {
+ PIMSTAT_INC(pims_rcv_badversion);
+ CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__,
+ (int)PIM_VT_V(pim->pim_vt), PIM_VERSION);
+ m_freem(m);
+ return;
+ }
+
+ /* restore mbuf back to the outer IP */
+ m->m_data -= iphlen;
+ m->m_len += iphlen;
+
+ if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) {
+ /*
+ * Since this is a REGISTER, we'll make a copy of the register
+ * headers ip + pim + u_int32 + encap_ip, to be passed up to the
+ * routing daemon.
+ */
+ struct sockaddr_in dst = { sizeof(dst), AF_INET };
+ struct mbuf *mcp;
+ struct ip *encap_ip;
+ u_int32_t *reghdr;
+ struct ifnet *vifp;
+
+ VIF_LOCK();
+ if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) {
+ VIF_UNLOCK();
+ CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__,
+ (int)V_reg_vif_num);
+ m_freem(m);
+ return;
+ }
+ /* XXX need refcnt? */
+ vifp = V_viftable[V_reg_vif_num].v_ifp;
+ VIF_UNLOCK();
+
+ /*
+ * Validate length
+ */
+ if (datalen < PIM_REG_MINLEN) {
+ PIMSTAT_INC(pims_rcv_tooshort);
+ PIMSTAT_INC(pims_rcv_badregisters);
+ CTR1(KTR_IPMF, "%s: register packet size too small", __func__);
+ m_freem(m);
+ return;
+ }
+
+ reghdr = (u_int32_t *)(pim + 1);
+ encap_ip = (struct ip *)(reghdr + 1);
+
+ CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d",
+ __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len));
+
+ /* verify the version number of the inner packet */
+ if (encap_ip->ip_v != IPVERSION) {
+ PIMSTAT_INC(pims_rcv_badregisters);
+ CTR1(KTR_IPMF, "%s: bad encap ip version", __func__);
+ m_freem(m);
+ return;
+ }
+
+ /* verify the inner packet is destined to a mcast group */
+ if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) {
+ PIMSTAT_INC(pims_rcv_badregisters);
+ CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__,
+ inet_ntoa(encap_ip->ip_dst));
+ m_freem(m);
+ return;
+ }
+
+ /* If a NULL_REGISTER, pass it to the daemon */
+ if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
+ goto pim_input_to_daemon;
+
+ /*
+ * Copy the TOS from the outer IP header to the inner IP header.
+ */
+ if (encap_ip->ip_tos != ip_tos) {
+ /* Outer TOS -> inner TOS */
+ encap_ip->ip_tos = ip_tos;
+ /* Recompute the inner header checksum. Sigh... */
+
+ /* adjust mbuf to point to the inner IP header */
+ m->m_data += (iphlen + PIM_MINLEN);
+ m->m_len -= (iphlen + PIM_MINLEN);
+
+ encap_ip->ip_sum = 0;
+ encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2);
+
+ /* restore mbuf to point back to the outer IP header */
+ m->m_data -= (iphlen + PIM_MINLEN);
+ m->m_len += (iphlen + PIM_MINLEN);
+ }
+
+ /*
+ * Decapsulate the inner IP packet and loopback to forward it
+ * as a normal multicast packet. Also, make a copy of the
+ * outer_iphdr + pimhdr + reghdr + encap_iphdr
+ * to pass to the daemon later, so it can take the appropriate
+ * actions (e.g., send back PIM_REGISTER_STOP).
+ * XXX: here m->m_data points to the outer IP header.
+ */
+ mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN);
+ if (mcp == NULL) {
+ CTR1(KTR_IPMF, "%s: m_copy() failed", __func__);
+ m_freem(m);
+ return;
+ }
+
+ /* Keep statistics */
+ /* XXX: registers_bytes include only the encap. mcast pkt */
+ PIMSTAT_INC(pims_rcv_registers_msgs);
+ PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len));
+
+ /*
+ * forward the inner ip packet; point m_data at the inner ip.
+ */
+ m_adj(m, iphlen + PIM_MINLEN);
+
+ CTR4(KTR_IPMF,
+ "%s: forward decap'd REGISTER: src %lx dst %lx vif %d",
+ __func__,
+ (u_long)ntohl(encap_ip->ip_src.s_addr),
+ (u_long)ntohl(encap_ip->ip_dst.s_addr),
+ (int)V_reg_vif_num);
+
+ /* NB: vifp was collected above; can it change on us? */
+ if_simloop(vifp, m, dst.sin_family, 0);
+
+ /* prepare the register head to send to the mrouting daemon */
+ m = mcp;
+ }
+
+pim_input_to_daemon:
+ /*
+ * Pass the PIM message up to the daemon; if it is a Register message,
+ * pass the 'head' only up to the daemon. This includes the
+ * outer IP header, PIM header, PIM-Register header and the
+ * inner IP header.
+ * XXX: the outer IP header pkt size of a Register is not adjusted to
+ * reflect the fact that the inner multicast data is truncated.
+ */
+ rip_input(m, iphlen);
+
+ return;
+}
+
+static int
+sysctl_mfctable(SYSCTL_HANDLER_ARGS)
+{
+ struct mfc *rt;
+ int error, i;
+
+ if (req->newptr)
+ return (EPERM);
+ if (V_mfchashtbl == NULL) /* XXX unlocked */
+ return (0);
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error)
+ return (error);
+
+ MFC_LOCK();
+ for (i = 0; i < mfchashsize; i++) {
+ LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) {
+ error = SYSCTL_OUT(req, rt, sizeof(struct mfc));
+ if (error)
+ goto out_locked;
+ }
+ }
+out_locked:
+ MFC_UNLOCK();
+ return (error);
+}
+
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, sysctl_mfctable,
+ "IPv4 Multicast Forwarding Table (struct *mfc[mfchashsize], "
+ "netinet/ip_mroute.h)");
+
+static void
+vnet_mroute_init(const void *unused __unused)
+{
+
+ MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
+ bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
+ callout_init(&V_expire_upcalls_ch, CALLOUT_MPSAFE);
+ callout_init(&V_bw_upcalls_ch, CALLOUT_MPSAFE);
+ callout_init(&V_bw_meter_ch, CALLOUT_MPSAFE);
+}
+
+VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_mroute_init,
+ NULL);
+
+static void
+vnet_mroute_uninit(const void *unused __unused)
+{
+
+ FREE(V_nexpire, M_MRTABLE);
+ V_nexpire = NULL;
+}
+
+VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE,
+ vnet_mroute_uninit, NULL);
+
+static int
+ip_mroute_modevent(module_t mod, int type, void *unused)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ MROUTER_LOCK_INIT();
+
+ if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
+ if (if_detach_event_tag == NULL) {
+ printf("ip_mroute: unable to ifnet_deperture_even handler\n");
+ MROUTER_LOCK_DESTROY();
+ return (EINVAL);
+ }
+
+ MFC_LOCK_INIT();
+ VIF_LOCK_INIT();
+
+ mfchashsize = MFCHASHSIZE;
+#ifndef __rtems__
+ if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) &&
+ !powerof2(mfchashsize)) {
+ printf("WARNING: %s not a power of 2; using default\n",
+ "net.inet.ip.mfchashsize");
+ mfchashsize = MFCHASHSIZE;
+ }
+#endif
+
+ pim_squelch_wholepkt = 0;
+ TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
+ &pim_squelch_wholepkt);
+
+ pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
+ pim_encapcheck, &in_pim_protosw, NULL);
+ if (pim_encap_cookie == NULL) {
+ printf("ip_mroute: unable to attach pim encap\n");
+ VIF_LOCK_DESTROY();
+ MFC_LOCK_DESTROY();
+ MROUTER_LOCK_DESTROY();
+ return (EINVAL);
+ }
+
+ ip_mcast_src = X_ip_mcast_src;
+ ip_mforward = X_ip_mforward;
+ ip_mrouter_done = X_ip_mrouter_done;
+ ip_mrouter_get = X_ip_mrouter_get;
+ ip_mrouter_set = X_ip_mrouter_set;
+
+ ip_rsvp_force_done = X_ip_rsvp_force_done;
+ ip_rsvp_vif = X_ip_rsvp_vif;
+
+ legal_vif_num = X_legal_vif_num;
+ mrt_ioctl = X_mrt_ioctl;
+ rsvp_input_p = X_rsvp_input;
+ break;
+
+ case MOD_UNLOAD:
+ /*
+ * Typically module unload happens after the user-level
+ * process has shut down the kernel services (the check
+ * below ensures someone can't just yank the module out
+ * from under a running process). But if the module is
+ * just loaded and then unloaded w/o starting up a user
+ * process we still need to cleanup.
+ */
+ MROUTER_LOCK();
+ if (ip_mrouter_cnt != 0) {
+ MROUTER_UNLOCK();
+ return (EINVAL);
+ }
+ ip_mrouter_unloading = 1;
+ MROUTER_UNLOCK();
+
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
+
+ if (pim_encap_cookie) {
+ encap_detach(pim_encap_cookie);
+ pim_encap_cookie = NULL;
+ }
+
+ ip_mcast_src = NULL;
+ ip_mforward = NULL;
+ ip_mrouter_done = NULL;
+ ip_mrouter_get = NULL;
+ ip_mrouter_set = NULL;
+
+ ip_rsvp_force_done = NULL;
+ ip_rsvp_vif = NULL;
+
+ legal_vif_num = NULL;
+ mrt_ioctl = NULL;
+ rsvp_input_p = NULL;
+
+ VIF_LOCK_DESTROY();
+ MFC_LOCK_DESTROY();
+ MROUTER_LOCK_DESTROY();
+ break;
+
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t ip_mroutemod = {
+ "ip_mroute",
+ ip_mroute_modevent,
+ 0
+};
+
+DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/freebsd/sys/netinet/ip_mroute.h b/freebsd/sys/netinet/ip_mroute.h
new file mode 100644
index 00000000..3bc7f52f
--- /dev/null
+++ b/freebsd/sys/netinet/ip_mroute.h
@@ -0,0 +1,359 @@
+/*-
+ * Copyright (c) 1989 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_MROUTE_HH_
+#define _NETINET_IP_MROUTE_HH_
+
+/*
+ * Definitions for IP multicast forwarding.
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ * Modified by Ajit Thyagarajan, PARC, August 1993.
+ * Modified by Ajit Thyagarajan, PARC, August 1994.
+ * Modified by Ahmed Helmy, SGI, June 1996.
+ * Modified by Pavlin Radoslavov, ICSI, October 2002.
+ *
+ * MROUTING Revision: 3.3.1.3
+ * and PIM-SMv2 and PIM-DM support, advanced API support,
+ * bandwidth metering and signaling.
+ */
+
+/*
+ * Multicast Routing set/getsockopt commands.
+ */
+#define MRT_INIT 100 /* initialize forwarder */
+#define MRT_DONE 101 /* shut down forwarder */
+#define MRT_ADD_VIF 102 /* create virtual interface */
+#define MRT_DEL_VIF 103 /* delete virtual interface */
+#define MRT_ADD_MFC 104 /* insert forwarding cache entry */
+#define MRT_DEL_MFC 105 /* delete forwarding cache entry */
+#define MRT_VERSION 106 /* get kernel version number */
+#define MRT_ASSERT 107 /* enable assert processing */
+#define MRT_PIM MRT_ASSERT /* enable PIM processing */
+#define MRT_API_SUPPORT 109 /* supported MRT API */
+#define MRT_API_CONFIG 110 /* config MRT API */
+#define MRT_ADD_BW_UPCALL 111 /* create bandwidth monitor */
+#define MRT_DEL_BW_UPCALL 112 /* delete bandwidth monitor */
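+
+/*
+ * Illustrative sketch (not part of the original header): a routing daemon
+ * such as mrouted or pimd typically issues these commands with setsockopt()
+ * on a raw IGMP socket, e.g. (error handling elided):
+ *
+ *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
+ *	int v = 1;
+ *	setsockopt(s, IPPROTO_IP, MRT_INIT, &v, sizeof(v));
+ *	...	install vifs and MFC entries, run the protocol	...
+ *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
+ */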
+
+/*
+ * Types and macros for handling bitmaps with one bit per virtual interface.
+ */
+#define MAXVIFS 32
+typedef u_long vifbitmap_t;
+typedef u_short vifi_t; /* type of a vif index */
+#define ALL_VIFS (vifi_t)-1
+
+#define VIFM_SET(n, m) ((m) |= (1 << (n)))
+#define VIFM_CLR(n, m) ((m) &= ~(1 << (n)))
+#define VIFM_ISSET(n, m) ((m) & (1 << (n)))
+#define VIFM_CLRALL(m) ((m) = 0x00000000)
+#define VIFM_COPY(mfrom, mto) ((mto) = (mfrom))
+#define VIFM_SAME(m1, m2) ((m1) == (m2))
+
+struct mfc;
+
+/*
+ * Argument structure for MRT_ADD_VIF.
+ * (MRT_DEL_VIF takes a single vifi_t argument.)
+ */
+struct vifctl {
+ vifi_t vifc_vifi; /* the index of the vif to be added */
+ u_char vifc_flags; /* VIFF_ flags defined below */
+ u_char vifc_threshold; /* min ttl required to forward on vif */
+ u_int vifc_rate_limit; /* max rate */
+ struct in_addr vifc_lcl_addr; /* local interface address */
+ struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */
+};
+
+#define VIFF_TUNNEL 0x1 /* no-op; retained for old source */
+#define VIFF_SRCRT 0x2 /* no-op; retained for old source */
+#define VIFF_REGISTER 0x4 /* used for PIM Register encap/decap */
+
+/*
+ * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC
+ * XXX if you change this, make sure to change struct mfcctl2 as well.
+ */
+struct mfcctl {
+ struct in_addr mfcc_origin; /* ip origin of mcasts */
+ struct in_addr mfcc_mcastgrp; /* multicast group associated*/
+ vifi_t mfcc_parent; /* incoming vif */
+ u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */
+};
+
+/*
+ * The new argument structure for MRT_ADD_MFC and MRT_DEL_MFC overlays
+ * and extends the old struct mfcctl.
+ */
+struct mfcctl2 {
+ /* the mfcctl fields */
+ struct in_addr mfcc_origin; /* ip origin of mcasts */
+ struct in_addr mfcc_mcastgrp; /* multicast group associated*/
+ vifi_t mfcc_parent; /* incoming vif */
+ u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */
+
+ /* extension fields */
+ uint8_t mfcc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */
+ struct in_addr mfcc_rp; /* the RP address */
+};
+/*
+ * The advanced-API flags.
+ *
+ * The MRT_MFC_FLAGS_XXX API flags are also used as flags
+ * for the mfcc_flags field.
+ */
+#define MRT_MFC_FLAGS_DISABLE_WRONGVIF (1 << 0) /* disable WRONGVIF signals */
+#define MRT_MFC_FLAGS_BORDER_VIF (1 << 1) /* border vif */
+#define MRT_MFC_RP (1 << 8) /* enable RP address */
+#define MRT_MFC_BW_UPCALL (1 << 9) /* enable bw upcalls */
+#define MRT_MFC_FLAGS_ALL (MRT_MFC_FLAGS_DISABLE_WRONGVIF | \
+ MRT_MFC_FLAGS_BORDER_VIF)
+#define MRT_API_FLAGS_ALL (MRT_MFC_FLAGS_ALL | \
+ MRT_MFC_RP | \
+ MRT_MFC_BW_UPCALL)
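+
+/*
+ * Illustrative sketch (not part of the original header): once the advanced
+ * API has been enabled with MRT_API_CONFIG, a daemon could install a cache
+ * entry using the extended structure; the addresses are placeholders:
+ *
+ *	struct mfcctl2 mc;
+ *	memset(&mc, 0, sizeof(mc));
+ *	mc.mfcc_origin.s_addr = inet_addr("192.0.2.1");
+ *	mc.mfcc_mcastgrp.s_addr = inet_addr("232.1.1.1");
+ *	mc.mfcc_parent = 0;			incoming vif
+ *	mc.mfcc_ttls[1] = 1;			forward on vif 1
+ *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
+ */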
+
+/*
+ * Structure for installing or delivering an upcall if the
+ * measured bandwidth is above or below a threshold.
+ *
+ * User programs (e.g. daemons) may have a need to know when the
+ * bandwidth used by some data flow is above or below some threshold.
+ * This interface allows the userland to specify the threshold (in
+ * bytes and/or packets) and the measurement interval. Flows are
+ * all packet with the same source and destination IP address.
+ * At the moment the code is only used for multicast destinations
+ * but there is nothing that prevents its use for unicast.
+ *
+ * The measurement interval cannot be shorter than some Tmin (currently, 3s).
+ * The threshold is set in packets and/or bytes per_interval.
+ *
+ * Measurement works as follows:
+ *
+ * For >= measurements:
+ * The first packet marks the start of a measurement interval.
+ * During an interval we count packets and bytes, and when we
+ * pass the threshold we deliver an upcall and we are done.
+ * The first packet after the end of the interval resets the
+ * count and restarts the measurement.
+ *
+ * For <= measurements:
+ * We start a timer to fire at the end of the interval, and
+ * then for each incoming packet we count packets and bytes.
+ * When the timer fires, we compare the value with the threshold,
+ * schedule an upcall if we are below, and restart the measurement
+ * (reschedule timer and zero counters).
+ */
+
+struct bw_data {
+ struct timeval b_time;
+ uint64_t b_packets;
+ uint64_t b_bytes;
+};
+
+struct bw_upcall {
+ struct in_addr bu_src; /* source address */
+ struct in_addr bu_dst; /* destination address */
+ uint32_t bu_flags; /* misc flags (see below) */
+#define BW_UPCALL_UNIT_PACKETS (1 << 0) /* threshold (in packets) */
+#define BW_UPCALL_UNIT_BYTES (1 << 1) /* threshold (in bytes) */
+#define BW_UPCALL_GEQ (1 << 2) /* upcall if bw >= threshold */
+#define BW_UPCALL_LEQ (1 << 3) /* upcall if bw <= threshold */
+#define BW_UPCALL_DELETE_ALL (1 << 4) /* delete all upcalls for s,d*/
+ struct bw_data bu_threshold; /* the bw threshold */
+ struct bw_data bu_measured; /* the measured bw */
+};
+
+/* max. number of upcalls to deliver together */
+#define BW_UPCALLS_MAX 128
+/* min. threshold time interval for bandwidth measurement */
+#define BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC 3
+#define BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC 0
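+
+/*
+ * Illustrative sketch (not part of the original header): with the
+ * MRT_MFC_BW_UPCALL feature enabled via MRT_API_CONFIG, a ">=" monitor for
+ * a (source, group) pair could be installed as follows; the addresses and
+ * threshold values are arbitrary examples:
+ *
+ *	struct bw_upcall bu;
+ *	memset(&bu, 0, sizeof(bu));
+ *	bu.bu_src.s_addr = inet_addr("192.0.2.1");
+ *	bu.bu_dst.s_addr = inet_addr("232.1.1.1");
+ *	bu.bu_flags = BW_UPCALL_UNIT_PACKETS | BW_UPCALL_GEQ;
+ *	bu.bu_threshold.b_time.tv_sec = BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC;
+ *	bu.bu_threshold.b_packets = 1000;
+ *	setsockopt(s, IPPROTO_IP, MRT_ADD_BW_UPCALL, &bu, sizeof(bu));
+ */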
+
+/*
+ * The kernel's multicast routing statistics.
+ */
+struct mrtstat {
+ u_long mrts_mfc_lookups; /* # forw. cache hash table hits */
+ u_long mrts_mfc_misses; /* # forw. cache hash table misses */
+ u_long mrts_upcalls; /* # calls to multicast routing daemon */
+ u_long mrts_no_route; /* no route for packet's origin */
+ u_long mrts_bad_tunnel; /* malformed tunnel options */
+ u_long mrts_cant_tunnel; /* no room for tunnel options */
+ u_long mrts_wrong_if; /* arrived on wrong interface */
+ u_long mrts_upq_ovflw; /* upcall Q overflow */
+ u_long mrts_cache_cleanups; /* # entries with no upcalls */
+ u_long mrts_drop_sel; /* pkts dropped selectively */
+ u_long mrts_q_overflow; /* pkts dropped - Q overflow */
+ u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */
+ u_long mrts_upq_sockfull; /* upcalls dropped - socket full */
+};
+
+#ifdef _KERNEL
+#define MRTSTAT_ADD(name, val) V_mrtstat.name += (val)
+#define MRTSTAT_INC(name) MRTSTAT_ADD(name, 1)
+#endif
+
+/*
+ * Argument structure used by mrouted to get src-grp pkt counts
+ */
+struct sioc_sg_req {
+ struct in_addr src;
+ struct in_addr grp;
+ u_long pktcnt;
+ u_long bytecnt;
+ u_long wrong_if;
+};
+
+/*
+ * Argument structure used by mrouted to get vif pkt counts
+ */
+struct sioc_vif_req {
+ vifi_t vifi; /* vif number */
+ u_long icount; /* Input packet count on vif */
+ u_long ocount; /* Output packet count on vif */
+ u_long ibytes; /* Input byte count on vif */
+ u_long obytes; /* Output byte count on vif */
+};
+
+
+/*
+ * The kernel's virtual-interface structure.
+ */
+struct vif {
+ u_char v_flags; /* VIFF_ flags defined above */
+ u_char v_threshold; /* min ttl required to forward on vif*/
+ struct in_addr v_lcl_addr; /* local interface address */
+ struct in_addr v_rmt_addr; /* remote address (tunnels only) */
+ struct ifnet *v_ifp; /* pointer to interface */
+ u_long v_pkt_in; /* # pkts in on interface */
+ u_long v_pkt_out; /* # pkts out on interface */
+ u_long v_bytes_in; /* # bytes in on interface */
+ u_long v_bytes_out; /* # bytes out on interface */
+ struct route v_route; /* cached route */
+};
+
+#ifdef _KERNEL
+/*
+ * The kernel's multicast forwarding cache entry structure
+ */
+struct mfc {
+ LIST_ENTRY(mfc) mfc_hash;
+ struct in_addr mfc_origin; /* IP origin of mcasts */
+ struct in_addr mfc_mcastgrp; /* multicast group associated*/
+ vifi_t mfc_parent; /* incoming vif */
+ u_char mfc_ttls[MAXVIFS]; /* forwarding ttls on vifs */
+ u_long mfc_pkt_cnt; /* pkt count for src-grp */
+ u_long mfc_byte_cnt; /* byte count for src-grp */
+ u_long mfc_wrong_if; /* wrong if for src-grp */
+ int mfc_expire; /* time to clean entry up */
+ struct timeval mfc_last_assert; /* last time I sent an assert*/
+ uint8_t mfc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */
+ struct in_addr mfc_rp; /* the RP address */
+ struct bw_meter *mfc_bw_meter; /* list of bandwidth meters */
+ u_long mfc_nstall; /* # of packets awaiting mfc */
+ TAILQ_HEAD(, rtdetq) mfc_stall; /* q of packets awaiting mfc */
+};
+#endif /* _KERNEL */
+
+/*
+ * Struct used to communicate from kernel to multicast router
+ * note the convenient similarity to an IP packet
+ */
+struct igmpmsg {
+ uint32_t unused1;
+ uint32_t unused2;
+ u_char im_msgtype; /* what type of message */
+#define IGMPMSG_NOCACHE 1 /* no MFC in the kernel */
+#define IGMPMSG_WRONGVIF 2 /* packet came from wrong interface */
+#define IGMPMSG_WHOLEPKT 3 /* PIM pkt for user level encap. */
+#define IGMPMSG_BW_UPCALL 4 /* BW monitoring upcall */
+ u_char im_mbz; /* must be zero */
+ u_char im_vif; /* vif rec'd on */
+ u_char unused3;
+ struct in_addr im_src, im_dst;
+};
+
+#ifdef _KERNEL
+/*
+ * Argument structure used for pkt info. while upcall is made
+ */
+struct rtdetq {
+ TAILQ_ENTRY(rtdetq) rte_link;
+ struct mbuf *m; /* A copy of the packet */
+ struct ifnet *ifp; /* Interface pkt came in on */
+ vifi_t xmt_vif; /* Saved copy of imo_multicast_vif */
+};
+#define MAX_UPQ 4 /* max. no of pkts in upcall Q */
+#endif /* _KERNEL */
+
+/*
+ * Structure for measuring the bandwidth and sending an upcall if the
+ * measured bandwidth is above or below a threshold.
+ */
+struct bw_meter {
+ struct bw_meter *bm_mfc_next; /* next bw meter (same mfc) */
+ struct bw_meter *bm_time_next; /* next bw meter (same time) */
+ uint32_t bm_time_hash; /* the time hash value */
+ struct mfc *bm_mfc; /* the corresponding mfc */
+ uint32_t bm_flags; /* misc flags (see below) */
+#define BW_METER_UNIT_PACKETS (1 << 0) /* threshold (in packets) */
+#define BW_METER_UNIT_BYTES (1 << 1) /* threshold (in bytes) */
+#define BW_METER_GEQ (1 << 2) /* upcall if bw >= threshold */
+#define BW_METER_LEQ (1 << 3) /* upcall if bw <= threshold */
+#define BW_METER_USER_FLAGS (BW_METER_UNIT_PACKETS | \
+ BW_METER_UNIT_BYTES | \
+ BW_METER_GEQ | \
+ BW_METER_LEQ)
+
+#define BW_METER_UPCALL_DELIVERED (1 << 24) /* upcall was delivered */
+
+ struct bw_data bm_threshold; /* the upcall threshold */
+ struct bw_data bm_measured; /* the measured bw */
+ struct timeval bm_start_time; /* abs. time */
+};
+
+#ifdef _KERNEL
+
+struct sockopt;
+
+extern int (*ip_mrouter_set)(struct socket *, struct sockopt *);
+extern int (*ip_mrouter_get)(struct socket *, struct sockopt *);
+extern int (*ip_mrouter_done)(void);
+extern int (*mrt_ioctl)(u_long, caddr_t, int);
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_IP_MROUTE_HH_ */
diff --git a/freebsd/sys/netinet/ip_options.c b/freebsd/sys/netinet/ip_options.c
new file mode 100644
index 00000000..f8b31607
--- /dev/null
+++ b/freebsd/sys/netinet/ip_options.c
@@ -0,0 +1,747 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ipstealth.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/if_dl.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/sys/socketvar.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+static int ip_dosourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
+ &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
+
+static int ip_acceptsourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
+ CTLFLAG_RW, &ip_acceptsourceroute, 0,
+ "Enable accepting source routed IP packets");
+
+int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
+SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
+ &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
+
+static void save_rte(struct mbuf *m, u_char *, struct in_addr);
+
+/*
+ * Do option processing on a datagram, possibly discarding it if bad options
+ * are encountered, or forwarding it if source-routed.
+ *
+ * The pass argument is used when operating in the IPSTEALTH mode to tell
+ * what options to process: [LS]SRR (pass 0) or the others (pass 1). The
+ * reason for as many as two passes is that when doing IPSTEALTH, non-routing
+ * options should be processed only if the packet is for us.
+ *
+ * Returns 1 if packet has been forwarded/freed, 0 if the packet should be
+ * processed further.
+ */
+int
+ip_dooptions(struct mbuf *m, int pass)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ u_char *cp;
+ struct in_ifaddr *ia;
+ int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
+ struct in_addr *sin, dst;
+ uint32_t ntime;
+ struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
+
+ /* Ignore or reject packets with IP options. */
+ if (ip_doopts == 0)
+ return 0;
+ else if (ip_doopts == 2) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_FILTER_PROHIB;
+ goto bad;
+ }
+
+ dst = ip->ip_dst;
+ cp = (u_char *)(ip + 1);
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ if (cnt < IPOPT_OLEN + sizeof(*cp)) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ optlen = cp[IPOPT_OLEN];
+ if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ /*
+ * Source routing with record. Find interface with current
+ * destination address. If none on this machine then drop if
+ * strictly routed, or do nothing if loosely routed. Record
+ * interface address and bring up next address component. If
+ * strictly routed make sure next address is on directly
+ * accessible net.
+ */
+ case IPOPT_LSRR:
+ case IPOPT_SSRR:
+#ifdef IPSTEALTH
+ if (V_ipstealth && pass > 0)
+ break;
+#endif
+ if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ ipaddr.sin_addr = ip->ip_dst;
+ if (ifa_ifwithaddr_check((struct sockaddr *)&ipaddr)
+ == 0) {
+ if (opt == IPOPT_SSRR) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+ if (!ip_dosourceroute)
+ goto nosourcerouting;
+ /*
+ * Loose routing, and not at next destination
+ * yet; nothing to do except forward.
+ */
+ break;
+ }
+ off--; /* 0 origin */
+ if (off > optlen - (int)sizeof(struct in_addr)) {
+ /*
+ * End of source route. Should be for us.
+ */
+ if (!ip_acceptsourceroute)
+ goto nosourcerouting;
+ save_rte(m, cp, ip->ip_src);
+ break;
+ }
+#ifdef IPSTEALTH
+ if (V_ipstealth)
+ goto dropit;
+#endif
+ if (!ip_dosourceroute) {
+ if (V_ipforwarding) {
+ char buf[16]; /* aaa.bbb.ccc.ddd\0 */
+ /*
+ * Acting as a router, so generate
+ * ICMP
+ */
+nosourcerouting:
+ strcpy(buf, inet_ntoa(ip->ip_dst));
+ log(LOG_WARNING,
+ "attempted source route from %s to %s\n",
+ inet_ntoa(ip->ip_src), buf);
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ } else {
+ /*
+ * Not acting as a router, so
+ * silently drop.
+ */
+#ifdef IPSTEALTH
+dropit:
+#endif
+ IPSTAT_INC(ips_cantforward);
+ m_freem(m);
+ return (1);
+ }
+ }
+
+ /*
+ * locate outgoing interface
+ */
+ (void)memcpy(&ipaddr.sin_addr, cp + off,
+ sizeof(ipaddr.sin_addr));
+
+ if (opt == IPOPT_SSRR) {
+#define INA struct in_ifaddr *
+#define SA struct sockaddr *
+ if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
+ ia = (INA)ifa_ifwithnet((SA)&ipaddr, 0);
+ } else
+/* XXX MRT 0 for routing */
+ ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m));
+ if (ia == NULL) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+ ip->ip_dst = ipaddr.sin_addr;
+ (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ ifa_free(&ia->ia_ifa);
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ /*
+ * Let ip_intr's mcast routing check handle mcast pkts
+ */
+ forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
+ break;
+
+ case IPOPT_RR:
+#ifdef IPSTEALTH
+ if (V_ipstealth && pass == 0)
+ break;
+#endif
+ if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ /*
+ * If no space remains, ignore.
+ */
+ off--; /* 0 origin */
+ if (off > optlen - (int)sizeof(struct in_addr))
+ break;
+ (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
+ sizeof(ipaddr.sin_addr));
+ /*
+ * Locate outgoing interface; if we're the
+ * destination, use the incoming interface (should be
+ * same).
+ */
+ if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
+ (ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_HOST;
+ goto bad;
+ }
+ (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ ifa_free(&ia->ia_ifa);
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ break;
+
+ case IPOPT_TS:
+#ifdef IPSTEALTH
+ if (V_ipstealth && pass == 0)
+ break;
+#endif
+ code = cp - (u_char *)ip;
+ if (optlen < 4 || optlen > 40) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < 5) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if (off > optlen - (int)sizeof(int32_t)) {
+ cp[IPOPT_OFFSET + 1] += (1 << 4);
+ if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ break;
+ }
+ off--; /* 0 origin */
+ sin = (struct in_addr *)(cp + off);
+ switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
+
+ case IPOPT_TS_TSONLY:
+ break;
+
+ case IPOPT_TS_TSANDADDR:
+ if (off + sizeof(uint32_t) +
+ sizeof(struct in_addr) > optlen) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ ipaddr.sin_addr = dst;
+ ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
+ m->m_pkthdr.rcvif);
+ if (ia == NULL)
+ continue;
+ (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
+ sizeof(struct in_addr));
+ ifa_free(&ia->ia_ifa);
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ off += sizeof(struct in_addr);
+ break;
+
+ case IPOPT_TS_PRESPEC:
+ if (off + sizeof(uint32_t) +
+ sizeof(struct in_addr) > optlen) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ (void)memcpy(&ipaddr.sin_addr, sin,
+ sizeof(struct in_addr));
+ if (ifa_ifwithaddr_check((SA)&ipaddr) == 0)
+ continue;
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ off += sizeof(struct in_addr);
+ break;
+
+ default:
+ code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
+ goto bad;
+ }
+ ntime = iptime();
+ (void)memcpy(cp + off, &ntime, sizeof(uint32_t));
+ cp[IPOPT_OFFSET] += sizeof(uint32_t);
+ }
+ }
+ if (forward && V_ipforwarding) {
+ ip_forward(m, 1);
+ return (1);
+ }
+ return (0);
+bad:
+ icmp_error(m, type, code, 0, 0);
+ IPSTAT_INC(ips_badoptions);
+ return (1);
+}
+
+/*
+ * Save incoming source route for use in replies, to be picked up later by
+ * ip_srcroute if the receiver is interested.
+ */
+static void
+save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
+{
+ unsigned olen;
+ struct ipopt_tag *opts;
+
+ opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
+ sizeof(struct ipopt_tag), M_NOWAIT);
+ if (opts == NULL)
+ return;
+
+ olen = option[IPOPT_OLEN];
+ if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
+ m_tag_free((struct m_tag *)opts);
+ return;
+ }
+ bcopy(option, opts->ip_srcrt.srcopt, olen);
+ opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
+ opts->ip_srcrt.dst = dst;
+ m_tag_prepend(m, (struct m_tag *)opts);
+}
+
+/*
+ * Retrieve incoming source route for use in replies, in the same form used
+ * by setsockopt. The first hop is placed before the options and will be
+ * removed later.
+ */
+struct mbuf *
+ip_srcroute(struct mbuf *m0)
+{
+ struct in_addr *p, *q;
+ struct mbuf *m;
+ struct ipopt_tag *opts;
+
+ opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
+ if (opts == NULL)
+ return (NULL);
+
+ if (opts->ip_nhops == 0)
+ return (NULL);
+ m = m_get(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (NULL);
+
+#define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
+
+ /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
+ m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
+ sizeof(struct in_addr) + OPTSIZ;
+
+ /*
+ * First, save first hop for return route.
+ */
+ p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
+ *(mtod(m, struct in_addr *)) = *p--;
+
+ /*
+ * Copy option fields and padding (nop) to mbuf.
+ */
+ opts->ip_srcrt.nop = IPOPT_NOP;
+ opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
+ (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
+ &(opts->ip_srcrt.nop), OPTSIZ);
+ q = (struct in_addr *)(mtod(m, caddr_t) +
+ sizeof(struct in_addr) + OPTSIZ);
+#undef OPTSIZ
+ /*
+ * Record return path as an IP source route, reversing the path
+ * (pointers are now aligned).
+ */
+ while (p >= opts->ip_srcrt.route) {
+ *q++ = *p--;
+ }
+ /*
+ * Last hop goes to final destination.
+ */
+ *q = opts->ip_srcrt.dst;
+ m_tag_delete(m0, (struct m_tag *)opts);
+ return (m);
+}
+
+/*
+ * Strip out IP options, at higher level protocol in the kernel. Second
+ * argument is buffer to which options will be moved, and return value is
+ * their length.
+ *
+ * XXX should be deleted; last arg currently ignored.
+ */
+void
+ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
+{
+ int i;
+ struct ip *ip = mtod(m, struct ip *);
+ caddr_t opts;
+ int olen;
+
+ olen = (ip->ip_hl << 2) - sizeof (struct ip);
+ opts = (caddr_t)(ip + 1);
+ i = m->m_len - (sizeof (struct ip) + olen);
+ bcopy(opts + olen, opts, (unsigned)i);
+ m->m_len -= olen;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len -= olen;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(struct ip) >> 2;
+}
+
+/*
+ * Insert IP options into preformed packet. Adjust IP destination as
+ * required for IP source routing, as indicated by a non-zero in_addr at the
+ * start of the options.
+ *
+ * XXX This routine assumes that the packet has no options in place.
+ */
+struct mbuf *
+ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
+{
+ struct ipoption *p = mtod(opt, struct ipoption *);
+ struct mbuf *n;
+ struct ip *ip = mtod(m, struct ip *);
+ unsigned optlen;
+
+ optlen = opt->m_len - sizeof(p->ipopt_dst);
+ if (optlen + ip->ip_len > IP_MAXPACKET) {
+ *phlen = 0;
+ return (m); /* XXX should fail */
+ }
+ if (p->ipopt_dst.s_addr)
+ ip->ip_dst = p->ipopt_dst;
+ if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
+ MGETHDR(n, M_DONTWAIT, MT_DATA);
+ if (n == NULL) {
+ *phlen = 0;
+ return (m);
+ }
+ M_MOVE_PKTHDR(n, m);
+ n->m_pkthdr.rcvif = NULL;
+ n->m_pkthdr.len += optlen;
+ m->m_len -= sizeof(struct ip);
+ m->m_data += sizeof(struct ip);
+ n->m_next = m;
+ m = n;
+ m->m_len = optlen + sizeof(struct ip);
+ m->m_data += max_linkhdr;
+ bcopy(ip, mtod(m, void *), sizeof(struct ip));
+ } else {
+ m->m_data -= optlen;
+ m->m_len += optlen;
+ m->m_pkthdr.len += optlen;
+ bcopy(ip, mtod(m, void *), sizeof(struct ip));
+ }
+ ip = mtod(m, struct ip *);
+ bcopy(p->ipopt_list, ip + 1, optlen);
+ *phlen = sizeof(struct ip) + optlen;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = *phlen >> 2;
+ ip->ip_len += optlen;
+ return (m);
+}
+
+/*
+ * Copy options from ip to jp, omitting those not copied during
+ * fragmentation.
+ */
+int
+ip_optcopy(struct ip *ip, struct ip *jp)
+{
+ u_char *cp, *dp;
+ int opt, optlen, cnt;
+
+ cp = (u_char *)(ip + 1);
+ dp = (u_char *)(jp + 1);
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[0];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP) {
+ /* Preserve for IP mcast tunnel's LSRR alignment. */
+ *dp++ = IPOPT_NOP;
+ optlen = 1;
+ continue;
+ }
+
+ KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
+ ("ip_optcopy: malformed ipv4 option"));
+ optlen = cp[IPOPT_OLEN];
+ KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
+ ("ip_optcopy: malformed ipv4 option"));
+
+ /* Bogus lengths should have been caught by ip_dooptions. */
+ if (optlen > cnt)
+ optlen = cnt;
+ if (IPOPT_COPIED(opt)) {
+ bcopy(cp, dp, optlen);
+ dp += optlen;
+ }
+ }
+ for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
+ *dp++ = IPOPT_EOL;
+ return (optlen);
+}
+
+/*
+ * Set up IP options in pcb for insertion in output packets. Store in mbuf
+ * with pointer in pcbopt, adding pseudo-option with destination address if
+ * source routed.
+ */
+int
+ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
+{
+ int cnt, optlen;
+ u_char *cp;
+ struct mbuf **pcbopt;
+ u_char opt;
+
+ INP_WLOCK_ASSERT(inp);
+
+ pcbopt = &inp->inp_options;
+
+ /* turn off any old options */
+ if (*pcbopt)
+ (void)m_free(*pcbopt);
+ *pcbopt = 0;
+ if (m == NULL || m->m_len == 0) {
+ /*
+ * Only turning off any previous options.
+ */
+ if (m != NULL)
+ (void)m_free(m);
+ return (0);
+ }
+
+ if (m->m_len % sizeof(int32_t))
+ goto bad;
+ /*
+ * IP first-hop destination address will be stored before actual
+ * options; move other options back and clear it when none present.
+ */
+ if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
+ goto bad;
+ cnt = m->m_len;
+ m->m_len += sizeof(struct in_addr);
+ cp = mtod(m, u_char *) + sizeof(struct in_addr);
+ bcopy(mtod(m, void *), cp, (unsigned)cnt);
+ bzero(mtod(m, void *), sizeof(struct in_addr));
+
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ if (cnt < IPOPT_OLEN + sizeof(*cp))
+ goto bad;
+ optlen = cp[IPOPT_OLEN];
+ if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
+ goto bad;
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ case IPOPT_LSRR:
+ case IPOPT_SSRR:
+ /*
+ * User process specifies route as:
+ *
+ * ->A->B->C->D
+ *
+ * D must be our final destination (but we can't
+ * check that since we may not have connected yet).
+ * A is first hop destination, which doesn't appear
+ * in actual IP option, but is stored before the
+ * options.
+ */
+ /* XXX-BZ PRIV_NETINET_SETHDROPTS? */
+ if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
+ goto bad;
+ m->m_len -= sizeof(struct in_addr);
+ cnt -= sizeof(struct in_addr);
+ optlen -= sizeof(struct in_addr);
+ cp[IPOPT_OLEN] = optlen;
+ /*
+ * Move first hop before start of options.
+ */
+ bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
+ sizeof(struct in_addr));
+ /*
+ * Then copy rest of options back
+ * to close up the deleted entry.
+ */
+ bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
+ &cp[IPOPT_OFFSET+1],
+ (unsigned)cnt - (IPOPT_MINOFF - 1));
+ break;
+ }
+ }
+ if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
+ goto bad;
+ *pcbopt = m;
+ return (0);
+
+bad:
+ (void)m_free(m);
+ return (EINVAL);
+}
+
+/*
+ * Check for the presence of the IP Router Alert option [RFC2113]
+ * in the header of an IPv4 datagram.
+ *
+ * This call is not intended for use from the forwarding path; it is here
+ * so that protocol domains may check for the presence of the option.
+ * Given how FreeBSD's IPv4 stack is currently structured, the Router Alert
+ * option does not have much relevance to the implementation, though this
+ * may change in future.
+ * Router alert options SHOULD be passed if running in IPSTEALTH mode and
+ * we are not the endpoint.
+ * Length checks on individual options should already have been performed
+ * by ip_dooptions(), therefore they are folded under INVARIANTS here.
+ *
+ * Return zero if not present or options are invalid, non-zero if present.
+ */
+int
+ip_checkrouteralert(struct mbuf *m)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ u_char *cp;
+ int opt, optlen, cnt, found_ra;
+
+ found_ra = 0;
+ cp = (u_char *)(ip + 1);
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+#ifdef INVARIANTS
+ if (cnt < IPOPT_OLEN + sizeof(*cp))
+ break;
+#endif
+ optlen = cp[IPOPT_OLEN];
+#ifdef INVARIANTS
+ if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
+ break;
+#endif
+ }
+ switch (opt) {
+ case IPOPT_RA:
+#ifdef INVARIANTS
+ if (optlen != IPOPT_OFFSET + sizeof(uint16_t) ||
+ (*((uint16_t *)&cp[IPOPT_OFFSET]) != 0))
+ break;
+ else
+#endif
+ found_ra = 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return (found_ra);
+}
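+
+/*
+ * For illustration: on the wire the Router Alert option tested above is the
+ * four octets 0x94 0x04 0x00 0x00 (type IPOPT_RA, length 4, 16-bit value 0,
+ * "router shall examine packet", RFC 2113).
+ */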
diff --git a/freebsd/sys/netinet/ip_options.h b/freebsd/sys/netinet/ip_options.h
new file mode 100644
index 00000000..9c08004d
--- /dev/null
+++ b/freebsd/sys/netinet/ip_options.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_OPTIONS_HH_
+#define _NETINET_IP_OPTIONS_HH_
+
+struct ipoptrt {
+ struct in_addr dst; /* final destination */
+ char nop; /* one NOP to align */
+ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
+ struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
+};
+
+struct ipopt_tag {
+ struct m_tag tag; /* m_tag */
+ int ip_nhops;
+ struct ipoptrt ip_srcrt;
+};
+
+extern int ip_doopts; /* process or ignore IP options */
+
+int ip_checkrouteralert(struct mbuf *);
+int ip_dooptions(struct mbuf *, int);
+struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
+int ip_optcopy(struct ip *, struct ip *);
+int ip_pcbopts(struct inpcb *, int, struct mbuf *);
+void ip_stripoptions(struct mbuf *, struct mbuf *);
+struct mbuf *ip_srcroute(struct mbuf *);
+
+#endif /* !_NETINET_IP_OPTIONS_HH_ */
diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c
new file mode 100644
index 00000000..51132333
--- /dev/null
+++ b/freebsd/sys/netinet/ip_output.c
@@ -0,0 +1,1284 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_route.h>
+#include <freebsd/local/opt_mbuf_stress_test.h>
+#include <freebsd/local/opt_mpath.h>
+#include <freebsd/local/opt_sctp.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/ucred.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_llatbl.h>
+#include <freebsd/net/netisr.h>
+#include <freebsd/net/pfil.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/flowtable.h>
+#ifdef RADIX_MPATH
+#include <freebsd/net/radix_mpath.h>
+#endif
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#ifdef SCTP
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_crc32.h>
+#endif
+
+#ifdef IPSEC
+#include <freebsd/netinet/ip_ipsec.h>
+#include <freebsd/netipsec/ipsec.h>
+#endif /* IPSEC*/
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\
+ x, (ntohl(a.s_addr)>>24)&0xFF,\
+ (ntohl(a.s_addr)>>16)&0xFF,\
+ (ntohl(a.s_addr)>>8)&0xFF,\
+ (ntohl(a.s_addr))&0xFF, y);
+
+VNET_DEFINE(u_short, ip_id);
+
+#ifdef MBUF_STRESS_TEST
+int mbuf_frag_size = 0;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
+ &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
+#endif
+
+static void ip_mloopback
+ (struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
+
+
+extern int in_mcast_loop;
+extern struct protosw inetsw[];
+
+/*
+ * IP output. The packet in mbuf chain m contains a skeletal IP
+ * header (with len, off, ttl, proto, tos, src, dst).
+ * The mbuf chain containing the packet will be freed.
+ * The mbuf opt, if present, will not be freed.
+ * In the IP forwarding case, the packet will arrive with options already
+ * inserted, so must have a NULL opt pointer.
+ */
+int
+ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
+ struct ip_moptions *imo, struct inpcb *inp)
+{
+ struct ip *ip;
+ struct ifnet *ifp = NULL; /* keep compiler happy */
+ struct mbuf *m0;
+ int hlen = sizeof (struct ip);
+ int mtu;
+ int len, error = 0;
+ int nortfree = 0;
+ struct sockaddr_in *dst = NULL; /* keep compiler happy */
+ struct in_ifaddr *ia = NULL;
+ int isbroadcast, sw_csum;
+ struct route iproute;
+ struct rtentry *rte; /* cache for ro->ro_rt */
+ struct in_addr odst;
+#ifdef IPFIREWALL_FORWARD
+ struct m_tag *fwd_tag = NULL;
+#endif
+#ifdef IPSEC
+ int no_route_but_check_spd = 0;
+#endif
+ M_ASSERTPKTHDR(m);
+
+ if (inp != NULL) {
+ INP_LOCK_ASSERT(inp);
+ M_SETFIB(m, inp->inp_inc.inc_fibnum);
+ if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
+ m->m_pkthdr.flowid = inp->inp_flowid;
+ m->m_flags |= M_FLOWID;
+ }
+ }
+
+ if (ro == NULL) {
+ ro = &iproute;
+ bzero(ro, sizeof (*ro));
+
+#ifdef FLOWTABLE
+ {
+ struct flentry *fle;
+
+ /*
+ * The flow table returns route entries valid for up to 30
+ * seconds; we rely on the remainder of ip_output() taking no
+ * longer than that long for the stability of ro_rt. The
+ * flow ID assignment must have happened before this point.
+ */
+ if ((fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET)) != NULL) {
+ flow_to_route(fle, ro);
+ nortfree = 1;
+ }
+ }
+#endif
+ }
+
+ if (opt) {
+ len = 0;
+ m = ip_insertoptions(m, opt, &len);
+ if (len != 0)
+ hlen = len;
+ }
+ ip = mtod(m, struct ip *);
+
+ /*
+ * Fill in IP header. If we are not allowing fragmentation,
+ * then the ip_id field is meaningless, but we don't set it
+ * to zero. Doing so causes various problems when devices along
+ * the path (routers, load balancers, firewalls, etc.) illegally
+ * disable DF on our packet. Note that a 16-bit counter
+ * will wrap around in less than 10 seconds at 100 Mbit/s on a
+ * medium with MTU 1500. See Steven M. Bellovin, "A Technique
+ * for Counting NATted Hosts", Proc. IMW'02, available at
+ * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
+ */
+ if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = hlen >> 2;
+ ip->ip_id = ip_newid();
+ IPSTAT_INC(ips_localout);
+ } else {
+ hlen = ip->ip_hl << 2;
+ }
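+	/*
+	 * Worked figures for the wrap-around noted above (illustration only):
+	 * at 100 Mbit/s a stream of 1500 byte packets is about
+	 * 100e6 / (1500 * 8) ~= 8333 packets/s, so the 16-bit counter wraps
+	 * in roughly 65536 / 8333 ~= 7.9 seconds.
+	 */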
+
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+again:
+ /*
+ * If there is a cached route,
+ * check that it is to the same destination
+ * and is still up. If not, free it and try again.
+ * The address family should also be checked in case of sharing the
+ * cache with IPv6.
+ */
+ rte = ro->ro_rt;
+ if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
+ rte->rt_ifp == NULL ||
+ !RT_LINK_IS_UP(rte->rt_ifp) ||
+ dst->sin_family != AF_INET ||
+ dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+ if (!nortfree)
+ RTFREE(rte);
+ rte = ro->ro_rt = (struct rtentry *)NULL;
+ ro->ro_lle = (struct llentry *)NULL;
+ }
+#ifdef IPFIREWALL_FORWARD
+ if (rte == NULL && fwd_tag == NULL) {
+#else
+ if (rte == NULL) {
+#endif
+ bzero(dst, sizeof(*dst));
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = ip->ip_dst;
+ }
+ /*
+ * If routing to interface only, short circuit routing lookup.
+ * The use of an all-ones broadcast address implies this; an
+ * interface is specified by the broadcast address of an interface,
+ * or the destination address of a ptp interface.
+ */
+ if (flags & IP_SENDONES) {
+ if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
+ (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
+ IPSTAT_INC(ips_noroute);
+ error = ENETUNREACH;
+ goto bad;
+ }
+ ip->ip_dst.s_addr = INADDR_BROADCAST;
+ dst->sin_addr = ip->ip_dst;
+ ifp = ia->ia_ifp;
+ ip->ip_ttl = 1;
+ isbroadcast = 1;
+ } else if (flags & IP_ROUTETOIF) {
+ if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
+ (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0))) == NULL) {
+ IPSTAT_INC(ips_noroute);
+ error = ENETUNREACH;
+ goto bad;
+ }
+ ifp = ia->ia_ifp;
+ ip->ip_ttl = 1;
+ isbroadcast = in_broadcast(dst->sin_addr, ifp);
+ } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
+ imo != NULL && imo->imo_multicast_ifp != NULL) {
+ /*
+ * Bypass the normal routing lookup for multicast
+ * packets if the interface is specified.
+ */
+ ifp = imo->imo_multicast_ifp;
+ IFP_TO_IA(ifp, ia);
+ isbroadcast = 0; /* fool gcc */
+ } else {
+ /*
+ * We want to do any cloning requested by the link layer,
+ * as this is probably required in all cases for correct
+ * operation (as it is for ARP).
+ */
+ if (rte == NULL) {
+#ifdef RADIX_MPATH
+ rtalloc_mpath_fib(ro,
+ ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+ inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
+#else
+ in_rtalloc_ign(ro, 0,
+ inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
+#endif
+ rte = ro->ro_rt;
+ }
+ if (rte == NULL ||
+ rte->rt_ifp == NULL ||
+ !RT_LINK_IS_UP(rte->rt_ifp)) {
+#ifdef IPSEC
+ /*
+ * There is no route for this packet, but it is
+ * possible that a matching SPD entry exists.
+ */
+ no_route_but_check_spd = 1;
+ mtu = 0; /* Silence GCC warning. */
+ goto sendit;
+#endif
+ IPSTAT_INC(ips_noroute);
+ error = EHOSTUNREACH;
+ goto bad;
+ }
+ ia = ifatoia(rte->rt_ifa);
+ ifa_ref(&ia->ia_ifa);
+ ifp = rte->rt_ifp;
+ rte->rt_rmx.rmx_pksent++;
+ if (rte->rt_flags & RTF_GATEWAY)
+ dst = (struct sockaddr_in *)rte->rt_gateway;
+ if (rte->rt_flags & RTF_HOST)
+ isbroadcast = (rte->rt_flags & RTF_BROADCAST);
+ else
+ isbroadcast = in_broadcast(dst->sin_addr, ifp);
+ }
+ /*
+ * Calculate MTU. If we have a route that is up, use that,
+ * otherwise use the interface's MTU.
+ */
+ if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) {
+ /*
+ * This case can happen if the user changed the MTU
+ * of an interface after enabling IP on it. Because
+ * most netifs don't keep track of routes pointing to
+ * them, there is no way for one to update all its
+ * routes when the MTU is changed.
+ */
+ if (rte->rt_rmx.rmx_mtu > ifp->if_mtu)
+ rte->rt_rmx.rmx_mtu = ifp->if_mtu;
+ mtu = rte->rt_rmx.rmx_mtu;
+ } else {
+ mtu = ifp->if_mtu;
+ }
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ m->m_flags |= M_MCAST;
+ /*
+ * IP destination address is multicast. Make sure "dst"
+ * still points to the address in "ro". (It may have been
+ * changed to point to a gateway address, above.)
+ */
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ /*
+ * See if the caller provided any multicast options
+ */
+ if (imo != NULL) {
+ ip->ip_ttl = imo->imo_multicast_ttl;
+ if (imo->imo_multicast_vif != -1)
+ ip->ip_src.s_addr =
+ ip_mcast_src ?
+ ip_mcast_src(imo->imo_multicast_vif) :
+ INADDR_ANY;
+ } else
+ ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+ /*
+ * Confirm that the outgoing interface supports multicast.
+ */
+ if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ IPSTAT_INC(ips_noroute);
+ error = ENETUNREACH;
+ goto bad;
+ }
+ }
+ /*
+ * If source address not specified yet, use address
+ * of outgoing interface.
+ */
+ if (ip->ip_src.s_addr == INADDR_ANY) {
+ /* Interface may have no addresses. */
+ if (ia != NULL)
+ ip->ip_src = IA_SIN(ia)->sin_addr;
+ }
+
+ if ((imo == NULL && in_mcast_loop) ||
+ (imo && imo->imo_multicast_loop)) {
+ /*
+ * Loop back multicast datagram if not expressly
+ * forbidden to do so, even if we are not a member
+ * of the group; ip_input() will filter it later,
+ * thus deferring a hash lookup and mutex acquisition
+ * at the expense of a cheap copy using m_copym().
+ */
+ ip_mloopback(ifp, m, dst, hlen);
+ } else {
+ /*
+ * If we are acting as a multicast router, perform
+ * multicast forwarding as if the packet had just
+ * arrived on the interface to which we are about
+ * to send. The multicast forwarding function
+ * recursively calls this function, using the
+ * IP_FORWARDING flag to prevent infinite recursion.
+ *
+ * Multicasts that are looped back by ip_mloopback(),
+ * above, will be forwarded by the ip_input() routine,
+ * if necessary.
+ */
+ if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
+ /*
+ * If rsvp daemon is not running, do not
+ * set ip_moptions. This ensures that the packet
+ * is multicast and not just sent down one link
+ * as prescribed by rsvpd.
+ */
+ if (!V_rsvp_on)
+ imo = NULL;
+ if (ip_mforward &&
+ ip_mforward(ip, ifp, m, imo) != 0) {
+ m_freem(m);
+ goto done;
+ }
+ }
+ }
+
+ /*
+ * Multicasts with a time-to-live of zero may be looped-
+ * back, above, but must not be transmitted on a network.
+ * Also, multicasts addressed to the loopback interface
+ * are not sent -- the above call to ip_mloopback() will
+ * loop back a copy. ip_input() will drop the copy if
+ * this host does not belong to the destination group on
+ * the loopback interface.
+ */
+ if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
+ m_freem(m);
+ goto done;
+ }
+
+ goto sendit;
+ }
+
+ /*
+ * If the source address is not specified yet, use the address
+	 * of the outgoing interface.
+ */
+ if (ip->ip_src.s_addr == INADDR_ANY) {
+ /* Interface may have no addresses. */
+ if (ia != NULL) {
+ ip->ip_src = IA_SIN(ia)->sin_addr;
+ }
+ }
+
+ /*
+ * Verify that we have any chance at all of being able to queue the
+ * packet or packet fragments, unless ALTQ is enabled on the given
+ * interface in which case packetdrop should be done by queueing.
+ */
+#ifdef ALTQ
+ if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
+ ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
+ ifp->if_snd.ifq_maxlen))
+#else
+ if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
+ ifp->if_snd.ifq_maxlen)
+#endif /* ALTQ */
+ {
+ error = ENOBUFS;
+ IPSTAT_INC(ips_odropped);
+ ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
+ goto bad;
+ }
+
+ /*
+ * Look for broadcast address and
+ * verify user is allowed to send
+ * such a packet.
+ */
+ if (isbroadcast) {
+ if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+ if ((flags & IP_ALLOWBROADCAST) == 0) {
+ error = EACCES;
+ goto bad;
+ }
+ /* don't allow broadcast messages to be fragmented */
+ if (ip->ip_len > mtu) {
+ error = EMSGSIZE;
+ goto bad;
+ }
+ m->m_flags |= M_BCAST;
+ } else {
+ m->m_flags &= ~M_BCAST;
+ }
+
+sendit:
+#ifdef IPSEC
+ switch(ip_ipsec_output(&m, inp, &flags, &error, &ifp)) {
+ case 1:
+ goto bad;
+ case -1:
+ goto done;
+ case 0:
+ default:
+ break; /* Continue with packet processing. */
+ }
+ /*
+ * Check if there was a route for this packet; return error if not.
+ */
+ if (no_route_but_check_spd) {
+ IPSTAT_INC(ips_noroute);
+ error = EHOSTUNREACH;
+ goto bad;
+ }
+ /* Update variables that are affected by ipsec4_output(). */
+ ip = mtod(m, struct ip *);
+ hlen = ip->ip_hl << 2;
+#endif /* IPSEC */
+
+ /* Jump over all PFIL processing if hooks are not active. */
+ if (!PFIL_HOOKED(&V_inet_pfil_hook))
+ goto passout;
+
+ /* Run through list of hooks for output packets. */
+ odst.s_addr = ip->ip_dst.s_addr;
+ error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
+ if (error != 0 || m == NULL)
+ goto done;
+
+ ip = mtod(m, struct ip *);
+
+ /* See if destination IP address was changed by packet filter. */
+ if (odst.s_addr != ip->ip_dst.s_addr) {
+ m->m_flags |= M_SKIP_FIREWALL;
+ /* If destination is now ourself drop to ip_input(). */
+ if (in_localip(ip->ip_dst)) {
+ m->m_flags |= M_FASTFWD_OURS;
+ if (m->m_pkthdr.rcvif == NULL)
+ m->m_pkthdr.rcvif = V_loif;
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ m->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+ m->m_pkthdr.csum_flags |=
+ CSUM_IP_CHECKED | CSUM_IP_VALID;
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP)
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
+#endif
+ error = netisr_queue(NETISR_IP, m);
+ goto done;
+ } else
+ goto again; /* Redo the routing table lookup. */
+ }
+
+#ifdef IPFIREWALL_FORWARD
+ /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
+ if (m->m_flags & M_FASTFWD_OURS) {
+ if (m->m_pkthdr.rcvif == NULL)
+ m->m_pkthdr.rcvif = V_loif;
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ m->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP)
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
+#endif
+ m->m_pkthdr.csum_flags |=
+ CSUM_IP_CHECKED | CSUM_IP_VALID;
+
+ error = netisr_queue(NETISR_IP, m);
+ goto done;
+ }
+ /* Or forward to some other address? */
+ fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+ if (fwd_tag) {
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
+ m->m_flags |= M_SKIP_FIREWALL;
+ m_tag_delete(m, fwd_tag);
+ goto again;
+ }
+#endif /* IPFIREWALL_FORWARD */
+
+passout:
+ /* 127/8 must not appear on wire - RFC1122. */
+ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+ (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
+ if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
+ IPSTAT_INC(ips_badaddr);
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+ }
+
+ m->m_pkthdr.csum_flags |= CSUM_IP;
+ sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
+ if (sw_csum & CSUM_DELAY_DATA) {
+ in_delayed_cksum(m);
+ sw_csum &= ~CSUM_DELAY_DATA;
+ }
+#ifdef SCTP
+ if (sw_csum & CSUM_SCTP) {
+ sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
+ sw_csum &= ~CSUM_SCTP;
+ }
+#endif
+ m->m_pkthdr.csum_flags &= ifp->if_hwassist;
+
+ /*
+ * If small enough for interface, or the interface will take
+ * care of the fragmentation for us, we can just send directly.
+ */
+ if (ip->ip_len <= mtu ||
+ (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
+ ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+ if (sw_csum & CSUM_DELAY_IP)
+ ip->ip_sum = in_cksum(m, hlen);
+
+ /*
+ * Record statistics for this interface address.
+ * With CSUM_TSO the byte/packet count will be slightly
+ * incorrect because we count the IP+TCP headers only
+ * once instead of for every generated packet.
+ */
+ if (!(flags & IP_FORWARDING) && ia) {
+ if (m->m_pkthdr.csum_flags & CSUM_TSO)
+ ia->ia_ifa.if_opackets +=
+ m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
+ else
+ ia->ia_ifa.if_opackets++;
+ ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ }
+#ifdef MBUF_STRESS_TEST
+ if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
+ m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
+#endif
+ /*
+ * Reset layer specific mbuf flags
+ * to avoid confusing lower layers.
+ */
+ m->m_flags &= ~(M_PROTOFLAGS);
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, ro);
+ goto done;
+ }
+
+ /* Balk when DF bit is set or the interface didn't support TSO. */
+ if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
+ error = EMSGSIZE;
+ IPSTAT_INC(ips_cantfrag);
+ goto bad;
+ }
+
+ /*
+ * Too large for interface; fragment if possible. If successful,
+ * on return, m will point to a list of packets to be sent.
+ */
+ error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
+ if (error)
+ goto bad;
+ for (; m; m = m0) {
+ m0 = m->m_nextpkt;
+ m->m_nextpkt = 0;
+ if (error == 0) {
+ /* Record statistics for this interface address. */
+ if (ia != NULL) {
+ ia->ia_ifa.if_opackets++;
+ ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ }
+ /*
+ * Reset layer specific mbuf flags
+ * to avoid confusing upper layers.
+ */
+ m->m_flags &= ~(M_PROTOFLAGS);
+
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, ro);
+ } else
+ m_freem(m);
+ }
+
+ if (error == 0)
+ IPSTAT_INC(ips_fragmented);
+
+done:
+ if (ro == &iproute && ro->ro_rt && !nortfree) {
+ RTFREE(ro->ro_rt);
+ }
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return (error);
+bad:
+ m_freem(m);
+ goto done;
+}
+
+/*
+ * Create a chain of fragments which fit the given mtu. m_frag points to the
+ * mbuf to be fragmented; on return it points to the chain with the fragments.
+ * Return 0 if no error. If error, m_frag may contain a partially built
+ * chain of fragments that should be freed by the caller.
+ *
+ * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
+ * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
+ */
+int
+ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
+ u_long if_hwassist_flags, int sw_csum)
+{
+ int error = 0;
+ int hlen = ip->ip_hl << 2;
+ int len = (mtu - hlen) & ~7; /* size of payload in each fragment */
+ int off;
+ struct mbuf *m0 = *m_frag; /* the original packet */
+ int firstlen;
+ struct mbuf **mnext;
+ int nfrags;
+
+ if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
+ IPSTAT_INC(ips_cantfrag);
+ return EMSGSIZE;
+ }
+
+ /*
+ * Must be able to put at least 8 bytes per fragment.
+ */
+ if (len < 8)
+ return EMSGSIZE;
+
+ /*
+ * If the interface will not calculate checksums on
+ * fragmented packets, then do it here.
+ */
+ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
+ (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+ in_delayed_cksum(m0);
+ m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+#ifdef SCTP
+ if (m0->m_pkthdr.csum_flags & CSUM_SCTP &&
+ (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+ sctp_delayed_cksum(m0, hlen);
+ m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
+ }
+#endif
+ if (len > PAGE_SIZE) {
+ /*
+ * Fragment large datagrams such that each segment
+ * contains a multiple of PAGE_SIZE amount of data,
+ * plus headers. This enables a receiver to perform
+ * page-flipping zero-copy optimizations.
+ *
+ * XXX When does this help given that sender and receiver
+ * could have different page sizes, and also mtu could
+ * be less than the receiver's page size ?
+ */
+ int newlen;
+ struct mbuf *m;
+
+ for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
+ off += m->m_len;
+
+ /*
+ * firstlen (off - hlen) must be aligned on an
+ * 8-byte boundary
+ */
+ if (off < hlen)
+ goto smart_frag_failure;
+ off = ((off - hlen) & ~7) + hlen;
+ newlen = (~PAGE_MASK) & mtu;
+ if ((newlen + sizeof (struct ip)) > mtu) {
+			/* we failed, go back to the default */
+smart_frag_failure:
+ newlen = len;
+ off = hlen + len;
+ }
+ len = newlen;
+
+ } else {
+ off = hlen + len;
+ }
+
+ firstlen = off - hlen;
+ mnext = &m0->m_nextpkt; /* pointer to next packet */
+
+ /*
+ * Loop through length of segment after first fragment,
+ * make new header and copy data of each part and link onto chain.
+ * Here, m0 is the original packet, m is the fragment being created.
+ * The fragments are linked off the m_nextpkt of the original
+ * packet, which after processing serves as the first fragment.
+ */
+ for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
+ struct ip *mhip; /* ip header on the fragment */
+ struct mbuf *m;
+ int mhlen = sizeof (struct ip);
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ error = ENOBUFS;
+ IPSTAT_INC(ips_odropped);
+ goto done;
+ }
+ m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
+ /*
+ * In the first mbuf, leave room for the link header, then
+ * copy the original IP header including options. The payload
+ * goes into an additional mbuf chain returned by m_copym().
+ */
+ m->m_data += max_linkhdr;
+ mhip = mtod(m, struct ip *);
+ *mhip = *ip;
+ if (hlen > sizeof (struct ip)) {
+ mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
+ mhip->ip_v = IPVERSION;
+ mhip->ip_hl = mhlen >> 2;
+ }
+ m->m_len = mhlen;
+ /* XXX do we need to add ip->ip_off below ? */
+ mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
+ if (off + len >= ip->ip_len) { /* last fragment */
+ len = ip->ip_len - off;
+ m->m_flags |= M_LASTFRAG;
+ } else
+ mhip->ip_off |= IP_MF;
+ mhip->ip_len = htons((u_short)(len + mhlen));
+ m->m_next = m_copym(m0, off, len, M_DONTWAIT);
+ if (m->m_next == NULL) { /* copy failed */
+ m_free(m);
+ error = ENOBUFS; /* ??? */
+ IPSTAT_INC(ips_odropped);
+ goto done;
+ }
+ m->m_pkthdr.len = mhlen + len;
+ m->m_pkthdr.rcvif = NULL;
+#ifdef MAC
+ mac_netinet_fragment(m0, m);
+#endif
+ m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
+ mhip->ip_off = htons(mhip->ip_off);
+ mhip->ip_sum = 0;
+ if (sw_csum & CSUM_DELAY_IP)
+ mhip->ip_sum = in_cksum(m, mhlen);
+ *mnext = m;
+ mnext = &m->m_nextpkt;
+ }
+ IPSTAT_ADD(ips_ofragments, nfrags);
+
+ /* set first marker for fragment chain */
+ m0->m_flags |= M_FIRSTFRAG | M_FRAG;
+ m0->m_pkthdr.csum_data = nfrags;
+
+ /*
+ * Update first fragment by trimming what's been copied out
+ * and updating header.
+ */
+ m_adj(m0, hlen + firstlen - ip->ip_len);
+ m0->m_pkthdr.len = hlen + firstlen;
+ ip->ip_len = htons((u_short)m0->m_pkthdr.len);
+ ip->ip_off |= IP_MF;
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+ if (sw_csum & CSUM_DELAY_IP)
+ ip->ip_sum = in_cksum(m0, hlen);
+
+done:
+ *m_frag = m0;
+ return error;
+}
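+
+/*
+ * Worked example of the arithmetic above (illustration only): fragmenting a
+ * 4000 byte datagram with a 20 byte header over an MTU of 1500 gives
+ * len = (1500 - 20) & ~7 = 1480, so the 3980 payload bytes are split
+ * 1480 + 1480 + 1020 and the fragments carry ip_off values of 0, 185 and
+ * 370 (in units of 8 bytes), with IP_MF set on all but the last one.
+ */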
+
+void
+in_delayed_cksum(struct mbuf *m)
+{
+ struct ip *ip;
+ u_short csum, offset;
+
+ ip = mtod(m, struct ip *);
+ offset = ip->ip_hl << 2 ;
+ csum = in_cksum_skip(m, ip->ip_len, offset);
+ if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
+ csum = 0xffff;
+ offset += m->m_pkthdr.csum_data; /* checksum offset */
+
+ if (offset + sizeof(u_short) > m->m_len) {
+ printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
+ m->m_len, offset, ip->ip_p);
+ /*
+ * XXX
+ * this shouldn't happen, but if it does, the
+ * correct behavior may be to insert the checksum
+ * in the appropriate next mbuf in the chain.
+ */
+ return;
+ }
+ *(u_short *)(m->m_data + offset) = csum;
+}
+
+/*
+ * IP socket option processing.
+ */
+int
+ip_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ struct inpcb *inp = sotoinpcb(so);
+ int error, optval;
+
+ error = optval = 0;
+ if (sopt->sopt_level != IPPROTO_IP) {
+ if ((sopt->sopt_level == SOL_SOCKET) &&
+ (sopt->sopt_name == SO_SETFIB)) {
+ inp->inp_inc.inc_fibnum = so->so_fibnum;
+ return (0);
+ }
+ return (EINVAL);
+ }
+
+ switch (sopt->sopt_dir) {
+ case SOPT_SET:
+ switch (sopt->sopt_name) {
+ case IP_OPTIONS:
+#ifdef notyet
+ case IP_RETOPTS:
+#endif
+ {
+ struct mbuf *m;
+ if (sopt->sopt_valsize > MLEN) {
+ error = EMSGSIZE;
+ break;
+ }
+ MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ error = ENOBUFS;
+ break;
+ }
+ m->m_len = sopt->sopt_valsize;
+ error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
+ m->m_len);
+ if (error) {
+ m_free(m);
+ break;
+ }
+ INP_WLOCK(inp);
+ error = ip_pcbopts(inp, sopt->sopt_name, m);
+ INP_WUNLOCK(inp);
+ return (error);
+ }
+
+ case IP_BINDANY:
+ if (sopt->sopt_td != NULL) {
+ error = priv_check(sopt->sopt_td,
+ PRIV_NETINET_BINDANY);
+ if (error)
+ break;
+ }
+ /* FALLTHROUGH */
+ case IP_TOS:
+ case IP_TTL:
+ case IP_MINTTL:
+ case IP_RECVOPTS:
+ case IP_RECVRETOPTS:
+ case IP_RECVDSTADDR:
+ case IP_RECVTTL:
+ case IP_RECVIF:
+ case IP_FAITH:
+ case IP_ONESBCAST:
+ case IP_DONTFRAG:
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ break;
+
+ switch (sopt->sopt_name) {
+ case IP_TOS:
+ inp->inp_ip_tos = optval;
+ break;
+
+ case IP_TTL:
+ inp->inp_ip_ttl = optval;
+ break;
+
+ case IP_MINTTL:
+ if (optval >= 0 && optval <= MAXTTL)
+ inp->inp_ip_minttl = optval;
+ else
+ error = EINVAL;
+ break;
+
+#define OPTSET(bit) do { \
+ INP_WLOCK(inp); \
+ if (optval) \
+ inp->inp_flags |= bit; \
+ else \
+ inp->inp_flags &= ~bit; \
+ INP_WUNLOCK(inp); \
+} while (0)
+
+ case IP_RECVOPTS:
+ OPTSET(INP_RECVOPTS);
+ break;
+
+ case IP_RECVRETOPTS:
+ OPTSET(INP_RECVRETOPTS);
+ break;
+
+ case IP_RECVDSTADDR:
+ OPTSET(INP_RECVDSTADDR);
+ break;
+
+ case IP_RECVTTL:
+ OPTSET(INP_RECVTTL);
+ break;
+
+ case IP_RECVIF:
+ OPTSET(INP_RECVIF);
+ break;
+
+ case IP_FAITH:
+ OPTSET(INP_FAITH);
+ break;
+
+ case IP_ONESBCAST:
+ OPTSET(INP_ONESBCAST);
+ break;
+ case IP_DONTFRAG:
+ OPTSET(INP_DONTFRAG);
+ break;
+ case IP_BINDANY:
+ OPTSET(INP_BINDANY);
+ break;
+ }
+ break;
+#undef OPTSET
+
+ /*
+ * Multicast socket options are processed by the in_mcast
+ * module.
+ */
+ case IP_MULTICAST_IF:
+ case IP_MULTICAST_VIF:
+ case IP_MULTICAST_TTL:
+ case IP_MULTICAST_LOOP:
+ case IP_ADD_MEMBERSHIP:
+ case IP_DROP_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case IP_MSFILTER:
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = inp_setmoptions(inp, sopt);
+ break;
+
+ case IP_PORTRANGE:
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ break;
+
+ INP_WLOCK(inp);
+ switch (optval) {
+ case IP_PORTRANGE_DEFAULT:
+ inp->inp_flags &= ~(INP_LOWPORT);
+ inp->inp_flags &= ~(INP_HIGHPORT);
+ break;
+
+ case IP_PORTRANGE_HIGH:
+ inp->inp_flags &= ~(INP_LOWPORT);
+ inp->inp_flags |= INP_HIGHPORT;
+ break;
+
+ case IP_PORTRANGE_LOW:
+ inp->inp_flags &= ~(INP_HIGHPORT);
+ inp->inp_flags |= INP_LOWPORT;
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ INP_WUNLOCK(inp);
+ break;
+
+#ifdef IPSEC
+ case IP_IPSEC_POLICY:
+ {
+ caddr_t req;
+ struct mbuf *m;
+
+ if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
+ break;
+ if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
+ break;
+ req = mtod(m, caddr_t);
+ error = ipsec_set_policy(inp, sopt->sopt_name, req,
+ m->m_len, (sopt->sopt_td != NULL) ?
+ sopt->sopt_td->td_ucred : NULL);
+ m_freem(m);
+ break;
+ }
+#endif /* IPSEC */
+
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+ break;
+
+ case SOPT_GET:
+ switch (sopt->sopt_name) {
+ case IP_OPTIONS:
+ case IP_RETOPTS:
+ if (inp->inp_options)
+ error = sooptcopyout(sopt,
+ mtod(inp->inp_options,
+ char *),
+ inp->inp_options->m_len);
+ else
+ sopt->sopt_valsize = 0;
+ break;
+
+ case IP_TOS:
+ case IP_TTL:
+ case IP_MINTTL:
+ case IP_RECVOPTS:
+ case IP_RECVRETOPTS:
+ case IP_RECVDSTADDR:
+ case IP_RECVTTL:
+ case IP_RECVIF:
+ case IP_PORTRANGE:
+ case IP_FAITH:
+ case IP_ONESBCAST:
+ case IP_DONTFRAG:
+ case IP_BINDANY:
+ switch (sopt->sopt_name) {
+
+ case IP_TOS:
+ optval = inp->inp_ip_tos;
+ break;
+
+ case IP_TTL:
+ optval = inp->inp_ip_ttl;
+ break;
+
+ case IP_MINTTL:
+ optval = inp->inp_ip_minttl;
+ break;
+
+#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
+
+ case IP_RECVOPTS:
+ optval = OPTBIT(INP_RECVOPTS);
+ break;
+
+ case IP_RECVRETOPTS:
+ optval = OPTBIT(INP_RECVRETOPTS);
+ break;
+
+ case IP_RECVDSTADDR:
+ optval = OPTBIT(INP_RECVDSTADDR);
+ break;
+
+ case IP_RECVTTL:
+ optval = OPTBIT(INP_RECVTTL);
+ break;
+
+ case IP_RECVIF:
+ optval = OPTBIT(INP_RECVIF);
+ break;
+
+ case IP_PORTRANGE:
+ if (inp->inp_flags & INP_HIGHPORT)
+ optval = IP_PORTRANGE_HIGH;
+ else if (inp->inp_flags & INP_LOWPORT)
+ optval = IP_PORTRANGE_LOW;
+ else
+ optval = 0;
+ break;
+
+ case IP_FAITH:
+ optval = OPTBIT(INP_FAITH);
+ break;
+
+ case IP_ONESBCAST:
+ optval = OPTBIT(INP_ONESBCAST);
+ break;
+ case IP_DONTFRAG:
+ optval = OPTBIT(INP_DONTFRAG);
+ break;
+ case IP_BINDANY:
+ optval = OPTBIT(INP_BINDANY);
+ break;
+ }
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+
+ /*
+ * Multicast socket options are processed by the in_mcast
+ * module.
+ */
+ case IP_MULTICAST_IF:
+ case IP_MULTICAST_VIF:
+ case IP_MULTICAST_TTL:
+ case IP_MULTICAST_LOOP:
+ case IP_MSFILTER:
+ error = inp_getmoptions(inp, sopt);
+ break;
+
+#ifdef IPSEC
+ case IP_IPSEC_POLICY:
+ {
+ struct mbuf *m = NULL;
+ caddr_t req = NULL;
+ size_t len = 0;
+
+ if (m != 0) {
+ req = mtod(m, caddr_t);
+ len = m->m_len;
+ }
+ error = ipsec_get_policy(sotoinpcb(so), req, len, &m);
+ if (error == 0)
+ error = soopt_mcopyout(sopt, m); /* XXX */
+ if (error == 0)
+ m_freem(m);
+ break;
+ }
+#endif /* IPSEC */
+
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+ break;
+ }
+ return (error);
+}
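+
+/*
+ * Userland view of the handler above (illustrative sketch): setting the TOS
+ * byte on a socket ends up in the SOPT_SET/IP_TOS arm of ip_ctloutput():
+ *
+ *	int tos = IPTOS_LOWDELAY;
+ *	setsockopt(s, IPPROTO_IP, IP_TOS, &tos, sizeof(tos));
+ */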
+
+/*
+ * Routine called from ip_output() to loop back a copy of an IP multicast
+ * packet to the input queue of a specified interface. Note that this
+ * calls the output routine of the loopback "driver", but with an interface
+ * pointer that might NOT be a loopback interface -- evil, but easier than
+ * replicating that code here.
+ */
+static void
+ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
+ int hlen)
+{
+ register struct ip *ip;
+ struct mbuf *copym;
+
+ /*
+ * Make a deep copy of the packet because we're going to
+	 * modify the packet in order to generate checksums.
+ */
+ copym = m_dup(m, M_DONTWAIT);
+ if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
+ copym = m_pullup(copym, hlen);
+ if (copym != NULL) {
+ /* If needed, compute the checksum and mark it as valid. */
+ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(copym);
+ copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ copym->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ copym->m_pkthdr.csum_data = 0xffff;
+ }
+ /*
+ * We don't bother to fragment if the IP length is greater
+ * than the interface's MTU. Can this possibly matter?
+ */
+ ip = mtod(copym, struct ip *);
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+ ip->ip_sum = in_cksum(copym, hlen);
+#if 1 /* XXX */
+ if (dst->sin_family != AF_INET) {
+ printf("ip_mloopback: bad address family %d\n",
+ dst->sin_family);
+ dst->sin_family = AF_INET;
+ }
+#endif
+ if_simloop(ifp, copym, dst->sin_family, 0);
+ }
+}
diff --git a/freebsd/sys/netinet/ip_var.h b/freebsd/sys/netinet/ip_var.h
new file mode 100644
index 00000000..2902174d
--- /dev/null
+++ b/freebsd/sys/netinet/ip_var.h
@@ -0,0 +1,315 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_var.h 8.2 (Berkeley) 1/9/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_VAR_HH_
+#define _NETINET_IP_VAR_HH_
+
+#include <freebsd/sys/queue.h>
+
+/*
+ * Overlay for ip header used by other protocols (tcp, udp).
+ */
+struct ipovly {
+ u_char ih_x1[9]; /* (unused) */
+ u_char ih_pr; /* protocol */
+ u_short ih_len; /* protocol length */
+ struct in_addr ih_src; /* source internet address */
+ struct in_addr ih_dst; /* destination internet address */
+};
+
+#ifdef _KERNEL
+/*
+ * Ip reassembly queue structure. Each fragment
+ * being reassembled is attached to one of these structures.
+ * They are timed out after ipq_ttl drops to 0, and may also
+ * be reclaimed if memory becomes tight.
+ */
+struct ipq {
+ TAILQ_ENTRY(ipq) ipq_list; /* to other reass headers */
+ u_char ipq_ttl; /* time for reass q to live */
+ u_char ipq_p; /* protocol of this fragment */
+ u_short ipq_id; /* sequence id for reassembly */
+ struct mbuf *ipq_frags; /* to ip headers of fragments */
+ struct in_addr ipq_src,ipq_dst;
+ u_char ipq_nfrags; /* # frags in this packet */
+ struct label *ipq_label; /* MAC label */
+};
+#endif /* _KERNEL */
+
+/*
+ * Structure stored in mbuf in inpcb.ip_options
+ * and passed to ip_output when ip options are in use.
+ * The actual length of the options (including ipopt_dst)
+ * is in m_len.
+ */
+#define MAX_IPOPTLEN 40
+
+struct ipoption {
+ struct in_addr ipopt_dst; /* first-hop dst if source routed */
+ char ipopt_list[MAX_IPOPTLEN]; /* options proper */
+};
+
+/*
+ * Structure attached to inpcb.ip_moptions and
+ * passed to ip_output when IP multicast options are in use.
+ * This structure is lazy-allocated.
+ */
+struct ip_moptions {
+ struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
+ struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */
+ u_long imo_multicast_vif; /* vif num outgoing multicasts */
+ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */
+ u_char imo_multicast_loop; /* 1 => hear sends if a member */
+ u_short imo_num_memberships; /* no. memberships this socket */
+ u_short imo_max_memberships; /* max memberships this socket */
+ struct in_multi **imo_membership; /* group memberships */
+ struct in_mfilter *imo_mfilters; /* source filters */
+};
+
+struct ipstat {
+ u_long ips_total; /* total packets received */
+ u_long ips_badsum; /* checksum bad */
+ u_long ips_tooshort; /* packet too short */
+ u_long ips_toosmall; /* not enough data */
+ u_long ips_badhlen; /* ip header length < data size */
+ u_long ips_badlen; /* ip length < ip header length */
+ u_long ips_fragments; /* fragments received */
+ u_long ips_fragdropped; /* frags dropped (dups, out of space) */
+ u_long ips_fragtimeout; /* fragments timed out */
+ u_long ips_forward; /* packets forwarded */
+ u_long ips_fastforward; /* packets fast forwarded */
+ u_long ips_cantforward; /* packets rcvd for unreachable dest */
+ u_long ips_redirectsent; /* packets forwarded on same net */
+ u_long ips_noproto; /* unknown or unsupported protocol */
+ u_long ips_delivered; /* datagrams delivered to upper level*/
+ u_long ips_localout; /* total ip packets generated here */
+ u_long ips_odropped; /* lost packets due to nobufs, etc. */
+ u_long ips_reassembled; /* total packets reassembled ok */
+ u_long ips_fragmented; /* datagrams successfully fragmented */
+ u_long ips_ofragments; /* output fragments created */
+ u_long ips_cantfrag; /* don't fragment flag was set, etc. */
+ u_long ips_badoptions; /* error in option processing */
+ u_long ips_noroute; /* packets discarded due to no route */
+ u_long ips_badvers; /* ip version != 4 */
+ u_long ips_rawout; /* total raw ip packets generated */
+ u_long ips_toolong; /* ip length > max ip packet size */
+ u_long ips_notmember; /* multicasts for unregistered grps */
+ u_long ips_nogif; /* no match gif found */
+ u_long ips_badaddr; /* invalid address on header */
+};
+
+#ifdef _KERNEL
+
+#include <freebsd/net/vnet.h>
+
+/*
+ * In-kernel consumers can use these accessor macros directly to update
+ * stats.
+ */
+#define IPSTAT_ADD(name, val) V_ipstat.name += (val)
+#define IPSTAT_SUB(name, val) V_ipstat.name -= (val)
+#define IPSTAT_INC(name) IPSTAT_ADD(name, 1)
+#define IPSTAT_DEC(name) IPSTAT_SUB(name, 1)
+
+/*
+ * Kernel module consumers must use this accessor macro.
+ */
+void kmod_ipstat_inc(int statnum);
+#define KMOD_IPSTAT_INC(name) \
+ kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(u_long))
+void kmod_ipstat_dec(int statnum);
+#define KMOD_IPSTAT_DEC(name) \
+ kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(u_long))
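+
+/*
+ * Usage sketch (illustration only): statically linked code can write
+ * IPSTAT_INC(ips_total), while a loadable module uses
+ * KMOD_IPSTAT_INC(ips_total) so that it reaches the per-vnet counters
+ * through the exported helpers instead of binding to V_ipstat directly.
+ */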
+
+/* flags passed to ip_output as last parameter */
+#define IP_FORWARDING 0x1 /* most of ip header exists */
+#define IP_RAWOUTPUT 0x2 /* raw ip header exists */
+#define IP_SENDONES 0x4 /* send all-ones broadcast */
+#define IP_SENDTOIF 0x8 /* send on specific ifnet */
+#define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */
+#define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */
+
+/*
+ * mbuf flag used by ip_fastfwd
+ */
+#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
+
+#ifdef __NO_STRICT_ALIGNMENT
+#define IP_HDR_ALIGNED_P(ip) 1
+#else
+#define IP_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0)
+#endif
+
+struct ip;
+struct inpcb;
+struct route;
+struct sockopt;
+
+VNET_DECLARE(struct ipstat, ipstat);
+VNET_DECLARE(u_short, ip_id); /* ip packet ctr, for ids */
+VNET_DECLARE(int, ip_defttl); /* default IP ttl */
+VNET_DECLARE(int, ipforwarding); /* ip forwarding */
+#ifdef IPSTEALTH
+VNET_DECLARE(int, ipstealth); /* stealth forwarding */
+#endif
+extern u_char ip_protox[];
+VNET_DECLARE(struct socket *, ip_rsvpd); /* reservation protocol daemon*/
+VNET_DECLARE(struct socket *, ip_mrouter); /* multicast routing daemon */
+extern int (*legal_vif_num)(int);
+extern u_long (*ip_mcast_src)(int);
+VNET_DECLARE(int, rsvp_on);
+extern struct pr_usrreqs rip_usrreqs;
+
+#define V_ipstat VNET(ipstat)
+#define V_ip_id VNET(ip_id)
+#define V_ip_defttl VNET(ip_defttl)
+#define V_ipforwarding VNET(ipforwarding)
+#ifdef IPSTEALTH
+#define V_ipstealth VNET(ipstealth)
+#endif
+#define V_ip_rsvpd VNET(ip_rsvpd)
+#define V_ip_mrouter VNET(ip_mrouter)
+#define V_rsvp_on VNET(rsvp_on)
+
+void inp_freemoptions(struct ip_moptions *);
+int inp_getmoptions(struct inpcb *, struct sockopt *);
+int inp_setmoptions(struct inpcb *, struct sockopt *);
+
+int ip_ctloutput(struct socket *, struct sockopt *sopt);
+void ip_drain(void);
+void ip_fini(void *xtp);
+int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
+ u_long if_hwassist_flags, int sw_csum);
+void ip_forward(struct mbuf *m, int srcrt);
+void ip_init(void);
+#ifdef VIMAGE
+void ip_destroy(void);
+#endif
+extern int
+ (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *);
+int ip_output(struct mbuf *,
+ struct mbuf *, struct route *, int, struct ip_moptions *,
+ struct inpcb *);
+int ipproto_register(short);
+int ipproto_unregister(short);
+struct mbuf *
+ ip_reass(struct mbuf *);
+struct in_ifaddr *
+ ip_rtaddr(struct in_addr, u_int fibnum);
+void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
+ struct mbuf *);
+void ip_slowtimo(void);
+u_int16_t ip_randomid(void);
+int rip_ctloutput(struct socket *, struct sockopt *);
+void rip_ctlinput(int, struct sockaddr *, void *);
+void rip_init(void);
+#ifdef VIMAGE
+void rip_destroy(void);
+#endif
+void rip_input(struct mbuf *, int);
+int rip_output(struct mbuf *, struct socket *, u_long);
+void ipip_input(struct mbuf *, int);
+void rsvp_input(struct mbuf *, int);
+int ip_rsvp_init(struct socket *);
+int ip_rsvp_done(void);
+extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
+extern void (*ip_rsvp_force_done)(struct socket *);
+extern void (*rsvp_input_p)(struct mbuf *m, int off);
+
+VNET_DECLARE(struct pfil_head, inet_pfil_hook); /* packet filter hooks */
+#define V_inet_pfil_hook VNET(inet_pfil_hook)
+
+void in_delayed_cksum(struct mbuf *m);
+
+/* Hooks for ipfw, dummynet, divert etc. Most are declared in raw_ip.c */
+/*
+ * Reference to an ipfw or packet filter rule that can be carried
+ * outside critical sections.
+ * A rule is identified by rulenum:rule_id which is ordered.
+ * In version chain_id the rule can be found in slot 'slot', so
+ * we don't need a lookup if chain_id == chain->id.
+ *
+ * On exit from the firewall this structure refers to the rule after
+ * the matching one (slot points to the new rule; rulenum:rule_id-1
+ * is the matching rule), and additional info (e.g. info often contains
+ * the insn argument or tablearg in the low 16 bits, in host format).
+ * On entry, the structure is valid if slot>0, and refers to the starting
+ * rules. 'info' contains the reason for reinject, e.g. divert port,
+ * divert direction, and so on.
+ */
+struct ipfw_rule_ref {
+ uint32_t slot; /* slot for matching rule */
+ uint32_t rulenum; /* matching rule number */
+ uint32_t rule_id; /* matching rule id */
+ uint32_t chain_id; /* ruleset id */
+ uint32_t info; /* see below */
+};
+
+enum {
+ IPFW_INFO_MASK = 0x0000ffff,
+ IPFW_INFO_OUT = 0x00000000, /* outgoing, just for convenience */
+ IPFW_INFO_IN = 0x80000000, /* incoming, overloads dir */
+ IPFW_ONEPASS = 0x40000000, /* One-pass, do not reinject */
+ IPFW_IS_MASK = 0x30000000, /* which source ? */
+ IPFW_IS_DIVERT = 0x20000000,
+ IPFW_IS_DUMMYNET =0x10000000,
+	IPFW_IS_PIPE =	0x08000000,	/* pipe = 1, queue = 0 */
+};
+#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */
+#define MTAG_IPFW_RULE 1262273568 /* rule reference */
+
+struct ip_fw_args;
+typedef int (*ip_fw_chk_ptr_t)(struct ip_fw_args *args);
+typedef int (*ip_fw_ctl_ptr_t)(struct sockopt *);
+VNET_DECLARE(ip_fw_chk_ptr_t, ip_fw_chk_ptr);
+VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr);
+#define V_ip_fw_chk_ptr VNET(ip_fw_chk_ptr)
+#define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr)
+
+/* Divert hooks. */
+extern void (*ip_divert_ptr)(struct mbuf *m, int incoming);
+/* ng_ipfw hooks -- XXX make it the same as divert and dummynet */
+extern int (*ng_ipfw_input_p)(struct mbuf **, int,
+ struct ip_fw_args *, int);
+
+extern int (*ip_dn_ctl_ptr)(struct sockopt *);
+extern int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
+
+VNET_DECLARE(int, ip_do_randomid);
+#define V_ip_do_randomid VNET(ip_do_randomid)
+#define ip_newid() ((V_ip_do_randomid != 0) ? ip_randomid() : \
+ htons(V_ip_id++))
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_IP_VAR_HH_ */
diff --git a/freebsd/sys/netinet/ipfw/dn_heap.c b/freebsd/sys/netinet/ipfw/dn_heap.c
new file mode 100644
index 00000000..1e6133bc
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/dn_heap.c
@@ -0,0 +1,552 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Binary heap and hash tables, used in dummynet
+ *
+ * $FreeBSD$
+ */
+
+#include <freebsd/sys/cdefs.h>
+#include <freebsd/sys/param.h>
+#ifdef _KERNEL
+__FBSDID("$FreeBSD$");
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#ifndef log
+#define log(x, arg...)
+#endif
+
+#else /* !_KERNEL */
+
+#include <freebsd/stdio.h>
+#include <freebsd/dn_test.h>
+#include <freebsd/strings.h>
+#include <freebsd/stdlib.h>
+
+#include "dn_heap.h"
+#define log(x, arg...) fprintf(stderr, ## arg)
+#define panic(x...) fprintf(stderr, ## x), exit(1)
+#define MALLOC_DEFINE(a, b, c)
+static void *my_malloc(int s) { return malloc(s); }
+static void my_free(void *p) { free(p); }
+#define malloc(s, t, w) my_malloc(s)
+#define free(p, t) my_free(p)
+#endif /* !_KERNEL */
+
+MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
+
+/*
+ * Heap management functions.
+ *
+ * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
+ * Some macros help finding parent/children so we can optimize them.
+ *
+ * heap_init() is called to expand the heap when needed.
+ * Increment size in blocks of 16 entries.
+ * Returns 1 on error, 0 on success
+ */
+#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
+#define HEAP_LEFT(x) ( (x)+(x) + 1 )
+#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
+#define HEAP_INCREMENT 15
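+
+/*
+ * Small worked example of the index arithmetic above (illustration only):
+ * element 3 has children HEAP_LEFT(3) = 7 and 8, and the integer division
+ * in HEAP_FATHER() maps both 7 and 8 back to 3.
+ */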
+
+static int
+heap_resize(struct dn_heap *h, unsigned int new_size)
+{
+ struct dn_heap_entry *p;
+
+ if (h->size >= new_size ) /* have enough room */
+ return 0;
+#if 1 /* round to the next power of 2 */
+ new_size |= new_size >> 1;
+ new_size |= new_size >> 2;
+ new_size |= new_size >> 4;
+ new_size |= new_size >> 8;
+ new_size |= new_size >> 16;
+#else
+ new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT;
+#endif
+ p = malloc(new_size * sizeof(*p), M_DN_HEAP, M_NOWAIT);
+ if (p == NULL) {
+ printf("--- %s, resize %d failed\n", __func__, new_size );
+ return 1; /* error */
+ }
+ if (h->size > 0) {
+ bcopy(h->p, p, h->size * sizeof(*p) );
+ free(h->p, M_DN_HEAP);
+ }
+ h->p = p;
+ h->size = new_size;
+ return 0;
+}
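+
+/*
+ * Worked example of the rounding above (illustration only): for a request
+ * of 20 slots the OR cascade sets every bit below the leading one, so the
+ * array is (re)allocated with new_size = 31 entries, i.e. 2^k - 1 rather
+ * than an exact power of two.
+ */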
+
+int
+heap_init(struct dn_heap *h, int size, int ofs)
+{
+ if (heap_resize(h, size))
+ return 1;
+ h->elements = 0;
+ h->ofs = ofs;
+ return 0;
+}
+
+/*
+ * Insert element in heap. Normally, p != NULL, we insert p in
+ * a new position and bubble up. If p == NULL, then the element is
+ * already in place, and key is the position where to start the
+ * bubble-up.
+ * Returns 1 on failure (cannot allocate new heap entry)
+ *
+ * If ofs > 0 the position (index, int) of the element in the heap is
+ * also stored in the element itself at the given offset in bytes.
+ */
+#define SET_OFFSET(h, i) do { \
+ if (h->ofs > 0) \
+ *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = i; \
+ } while (0)
+/*
+ * RESET_OFFSET is used for sanity checks. It sets ofs
+ * to an invalid value.
+ */
+#define RESET_OFFSET(h, i) do { \
+ if (h->ofs > 0) \
+ *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = -16; \
+ } while (0)
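+
+#if 0
+/*
+ * Hypothetical consumer of the offset mechanism described above (sketch
+ * only, the names are made up): the element records its own heap index,
+ * which is what lets heap_extract() remove it from the middle of the heap.
+ */
+struct my_sched_item {
+	uint64_t deadline;	/* key used when inserting */
+	int heap_pos;		/* slot kept up to date via SET_OFFSET() */
+};
+
+static void
+my_sched_item_example(struct dn_heap *h, struct my_sched_item *si)
+{
+	heap_init(h, 16, offsetof(struct my_sched_item, heap_pos));
+	heap_insert(h, si->deadline, si);	/* bubbles up, sets heap_pos */
+	heap_extract(h, si);			/* uses heap_pos to find the slot */
+}
+#endif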
+
+int
+heap_insert(struct dn_heap *h, uint64_t key1, void *p)
+{
+ int son = h->elements;
+
+ //log("%s key %llu p %p\n", __FUNCTION__, key1, p);
+ if (p == NULL) { /* data already there, set starting point */
+ son = key1;
+ } else { /* insert new element at the end, possibly resize */
+ son = h->elements;
+ if (son == h->size) /* need resize... */
+ // XXX expand by 16 or so
+ if (heap_resize(h, h->elements+16) )
+ return 1; /* failure... */
+ h->p[son].object = p;
+ h->p[son].key = key1;
+ h->elements++;
+ }
+ /* make sure that son >= father along the path */
+ while (son > 0) {
+ int father = HEAP_FATHER(son);
+ struct dn_heap_entry tmp;
+
+ if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
+ break; /* found right position */
+ /* son smaller than father, swap and repeat */
+ HEAP_SWAP(h->p[son], h->p[father], tmp);
+ SET_OFFSET(h, son);
+ son = father;
+ }
+ SET_OFFSET(h, son);
+ return 0;
+}
+
+/*
+ * remove top element from heap, or obj if obj != NULL
+ */
+void
+heap_extract(struct dn_heap *h, void *obj)
+{
+ int child, father, max = h->elements - 1;
+
+ if (max < 0) {
+ printf("--- %s: empty heap 0x%p\n", __FUNCTION__, h);
+ return;
+ }
+ if (obj == NULL)
+ father = 0; /* default: move up smallest child */
+ else { /* extract specific element, index is at offset */
+ if (h->ofs <= 0)
+ panic("%s: extract from middle not set on %p\n",
+ __FUNCTION__, h);
+ father = *((int *)((char *)obj + h->ofs));
+ if (father < 0 || father >= h->elements) {
+ panic("%s: father %d out of bound 0..%d\n",
+ __FUNCTION__, father, h->elements);
+ }
+ }
+ /*
+ * below, father is the index of the empty element, which
+ * we replace at each step with the smallest child until we
+ * reach the bottom level.
+ */
+ // XXX why removing RESET_OFFSET increases runtime by 10% ?
+ RESET_OFFSET(h, father);
+ while ( (child = HEAP_LEFT(father)) <= max ) {
+ if (child != max &&
+ DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
+ child++; /* take right child, otherwise left */
+ h->p[father] = h->p[child];
+ SET_OFFSET(h, father);
+ father = child;
+ }
+ h->elements--;
+ if (father != max) {
+ /*
+ * Fill hole with last entry and bubble up,
+ * reusing the insert code
+ */
+ h->p[father] = h->p[max];
+ heap_insert(h, father, NULL);
+ }
+}
+
+#if 0
+/*
+ * change object position and update references
+ * XXX this one is never used!
+ */
+static void
+heap_move(struct dn_heap *h, uint64_t new_key, void *object)
+{
+ int temp, i, max = h->elements-1;
+ struct dn_heap_entry *p, buf;
+
+ if (h->ofs <= 0)
+ panic("cannot move items on this heap");
+ p = h->p; /* shortcut */
+
+ i = *((int *)((char *)object + h->ofs));
+ if (DN_KEY_LT(new_key, p[i].key) ) { /* must move up */
+ p[i].key = new_key;
+ for (; i>0 &&
+ DN_KEY_LT(new_key, p[(temp = HEAP_FATHER(i))].key);
+ i = temp ) { /* bubble up */
+ HEAP_SWAP(p[i], p[temp], buf);
+ SET_OFFSET(h, i);
+ }
+ } else { /* must move down */
+ p[i].key = new_key;
+ while ( (temp = HEAP_LEFT(i)) <= max ) {
+ /* found left child */
+ if (temp != max &&
+ DN_KEY_LT(p[temp+1].key, p[temp].key))
+ temp++; /* select child with min key */
+ if (DN_KEY_LT(p[temp].key, new_key)) {
+ /* go down */
+ HEAP_SWAP(p[i], p[temp], buf);
+ SET_OFFSET(h, i);
+ } else
+ break;
+ i = temp;
+ }
+ }
+ SET_OFFSET(h, i);
+}
+#endif /* heap_move, unused */
+
+/*
+ * heapify() will reorganize data inside an array to maintain the
+ * heap property. It is needed when we delete a bunch of entries.
+ */
+static void
+heapify(struct dn_heap *h)
+{
+ int i;
+
+ for (i = 0; i < h->elements; i++ )
+ heap_insert(h, i , NULL);
+}
+
+int
+heap_scan(struct dn_heap *h, int (*fn)(void *, uintptr_t),
+ uintptr_t arg)
+{
+ int i, ret, found;
+
+ for (i = found = 0 ; i < h->elements ;) {
+ ret = fn(h->p[i].object, arg);
+ if (ret & HEAP_SCAN_DEL) {
+ h->elements-- ;
+ h->p[i] = h->p[h->elements] ;
+ found++ ;
+ } else
+ i++ ;
+ if (ret & HEAP_SCAN_END)
+ break;
+ }
+ if (found)
+ heapify(h);
+ return found;
+}
+
+/*
+ * cleanup the heap and free data structure
+ */
+void
+heap_free(struct dn_heap *h)
+{
+ if (h->size >0 )
+ free(h->p, M_DN_HEAP);
+ bzero(h, sizeof(*h) );
+}
+
+/*
+ * hash table support.
+ */
+
+struct dn_ht {
+ int buckets; /* how many buckets, really buckets - 1 */
+ int entries; /* how many entries */
+ int ofs; /* offset of link field */
+ uint32_t (*hash)(uintptr_t, int, void *arg);
+ int (*match)(void *_el, uintptr_t key, int, void *);
+ void *(*newh)(uintptr_t, int, void *);
+ void **ht; /* bucket heads */
+};
+/*
+ * Initialize, allocating bucket pointers inline.
+ * Recycle previous record if possible.
+ * If the 'newh' function is not supplied, we assume that the
+ * key passed to ht_find is itself the object to be stored.
+ */
+struct dn_ht *
+dn_ht_init(struct dn_ht *ht, int buckets, int ofs,
+ uint32_t (*h)(uintptr_t, int, void *),
+ int (*match)(void *, uintptr_t, int, void *),
+ void *(*newh)(uintptr_t, int, void *))
+{
+ int l;
+
+ /*
+ * Notes about rounding bucket size to a power of two.
+ * Given the original bucket size, we compute the nearest lower and
+ * higher powers of two, minus 1 (respectively b_min and b_max), because
+ * this value will be used to do an AND with the index returned
+ * by the hash function.
+ * To choose between these two values, the original bucket size is
+ * compared with b_min. If the original size is greater than 4/3 b_min,
+ * we round the bucket size to b_max, otherwise to b_min.
+ * This ratio tries to round to the nearest power of two, favoring
+ * the larger size when the difference between the two powers is
+ * relatively big.
+ * Rounding the bucket size to a power of two avoids the use of a
+ * modulo operation when calculating the correct bucket.
+ * The ht->buckets variable stores the bucket size - 1 so we can simply
+ * do an AND between the index returned by the hash function and
+ * ht->buckets instead of a modulo.
+ */
+ int b_min; /* min buckets */
+ int b_max; /* max buckets */
+ int b_ori; /* original buckets */
+
+ if (h == NULL || match == NULL) {
+ printf("--- missing hash or match function");
+ return NULL;
+ }
+ if (buckets < 1 || buckets > 65536)
+ return NULL;
+
+ b_ori = buckets;
+ /* calculate next power of 2, - 1*/
+ buckets |= buckets >> 1;
+ buckets |= buckets >> 2;
+ buckets |= buckets >> 4;
+ buckets |= buckets >> 8;
+ buckets |= buckets >> 16;
+
+ b_max = buckets; /* Next power */
+ b_min = buckets >> 1; /* Previous power */
+
+ /* Calculate the 'nearest' bucket size */
+ if (b_min * 4000 / 3000 < b_ori)
+ buckets = b_max;
+ else
+ buckets = b_min;
+
+ if (ht) { /* see if we can reuse */
+ if (buckets <= ht->buckets) {
+ ht->buckets = buckets;
+ } else {
+ /* free pointers if not allocated inline */
+ if (ht->ht != (void *)(ht + 1))
+ free(ht->ht, M_DN_HEAP);
+ free(ht, M_DN_HEAP);
+ ht = NULL;
+ }
+ }
+ if (ht == NULL) {
+ /* Allocate buckets + 1 entries because buckets is used to
+ * do the AND with the index returned by the hash function
+ */
+ l = sizeof(*ht) + (buckets + 1) * sizeof(void **);
+ ht = malloc(l, M_DN_HEAP, M_NOWAIT | M_ZERO);
+ }
+ if (ht) {
+ ht->ht = (void **)(ht + 1);
+ ht->buckets = buckets;
+ ht->ofs = ofs;
+ ht->hash = h;
+ ht->match = match;
+ ht->newh = newh;
+ }
+ return ht;
+}
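Two worked cases of the bucket rounding above: a request for 50 buckets gives b_min = 31 and b_max = 63; since 31 * 4000 / 3000 = 41 is less than 50, the table is rounded up to 64 buckets (mask 63). A request for 40 buckets computes the same b_min and b_max, but 41 is not less than 40, so the table is rounded down to 32 buckets (mask 31).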
+
+/* dummy callback for dn_ht_free to unlink all */
+static int
+do_del(void *obj, void *arg)
+{
+ return DNHT_SCAN_DEL;
+}
+
+void
+dn_ht_free(struct dn_ht *ht, int flags)
+{
+ if (ht == NULL)
+ return;
+ if (flags & DNHT_REMOVE) {
+ (void)dn_ht_scan(ht, do_del, NULL);
+ } else {
+ if (ht->ht && ht->ht != (void *)(ht + 1))
+ free(ht->ht, M_DN_HEAP);
+ free(ht, M_DN_HEAP);
+ }
+}
+
+int
+dn_ht_entries(struct dn_ht *ht)
+{
+ return ht ? ht->entries : 0;
+}
+
+/* lookup and optionally create or delete element */
+void *
+dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg)
+{
+ int i;
+ void **pp, *p;
+
+ if (ht == NULL) /* easy on an empty hash */
+ return NULL;
+ i = (ht->buckets == 1) ? 0 :
+ (ht->hash(key, flags, arg) & ht->buckets);
+
+ for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) {
+ if (flags & DNHT_MATCH_PTR) {
+ if (key == (uintptr_t)p)
+ break;
+ } else if (ht->match(p, key, flags, arg)) /* found match */
+ break;
+ }
+ if (p) {
+ if (flags & DNHT_REMOVE) {
+ /* link in the next element */
+ *pp = *(void **)((char *)p + ht->ofs);
+ *(void **)((char *)p + ht->ofs) = NULL;
+ ht->entries--;
+ }
+ } else if (flags & DNHT_INSERT) {
+ // printf("%s before calling new, bucket %d ofs %d\n",
+ // __FUNCTION__, i, ht->ofs);
+ p = ht->newh ? ht->newh(key, flags, arg) : (void *)key;
+ // printf("%s newh returns %p\n", __FUNCTION__, p);
+ if (p) {
+ ht->entries++;
+ *(void **)((char *)p + ht->ofs) = ht->ht[i];
+ ht->ht[i] = p;
+ }
+ }
+ return p;
+}
+
+/*
+ * do a scan with the option to delete the object. Extract next before
+ * running the callback because the element may be destroyed there.
+ */
+int
+dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg)
+{
+ int i, ret, found = 0;
+ void **curp, *cur, *next;
+
+ if (ht == NULL || fn == NULL)
+ return 0;
+ for (i = 0; i <= ht->buckets; i++) {
+ curp = &ht->ht[i];
+ while ( (cur = *curp) != NULL) {
+ next = *(void **)((char *)cur + ht->ofs);
+ ret = fn(cur, arg);
+ if (ret & DNHT_SCAN_DEL) {
+ found++;
+ ht->entries--;
+ *curp = next;
+ } else {
+ curp = (void **)((char *)cur + ht->ofs);
+ }
+ if (ret & DNHT_SCAN_END)
+ return found;
+ }
+ }
+ return found;
+}
+
+/*
+ * Similar to dn_ht_scan(), except that the scan is performed only
+ * in the bucket 'bucket'. The function returns a correct bucket number if
+ * the original is invalid.
+ */
+int
+dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *),
+ void *arg)
+{
+ int i, ret, found = 0;
+ void **curp, *cur, *next;
+
+ if (ht == NULL || fn == NULL)
+ return 0;
+ if (*bucket > ht->buckets)
+ *bucket = 0;
+ i = *bucket;
+
+ curp = &ht->ht[i];
+ while ( (cur = *curp) != NULL) {
+ next = *(void **)((char *)cur + ht->ofs);
+ ret = fn(cur, arg);
+ if (ret & DNHT_SCAN_DEL) {
+ found++;
+ ht->entries--;
+ *curp = next;
+ } else {
+ curp = (void **)((char *)cur + ht->ofs);
+ }
+ if (ret & DNHT_SCAN_END)
+ return found;
+ }
+ return found;
+}
+
diff --git a/freebsd/sys/netinet/ipfw/dn_heap.h b/freebsd/sys/netinet/ipfw/dn_heap.h
new file mode 100644
index 00000000..c95473ad
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/dn_heap.h
@@ -0,0 +1,191 @@
+/*-
+ * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Binary heap and hash tables, header file
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_DN_HEAP_H
+#define _IP_DN_HEAP_H
+
+#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0)
+#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0)
+
+/*
+ * This module implements a binary heap supporting random extraction.
+ *
+ * A heap entry contains an uint64_t key and a pointer to object.
+ * DN_KEY_LT(a,b) returns true if key 'a' is smaller than 'b'
+ *
+ * The heap is a struct dn_heap plus a dynamically allocated
+ * array of dn_heap_entry entries. 'size' represents the size of
+ * the array, 'elements' counts the entries in use. The topmost
+ * element has the smallest key.
+ * The heap supports ordered insertion, and extraction from the top.
+ * To extract an object from the middle of the heap, the object
+ * must reserve an 'int32_t' to store the position of the object
+ * in the heap itself, and the location of this field must be
+ * passed as an argument to heap_init() -- use -1 if the feature
+ * is not used.
+ */
+struct dn_heap_entry {
+ uint64_t key; /* sorting key, smallest comes first */
+ void *object; /* object pointer */
+};
+
+struct dn_heap {
+ int size; /* the size of the array */
+ int elements; /* elements in use */
+ int ofs; /* offset in the object of heap index */
+ struct dn_heap_entry *p; /* array of "size" entries */
+};
+
+enum {
+ HEAP_SCAN_DEL = 1,
+ HEAP_SCAN_END = 2,
+};
+
+/*
+ * heap_init() reinitializes the heap setting the size and the offset
+ * of the index for random extraction (use -1 if not used).
+ * The 'elements' counter is set to 0.
+ *
+ * SET_HEAP_OFS() indicates where, in the object, is stored the index
+ * for random extractions from the heap.
+ *
+ * heap_free() frees the memory associated to a heap.
+ *
+ * heap_insert() adds a key-pointer pair to the heap
+ *
+ * HEAP_TOP() returns a pointer to the top element of the heap,
+ * but makes no checks on its existence (XXX should we change ?)
+ *
+ * heap_extract() removes the entry at the top, returning the pointer.
+ * (the key should have been read before).
+ *
+ * heap_scan() invokes a callback on each entry of the heap.
+ * The callback can return a combination of HEAP_SCAN_DEL and
+ * HEAP_SCAN_END. HEAP_SCAN_DEL means the current element must
+ * be removed, and HEAP_SCAN_END means to terminate the scan.
+ * heap_scan() returns the number of elements removed.
+ * Because the order is not guaranteed, we should use heap_scan()
+ * only as a last resort mechanism.
+ */
+#define HEAP_TOP(h) ((h)->p)
+#define SET_HEAP_OFS(h, n) do { (h)->ofs = n; } while (0)
+int heap_init(struct dn_heap *h, int size, int ofs);
+int heap_insert(struct dn_heap *h, uint64_t key1, void *p);
+void heap_extract(struct dn_heap *h, void *obj);
+void heap_free(struct dn_heap *h);
+int heap_scan(struct dn_heap *, int (*)(void *, uintptr_t), uintptr_t);
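
A minimal usage sketch of the heap API above (illustrative only; obj_a and obj_b stand for arbitrary caller-owned pointers, and the heap is initialized with ofs = -1, i.e. without support for extraction from the middle):

	static void
	heap_example(void *obj_a, void *obj_b)
	{
		struct dn_heap h;

		bzero(&h, sizeof(h));
		if (heap_init(&h, 16, -1))	/* -1: no per-object index field */
			return;			/* allocation failed */
		heap_insert(&h, 10, obj_a);	/* key 10 */
		heap_insert(&h, 5, obj_b);	/* key 5 becomes the new top */
		/* HEAP_TOP(&h)->key is now 5, HEAP_TOP(&h)->object is obj_b */
		heap_extract(&h, NULL);		/* remove the entry with the smallest key */
		heap_free(&h);
	}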
+
+/*------------------------------------------------------
+ * This module implements a generic hash table with support for
+ * running callbacks on the entire table. To avoid allocating
+ * memory during hash table operations, objects must reserve
+ * space for a link field. XXX if the heap is moderately full,
+ * an SLIST suffices, and we can tolerate the cost of a hash
+ * computation on each removal.
+ *
+ * dn_ht_init() initializes the table, setting the number of
+ * buckets, the offset of the link field, the main callbacks.
+ * Callbacks are:
+ *
+ * hash(key, flags, arg) called to return a bucket index.
+ * match(obj, key, flags, arg) called to determine if key
+ * matches the current 'obj' in the heap
+ * newh(key, flags, arg) optional, used to allocate a new
+ * object during insertions.
+ *
+ * dn_ht_free() frees the heap or unlinks the elements.
+ * DNHT_REMOVE unlinks the elements, 0 frees the heap.
+ * You need two calls to do both.
+ *
+ * dn_ht_find() is the main lookup function, which can also be
+ * used to insert or delete elements in the hash table.
+ * The final 'arg' is passed to all callbacks.
+ *
+ * dn_ht_scan() is used to invoke a callback on all entries of
+ * the heap, or possibly on just one bucket. The callback
+ * is invoked with a pointer to the object, and must return
+ * one of DNHT_SCAN_DEL or DNHT_SCAN_END to request the
+ * removal of the object from the heap and the end of the
+ * scan, respectively.
+ *
+ * dn_ht_scan_bucket() is similar to dn_ht_scan(), except that it scans
+ * only the specified bucket of the table. The bucket is an in-out
+ * parameter: on return it holds a valid bucket number if the
+ * original was invalid.
+ *
+ * A combination of flags can be used to modify the operation
+ * of the dn_ht_find(), and of the callbacks:
+ *
+ * DNHT_KEY_IS_OBJ means the key is the object pointer.
+ * It is usually of interest for the hash and match functions.
+ *
+ * DNHT_MATCH_PTR during a lookup, match pointers instead
+ * of calling match(). Normally used when removing specific
+ * entries. Does not imply KEY_IS_OBJ as the latter _is_ used
+ * by the match function.
+ *
+ * DNHT_INSERT inserts the element if not found.
+ * Calls newh() to allocate a new object unless
+ * DNHT_KEY_IS_OBJ is set.
+ *
+ * DNHT_UNIQUE only insert if object not found.
+ * XXX should it imply DNHT_INSERT ?
+ *
+ * DNHT_REMOVE removes objects if we find them.
+ */
+struct dn_ht; /* should be opaque */
+
+struct dn_ht *dn_ht_init(struct dn_ht *, int buckets, int ofs,
+ uint32_t (*hash)(uintptr_t, int, void *),
+ int (*match)(void *, uintptr_t, int, void *),
+ void *(*newh)(uintptr_t, int, void *));
+void dn_ht_free(struct dn_ht *, int flags);
+
+void *dn_ht_find(struct dn_ht *, uintptr_t, int, void *);
+int dn_ht_scan(struct dn_ht *, int (*)(void *, void *), void *);
+int dn_ht_scan_bucket(struct dn_ht *, int * , int (*)(void *, void *), void *);
+int dn_ht_entries(struct dn_ht *);
+
+enum { /* flags values.
+ * first two are returned by the scan callback to indicate
+ * to delete the matching element or to end the scan
+ */
+ DNHT_SCAN_DEL = 0x0001,
+ DNHT_SCAN_END = 0x0002,
+ DNHT_KEY_IS_OBJ = 0x0004, /* key is the obj pointer */
+ DNHT_MATCH_PTR = 0x0008, /* match by pointer, not match() */
+ DNHT_INSERT = 0x0010, /* insert if not found */
+ DNHT_UNIQUE = 0x0020, /* report error if already there */
+ DNHT_REMOVE = 0x0040, /* remove on find or dn_ht_free */
+};
+
+#endif /* _IP_DN_HEAP_H */
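
A usage sketch of the hash table API (illustrative only: struct my_flow, my_hash, my_match and ht_example are hypothetical names, and no newh callback is supplied, so on insertion the object pointer itself is passed as the key):

	struct my_flow {			/* hypothetical element */
		struct my_flow *ht_link;	/* link field required by dn_ht */
		uintptr_t id;
	};

	static uint32_t
	my_hash(uintptr_t key, int flags, void *arg)
	{
		/* the key is a numeric id, or the object itself on insertion */
		uintptr_t id = (flags & DNHT_KEY_IS_OBJ) ?
		    ((struct my_flow *)key)->id : key;

		return ((uint32_t)id);
	}

	static int
	my_match(void *obj, uintptr_t key, int flags, void *arg)
	{
		uintptr_t id = (flags & DNHT_KEY_IS_OBJ) ?
		    ((struct my_flow *)key)->id : key;

		return (((struct my_flow *)obj)->id == id);
	}

	static void
	ht_example(struct my_flow *f)
	{
		struct dn_ht *t;
		struct my_flow *hit;

		t = dn_ht_init(NULL, 50, offsetof(struct my_flow, ht_link),
		    my_hash, my_match, NULL);	/* 50 rounds to 64 buckets */
		if (t == NULL)
			return;
		/* insert f, passing the object itself as the key */
		dn_ht_find(t, (uintptr_t)f, DNHT_INSERT | DNHT_KEY_IS_OBJ, NULL);
		/* later, look it up by numeric id */
		hit = dn_ht_find(t, f->id, 0, NULL);
		(void)hit;
		dn_ht_free(t, DNHT_REMOVE);	/* unlink all entries... */
		dn_ht_free(t, 0);		/* ...then release the table */
	}

The two dn_ht_free() calls follow the note above: DNHT_REMOVE only unlinks the entries (the caller still owns them), while a second call with flags 0 releases the bucket array and the table itself.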
diff --git a/freebsd/sys/netinet/ipfw/dn_sched.h b/freebsd/sys/netinet/ipfw/dn_sched.h
new file mode 100644
index 00000000..fe54b020
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/dn_sched.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The API to write a packet scheduling algorithm for dummynet.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DN_SCHED_H
+#define _DN_SCHED_H
+
+#define DN_MULTIQUEUE 0x01
+/*
+ * Descriptor for a scheduling algorithm.
+ * Contains all function pointers for a given scheduler
+ * This is typically created when a module is loaded, and stored
+ * in a global list of schedulers.
+ */
+struct dn_alg {
+ uint32_t type; /* the scheduler type */
+ const char *name; /* scheduler name */
+ uint32_t flags; /* DN_MULTIQUEUE if supports multiple queues */
+
+ /*
+ * The following define the size of 3 optional data structures
+ * that may need to be allocated at runtime, and are appended
+ * to each of the base data structures: scheduler, sched.inst,
+ * and queue. We don't have a per-flowset structure.
+ */
+ /* + parameters attached to the template, e.g.
+ * default queue sizes, weights, quantum size, and so on;
+ */
+ size_t schk_datalen;
+
+ /* + per-instance parameters, such as timestamps,
+ * containers for queues, etc;
+ */
+ size_t si_datalen;
+
+ size_t q_datalen; /* per-queue parameters (e.g. S,F) */
+
+ /*
+ * Methods implemented by the scheduler:
+ * enqueue enqueue packet 'm' on scheduler 's', queue 'q'.
+ * q is NULL for !MULTIQUEUE.
+ * Return 0 on success, 1 on drop (packet consumed anyway).
+ * Note that q should be interpreted only as a hint
+ * on the flow that the mbuf belongs to: while a
+ * scheduler will normally enqueue m into q, it is ok
+ * to leave q alone and put the mbuf elsewhere.
+ * This function is called in two cases:
+ * - when a new packet arrives to the scheduler;
+ * - when a scheduler is reconfigured. In this case the
+ * call is issued by the new_queue callback, with a
+ * non empty queue (q) and m pointing to the first
+ * mbuf in the queue. For this reason, the function
+ * should internally check for (m != q->mq.head)
+ * before calling dn_enqueue().
+ *
+ * dequeue Called when scheduler instance 's' can
+ * dequeue a packet. Return NULL if none are available.
+ * XXX what about non work-conserving ?
+ *
+ * config called on 'sched X config ...', normally writes
+ * in the area of size sch_arg
+ *
+ * destroy called on 'sched delete', frees everything
+ * in sch_arg (other parts are handled by more specific
+ * functions)
+ *
+ * new_sched called when a new instance is created, e.g.
+ * to create the local queue for !MULTIQUEUE, set V or
+ * copy parameters for WFQ, and so on.
+ *
+ * free_sched called when deleting an instance, cleans
+ * extra data in the per-instance area.
+ *
+ * new_fsk called when a flowset is linked to a scheduler,
+ * e.g. to validate parameters such as weights etc.
+ * free_fsk when a flowset is unlinked from a scheduler.
+ * (probably unnecessary)
+ *
+ * new_queue called to set the per-queue parameters,
+ * e.g. S and F, adjust sum of weights in the parent, etc.
+ *
+ * The new_queue callback is normally called when
+ * creating a new queue. In some cases (such as a
+ * scheduler change or reconfiguration) it can be called
+ * with a non-empty queue. In that case the callback may
+ * need to call the enqueue function; it should eventually
+ * call enqueue() passing as m the first element in the queue.
+ *
+ * free_queue actions related to a queue removal, e.g. undo
+ * all the above. If the queue has data in it, also remove
+ * from the scheduler. This can e.g. happen during a reconfigure.
+ */
+ int (*enqueue)(struct dn_sch_inst *, struct dn_queue *,
+ struct mbuf *);
+ struct mbuf * (*dequeue)(struct dn_sch_inst *);
+
+ int (*config)(struct dn_schk *);
+ int (*destroy)(struct dn_schk*);
+ int (*new_sched)(struct dn_sch_inst *);
+ int (*free_sched)(struct dn_sch_inst *);
+ int (*new_fsk)(struct dn_fsk *f);
+ int (*free_fsk)(struct dn_fsk *f);
+ int (*new_queue)(struct dn_queue *q);
+ int (*free_queue)(struct dn_queue *q);
+
+ /* run-time fields */
+ int ref_count; /* XXX number of instances in the system */
+ SLIST_ENTRY(dn_alg) next; /* Next scheduler in the list */
+};
+
+/* MSVC does not support initializers so we need this ugly macro */
+#ifdef _WIN32
+#define _SI(fld)
+#else
+#define _SI(fld) fld
+#endif
+
+/*
+ * Additionally, dummynet exports some functions and macros
+ * to be used by schedulers:
+ */
+
+void dn_free_pkts(struct mbuf *mnext);
+int dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop);
+/* bound a variable between min and max */
+int ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg);
+
+/*
+ * Extract the head of a queue, update stats. Must be the very last
+ * thing done on a dequeue as the queue itself may go away.
+ */
+static __inline struct mbuf*
+dn_dequeue(struct dn_queue *q)
+{
+ struct mbuf *m = q->mq.head;
+ if (m == NULL)
+ return NULL;
+ q->mq.head = m->m_nextpkt;
+ q->ni.length--;
+ q->ni.len_bytes -= m->m_pkthdr.len;
+ if (q->_si) {
+ q->_si->ni.length--;
+ q->_si->ni.len_bytes -= m->m_pkthdr.len;
+ }
+ if (q->ni.length == 0) /* queue is now idle */
+ q->q_time = dn_cfg.curr_time;
+ return m;
+}
+
+int dn_sched_modevent(module_t mod, int cmd, void *arg);
+
+#define DECLARE_DNSCHED_MODULE(name, dnsched) \
+ static moduledata_t name##_mod = { \
+ #name, dn_sched_modevent, dnsched \
+ }; \
+ DECLARE_MODULE(name, name##_mod, \
+ SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \
+ MODULE_DEPEND(name, dummynet, 3, 3, 3);
+#endif /* _DN_SCHED_H */
diff --git a/freebsd/sys/netinet/ipfw/dn_sched_fifo.c b/freebsd/sys/netinet/ipfw/dn_sched_fifo.c
new file mode 100644
index 00000000..6d5a4a12
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/dn_sched_fifo.c
@@ -0,0 +1,122 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/net/if.h> /* IFNAMSIZ */
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* ipfw_rule_ref */
+#include <freebsd/netinet/ip_fw.h> /* flow_id */
+#include <freebsd/netinet/ip_dummynet.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#include <freebsd/netinet/ipfw/ip_dn_private.h>
+#include <freebsd/netinet/ipfw/dn_sched.h>
+#else
+#include <freebsd/dn_test.h>
+#endif
+
+/*
+ * This file implements a FIFO scheduler for a single queue.
+ * The queue is allocated as part of the scheduler instance,
+ * and a single flowset in the template stores the
+ * queue size and policy.
+ * Enqueue and dequeue use the default library functions.
+ */
+static int
+fifo_enqueue(struct dn_sch_inst *si, struct dn_queue *q, struct mbuf *m)
+{
+ /* XXX if called with q != NULL and m=NULL, this is a
+ * re-enqueue from an existing scheduler, which we should
+ * handle.
+ */
+ return dn_enqueue((struct dn_queue *)(si+1), m, 0);
+}
+
+static struct mbuf *
+fifo_dequeue(struct dn_sch_inst *si)
+{
+ return dn_dequeue((struct dn_queue *)(si + 1));
+}
+
+static int
+fifo_new_sched(struct dn_sch_inst *si)
+{
+ /* This scheduler instance contains the queue */
+ struct dn_queue *q = (struct dn_queue *)(si + 1);
+
+ set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
+ q->_si = si;
+ q->fs = si->sched->fs;
+ return 0;
+}
+
+static int
+fifo_free_sched(struct dn_sch_inst *si)
+{
+ struct dn_queue *q = (struct dn_queue *)(si + 1);
+ dn_free_pkts(q->mq.head);
+ bzero(q, sizeof(*q));
+ return 0;
+}
+
+/*
+ * FIFO scheduler descriptor
+ * contains the type of the scheduler, the name, the size of extra
+ * data structures, and function pointers.
+ */
+static struct dn_alg fifo_desc = {
+ _SI( .type = ) DN_SCHED_FIFO,
+ _SI( .name = ) "FIFO",
+ _SI( .flags = ) 0,
+
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct dn_queue),
+ _SI( .q_datalen = ) 0,
+
+ _SI( .enqueue = ) fifo_enqueue,
+ _SI( .dequeue = ) fifo_dequeue,
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) fifo_new_sched,
+ _SI( .free_sched = ) fifo_free_sched,
+ _SI( .new_fsk = ) NULL,
+ _SI( .free_fsk = ) NULL,
+ _SI( .new_queue = ) NULL,
+ _SI( .free_queue = ) NULL,
+};
+
+DECLARE_DNSCHED_MODULE(dn_fifo, &fifo_desc);
diff --git a/freebsd/sys/netinet/ipfw/dn_sched_prio.c b/freebsd/sys/netinet/ipfw/dn_sched_prio.c
new file mode 100644
index 00000000..c6b6027c
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/dn_sched_prio.c
@@ -0,0 +1,231 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+#ifdef _KERNEL
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/net/if.h> /* IFNAMSIZ */
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* ipfw_rule_ref */
+#include <freebsd/netinet/ip_fw.h> /* flow_id */
+#include <freebsd/netinet/ip_dummynet.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#include <freebsd/netinet/ipfw/ip_dn_private.h>
+#include <freebsd/netinet/ipfw/dn_sched.h>
+#else
+#include <freebsd/dn_test.h>
+#endif
+
+#define DN_SCHED_PRIO 5 //XXX
+
+#if !defined(_KERNEL) || !defined(__linux__)
+#define test_bit(ix, pData) ((*pData) & (1<<(ix)))
+#define __set_bit(ix, pData) (*pData) |= (1<<(ix))
+#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
+#endif
+
+#ifdef __MIPSEL__
+#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
+#endif
+
+/* Size of the array of queue pointers. */
+#define BITMAP_T unsigned long
+#define MAXPRIO (sizeof(BITMAP_T) * 8)
+
+/*
+ * The scheduler instance contains an array of pointers to queues,
+ * one for each priority, and a bitmap listing backlogged queues.
+ */
+struct prio_si {
+ BITMAP_T bitmap; /* array bitmap */
+ struct dn_queue *q_array[MAXPRIO]; /* Array of queues pointers */
+};
+
+/*
+ * If a queue with the same priority is already backlogged, use
+ * that one instead of the queue passed as argument.
+ */
+static int
+prio_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+ struct prio_si *si = (struct prio_si *)(_si + 1);
+ int prio = q->fs->fs.par[0];
+
+ if (test_bit(prio, &si->bitmap) == 0) {
+ /* No queue with this priority, insert */
+ __set_bit(prio, &si->bitmap);
+ si->q_array[prio] = q;
+ } else { /* use the existing queue */
+ q = si->q_array[prio];
+ }
+ if (dn_enqueue(q, m, 0))
+ return 1;
+ return 0;
+}
+
+/*
+ * Packets are dequeued only from the highest priority queue.
+ * The function ffs() returns the position of the lowest set bit in the
+ * bitmap; subtracting one gives the array index of the pointer to the
+ * highest priority queue.
+ * After the dequeue, if this queue becomes empty, its index is removed
+ * from the bitmap.
+ * The scheduler is idle if the bitmap is empty.
+ *
+ * NOTE: highest priority is 0, lowest is sched->max_prio_q
+ */
+static struct mbuf *
+prio_dequeue(struct dn_sch_inst *_si)
+{
+ struct prio_si *si = (struct prio_si *)(_si + 1);
+ struct mbuf *m;
+ struct dn_queue *q;
+ int prio;
+
+ if (si->bitmap == 0) /* scheduler idle */
+ return NULL;
+
+ prio = ffs(si->bitmap) - 1;
+
+ /* Take the highest priority queue in the scheduler */
+ q = si->q_array[prio];
+ // assert(q)
+
+ m = dn_dequeue(q);
+ if (q->mq.head == NULL) {
+ /* Queue is now empty, remove from scheduler
+ * and mark it
+ */
+ si->q_array[prio] = NULL;
+ __clear_bit(prio, &si->bitmap);
+ }
+ return m;
+}
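For instance, with priorities 0, 3 and 5 backlogged, si->bitmap is 0x29 (binary 101001); ffs(0x29) - 1 = 0, so the dequeue serves q_array[0]. Once that queue drains, bit 0 is cleared and the next dequeue picks priority 3.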
+
+static int
+prio_new_sched(struct dn_sch_inst *_si)
+{
+ struct prio_si *si = (struct prio_si *)(_si + 1);
+
+ bzero(si->q_array, sizeof(si->q_array));
+ si->bitmap = 0;
+
+ return 0;
+}
+
+static int
+prio_new_fsk(struct dn_fsk *fs)
+{
+ /* Check if the priority is between 0 and MAXPRIO-1 */
+ ipdn_bound_var(&fs->fs.par[0], 0, 0, MAXPRIO - 1, "PRIO priority");
+ return 0;
+}
+
+static int
+prio_new_queue(struct dn_queue *q)
+{
+ struct prio_si *si = (struct prio_si *)(q->_si + 1);
+ int prio = q->fs->fs.par[0];
+ struct dn_queue *oldq;
+
+ q->ni.oid.subtype = DN_SCHED_PRIO;
+
+ if (q->mq.head == NULL)
+ return 0;
+
+ /* The queue already holds packets: insert it in the scheduler or
+ * append its mbufs to the existing queue. This partly duplicates
+ * prio_enqueue
+ */
+ if (test_bit(prio, &si->bitmap) == 0) {
+ /* No queue with this priority, insert */
+ __set_bit(prio, &si->bitmap);
+ si->q_array[prio] = q;
+ } else if ( (oldq = si->q_array[prio]) != q) {
+ /* must append to the existing queue.
+ * can simply append q->mq.head to q2->...
+ * and add the counters to those of q2
+ */
+ oldq->mq.tail->m_nextpkt = q->mq.head;
+ oldq->mq.tail = q->mq.tail;
+ oldq->ni.length += q->ni.length;
+ q->ni.length = 0;
+ oldq->ni.len_bytes += q->ni.len_bytes;
+ q->ni.len_bytes = 0;
+ q->mq.tail = q->mq.head = NULL;
+ }
+ return 0;
+}
+
+static int
+prio_free_queue(struct dn_queue *q)
+{
+ int prio = q->fs->fs.par[0];
+ struct prio_si *si = (struct prio_si *)(q->_si + 1);
+
+ if (si->q_array[prio] == q) {
+ si->q_array[prio] = NULL;
+ __clear_bit(prio, &si->bitmap);
+ }
+ return 0;
+}
+
+
+static struct dn_alg prio_desc = {
+ _SI( .type = ) DN_SCHED_PRIO,
+ _SI( .name = ) "PRIO",
+ _SI( .flags = ) DN_MULTIQUEUE,
+
+ /* we need extra space in the si and the queue */
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct prio_si),
+ _SI( .q_datalen = ) 0,
+
+ _SI( .enqueue = ) prio_enqueue,
+ _SI( .dequeue = ) prio_dequeue,
+
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) prio_new_sched,
+ _SI( .free_sched = ) NULL,
+
+ _SI( .new_fsk = ) prio_new_fsk,
+ _SI( .free_fsk = ) NULL,
+
+ _SI( .new_queue = ) prio_new_queue,
+ _SI( .free_queue = ) prio_free_queue,
+};
+
+
+DECLARE_DNSCHED_MODULE(dn_prio, &prio_desc);
diff --git a/freebsd/sys/netinet/ipfw/dn_sched_qfq.c b/freebsd/sys/netinet/ipfw/dn_sched_qfq.c
new file mode 100644
index 00000000..23890199
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/dn_sched_qfq.c
@@ -0,0 +1,866 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/net/if.h> /* IFNAMSIZ */
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* ipfw_rule_ref */
+#include <freebsd/netinet/ip_fw.h> /* flow_id */
+#include <freebsd/netinet/ip_dummynet.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#include <freebsd/netinet/ipfw/ip_dn_private.h>
+#include <freebsd/netinet/ipfw/dn_sched.h>
+#else
+#include <freebsd/dn_test.h>
+#endif
+
+#ifdef QFQ_DEBUG
+struct qfq_sched;
+static void dump_sched(struct qfq_sched *q, const char *msg);
+#define NO(x) x
+#else
+#define NO(x)
+#endif
+#define DN_SCHED_QFQ 4 // XXX Where?
+typedef unsigned long bitmap;
+
+/*
+ * bitmap ops are critical. Some Linux versions have __fls
+ * and the bitmap ops. Some machines have ffs.
+ */
+#if defined(_WIN32)
+int fls(unsigned int n)
+{
+ int i = 0;
+ for (i = 0; n > 0; n >>= 1, i++)
+ ;
+ return i;
+}
+#endif
+
+#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32)
+static inline unsigned long __fls(unsigned long word)
+{
+ return fls(word) - 1;
+}
+#endif
+
+#if !defined(_KERNEL) || !defined(__linux__)
+#ifdef QFQ_DEBUG
+int test_bit(int ix, bitmap *p)
+{
+ if (ix < 0 || ix > 31)
+ D("bad index %d", ix);
+ return *p & (1<<ix);
+}
+void __set_bit(int ix, bitmap *p)
+{
+ if (ix < 0 || ix > 31)
+ D("bad index %d", ix);
+ *p |= (1<<ix);
+}
+void __clear_bit(int ix, bitmap *p)
+{
+ if (ix < 0 || ix > 31)
+ D("bad index %d", ix);
+ *p &= ~(1<<ix);
+}
+#else /* !QFQ_DEBUG */
+/* XXX do we have fast version, or leave it to the compiler ? */
+#define test_bit(ix, pData) ((*pData) & (1<<(ix)))
+#define __set_bit(ix, pData) (*pData) |= (1<<(ix))
+#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
+#endif /* !QFQ_DEBUG */
+#endif /* !__linux__ */
+
+#ifdef __MIPSEL__
+#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
+#endif
+
+/*-------------------------------------------*/
+/*
+
+Virtual time computations.
+
+S, F and V are all computed in fixed point arithmetic with
+FRAC_BITS decimal bits.
+
+ QFQ_MAX_INDEX is the maximum index allowed for a group. We need
+ one bit per index.
+ QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
+ The layout of the bits is as below:
+
+ [ MTU_SHIFT ][ FRAC_BITS ]
+ [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
+ ^.__grp->index = 0
+ *.__grp->slot_shift
+
+ where MIN_SLOT_SHIFT is derived by difference from the others.
+
+The max group index corresponds to Lmax/w_min, where
+Lmax=1<<MTU_SHIFT, w_min = 1 .
+From this, and knowing how many groups (MAX_INDEX) we want,
+we can derive the shift corresponding to each group.
+
+Because we often need to compute
+ F = S + len/w_i and V = V + len/wsum
+instead of storing w_i store the value
+ inv_w = (1<<FRAC_BITS)/w_i
+so we can do F = S + len * inv_w * wsum.
+We use W_TOT in the formulas so we can easily move between
+static and adaptive weight sum.
+
+The per-scheduler-instance data contain all the data structures
+for the scheduler: bitmaps and bucket lists.
+
+ */
+/*
+ * Maximum number of consecutive slots occupied by backlogged classes
+ * inside a group. This is approx lmax/lmin + 5.
+ * XXX check because it poses constraints on MAX_INDEX
+ */
+#define QFQ_MAX_SLOTS 32
+/*
+ * Shifts used for class<->group mapping. Class weights are
+ * in the range [1, QFQ_MAX_WEIGHT]; we map each class i to the
+ * group with the smallest index that can support the L_i / r_i
+ * configured for the class.
+ *
+ * grp->index is the index of the group; and grp->slot_shift
+ * is the shift for the corresponding (scaled) sigma_i.
+ *
+ * When computing the group index, we do (len<<FP_SHIFT)/weight,
+ * then compute an FLS (which is like a log2()), and if the result
+ * is below the MAX_INDEX region we use 0 (which is the same as
+ * using a larger len).
+ */
+#define QFQ_MAX_INDEX 19
+#define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */
+
+#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
+#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
+//#define IWSUM (q->i_wsum)
+#define IWSUM ((1<<FRAC_BITS)/QFQ_MAX_WSUM)
+
+#define FRAC_BITS 30 /* fixed point arithmetic */
+#define ONE_FP (1UL << FRAC_BITS)
+
+#define QFQ_MTU_SHIFT 11 /* log2(max_len) */
+#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
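A worked example of the fixed-point arithmetic: with FRAC_BITS = 30, a class of weight 4 stores inv_w = ONE_FP / 4 = 2^28, so a 1500-byte packet advances its finish time F by 1500 * 2^28, i.e. len/w = 375 expressed on the 2^30 scale. The virtual time V advances by len * IWSUM = 1500 * (2^30 / QFQ_MAX_WSUM) = 1500 * 8192 for the same packet.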
+
+/*
+ * Possible group states, also indexes for the bitmaps array in
+ * struct qfq_queue. We rely on ER, IR, EB, IB being numbered 0..3
+ */
+enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
+
+struct qfq_group;
+/*
+ * additional queue info. Some of this info should come from
+ * the flowset, we copy them here for faster processing.
+ * This is an overlay of the struct dn_queue
+ */
+struct qfq_class {
+ struct dn_queue _q;
+ uint64_t S, F; /* flow timestamps (exact) */
+ struct qfq_class *next; /* Link for the slot list. */
+
+ /* group we belong to. In principle we would need the index,
+ * which is log_2(lmax/weight), but we never reference it
+ * directly, only the group.
+ */
+ struct qfq_group *grp;
+
+ /* these are copied from the flowset. */
+ uint32_t inv_w; /* ONE_FP/weight */
+ uint32_t lmax; /* Max packet size for this flow. */
+};
+
+/* Group descriptor, see the paper for details.
+ * Basically this contains the bucket lists
+ */
+struct qfq_group {
+ uint64_t S, F; /* group timestamps (approx). */
+ unsigned int slot_shift; /* Slot shift. */
+ unsigned int index; /* Group index. */
+ unsigned int front; /* Index of the front slot. */
+ bitmap full_slots; /* non-empty slots */
+
+ /* Array of lists of active classes. */
+ struct qfq_class *slots[QFQ_MAX_SLOTS];
+};
+
+/* scheduler instance descriptor. */
+struct qfq_sched {
+ uint64_t V; /* Precise virtual time. */
+ uint32_t wsum; /* weight sum */
+ NO(uint32_t i_wsum; /* ONE_FP/w_sum */
+ uint32_t _queued; /* debugging */
+ uint32_t loops; /* debugging */)
+ bitmap bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
+ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
+};
+
+/*---- support functions ----------------------------*/
+
+/* Generic comparison function, handling wraparound. */
+static inline int qfq_gt(uint64_t a, uint64_t b)
+{
+ return (int64_t)(a - b) > 0;
+}
+
+/* Round a precise timestamp to its slotted value. */
+static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
+{
+ return ts & ~((1ULL << shift) - 1);
+}
+
+/* return the pointer to the group with lowest index in the bitmap */
+static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
+ unsigned long bitmap)
+{
+ int index = ffs(bitmap) - 1; // zero-based
+ return &q->groups[index];
+}
+
+/*
+ * Calculate a flow index, given its weight and maximum packet length.
+ * index = log_2(maxlen/weight) but we need to apply the scaling.
+ * This is used only once at flow creation.
+ */
+static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
+{
+ uint64_t slot_size = (uint64_t)maxlen *inv_w;
+ unsigned long size_map;
+ int index = 0;
+
+ size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT);
+ if (!size_map)
+ goto out;
+
+ index = __fls(size_map) + 1; // basically a log_2()
+ index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
+
+ if (index < 0)
+ index = 0;
+
+out:
+ ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index);
+ return index;
+}
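Worked example: a class with weight 1 and maxlen 1500 has inv_w = 2^30 and slot_size = 1500 * 2^30. With QFQ_MIN_SLOT_SHIFT = 30 + 11 - 19 = 22, size_map = slot_size >> 22 = 384000 and __fls(384000) = 18, so index = 19; slot_size is not an exact power of two, so nothing is subtracted and the class lands in the last group (index 19 = QFQ_MAX_INDEX). Larger weights give proportionally smaller slot_size values and map to lower-index groups with finer slot granularity.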
+/*---- end support functions ----*/
+
+/*-------- API calls --------------------------------*/
+/*
+ * Validate and copy parameters from flowset.
+ */
+static int
+qfq_new_queue(struct dn_queue *_q)
+{
+ struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
+ struct qfq_class *cl = (struct qfq_class *)_q;
+ int i;
+ uint32_t w; /* approximated weight */
+
+ /* import parameters from the flowset. They should be correct
+ * already.
+ */
+ w = _q->fs->fs.par[0];
+ cl->lmax = _q->fs->fs.par[1];
+ if (!w || w > QFQ_MAX_WEIGHT) {
+ w = 1;
+ D("rounding weight to 1");
+ }
+ cl->inv_w = ONE_FP/w;
+ w = ONE_FP/cl->inv_w;
+ if (q->wsum + w > QFQ_MAX_WSUM)
+ return EINVAL;
+
+ i = qfq_calc_index(cl->inv_w, cl->lmax);
+ cl->grp = &q->groups[i];
+ q->wsum += w;
+ // XXX cl->S = q->V; ?
+ // XXX compute q->i_wsum
+ return 0;
+}
+
+/* remove an empty queue */
+static int
+qfq_free_queue(struct dn_queue *_q)
+{
+ struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
+ struct qfq_class *cl = (struct qfq_class *)_q;
+ if (cl->inv_w) {
+ q->wsum -= ONE_FP/cl->inv_w;
+ cl->inv_w = 0; /* reset weight to avoid run twice */
+ }
+ return 0;
+}
+
+/* Calculate a mask to mimic what would be ffs_from(). */
+static inline unsigned long
+mask_from(unsigned long bitmap, int from)
+{
+ return bitmap & ~((1UL << from) - 1);
+}
+
+/*
+ * The state computation relies on ER=0, IR=1, EB=2, IB=3
+ * First compute eligibility comparing grp->S, q->V,
+ * then check if someone is blocking us and possibly add EB
+ */
+static inline unsigned int
+qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp)
+{
+ /* if S > V we are not eligible */
+ unsigned int state = qfq_gt(grp->S, q->V);
+ unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
+ struct qfq_group *next;
+
+ if (mask) {
+ next = qfq_ffs(q, mask);
+ if (qfq_gt(grp->F, next->F))
+ state |= EB;
+ }
+
+ return state;
+}
+
+/*
+ * In principle
+ * q->bitmaps[dst] |= q->bitmaps[src] & mask;
+ * q->bitmaps[src] &= ~mask;
+ * but we should make sure that src != dst
+ */
+static inline void
+qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst)
+{
+ q->bitmaps[dst] |= q->bitmaps[src] & mask;
+ q->bitmaps[src] &= ~mask;
+}
+
+static inline void
+qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish)
+{
+ unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
+ struct qfq_group *next;
+
+ if (mask) {
+ next = qfq_ffs(q, mask);
+ if (!qfq_gt(next->F, old_finish))
+ return;
+ }
+
+ mask = (1UL << index) - 1;
+ qfq_move_groups(q, mask, EB, ER);
+ qfq_move_groups(q, mask, IB, IR);
+}
+
+/*
+ * perhaps
+ *
+ old_V ^= q->V;
+ old_V >>= QFQ_MIN_SLOT_SHIFT;
+ if (old_V) {
+ ...
+ }
+ *
+ */
+static inline void
+qfq_make_eligible(struct qfq_sched *q, uint64_t old_V)
+{
+ unsigned long mask, vslot, old_vslot;
+
+ vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
+ old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
+
+ if (vslot != old_vslot) {
+ mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
+ qfq_move_groups(q, mask, IR, ER);
+ qfq_move_groups(q, mask, IB, EB);
+ }
+}
+
+/*
+ * XXX we should make sure that slot becomes less than 32.
+ * This is guaranteed by the input values.
+ * roundedS is always cl->S rounded on grp->slot_shift bits.
+ */
+static inline void
+qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS)
+{
+ uint64_t slot = (roundedS - grp->S) >> grp->slot_shift;
+ unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
+
+ cl->next = grp->slots[i];
+ grp->slots[i] = cl;
+ __set_bit(slot, &grp->full_slots);
+}
+
+/*
+ * remove the entry from the slot
+ */
+static inline void
+qfq_front_slot_remove(struct qfq_group *grp)
+{
+ struct qfq_class **h = &grp->slots[grp->front];
+
+ *h = (*h)->next;
+ if (!*h)
+ __clear_bit(0, &grp->full_slots);
+}
+
+/*
+ * Returns the first full queue in a group. As a side effect,
+ * adjust the bucket list so the first non-empty bucket is at
+ * position 0 in full_slots.
+ */
+static inline struct qfq_class *
+qfq_slot_scan(struct qfq_group *grp)
+{
+ int i;
+
+ ND("grp %d full %x", grp->index, grp->full_slots);
+ if (!grp->full_slots)
+ return NULL;
+
+ i = ffs(grp->full_slots) - 1; // zero-based
+ if (i > 0) {
+ grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
+ grp->full_slots >>= i;
+ }
+
+ return grp->slots[grp->front];
+}
+
+/*
+ * adjust the bucket list. When the start time of a group decreases,
+ * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
+ * move the objects. The mask of occupied slots must be shifted
+ * because we use ffs() to find the first non-empty slot.
+ * This covers decreases in the group's start time, but what about
+ * increases of the start time ?
+ * Here too we should make sure that i is less than 32
+ */
+static inline void
+qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS)
+{
+ unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
+
+ grp->full_slots <<= i;
+ grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
+}
+
+
+static inline void
+qfq_update_eligible(struct qfq_sched *q, uint64_t old_V)
+{
+ bitmap ineligible;
+
+ ineligible = q->bitmaps[IR] | q->bitmaps[IB];
+ if (ineligible) {
+ if (!q->bitmaps[ER]) {
+ struct qfq_group *grp;
+ grp = qfq_ffs(q, ineligible);
+ if (qfq_gt(grp->S, q->V))
+ q->V = grp->S;
+ }
+ qfq_make_eligible(q, old_V);
+ }
+}
+
+/*
+ * Updates the class, returns true if the group also needs to be updated.
+ */
+static inline int
+qfq_update_class(struct qfq_sched *q, struct qfq_group *grp,
+ struct qfq_class *cl)
+{
+
+ cl->S = cl->F;
+ if (cl->_q.mq.head == NULL) {
+ qfq_front_slot_remove(grp);
+ } else {
+ unsigned int len;
+ uint64_t roundedS;
+
+ len = cl->_q.mq.head->m_pkthdr.len;
+ cl->F = cl->S + (uint64_t)len * cl->inv_w;
+ roundedS = qfq_round_down(cl->S, grp->slot_shift);
+ if (roundedS == grp->S)
+ return 0;
+
+ qfq_front_slot_remove(grp);
+ qfq_slot_insert(grp, cl, roundedS);
+ }
+ return 1;
+}
+
+static struct mbuf *
+qfq_dequeue(struct dn_sch_inst *si)
+{
+ struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+ struct qfq_group *grp;
+ struct qfq_class *cl;
+ struct mbuf *m;
+ uint64_t old_V;
+
+ NO(q->loops++;)
+ if (!q->bitmaps[ER]) {
+ NO(if (q->queued)
+ dump_sched(q, "start dequeue");)
+ return NULL;
+ }
+
+ grp = qfq_ffs(q, q->bitmaps[ER]);
+
+ cl = grp->slots[grp->front];
+ /* extract from the first bucket in the bucket list */
+ m = dn_dequeue(&cl->_q);
+
+ if (!m) {
+ D("BUG/* non-workconserving leaf */");
+ return NULL;
+ }
+ NO(q->queued--;)
+ old_V = q->V;
+ q->V += (uint64_t)m->m_pkthdr.len * IWSUM;
+ ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V);
+
+ if (qfq_update_class(q, grp, cl)) {
+ uint64_t old_F = grp->F;
+ cl = qfq_slot_scan(grp);
+ if (!cl) { /* group gone, remove from ER */
+ __clear_bit(grp->index, &q->bitmaps[ER]);
+ // grp->S = grp->F + 1; // XXX debugging only
+ } else {
+ uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift);
+ unsigned int s;
+
+ if (grp->S == roundedS)
+ goto skip_unblock;
+ grp->S = roundedS;
+ grp->F = roundedS + (2ULL << grp->slot_shift);
+ /* remove from ER and put in the new set */
+ __clear_bit(grp->index, &q->bitmaps[ER]);
+ s = qfq_calc_state(q, grp);
+ __set_bit(grp->index, &q->bitmaps[s]);
+ }
+ /* we need to unblock even if the group has gone away */
+ qfq_unblock_groups(q, grp->index, old_F);
+ }
+
+skip_unblock:
+ qfq_update_eligible(q, old_V);
+ NO(if (!q->bitmaps[ER] && q->queued)
+ dump_sched(q, "end dequeue");)
+
+ return m;
+}
+
+/*
+ * Assign a reasonable start time for a new flow k in group i.
+ * Admissible values for \hat(F) are multiples of \sigma_i
+ * no greater than V+\sigma_i . Larger values mean that
+ * we had a wraparound so we consider the timestamp to be stale.
+ *
+ * If F is not stale and F >= V then we set S = F.
+ * Otherwise we should assign S = V, but this may violate
+ * the ordering in ER. So, if we have groups in ER, set S to
+ * the F_j of the first group j which would be blocking us.
+ * We are guaranteed not to move S backward because
+ * otherwise our group i would still be blocked.
+ */
+static inline void
+qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
+{
+ unsigned long mask;
+ uint32_t limit, roundedF;
+ int slot_shift = cl->grp->slot_shift;
+
+ roundedF = qfq_round_down(cl->F, slot_shift);
+ limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);
+
+ if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
+ /* timestamp was stale */
+ mask = mask_from(q->bitmaps[ER], cl->grp->index);
+ if (mask) {
+ struct qfq_group *next = qfq_ffs(q, mask);
+ if (qfq_gt(roundedF, next->F)) {
+ cl->S = next->F;
+ return;
+ }
+ }
+ cl->S = q->V;
+ } else { /* timestamp is not stale */
+ cl->S = cl->F;
+ }
+}
+
+static int
+qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m)
+{
+ struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+ struct qfq_group *grp;
+ struct qfq_class *cl = (struct qfq_class *)_q;
+ uint64_t roundedS;
+ int s;
+
+ NO(q->loops++;)
+ DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len,
+ _q, cl->inv_w, cl->grp->index);
+ /* XXX verify that the packet obeys the parameters */
+ if (m != _q->mq.head) {
+ if (dn_enqueue(_q, m, 0)) /* packet was dropped */
+ return 1;
+ NO(q->queued++;)
+ if (m != _q->mq.head)
+ return 0;
+ }
+ /* If we reach this point, queue q was idle */
+ grp = cl->grp;
+ qfq_update_start(q, cl); /* adjust start time */
+ /* compute new finish time and rounded start. */
+ cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w;
+ roundedS = qfq_round_down(cl->S, grp->slot_shift);
+
+ /*
+ * insert cl in the correct bucket.
+ * If cl->S >= grp->S we don't need to adjust the
+ * bucket list and simply go to the insertion phase.
+ * Otherwise grp->S is decreasing, we must make room
+ * in the bucket list, and also recompute the group state.
+ * Finally, if there were no flows in this group and nobody
+ * was in ER make sure to adjust V.
+ */
+ if (grp->full_slots) {
+ if (!qfq_gt(grp->S, cl->S))
+ goto skip_update;
+ /* create a slot for this cl->S */
+ qfq_slot_rotate(q, grp, roundedS);
+ /* group was surely ineligible, remove */
+ __clear_bit(grp->index, &q->bitmaps[IR]);
+ __clear_bit(grp->index, &q->bitmaps[IB]);
+ } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
+ q->V = roundedS;
+
+ grp->S = roundedS;
+ grp->F = roundedS + (2ULL << grp->slot_shift); // i.e. 2\sigma_i
+ s = qfq_calc_state(q, grp);
+ __set_bit(grp->index, &q->bitmaps[s]);
+ ND("new state %d 0x%x", s, q->bitmaps[s]);
+ ND("S %llx F %llx V %llx", cl->S, cl->F, q->V);
+skip_update:
+ qfq_slot_insert(grp, cl, roundedS);
+
+ return 0;
+}
+
+
+#if 0
+static inline void
+qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
+ struct qfq_class *cl, struct qfq_class **pprev)
+{
+ unsigned int i, offset;
+ uint64_t roundedS;
+
+ roundedS = qfq_round_down(cl->S, grp->slot_shift);
+ offset = (roundedS - grp->S) >> grp->slot_shift;
+ i = (grp->front + offset) % QFQ_MAX_SLOTS;
+
+#ifdef notyet
+ if (!pprev) {
+ pprev = &grp->slots[i];
+ while (*pprev && *pprev != cl)
+ pprev = &(*pprev)->next;
+ }
+#endif
+
+ *pprev = cl->next;
+ if (!grp->slots[i])
+ __clear_bit(offset, &grp->full_slots);
+}
+
+/*
+ * called to forcibly destroy a queue.
+ * If the queue is not in the front bucket, or if it has
+ * other queues in the front bucket, we can simply remove
+ * the queue with no other side effects.
+ * Otherwise we must propagate the event up.
+ * XXX description to be completed.
+ */
+static void
+qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl,
+ struct qfq_class **pprev)
+{
+ struct qfq_group *grp = &q->groups[cl->index];
+ unsigned long mask;
+ uint64_t roundedS;
+ int s;
+
+ cl->F = cl->S; // not needed if the class goes away.
+ qfq_slot_remove(q, grp, cl, pprev);
+
+ if (!grp->full_slots) {
+ /* nothing left in the group, remove from all sets.
+ * Do ER last because if we were blocking other groups
+ * we must unblock them.
+ */
+ __clear_bit(grp->index, &q->bitmaps[IR]);
+ __clear_bit(grp->index, &q->bitmaps[EB]);
+ __clear_bit(grp->index, &q->bitmaps[IB]);
+
+ if (test_bit(grp->index, &q->bitmaps[ER]) &&
+ !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
+ mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
+ if (mask)
+ mask = ~((1UL << __fls(mask)) - 1);
+ else
+ mask = ~0UL;
+ qfq_move_groups(q, mask, EB, ER);
+ qfq_move_groups(q, mask, IB, IR);
+ }
+ __clear_bit(grp->index, &q->bitmaps[ER]);
+ } else if (!grp->slots[grp->front]) {
+ cl = qfq_slot_scan(grp);
+ roundedS = qfq_round_down(cl->S, grp->slot_shift);
+ if (grp->S != roundedS) {
+ __clear_bit(grp->index, &q->bitmaps[ER]);
+ __clear_bit(grp->index, &q->bitmaps[IR]);
+ __clear_bit(grp->index, &q->bitmaps[EB]);
+ __clear_bit(grp->index, &q->bitmaps[IB]);
+ grp->S = roundedS;
+ grp->F = roundedS + (2ULL << grp->slot_shift);
+ s = qfq_calc_state(q, grp);
+ __set_bit(grp->index, &q->bitmaps[s]);
+ }
+ }
+ qfq_update_eligible(q, q->V);
+}
+#endif
+
+static int
+qfq_new_fsk(struct dn_fsk *f)
+{
+ ipdn_bound_var(&f->fs.par[0], 1, 1, QFQ_MAX_WEIGHT, "qfq weight");
+ ipdn_bound_var(&f->fs.par[1], 1500, 1, 2000, "qfq maxlen");
+ ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]);
+ return 0;
+}
+
+/*
+ * initialize a new scheduler instance
+ */
+static int
+qfq_new_sched(struct dn_sch_inst *si)
+{
+ struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+ struct qfq_group *grp;
+ int i;
+
+ for (i = 0; i <= QFQ_MAX_INDEX; i++) {
+ grp = &q->groups[i];
+ grp->index = i;
+ grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS -
+ (QFQ_MAX_INDEX - i);
+ }
+ return 0;
+}
+
+/*
+ * QFQ scheduler descriptor
+ */
+static struct dn_alg qfq_desc = {
+ _SI( .type = ) DN_SCHED_QFQ,
+ _SI( .name = ) "QFQ",
+ _SI( .flags = ) DN_MULTIQUEUE,
+
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct qfq_sched),
+ _SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue),
+
+ _SI( .enqueue = ) qfq_enqueue,
+ _SI( .dequeue = ) qfq_dequeue,
+
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) qfq_new_sched,
+ _SI( .free_sched = ) NULL,
+ _SI( .new_fsk = ) qfq_new_fsk,
+ _SI( .free_fsk = ) NULL,
+ _SI( .new_queue = ) qfq_new_queue,
+ _SI( .free_queue = ) qfq_free_queue,
+};
+
+DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
+
+#ifdef QFQ_DEBUG
+static void
+dump_groups(struct qfq_sched *q, uint32_t mask)
+{
+ int i, j;
+
+ for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
+ struct qfq_group *g = &q->groups[i];
+
+ if (0 == (mask & (1<<i)))
+ continue;
+ for (j = 0; j < QFQ_MAX_SLOTS; j++) {
+ if (g->slots[j])
+ D(" bucket %d %p", j, g->slots[j]);
+ }
+ D("full_slots 0x%x", g->full_slots);
+ D(" %2d S 0x%20llx F 0x%llx %c", i,
+ g->S, g->F,
+ mask & (1<<i) ? '1' : '0');
+ }
+}
+
+static void
+dump_sched(struct qfq_sched *q, const char *msg)
+{
+ D("--- in %s: ---", msg);
+ ND("loops %d queued %d V 0x%llx", q->loops, q->queued, q->V);
+ D(" ER 0x%08x", q->bitmaps[ER]);
+ D(" EB 0x%08x", q->bitmaps[EB]);
+ D(" IR 0x%08x", q->bitmaps[IR]);
+ D(" IB 0x%08x", q->bitmaps[IB]);
+ dump_groups(q, 0xffffffff);
+};
+#endif /* QFQ_DEBUG */
diff --git a/freebsd/sys/netinet/ipfw/dn_sched_rr.c b/freebsd/sys/netinet/ipfw/dn_sched_rr.c
new file mode 100644
index 00000000..4aa833f6
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/dn_sched_rr.c
@@ -0,0 +1,309 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/net/if.h> /* IFNAMSIZ */
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* ipfw_rule_ref */
+#include <freebsd/netinet/ip_fw.h> /* flow_id */
+#include <freebsd/netinet/ip_dummynet.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#include <freebsd/netinet/ipfw/ip_dn_private.h>
+#include <freebsd/netinet/ipfw/dn_sched.h>
+#else
+#include <freebsd/dn_test.h>
+#endif
+
+#define DN_SCHED_RR 3 // XXX Where?
+
+struct rr_queue {
+ struct dn_queue q; /* Standard queue */
+ int status; /* 1: queue is in the list */
+ int credit; /* Number of bytes to transmit */
+ int quantum; /* quantum * C */
+ struct rr_queue *qnext; /* */
+};
+
+/* struct rr_schk contains global config parameters
+ * and is right after dn_schk
+ */
+struct rr_schk {
+ int min_q; /* Min quantum */
+ int max_q; /* Max quantum */
+ int q_bytes; /* Bytes per quantum */
+};
+
+/* per-instance round robin list, right after dn_sch_inst */
+struct rr_si {
+ struct rr_queue *head, *tail; /* Pointer to current queue */
+};
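+
+/*
+ * A minimal sketch of the layout convention used throughout these
+ * schedulers: the framework allocates the private areas declared above
+ * directly after the generic objects, so the handlers below recover them
+ * with pointer arithmetic such as "(struct rr_si *)(_si + 1)".  The helper
+ * below is hypothetical (not part of the FreeBSD sources) and is kept
+ * disabled; it only illustrates the idiom.
+ */
+#if 0
+static void
+rr_private_area_sketch(struct dn_sch_inst *_si)
+{
+	/* the rr_si private area begins right past the generic instance */
+	struct rr_si *si = (struct rr_si *)(_si + 1);
+
+	si->head = si->tail = NULL;	/* same initialization as rr_new_sched() */
+}
+#endif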
+
+/* Append a queue to the rr list */
+static inline void
+rr_append(struct rr_queue *q, struct rr_si *si)
+{
+ q->status = 1; /* mark as in-rr_list */
+ q->credit = q->quantum; /* initialize credit */
+
+ /* append to the tail */
+ if (si->head == NULL)
+ si->head = q;
+ else
+ si->tail->qnext = q;
+ si->tail = q; /* advance the tail pointer */
+ q->qnext = si->head; /* make it circular */
+}
+
+/* Remove the head queue from the circular list. */
+static inline void
+rr_remove_head(struct rr_si *si)
+{
+ if (si->head == NULL)
+ return; /* empty queue */
+ si->head->status = 0;
+
+ if (si->head == si->tail) {
+ si->head = si->tail = NULL;
+ return;
+ }
+
+ si->head = si->head->qnext;
+ si->tail->qnext = si->head;
+}
+
+/* Remove a queue from the circular list.
+ * XXX see if it can be merged with remove_queue()
+ */
+static inline void
+remove_queue_q(struct rr_queue *q, struct rr_si *si)
+{
+ struct rr_queue *prev;
+
+ if (q->status != 1)
+ return;
+ if (q == si->head) {
+ rr_remove_head(si);
+ return;
+ }
+
+ for (prev = si->head; prev; prev = prev->qnext) {
+ if (prev->qnext != q)
+ continue;
+ prev->qnext = q->qnext;
+ if (q == si->tail)
+ si->tail = prev;
+ q->status = 0;
+ break;
+ }
+}
+
+
+static inline void
+next_pointer(struct rr_si *si)
+{
+ if (si->head == NULL)
+ return; /* empty queue */
+
+ si->head = si->head->qnext;
+ si->tail = si->tail->qnext;
+}
+
+static int
+rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+ struct rr_si *si;
+ struct rr_queue *rrq;
+
+ if (m != q->mq.head) {
+ if (dn_enqueue(q, m, 0)) /* packet was dropped */
+ return 1;
+ if (m != q->mq.head)
+ return 0;
+ }
+
+	/* If we reach this point, queue q was idle */
+ si = (struct rr_si *)(_si + 1);
+ rrq = (struct rr_queue *)q;
+
+ if (rrq->status == 1) /* Queue is already in the queue list */
+ return 0;
+
+ /* Insert the queue in the queue list */
+ rr_append(rrq, si);
+
+ return 0;
+}
+
+static struct mbuf *
+rr_dequeue(struct dn_sch_inst *_si)
+{
+ /* Access scheduler instance private data */
+ struct rr_si *si = (struct rr_si *)(_si + 1);
+ struct rr_queue *rrq;
+ uint64_t len;
+
+ while ( (rrq = si->head) ) {
+ struct mbuf *m = rrq->q.mq.head;
+ if ( m == NULL) {
+ /* empty queue, remove from list */
+ rr_remove_head(si);
+ continue;
+ }
+ len = m->m_pkthdr.len;
+
+ if (len > rrq->credit) {
+ /* Packet too big */
+ rrq->credit += rrq->quantum;
+ /* Try next queue */
+ next_pointer(si);
+ } else {
+ rrq->credit -= len;
+ return dn_dequeue(&rrq->q);
+ }
+ }
+
+	/* no packet to dequeue */
+ return NULL;
+}
+
+static int
+rr_config(struct dn_schk *_schk)
+{
+ struct rr_schk *schk = (struct rr_schk *)(_schk + 1);
+ ND("called");
+
+ /* use reasonable quantums (64..2k bytes, default 1500) */
+ schk->min_q = 64;
+ schk->max_q = 2048;
+ schk->q_bytes = 1500; /* quantum */
+
+ return 0;
+}
+
+static int
+rr_new_sched(struct dn_sch_inst *_si)
+{
+ struct rr_si *si = (struct rr_si *)(_si + 1);
+
+ ND("called");
+ si->head = si->tail = NULL;
+
+ return 0;
+}
+
+static int
+rr_free_sched(struct dn_sch_inst *_si)
+{
+ ND("called");
+ /* Nothing to do? */
+ return 0;
+}
+
+static int
+rr_new_fsk(struct dn_fsk *fs)
+{
+ struct rr_schk *schk = (struct rr_schk *)(fs->sched + 1);
+ /* par[0] is the weight, par[1] is the quantum step */
+ ipdn_bound_var(&fs->fs.par[0], 1,
+ 1, 65536, "RR weight");
+ ipdn_bound_var(&fs->fs.par[1], schk->q_bytes,
+ schk->min_q, schk->max_q, "RR quantum");
+ return 0;
+}
+
+static int
+rr_new_queue(struct dn_queue *_q)
+{
+ struct rr_queue *q = (struct rr_queue *)_q;
+
+ _q->ni.oid.subtype = DN_SCHED_RR;
+
+ q->quantum = _q->fs->fs.par[0] * _q->fs->fs.par[1];
+ ND("called, q->quantum %d", q->quantum);
+ q->credit = q->quantum;
+ q->status = 0;
+
+ if (_q->mq.head != NULL) {
+ /* Queue NOT empty, insert in the queue list */
+ rr_append(q, (struct rr_si *)(_q->_si + 1));
+ }
+ return 0;
+}
+
+static int
+rr_free_queue(struct dn_queue *_q)
+{
+ struct rr_queue *q = (struct rr_queue *)_q;
+
+ ND("called");
+ if (q->status == 1) {
+ struct rr_si *si = (struct rr_si *)(_q->_si + 1);
+ remove_queue_q(q, si);
+ }
+ return 0;
+}
+
+/*
+ * RR scheduler descriptor
+ * contains the type of the scheduler, the name, the size of the
+ * structures and function pointers.
+ */
+static struct dn_alg rr_desc = {
+ _SI( .type = ) DN_SCHED_RR,
+ _SI( .name = ) "RR",
+ _SI( .flags = ) DN_MULTIQUEUE,
+
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct rr_si),
+ _SI( .q_datalen = ) sizeof(struct rr_queue) - sizeof(struct dn_queue),
+
+ _SI( .enqueue = ) rr_enqueue,
+ _SI( .dequeue = ) rr_dequeue,
+
+ _SI( .config = ) rr_config,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) rr_new_sched,
+ _SI( .free_sched = ) rr_free_sched,
+ _SI( .new_fsk = ) rr_new_fsk,
+ _SI( .free_fsk = ) NULL,
+ _SI( .new_queue = ) rr_new_queue,
+ _SI( .free_queue = ) rr_free_queue,
+};
+
+
+DECLARE_DNSCHED_MODULE(dn_rr, &rr_desc);
diff --git a/freebsd/sys/netinet/ipfw/dn_sched_wf2q.c b/freebsd/sys/netinet/ipfw/dn_sched_wf2q.c
new file mode 100644
index 00000000..c1e4c21d
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/dn_sched_wf2q.c
@@ -0,0 +1,375 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/net/if.h> /* IFNAMSIZ */
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* ipfw_rule_ref */
+#include <freebsd/netinet/ip_fw.h> /* flow_id */
+#include <freebsd/netinet/ip_dummynet.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#include <freebsd/netinet/ipfw/ip_dn_private.h>
+#include <freebsd/netinet/ipfw/dn_sched.h>
+#else
+#include <freebsd/dn_test.h>
+#endif
+
+#ifndef MAX64
+#define MAX64(x,y) ((( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x))
+#endif
+
+/*
+ * timestamps are computed on 64 bit using fixed point arithmetic.
+ * LMAX_BITS, WMAX_BITS are the max number of bits for the packet len
+ * and sum of weights, respectively. FRAC_BITS is the number of
+ * fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large
+ * errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w
+ * using an unsigned 32-bit division, and to avoid wraparounds we need
+ * LMAX_BITS + WMAX_BITS + FRAC_BITS << 64
+ * As an example
+ * FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19
+ */
+#ifndef FRAC_BITS
+#define FRAC_BITS 28 /* shift for fixed point arithmetic */
+#define ONE_FP (1UL << FRAC_BITS)
+#endif
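+
+/*
+ * Worked example of the fixed-point timestamps described above, under the
+ * default FRAC_BITS = 28 (so ONE_FP is 2^28): a flow of weight w stores
+ * inv_w = ONE_FP / w, and a packet of len bytes advances the finish time
+ * by len * inv_w, i.e. roughly len / w scaled by 2^FRAC_BITS.  The helper
+ * below is a hypothetical, disabled sketch of that computation.
+ */
+#if 0
+static uint64_t
+wf2qp_finish_time_sketch(uint64_t S, uint32_t weight, uint32_t len)
+{
+	uint32_t inv_w = ONE_FP / weight;	/* as in wf2qp_new_queue() */
+
+	/* F = S + len / weight, in fixed point */
+	return (S + (uint64_t)len * inv_w);
+}
+#endif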
+
+/*
+ * Private information for the scheduler instance:
+ * sch_heap (key is Finish time) returns the next queue to serve
+ * ne_heap (key is Start time) stores not-eligible queues
+ * idle_heap (key=start/finish time) stores idle flows. It must
+ * support extract-from-middle.
+ * A flow is only in 1 of the three heaps.
+ * XXX todo: use a more efficient data structure, e.g. a tree sorted
+ * by F with min_subtree(S) in each node
+ */
+struct wf2qp_si {
+ struct dn_heap sch_heap; /* top extract - key Finish time */
+ struct dn_heap ne_heap; /* top extract - key Start time */
+ struct dn_heap idle_heap; /* random extract - key Start=Finish time */
+ uint64_t V; /* virtual time */
+ uint32_t inv_wsum; /* inverse of sum of weights */
+ uint32_t wsum; /* sum of weights */
+};
+
+struct wf2qp_queue {
+ struct dn_queue _q;
+ uint64_t S, F; /* start time, finish time */
+ uint32_t inv_w; /* ONE_FP / weight */
+ int32_t heap_pos; /* position (index) of struct in heap */
+};
+
+/*
+ * This file implements a WF2Q+ scheduler as it has been in dummynet
+ * since 2000.
+ * The scheduler supports per-flow queues and has O(log N) complexity.
+ *
+ * WF2Q+ needs to drain entries from the idle heap so that we
+ * can keep the sum of weights up to date. We can do it whenever
+ * we get a chance, or periodically, or following some other
+ * strategy. The function idle_check() drains at most N elements
+ * from the idle heap.
+ */
+static void
+idle_check(struct wf2qp_si *si, int n, int force)
+{
+ struct dn_heap *h = &si->idle_heap;
+ while (n-- > 0 && h->elements > 0 &&
+ (force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) {
+ struct dn_queue *q = HEAP_TOP(h)->object;
+ struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
+
+ heap_extract(h, NULL);
+ /* XXX to let the flowset delete the queue we should
+ * mark it as 'unused' by the scheduler.
+ */
+ alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */
+ si->wsum -= q->fs->fs.par[0]; /* adjust sum of weights */
+ if (si->wsum > 0)
+ si->inv_wsum = ONE_FP/si->wsum;
+ }
+}
+
+static int
+wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+ struct dn_fsk *fs = q->fs;
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ struct wf2qp_queue *alg_fq;
+ uint64_t len = m->m_pkthdr.len;
+
+ if (m != q->mq.head) {
+ if (dn_enqueue(q, m, 0)) /* packet was dropped */
+ return 1;
+ if (m != q->mq.head) /* queue was already busy */
+ return 0;
+ }
+
+	/* If we reach this point, queue q was idle */
+ alg_fq = (struct wf2qp_queue *)q;
+
+ if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
+		/* F<S means timestamps are invalid -> brand new queue. */
+ alg_fq->S = si->V; /* init start time */
+ si->wsum += fs->fs.par[0]; /* add weight of new queue. */
+ si->inv_wsum = ONE_FP/si->wsum;
+ } else { /* if it was idle then it was in the idle heap */
+ heap_extract(&si->idle_heap, q);
+ alg_fq->S = MAX64(alg_fq->F, si->V); /* compute new S */
+ }
+ alg_fq->F = alg_fq->S + len * alg_fq->inv_w;
+
+ /* if nothing is backlogged, make sure this flow is eligible */
+ if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0)
+ si->V = MAX64(alg_fq->S, si->V);
+
+ /*
+	 * Look at eligibility. A flow is not eligible if S>V (when
+ * this happens, it means that there is some other flow already
+ * scheduled for the same pipe, so the sch_heap cannot be
+ * empty). If the flow is not eligible we just store it in the
+ * ne_heap. Otherwise, we store in the sch_heap.
+ * Note that for all flows in sch_heap (SCH), S_i <= V,
+ * and for all flows in ne_heap (NEH), S_i > V.
+ * So when we need to compute max(V, min(S_i)) forall i in
+ * SCH+NEH, we only need to look into NEH.
+ */
+ if (DN_KEY_LT(si->V, alg_fq->S)) {
+ /* S>V means flow Not eligible. */
+ if (si->sch_heap.elements == 0)
+ D("++ ouch! not eligible but empty scheduler!");
+ heap_insert(&si->ne_heap, alg_fq->S, q);
+ } else {
+ heap_insert(&si->sch_heap, alg_fq->F, q);
+ }
+ return 0;
+}
+
+/* XXX invariant: sch > 0 || V >= min(S in neh) */
+static struct mbuf *
+wf2qp_dequeue(struct dn_sch_inst *_si)
+{
+ /* Access scheduler instance private data */
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ struct mbuf *m;
+ struct dn_queue *q;
+ struct dn_heap *sch = &si->sch_heap;
+ struct dn_heap *neh = &si->ne_heap;
+ struct wf2qp_queue *alg_fq;
+
+ if (sch->elements == 0 && neh->elements == 0) {
+ /* we have nothing to do. We could kill the idle heap
+ * altogether and reset V
+ */
+ idle_check(si, 0x7fffffff, 1);
+ si->V = 0;
+ si->wsum = 0; /* should be set already */
+ return NULL; /* quick return if nothing to do */
+ }
+ idle_check(si, 1, 0); /* drain something from the idle heap */
+
+ /* make sure at least one element is eligible, bumping V
+ * and moving entries that have become eligible.
+ * We need to repeat the first part twice, before and
+ * after extracting the candidate, or enqueue() will
+ * find the data structure in a wrong state.
+ */
+ m = NULL;
+ for(;;) {
+ /*
+ * Compute V = max(V, min(S_i)). Remember that all elements
+ * in sch have by definition S_i <= V so if sch is not empty,
+ * V is surely the max and we must not update it. Conversely,
+ * if sch is empty we only need to look at neh.
+ * We don't need to move the queues, as it will be done at the
+ * next enqueue
+ */
+ if (sch->elements == 0 && neh->elements > 0) {
+ si->V = MAX64(si->V, HEAP_TOP(neh)->key);
+ }
+ while (neh->elements > 0 &&
+ DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
+ q = HEAP_TOP(neh)->object;
+ alg_fq = (struct wf2qp_queue *)q;
+ heap_extract(neh, NULL);
+ heap_insert(sch, alg_fq->F, q);
+ }
+ if (m) /* pkt found in previous iteration */
+ break;
+ /* ok we have at least one eligible pkt */
+ q = HEAP_TOP(sch)->object;
+ alg_fq = (struct wf2qp_queue *)q;
+ m = dn_dequeue(q);
+ heap_extract(sch, NULL); /* Remove queue from heap. */
+ si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum;
+ alg_fq->S = alg_fq->F; /* Update start time. */
+ if (q->mq.head == 0) { /* not backlogged any more. */
+ heap_insert(&si->idle_heap, alg_fq->F, q);
+ } else { /* Still backlogged. */
+ /* Update F, store in neh or sch */
+ uint64_t len = q->mq.head->m_pkthdr.len;
+ alg_fq->F += len * alg_fq->inv_w;
+ if (DN_KEY_LEQ(alg_fq->S, si->V)) {
+ heap_insert(sch, alg_fq->F, q);
+ } else {
+ heap_insert(neh, alg_fq->S, q);
+ }
+ }
+ }
+ return m;
+}
+
+static int
+wf2qp_new_sched(struct dn_sch_inst *_si)
+{
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ int ofs = offsetof(struct wf2qp_queue, heap_pos);
+
+ /* all heaps support extract from middle */
+ if (heap_init(&si->idle_heap, 16, ofs) ||
+ heap_init(&si->sch_heap, 16, ofs) ||
+ heap_init(&si->ne_heap, 16, ofs)) {
+ heap_free(&si->ne_heap);
+ heap_free(&si->sch_heap);
+ heap_free(&si->idle_heap);
+ return ENOMEM;
+ }
+ return 0;
+}
+
+static int
+wf2qp_free_sched(struct dn_sch_inst *_si)
+{
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+
+ heap_free(&si->sch_heap);
+ heap_free(&si->ne_heap);
+ heap_free(&si->idle_heap);
+
+ return 0;
+}
+
+static int
+wf2qp_new_fsk(struct dn_fsk *fs)
+{
+ ipdn_bound_var(&fs->fs.par[0], 1,
+ 1, 100, "WF2Q+ weight");
+ return 0;
+}
+
+static int
+wf2qp_new_queue(struct dn_queue *_q)
+{
+ struct wf2qp_queue *q = (struct wf2qp_queue *)_q;
+
+ _q->ni.oid.subtype = DN_SCHED_WF2QP;
+ q->F = 0; /* not strictly necessary */
+ q->S = q->F + 1; /* mark timestamp as invalid. */
+ q->inv_w = ONE_FP / _q->fs->fs.par[0];
+ if (_q->mq.head != NULL) {
+ wf2qp_enqueue(_q->_si, _q, _q->mq.head);
+ }
+ return 0;
+}
+
+/*
+ * Called when the infrastructure removes a queue (e.g. flowset
+ * is reconfigured). Nothing to do if we did not 'own' the queue,
+ * otherwise remove it from the right heap and adjust the sum
+ * of weights.
+ */
+static int
+wf2qp_free_queue(struct dn_queue *q)
+{
+ struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
+ struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
+
+ if (alg_fq->S >= alg_fq->F + 1)
+ return 0; /* nothing to do, not in any heap */
+ si->wsum -= q->fs->fs.par[0];
+ if (si->wsum > 0)
+ si->inv_wsum = ONE_FP/si->wsum;
+
+ /* extract from the heap. XXX TODO we may need to adjust V
+ * to make sure the invariants hold.
+ */
+ if (q->mq.head == NULL) {
+ heap_extract(&si->idle_heap, q);
+ } else if (DN_KEY_LT(si->V, alg_fq->S)) {
+ heap_extract(&si->ne_heap, q);
+ } else {
+ heap_extract(&si->sch_heap, q);
+ }
+ return 0;
+}
+
+/*
+ * WF2Q+ scheduler descriptor
+ * contains the type of the scheduler, the name, the size of the
+ * structures and function pointers.
+ */
+static struct dn_alg wf2qp_desc = {
+ _SI( .type = ) DN_SCHED_WF2QP,
+ _SI( .name = ) "WF2Q+",
+ _SI( .flags = ) DN_MULTIQUEUE,
+
+ /* we need extra space in the si and the queue */
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct wf2qp_si),
+ _SI( .q_datalen = ) sizeof(struct wf2qp_queue) -
+ sizeof(struct dn_queue),
+
+ _SI( .enqueue = ) wf2qp_enqueue,
+ _SI( .dequeue = ) wf2qp_dequeue,
+
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) wf2qp_new_sched,
+ _SI( .free_sched = ) wf2qp_free_sched,
+
+ _SI( .new_fsk = ) wf2qp_new_fsk,
+ _SI( .free_fsk = ) NULL,
+
+ _SI( .new_queue = ) wf2qp_new_queue,
+ _SI( .free_queue = ) wf2qp_free_queue,
+};
+
+
+DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);
diff --git a/freebsd/sys/netinet/ipfw/ip_dn_glue.c b/freebsd/sys/netinet/ipfw/ip_dn_glue.c
new file mode 100644
index 00000000..302c4d29
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_dn_glue.c
@@ -0,0 +1,847 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ *
+ * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8
+ */
+
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/taskqueue.h>
+#include <freebsd/net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#include <freebsd/netinet/ip_dummynet.h>
+#include <freebsd/netinet/ipfw/ip_dn_private.h>
+#include <freebsd/netinet/ipfw/dn_sched.h>
+
+/* FREEBSD7.2 ip_dummynet.h r191715*/
+
+struct dn_heap_entry7 {
+ int64_t key; /* sorting key. Topmost element is smallest one */
+ void *object; /* object pointer */
+};
+
+struct dn_heap7 {
+ int size;
+ int elements;
+ int offset; /* XXX if > 0 this is the offset of direct ptr to obj */
+ struct dn_heap_entry7 *p; /* really an array of "size" entries */
+};
+
+/* Common to 7.2 and 8 */
+struct dn_flow_set {
+ SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */
+
+ u_short fs_nr ; /* flow_set number */
+ u_short flags_fs;
+#define DNOLD_HAVE_FLOW_MASK 0x0001
+#define DNOLD_IS_RED 0x0002
+#define DNOLD_IS_GENTLE_RED 0x0004
+#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */
+#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */
+#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
+#define DNOLD_IS_PIPE 0x4000
+#define DNOLD_IS_QUEUE 0x8000
+
+ struct dn_pipe7 *pipe ; /* pointer to parent pipe */
+ u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */
+
+ int weight ; /* WFQ queue weight */
+ int qsize ; /* queue size in slots or bytes */
+ int plr ; /* pkt loss rate (2^31-1 means 100%) */
+
+ struct ipfw_flow_id flow_mask ;
+
+ /* hash table of queues onto this flow_set */
+ int rq_size ; /* number of slots */
+ int rq_elements ; /* active elements */
+ struct dn_flow_queue7 **rq; /* array of rq_size entries */
+
+ u_int32_t last_expired ; /* do not expire too frequently */
+ int backlogged ; /* #active queues for this flowset */
+
+ /* RED parameters */
+#define SCALE_RED 16
+#define SCALE(x) ( (x) << SCALE_RED )
+#define SCALE_VAL(x) ( (x) >> SCALE_RED )
+#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
+ int w_q ; /* queue weight (scaled) */
+ int max_th ; /* maximum threshold for queue (scaled) */
+ int min_th ; /* minimum threshold for queue (scaled) */
+ int max_p ; /* maximum value for p_b (scaled) */
+ u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
+ u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
+ u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
+ u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
+ u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
+ u_int lookup_depth ; /* depth of lookup table */
+ int lookup_step ; /* granularity inside the lookup table */
+ int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
+ int avg_pkt_size ; /* medium packet size */
+ int max_pkt_size ; /* max packet size */
+};
+SLIST_HEAD(dn_flow_set_head, dn_flow_set);
+
+#define DN_IS_PIPE 0x4000
+#define DN_IS_QUEUE 0x8000
+struct dn_flow_queue7 {
+ struct dn_flow_queue7 *next ;
+ struct ipfw_flow_id id ;
+
+ struct mbuf *head, *tail ; /* queue of packets */
+ u_int len ;
+ u_int len_bytes ;
+
+ u_long numbytes;
+
+ u_int64_t tot_pkts ; /* statistics counters */
+ u_int64_t tot_bytes ;
+ u_int32_t drops ;
+
+ int hash_slot ; /* debugging/diagnostic */
+
+ /* RED parameters */
+ int avg ; /* average queue length est. (scaled) */
+ int count ; /* arrivals since last RED drop */
+ int random ; /* random value (scaled) */
+ u_int32_t q_time; /* start of queue idle time */
+
+ /* WF2Q+ support */
+ struct dn_flow_set *fs ; /* parent flow set */
+ int heap_pos ; /* position (index) of struct in heap */
+ int64_t sched_time ; /* current time when queue enters ready_heap */
+
+ int64_t S,F ; /* start time, finish time */
+};
+
+struct dn_pipe7 { /* a pipe */
+ SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */
+
+ int pipe_nr ; /* number */
+ int bandwidth; /* really, bytes/tick. */
+ int delay ; /* really, ticks */
+
+ struct mbuf *head, *tail ; /* packets in delay line */
+
+ /* WF2Q+ */
+ struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
+ struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
+ struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
+
+ int64_t V ; /* virtual time */
+ int sum; /* sum of weights of all active sessions */
+
+ int numbytes;
+
+ int64_t sched_time ; /* time pipe was scheduled in ready_heap */
+
+ /*
+ * When the tx clock come from an interface (if_name[0] != '\0'), its name
+ * is stored below, whereas the ifp is filled when the rule is configured.
+ */
+ char if_name[IFNAMSIZ];
+ struct ifnet *ifp ;
+ int ready ; /* set if ifp != NULL and we got a signal from it */
+
+ struct dn_flow_set fs ; /* used with fixed-rate flows */
+};
+SLIST_HEAD(dn_pipe_head7, dn_pipe7);
+
+
+/* FREEBSD8 ip_dummynet.h r196045 */
+struct dn_flow_queue8 {
+ struct dn_flow_queue8 *next ;
+ struct ipfw_flow_id id ;
+
+ struct mbuf *head, *tail ; /* queue of packets */
+ u_int len ;
+ u_int len_bytes ;
+
+ uint64_t numbytes ; /* credit for transmission (dynamic queues) */
+ int64_t extra_bits; /* extra bits simulating unavailable channel */
+
+ u_int64_t tot_pkts ; /* statistics counters */
+ u_int64_t tot_bytes ;
+ u_int32_t drops ;
+
+ int hash_slot ; /* debugging/diagnostic */
+
+ /* RED parameters */
+ int avg ; /* average queue length est. (scaled) */
+ int count ; /* arrivals since last RED drop */
+ int random ; /* random value (scaled) */
+ int64_t idle_time; /* start of queue idle time */
+
+ /* WF2Q+ support */
+ struct dn_flow_set *fs ; /* parent flow set */
+ int heap_pos ; /* position (index) of struct in heap */
+ int64_t sched_time ; /* current time when queue enters ready_heap */
+
+ int64_t S,F ; /* start time, finish time */
+};
+
+struct dn_pipe8 { /* a pipe */
+ SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */
+
+ int pipe_nr ; /* number */
+ int bandwidth; /* really, bytes/tick. */
+ int delay ; /* really, ticks */
+
+ struct mbuf *head, *tail ; /* packets in delay line */
+
+ /* WF2Q+ */
+ struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
+ struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
+ struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
+
+ int64_t V ; /* virtual time */
+ int sum; /* sum of weights of all active sessions */
+
+ /* Same as in dn_flow_queue, numbytes can become large */
+ int64_t numbytes; /* bits I can transmit (more or less). */
+ uint64_t burst; /* burst size, scaled: bits * hz */
+
+ int64_t sched_time ; /* time pipe was scheduled in ready_heap */
+ int64_t idle_time; /* start of pipe idle time */
+
+ char if_name[IFNAMSIZ];
+ struct ifnet *ifp ;
+ int ready ; /* set if ifp != NULL and we got a signal from it */
+
+ struct dn_flow_set fs ; /* used with fixed-rate flows */
+
+ /* fields to simulate a delay profile */
+#define ED_MAX_NAME_LEN 32
+ char name[ED_MAX_NAME_LEN];
+ int loss_level;
+ int samples_no;
+ int *samples;
+};
+
+#define ED_MAX_SAMPLES_NO 1024
+struct dn_pipe_max8 {
+ struct dn_pipe8 pipe;
+ int samples[ED_MAX_SAMPLES_NO];
+};
+SLIST_HEAD(dn_pipe_head8, dn_pipe8);
+
+/*
+ * Changes from 7.2 to 8:
+ * dn_pipe:
+ * numbytes from int to int64_t
+ * add burst (int64_t)
+ * add idle_time (int64_t)
+ * add profile
+ * add struct dn_pipe_max
+ * add flag DN_HAS_PROFILE
+ *
+ * dn_flow_queue
+ * numbytes from u_long to int64_t
+ * add extra_bits (int64_t)
+ * q_time from u_int32_t to int64_t and name idle_time
+ *
+ * dn_flow_set unchanged
+ *
+ */
+
+/* NOTE:XXX copied from dummynet.c */
+#define O_NEXT(p, len) ((void *)((char *)p + len))
+static void
+oid_fill(struct dn_id *oid, int len, int type, uintptr_t id)
+{
+ oid->len = len;
+ oid->type = type;
+ oid->subtype = 0;
+ oid->id = id;
+}
+/* make room in the buffer and move the pointer forward */
+static void *
+o_next(struct dn_id **o, int len, int type)
+{
+ struct dn_id *ret = *o;
+ oid_fill(ret, len, type, 0);
+ *o = O_NEXT(*o, len);
+ return ret;
+}
+
+
+static size_t pipesize7 = sizeof(struct dn_pipe7);
+static size_t pipesize8 = sizeof(struct dn_pipe8);
+static size_t pipesizemax8 = sizeof(struct dn_pipe_max8);
+
+/* Indicate the 'ipfw' version
+ * 1: from FreeBSD 7.2
+ * 0: from FreeBSD 8
+ * -1: unknown (currently unused)
+ *
+ * It is updated when an IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives.
+ * NOTE: if an IP_DUMMYNET_GET arrives and the 'ipfw' version is unknown,
+ * it is assumed to be the FreeBSD 8 version.
+ */
+static int is7 = 0;
+
+static int
+convertflags2new(int src)
+{
+ int dst = 0;
+
+ if (src & DNOLD_HAVE_FLOW_MASK)
+ dst |= DN_HAVE_MASK;
+ if (src & DNOLD_QSIZE_IS_BYTES)
+ dst |= DN_QSIZE_BYTES;
+ if (src & DNOLD_NOERROR)
+ dst |= DN_NOERROR;
+ if (src & DNOLD_IS_RED)
+ dst |= DN_IS_RED;
+ if (src & DNOLD_IS_GENTLE_RED)
+ dst |= DN_IS_GENTLE_RED;
+ if (src & DNOLD_HAS_PROFILE)
+ dst |= DN_HAS_PROFILE;
+
+ return dst;
+}
+
+static int
+convertflags2old(int src)
+{
+ int dst = 0;
+
+ if (src & DN_HAVE_MASK)
+ dst |= DNOLD_HAVE_FLOW_MASK;
+ if (src & DN_IS_RED)
+ dst |= DNOLD_IS_RED;
+ if (src & DN_IS_GENTLE_RED)
+ dst |= DNOLD_IS_GENTLE_RED;
+ if (src & DN_NOERROR)
+ dst |= DNOLD_NOERROR;
+ if (src & DN_HAS_PROFILE)
+ dst |= DNOLD_HAS_PROFILE;
+ if (src & DN_QSIZE_BYTES)
+ dst |= DNOLD_QSIZE_IS_BYTES;
+
+ return dst;
+}
+
+static int
+dn_compat_del(void *v)
+{
+ struct dn_pipe7 *p = (struct dn_pipe7 *) v;
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *) v;
+ struct {
+ struct dn_id oid;
+ uintptr_t a[1]; /* add more if we want a list */
+ } cmd;
+
+ /* XXX DN_API_VERSION ??? */
+ oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION);
+
+ if (is7) {
+ if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
+ return EINVAL;
+ if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
+ return EINVAL;
+ } else {
+ if (p8->pipe_nr == 0 && p8->fs.fs_nr == 0)
+ return EINVAL;
+ if (p8->pipe_nr != 0 && p8->fs.fs_nr != 0)
+ return EINVAL;
+ }
+
+ if (p->pipe_nr != 0) { /* pipe x delete */
+ cmd.a[0] = p->pipe_nr;
+ cmd.oid.subtype = DN_LINK;
+ } else { /* queue x delete */
+ cmd.oid.subtype = DN_FS;
+ cmd.a[0] = (is7) ? p->fs.fs_nr : p8->fs.fs_nr;
+ }
+
+ return do_config(&cmd, cmd.oid.len);
+}
+
+static int
+dn_compat_config_queue(struct dn_fs *fs, void* v)
+{
+ struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+ struct dn_flow_set *f;
+
+ if (is7)
+ f = &p7->fs;
+ else
+ f = &p8->fs;
+
+ fs->fs_nr = f->fs_nr;
+ fs->sched_nr = f->parent_nr;
+ fs->flow_mask = f->flow_mask;
+ fs->buckets = f->rq_size;
+ fs->qsize = f->qsize;
+ fs->plr = f->plr;
+ fs->par[0] = f->weight;
+ fs->flags = convertflags2new(f->flags_fs);
+ if (fs->flags & DN_IS_GENTLE_RED || fs->flags & DN_IS_RED) {
+ fs->w_q = f->w_q;
+ fs->max_th = f->max_th;
+ fs->min_th = f->min_th;
+ fs->max_p = f->max_p;
+ }
+
+ return 0;
+}
+
+static int
+dn_compat_config_pipe(struct dn_sch *sch, struct dn_link *p,
+ struct dn_fs *fs, void* v)
+{
+ struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+ int i = p7->pipe_nr;
+
+ sch->sched_nr = i;
+ sch->oid.subtype = 0;
+ p->link_nr = i;
+ fs->fs_nr = i + 2*DN_MAX_ID;
+ fs->sched_nr = i + DN_MAX_ID;
+
+ /* Common to 7 and 8 */
+ p->bandwidth = p7->bandwidth;
+ p->delay = p7->delay;
+ if (!is7) {
+ /* FreeBSD 8 has burst */
+ p->burst = p8->burst;
+ }
+
+ /* fill the fifo flowset */
+ dn_compat_config_queue(fs, v);
+ fs->fs_nr = i + 2*DN_MAX_ID;
+ fs->sched_nr = i + DN_MAX_ID;
+
+ /* Move scheduler related parameter from fs to sch */
+ sch->buckets = fs->buckets; /*XXX*/
+ fs->buckets = 0;
+ if (fs->flags & DN_HAVE_MASK) {
+ sch->flags |= DN_HAVE_MASK;
+ fs->flags &= ~DN_HAVE_MASK;
+ sch->sched_mask = fs->flow_mask;
+ bzero(&fs->flow_mask, sizeof(struct ipfw_flow_id));
+ }
+
+ return 0;
+}
+
+static int
+dn_compat_config_profile(struct dn_profile *pf, struct dn_link *p,
+ void *v)
+{
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+
+ p8->samples = &(((struct dn_pipe_max8 *)p8)->samples[0]);
+
+ pf->link_nr = p->link_nr;
+ pf->loss_level = p8->loss_level;
+// pf->bandwidth = p->bandwidth; //XXX bandwidth redundant?
+ pf->samples_no = p8->samples_no;
+ strncpy(pf->name, p8->name,sizeof(pf->name));
+ bcopy(p8->samples, pf->samples, sizeof(pf->samples));
+
+ return 0;
+}
+
+/*
+ * If p->pipe_nr != 0 the command is 'pipe x config', so we need to create
+ * the three main structs; otherwise only a flowset is created.
+ */
+static int
+dn_compat_configure(void *v)
+{
+ struct dn_id *buf = NULL, *base;
+ struct dn_sch *sch = NULL;
+ struct dn_link *p = NULL;
+ struct dn_fs *fs = NULL;
+ struct dn_profile *pf = NULL;
+ int lmax;
+ int error;
+
+ struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+
+ int i; /* number of object to configure */
+
+ lmax = sizeof(struct dn_id); /* command header */
+ lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
+ sizeof(struct dn_fs) + sizeof(struct dn_profile);
+
+ base = buf = malloc(lmax, M_DUMMYNET, M_WAIT|M_ZERO);
+ o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
+ base->id = DN_API_VERSION;
+
+ /* pipe_nr is the same in p7 and p8 */
+ i = p7->pipe_nr;
+ if (i != 0) { /* pipe config */
+ sch = o_next(&buf, sizeof(*sch), DN_SCH);
+ p = o_next(&buf, sizeof(*p), DN_LINK);
+ fs = o_next(&buf, sizeof(*fs), DN_FS);
+
+ error = dn_compat_config_pipe(sch, p, fs, v);
+ if (error) {
+ free(buf, M_DUMMYNET);
+ return error;
+ }
+ if (!is7 && p8->samples_no > 0) {
+ /* Add profiles*/
+ pf = o_next(&buf, sizeof(*pf), DN_PROFILE);
+ error = dn_compat_config_profile(pf, p, v);
+ if (error) {
+ free(buf, M_DUMMYNET);
+ return error;
+ }
+ }
+ } else { /* queue config */
+ fs = o_next(&buf, sizeof(*fs), DN_FS);
+ error = dn_compat_config_queue(fs, v);
+ if (error) {
+ free(buf, M_DUMMYNET);
+ return error;
+ }
+ }
+ error = do_config(base, (char *)buf - (char *)base);
+
+ if (buf)
+ free(buf, M_DUMMYNET);
+ return error;
+}
+
+int
+dn_compat_calc_size(struct dn_parms dn_cfg)
+{
+ int need = 0;
+ /* XXX use FreeBSD 8 struct size */
+ /* NOTE:
+ * - half scheduler: schk_count/2
+ * - all flowset: fsk_count
+ * - all flowset queues: queue_count
+ * - all pipe queue: si_count
+ */
+ need += dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2;
+ need += dn_cfg.fsk_count * sizeof(struct dn_flow_set);
+ need += dn_cfg.si_count * sizeof(struct dn_flow_queue8);
+ need += dn_cfg.queue_count * sizeof(struct dn_flow_queue8);
+
+ return need;
+}
+
+int
+dn_c_copy_q (void *_ni, void *arg)
+{
+ struct copy_args *a = arg;
+ struct dn_flow_queue7 *fq7 = (struct dn_flow_queue7 *)*a->start;
+ struct dn_flow_queue8 *fq8 = (struct dn_flow_queue8 *)*a->start;
+ struct dn_flow *ni = (struct dn_flow *)_ni;
+ int size = 0;
+
+ /* XXX hash slot not set */
+ /* No difference between 7.2/8 */
+ fq7->len = ni->length;
+ fq7->len_bytes = ni->len_bytes;
+ fq7->id = ni->fid;
+
+ if (is7) {
+ size = sizeof(struct dn_flow_queue7);
+ fq7->tot_pkts = ni->tot_pkts;
+ fq7->tot_bytes = ni->tot_bytes;
+ fq7->drops = ni->drops;
+ } else {
+ size = sizeof(struct dn_flow_queue8);
+ fq8->tot_pkts = ni->tot_pkts;
+ fq8->tot_bytes = ni->tot_bytes;
+ fq8->drops = ni->drops;
+ }
+
+ *a->start += size;
+ return 0;
+}
+
+int
+dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq)
+{
+ struct dn_link *l = &s->link;
+ struct dn_fsk *f = s->fs;
+
+ struct dn_pipe7 *pipe7 = (struct dn_pipe7 *)*a->start;
+ struct dn_pipe8 *pipe8 = (struct dn_pipe8 *)*a->start;
+ struct dn_flow_set *fs;
+ int size = 0;
+
+ if (is7) {
+ fs = &pipe7->fs;
+ size = sizeof(struct dn_pipe7);
+ } else {
+ fs = &pipe8->fs;
+ size = sizeof(struct dn_pipe8);
+ }
+
+	/* These 4 fields are the same in pipe7 and pipe8 */
+ pipe7->next.sle_next = (struct dn_pipe7 *)DN_IS_PIPE;
+ pipe7->bandwidth = l->bandwidth;
+ pipe7->delay = l->delay;
+ pipe7->pipe_nr = l->link_nr - DN_MAX_ID;
+
+ if (!is7) {
+ if (s->profile) {
+ struct dn_profile *pf = s->profile;
+ strncpy(pipe8->name, pf->name, sizeof(pf->name));
+ pipe8->loss_level = pf->loss_level;
+ pipe8->samples_no = pf->samples_no;
+ }
+ pipe8->burst = div64(l->burst , 8 * hz);
+ }
+
+ fs->flow_mask = s->sch.sched_mask;
+ fs->rq_size = s->sch.buckets ? s->sch.buckets : 1;
+
+ fs->parent_nr = l->link_nr - DN_MAX_ID;
+ fs->qsize = f->fs.qsize;
+ fs->plr = f->fs.plr;
+ fs->w_q = f->fs.w_q;
+ fs->max_th = f->max_th;
+ fs->min_th = f->min_th;
+ fs->max_p = f->fs.max_p;
+ fs->rq_elements = nq;
+
+ fs->flags_fs = convertflags2old(f->fs.flags);
+
+ *a->start += size;
+ return 0;
+}
+
+
+int
+dn_compat_copy_pipe(struct copy_args *a, void *_o)
+{
+ int have = a->end - *a->start;
+ int need = 0;
+ int pipe_size = sizeof(struct dn_pipe8);
+ int queue_size = sizeof(struct dn_flow_queue8);
+ int n_queue = 0; /* number of queues */
+
+ struct dn_schk *s = (struct dn_schk *)_o;
+ /* calculate needed space:
+ * - struct dn_pipe
+ * - if there are instances, dn_queue * n_instances
+ */
+ n_queue = (s->sch.flags & DN_HAVE_MASK ? dn_ht_entries(s->siht) :
+ (s->siht ? 1 : 0));
+ need = pipe_size + queue_size * n_queue;
+ if (have < need) {
+ D("have %d < need %d", have, need);
+ return 1;
+ }
+ /* copy pipe */
+ dn_c_copy_pipe(s, a, n_queue);
+
+ /* copy queues */
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, dn_c_copy_q, a);
+ else if (s->siht)
+ dn_c_copy_q(s->siht, a);
+ return 0;
+}
+
+int
+dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq)
+{
+ struct dn_flow_set *fs = (struct dn_flow_set *)*a->start;
+
+ fs->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE;
+ fs->fs_nr = f->fs.fs_nr;
+ fs->qsize = f->fs.qsize;
+ fs->plr = f->fs.plr;
+ fs->w_q = f->fs.w_q;
+ fs->max_th = f->max_th;
+ fs->min_th = f->min_th;
+ fs->max_p = f->fs.max_p;
+ fs->flow_mask = f->fs.flow_mask;
+ fs->rq_elements = nq;
+ fs->rq_size = (f->fs.buckets ? f->fs.buckets : 1);
+ fs->parent_nr = f->fs.sched_nr;
+ fs->weight = f->fs.par[0];
+
+ fs->flags_fs = convertflags2old(f->fs.flags);
+ *a->start += sizeof(struct dn_flow_set);
+ return 0;
+}
+
+int
+dn_compat_copy_queue(struct copy_args *a, void *_o)
+{
+ int have = a->end - *a->start;
+ int need = 0;
+ int fs_size = sizeof(struct dn_flow_set);
+ int queue_size = sizeof(struct dn_flow_queue8);
+
+ struct dn_fsk *fs = (struct dn_fsk *)_o;
+ int n_queue = 0; /* number of queues */
+
+ n_queue = (fs->fs.flags & DN_HAVE_MASK ? dn_ht_entries(fs->qht) :
+ (fs->qht ? 1 : 0));
+
+ need = fs_size + queue_size * n_queue;
+ if (have < need) {
+ D("have < need");
+ return 1;
+ }
+
+ /* copy flowset */
+ dn_c_copy_fs(fs, a, n_queue);
+
+ /* copy queues */
+ if (fs->fs.flags & DN_HAVE_MASK)
+ dn_ht_scan(fs->qht, dn_c_copy_q, a);
+ else if (fs->qht)
+ dn_c_copy_q(fs->qht, a);
+
+ return 0;
+}
+
+int
+copy_data_helper_compat(void *_o, void *_arg)
+{
+ struct copy_args *a = _arg;
+
+ if (a->type == DN_COMPAT_PIPE) {
+ struct dn_schk *s = _o;
+ if (s->sch.oid.subtype != 1 || s->sch.sched_nr <= DN_MAX_ID) {
+ return 0; /* not old type */
+ }
+		/* copy pipe parameters and, if an instance exists, copy
+		 * the other parameters and possibly the queues.
+ */
+ if(dn_compat_copy_pipe(a, _o))
+ return DNHT_SCAN_END;
+ } else if (a->type == DN_COMPAT_QUEUE) {
+ struct dn_fsk *fs = _o;
+ if (fs->fs.fs_nr >= DN_MAX_ID)
+ return 0;
+ if (dn_compat_copy_queue(a, _o))
+ return DNHT_SCAN_END;
+ }
+ return 0;
+}
+
+/* Main function to manage old requests */
+int
+ip_dummynet_compat(struct sockopt *sopt)
+{
+ int error=0;
+ void *v = NULL;
+ struct dn_id oid;
+
+	/* Length of data, used to determine the ipfw version... */
+ int len = sopt->sopt_valsize;
+
+ /* len can be 0 if command was dummynet_flush */
+ if (len == pipesize7) {
+ D("setting compatibility with FreeBSD 7.2");
+ is7 = 1;
+ }
+ else if (len == pipesize8 || len == pipesizemax8) {
+ D("setting compatibility with FreeBSD 8");
+ is7 = 0;
+ }
+
+ switch (sopt->sopt_name) {
+ default:
+ printf("dummynet: -- unknown option %d", sopt->sopt_name);
+ error = EINVAL;
+ break;
+
+ case IP_DUMMYNET_FLUSH:
+ oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION);
+ do_config(&oid, oid.len);
+ break;
+
+ case IP_DUMMYNET_DEL:
+ v = malloc(len, M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, v, len, len);
+ if (error)
+ break;
+ error = dn_compat_del(v);
+ free(v, M_DUMMYNET);
+ break;
+
+ case IP_DUMMYNET_CONFIGURE:
+ v = malloc(len, M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, v, len, len);
+ if (error)
+ break;
+ error = dn_compat_configure(v);
+ free(v, M_DUMMYNET);
+ break;
+
+ case IP_DUMMYNET_GET: {
+ void *buf;
+ int ret;
+ int original_size = sopt->sopt_valsize;
+ int size;
+
+ ret = dummynet_get(sopt, &buf);
+ if (ret)
+ return 0;//XXX ?
+ size = sopt->sopt_valsize;
+ sopt->sopt_valsize = original_size;
+ D("size=%d, buf=%p", size, buf);
+ ret = sooptcopyout(sopt, buf, size);
+ if (ret)
+ printf(" %s ERROR sooptcopyout\n", __FUNCTION__);
+ if (buf)
+ free(buf, M_DUMMYNET);
+ }
+ }
+
+ return error;
+}
+
+
diff --git a/freebsd/sys/netinet/ipfw/ip_dn_io.c b/freebsd/sys/netinet/ipfw/ip_dn_io.c
new file mode 100644
index 00000000..7a2c46d4
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_dn_io.c
@@ -0,0 +1,796 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Dummynet portions related to packet handling.
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <freebsd/net/netisr.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h> /* ip_len, ip_off */
+#include <freebsd/netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#include <freebsd/netinet/ip_dummynet.h>
+#include <freebsd/netinet/ipfw/ip_dn_private.h>
+#include <freebsd/netinet/ipfw/dn_sched.h>
+
+#include <freebsd/netinet/if_ether.h> /* various ether_* routines */
+
+#include <freebsd/netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
+#include <freebsd/netinet6/ip6_var.h>
+
+/*
+ * We keep a private variable for the simulation time, but we could
+ * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
+ * instead of dn_cfg.curr_time
+ */
+
+struct dn_parms dn_cfg;
+
+static long tick_last; /* Last tick duration (usec). */
+static long tick_delta; /* Last vs standard tick diff (usec). */
+static long tick_delta_sum; /* Accumulated tick difference (usec).*/
+static long tick_adjustment; /* Tick adjustments done. */
+static long tick_lost; /* Lost(coalesced) ticks number. */
+/* Adjusted vs non-adjusted curr_time difference (ticks). */
+static long tick_diff;
+
+static unsigned long io_pkt;
+static unsigned long io_pkt_fast;
+static unsigned long io_pkt_drop;
+
+/*
+ * We use a heap to store entities for which we have pending timer events.
+ * The heap is checked at every tick and all entities with expired events
+ * are extracted.
+ */
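+
+/*
+ * In other words, the per-tick check described above pops every heap entry
+ * whose key (the due time) is not later than the current time and
+ * dispatches the extracted object.  A hypothetical, disabled sketch of
+ * that loop, using the heap primitives included above:
+ */
+#if 0
+static void
+drain_expired_events_sketch(uint64_t now)
+{
+	struct dn_heap *h = &dn_cfg.evheap;
+
+	while (h->elements > 0 && DN_KEY_LEQ(HEAP_TOP(h)->key, now)) {
+		void *obj = HEAP_TOP(h)->object; /* delay line or sched. instance */
+
+		heap_extract(h, NULL);
+		/* dispatch 'obj', e.g. via transmit_event() or serve_sched() */
+		(void)obj;
+	}
+}
+#endif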
+
+MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
+
+extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+
+#ifdef SYSCTL_NODE
+
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
+
+/* parameters */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
+ CTLFLAG_RW, &dn_cfg.hash_size, 0, "Default hash table size");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
+ CTLFLAG_RW, &dn_cfg.slot_limit, 0,
+ "Upper limit in slots for pipe queue.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
+ CTLFLAG_RW, &dn_cfg.byte_limit, 0,
+ "Upper limit in bytes for pipe queue.");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
+ CTLFLAG_RW, &dn_cfg.io_fast, 0, "Enable fast dummynet io.");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
+ CTLFLAG_RW, &dn_cfg.debug, 0, "Dummynet debug level");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
+ CTLFLAG_RW, &dn_cfg.expire, 0, "Expire empty queues/pipes");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
+ CTLFLAG_RD, &dn_cfg.expire_cycle, 0, "Expire cycle for queues/pipes");
+
+/* RED parameters */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
+ CTLFLAG_RD, &dn_cfg.red_lookup_depth, 0, "Depth of RED lookup table");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
+ CTLFLAG_RD, &dn_cfg.red_avg_pkt_size, 0, "RED Medium packet size");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
+ CTLFLAG_RD, &dn_cfg.red_max_pkt_size, 0, "RED Max packet size");
+
+/* time adjustment */
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
+ CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
+ CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
+ CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
+ CTLFLAG_RD, &tick_diff, 0,
+ "Adjusted vs non-adjusted curr_time difference (ticks).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
+ CTLFLAG_RD, &tick_lost, 0,
+ "Number of ticks coalesced by dummynet taskqueue.");
+
+/* statistics */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count,
+ CTLFLAG_RD, &dn_cfg.schk_count, 0, "Number of schedulers");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count,
+ CTLFLAG_RD, &dn_cfg.si_count, 0, "Number of scheduler instances");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count,
+ CTLFLAG_RD, &dn_cfg.fsk_count, 0, "Number of flowsets");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count,
+ CTLFLAG_RD, &dn_cfg.queue_count, 0, "Number of queues");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
+ CTLFLAG_RD, &io_pkt, 0,
+ "Number of packets passed to dummynet.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
+ CTLFLAG_RD, &io_pkt_fast, 0,
+    "Number of packets that bypassed the dummynet scheduler.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
+ CTLFLAG_RD, &io_pkt_drop, 0,
+ "Number of packets dropped by dummynet.");
+
+SYSEND
+
+#endif
+
+static void dummynet_send(struct mbuf *);
+
+/*
+ * Packets processed by dummynet have an mbuf tag associated with
+ * them that carries their dummynet state.
+ * Outside dummynet, only the 'rule' field is relevant, and it must
+ * be at the beginning of the structure.
+ */
+struct dn_pkt_tag {
+ struct ipfw_rule_ref rule; /* matching rule */
+
+ /* second part, dummynet specific */
+ int dn_dir; /* action when packet comes out.*/
+ /* see ip_fw_private.h */
+ uint64_t output_time; /* when the pkt is due for delivery*/
+ struct ifnet *ifp; /* interface, for ip_output */
+ struct _ip6dn_args ip6opt; /* XXX ipv6 options */
+};
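+
+/*
+ * The layout requirement stated above -- 'rule' must be the first member so
+ * that code outside dummynet can treat the tag as a bare ipfw_rule_ref --
+ * could be captured by a compile-time check along these lines (disabled,
+ * illustrative only):
+ */
+#if 0
+CTASSERT(offsetof(struct dn_pkt_tag, rule) == 0);
+#endif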
+
+/*
+ * Return the mbuf tag holding the dummynet state (it should
+ * be the first one on the list).
+ */
+static struct dn_pkt_tag *
+dn_tag_get(struct mbuf *m)
+{
+ struct m_tag *mtag = m_tag_first(m);
+ KASSERT(mtag != NULL &&
+ mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
+ mtag->m_tag_id == PACKET_TAG_DUMMYNET,
+ ("packet on dummynet queue w/o dummynet tag!"));
+ return (struct dn_pkt_tag *)(mtag+1);
+}
+
+static inline void
+mq_append(struct mq *q, struct mbuf *m)
+{
+ if (q->head == NULL)
+ q->head = m;
+ else
+ q->tail->m_nextpkt = m;
+ q->tail = m;
+ m->m_nextpkt = NULL;
+}
+
+/*
+ * Dispose of a list of packets. Use a function so that, if we need
+ * to do more work, this is a central point to do it.
+ */
+void dn_free_pkts(struct mbuf *mnext)
+{
+ struct mbuf *m;
+
+ while ((m = mnext) != NULL) {
+ mnext = m->m_nextpkt;
+ FREE_PKT(m);
+ }
+}
+
+static int
+red_drops (struct dn_queue *q, int len)
+{
+ /*
+ * RED algorithm
+ *
+ * RED calculates the average queue size (avg) using a low-pass filter
+ * with an exponential weighted (w_q) moving average:
+ * avg <- (1-w_q) * avg + w_q * q_size
+	 * where q_size is the queue length (measured in bytes or packets).
+ *
+ * If q_size == 0, we compute the idle time for the link, and set
+ * avg = (1 - w_q)^(idle/s)
+ * where s is the time needed for transmitting a medium-sized packet.
+ *
+ * Now, if avg < min_th the packet is enqueued.
+ * If avg > max_th the packet is dropped. Otherwise, the packet is
+ * dropped with probability P function of avg.
+ */
+
+ struct dn_fsk *fs = q->fs;
+ int64_t p_b = 0;
+
+ /* Queue in bytes or packets? */
+ uint32_t q_size = (fs->fs.flags & DN_QSIZE_BYTES) ?
+ q->ni.len_bytes : q->ni.length;
+
+ /* Average queue size estimation. */
+ if (q_size != 0) {
+ /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
+ int diff = SCALE(q_size) - q->avg;
+ int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
+
+ q->avg += (int)v;
+ } else {
+ /*
+		 * Queue is empty, find out for how long the queue has been
+		 * empty and use a lookup table for computing
+		 * (1 - w_q)^(idle_time/s) where s is the time to send a
+ * (small) packet.
+ * XXX check wraps...
+ */
+ if (q->avg) {
+ u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step);
+
+ q->avg = (t < fs->lookup_depth) ?
+ SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+ }
+ }
+
+	/* Should we drop? */
+ if (q->avg < fs->min_th) {
+ q->count = -1;
+ return (0); /* accept packet */
+ }
+ if (q->avg >= fs->max_th) { /* average queue >= max threshold */
+ if (fs->fs.flags & DN_IS_GENTLE_RED) {
+ /*
+ * According to Gentle-RED, if avg is greater than
+ * max_th the packet is dropped with a probability
+ * p_b = c_3 * avg - c_4
+ * where c_3 = (1 - max_p) / max_th
+ * c_4 = 1 - 2 * max_p
+ */
+ p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
+ fs->c_4;
+ } else {
+ q->count = -1;
+ return (1);
+ }
+ } else if (q->avg > fs->min_th) {
+ /*
+ * We compute p_b using the linear dropping function
+ * p_b = c_1 * avg - c_2
+ * where c_1 = max_p / (max_th - min_th)
+ * c_2 = max_p * min_th / (max_th - min_th)
+ */
+ p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
+ }
+
+ if (fs->fs.flags & DN_QSIZE_BYTES)
+ p_b = div64((p_b * len) , fs->max_pkt_size);
+ if (++q->count == 0)
+ q->random = random() & 0xffff;
+ else {
+ /*
+ * q->count counts packets arrived since last drop, so a greater
+ * value of q->count means a greater packet drop probability.
+ */
+ if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
+ q->count = 0;
+ /* After a drop we calculate a new random value. */
+ q->random = random() & 0xffff;
+ return (1); /* drop */
+ }
+ }
+ /* End of RED algorithm. */
+
+ return (0); /* accept */
+
+}
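+
+/*
+ * For reference, the fixed-point logic above corresponds, in floating
+ * point, to the EWMA
+ *	avg = (1 - w_q) * avg + w_q * q_size
+ * with a drop probability growing linearly from 0 at min_th to max_p at
+ * max_th (and, for Gentle RED, on up to 1 at 2 * max_th).  The disabled
+ * sketch below is hypothetical, uses floating point only for illustration,
+ * and shows the plain-RED decision without the per-packet count
+ * correction used above.
+ */
+#if 0
+static int
+red_should_drop_sketch(double *avg, double q_size, double w_q,
+    double min_th, double max_th, double max_p)
+{
+	double p_b;
+
+	*avg = (1.0 - w_q) * *avg + w_q * q_size;	/* EWMA of the queue size */
+	if (*avg < min_th)
+		return (0);				/* accept */
+	if (*avg >= max_th)
+		return (1);				/* always drop */
+	p_b = max_p * (*avg - min_th) / (max_th - min_th);
+	return ((double)random() / 0x7fffffff < p_b);	/* drop with prob. p_b */
+}
+#endif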
+
+/*
+ * Enqueue a packet in q, subject to space and queue management policy
+ * (whose parameters are in q->fs).
+ * Update stats for the queue and the scheduler.
+ * Return 0 on success, 1 on drop. The packet is consumed anyway.
+ */
+int
+dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
+{
+ struct dn_fs *f;
+ struct dn_flow *ni; /* stats for scheduler instance */
+ uint64_t len;
+
+ if (q->fs == NULL || q->_si == NULL) {
+ printf("%s fs %p si %p, dropping\n",
+ __FUNCTION__, q->fs, q->_si);
+ FREE_PKT(m);
+ return 1;
+ }
+ f = &(q->fs->fs);
+ ni = &q->_si->ni;
+ len = m->m_pkthdr.len;
+ /* Update statistics, then check reasons to drop pkt. */
+ q->ni.tot_bytes += len;
+ q->ni.tot_pkts++;
+ ni->tot_bytes += len;
+ ni->tot_pkts++;
+ if (drop)
+ goto drop;
+ if (f->plr && random() < f->plr)
+ goto drop;
+ if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
+ goto drop;
+ if (f->flags & DN_QSIZE_BYTES) {
+ if (q->ni.len_bytes > f->qsize)
+ goto drop;
+ } else if (q->ni.length >= f->qsize) {
+ goto drop;
+ }
+ mq_append(&q->mq, m);
+ q->ni.length++;
+ q->ni.len_bytes += len;
+ ni->length++;
+ ni->len_bytes += len;
+ return 0;
+
+drop:
+ io_pkt_drop++;
+ q->ni.drops++;
+ ni->drops++;
+ FREE_PKT(m);
+ return 1;
+}
+
+/*
+ * Fetch packets from the delay line which are due now. If there are
+ * leftover packets, reinsert the delay line in the heap.
+ * Runs under scheduler lock.
+ */
+static void
+transmit_event(struct mq *q, struct delay_line *dline, uint64_t now)
+{
+ struct mbuf *m;
+ struct dn_pkt_tag *pkt = NULL;
+
+ dline->oid.subtype = 0; /* not in heap */
+ while ((m = dline->mq.head) != NULL) {
+ pkt = dn_tag_get(m);
+ if (!DN_KEY_LEQ(pkt->output_time, now))
+ break;
+ dline->mq.head = m->m_nextpkt;
+ mq_append(q, m);
+ }
+ if (m != NULL) {
+ dline->oid.subtype = 1; /* in heap */
+ heap_insert(&dn_cfg.evheap, pkt->output_time, dline);
+ }
+}
+
+/*
+ * Convert the additional MAC overheads/delays into an equivalent
+ * number of bits for the given data rate. The samples are
+ * in milliseconds so we need to divide by 1000.
+ */
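+/*
+ * Worked example (illustrative only): on a 1 Mbit/s link a sampled
+ * overhead of 2 ms costs 1000000 * 2 / 1000 = 2000 bits, i.e. the
+ * packet is charged as if it were 250 bytes longer.
+ */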
+static uint64_t
+extra_bits(struct mbuf *m, struct dn_schk *s)
+{
+ int index;
+ uint64_t bits;
+ struct dn_profile *pf = s->profile;
+
+ if (!pf || pf->samples_no == 0)
+ return 0;
+ index = random() % pf->samples_no;
+ bits = div64((uint64_t)pf->samples[index] * s->link.bandwidth, 1000);
+ if (index >= pf->loss_level) {
+ struct dn_pkt_tag *dt = dn_tag_get(m);
+ if (dt)
+ dt->dn_dir = DIR_DROP;
+ }
+ return bits;
+}
+
+/*
+ * Send traffic from a scheduler instance due by 'now'.
+ * Return a pointer to the head of the queue.
+ */
+static struct mbuf *
+serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now)
+{
+ struct mq def_q;
+ struct dn_schk *s = si->sched;
+ struct mbuf *m = NULL;
+ int delay_line_idle = (si->dline.mq.head == NULL);
+ int done, bw;
+
+ if (q == NULL) {
+ q = &def_q;
+ q->head = NULL;
+ }
+
+ bw = s->link.bandwidth;
+ si->kflags &= ~DN_ACTIVE;
+
+ if (bw > 0)
+ si->credit += (now - si->sched_time) * bw;
+ else
+ si->credit = 0;
+ si->sched_time = now;
+ done = 0;
+ while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
+ uint64_t len_scaled;
+ done++;
+ len_scaled = (bw == 0) ? 0 : hz *
+ (m->m_pkthdr.len * 8 + extra_bits(m, s));
+ si->credit -= len_scaled;
+ /* Move packet in the delay line */
+ dn_tag_get(m)->output_time += s->link.delay ;
+ mq_append(&si->dline.mq, m);
+ }
+ /*
+ * If credit >= 0 the instance is idle, mark time.
+ * Otherwise put back in the heap, and adjust the output
+ * time of the last inserted packet, m, which was too early.
+ */
+ if (si->credit >= 0) {
+ si->idle_time = now;
+ } else {
+ uint64_t t;
+ KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
+ t = div64(bw - 1 - si->credit, bw);
+ if (m)
+ dn_tag_get(m)->output_time += t;
+ si->kflags |= DN_ACTIVE;
+ heap_insert(&dn_cfg.evheap, now + t, si);
+ }
+ if (delay_line_idle && done)
+ transmit_event(q, &si->dline, now);
+ return q->head;
+}
+
+/*
+ * The timer handler for dummynet. Time is computed in ticks, but
+ * the code is tolerant to the actual rate at which this is called.
+ * Once complete, the function reschedules itself for the next tick.
+ */
+void
+dummynet_task(void *context, int pending)
+{
+ struct timeval t;
+ struct mq q = { NULL, NULL }; /* queue to accumulate results */
+
+ DN_BH_WLOCK();
+
+ /* Update the number of lost (coalesced) ticks. */
+ tick_lost += pending - 1;
+
+ getmicrouptime(&t);
+ /* Last tick duration (usec). */
+ tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 +
+ (t.tv_usec - dn_cfg.prev_t.tv_usec);
+ /* Last tick vs standard tick difference (usec). */
+ tick_delta = (tick_last * hz - 1000000) / hz;
+ /* Accumulated tick difference (usec). */
+ tick_delta_sum += tick_delta;
+
+ dn_cfg.prev_t = t;
+
+ /*
+ * Adjust curr_time if the accumulated tick difference is
+ * greater than the 'standard' tick. Since curr_time should
+ * be monotonically increasing, we do positive adjustments
+ * as required, and throttle curr_time in case of negative
+ * adjustment.
+ */
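+ /*
+ * Example (illustrative): with hz = 1000 the standard tick is 1000 us.
+ * If the last tick took 1200 us, tick_delta is 200 us; once the
+ * accumulated sum reaches a full tick, curr_time advances by one
+ * extra unit in the branch below.
+ */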
+ dn_cfg.curr_time++;
+ if (tick_delta_sum - tick >= 0) {
+ int diff = tick_delta_sum / tick;
+
+ dn_cfg.curr_time += diff;
+ tick_diff += diff;
+ tick_delta_sum %= tick;
+ tick_adjustment++;
+ } else if (tick_delta_sum + tick <= 0) {
+ dn_cfg.curr_time--;
+ tick_diff--;
+ tick_delta_sum += tick;
+ tick_adjustment++;
+ }
+
+ /* serve pending events, accumulate in q */
+ for (;;) {
+ struct dn_id *p; /* generic parameter to handler */
+
+ if (dn_cfg.evheap.elements == 0 ||
+ DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key))
+ break;
+ p = HEAP_TOP(&dn_cfg.evheap)->object;
+ heap_extract(&dn_cfg.evheap, NULL);
+
+ if (p->type == DN_SCH_I) {
+ serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time);
+ } else { /* extracted a delay line */
+ transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
+ }
+ }
+ if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
+ dn_cfg.expire_cycle = 0;
+ dn_drain_scheduler();
+ dn_drain_queue();
+ }
+
+ DN_BH_WUNLOCK();
+ dn_reschedule();
+ if (q.head != NULL)
+ dummynet_send(q.head);
+}
+
+/*
+ * forward a chain of packets to the proper destination.
+ * This runs outside the dummynet lock.
+ */
+static void
+dummynet_send(struct mbuf *m)
+{
+ struct mbuf *n;
+
+ for (; m != NULL; m = n) {
+ struct ifnet *ifp = NULL; /* gcc 3.4.6 complains */
+ struct m_tag *tag;
+ int dst;
+
+ n = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ tag = m_tag_first(m);
+ if (tag == NULL) { /* should not happen */
+ dst = DIR_DROP;
+ } else {
+ struct dn_pkt_tag *pkt = dn_tag_get(m);
+ /* extract the dummynet info, rename the tag
+ * to carry reinject info.
+ */
+ dst = pkt->dn_dir;
+ ifp = pkt->ifp;
+ tag->m_tag_cookie = MTAG_IPFW_RULE;
+ tag->m_tag_id = 0;
+ }
+
+ switch (dst) {
+ case DIR_OUT:
+ SET_HOST_IPLEN(mtod(m, struct ip *));
+ ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+ break ;
+
+ case DIR_IN :
+ /* put header in network format for ip_input() */
+ //SET_NET_IPLEN(mtod(m, struct ip *));
+ netisr_dispatch(NETISR_IP, m);
+ break;
+
+#ifdef INET6
+ case DIR_IN | PROTO_IPV6:
+ netisr_dispatch(NETISR_IPV6, m);
+ break;
+
+ case DIR_OUT | PROTO_IPV6:
+ SET_HOST_IPLEN(mtod(m, struct ip *));
+ ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
+ break;
+#endif
+
+ case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */
+ if (bridge_dn_p != NULL)
+ ((*bridge_dn_p)(m, ifp));
+ else
+ printf("dummynet: if_bridge not loaded\n");
+
+ break;
+
+ case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */
+ /*
+ * The Ethernet code assumes the Ethernet header is
+ * contiguous in the first mbuf header.
+ * Ensure this is true.
+ */
+ if (m->m_len < ETHER_HDR_LEN &&
+ (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
+ printf("dummynet/ether: pullup failed, "
+ "dropping packet\n");
+ break;
+ }
+ ether_demux(m->m_pkthdr.rcvif, m);
+ break;
+
+ case DIR_OUT | PROTO_LAYER2: /* DN_TO_ETH_OUT: */
+ ether_output_frame(ifp, m);
+ break;
+
+ case DIR_DROP:
+ /* drop the packet after some time */
+ FREE_PKT(m);
+ break;
+
+ default:
+ printf("dummynet: bad switch %d!\n", dst);
+ FREE_PKT(m);
+ break;
+ }
+ }
+}
+
+static inline int
+tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa)
+{
+ struct dn_pkt_tag *dt;
+ struct m_tag *mtag;
+
+ mtag = m_tag_get(PACKET_TAG_DUMMYNET,
+ sizeof(*dt), M_NOWAIT | M_ZERO);
+ if (mtag == NULL)
+ return 1; /* Cannot allocate packet header. */
+ m_tag_prepend(m, mtag); /* Attach to mbuf chain. */
+ dt = (struct dn_pkt_tag *)(mtag + 1);
+ dt->rule = fwa->rule;
+ dt->rule.info &= IPFW_ONEPASS; /* only keep this info */
+ dt->dn_dir = dir;
+ dt->ifp = fwa->oif;
+ /* dt->output_time is updated as we move through */
+ dt->output_time = dn_cfg.curr_time;
+ return 0;
+}
+
+
+/*
+ * dummynet hook for packets.
+ * We use the argument to locate the flowset fs and the sched_set sch
+ * associated to it. Then we apply flow_mask and sched_mask to
+ * determine the queue and scheduler instances.
+ *
+ * dir where to send the packet after dummynet.
+ * *m0 the mbuf with the packet.
+ * ifp the 'ifp' parameter from the caller:
+ * NULL in ip_input, the destination interface in ip_output.
+ */
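+/*
+ * Sketch of the lookup chain performed below (informative summary only):
+ *	fs = dn_ht_find(dn_cfg.fshash, fs_id, ...)	-- flowset
+ *	si = ipdn_si_find(fs->sched, &fwa->f_id)	-- scheduler instance
+ *	q  = ipdn_q_find(fs, si, &fwa->f_id)		-- queue (MULTIQUEUE only)
+ * after which the scheduler's enqueue() and serve_sched() move the packet
+ * through the delay line.
+ */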
+int
+dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
+{
+ struct mbuf *m = *m0;
+ struct dn_fsk *fs = NULL;
+ struct dn_sch_inst *si;
+ struct dn_queue *q = NULL; /* default */
+
+ int fs_id = (fwa->rule.info & IPFW_INFO_MASK) +
+ ((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0);
+ DN_BH_WLOCK();
+ io_pkt++;
+ /* we could actually tag outside the lock, but who cares... */
+ if (tag_mbuf(m, dir, fwa))
+ goto dropit;
+ if (dn_cfg.busy) {
+ /* if the upper half is busy doing something expensive,
+ * let's queue the packet and move forward
+ */
+ mq_append(&dn_cfg.pending, m);
+ m = *m0 = NULL; /* consumed */
+ goto done; /* already active, nothing to do */
+ }
+ /* XXX locate_flowset could be optimised with a direct ref. */
+ fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL);
+ if (fs == NULL)
+ goto dropit; /* This queue/pipe does not exist! */
+ if (fs->sched == NULL) /* should not happen */
+ goto dropit;
+ /* find scheduler instance, possibly applying sched_mask */
+ si = ipdn_si_find(fs->sched, &(fwa->f_id));
+ if (si == NULL)
+ goto dropit;
+ /*
+ * If the scheduler supports multiple queues, find the right one
+ * (otherwise it will be ignored by enqueue).
+ */
+ if (fs->sched->fp->flags & DN_MULTIQUEUE) {
+ q = ipdn_q_find(fs, si, &(fwa->f_id));
+ if (q == NULL)
+ goto dropit;
+ }
+ if (fs->sched->fp->enqueue(si, q, m)) {
+ /* packet was dropped by enqueue() */
+ m = *m0 = NULL;
+ goto dropit;
+ }
+
+ if (si->kflags & DN_ACTIVE) {
+ m = *m0 = NULL; /* consumed */
+ goto done; /* already active, nothing to do */
+ }
+
+ /* compute the initial allowance */
+ if (si->idle_time < dn_cfg.curr_time) {
+ /* Do this only on the first packet on an idle pipe */
+ struct dn_link *p = &fs->sched->link;
+
+ si->sched_time = dn_cfg.curr_time;
+ si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
+ if (p->burst) {
+ uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth;
+ if (burst > p->burst)
+ burst = p->burst;
+ si->credit += burst;
+ }
+ }
+ /* pass through scheduler and delay line */
+ m = serve_sched(NULL, si, dn_cfg.curr_time);
+
+ /* optimization -- pass it back to ipfw for immediate send */
+ /* XXX Don't call dummynet_send() if the scheduler returns the packet
+ * just enqueued. This avoids a lock order reversal.
+ */
+ if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
+ /* fast io: rename the tag to carry reinject info. */
+ struct m_tag *tag = m_tag_first(m);
+
+ tag->m_tag_cookie = MTAG_IPFW_RULE;
+ tag->m_tag_id = 0;
+ io_pkt_fast++;
+ if (m->m_nextpkt != NULL) {
+ printf("dummynet: fast io: pkt chain detected!\n");
+ m->m_nextpkt = NULL;
+ }
+ m = NULL;
+ } else {
+ *m0 = NULL;
+ }
+done:
+ DN_BH_WUNLOCK();
+ if (m)
+ dummynet_send(m);
+ return 0;
+
+dropit:
+ io_pkt_drop++;
+ DN_BH_WUNLOCK();
+ if (m)
+ FREE_PKT(m);
+ *m0 = NULL;
+ return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS;
+}
diff --git a/freebsd/sys/netinet/ipfw/ip_dn_private.h b/freebsd/sys/netinet/ipfw/ip_dn_private.h
new file mode 100644
index 00000000..270f1881
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_dn_private.h
@@ -0,0 +1,402 @@
+/*-
+ * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * internal dummynet APIs.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_DN_PRIVATE_H
+#define _IP_DN_PRIVATE_H
+
+/* debugging support
+ * use ND() to remove debugging, D() to print a line,
+ * DX(level, ...) to print above a certain level
+ * If you redefine D() you are expected to redefine all.
+ */
+#ifndef D
+#define ND(fmt, ...) do {} while (0)
+#define D1(fmt, ...) do {} while (0)
+#define D(fmt, ...) printf("%-10s " fmt "\n", \
+ __FUNCTION__, ## __VA_ARGS__)
+#define DX(lev, fmt, ...) do { \
+ if (dn_cfg.debug > lev) D(fmt, ## __VA_ARGS__); } while (0)
+#endif
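+/*
+ * Usage sketch (illustrative): D("fs %d", i) prints one line prefixed
+ * with the function name, DX(2, "fs %d", i) prints only when
+ * dn_cfg.debug > 2, and ND() compiles to nothing.
+ */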
+
+MALLOC_DECLARE(M_DUMMYNET);
+
+#ifndef FREE_PKT
+#define FREE_PKT(m) m_freem(m)
+#endif
+
+#ifndef __linux__
+#define div64(a, b) ((int64_t)(a) / (int64_t)(b))
+#endif
+
+#define DN_LOCK_INIT() do { \
+ mtx_init(&dn_cfg.uh_mtx, "dn_uh", NULL, MTX_DEF); \
+ mtx_init(&dn_cfg.bh_mtx, "dn_bh", NULL, MTX_DEF); \
+ } while (0)
+#define DN_LOCK_DESTROY() do { \
+ mtx_destroy(&dn_cfg.uh_mtx); \
+ mtx_destroy(&dn_cfg.bh_mtx); \
+ } while (0)
+#if 0 /* not used yet */
+#define DN_UH_RLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_UH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_UH_WLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_UH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_UH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
+#endif
+
+#define DN_BH_RLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_BH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_BH_WLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_BH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_BH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
+
+SLIST_HEAD(dn_schk_head, dn_schk);
+SLIST_HEAD(dn_sch_inst_head, dn_sch_inst);
+SLIST_HEAD(dn_fsk_head, dn_fsk);
+SLIST_HEAD(dn_queue_head, dn_queue);
+SLIST_HEAD(dn_alg_head, dn_alg);
+
+struct mq { /* a basic queue of packets */
+ struct mbuf *head, *tail;
+};
+
+static inline void
+set_oid(struct dn_id *o, int type, int len)
+{
+ o->type = type;
+ o->len = len;
+ o->subtype = 0;
+};
+
+/*
+ * configuration and global data for a dummynet instance
+ *
+ * When a configuration is modified from userland, 'id' is incremented
+ * so we can use the value to check for stale pointers.
+ */
+struct dn_parms {
+ uint32_t id; /* configuration version */
+
+ /* defaults (sysctl-accessible) */
+ int red_lookup_depth;
+ int red_avg_pkt_size;
+ int red_max_pkt_size;
+ int hash_size;
+ int max_hash_size;
+ long byte_limit; /* max queue sizes */
+ long slot_limit;
+
+ int io_fast;
+ int debug;
+
+ /* timekeeping */
+ struct timeval prev_t; /* last time dummynet_tick ran */
+ struct dn_heap evheap; /* scheduled events */
+
+ /* counters of objects -- used for reporting space */
+ int schk_count;
+ int si_count;
+ int fsk_count;
+ int queue_count;
+
+ /* ticks and other stuff */
+ uint64_t curr_time;
+ /* flowsets and schedulers are in hash tables, with 'hash_size'
+ * buckets. fshash is looked up at every packet arrival
+ * so better be generous if we expect many entries.
+ */
+ struct dn_ht *fshash;
+ struct dn_ht *schedhash;
+ /* list of flowsets without a scheduler -- use sch_chain */
+ struct dn_fsk_head fsu; /* list of unlinked flowsets */
+ struct dn_alg_head schedlist; /* list of algorithms */
+
+ /* Store the fs/sch to scan when draining. The value is the
+ * bucket number of the hash table. Expire can be disabled
+ * with net.inet.ip.dummynet.expire=0, or it happens every
+ * expire ticks.
+ */
+ int drain_fs;
+ int drain_sch;
+ uint32_t expire;
+ uint32_t expire_cycle; /* tick count */
+
+ /* if the upper half is busy doing something long,
+ * it can set the busy flag and we will enqueue packets in
+ * a queue for later processing.
+ */
+ int busy;
+ struct mq pending;
+
+#ifdef _KERNEL
+ /*
+ * This file is normally used in the kernel, unless we do
+ * some userland tests, in which case we do not need a mtx.
+ * uh_mtx arbitrates between system calls and also
+ * protects fshash, schedhash and fsunlinked.
+ * These structures are readonly for the lower half.
+ * bh_mtx protects all other structures which may be
+ * modified upon packet arrivals
+ */
+#if defined( __linux__ ) || defined( _WIN32 )
+ spinlock_t uh_mtx;
+ spinlock_t bh_mtx;
+#else
+ struct mtx uh_mtx;
+ struct mtx bh_mtx;
+#endif
+
+#endif /* _KERNEL */
+};
+
+/*
+ * Delay line, contains all packets on output from a link.
+ * Every scheduler instance has one.
+ */
+struct delay_line {
+ struct dn_id oid;
+ struct dn_sch_inst *si;
+ struct mq mq;
+};
+
+/*
+ * The kernel side of a flowset. It is linked in a hash table
+ * of flowsets, and in a list of children of their parent scheduler.
+ * qht is either the queue or (if HAVE_MASK) a hash table of queues.
+ * Note that the mask to use is the (flow_mask|sched_mask), which
+ * changes as we attach/detach schedulers. So we store it here.
+ *
+ * XXX If we want to add scheduler-specific parameters, we need to
+ * put them in external storage because the scheduler may not be
+ * available when the fsk is created.
+ */
+struct dn_fsk { /* kernel side of a flowset */
+ struct dn_fs fs;
+ SLIST_ENTRY(dn_fsk) fsk_next; /* hash chain for fshash */
+
+ struct ipfw_flow_id fsk_mask;
+
+ /* qht is a hash table of queues, or just a single queue
+ * a bit in fs.flags tells us which one
+ */
+ struct dn_ht *qht;
+ struct dn_schk *sched; /* Sched we are linked to */
+ SLIST_ENTRY(dn_fsk) sch_chain; /* list of fsk attached to sched */
+
+ /* bucket index used by drain routine to drain queues for this
+ * flowset
+ */
+ int drain_bucket;
+ /* Parameters related to RED / GRED */
+ /* original values are in dn_fs*/
+ int w_q ; /* queue weight (scaled) */
+ int max_th ; /* maximum threshold for queue (scaled) */
+ int min_th ; /* minimum threshold for queue (scaled) */
+ int max_p ; /* maximum value for p_b (scaled) */
+
+ u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
+ u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
+ u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
+ u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
+ u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
+ u_int lookup_depth ; /* depth of lookup table */
+ int lookup_step ; /* granularity inside the lookup table */
+ int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
+ int avg_pkt_size ; /* medium packet size */
+ int max_pkt_size ; /* max packet size */
+};
+
+/*
+ * A queue is created as a child of a flowset unless it belongs to
+ * a !MULTIQUEUE scheduler. It is normally in a hash table in the
+ * flowset. fs always points to the parent flowset.
+ * si normally points to the sch_inst, unless the flowset has been
+ * detached from the scheduler -- in this case si == NULL and we
+ * should not enqueue.
+ */
+struct dn_queue {
+ struct dn_flow ni; /* oid, flow_id, stats */
+ struct mq mq; /* packets queue */
+ struct dn_sch_inst *_si; /* owner scheduler instance */
+ SLIST_ENTRY(dn_queue) q_next; /* hash chain list for qht */
+ struct dn_fsk *fs; /* parent flowset. */
+
+ /* RED parameters */
+ int avg; /* average queue length est. (scaled) */
+ int count; /* arrivals since last RED drop */
+ int random; /* random value (scaled) */
+ uint64_t q_time; /* start of queue idle time */
+
+};
+
+/*
+ * The kernel side of a scheduler. Contains the userland config,
+ * a link, pointer to extra config arguments from command line,
+ * kernel flags, and a pointer to the scheduler methods.
+ * It is stored in a hash table, and holds a list of all
+ * flowsets and scheduler instances.
+ * XXX sch must be at the beginning, see schk_hash().
+ */
+struct dn_schk {
+ struct dn_sch sch;
+ struct dn_alg *fp; /* Pointer to scheduler functions */
+ struct dn_link link; /* The link, embedded */
+ struct dn_profile *profile; /* delay profile, if any */
+ struct dn_id *cfg; /* extra config arguments */
+
+ SLIST_ENTRY(dn_schk) schk_next; /* hash chain for schedhash */
+
+ struct dn_fsk_head fsk_list; /* all fsk linked to me */
+ struct dn_fsk *fs; /* Flowset for !MULTIQUEUE */
+
+ /* bucket index used by the drain routine to drain the scheduler
+ * instance for this flowset.
+ */
+ int drain_bucket;
+
+ /* Hash table of all instances (through sch.sched_mask)
+ * or single instance if no mask. Always valid.
+ */
+ struct dn_ht *siht;
+};
+
+
+/*
+ * Scheduler instance.
+ * Contains variables and all queues relative to this instance.
+ * This struct is created at runtime.
+ */
+struct dn_sch_inst {
+ struct dn_flow ni; /* oid, flowid and stats */
+ SLIST_ENTRY(dn_sch_inst) si_next; /* hash chain for siht */
+ struct delay_line dline;
+ struct dn_schk *sched; /* the template */
+ int kflags; /* DN_ACTIVE */
+
+ int64_t credit; /* bits I can transmit (more or less). */
+ uint64_t sched_time; /* time link was scheduled in ready_heap */
+ uint64_t idle_time; /* start of scheduler instance idle time */
+
+ /* q_count is the number of queues that this instance is using.
+ * The counter is incremented or decremented when
+ * a reference from the queue is created or deleted.
+ * It is used to make sure that a scheduler instance can be safely
+ * deleted by the drain routine. See notes below.
+ */
+ int q_count;
+
+};
+
+/*
+ * NOTE about object drain.
+ * The system will automatically (XXX check when) drain queues and
+ * scheduler instances when they are idle.
+ * A queue is idle when it has no packets; an instance is idle when
+ * it is not in the evheap heap, and the corresponding delay line is empty.
+ * A queue can be safely deleted when it is idle because the scheduler
+ * function xxx_free_queue() will remove any references to it.
+ * An instance can only be deleted when no queues reference it. To be sure
+ * of that, a counter (q_count) stores the number of queues that are pointing
+ * to the instance.
+ *
+ * XXX
+ * Order of scan:
+ * - take all flowsets in a bucket of the flowset hash table
+ * - take all queues in a bucket for the flowset
+ * - increment the queue bucket
+ * - scan next flowset bucket
+ * Nothing is done if a bucket contains no entries.
+ *
+ * The same scheme is used for scheduler instances.
+ */
+
+
+/* kernel-side flags. Linux has DN_DELETE in fcntl.h
+ */
+enum {
+ /* 1 and 2 are reserved for the SCAN flags */
+ DN_DESTROY = 0x0004, /* destroy */
+ DN_DELETE_FS = 0x0008, /* destroy flowset */
+ DN_DETACH = 0x0010,
+ DN_ACTIVE = 0x0020, /* object is in evheap */
+ DN_F_DLINE = 0x0040, /* object is a delay line */
+ DN_F_SCHI = 0x00C0, /* object is a sched.instance */
+ DN_QHT_IS_Q = 0x0100, /* in flowset, qht is a single queue */
+};
+
+extern struct dn_parms dn_cfg;
+
+int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
+void dummynet_task(void *context, int pending);
+void dn_reschedule(void);
+
+struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *,
+ struct ipfw_flow_id *);
+struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *);
+
+/*
+ * copy_range is a template for requests for ranges of pipes/queues/scheds.
+ * The number of ranges is variable and can be derived by o.len.
+ * As a default, we use a small number of entries so that the struct
+ * fits easily on the stack and is sufficient for most common requests.
+ */
+#define DEFAULT_RANGES 5
+struct copy_range {
+ struct dn_id o;
+ uint32_t r[ 2 * DEFAULT_RANGES ];
+};
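+/*
+ * Each pair r[2i], r[2i+1] is an inclusive range of object numbers.
+ * Example (illustrative, not from the original code): a request for
+ * pipes 1-10 and 20 would carry r[] = { 1, 10, 20, 20 } with o.len
+ * covering the two ranges actually used.
+ */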
+
+struct copy_args {
+ char **start;
+ char *end;
+ int flags;
+ int type;
+ struct copy_range *extra; /* extra filtering */
+};
+
+struct sockopt;
+int ip_dummynet_compat(struct sockopt *sopt);
+int dummynet_get(struct sockopt *sopt, void **compat);
+int dn_c_copy_q (void *_ni, void *arg);
+int dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq);
+int dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq);
+int dn_compat_copy_queue(struct copy_args *a, void *_o);
+int dn_compat_copy_pipe(struct copy_args *a, void *_o);
+int copy_data_helper_compat(void *_o, void *_arg);
+int dn_compat_calc_size(struct dn_parms dn_cfg);
+int do_config(void *p, int l);
+
+/* function to drain idle object */
+void dn_drain_scheduler(void);
+void dn_drain_queue(void);
+
+#endif /* _IP_DN_PRIVATE_H */
diff --git a/freebsd/sys/netinet/ipfw/ip_dummynet.c b/freebsd/sys/netinet/ipfw/ip_dummynet.c
new file mode 100644
index 00000000..dca39d06
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_dummynet.c
@@ -0,0 +1,2297 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
+ * Portions Copyright (c) 2000 Akamba Corp.
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Configuration and internal object management for dummynet.
+ */
+
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/taskqueue.h>
+#include <freebsd/net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+#include <freebsd/netinet/ipfw/dn_heap.h>
+#include <freebsd/netinet/ip_dummynet.h>
+#include <freebsd/netinet/ipfw/ip_dn_private.h>
+#include <freebsd/netinet/ipfw/dn_sched.h>
+
+/* which objects to copy */
+#define DN_C_LINK 0x01
+#define DN_C_SCH 0x02
+#define DN_C_FLOW 0x04
+#define DN_C_FS 0x08
+#define DN_C_QUEUE 0x10
+
+/* we use this argument in case of a schk_new */
+struct schk_new_arg {
+ struct dn_alg *fp;
+ struct dn_sch *sch;
+};
+
+/*---- callout hooks. ----*/
+static struct callout dn_timeout;
+static struct task dn_task;
+static struct taskqueue *dn_tq = NULL;
+
+static void
+dummynet(void * __unused unused)
+{
+
+ taskqueue_enqueue(dn_tq, &dn_task);
+}
+
+void
+dn_reschedule(void)
+{
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
+}
+/*----- end of callout hooks -----*/
+
+/* Return a scheduler descriptor given the type or name. */
+static struct dn_alg *
+find_sched_type(int type, char *name)
+{
+ struct dn_alg *d;
+
+ SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
+ if (d->type == type || (name && !strcmp(d->name, name)))
+ return d;
+ }
+ return NULL; /* not found */
+}
+
+int
+ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
+{
+ int oldv = *v;
+ const char *op = NULL;
+ if (oldv < lo) {
+ *v = dflt;
+ op = "Bump";
+ } else if (oldv > hi) {
+ *v = hi;
+ op = "Clamp";
+ } else
+ return *v;
+ if (op && msg)
+ printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
+ return *v;
+}
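+/*
+ * Usage sketch (illustrative values): ipdn_bound_var(&buckets, 64, 1, 1024,
+ * "flowset buckets") leaves 100 untouched, clamps 5000 down to 1024, and
+ * resets a non-positive value to the default 64, logging one line whenever
+ * 'msg' is not NULL and an adjustment was made.
+ */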
+
+/*---- flow_id mask, hash and compare functions ---*/
+/*
+ * The flow_id includes the 5-tuple, the queue/pipe number
+ * which we store in the extra area in host order,
+ * and for ipv6 also the flow_id6.
+ * XXX see if we want the tos byte (can store in 'flags')
+ */
+static struct ipfw_flow_id *
+flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
+{
+ int is_v6 = IS_IP6_FLOW_ID(id);
+
+ id->dst_port &= mask->dst_port;
+ id->src_port &= mask->src_port;
+ id->proto &= mask->proto;
+ id->extra &= mask->extra;
+ if (is_v6) {
+ APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
+ APPLY_MASK(&id->src_ip6, &mask->src_ip6);
+ id->flow_id6 &= mask->flow_id6;
+ } else {
+ id->dst_ip &= mask->dst_ip;
+ id->src_ip &= mask->src_ip;
+ }
+ return id;
+}
+
+/* computes the OR of two masks; the result is stored in dst and also returned */
+static struct ipfw_flow_id *
+flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
+{
+ int is_v6 = IS_IP6_FLOW_ID(dst);
+
+ dst->dst_port |= src->dst_port;
+ dst->src_port |= src->src_port;
+ dst->proto |= src->proto;
+ dst->extra |= src->extra;
+ if (is_v6) {
+#define OR_MASK(_d, _s) \
+ (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
+ (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
+ (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
+ (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
+ OR_MASK(&dst->dst_ip6, &src->dst_ip6);
+ OR_MASK(&dst->src_ip6, &src->src_ip6);
+#undef OR_MASK
+ dst->flow_id6 |= src->flow_id6;
+ } else {
+ dst->dst_ip |= src->dst_ip;
+ dst->src_ip |= src->src_ip;
+ }
+ return dst;
+}
+
+static int
+nonzero_mask(struct ipfw_flow_id *m)
+{
+ if (m->dst_port || m->src_port || m->proto || m->extra)
+ return 1;
+ if (IS_IP6_FLOW_ID(m)) {
+ return
+ m->dst_ip6.__u6_addr.__u6_addr32[0] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[1] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[2] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[3] ||
+ m->src_ip6.__u6_addr.__u6_addr32[0] ||
+ m->src_ip6.__u6_addr.__u6_addr32[1] ||
+ m->src_ip6.__u6_addr.__u6_addr32[2] ||
+ m->src_ip6.__u6_addr.__u6_addr32[3] ||
+ m->flow_id6;
+ } else {
+ return m->dst_ip || m->src_ip;
+ }
+}
+
+/* XXX we may want a better hash function */
+static uint32_t
+flow_id_hash(struct ipfw_flow_id *id)
+{
+ uint32_t i;
+
+ if (IS_IP6_FLOW_ID(id)) {
+ uint32_t *d = (uint32_t *)&id->dst_ip6;
+ uint32_t *s = (uint32_t *)&id->src_ip6;
+ i = (d[0] ) ^ (d[1]) ^
+ (d[2] ) ^ (d[3]) ^
+ (d[0] >> 15) ^ (d[1] >> 15) ^
+ (d[2] >> 15) ^ (d[3] >> 15) ^
+ (s[0] << 1) ^ (s[1] << 1) ^
+ (s[2] << 1) ^ (s[3] << 1) ^
+ (s[0] << 16) ^ (s[1] << 16) ^
+ (s[2] << 16) ^ (s[3] << 16) ^
+ (id->dst_port << 1) ^ (id->src_port) ^
+ (id->extra) ^
+ (id->proto ) ^ (id->flow_id6);
+ } else {
+ i = (id->dst_ip) ^ (id->dst_ip >> 15) ^
+ (id->src_ip << 1) ^ (id->src_ip >> 16) ^
+ (id->extra) ^
+ (id->dst_port << 1) ^ (id->src_port) ^ (id->proto);
+ }
+ return i;
+}
+
+/* Like bcmp, returns 0 if ids match, 1 otherwise. */
+static int
+flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
+{
+ int is_v6 = IS_IP6_FLOW_ID(id1);
+
+ if (!is_v6) {
+ if (IS_IP6_FLOW_ID(id2))
+ return 1; /* different address families */
+
+ return (id1->dst_ip == id2->dst_ip &&
+ id1->src_ip == id2->src_ip &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
+ id1->extra == id2->extra) ? 0 : 1;
+ }
+ /* the ipv6 case */
+ return (
+ !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) &&
+ !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
+ id1->extra == id2->extra &&
+ id1->flow_id6 == id2->flow_id6) ? 0 : 1;
+}
+/*--------- end of flow-id mask, hash and compare ---------*/
+
+/*--- support functions for the qht hashtable ----
+ * Entries are hashed by flow-id
+ */
+static uint32_t
+q_hash(uintptr_t key, int flags, void *arg)
+{
+ /* compute the hash slot from the flow id */
+ struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_queue *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
+
+ return flow_id_hash(id);
+}
+
+static int
+q_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_queue *o = (struct dn_queue *)obj;
+ struct ipfw_flow_id *id2;
+
+ if (flags & DNHT_KEY_IS_OBJ) {
+ /* compare pointers */
+ id2 = &((struct dn_queue *)key)->ni.fid;
+ } else {
+ id2 = (struct ipfw_flow_id *)key;
+ }
+ return (0 == flow_id_cmp(&o->ni.fid, id2));
+}
+
+/*
+ * create a new queue instance for the given 'key'.
+ */
+static void *
+q_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_queue *q, *template = arg;
+ struct dn_fsk *fs = template->fs;
+ int size = sizeof(*q) + fs->sched->fp->q_datalen;
+
+ q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (q == NULL) {
+ D("no memory for new queue");
+ return NULL;
+ }
+
+ set_oid(&q->ni.oid, DN_QUEUE, size);
+ if (fs->fs.flags & DN_QHT_HASH)
+ q->ni.fid = *(struct ipfw_flow_id *)key;
+ q->fs = fs;
+ q->_si = template->_si;
+ q->_si->q_count++;
+
+ if (fs->sched->fp->new_queue)
+ fs->sched->fp->new_queue(q);
+ dn_cfg.queue_count++;
+ return q;
+}
+
+/*
+ * Notify schedulers that a queue is going away.
+ * If (flags & DN_DESTROY), also free the packets.
+ * The version for callbacks is called q_delete_cb().
+ */
+static void
+dn_delete_queue(struct dn_queue *q, int flags)
+{
+ struct dn_fsk *fs = q->fs;
+
+ // D("fs %p si %p\n", fs, q->_si);
+ /* notify the parent scheduler that the queue is going away */
+ if (fs && fs->sched->fp->free_queue)
+ fs->sched->fp->free_queue(q);
+ q->_si->q_count--;
+ q->_si = NULL;
+ if (flags & DN_DESTROY) {
+ if (q->mq.head)
+ dn_free_pkts(q->mq.head);
+ bzero(q, sizeof(*q)); // safety
+ free(q, M_DUMMYNET);
+ dn_cfg.queue_count--;
+ }
+}
+
+static int
+q_delete_cb(void *q, void *arg)
+{
+ int flags = (int)(uintptr_t)arg;
+ dn_delete_queue(q, flags);
+ return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
+}
+
+/*
+ * calls dn_delete_queue/q_delete_cb on all queues,
+ * which notifies the parent scheduler and possibly drains packets.
+ * flags & DN_DESTROY: drains queues and destroy qht;
+ */
+static void
+qht_delete(struct dn_fsk *fs, int flags)
+{
+ ND("fs %d start flags %d qht %p",
+ fs->fs.fs_nr, flags, fs->qht);
+ if (!fs->qht)
+ return;
+ if (fs->fs.flags & DN_QHT_HASH) {
+ dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
+ if (flags & DN_DESTROY) {
+ dn_ht_free(fs->qht, 0);
+ fs->qht = NULL;
+ }
+ } else {
+ dn_delete_queue((struct dn_queue *)(fs->qht), flags);
+ if (flags & DN_DESTROY)
+ fs->qht = NULL;
+ }
+}
+
+/*
+ * Find and possibly create the queue for a MULTIQUEUE scheduler.
+ * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
+ */
+struct dn_queue *
+ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
+ struct ipfw_flow_id *id)
+{
+ struct dn_queue template;
+
+ template._si = si;
+ template.fs = fs;
+
+ if (fs->fs.flags & DN_QHT_HASH) {
+ struct ipfw_flow_id masked_id;
+ if (fs->qht == NULL) {
+ fs->qht = dn_ht_init(NULL, fs->fs.buckets,
+ offsetof(struct dn_queue, q_next),
+ q_hash, q_match, q_new);
+ if (fs->qht == NULL)
+ return NULL;
+ }
+ masked_id = *id;
+ flow_id_mask(&fs->fsk_mask, &masked_id);
+ return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
+ DNHT_INSERT, &template);
+ } else {
+ if (fs->qht == NULL)
+ fs->qht = q_new(0, 0, &template);
+ return (struct dn_queue *)fs->qht;
+ }
+}
+/*--- end of queue hash table ---*/
+
+/*--- support functions for the sch_inst hashtable ----
+ *
+ * These are hashed by flow-id
+ */
+static uint32_t
+si_hash(uintptr_t key, int flags, void *arg)
+{
+ /* compute the hash slot from the flow id */
+ struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_sch_inst *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
+
+ return flow_id_hash(id);
+}
+
+static int
+si_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_sch_inst *o = obj;
+ struct ipfw_flow_id *id2;
+
+ id2 = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_sch_inst *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
+ return flow_id_cmp(&o->ni.fid, id2) == 0;
+}
+
+/*
+ * create a new instance for the given 'key'
+ * Allocate memory for instance, delay line and scheduler private data.
+ */
+static void *
+si_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_schk *s = arg;
+ struct dn_sch_inst *si;
+ int l = sizeof(*si) + s->fp->si_datalen;
+
+ si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (si == NULL)
+ goto error;
+ /* Set length only for the part passed up to userland. */
+ set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
+ set_oid(&(si->dline.oid), DN_DELAY_LINE,
+ sizeof(struct delay_line));
+ /* mark si and dline as outside the event queue */
+ si->ni.oid.id = si->dline.oid.id = -1;
+
+ si->sched = s;
+ si->dline.si = si;
+
+ if (s->fp->new_sched && s->fp->new_sched(si)) {
+ D("new_sched error");
+ goto error;
+ }
+ if (s->sch.flags & DN_HAVE_MASK)
+ si->ni.fid = *(struct ipfw_flow_id *)key;
+
+ dn_cfg.si_count++;
+ return si;
+
+error:
+ if (si) {
+ bzero(si, sizeof(*si)); // safety
+ free(si, M_DUMMYNET);
+ }
+ return NULL;
+}
+
+/*
+ * Callback from siht to delete all scheduler instances. Remove
+ * si and delay line from the system heap, destroy all queues.
+ * We assume that all flowsets have been notified and do not
+ * point to us anymore.
+ */
+static int
+si_destroy(void *_si, void *arg)
+{
+ struct dn_sch_inst *si = _si;
+ struct dn_schk *s = si->sched;
+ struct delay_line *dl = &si->dline;
+
+ if (dl->oid.subtype) /* remove delay line from event heap */
+ heap_extract(&dn_cfg.evheap, dl);
+ dn_free_pkts(dl->mq.head); /* drain delay line */
+ if (si->kflags & DN_ACTIVE) /* remove si from event heap */
+ heap_extract(&dn_cfg.evheap, si);
+ if (s->fp->free_sched)
+ s->fp->free_sched(si);
+ bzero(si, sizeof(*si)); /* safety */
+ free(si, M_DUMMYNET);
+ dn_cfg.si_count--;
+ return DNHT_SCAN_DEL;
+}
+
+/*
+ * Find the scheduler instance for this packet. If we need to apply
+ * a mask, do it on a local copy of the flow_id to preserve the original.
+ * Assume siht is always initialized if we have a mask.
+ */
+struct dn_sch_inst *
+ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id)
+{
+
+ if (s->sch.flags & DN_HAVE_MASK) {
+ struct ipfw_flow_id id_t = *id;
+ flow_id_mask(&s->sch.sched_mask, &id_t);
+ return dn_ht_find(s->siht, (uintptr_t)&id_t,
+ DNHT_INSERT, s);
+ }
+ if (!s->siht)
+ s->siht = si_new(0, 0, s);
+ return (struct dn_sch_inst *)s->siht;
+}
+
+/* callback to flush credit for the scheduler instance */
+static int
+si_reset_credit(void *_si, void *arg)
+{
+ struct dn_sch_inst *si = _si;
+ struct dn_link *p = &si->sched->link;
+
+ si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
+ return 0;
+}
+
+static void
+schk_reset_credit(struct dn_schk *s)
+{
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, si_reset_credit, NULL);
+ else if (s->siht)
+ si_reset_credit(s->siht, NULL);
+}
+/*---- end of sch_inst hashtable ---------------------*/
+
+/*-------------------------------------------------------
+ * flowset hash (fshash) support. Entries are hashed by fs_nr.
+ * New allocations are put in the fsunlinked list, from which
+ * they are removed when they point to a specific scheduler.
+ */
+static uint32_t
+fsk_hash(uintptr_t key, int flags, void *arg)
+{
+ uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_fsk *)key)->fs.fs_nr;
+
+ return ( (i>>8)^(i>>4)^i );
+}
+
+static int
+fsk_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_fsk *fs = obj;
+ int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_fsk *)key)->fs.fs_nr;
+
+ return (fs->fs.fs_nr == i);
+}
+
+static void *
+fsk_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_fsk *fs;
+
+ fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (fs) {
+ set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs));
+ dn_cfg.fsk_count++;
+ fs->drain_bucket = 0;
+ SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
+ }
+ return fs;
+}
+
+/*
+ * detach flowset from its current scheduler. Flags as follows:
+ * DN_DETACH removes from the fsk_list
+ * DN_DESTROY deletes individual queues
+ * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked).
+ */
+static void
+fsk_detach(struct dn_fsk *fs, int flags)
+{
+ if (flags & DN_DELETE_FS)
+ flags |= DN_DESTROY;
+ ND("fs %d from sched %d flags %s %s %s",
+ fs->fs.fs_nr, fs->fs.sched_nr,
+ (flags & DN_DELETE_FS) ? "DEL_FS":"",
+ (flags & DN_DESTROY) ? "DEL":"",
+ (flags & DN_DETACH) ? "DET":"");
+ if (flags & DN_DETACH) { /* detach from the list */
+ struct dn_fsk_head *h;
+ h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu;
+ SLIST_REMOVE(h, fs, dn_fsk, sch_chain);
+ }
+ /* Free the RED parameters, they will be recomputed on
+ * subsequent attach if needed.
+ */
+ if (fs->w_q_lookup)
+ free(fs->w_q_lookup, M_DUMMYNET);
+ fs->w_q_lookup = NULL;
+ qht_delete(fs, flags);
+ if (fs->sched && fs->sched->fp->free_fsk)
+ fs->sched->fp->free_fsk(fs);
+ fs->sched = NULL;
+ if (flags & DN_DELETE_FS) {
+ bzero(fs, sizeof(*fs)); /* safety */
+ free(fs, M_DUMMYNET);
+ dn_cfg.fsk_count--;
+ } else {
+ SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
+ }
+}
+
+/*
+ * Detach or destroy all flowsets in a list.
+ * flags specifies what to do:
+ * DN_DESTROY: flush all queues
+ * DN_DELETE_FS: DN_DESTROY + destroy flowset
+ * DN_DELETE_FS implies DN_DESTROY
+ */
+static void
+fsk_detach_list(struct dn_fsk_head *h, int flags)
+{
+ struct dn_fsk *fs;
+ int n = 0; /* only for stats */
+
+ ND("head %p flags %x", h, flags);
+ while ((fs = SLIST_FIRST(h))) {
+ SLIST_REMOVE_HEAD(h, sch_chain);
+ n++;
+ fsk_detach(fs, flags);
+ }
+ ND("done %d flowsets", n);
+}
+
+/*
+ * called on 'queue X delete' -- removes the flowset from fshash,
+ * deletes all queues for the flowset, and removes the flowset.
+ */
+static int
+delete_fs(int i, int locked)
+{
+ struct dn_fsk *fs;
+ int err = 0;
+
+ if (!locked)
+ DN_BH_WLOCK();
+ fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL);
+ ND("fs %d found %p", i, fs);
+ if (fs) {
+ fsk_detach(fs, DN_DETACH | DN_DELETE_FS);
+ err = 0;
+ } else
+ err = EINVAL;
+ if (!locked)
+ DN_BH_WUNLOCK();
+ return err;
+}
+
+/*----- end of flowset hashtable support -------------*/
+
+/*------------------------------------------------------------
+ * Scheduler hash. When searching by index we pass sched_nr,
+ * otherwise we pass struct dn_sch * which is the first field in
+ * struct dn_schk so we can cast between the two. We use this trick
+ * because in the create phase (but it should be fixed).
+ */
+static uint32_t
+schk_hash(uintptr_t key, int flags, void *_arg)
+{
+ uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_schk *)key)->sch.sched_nr;
+ return ( (i>>8)^(i>>4)^i );
+}
+
+static int
+schk_match(void *obj, uintptr_t key, int flags, void *_arg)
+{
+ struct dn_schk *s = (struct dn_schk *)obj;
+ int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_schk *)key)->sch.sched_nr;
+ return (s->sch.sched_nr == i);
+}
+
+/*
+ * Create the entry and initialize it with the sched hash if needed.
+ * Leave s->fp unset so we can tell whether a dn_ht_find() returns
+ * a new object or a previously existing one.
+ */
+static void *
+schk_new(uintptr_t key, int flags, void *arg)
+{
+ struct schk_new_arg *a = arg;
+ struct dn_schk *s;
+ int l = sizeof(*s) +a->fp->schk_datalen;
+
+ s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (s == NULL)
+ return NULL;
+ set_oid(&s->link.oid, DN_LINK, sizeof(s->link));
+ s->sch = *a->sch; // copy initial values
+ s->link.link_nr = s->sch.sched_nr;
+ SLIST_INIT(&s->fsk_list);
+ /* initialize the hash table or create the single instance */
+ s->fp = a->fp; /* si_new needs this */
+ s->drain_bucket = 0;
+ if (s->sch.flags & DN_HAVE_MASK) {
+ s->siht = dn_ht_init(NULL, s->sch.buckets,
+ offsetof(struct dn_sch_inst, si_next),
+ si_hash, si_match, si_new);
+ if (s->siht == NULL) {
+ free(s, M_DUMMYNET);
+ return NULL;
+ }
+ }
+ s->fp = NULL; /* mark as a new scheduler */
+ dn_cfg.schk_count++;
+ return s;
+}
+
+/*
+ * Callback for sched delete. Notify all attached flowsets to
+ * detach from the scheduler, destroy the internal flowset, and
+ * all instances. The scheduler goes away too.
+ * arg is 0 (only detach flowsets and destroy instances)
+ * DN_DESTROY (detach & delete queues, delete schk)
+ * or DN_DELETE_FS (delete queues and flowsets, delete schk)
+ */
+static int
+schk_delete_cb(void *obj, void *arg)
+{
+ struct dn_schk *s = obj;
+#if 0
+ int a = (int)arg;
+ ND("sched %d arg %s%s",
+ s->sch.sched_nr,
+ a&DN_DESTROY ? "DEL ":"",
+ a&DN_DELETE_FS ? "DEL_FS":"");
+#endif
+ fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0);
+ /* no more flowset pointing to us now */
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, si_destroy, NULL);
+ else if (s->siht)
+ si_destroy(s->siht, NULL);
+ if (s->profile) {
+ free(s->profile, M_DUMMYNET);
+ s->profile = NULL;
+ }
+ s->siht = NULL;
+ if (s->fp->destroy)
+ s->fp->destroy(s);
+ bzero(s, sizeof(*s)); // safety
+ free(obj, M_DUMMYNET);
+ dn_cfg.schk_count--;
+ return DNHT_SCAN_DEL;
+}
+
+/*
+ * called on a 'sched X delete' command. Deletes a single scheduler.
+ * This is done by removing from the schedhash, unlinking all
+ * flowsets and deleting their traffic.
+ */
+static int
+delete_schk(int i)
+{
+ struct dn_schk *s;
+
+ s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
+ ND("%d %p", i, s);
+ if (!s)
+ return EINVAL;
+ delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */
+ /* then detach flowsets, delete traffic */
+ schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY);
+ return 0;
+}
+/*--- end of schk hashtable support ---*/
+
+static int
+copy_obj(char **start, char *end, void *_o, const char *msg, int i)
+{
+ struct dn_id *o = _o;
+ int have = end - *start;
+
+ if (have < o->len || o->len == 0 || o->type == 0) {
+ D("(WARN) type %d %s %d have %d need %d",
+ o->type, msg, i, have, o->len);
+ return 1;
+ }
+ ND("type %d %s %d len %d", o->type, msg, i, o->len);
+ bcopy(_o, *start, o->len);
+ if (o->type == DN_LINK) {
+ /* Adjust burst parameter for link */
+ struct dn_link *l = (struct dn_link *)*start;
+ l->burst = div64(l->burst, 8 * hz);
+ } else if (o->type == DN_SCH) {
+ /* Set id->id to the number of instances */
+ struct dn_schk *s = _o;
+ struct dn_id *id = (struct dn_id *)(*start);
+ id->id = (s->sch.flags & DN_HAVE_MASK) ?
+ dn_ht_entries(s->siht) : (s->siht ? 1 : 0);
+ }
+ *start += o->len;
+ return 0;
+}
+
+/* Specific function to copy a queue.
+ * Copies only the user-visible part of a queue (which is in
+ * a struct dn_flow), and sets len accordingly.
+ */
+static int
+copy_obj_q(char **start, char *end, void *_o, const char *msg, int i)
+{
+ struct dn_id *o = _o;
+ int have = end - *start;
+ int len = sizeof(struct dn_flow); /* see above comment */
+
+ if (have < len || o->len == 0 || o->type != DN_QUEUE) {
+ D("ERROR type %d %s %d have %d need %d",
+ o->type, msg, i, have, len);
+ return 1;
+ }
+ ND("type %d %s %d len %d", o->type, msg, i, len);
+ bcopy(_o, *start, len);
+ ((struct dn_id*)(*start))->len = len;
+ *start += len;
+ return 0;
+}
+
+static int
+copy_q_cb(void *obj, void *arg)
+{
+ struct dn_queue *q = obj;
+ struct copy_args *a = arg;
+ struct dn_flow *ni = (struct dn_flow *)(*a->start);
+ if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1))
+ return DNHT_SCAN_END;
+ ni->oid.type = DN_FLOW; /* override the DN_QUEUE */
+ ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL);
+ return 0;
+}
+
+static int
+copy_q(struct copy_args *a, struct dn_fsk *fs, int flags)
+{
+ if (!fs->qht)
+ return 0;
+ if (fs->fs.flags & DN_QHT_HASH)
+ dn_ht_scan(fs->qht, copy_q_cb, a);
+ else
+ copy_q_cb(fs->qht, a);
+ return 0;
+}
+
+/*
+ * This routine only copies the initial part of a profile. XXX
+ */
+static int
+copy_profile(struct copy_args *a, struct dn_profile *p)
+{
+ int have = a->end - *a->start;
+ /* XXX here we check for max length */
+ int profile_len = sizeof(struct dn_profile) -
+ ED_MAX_SAMPLES_NO*sizeof(int);
+
+ if (p == NULL)
+ return 0;
+ if (have < profile_len) {
+ D("error have %d need %d", have, profile_len);
+ return 1;
+ }
+ bcopy(p, *a->start, profile_len);
+ ((struct dn_id *)(*a->start))->len = profile_len;
+ *a->start += profile_len;
+ return 0;
+}
+
+static int
+copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags)
+{
+ struct dn_fs *ufs = (struct dn_fs *)(*a->start);
+ if (!fs)
+ return 0;
+ ND("flowset %d", fs->fs.fs_nr);
+ if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr))
+ return DNHT_SCAN_END;
+ ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ?
+ dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0);
+ if (flags) { /* copy queues */
+ copy_q(a, fs, 0);
+ }
+ return 0;
+}
+
+static int
+copy_si_cb(void *obj, void *arg)
+{
+ struct dn_sch_inst *si = obj;
+ struct copy_args *a = arg;
+ struct dn_flow *ni = (struct dn_flow *)(*a->start);
+ if (copy_obj(a->start, a->end, &si->ni, "inst",
+ si->sched->sch.sched_nr))
+ return DNHT_SCAN_END;
+ ni->oid.type = DN_FLOW; /* override the DN_SCH_I */
+ ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL);
+ return 0;
+}
+
+static int
+copy_si(struct copy_args *a, struct dn_schk *s, int flags)
+{
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, copy_si_cb, a);
+ else if (s->siht)
+ copy_si_cb(s->siht, a);
+ return 0;
+}
+
+/*
+ * compute a list of children of a scheduler and copy up
+ */
+static int
+copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags)
+{
+ struct dn_fsk *fs;
+ struct dn_id *o;
+ uint32_t *p;
+
+ int n = 0, space = sizeof(*o);
+ SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
+ if (fs->fs.fs_nr < DN_MAX_ID)
+ n++;
+ }
+ space += n * sizeof(uint32_t);
+ DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n);
+ if (a->end - *(a->start) < space)
+ return DNHT_SCAN_END;
+ o = (struct dn_id *)(*(a->start));
+ o->len = space;
+ *a->start += o->len;
+ o->type = DN_TEXT;
+ p = (uint32_t *)(o+1);
+ SLIST_FOREACH(fs, &s->fsk_list, sch_chain)
+ if (fs->fs.fs_nr < DN_MAX_ID)
+ *p++ = fs->fs.fs_nr;
+ return 0;
+}
+
+static int
+copy_data_helper(void *_o, void *_arg)
+{
+ struct copy_args *a = _arg;
+ uint32_t *r = a->extra->r; /* start of first range */
+ uint32_t *lim; /* first invalid pointer */
+ int n;
+
+ lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len);
+
+ if (a->type == DN_LINK || a->type == DN_SCH) {
+ /* pipe|sched show, we receive a dn_schk */
+ struct dn_schk *s = _o;
+
+ n = s->sch.sched_nr;
+ if (a->type == DN_SCH && n >= DN_MAX_ID)
+ return 0; /* not a scheduler */
+ if (a->type == DN_LINK && n <= DN_MAX_ID)
+ return 0; /* not a pipe */
+
+ /* see if the object is within one of our ranges */
+ for (;r < lim; r += 2) {
+ if (n < r[0] || n > r[1])
+ continue;
+ /* Found a valid entry, copy and we are done */
+ if (a->flags & DN_C_LINK) {
+ if (copy_obj(a->start, a->end,
+ &s->link, "link", n))
+ return DNHT_SCAN_END;
+ if (copy_profile(a, s->profile))
+ return DNHT_SCAN_END;
+ if (copy_flowset(a, s->fs, 0))
+ return DNHT_SCAN_END;
+ }
+ if (a->flags & DN_C_SCH) {
+ if (copy_obj(a->start, a->end,
+ &s->sch, "sched", n))
+ return DNHT_SCAN_END;
+ /* list all attached flowsets */
+ if (copy_fsk_list(a, s, 0))
+ return DNHT_SCAN_END;
+ }
+ if (a->flags & DN_C_FLOW)
+ copy_si(a, s, 0);
+ break;
+ }
+ } else if (a->type == DN_FS) {
+ /* queue show, skip internal flowsets */
+ struct dn_fsk *fs = _o;
+
+ n = fs->fs.fs_nr;
+ if (n >= DN_MAX_ID)
+ return 0;
+ /* see if the object is within one of our ranges */
+ for (;r < lim; r += 2) {
+ if (n < r[0] || n > r[1])
+ continue;
+ if (copy_flowset(a, fs, 0))
+ return DNHT_SCAN_END;
+ copy_q(a, fs, 0);
+ break; /* we are done */
+ }
+ }
+ return 0;
+}
+
+static inline struct dn_schk *
+locate_scheduler(int i)
+{
+ return dn_ht_find(dn_cfg.schedhash, i, 0, NULL);
+}
+
+/*
+ * RED parameters are in fixed-point arithmetic.
+ */
+static int
+config_red(struct dn_fsk *fs)
+{
+ int64_t s, idle, weight, w0;
+ int t, i;
+
+ fs->w_q = fs->fs.w_q;
+ fs->max_p = fs->fs.max_p;
+ D("called");
+ /* Doing stuff that was in userland */
+ i = fs->sched->link.bandwidth;
+ s = (i <= 0) ? 0 :
+ hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;
+
+ idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */
+ fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth);
+ /* fs->lookup_step not scaled, */
+ if (!fs->lookup_step)
+ fs->lookup_step = 1;
+ w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled
+
+ for (t = fs->lookup_step; t > 1; --t)
+ weight = SCALE_MUL(weight, w0);
+ fs->lookup_weight = (int)(weight); // scaled
+
+ /* Now doing stuff that was in kerneland */
+ fs->min_th = SCALE(fs->fs.min_th);
+ fs->max_th = SCALE(fs->fs.max_th);
+
+ fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
+ fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
+
+ if (fs->fs.flags & DN_IS_GENTLE_RED) {
+ fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th;
+ fs->c_4 = SCALE(1) - 2 * fs->max_p;
+ }
+
+ /* If the lookup table already exists, free it and create it again. */
+ if (fs->w_q_lookup) {
+ free(fs->w_q_lookup, M_DUMMYNET);
+ fs->w_q_lookup = NULL;
+ }
+ if (dn_cfg.red_lookup_depth == 0) {
+ printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
+ "must be > 0\n");
+ fs->fs.flags &= ~DN_IS_RED;
+ fs->fs.flags &= ~DN_IS_GENTLE_RED;
+ return (EINVAL);
+ }
+ fs->lookup_depth = dn_cfg.red_lookup_depth;
+ fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int),
+ M_DUMMYNET, M_NOWAIT);
+ if (fs->w_q_lookup == NULL) {
+ printf("dummynet: sorry, cannot allocate red lookup table\n");
+ fs->fs.flags &= ~DN_IS_RED;
+ fs->fs.flags &= ~DN_IS_GENTLE_RED;
+ return(ENOSPC);
+ }
+
+ /* Fill the lookup table with (1 - w_q)^x */
+ fs->w_q_lookup[0] = SCALE(1) - fs->w_q;
+
+ for (i = 1; i < fs->lookup_depth; i++)
+ fs->w_q_lookup[i] =
+ SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight);
+
+ if (dn_cfg.red_avg_pkt_size < 1)
+ dn_cfg.red_avg_pkt_size = 512;
+ fs->avg_pkt_size = dn_cfg.red_avg_pkt_size;
+ if (dn_cfg.red_max_pkt_size < 1)
+ dn_cfg.red_max_pkt_size = 1500;
+ fs->max_pkt_size = dn_cfg.red_max_pkt_size;
+ D("exit");
+ return 0;
+}
+
+/* Scan all flowsets attached to this scheduler and update RED */
+static void
+update_red(struct dn_schk *s)
+{
+ struct dn_fsk *fs;
+ SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
+ if (fs && (fs->fs.flags & DN_IS_RED))
+ config_red(fs);
+ }
+}
+
+/* attach flowset to scheduler s, possibly requeue */
+static void
+fsk_attach(struct dn_fsk *fs, struct dn_schk *s)
+{
+ ND("remove fs %d from fsunlinked, link to sched %d",
+ fs->fs.fs_nr, s->sch.sched_nr);
+ SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain);
+ fs->sched = s;
+ SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
+ if (s->fp->new_fsk)
+ s->fp->new_fsk(fs);
+ /* XXX compute fsk_mask */
+ fs->fsk_mask = fs->fs.flow_mask;
+ if (fs->sched->sch.flags & DN_HAVE_MASK)
+ flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask);
+ if (fs->qht) {
+ /*
+ * we must drain qht according to the old
+ * type, and reinsert according to the new one.
+ * The requeue is complex -- in general we need to
+ * reclassify every single packet.
+ * For the time being, let's hope qht is never set
+ * when we reach this point.
+ */
+ D("XXX TODO requeue from fs %d to sch %d",
+ fs->fs.fs_nr, s->sch.sched_nr);
+ fs->qht = NULL;
+ }
+ /* set the new type for qht */
+ if (nonzero_mask(&fs->fsk_mask))
+ fs->fs.flags |= DN_QHT_HASH;
+ else
+ fs->fs.flags &= ~DN_QHT_HASH;
+
+ /* XXX config_red() can fail... */
+ if (fs->fs.flags & DN_IS_RED)
+ config_red(fs);
+}
+
+/* update all flowsets which may refer to this scheduler */
+static void
+update_fs(struct dn_schk *s)
+{
+ struct dn_fsk *fs, *tmp;
+
+ SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
+ if (s->sch.sched_nr != fs->fs.sched_nr) {
+ D("fs %d for sch %d not %d still unlinked",
+ fs->fs.fs_nr, fs->fs.sched_nr,
+ s->sch.sched_nr);
+ continue;
+ }
+ fsk_attach(fs, s);
+ }
+}
+
+/*
+ * Configuration -- to preserve backward compatibility we use
+ * the following scheme (N is 65536)
+ * NUMBER SCHED LINK FLOWSET
+ * 1 .. N-1 (1)WFQ (2)WFQ (3)queue
+ * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1
+ * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1
+ *
+ * "pipe i config" configures #1, #2 and #3
+ * "sched i config" configures #1 and possibly #6
+ * "queue i config" configures #3
+ * #1 is configured with 'pipe i config' or 'sched i config'
+ * #2 is configured with 'pipe i config', and created if not
+ * existing with 'sched i config'
+ * #3 is configured with 'queue i config'
+ * #4 is automatically configured after #1, can only be FIFO
+ * #5 is automatically configured after #2
+ * #6 is automatically created when #1 is !MULTIQUEUE,
+ * and can be updated.
+ * #7 is automatically configured after #2
+ */
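+
+/*
+ * Example of the mapping above (N = 65536): "pipe 10 config" touches
+ * scheduler 10 and link 10 (#1, #2) and flowset 10 (#3); the companion
+ * FIFO scheduler and link are number 65546 (#4, #5). If scheduler 10 is
+ * not MULTIQUEUE, its internal flowset is 65546 (#6), and the flowset
+ * serving FIFO scheduler 65546 is 131082 (#7). The numbers are only
+ * meant to illustrate the table, not additional configuration state.
+ */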
+
+/*
+ * configure a link (and its FIFO instance)
+ */
+static int
+config_link(struct dn_link *p, struct dn_id *arg)
+{
+ int i;
+
+ if (p->oid.len != sizeof(*p)) {
+ D("invalid pipe len %d", p->oid.len);
+ return EINVAL;
+ }
+ i = p->link_nr;
+ if (i <= 0 || i >= DN_MAX_ID)
+ return EINVAL;
+ /*
+ * The config program passes parameters as follows:
+ * bw = bits/second (0 means no limits),
+ * delay = ms, must be translated into ticks.
+ * qsize = slots/bytes
+ * burst ???
+ */
+ p->delay = (p->delay * hz) / 1000;
+ /* Scale burst size: bytes -> bits * hz */
+ p->burst *= 8 * hz;
+
+ DN_BH_WLOCK();
+ /* do it twice, base link and FIFO link */
+ for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
+ struct dn_schk *s = locate_scheduler(i);
+ if (s == NULL) {
+ DN_BH_WUNLOCK();
+ D("sched %d not found", i);
+ return EINVAL;
+ }
+ /* remove profile if exists */
+ if (s->profile) {
+ free(s->profile, M_DUMMYNET);
+ s->profile = NULL;
+ }
+ /* copy all parameters */
+ s->link.oid = p->oid;
+ s->link.link_nr = i;
+ s->link.delay = p->delay;
+ if (s->link.bandwidth != p->bandwidth) {
+ /* XXX bandwidth changes, need to update red params */
+ s->link.bandwidth = p->bandwidth;
+ update_red(s);
+ }
+ s->link.burst = p->burst;
+ schk_reset_credit(s);
+ }
+ dn_cfg.id++;
+ DN_BH_WUNLOCK();
+ return 0;
+}
+
+/*
+ * Configure a flowset. Can be called from inside with locked=1.
+ */
+static struct dn_fsk *
+config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
+{
+ int i;
+ struct dn_fsk *fs;
+
+ if (nfs->oid.len != sizeof(*nfs)) {
+ D("invalid flowset len %d", nfs->oid.len);
+ return NULL;
+ }
+ i = nfs->fs_nr;
+ if (i <= 0 || i >= 3*DN_MAX_ID)
+ return NULL;
+ ND("flowset %d", i);
+ /* XXX other sanity checks */
+ if (nfs->flags & DN_QSIZE_BYTES) {
+ ipdn_bound_var(&nfs->qsize, 16384,
+ 1500, dn_cfg.byte_limit, NULL); // "queue byte size");
+ } else {
+ ipdn_bound_var(&nfs->qsize, 50,
+ 1, dn_cfg.slot_limit, NULL); // "queue slot size");
+ }
+ if (nfs->flags & DN_HAVE_MASK) {
+ /* make sure we have some buckets */
+ ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size,
+ 1, dn_cfg.max_hash_size, "flowset buckets");
+ } else {
+ nfs->buckets = 1; /* we only need 1 */
+ }
+ if (!locked)
+ DN_BH_WLOCK();
+ do { /* exit with break when done */
+ struct dn_schk *s;
+ int flags = nfs->sched_nr ? DNHT_INSERT : 0;
+ int j;
+ int oldc = dn_cfg.fsk_count;
+ fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL);
+ if (fs == NULL) {
+ D("missing sched for flowset %d", i);
+ break;
+ }
+ /* grab some defaults from the existing one */
+ if (nfs->sched_nr == 0) /* reuse */
+ nfs->sched_nr = fs->fs.sched_nr;
+ for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) {
+ if (nfs->par[j] == -1) /* reuse */
+ nfs->par[j] = fs->fs.par[j];
+ }
+ if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
+ ND("flowset %d unchanged", i);
+ break; /* no change, nothing to do */
+ }
+ if (oldc != dn_cfg.fsk_count) /* new item */
+ dn_cfg.id++;
+ s = locate_scheduler(nfs->sched_nr);
+ /* detach from old scheduler if needed, preserving
+ * queues if we need to reattach. Then update the
+ * configuration, and possibly attach to the new sched.
+ */
+ DX(2, "fs %d changed sched %d@%p to %d@%p",
+ fs->fs.fs_nr,
+ fs->fs.sched_nr, fs->sched, nfs->sched_nr, s);
+ if (fs->sched) {
+ int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY);
+ flags |= DN_DESTROY; /* XXX temporary */
+ fsk_detach(fs, flags);
+ }
+ fs->fs = *nfs; /* copy configuration */
+ if (s != NULL)
+ fsk_attach(fs, s);
+ } while (0);
+ if (!locked)
+ DN_BH_WUNLOCK();
+ return fs;
+}
+
+/*
+ * config/reconfig a scheduler and its FIFO variant.
+ * For !MULTIQUEUE schedulers, also set up the flowset.
+ *
+ * On reconfigurations (detected because s->fp is set),
+ * detach the existing flowsets (preserving traffic), preserve the link,
+ * and delete the old scheduler, creating a new one.
+ */
+static int
+config_sched(struct dn_sch *_nsch, struct dn_id *arg)
+{
+ struct dn_schk *s;
+ struct schk_new_arg a; /* argument for schk_new */
+ int i;
+ struct dn_link p; /* copy of oldlink */
+ struct dn_profile *pf = NULL; /* copy of old link profile */
+	/* Used to preserve the mask parameter */
+ struct ipfw_flow_id new_mask;
+ int new_buckets = 0;
+ int new_flags = 0;
+ int pipe_cmd;
+ int err = ENOMEM;
+
+ a.sch = _nsch;
+ if (a.sch->oid.len != sizeof(*a.sch)) {
+ D("bad sched len %d", a.sch->oid.len);
+ return EINVAL;
+ }
+ i = a.sch->sched_nr;
+ if (i <= 0 || i >= DN_MAX_ID)
+ return EINVAL;
+ /* make sure we have some buckets */
+ if (a.sch->flags & DN_HAVE_MASK)
+ ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size,
+ 1, dn_cfg.max_hash_size, "sched buckets");
+ /* XXX other sanity checks */
+ bzero(&p, sizeof(p));
+
+ pipe_cmd = a.sch->flags & DN_PIPE_CMD;
+	a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if it is not set?
+ if (pipe_cmd) {
+ /* Copy mask parameter */
+ new_mask = a.sch->sched_mask;
+ new_buckets = a.sch->buckets;
+ new_flags = a.sch->flags;
+ }
+ DN_BH_WLOCK();
+again: /* run twice, for wfq and fifo */
+ /*
+	 * Look up the type. If not supplied, use the previous one
+	 * or default to WF2Q+; if an unknown type is given, return an error.
+ */
+ dn_cfg.id++;
+ a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name);
+ if (a.fp != NULL) {
+ /* found. Lookup or create entry */
+ s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a);
+ } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) {
+		/* No type. Search for an existing scheduler or retry with WF2Q+ */
+ s = dn_ht_find(dn_cfg.schedhash, i, 0, &a);
+ if (s != NULL) {
+ a.fp = s->fp;
+ /* Scheduler exists, skip to FIFO scheduler
+ * if command was pipe config...
+ */
+ if (pipe_cmd)
+ goto next;
+ } else {
+ /* New scheduler, create a wf2q+ with no mask
+ * if command was pipe config...
+ */
+ if (pipe_cmd) {
+ /* clear mask parameter */
+ bzero(&a.sch->sched_mask, sizeof(new_mask));
+ a.sch->buckets = 0;
+ a.sch->flags &= ~DN_HAVE_MASK;
+ }
+ a.sch->oid.subtype = DN_SCHED_WF2QP;
+ goto again;
+ }
+ } else {
+ D("invalid scheduler type %d %s",
+ a.sch->oid.subtype, a.sch->name);
+ err = EINVAL;
+ goto error;
+ }
+ /* normalize name and subtype */
+ a.sch->oid.subtype = a.fp->type;
+ bzero(a.sch->name, sizeof(a.sch->name));
+ strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name));
+ if (s == NULL) {
+ D("cannot allocate scheduler %d", i);
+ goto error;
+ }
+ /* restore existing link if any */
+ if (p.link_nr) {
+ s->link = p;
+ if (!pf || pf->link_nr != p.link_nr) { /* no saved value */
+ s->profile = NULL; /* XXX maybe not needed */
+ } else {
+ s->profile = malloc(sizeof(struct dn_profile),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (s->profile == NULL) {
+ D("cannot allocate profile");
+ goto error; //XXX
+ }
+ bcopy(pf, s->profile, sizeof(*pf));
+ }
+ }
+ p.link_nr = 0;
+ if (s->fp == NULL) {
+ DX(2, "sched %d new type %s", i, a.fp->name);
+ } else if (s->fp != a.fp ||
+ bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) {
+ /* already existing. */
+ DX(2, "sched %d type changed from %s to %s",
+ i, s->fp->name, a.fp->name);
+ DX(4, " type/sub %d/%d -> %d/%d",
+ s->sch.oid.type, s->sch.oid.subtype,
+ a.sch->oid.type, a.sch->oid.subtype);
+ if (s->link.link_nr == 0)
+ D("XXX WARNING link 0 for sched %d", i);
+ p = s->link; /* preserve link */
+ if (s->profile) {/* preserve profile */
+ if (!pf)
+ pf = malloc(sizeof(*pf),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (pf) /* XXX should issue a warning otherwise */
+ bcopy(s->profile, pf, sizeof(*pf));
+ }
+ /* remove from the hash */
+ dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
+ /* Detach flowsets, preserve queues. */
+ // schk_delete_cb(s, NULL);
+ // XXX temporarily, kill queues
+ schk_delete_cb(s, (void *)DN_DESTROY);
+ goto again;
+ } else {
+ DX(4, "sched %d unchanged type %s", i, a.fp->name);
+ }
+ /* complete initialization */
+ s->sch = *a.sch;
+ s->fp = a.fp;
+ s->cfg = arg;
+ // XXX schk_reset_credit(s);
+ /* create the internal flowset if needed,
+ * trying to reuse existing ones if available
+ */
+ if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) {
+ s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL);
+ if (!s->fs) {
+ struct dn_fs fs;
+ bzero(&fs, sizeof(fs));
+ set_oid(&fs.oid, DN_FS, sizeof(fs));
+ fs.fs_nr = i + DN_MAX_ID;
+ fs.sched_nr = i;
+ s->fs = config_fs(&fs, NULL, 1 /* locked */);
+ }
+ if (!s->fs) {
+ schk_delete_cb(s, (void *)DN_DESTROY);
+ D("error creating internal fs for %d", i);
+ goto error;
+ }
+ }
+ /* call init function after the flowset is created */
+ if (s->fp->config)
+ s->fp->config(s);
+ update_fs(s);
+next:
+ if (i < DN_MAX_ID) { /* now configure the FIFO instance */
+ i += DN_MAX_ID;
+ if (pipe_cmd) {
+ /* Restore mask parameter for FIFO */
+ a.sch->sched_mask = new_mask;
+ a.sch->buckets = new_buckets;
+ a.sch->flags = new_flags;
+ } else {
+ /* sched config shouldn't modify the FIFO scheduler */
+ if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) {
+				/* FIFO already exists, don't touch it */
+ err = 0; /* and this is not an error */
+ goto error;
+ }
+ }
+ a.sch->sched_nr = i;
+ a.sch->oid.subtype = DN_SCHED_FIFO;
+ bzero(a.sch->name, sizeof(a.sch->name));
+ goto again;
+ }
+ err = 0;
+error:
+ DN_BH_WUNLOCK();
+ if (pf)
+ free(pf, M_DUMMYNET);
+ return err;
+}
+
+/*
+ * attach a profile to a link
+ */
+static int
+config_profile(struct dn_profile *pf, struct dn_id *arg)
+{
+ struct dn_schk *s;
+ int i, olen, err = 0;
+
+ if (pf->oid.len < sizeof(*pf)) {
+ D("short profile len %d", pf->oid.len);
+ return EINVAL;
+ }
+ i = pf->link_nr;
+ if (i <= 0 || i >= DN_MAX_ID)
+ return EINVAL;
+ /* XXX other sanity checks */
+ DN_BH_WLOCK();
+ for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
+ s = locate_scheduler(i);
+
+ if (s == NULL) {
+ err = EINVAL;
+ break;
+ }
+ dn_cfg.id++;
+ /*
+ * If we had a profile and the new one does not fit,
+ * or it is deleted, then we need to free memory.
+ */
+ if (s->profile && (pf->samples_no == 0 ||
+ s->profile->oid.len < pf->oid.len)) {
+ free(s->profile, M_DUMMYNET);
+ s->profile = NULL;
+ }
+ if (pf->samples_no == 0)
+ continue;
+ /*
+ * new profile, possibly allocate memory
+ * and copy data.
+ */
+ if (s->profile == NULL)
+ s->profile = malloc(pf->oid.len,
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (s->profile == NULL) {
+ D("no memory for profile %d", i);
+ err = ENOMEM;
+ break;
+ }
+ /* preserve larger length XXX double check */
+ olen = s->profile->oid.len;
+ if (olen < pf->oid.len)
+ olen = pf->oid.len;
+ bcopy(pf, s->profile, pf->oid.len);
+ s->profile->oid.len = olen;
+ }
+ DN_BH_WUNLOCK();
+ return err;
+}
+
+/*
+ * Delete all objects.
+ */
+static void
+dummynet_flush(void)
+{
+
+ /* delete all schedulers and related links/queues/flowsets */
+ dn_ht_scan(dn_cfg.schedhash, schk_delete_cb,
+ (void *)(uintptr_t)DN_DELETE_FS);
+ /* delete all remaining (unlinked) flowsets */
+ DX(4, "still %d unlinked fs", dn_cfg.fsk_count);
+ dn_ht_free(dn_cfg.fshash, DNHT_REMOVE);
+ fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS);
+ /* Reinitialize system heap... */
+ heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
+}
+
+/*
+ * Main handler for configuration. We are guaranteed to be called
+ * with an oid which is at least a dn_id.
+ * - the first object is the command (config, delete, flush, ...)
+ * - config_link must be issued after the corresponding config_sched
+ * - parameters (DN_TEXT) for an object must precede the object,
+ *   and are processed on a config_sched.
+ */
+int
+do_config(void *p, int l)
+{
+ struct dn_id *next, *o;
+ int err = 0, err2 = 0;
+ struct dn_id *arg = NULL;
+ uintptr_t *a;
+
+ o = p;
+ if (o->id != DN_API_VERSION) {
+ D("invalid api version got %d need %d",
+ o->id, DN_API_VERSION);
+ return EINVAL;
+ }
+ for (; l >= sizeof(*o); o = next) {
+ struct dn_id *prev = arg;
+ if (o->len < sizeof(*o) || l < o->len) {
+ D("bad len o->len %d len %d", o->len, l);
+ err = EINVAL;
+ break;
+ }
+ l -= o->len;
+ next = (struct dn_id *)((char *)o + o->len);
+ err = 0;
+ switch (o->type) {
+ default:
+ D("cmd %d not implemented", o->type);
+ break;
+#ifdef EMULATE_SYSCTL
+ /* sysctl emulation.
+ * if we recognize the command, jump to the correct
+ * handler and return
+ */
+ case DN_SYSCTL_SET:
+ err = kesysctl_emu_set(p, l);
+ return err;
+#endif
+ case DN_CMD_CONFIG: /* simply a header */
+ break;
+
+ case DN_CMD_DELETE:
+ /* the argument is in the first uintptr_t after o */
+ a = (uintptr_t *)(o+1);
+ if (o->len < sizeof(*o) + sizeof(*a)) {
+ err = EINVAL;
+ break;
+ }
+ switch (o->subtype) {
+ case DN_LINK:
+ /* delete base and derived schedulers */
+ DN_BH_WLOCK();
+ err = delete_schk(*a);
+ err2 = delete_schk(*a + DN_MAX_ID);
+ DN_BH_WUNLOCK();
+ if (!err)
+ err = err2;
+ break;
+
+ default:
+ D("invalid delete type %d",
+ o->subtype);
+ err = EINVAL;
+ break;
+
+ case DN_FS:
+ err = (*a <1 || *a >= DN_MAX_ID) ?
+ EINVAL : delete_fs(*a, 0) ;
+ break;
+ }
+ break;
+
+ case DN_CMD_FLUSH:
+ DN_BH_WLOCK();
+ dummynet_flush();
+ DN_BH_WUNLOCK();
+ break;
+		case DN_TEXT:	/* store argument for the next block */
+ prev = NULL;
+ arg = o;
+ break;
+ case DN_LINK:
+ err = config_link((struct dn_link *)o, arg);
+ break;
+ case DN_PROFILE:
+ err = config_profile((struct dn_profile *)o, arg);
+ break;
+ case DN_SCH:
+ err = config_sched((struct dn_sch *)o, arg);
+ break;
+ case DN_FS:
+ err = (NULL==config_fs((struct dn_fs *)o, arg, 0));
+ break;
+ }
+ if (prev)
+ arg = NULL;
+ if (err != 0)
+ break;
+ }
+ return err;
+}
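+
+/*
+ * For illustration, a typical buffer handled by do_config() is laid out
+ * as a sequence of variable-size objects, each beginning with a struct
+ * dn_id whose 'len' field gives the offset of the next object:
+ *
+ *	[ dn_id   { .type = DN_CMD_CONFIG, .id = DN_API_VERSION } ]
+ *	[ dn_sch  ... ]		handled by config_sched()
+ *	[ dn_link ... ]		handled by config_link(), after its sched
+ *	[ dn_fs   ... ]		handled by config_fs()
+ *
+ * The exact sequence is built by userland ipfw(8); the layout shown is
+ * only an example of the rules listed before do_config().
+ */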
+
+static int
+compute_space(struct dn_id *cmd, struct copy_args *a)
+{
+ int x = 0, need = 0;
+ int profile_size = sizeof(struct dn_profile) -
+ ED_MAX_SAMPLES_NO*sizeof(int);
+
+ /* NOTE about compute space:
+ * NP = dn_cfg.schk_count
+ * NSI = dn_cfg.si_count
+ * NF = dn_cfg.fsk_count
+ * NQ = dn_cfg.queue_count
+ * - ipfw pipe show
+ * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
+ * link, scheduler template, flowset
+ * integrated in scheduler and header
+ * for flowset list
+	 *   (NSI)*(dn_flow) all scheduler instances (includes
+ * the queue instance)
+ * - ipfw sched show
+ * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
+ * link, scheduler template, flowset
+ * integrated in scheduler and header
+ * for flowset list
+ * (NSI * dn_flow) all scheduler instances
+ * (NF * sizeof(uint_32)) space for flowset list linked to scheduler
+	 *   (NQ * dn_queue) all queues [XXX for now not listed]
+	 * - ipfw queue show
+	 *   (NF * dn_fs) all flowsets
+ * (NQ * dn_queue) all queues
+ */
+ switch (cmd->subtype) {
+ default:
+ return -1;
+ /* XXX where do LINK and SCH differ ? */
+	/* 'ipfw sched show' could list all queues associated with
+	 * a scheduler. This feature is disabled for now.
+ */
+ case DN_LINK: /* pipe show */
+ x = DN_C_LINK | DN_C_SCH | DN_C_FLOW;
+ need += dn_cfg.schk_count *
+ (sizeof(struct dn_fs) + profile_size) / 2;
+ need += dn_cfg.fsk_count * sizeof(uint32_t);
+ break;
+ case DN_SCH: /* sched show */
+ need += dn_cfg.schk_count *
+ (sizeof(struct dn_fs) + profile_size) / 2;
+ need += dn_cfg.fsk_count * sizeof(uint32_t);
+ x = DN_C_SCH | DN_C_LINK | DN_C_FLOW;
+ break;
+ case DN_FS: /* queue show */
+ x = DN_C_FS | DN_C_QUEUE;
+ break;
+ case DN_GET_COMPAT: /* compatibility mode */
+ need = dn_compat_calc_size(dn_cfg);
+ break;
+ }
+ a->flags = x;
+ if (x & DN_C_SCH) {
+ need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2;
+		/* NOTE also, each fs might be attached to a sched */
+ need += dn_cfg.schk_count * sizeof(struct dn_id) / 2;
+ }
+ if (x & DN_C_FS)
+ need += dn_cfg.fsk_count * sizeof(struct dn_fs);
+ if (x & DN_C_LINK) {
+ need += dn_cfg.schk_count * sizeof(struct dn_link) / 2;
+ }
+ /*
+ * When exporting a queue to userland, only pass up the
+ * struct dn_flow, which is the only visible part.
+ */
+
+ if (x & DN_C_QUEUE)
+ need += dn_cfg.queue_count * sizeof(struct dn_flow);
+ if (x & DN_C_FLOW)
+ need += dn_cfg.si_count * (sizeof(struct dn_flow));
+ return need;
+}
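+
+/*
+ * Note on the /2 factors above: schedulers exist in pairs (the base
+ * instance plus its FIFO companion at number + DN_MAX_ID, see the
+ * numbering scheme earlier in this file), and dn_cfg.schk_count counts
+ * both halves while only the base half is exported. So, purely as an
+ * example, two configured pipes give schk_count == 4 and room is
+ * reserved for 2 links, 2 schedulers, 2 flowsets (plus profile space)
+ * and one dn_flow per scheduler instance.
+ */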
+
+/*
+ * If compat != NULL dummynet_get is called in compatibility mode.
+ * *compat will be the pointer to the buffer to pass to ipfw
+ */
+int
+dummynet_get(struct sockopt *sopt, void **compat)
+{
+ int have, i, need, error;
+ char *start = NULL, *buf;
+ size_t sopt_valsize;
+ struct dn_id *cmd;
+ struct copy_args a;
+ struct copy_range r;
+ int l = sizeof(struct dn_id);
+
+ bzero(&a, sizeof(a));
+ bzero(&r, sizeof(r));
+
+ /* save and restore original sopt_valsize around copyin */
+ sopt_valsize = sopt->sopt_valsize;
+
+ cmd = &r.o;
+
+ if (!compat) {
+ /* copy at least an oid, and possibly a full object */
+ error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd));
+ sopt->sopt_valsize = sopt_valsize;
+ if (error)
+ goto done;
+ l = cmd->len;
+#ifdef EMULATE_SYSCTL
+ /* sysctl emulation. */
+ if (cmd->type == DN_SYSCTL_GET)
+ return kesysctl_emu_get(sopt);
+#endif
+ if (l > sizeof(r)) {
+ /* request larger than default, allocate buffer */
+ cmd = malloc(l, M_DUMMYNET, M_WAIT);
+ if (cmd == NULL)
+ return ENOMEM; //XXX
+ error = sooptcopyin(sopt, cmd, l, l);
+ sopt->sopt_valsize = sopt_valsize;
+ if (error)
+ goto done;
+ }
+ } else { /* compatibility */
+ error = 0;
+ cmd->type = DN_CMD_GET;
+ cmd->len = sizeof(struct dn_id);
+ cmd->subtype = DN_GET_COMPAT;
+ // cmd->id = sopt_valsize;
+ D("compatibility mode");
+ }
+ a.extra = (struct copy_range *)cmd;
+ if (cmd->len == sizeof(*cmd)) { /* no range, create a default */
+ uint32_t *rp = (uint32_t *)(cmd + 1);
+ cmd->len += 2* sizeof(uint32_t);
+ rp[0] = 1;
+ rp[1] = DN_MAX_ID - 1;
+ if (cmd->subtype == DN_LINK) {
+ rp[0] += DN_MAX_ID;
+ rp[1] += DN_MAX_ID;
+ }
+ }
+ /* Count space (under lock) and allocate (outside lock).
+ * Exit with lock held if we manage to get enough buffer.
+ * Try a few times then give up.
+ */
+ for (have = 0, i = 0; i < 10; i++) {
+ DN_BH_WLOCK();
+ need = compute_space(cmd, &a);
+
+ /* if there is a range, ignore value from compute_space() */
+ if (l > sizeof(*cmd))
+ need = sopt_valsize - sizeof(*cmd);
+
+ if (need < 0) {
+ DN_BH_WUNLOCK();
+ error = EINVAL;
+ goto done;
+ }
+ need += sizeof(*cmd);
+ cmd->id = need;
+ if (have >= need)
+ break;
+
+ DN_BH_WUNLOCK();
+ if (start)
+ free(start, M_DUMMYNET);
+ start = NULL;
+ if (need > sopt_valsize)
+ break;
+
+ have = need;
+ start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO);
+ if (start == NULL) {
+ error = ENOMEM;
+ goto done;
+ }
+ }
+
+ if (start == NULL) {
+ if (compat) {
+ *compat = NULL;
+ error = 1; // XXX
+ } else {
+ error = sooptcopyout(sopt, cmd, sizeof(*cmd));
+ }
+ goto done;
+ }
+ ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, "
+ "%d:%d si %d, %d:%d queues %d",
+ dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH,
+ dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK,
+ dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS,
+ dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I,
+ dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE);
+ sopt->sopt_valsize = sopt_valsize;
+ a.type = cmd->subtype;
+
+ if (compat == NULL) {
+ bcopy(cmd, start, sizeof(*cmd));
+ ((struct dn_id*)(start))->len = sizeof(struct dn_id);
+ buf = start + sizeof(*cmd);
+ } else
+ buf = start;
+ a.start = &buf;
+ a.end = start + have;
+ /* start copying other objects */
+ if (compat) {
+ a.type = DN_COMPAT_PIPE;
+ dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a);
+ a.type = DN_COMPAT_QUEUE;
+ dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a);
+ } else if (a.type == DN_FS) {
+ dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a);
+ } else {
+ dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a);
+ }
+ DN_BH_WUNLOCK();
+
+ if (compat) {
+ *compat = start;
+ sopt->sopt_valsize = buf - start;
+ /* free() is done by ip_dummynet_compat() */
+ start = NULL; //XXX hack
+ } else {
+ error = sooptcopyout(sopt, start, buf - start);
+ }
+done:
+ if (cmd && cmd != &r.o)
+ free(cmd, M_DUMMYNET);
+ if (start)
+ free(start, M_DUMMYNET);
+ return error;
+}
+
+/* Callback called on scheduler instance to delete it if idle */
+static int
+drain_scheduler_cb(void *_si, void *arg)
+{
+ struct dn_sch_inst *si = _si;
+
+ if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL)
+ return 0;
+
+ if (si->sched->fp->flags & DN_MULTIQUEUE) {
+ if (si->q_count == 0)
+ return si_destroy(si, NULL);
+ else
+ return 0;
+ } else { /* !DN_MULTIQUEUE */
+ if ((si+1)->ni.length == 0)
+ return si_destroy(si, NULL);
+ else
+ return 0;
+ }
+ return 0; /* unreachable */
+}
+
+/* Callback called on scheduler to check if it has instances */
+static int
+drain_scheduler_sch_cb(void *_s, void *arg)
+{
+ struct dn_schk *s = _s;
+
+ if (s->sch.flags & DN_HAVE_MASK) {
+ dn_ht_scan_bucket(s->siht, &s->drain_bucket,
+ drain_scheduler_cb, NULL);
+ s->drain_bucket++;
+ } else {
+ if (s->siht) {
+ if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL)
+ s->siht = NULL;
+ }
+ }
+ return 0;
+}
+
+/* Called every tick, try to delete a 'bucket' of schedulers */
+void
+dn_drain_scheduler(void)
+{
+ dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch,
+ drain_scheduler_sch_cb, NULL);
+ dn_cfg.drain_sch++;
+}
+
+/* Callback called on queue to delete if it is idle */
+static int
+drain_queue_cb(void *_q, void *arg)
+{
+ struct dn_queue *q = _q;
+
+ if (q->ni.length == 0) {
+ dn_delete_queue(q, DN_DESTROY);
+ return DNHT_SCAN_DEL; /* queue is deleted */
+ }
+
+ return 0; /* queue isn't deleted */
+}
+
+/* Callback called on flowset used to check if it has queues */
+static int
+drain_queue_fs_cb(void *_fs, void *arg)
+{
+ struct dn_fsk *fs = _fs;
+
+ if (fs->fs.flags & DN_QHT_HASH) {
+ /* Flowset has a hash table for queues */
+ dn_ht_scan_bucket(fs->qht, &fs->drain_bucket,
+ drain_queue_cb, NULL);
+ fs->drain_bucket++;
+ } else {
+ /* No hash table for this flowset, null the pointer
+ * if the queue is deleted
+ */
+ if (fs->qht) {
+ if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL)
+ fs->qht = NULL;
+ }
+ }
+ return 0;
+}
+
+/* Called every tick, try to delete a 'bucket' of queues */
+void
+dn_drain_queue(void)
+{
+	/* scan a bucket of flowsets */
+ dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs,
+ drain_queue_fs_cb, NULL);
+ dn_cfg.drain_fs++;
+}
+
+/*
+ * Handler for the various dummynet socket options
+ */
+static int
+ip_dn_ctl(struct sockopt *sopt)
+{
+ void *p = NULL;
+ int error, l;
+
+ error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
+ if (error)
+ return (error);
+
+ /* Disallow sets in really-really secure mode. */
+ if (sopt->sopt_dir == SOPT_SET) {
+ error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
+ if (error)
+ return (error);
+ }
+
+ switch (sopt->sopt_name) {
+ default :
+ D("dummynet: unknown option %d", sopt->sopt_name);
+ error = EINVAL;
+ break;
+
+ case IP_DUMMYNET_FLUSH:
+ case IP_DUMMYNET_CONFIGURE:
+ case IP_DUMMYNET_DEL: /* remove a pipe or queue */
+ case IP_DUMMYNET_GET:
+ D("dummynet: compat option %d", sopt->sopt_name);
+ error = ip_dummynet_compat(sopt);
+ break;
+
+ case IP_DUMMYNET3 :
+ if (sopt->sopt_dir == SOPT_GET) {
+ error = dummynet_get(sopt, NULL);
+ break;
+ }
+ l = sopt->sopt_valsize;
+ if (l < sizeof(struct dn_id) || l > 12000) {
+ D("argument len %d invalid", l);
+ break;
+ }
+ p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ?
+ error = sooptcopyin(sopt, p, l, l);
+ if (error)
+ break ;
+ error = do_config(p, l);
+ break;
+ }
+
+ if (p != NULL)
+ free(p, M_TEMP);
+
+ return error ;
+}
+
+
+static void
+ip_dn_init(void)
+{
+ static int init_done = 0;
+
+ if (init_done)
+ return;
+ init_done = 1;
+ if (bootverbose)
+ printf("DUMMYNET with IPv6 initialized (100131)\n");
+
+ /* Set defaults here. MSVC does not accept initializers,
+ * and this is also useful for vimages
+ */
+ /* queue limits */
+ dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */
+ dn_cfg.byte_limit = 1024 * 1024;
+ dn_cfg.expire = 1;
+
+ /* RED parameters */
+ dn_cfg.red_lookup_depth = 256; /* default lookup table depth */
+ dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */
+ dn_cfg.red_max_pkt_size = 1500; /* default max packet size */
+
+ /* hash tables */
+ dn_cfg.max_hash_size = 1024; /* max in the hash tables */
+ dn_cfg.hash_size = 64; /* default hash size */
+
+ /* create hash tables for schedulers and flowsets.
+ * In both we search by key and by pointer.
+ */
+ dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size,
+ offsetof(struct dn_schk, schk_next),
+ schk_hash, schk_match, schk_new);
+ dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size,
+ offsetof(struct dn_fsk, fsk_next),
+ fsk_hash, fsk_match, fsk_new);
+
+ /* bucket index to drain object */
+ dn_cfg.drain_fs = 0;
+ dn_cfg.drain_sch = 0;
+
+ heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
+ SLIST_INIT(&dn_cfg.fsu);
+ SLIST_INIT(&dn_cfg.schedlist);
+
+ DN_LOCK_INIT();
+ ip_dn_ctl_ptr = ip_dn_ctl;
+ ip_dn_io_ptr = dummynet_io;
+
+ TASK_INIT(&dn_task, 0, dummynet_task, NULL);
+ dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT,
+ taskqueue_thread_enqueue, &dn_tq);
+ taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
+
+ callout_init(&dn_timeout, CALLOUT_MPSAFE);
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
+
+ /* Initialize curr_time adjustment mechanics. */
+ getmicrouptime(&dn_cfg.prev_t);
+}
+
+#ifdef KLD_MODULE
+static void
+ip_dn_destroy(void)
+{
+ callout_drain(&dn_timeout);
+
+ DN_BH_WLOCK();
+ ip_dn_ctl_ptr = NULL;
+ ip_dn_io_ptr = NULL;
+
+ dummynet_flush();
+ DN_BH_WUNLOCK();
+ taskqueue_drain(dn_tq, &dn_task);
+ taskqueue_free(dn_tq);
+
+ dn_ht_free(dn_cfg.schedhash, 0);
+ dn_ht_free(dn_cfg.fshash, 0);
+ heap_free(&dn_cfg.evheap);
+
+ DN_LOCK_DESTROY();
+}
+#endif /* KLD_MODULE */
+
+static int
+dummynet_modevent(module_t mod, int type, void *data)
+{
+
+ if (type == MOD_LOAD) {
+ if (ip_dn_io_ptr) {
+ printf("DUMMYNET already loaded\n");
+ return EEXIST ;
+ }
+ ip_dn_init();
+ return 0;
+ } else if (type == MOD_UNLOAD) {
+#if !defined(KLD_MODULE)
+ printf("dummynet statically compiled, cannot unload\n");
+ return EINVAL ;
+#else
+ ip_dn_destroy();
+ return 0;
+#endif
+ } else
+ return EOPNOTSUPP;
+}
+
+/* modevent helpers for the modules */
+static int
+load_dn_sched(struct dn_alg *d)
+{
+ struct dn_alg *s;
+
+ if (d == NULL)
+ return 1; /* error */
+ ip_dn_init(); /* just in case, we need the lock */
+
+	/* Check that mandatory functions exist */
+ if (d->enqueue == NULL || d->dequeue == NULL) {
+ D("missing enqueue or dequeue for %s", d->name);
+ return 1;
+ }
+
+ /* Search if scheduler already exists */
+ DN_BH_WLOCK();
+ SLIST_FOREACH(s, &dn_cfg.schedlist, next) {
+ if (strcmp(s->name, d->name) == 0) {
+ D("%s already loaded", d->name);
+ break; /* scheduler already exists */
+ }
+ }
+ if (s == NULL)
+ SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next);
+ DN_BH_WUNLOCK();
+ D("dn_sched %s %sloaded", d->name, s ? "not ":"");
+ return s ? 1 : 0;
+}
+
+static int
+unload_dn_sched(struct dn_alg *s)
+{
+ struct dn_alg *tmp, *r;
+ int err = EINVAL;
+
+ D("called for %s", s->name);
+
+ DN_BH_WLOCK();
+ SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) {
+ if (strcmp(s->name, r->name) != 0)
+ continue;
+ D("ref_count = %d", r->ref_count);
+ err = (r->ref_count != 0) ? EBUSY : 0;
+ if (err == 0)
+ SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next);
+ break;
+ }
+ DN_BH_WUNLOCK();
+ D("dn_sched %s %sunloaded", s->name, err ? "not ":"");
+ return err;
+}
+
+int
+dn_sched_modevent(module_t mod, int cmd, void *arg)
+{
+ struct dn_alg *sch = arg;
+
+ if (cmd == MOD_LOAD)
+ return load_dn_sched(sch);
+ else if (cmd == MOD_UNLOAD)
+ return unload_dn_sched(sch);
+ else
+ return EINVAL;
+}
+
+static moduledata_t dummynet_mod = {
+ "dummynet", dummynet_modevent, NULL
+};
+
+DECLARE_MODULE(dummynet, dummynet_mod,
+ SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY-1);
+MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
+MODULE_VERSION(dummynet, 1);
+/* end of file */
diff --git a/freebsd/sys/netinet/ipfw/ip_fw2.c b/freebsd/sys/netinet/ipfw/ip_fw2.c
new file mode 100644
index 00000000..682cced1
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_fw2.c
@@ -0,0 +1,2495 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * The FreeBSD IP packet firewall, main file
+ */
+
+#if !defined(KLD_MODULE)
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_ipdivert.h>
+#include <freebsd/local/opt_ipdn.h>
+#include <freebsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#endif
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/condvar.h>
+#include <freebsd/sys/eventhandler.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/ucred.h>
+#include <freebsd/net/ethernet.h> /* for ETHERTYPE_IP */
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/pf_mtag.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+#include <freebsd/netinet/ip_carp.h>
+#include <freebsd/netinet/pim.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/udp_var.h>
+#include <freebsd/netinet/sctp.h>
+
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet/icmp6.h>
+#ifdef INET6
+#include <freebsd/netinet6/scope6_var.h>
+#include <freebsd/netinet6/ip6_var.h>
+#endif
+
+#include <freebsd/machine/in_cksum.h> /* XXX for in_cksum */
+
+#ifdef MAC
+#include <freebsd/security/mac/mac_framework.h>
+#endif
+
+/*
+ * static variables followed by global ones.
+ * All ipfw global variables are here.
+ */
+
+/* ipfw_vnet_ready controls when we are open for business */
+static VNET_DEFINE(int, ipfw_vnet_ready) = 0;
+#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
+
+static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
+#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs)
+
+#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
+static int default_to_accept = 1;
+#else
+static int default_to_accept;
+#endif
+
+VNET_DEFINE(int, autoinc_step);
+
+/*
+ * Each rule belongs to one of 32 different sets (0..31).
+ * The variable set_disable contains one bit per set.
+ * If the bit is set, all rules in the corresponding set
+ * are disabled. Set RESVD_SET(31) is reserved for the default rule
+ * and rules that are not deleted by the flush command,
+ * and CANNOT be disabled.
+ * Rules in set RESVD_SET can only be deleted individually.
+ */
+VNET_DEFINE(u_int32_t, set_disable);
+#define V_set_disable VNET(set_disable)
+
+VNET_DEFINE(int, fw_verbose);
+/* counter for ipfw_log(NULL...) */
+VNET_DEFINE(u_int64_t, norule_counter);
+VNET_DEFINE(int, verbose_limit);
+
+/* layer3_chain contains the list of rules for layer 3 */
+VNET_DEFINE(struct ip_fw_chain, layer3_chain);
+
+ipfw_nat_t *ipfw_nat_ptr = NULL;
+struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
+ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
+ipfw_nat_cfg_t *ipfw_nat_del_ptr;
+ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
+ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
+
+#ifdef SYSCTL_NODE
+uint32_t dummy_def = IPFW_DEFAULT_RULE;
+uint32_t dummy_tables_max = IPFW_TABLES_MAX;
+
+SYSBEGIN(f3)
+
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
+ CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
+ "Only do a single pass through ipfw when using dummynet(4)");
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
+ CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
+ "Rule number auto-increment step");
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose,
+ CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
+ "Log matches to ipfw rules");
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
+ CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
+ "Set upper limit of matches of ipfw rules logged");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
+ &dummy_def, 0,
+ "The default/max possible rule number.");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD,
+ &dummy_tables_max, 0,
+ "The maximum number of tables.");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
+ &default_to_accept, 0,
+ "Make the default rule accept all packets.");
+TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
+ CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
+ "Number of static rules");
+
+#ifdef INET6
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
+SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
+ CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0,
+ "Deny packets with unknown IPv6 Extension Headers");
+#endif /* INET6 */
+
+SYSEND
+
+#endif /* SYSCTL_NODE */
+
+
+/*
+ * Some macros used in the various matching options.
+ * L3HDR maps an ipv4 pointer into a layer-3 header pointer of type T.
+ * The other macros just cast void * into the appropriate type.
+ */
+#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
+#define TCP(p) ((struct tcphdr *)(p))
+#define SCTP(p) ((struct sctphdr *)(p))
+#define UDP(p) ((struct udphdr *)(p))
+#define ICMP(p) ((struct icmphdr *)(p))
+#define ICMP6(p) ((struct icmp6_hdr *)(p))
+
+static __inline int
+icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
+{
+ int type = icmp->icmp_type;
+
+ return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) );
+}
+
+#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
+ (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
+
+static int
+is_icmp_query(struct icmphdr *icmp)
+{
+ int type = icmp->icmp_type;
+
+ return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
+}
+#undef TT
+
+/*
+ * The following checks use two arrays of 8 or 16 bits to store the
+ * bits that we want set or clear, respectively. They are in the
+ * low and high half of cmd->arg1 or cmd->d[0].
+ *
+ * We scan options and store the bits we find set. We succeed if
+ *
+ * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
+ *
+ * The code is sometimes optimized not to store additional variables.
+ */
+
+static int
+flags_match(ipfw_insn *cmd, u_int8_t bits)
+{
+ u_char want_clear;
+ bits = ~bits;
+
+ if ( ((cmd->arg1 & 0xff) & bits) != 0)
+ return 0; /* some bits we want set were clear */
+ want_clear = (cmd->arg1 >> 8) & 0xff;
+ if ( (want_clear & bits) != want_clear)
+ return 0; /* some bits we want clear were set */
+ return 1;
+}
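+
+/*
+ * Example: for a hypothetical "tcpflags syn,!ack" rule, userland would
+ * encode TH_SYN in the low byte of arg1 (bits that must be set) and
+ * TH_ACK in the high byte (bits that must be clear), so flags_match()
+ * returns 1 for a plain SYN and 0 for a SYN|ACK. The encoding is only
+ * sketched here to illustrate the scheme described above.
+ */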
+
+static int
+ipopts_match(struct ip *ip, ipfw_insn *cmd)
+{
+ int optlen, bits = 0;
+ u_char *cp = (u_char *)(ip + 1);
+ int x = (ip->ip_hl << 2) - sizeof (struct ip);
+
+ for (; x > 0; x -= optlen, cp += optlen) {
+ int opt = cp[IPOPT_OPTVAL];
+
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ optlen = cp[IPOPT_OLEN];
+ if (optlen <= 0 || optlen > x)
+ return 0; /* invalid or truncated */
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ case IPOPT_LSRR:
+ bits |= IP_FW_IPOPT_LSRR;
+ break;
+
+ case IPOPT_SSRR:
+ bits |= IP_FW_IPOPT_SSRR;
+ break;
+
+ case IPOPT_RR:
+ bits |= IP_FW_IPOPT_RR;
+ break;
+
+ case IPOPT_TS:
+ bits |= IP_FW_IPOPT_TS;
+ break;
+ }
+ }
+ return (flags_match(cmd, bits));
+}
+
+static int
+tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
+{
+ int optlen, bits = 0;
+ u_char *cp = (u_char *)(tcp + 1);
+ int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
+
+ for (; x > 0; x -= optlen, cp += optlen) {
+ int opt = cp[0];
+ if (opt == TCPOPT_EOL)
+ break;
+ if (opt == TCPOPT_NOP)
+ optlen = 1;
+ else {
+ optlen = cp[1];
+ if (optlen <= 0)
+ break;
+ }
+
+ switch (opt) {
+
+ default:
+ break;
+
+ case TCPOPT_MAXSEG:
+ bits |= IP_FW_TCPOPT_MSS;
+ break;
+
+ case TCPOPT_WINDOW:
+ bits |= IP_FW_TCPOPT_WINDOW;
+ break;
+
+ case TCPOPT_SACK_PERMITTED:
+ case TCPOPT_SACK:
+ bits |= IP_FW_TCPOPT_SACK;
+ break;
+
+ case TCPOPT_TIMESTAMP:
+ bits |= IP_FW_TCPOPT_TS;
+ break;
+
+ }
+ }
+ return (flags_match(cmd, bits));
+}
+
+static int
+iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
+{
+ if (ifp == NULL) /* no iface with this packet, match fails */
+ return 0;
+ /* Check by name or by IP address */
+ if (cmd->name[0] != '\0') { /* match by name */
+ /* Check name */
+ if (cmd->p.glob) {
+ if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
+ return(1);
+ } else {
+ if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
+ return(1);
+ }
+ } else {
+#ifdef __FreeBSD__ /* and OSX too ? */
+ struct ifaddr *ia;
+
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
+ if (ia->ifa_addr->sa_family != AF_INET)
+ continue;
+ if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
+ (ia->ifa_addr))->sin_addr.s_addr) {
+ if_addr_runlock(ifp);
+ return(1); /* match */
+ }
+ }
+ if_addr_runlock(ifp);
+#endif /* __FreeBSD__ */
+ }
+ return(0); /* no match, fail ... */
+}
+
+/*
+ * The verify_path function checks if a route to the src exists and
+ * if it is reachable via ifp (when provided).
+ *
+ * The 'verrevpath' option checks that the interface that an IP packet
+ * arrives on is the same interface that traffic destined for the
+ * packet's source address would be routed out of.
+ * The 'versrcreach' option just checks that the source address is
+ * reachable via any route (except default) in the routing table.
+ * These two are a measure to block forged packets. This is also
+ * commonly known as "anti-spoofing" or Unicast Reverse Path
+ * Forwarding (Unicast RPF) in Cisco-ese. The names of the knobs
+ * are purposely reminiscent of the Cisco IOS commands,
+ *
+ * ip verify unicast reverse-path
+ * ip verify unicast source reachable-via any
+ *
+ * which implements the same functionality. But note that the syntax
+ * is misleading, and the check may be performed on all IP packets
+ * whether unicast, multicast, or broadcast.
+ */
+static int
+verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
+{
+#ifndef __FreeBSD__
+ return 0;
+#else
+ struct route ro;
+ struct sockaddr_in *dst;
+
+ bzero(&ro, sizeof(ro));
+
+ dst = (struct sockaddr_in *)&(ro.ro_dst);
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = src;
+ in_rtalloc_ign(&ro, 0, fib);
+
+ if (ro.ro_rt == NULL)
+ return 0;
+
+ /*
+ * If ifp is provided, check for equality with rtentry.
+ * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
+ * in order to pass packets injected back by if_simloop():
+ * if useloopback == 1 routing entry (via lo0) for our own address
+	 * may exist, so we need to handle routing asymmetry.
+ */
+ if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* if no ifp provided, check if rtentry is not default route */
+ if (ifp == NULL &&
+ satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* or if this is a blackhole/reject route */
+ if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* found valid route */
+ RTFREE(ro.ro_rt);
+ return 1;
+#endif /* __FreeBSD__ */
+}
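+
+/*
+ * For example (rule syntax as in ipfw(8), shown only as an illustration),
+ * the checks above back anti-spoofing rules such as:
+ *
+ *	ipfw add deny ip from any to any not verrevpath in
+ *	ipfw add deny ip from any to any not versrcreach
+ *
+ * i.e. drop inbound packets whose source would not be routed back
+ * through the receiving interface, or is not reachable at all.
+ */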
+
+#ifdef INET6
+/*
+ * ipv6 specific rules here...
+ */
+static __inline int
+icmp6type_match (int type, ipfw_insn_u32 *cmd)
+{
+ return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) );
+}
+
+static int
+flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
+{
+ int i;
+ for (i=0; i <= cmd->o.arg1; ++i )
+ if (curr_flow == cmd->d[i] )
+ return 1;
+ return 0;
+}
+
+/* support for IP6_*_ME opcodes */
+static int
+search_ip6_addr_net (struct in6_addr * ip6_addr)
+{
+ struct ifnet *mdc;
+ struct ifaddr *mdc2;
+ struct in6_ifaddr *fdm;
+ struct in6_addr copia;
+
+ TAILQ_FOREACH(mdc, &V_ifnet, if_link) {
+ if_addr_rlock(mdc);
+ TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) {
+ if (mdc2->ifa_addr->sa_family == AF_INET6) {
+ fdm = (struct in6_ifaddr *)mdc2;
+ copia = fdm->ia_addr.sin6_addr;
+				/* use a copy, leaving the scope_id in the sockaddr */
+ in6_clearscope(&copia);
+ if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) {
+ if_addr_runlock(mdc);
+ return 1;
+ }
+ }
+ }
+ if_addr_runlock(mdc);
+ }
+ return 0;
+}
+
+static int
+verify_path6(struct in6_addr *src, struct ifnet *ifp)
+{
+ struct route_in6 ro;
+ struct sockaddr_in6 *dst;
+
+ bzero(&ro, sizeof(ro));
+
+ dst = (struct sockaddr_in6 * )&(ro.ro_dst);
+ dst->sin6_family = AF_INET6;
+ dst->sin6_len = sizeof(*dst);
+ dst->sin6_addr = *src;
+ /* XXX MRT 0 for ipv6 at this time */
+ rtalloc_ign((struct route *)&ro, 0);
+
+ if (ro.ro_rt == NULL)
+ return 0;
+
+ /*
+ * if ifp is provided, check for equality with rtentry
+ * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
+ * to support the case of sending packets to an address of our own.
+ * (where the former interface is the first argument of if_simloop()
+ * (=ifp), the latter is lo0)
+ */
+ if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* if no ifp provided, check if rtentry is not default route */
+ if (ifp == NULL &&
+ IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* or if this is a blackhole/reject route */
+ if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* found valid route */
+ RTFREE(ro.ro_rt);
+ return 1;
+
+}
+
+static int
+is_icmp6_query(int icmp6_type)
+{
+ if ((icmp6_type <= ICMP6_MAXTYPE) &&
+ (icmp6_type == ICMP6_ECHO_REQUEST ||
+ icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
+ icmp6_type == ICMP6_WRUREQUEST ||
+ icmp6_type == ICMP6_FQDN_QUERY ||
+ icmp6_type == ICMP6_NI_QUERY))
+ return (1);
+
+ return (0);
+}
+
+static void
+send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
+{
+ struct mbuf *m;
+
+ m = args->m;
+ if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
+ struct tcphdr *tcp;
+ tcp = (struct tcphdr *)((char *)ip6 + hlen);
+
+ if ((tcp->th_flags & TH_RST) == 0) {
+ struct mbuf *m0;
+ m0 = ipfw_send_pkt(args->m, &(args->f_id),
+ ntohl(tcp->th_seq), ntohl(tcp->th_ack),
+ tcp->th_flags | TH_RST);
+ if (m0 != NULL)
+ ip6_output(m0, NULL, NULL, 0, NULL, NULL,
+ NULL);
+ }
+ FREE_PKT(m);
+ } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
+#if 0
+ /*
+ * Unlike above, the mbufs need to line up with the ip6 hdr,
+ * as the contents are read. We need to m_adj() the
+ * needed amount.
+ * The mbuf will however be thrown away so we can adjust it.
+ * Remember we did an m_pullup on it already so we
+ * can make some assumptions about contiguousness.
+ */
+ if (args->L3offset)
+ m_adj(m, args->L3offset);
+#endif
+ icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
+ } else
+ FREE_PKT(m);
+
+ args->m = NULL;
+}
+
+#endif /* INET6 */
+
+
+/*
+ * sends a reject message, consuming the mbuf passed as an argument.
+ */
+static void
+send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
+{
+
+#if 0
+ /* XXX When ip is not guaranteed to be at mtod() we will
+	 * need to account for this.
+ * The mbuf will however be thrown away so we can adjust it.
+ * Remember we did an m_pullup on it already so we
+ * can make some assumptions about contiguousness.
+ */
+ if (args->L3offset)
+ m_adj(m, args->L3offset);
+#endif
+ if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
+ /* We need the IP header in host order for icmp_error(). */
+ SET_HOST_IPLEN(ip);
+ icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
+ } else if (args->f_id.proto == IPPROTO_TCP) {
+ struct tcphdr *const tcp =
+ L3HDR(struct tcphdr, mtod(args->m, struct ip *));
+ if ( (tcp->th_flags & TH_RST) == 0) {
+ struct mbuf *m;
+ m = ipfw_send_pkt(args->m, &(args->f_id),
+ ntohl(tcp->th_seq), ntohl(tcp->th_ack),
+ tcp->th_flags | TH_RST);
+ if (m != NULL)
+ ip_output(m, NULL, NULL, 0, NULL, NULL);
+ }
+ FREE_PKT(args->m);
+ } else
+ FREE_PKT(args->m);
+ args->m = NULL;
+}
+
+/*
+ * Support for uid/gid/jail lookup. These tests are expensive
+ * (because we may need to look into the list of active sockets)
+ * so we cache the results. ugid_lookupp is 0 if we have not
+ * yet done a lookup, 1 if we succeeded, and -1 if we tried
+ * and failed. The function always returns the match value.
+ * We could actually spare the variable and use *uc, setting
+ * it to (void *)check_uidgid if we have no info, NULL if
+ * we tried and failed, or any other value if successful.
+ */
+static int
+check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
+ struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
+ u_int16_t src_port, int *ugid_lookupp,
+ struct ucred **uc, struct inpcb *inp)
+{
+#ifndef __FreeBSD__
+ return cred_check(insn, proto, oif,
+ dst_ip, dst_port, src_ip, src_port,
+ (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
+#else /* FreeBSD */
+ struct inpcbinfo *pi;
+ int wildcard;
+ struct inpcb *pcb;
+ int match;
+
+ /*
+ * Check to see if the UDP or TCP stack supplied us with
+	 * the PCB. If so, rather than holding a lock and looking
+ * up the PCB, we can use the one that was supplied.
+ */
+ if (inp && *ugid_lookupp == 0) {
+ INP_LOCK_ASSERT(inp);
+ if (inp->inp_socket != NULL) {
+ *uc = crhold(inp->inp_cred);
+ *ugid_lookupp = 1;
+ } else
+ *ugid_lookupp = -1;
+ }
+ /*
+ * If we have already been here and the packet has no
+ * PCB entry associated with it, then we can safely
+	 * assume that this is not a match.
+ */
+ if (*ugid_lookupp == -1)
+ return (0);
+ if (proto == IPPROTO_TCP) {
+ wildcard = 0;
+ pi = &V_tcbinfo;
+ } else if (proto == IPPROTO_UDP) {
+ wildcard = INPLOOKUP_WILDCARD;
+ pi = &V_udbinfo;
+ } else
+ return 0;
+ match = 0;
+ if (*ugid_lookupp == 0) {
+ INP_INFO_RLOCK(pi);
+ pcb = (oif) ?
+ in_pcblookup_hash(pi,
+ dst_ip, htons(dst_port),
+ src_ip, htons(src_port),
+ wildcard, oif) :
+ in_pcblookup_hash(pi,
+ src_ip, htons(src_port),
+ dst_ip, htons(dst_port),
+ wildcard, NULL);
+ if (pcb != NULL) {
+ *uc = crhold(pcb->inp_cred);
+ *ugid_lookupp = 1;
+ }
+ INP_INFO_RUNLOCK(pi);
+ if (*ugid_lookupp == 0) {
+ /*
+ * We tried and failed, set the variable to -1
+ * so we will not try again on this packet.
+ */
+ *ugid_lookupp = -1;
+ return (0);
+ }
+ }
+ if (insn->o.opcode == O_UID)
+ match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
+ else if (insn->o.opcode == O_GID)
+ match = groupmember((gid_t)insn->d[0], *uc);
+ else if (insn->o.opcode == O_JAIL)
+ match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
+ return match;
+#endif /* __FreeBSD__ */
+}
+
+/*
+ * Helper function to set args with info on the rule after the matching
+ * one. slot is precise, whereas we guess rule_id as they are
+ * assigned sequentially.
+ */
+static inline void
+set_match(struct ip_fw_args *args, int slot,
+ struct ip_fw_chain *chain)
+{
+ args->rule.chain_id = chain->id;
+ args->rule.slot = slot + 1; /* we use 0 as a marker */
+ args->rule.rule_id = 1 + chain->map[slot]->id;
+ args->rule.rulenum = chain->map[slot]->rulenum;
+}
+
+/*
+ * The main check routine for the firewall.
+ *
+ * All arguments are in args so we can modify them and return them
+ * back to the caller.
+ *
+ * Parameters:
+ *
+ * args->m (in/out) The packet; we set to NULL when/if we nuke it.
+ * Starts with the IP header.
+ * args->eh (in) Mac header if present, NULL for layer3 packet.
+ * args->L3offset Number of bytes bypassed if we came from L2.
+ * e.g. often sizeof(eh) ** NOTYET **
+ * args->oif Outgoing interface, NULL if packet is incoming.
+ * The incoming interface is in the mbuf. (in)
+ * args->divert_rule (in/out)
+ * Skip up to the first rule past this rule number;
+ * upon return, non-zero port number for divert or tee.
+ *
+ * args->rule Pointer to the last matching rule (in/out)
+ * args->next_hop Socket we are forwarding to (out).
+ * args->f_id Addresses grabbed from the packet (out)
+ * args->rule.info a cookie depending on rule action
+ *
+ * Return value:
+ *
+ * IP_FW_PASS the packet must be accepted
+ * IP_FW_DENY the packet must be dropped
+ * IP_FW_DIVERT divert packet, port in m_tag
+ * IP_FW_TEE tee packet, port in m_tag
+ * IP_FW_DUMMYNET to dummynet, pipe in args->cookie
+ * IP_FW_NETGRAPH into netgraph, cookie args->cookie
+ * args->rule contains the matching rule,
+ * args->rule.info has additional information.
+ *
+ */
+int
+ipfw_chk(struct ip_fw_args *args)
+{
+
+ /*
+ * Local variables holding state while processing a packet:
+ *
+ * IMPORTANT NOTE: to speed up the processing of rules, there
+	 * are some assumptions on the values of the variables, which
+ * are documented here. Should you change them, please check
+ * the implementation of the various instructions to make sure
+ * that they still work.
+ *
+ * args->eh The MAC header. It is non-null for a layer2
+ * packet, it is NULL for a layer-3 packet.
+ * **notyet**
+ * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
+ *
+ * m | args->m Pointer to the mbuf, as received from the caller.
+ * It may change if ipfw_chk() does an m_pullup, or if it
+ * consumes the packet because it calls send_reject().
+ * XXX This has to change, so that ipfw_chk() never modifies
+ * or consumes the buffer.
+ * ip is the beginning of the ip(4 or 6) header.
+ * Calculated by adding the L3offset to the start of data.
+ * (Until we start using L3offset, the packet is
+ * supposed to start with the ip header).
+ */
+ struct mbuf *m = args->m;
+ struct ip *ip = mtod(m, struct ip *);
+
+ /*
+ * For rules which contain uid/gid or jail constraints, cache
+ * a copy of the users credentials after the pcb lookup has been
+ * executed. This will speed up the processing of rules with
+ * these types of constraints, as well as decrease contention
+ * on pcb related locks.
+ */
+#ifndef __FreeBSD__
+ struct bsd_ucred ucred_cache;
+#else
+ struct ucred *ucred_cache = NULL;
+#endif
+ int ucred_lookup = 0;
+
+ /*
+ * oif | args->oif If NULL, ipfw_chk has been called on the
+ * inbound path (ether_input, ip_input).
+ * If non-NULL, ipfw_chk has been called on the outbound path
+ * (ether_output, ip_output).
+ */
+ struct ifnet *oif = args->oif;
+
+ int f_pos = 0; /* index of current rule in the array */
+ int retval = 0;
+
+ /*
+ * hlen The length of the IP header.
+ */
+ u_int hlen = 0; /* hlen >0 means we have an IP pkt */
+
+ /*
+ * offset The offset of a fragment. offset != 0 means that
+ * we have a fragment at this offset of an IPv4 packet.
+ * offset == 0 means that (if this is an IPv4 packet)
+ * this is the first or only fragment.
+ * For IPv6 offset == 0 means there is no Fragment Header.
+ * If offset != 0 for IPv6 always use correct mask to
+ * get the correct offset because we add IP6F_MORE_FRAG
+	 * to be able to detect the first fragment which would
+ * otherwise have offset = 0.
+ */
+ u_short offset = 0;
+
+ /*
+ * Local copies of addresses. They are only valid if we have
+ * an IP packet.
+ *
+ * proto The protocol. Set to 0 for non-ip packets,
+ * or to the protocol read from the packet otherwise.
+ * proto != 0 means that we have an IPv4 packet.
+ *
+ * src_port, dst_port port numbers, in HOST format. Only
+ * valid for TCP and UDP packets.
+ *
+ * src_ip, dst_ip ip addresses, in NETWORK format.
+ * Only valid for IPv4 packets.
+ */
+ uint8_t proto;
+ uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */
+ struct in_addr src_ip, dst_ip; /* NOTE: network format */
+ uint16_t iplen=0;
+ int pktlen;
+ uint16_t etype = 0; /* Host order stored ether type */
+
+ /*
+ * dyn_dir = MATCH_UNKNOWN when rules unchecked,
+ * MATCH_NONE when checked and not matched (q = NULL),
+ * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
+ */
+ int dyn_dir = MATCH_UNKNOWN;
+ ipfw_dyn_rule *q = NULL;
+ struct ip_fw_chain *chain = &V_layer3_chain;
+
+ /*
+ * We store in ulp a pointer to the upper layer protocol header.
+ * In the ipv4 case this is easy to determine from the header,
+ * but for ipv6 we might have some additional headers in the middle.
+ * ulp is NULL if not found.
+ */
+ void *ulp = NULL; /* upper layer protocol pointer. */
+
+ /* XXX ipv6 variables */
+ int is_ipv6 = 0;
+ uint8_t icmp6_type = 0;
+ uint16_t ext_hd = 0; /* bits vector for extension header filtering */
+ /* end of ipv6 variables */
+
+ int is_ipv4 = 0;
+
+ int done = 0; /* flag to exit the outer loop */
+
+ if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
+ return (IP_FW_PASS); /* accept */
+
+ dst_ip.s_addr = 0; /* make sure it is initialized */
+ src_ip.s_addr = 0; /* make sure it is initialized */
+ pktlen = m->m_pkthdr.len;
+	args->f_id.fib = M_GETFIB(m);	/* note mbuf not altered */
+ proto = args->f_id.proto = 0; /* mark f_id invalid */
+ /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
+
+/*
+ * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
+ * then it sets p to point at the offset "len" in the mbuf. WARNING: the
+ * pointer might become stale after other pullups (but we never use it
+ * this way).
+ */
+#define PULLUP_TO(_len, p, T) \
+do { \
+ int x = (_len) + sizeof(T); \
+ if ((m)->m_len < x) { \
+ args->m = m = m_pullup(m, x); \
+ if (m == NULL) \
+ goto pullup_failed; \
+ } \
+ p = (mtod(m, char *) + (_len)); \
+} while (0)
+
+ /*
+	 * If we have an ether header, collect the ethertype.
+ */
+ if (args->eh)
+ etype = ntohs(args->eh->ether_type);
+
+ /* Identify IP packets and fill up variables. */
+ if (pktlen >= sizeof(struct ip6_hdr) &&
+ (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+ is_ipv6 = 1;
+ args->f_id.addr_type = 6;
+ hlen = sizeof(struct ip6_hdr);
+ proto = ip6->ip6_nxt;
+
+ /* Search extension headers to find upper layer protocols */
+ while (ulp == NULL) {
+ switch (proto) {
+ case IPPROTO_ICMPV6:
+ PULLUP_TO(hlen, ulp, struct icmp6_hdr);
+ icmp6_type = ICMP6(ulp)->icmp6_type;
+ break;
+
+ case IPPROTO_TCP:
+ PULLUP_TO(hlen, ulp, struct tcphdr);
+ dst_port = TCP(ulp)->th_dport;
+ src_port = TCP(ulp)->th_sport;
+ /* save flags for dynamic rules */
+ args->f_id._flags = TCP(ulp)->th_flags;
+ break;
+
+ case IPPROTO_SCTP:
+ PULLUP_TO(hlen, ulp, struct sctphdr);
+ src_port = SCTP(ulp)->src_port;
+ dst_port = SCTP(ulp)->dest_port;
+ break;
+
+ case IPPROTO_UDP:
+ PULLUP_TO(hlen, ulp, struct udphdr);
+ dst_port = UDP(ulp)->uh_dport;
+ src_port = UDP(ulp)->uh_sport;
+ break;
+
+ case IPPROTO_HOPOPTS: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_hbh);
+ ext_hd |= EXT_HOPOPTS;
+ hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
+ proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
+ ulp = NULL;
+ break;
+
+ case IPPROTO_ROUTING: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_rthdr);
+ switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
+ case 0:
+ ext_hd |= EXT_RTHDR0;
+ break;
+ case 2:
+ ext_hd |= EXT_RTHDR2;
+ break;
+ default:
+ printf("IPFW2: IPV6 - Unknown Routing "
+ "Header type(%d)\n",
+ ((struct ip6_rthdr *)ulp)->ip6r_type);
+ if (V_fw_deny_unknown_exthdrs)
+ return (IP_FW_DENY);
+ break;
+ }
+ ext_hd |= EXT_ROUTING;
+ hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
+ proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
+ ulp = NULL;
+ break;
+
+ case IPPROTO_FRAGMENT: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_frag);
+ ext_hd |= EXT_FRAGMENT;
+ hlen += sizeof (struct ip6_frag);
+ proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
+ offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
+ IP6F_OFF_MASK;
+			/* Fold in IP6F_MORE_FRAG so that the first
+			 * fragment also yields a nonzero offset. */
+ offset |= ((struct ip6_frag *)ulp)->ip6f_offlg &
+ IP6F_MORE_FRAG;
+ if (offset == 0) {
+ printf("IPFW2: IPV6 - Invalid Fragment "
+ "Header\n");
+ if (V_fw_deny_unknown_exthdrs)
+ return (IP_FW_DENY);
+ break;
+ }
+ args->f_id.extra =
+ ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
+ ulp = NULL;
+ break;
+
+ case IPPROTO_DSTOPTS: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_hbh);
+ ext_hd |= EXT_DSTOPTS;
+ hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
+ proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
+ ulp = NULL;
+ break;
+
+ case IPPROTO_AH: /* RFC 2402 */
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
+ ext_hd |= EXT_AH;
+ hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
+ proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
+ ulp = NULL;
+ break;
+
+ case IPPROTO_ESP: /* RFC 2406 */
+ PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */
+ /* Anything past Seq# is variable length and
+ * data past this ext. header is encrypted. */
+ ext_hd |= EXT_ESP;
+ break;
+
+ case IPPROTO_NONE: /* RFC 2460 */
+ /*
+ * Packet ends here, and IPv6 header has
+ * already been pulled up. If ip6e_len!=0
+ * then octets must be ignored.
+ */
+ ulp = ip; /* non-NULL to get out of loop. */
+ break;
+
+ case IPPROTO_OSPFIGP:
+ /* XXX OSPF header check? */
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
+ break;
+
+ case IPPROTO_PIM:
+ /* XXX PIM header check? */
+ PULLUP_TO(hlen, ulp, struct pim);
+ break;
+
+ case IPPROTO_CARP:
+ PULLUP_TO(hlen, ulp, struct carp_header);
+ if (((struct carp_header *)ulp)->carp_version !=
+ CARP_VERSION)
+ return (IP_FW_DENY);
+ if (((struct carp_header *)ulp)->carp_type !=
+ CARP_ADVERTISEMENT)
+ return (IP_FW_DENY);
+ break;
+
+ case IPPROTO_IPV6: /* RFC 2893 */
+ PULLUP_TO(hlen, ulp, struct ip6_hdr);
+ break;
+
+ case IPPROTO_IPV4: /* RFC 2893 */
+ PULLUP_TO(hlen, ulp, struct ip);
+ break;
+
+ default:
+ printf("IPFW2: IPV6 - Unknown Extension "
+ "Header(%d), ext_hd=%x\n", proto, ext_hd);
+ if (V_fw_deny_unknown_exthdrs)
+ return (IP_FW_DENY);
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
+ break;
+ } /*switch */
+ }
+ ip = mtod(m, struct ip *);
+ ip6 = (struct ip6_hdr *)ip;
+ args->f_id.src_ip6 = ip6->ip6_src;
+ args->f_id.dst_ip6 = ip6->ip6_dst;
+ args->f_id.src_ip = 0;
+ args->f_id.dst_ip = 0;
+ args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
+ } else if (pktlen >= sizeof(struct ip) &&
+ (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
+ is_ipv4 = 1;
+ hlen = ip->ip_hl << 2;
+ args->f_id.addr_type = 4;
+
+ /*
+ * Collect parameters into local variables for faster matching.
+ */
+ proto = ip->ip_p;
+ src_ip = ip->ip_src;
+ dst_ip = ip->ip_dst;
+ offset = ntohs(ip->ip_off) & IP_OFFMASK;
+ iplen = ntohs(ip->ip_len);
+ pktlen = iplen < pktlen ? iplen : pktlen;
+
+ if (offset == 0) {
+ switch (proto) {
+ case IPPROTO_TCP:
+ PULLUP_TO(hlen, ulp, struct tcphdr);
+ dst_port = TCP(ulp)->th_dport;
+ src_port = TCP(ulp)->th_sport;
+ /* save flags for dynamic rules */
+ args->f_id._flags = TCP(ulp)->th_flags;
+ break;
+
+ case IPPROTO_UDP:
+ PULLUP_TO(hlen, ulp, struct udphdr);
+ dst_port = UDP(ulp)->uh_dport;
+ src_port = UDP(ulp)->uh_sport;
+ break;
+
+ case IPPROTO_ICMP:
+ PULLUP_TO(hlen, ulp, struct icmphdr);
+ //args->f_id.flags = ICMP(ulp)->icmp_type;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ ip = mtod(m, struct ip *);
+ args->f_id.src_ip = ntohl(src_ip.s_addr);
+ args->f_id.dst_ip = ntohl(dst_ip.s_addr);
+ }
+#undef PULLUP_TO
+ if (proto) { /* we may have port numbers, store them */
+ args->f_id.proto = proto;
+ args->f_id.src_port = src_port = ntohs(src_port);
+ args->f_id.dst_port = dst_port = ntohs(dst_port);
+ }
+
+ IPFW_RLOCK(chain);
+ if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
+ IPFW_RUNLOCK(chain);
+ return (IP_FW_PASS); /* accept */
+ }
+ if (args->rule.slot) {
+ /*
+ * Packet has already been tagged as a result of a previous
+ * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
+ * REASS, NETGRAPH, DIVERT/TEE...)
+ * Validate the slot and continue from the next one
+ * if still present, otherwise do a lookup.
+ */
+ f_pos = (args->rule.chain_id == chain->id) ?
+ args->rule.slot :
+ ipfw_find_rule(chain, args->rule.rulenum,
+ args->rule.rule_id);
+ } else {
+ f_pos = 0;
+ }
+
+ /*
+ * Now scan the rules, and parse microinstructions for each rule.
+ * We have two nested loops and an inner switch. Sometimes we
+ * need to break out of one or both loops, or re-enter one of
+ * the loops with updated variables. Loop variables are:
+ *
+ * f_pos (outer loop) points to the current rule.
+ * On output it points to the matching rule.
+ * done (outer loop) is used as a flag to break the loop.
+ * l (inner loop) residual length of current rule.
+ * cmd points to the current microinstruction.
+ *
+ * We break the inner loop by setting l=0 and possibly
+ * cmdlen=0 if we don't want to advance cmd.
+ * We break the outer loop by setting done=1
+ * We can restart the inner loop by setting l>0 and f_pos, f, cmd
+ * as needed.
+ */
+ for (; f_pos < chain->n_rules; f_pos++) {
+ ipfw_insn *cmd;
+ uint32_t tablearg = 0;
+ int l, cmdlen, skip_or; /* skip rest of OR block */
+ struct ip_fw *f;
+
+ f = chain->map[f_pos];
+ if (V_set_disable & (1 << f->set) )
+ continue;
+
+ skip_or = 0;
+ for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
+ l -= cmdlen, cmd += cmdlen) {
+ int match;
+
+ /*
+ * check_body is a jump target used when we find a
+ * CHECK_STATE, and need to jump to the body of
+ * the target rule.
+ */
+
+/* check_body: */
+ cmdlen = F_LEN(cmd);
+ /*
+ * An OR block (insn_1 || .. || insn_n) has the
+ * F_OR bit set in all but the last instruction.
+ * The first match will set "skip_or", and cause
+ * the following instructions to be skipped until
+ * past the one with the F_OR bit clear.
+ */
+ if (skip_or) { /* skip this instruction */
+ if ((cmd->len & F_OR) == 0)
+ skip_or = 0; /* next one is good */
+ continue;
+ }
+ match = 0; /* set to 1 if we succeed */
+
+ switch (cmd->opcode) {
+ /*
+ * The first set of opcodes compares the packet's
+ * fields with some pattern, setting 'match' if a
+ * match is found. At the end of the loop there is
+ * logic to deal with F_NOT and F_OR flags associated
+ * with the opcode.
+ */
+ case O_NOP:
+ match = 1;
+ break;
+
+ case O_FORWARD_MAC:
+ printf("ipfw: opcode %d unimplemented\n",
+ cmd->opcode);
+ break;
+
+ case O_GID:
+ case O_UID:
+ case O_JAIL:
+ /*
+ * We only check offset == 0 && proto != 0,
+ * as this ensures that we have a
+ * packet with the ports info.
+ */
+ if (offset!=0)
+ break;
+ if (is_ipv6) /* XXX to be fixed later */
+ break;
+ if (proto == IPPROTO_TCP ||
+ proto == IPPROTO_UDP)
+ match = check_uidgid(
+ (ipfw_insn_u32 *)cmd,
+ proto, oif,
+ dst_ip, dst_port,
+ src_ip, src_port, &ucred_lookup,
+#ifdef __FreeBSD__
+ &ucred_cache, args->inp);
+#else
+ (void *)&ucred_cache,
+ (struct inpcb *)args->m);
+#endif
+ break;
+
+ case O_RECV:
+ match = iface_match(m->m_pkthdr.rcvif,
+ (ipfw_insn_if *)cmd);
+ break;
+
+ case O_XMIT:
+ match = iface_match(oif, (ipfw_insn_if *)cmd);
+ break;
+
+ case O_VIA:
+ match = iface_match(oif ? oif :
+ m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
+ break;
+
+ case O_MACADDR2:
+ if (args->eh != NULL) { /* have MAC header */
+ u_int32_t *want = (u_int32_t *)
+ ((ipfw_insn_mac *)cmd)->addr;
+ u_int32_t *mask = (u_int32_t *)
+ ((ipfw_insn_mac *)cmd)->mask;
+ u_int32_t *hdr = (u_int32_t *)args->eh;
+
+ match =
+ ( want[0] == (hdr[0] & mask[0]) &&
+ want[1] == (hdr[1] & mask[1]) &&
+ want[2] == (hdr[2] & mask[2]) );
+ }
+ break;
+
+ case O_MAC_TYPE:
+ if (args->eh != NULL) {
+ u_int16_t *p =
+ ((ipfw_insn_u16 *)cmd)->ports;
+ int i;
+
+ for (i = cmdlen - 1; !match && i>0;
+ i--, p += 2)
+ match = (etype >= p[0] &&
+ etype <= p[1]);
+ }
+ break;
+
+ case O_FRAG:
+ match = (offset != 0);
+ break;
+
+ case O_IN: /* "out" is "not in" */
+ match = (oif == NULL);
+ break;
+
+ case O_LAYER2:
+ match = (args->eh != NULL);
+ break;
+
+ case O_DIVERTED:
+ {
+				/* For diverted packets, args->rule.info
+				 * contains the divert port (in host format),
+				 * the reason and the direction.
+				 */
+ uint32_t i = args->rule.info;
+ match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT &&
+ cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2);
+ }
+ break;
+
+ case O_PROTO:
+ /*
+ * We do not allow an arg of 0 so the
+ * check of "proto" only suffices.
+ */
+ match = (proto == cmd->arg1);
+ break;
+
+ case O_IP_SRC:
+ match = is_ipv4 &&
+ (((ipfw_insn_ip *)cmd)->addr.s_addr ==
+ src_ip.s_addr);
+ break;
+
+ case O_IP_SRC_LOOKUP:
+ case O_IP_DST_LOOKUP:
+ if (is_ipv4) {
+ uint32_t key =
+ (cmd->opcode == O_IP_DST_LOOKUP) ?
+ dst_ip.s_addr : src_ip.s_addr;
+ uint32_t v = 0;
+
+ if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
+ /* generic lookup. The key must be
+ * in 32bit big-endian format.
+ */
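+				    /* d[1] selects the lookup key:
+				     * 0 dst-ip, 1 src-ip, 2 dst-port,
+				     * 3 src-port, 4 uid, 5 jail id,
+				     * 6 dscp (see the checks below).
+				     */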
+ v = ((ipfw_insn_u32 *)cmd)->d[1];
+ if (v == 0)
+ key = dst_ip.s_addr;
+ else if (v == 1)
+ key = src_ip.s_addr;
+ else if (v == 6) /* dscp */
+ key = (ip->ip_tos >> 2) & 0x3f;
+ else if (offset != 0)
+ break;
+ else if (proto != IPPROTO_TCP &&
+ proto != IPPROTO_UDP)
+ break;
+ else if (v == 2)
+ key = htonl(dst_port);
+ else if (v == 3)
+ key = htonl(src_port);
+ else if (v == 4 || v == 5) {
+ check_uidgid(
+ (ipfw_insn_u32 *)cmd,
+ proto, oif,
+ dst_ip, dst_port,
+ src_ip, src_port, &ucred_lookup,
+#ifdef __FreeBSD__
+ &ucred_cache, args->inp);
+ if (v == 4 /* O_UID */)
+ key = ucred_cache->cr_uid;
+ else if (v == 5 /* O_JAIL */)
+ key = ucred_cache->cr_prison->pr_id;
+#else /* !__FreeBSD__ */
+ (void *)&ucred_cache,
+ (struct inpcb *)args->m);
+ if (v ==4 /* O_UID */)
+ key = ucred_cache.uid;
+ else if (v == 5 /* O_JAIL */)
+ key = ucred_cache.xid;
+#endif /* !__FreeBSD__ */
+ key = htonl(key);
+ } else
+ break;
+ }
+ match = ipfw_lookup_table(chain,
+ cmd->arg1, key, &v);
+ if (!match)
+ break;
+ if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
+ match =
+ ((ipfw_insn_u32 *)cmd)->d[0] == v;
+ else
+ tablearg = v;
+ }
+ break;
+
+ case O_IP_SRC_MASK:
+ case O_IP_DST_MASK:
+ if (is_ipv4) {
+ uint32_t a =
+ (cmd->opcode == O_IP_DST_MASK) ?
+ dst_ip.s_addr : src_ip.s_addr;
+ uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
+ int i = cmdlen-1;
+
+ for (; !match && i>0; i-= 2, p+= 2)
+ match = (p[0] == (a & p[1]));
+ }
+ break;
+
+ case O_IP_SRC_ME:
+ if (is_ipv4) {
+ struct ifnet *tif;
+
+ INADDR_TO_IFP(src_ip, tif);
+ match = (tif != NULL);
+ break;
+ }
+#ifdef INET6
+ /* FALLTHROUGH */
+ case O_IP6_SRC_ME:
+ match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
+#endif
+ break;
+
+ case O_IP_DST_SET:
+ case O_IP_SRC_SET:
+ if (is_ipv4) {
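+				/* The insn payload is a base address d[0]
+				 * (host order) followed by a bitmap; arg1
+				 * is the number of addresses in the set. */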
+ u_int32_t *d = (u_int32_t *)(cmd+1);
+ u_int32_t addr =
+ cmd->opcode == O_IP_DST_SET ?
+ args->f_id.dst_ip :
+ args->f_id.src_ip;
+
+ if (addr < d[0])
+ break;
+ addr -= d[0]; /* subtract base */
+ match = (addr < cmd->arg1) &&
+ ( d[ 1 + (addr>>5)] &
+ (1<<(addr & 0x1f)) );
+ }
+ break;
+
+ case O_IP_DST:
+ match = is_ipv4 &&
+ (((ipfw_insn_ip *)cmd)->addr.s_addr ==
+ dst_ip.s_addr);
+ break;
+
+ case O_IP_DST_ME:
+ if (is_ipv4) {
+ struct ifnet *tif;
+
+ INADDR_TO_IFP(dst_ip, tif);
+ match = (tif != NULL);
+ break;
+ }
+#ifdef INET6
+ /* FALLTHROUGH */
+ case O_IP6_DST_ME:
+ match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
+#endif
+ break;
+
+
+ case O_IP_SRCPORT:
+ case O_IP_DSTPORT:
+ /*
+ * offset == 0 && proto != 0 is enough
+ * to guarantee that we have a
+ * packet with port info.
+ */
+ if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
+ && offset == 0) {
+ u_int16_t x =
+ (cmd->opcode == O_IP_SRCPORT) ?
+ src_port : dst_port ;
+ u_int16_t *p =
+ ((ipfw_insn_u16 *)cmd)->ports;
+ int i;
+
+ for (i = cmdlen - 1; !match && i>0;
+ i--, p += 2)
+ match = (x>=p[0] && x<=p[1]);
+ }
+ break;
+
+ case O_ICMPTYPE:
+ match = (offset == 0 && proto==IPPROTO_ICMP &&
+ icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
+ break;
+
+#ifdef INET6
+ case O_ICMP6TYPE:
+ match = is_ipv6 && offset == 0 &&
+ proto==IPPROTO_ICMPV6 &&
+ icmp6type_match(
+ ICMP6(ulp)->icmp6_type,
+ (ipfw_insn_u32 *)cmd);
+ break;
+#endif /* INET6 */
+
+ case O_IPOPT:
+ match = (is_ipv4 &&
+ ipopts_match(ip, cmd) );
+ break;
+
+ case O_IPVER:
+ match = (is_ipv4 &&
+ cmd->arg1 == ip->ip_v);
+ break;
+
+ case O_IPID:
+ case O_IPLEN:
+ case O_IPTTL:
+ if (is_ipv4) { /* only for IP packets */
+ uint16_t x;
+ uint16_t *p;
+ int i;
+
+ if (cmd->opcode == O_IPLEN)
+ x = iplen;
+ else if (cmd->opcode == O_IPTTL)
+ x = ip->ip_ttl;
+ else /* must be IPID */
+ x = ntohs(ip->ip_id);
+ if (cmdlen == 1) {
+ match = (cmd->arg1 == x);
+ break;
+ }
+ /* otherwise we have ranges */
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for (; !match && i>0; i--, p += 2)
+ match = (x >= p[0] && x <= p[1]);
+ }
+ break;
+
+ case O_IPPRECEDENCE:
+ match = (is_ipv4 &&
+ (cmd->arg1 == (ip->ip_tos & 0xe0)) );
+ break;
+
+ case O_IPTOS:
+ match = (is_ipv4 &&
+ flags_match(cmd, ip->ip_tos));
+ break;
+
+ case O_TCPDATALEN:
+ if (proto == IPPROTO_TCP && offset == 0) {
+ struct tcphdr *tcp;
+ uint16_t x;
+ uint16_t *p;
+ int i;
+
+ tcp = TCP(ulp);
+ x = iplen -
+ ((ip->ip_hl + tcp->th_off) << 2);
+ if (cmdlen == 1) {
+ match = (cmd->arg1 == x);
+ break;
+ }
+ /* otherwise we have ranges */
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for (; !match && i>0; i--, p += 2)
+ match = (x >= p[0] && x <= p[1]);
+ }
+ break;
+
+ case O_TCPFLAGS:
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ flags_match(cmd, TCP(ulp)->th_flags));
+ break;
+
+ case O_TCPOPTS:
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ tcpopts_match(TCP(ulp), cmd));
+ break;
+
+ case O_TCPSEQ:
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ ((ipfw_insn_u32 *)cmd)->d[0] ==
+ TCP(ulp)->th_seq);
+ break;
+
+ case O_TCPACK:
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ ((ipfw_insn_u32 *)cmd)->d[0] ==
+ TCP(ulp)->th_ack);
+ break;
+
+ case O_TCPWIN:
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ cmd->arg1 == TCP(ulp)->th_win);
+ break;
+
+ case O_ESTAB:
+ /* reject packets which have SYN only */
+ /* XXX should i also check for TH_ACK ? */
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ (TCP(ulp)->th_flags &
+ (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
+ break;
+
+ case O_ALTQ: {
+ struct pf_mtag *at;
+ ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+ match = 1;
+ at = pf_find_mtag(m);
+ if (at != NULL && at->qid != 0)
+ break;
+ at = pf_get_mtag(m);
+ if (at == NULL) {
+ /*
+ * Let the packet fall back to the
+ * default ALTQ.
+ */
+ break;
+ }
+ at->qid = altq->qid;
+ if (is_ipv4)
+ at->af = AF_INET;
+ else
+ at->af = AF_LINK;
+ at->hdr = ip;
+ break;
+ }
+
+ case O_LOG:
+ ipfw_log(f, hlen, args, m,
+ oif, offset, tablearg, ip);
+ match = 1;
+ break;
+
+ case O_PROB:
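+				/* d[0] is the match probability, already
+				 * scaled to the range of random(). */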
+ match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
+ break;
+
+ case O_VERREVPATH:
+ /* Outgoing packets automatically pass/match */
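+				/* For incoming packets, verify_path() checks
+				 * that the route back to src_ip points out of
+				 * the receiving interface (reverse path). */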
+ match = ((oif != NULL) ||
+ (m->m_pkthdr.rcvif == NULL) ||
+ (
+#ifdef INET6
+ is_ipv6 ?
+ verify_path6(&(args->f_id.src_ip6),
+ m->m_pkthdr.rcvif) :
+#endif
+ verify_path(src_ip, m->m_pkthdr.rcvif,
+ args->f_id.fib)));
+ break;
+
+ case O_VERSRCREACH:
+ /* Outgoing packets automatically pass/match */
+ match = (hlen > 0 && ((oif != NULL) ||
+#ifdef INET6
+ is_ipv6 ?
+ verify_path6(&(args->f_id.src_ip6),
+ NULL) :
+#endif
+ verify_path(src_ip, NULL, args->f_id.fib)));
+ break;
+
+ case O_ANTISPOOF:
+ /* Outgoing packets automatically pass/match */
+ if (oif == NULL && hlen > 0 &&
+ ( (is_ipv4 && in_localaddr(src_ip))
+#ifdef INET6
+ || (is_ipv6 &&
+ in6_localaddr(&(args->f_id.src_ip6)))
+#endif
+ ))
+ match =
+#ifdef INET6
+ is_ipv6 ? verify_path6(
+ &(args->f_id.src_ip6),
+ m->m_pkthdr.rcvif) :
+#endif
+ verify_path(src_ip,
+ m->m_pkthdr.rcvif,
+ args->f_id.fib);
+ else
+ match = 1;
+ break;
+
+ case O_IPSEC:
+#ifdef IPSEC
+ match = (m_tag_find(m,
+ PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
+#endif
+ /* otherwise no match */
+ break;
+
+#ifdef INET6
+ case O_IP6_SRC:
+ match = is_ipv6 &&
+ IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
+ &((ipfw_insn_ip6 *)cmd)->addr6);
+ break;
+
+ case O_IP6_DST:
+ match = is_ipv6 &&
+ IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
+ &((ipfw_insn_ip6 *)cmd)->addr6);
+ break;
+ case O_IP6_SRC_MASK:
+ case O_IP6_DST_MASK:
+ if (is_ipv6) {
+ int i = cmdlen - 1;
+ struct in6_addr p;
+ struct in6_addr *d =
+ &((ipfw_insn_ip6 *)cmd)->addr6;
+
+ for (; !match && i > 0; d += 2,
+ i -= F_INSN_SIZE(struct in6_addr)
+ * 2) {
+ p = (cmd->opcode ==
+ O_IP6_SRC_MASK) ?
+ args->f_id.src_ip6:
+ args->f_id.dst_ip6;
+ APPLY_MASK(&p, &d[1]);
+ match =
+ IN6_ARE_ADDR_EQUAL(&d[0],
+ &p);
+ }
+ }
+ break;
+
+ case O_FLOW6ID:
+ match = is_ipv6 &&
+ flow6id_match(args->f_id.flow_id6,
+ (ipfw_insn_u32 *) cmd);
+ break;
+
+ case O_EXT_HDR:
+ match = is_ipv6 &&
+ (ext_hd & ((ipfw_insn *) cmd)->arg1);
+ break;
+
+ case O_IP6:
+ match = is_ipv6;
+ break;
+#endif
+
+ case O_IP4:
+ match = is_ipv4;
+ break;
+
+ case O_TAG: {
+ struct m_tag *mtag;
+ uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+
+ /* Packet is already tagged with this tag? */
+ mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
+
+				/* When the F_NOT flag is present this is an
+				 * `untag' action: remove the mtag from the
+				 * mbuf and reset `match' to zero (`match' is
+				 * inverted later).
+				 * Otherwise allocate a new mtag and prepend
+				 * it to the mbuf.
+				 */
+ if (cmd->len & F_NOT) { /* `untag' action */
+ if (mtag != NULL)
+ m_tag_delete(m, mtag);
+ match = 0;
+ } else if (mtag == NULL) {
+ if ((mtag = m_tag_alloc(MTAG_IPFW,
+ tag, 0, M_NOWAIT)) != NULL)
+ m_tag_prepend(m, mtag);
+ match = 1;
+ }
+ break;
+ }
+
+ case O_FIB: /* try match the specified fib */
+ if (args->f_id.fib == cmd->arg1)
+ match = 1;
+ break;
+
+ case O_TAGGED: {
+ struct m_tag *mtag;
+ uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+
+ if (cmdlen == 1) {
+ match = m_tag_locate(m, MTAG_IPFW,
+ tag, NULL) != NULL;
+ break;
+ }
+
+ /* we have ranges */
+ for (mtag = m_tag_first(m);
+ mtag != NULL && !match;
+ mtag = m_tag_next(m, mtag)) {
+ uint16_t *p;
+ int i;
+
+ if (mtag->m_tag_cookie != MTAG_IPFW)
+ continue;
+
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for(; !match && i > 0; i--, p += 2)
+ match =
+ mtag->m_tag_id >= p[0] &&
+ mtag->m_tag_id <= p[1];
+ }
+ break;
+ }
+
+ /*
+ * The second set of opcodes represents 'actions',
+ * i.e. the terminal part of a rule once the packet
+ * matches all previous patterns.
+ * Typically there is only one action for each rule,
+ * and the opcode is stored at the end of the rule
+ * (but there are exceptions -- see below).
+ *
+ * In general, here we set retval and terminate the
+ * outer loop (would be a 'break 3' in some language,
+ * but we need to set l=0, done=1)
+ *
+ * Exceptions:
+ * O_COUNT and O_SKIPTO actions:
+ * instead of terminating, we jump to the next rule
+ * (setting l=0), or to the SKIPTO target (setting
+ * f/f_len, cmd and l as needed), respectively.
+ *
+ * O_TAG, O_LOG and O_ALTQ action parameters:
+ * perform some action and set match = 1;
+ *
+ * O_LIMIT and O_KEEP_STATE: these opcodes are
+ * not real 'actions', and are stored right
+ * before the 'action' part of the rule.
+ * These opcodes try to install an entry in the
+ * state tables; if successful, we continue with
+ * the next opcode (match=1; break;), otherwise
+ * the packet must be dropped (set retval,
+ * break loops with l=0, done=1)
+ *
+ * O_PROBE_STATE and O_CHECK_STATE: these opcodes
+ * cause a lookup of the state table, and a jump
+ * to the 'action' part of the parent rule
+ * if an entry is found, or
+ * (CHECK_STATE only) a jump to the next rule if
+ * the entry is not found.
+ * The result of the lookup is cached so that
+ * further instances of these opcodes become NOPs.
+ * The jump to the next rule is done by setting
+ * l=0, cmdlen=0.
+ */
+ case O_LIMIT:
+ case O_KEEP_STATE:
+ if (ipfw_install_state(f,
+ (ipfw_insn_limit *)cmd, args, tablearg)) {
+ /* error or limit violation */
+ retval = IP_FW_DENY;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ }
+ match = 1;
+ break;
+
+ case O_PROBE_STATE:
+ case O_CHECK_STATE:
+ /*
+ * dynamic rules are checked at the first
+ * keep-state or check-state occurrence,
+ * with the result being stored in dyn_dir.
+ * The compiler introduces a PROBE_STATE
+ * instruction for us when we have a
+ * KEEP_STATE (because PROBE_STATE needs
+ * to be run first).
+ */
+ if (dyn_dir == MATCH_UNKNOWN &&
+ (q = ipfw_lookup_dyn_rule(&args->f_id,
+ &dyn_dir, proto == IPPROTO_TCP ?
+ TCP(ulp) : NULL))
+ != NULL) {
+ /*
+ * Found dynamic entry, update stats
+ * and jump to the 'action' part of
+ * the parent rule by setting
+ * f, cmd, l and clearing cmdlen.
+ */
+ q->pcnt++;
+ q->bcnt += pktlen;
+ /* XXX we would like to have f_pos
+ * readily accessible in the dynamic
+ * rule, instead of having to
+ * lookup q->rule.
+ */
+ f = q->rule;
+ f_pos = ipfw_find_rule(chain,
+ f->rulenum, f->id);
+ cmd = ACTION_PTR(f);
+ l = f->cmd_len - f->act_ofs;
+ ipfw_dyn_unlock();
+ cmdlen = 0;
+ match = 1;
+ break;
+ }
+ /*
+ * Dynamic entry not found. If CHECK_STATE,
+ * skip to next rule, if PROBE_STATE just
+ * ignore and continue with next opcode.
+ */
+ if (cmd->opcode == O_CHECK_STATE)
+ l = 0; /* exit inner loop */
+ match = 1;
+ break;
+
+ case O_ACCEPT:
+ retval = 0; /* accept */
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_PIPE:
+ case O_QUEUE:
+ set_match(args, f_pos, chain);
+ args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+ if (cmd->opcode == O_PIPE)
+ args->rule.info |= IPFW_IS_PIPE;
+ if (V_fw_one_pass)
+ args->rule.info |= IPFW_ONEPASS;
+ retval = IP_FW_DUMMYNET;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_DIVERT:
+ case O_TEE:
+ if (args->eh) /* not on layer 2 */
+ break;
+ /* otherwise this is terminal */
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ retval = (cmd->opcode == O_DIVERT) ?
+ IP_FW_DIVERT : IP_FW_TEE;
+ set_match(args, f_pos, chain);
+ args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+ break;
+
+ case O_COUNT:
+ f->pcnt++; /* update stats */
+ f->bcnt += pktlen;
+ f->timestamp = time_uptime;
+ l = 0; /* exit inner loop */
+ break;
+
+ case O_SKIPTO:
+ f->pcnt++; /* update stats */
+ f->bcnt += pktlen;
+ f->timestamp = time_uptime;
+ /* If possible use cached f_pos (in f->next_rule),
+ * whose version is written in f->next_rule
+ * (horrible hacks to avoid changing the ABI).
+ */
+ if (cmd->arg1 != IP_FW_TABLEARG &&
+ (uintptr_t)f->x_next == chain->id) {
+ f_pos = (uintptr_t)f->next_rule;
+ } else {
+ int i = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+ /* make sure we do not jump backward */
+ if (i <= f->rulenum)
+ i = f->rulenum + 1;
+ f_pos = ipfw_find_rule(chain, i, 0);
+ /* update the cache */
+ if (cmd->arg1 != IP_FW_TABLEARG) {
+ f->next_rule =
+ (void *)(uintptr_t)f_pos;
+ f->x_next =
+ (void *)(uintptr_t)chain->id;
+ }
+ }
+ /*
+ * Skip disabled rules, and re-enter
+ * the inner loop with the correct
+ * f_pos, f, l and cmd.
+ * Also clear cmdlen and skip_or
+ */
+ for (; f_pos < chain->n_rules - 1 &&
+ (V_set_disable &
+ (1 << chain->map[f_pos]->set));
+ f_pos++)
+ ;
+ /* Re-enter the inner loop at the skipto rule. */
+ f = chain->map[f_pos];
+ l = f->cmd_len;
+ cmd = f->cmd;
+ match = 1;
+ cmdlen = 0;
+ skip_or = 0;
+ continue;
+ break; /* not reached */
+
+ case O_REJECT:
+ /*
+ * Drop the packet and send a reject notice
+ * if the packet is not ICMP (or is an ICMP
+ * query), and it is not multicast/broadcast.
+ */
+ if (hlen > 0 && is_ipv4 && offset == 0 &&
+ (proto != IPPROTO_ICMP ||
+ is_icmp_query(ICMP(ulp))) &&
+ !(m->m_flags & (M_BCAST|M_MCAST)) &&
+ !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
+ send_reject(args, cmd->arg1, iplen, ip);
+ m = args->m;
+ }
+ /* FALLTHROUGH */
+#ifdef INET6
+ case O_UNREACH6:
+ if (hlen > 0 && is_ipv6 &&
+ ((offset & IP6F_OFF_MASK) == 0) &&
+ (proto != IPPROTO_ICMPV6 ||
+ (is_icmp6_query(icmp6_type) == 1)) &&
+ !(m->m_flags & (M_BCAST|M_MCAST)) &&
+ !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
+ send_reject6(
+ args, cmd->arg1, hlen,
+ (struct ip6_hdr *)ip);
+ m = args->m;
+ }
+ /* FALLTHROUGH */
+#endif
+ case O_DENY:
+ retval = IP_FW_DENY;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_FORWARD_IP:
+ if (args->eh) /* not valid on layer2 pkts */
+ break;
+ if (!q || dyn_dir == MATCH_FORWARD) {
+ struct sockaddr_in *sa;
+ sa = &(((ipfw_insn_sa *)cmd)->sa);
+ if (sa->sin_addr.s_addr == INADDR_ANY) {
+ bcopy(sa, &args->hopstore,
+ sizeof(*sa));
+ args->hopstore.sin_addr.s_addr =
+ htonl(tablearg);
+ args->next_hop = &args->hopstore;
+ } else {
+ args->next_hop = sa;
+ }
+ }
+ retval = IP_FW_PASS;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_NETGRAPH:
+ case O_NGTEE:
+ set_match(args, f_pos, chain);
+ args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+ if (V_fw_one_pass)
+ args->rule.info |= IPFW_ONEPASS;
+ retval = (cmd->opcode == O_NETGRAPH) ?
+ IP_FW_NETGRAPH : IP_FW_NGTEE;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_SETFIB:
+ f->pcnt++; /* update stats */
+ f->bcnt += pktlen;
+ f->timestamp = time_uptime;
+ M_SETFIB(m, cmd->arg1);
+ args->f_id.fib = cmd->arg1;
+ l = 0; /* exit inner loop */
+ break;
+
+ case O_NAT:
+ if (!IPFW_NAT_LOADED) {
+ retval = IP_FW_DENY;
+ } else {
+ struct cfg_nat *t;
+ int nat_id;
+
+ set_match(args, f_pos, chain);
+ t = ((ipfw_insn_nat *)cmd)->nat;
+ if (t == NULL) {
+ nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+ t = (*lookup_nat_ptr)(&chain->nat, nat_id);
+
+ if (t == NULL) {
+ retval = IP_FW_DENY;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+ }
+ if (cmd->arg1 != IP_FW_TABLEARG)
+ ((ipfw_insn_nat *)cmd)->nat = t;
+ }
+ retval = ipfw_nat_ptr(args, t, m);
+ }
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_REASS: {
+ int ip_off;
+
+ f->pcnt++;
+ f->bcnt += pktlen;
+ l = 0; /* in any case exit inner loop */
+ ip_off = ntohs(ip->ip_off);
+
+ /* if not fragmented, go to next rule */
+ if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
+ break;
+ /*
+ * ip_reass() expects len & off in host
+ * byte order.
+ */
+ SET_HOST_IPLEN(ip);
+
+ args->m = m = ip_reass(m);
+
+ /*
+ * do IP header checksum fixup.
+ */
+ if (m == NULL) { /* fragment got swallowed */
+ retval = IP_FW_DENY;
+ } else { /* good, packet complete */
+ int hlen;
+
+ ip = mtod(m, struct ip *);
+ hlen = ip->ip_hl << 2;
+ SET_NET_IPLEN(ip);
+ ip->ip_sum = 0;
+ if (hlen == sizeof(struct ip))
+ ip->ip_sum = in_cksum_hdr(ip);
+ else
+ ip->ip_sum = in_cksum(m, hlen);
+ retval = IP_FW_REASS;
+ set_match(args, f_pos, chain);
+ }
+ done = 1; /* exit outer loop */
+ break;
+ }
+
+ default:
+ panic("-- unknown opcode %d\n", cmd->opcode);
+ } /* end of switch() on opcodes */
+ /*
+ * if we get here with l=0, then match is irrelevant.
+ */
+
+ if (cmd->len & F_NOT)
+ match = !match;
+
+ if (match) {
+ if (cmd->len & F_OR)
+ skip_or = 1;
+ } else {
+ if (!(cmd->len & F_OR)) /* not an OR block, */
+ break; /* try next rule */
+ }
+
+ } /* end of inner loop, scan opcodes */
+
+ if (done)
+ break;
+
+/* next_rule:; */ /* try next rule */
+
+ } /* end of outer for, scan rules */
+
+ if (done) {
+ struct ip_fw *rule = chain->map[f_pos];
+ /* Update statistics */
+ rule->pcnt++;
+ rule->bcnt += pktlen;
+ rule->timestamp = time_uptime;
+ } else {
+ retval = IP_FW_DENY;
+ printf("ipfw: ouch!, skip past end of rules, denying packet\n");
+ }
+ IPFW_RUNLOCK(chain);
+#ifdef __FreeBSD__
+ if (ucred_cache != NULL)
+ crfree(ucred_cache);
+#endif
+ return (retval);
+
+pullup_failed:
+ if (V_fw_verbose)
+ printf("ipfw: pullup failed\n");
+ return (IP_FW_DENY);
+}
+
+/*
+ * Module and VNET glue
+ */
+
+/*
+ * Stuff that must be initialised only on boot or module load
+ */
+static int
+ipfw_init(void)
+{
+ int error = 0;
+
+ ipfw_dyn_attach();
+ /*
+ * Only print out this stuff the first time around,
+ * when called from the sysinit code.
+ */
+ printf("ipfw2 "
+#ifdef INET6
+ "(+ipv6) "
+#endif
+ "initialized, divert %s, nat %s, "
+ "rule-based forwarding "
+#ifdef IPFIREWALL_FORWARD
+ "enabled, "
+#else
+ "disabled, "
+#endif
+ "default to %s, logging ",
+#ifdef IPDIVERT
+ "enabled",
+#else
+ "loadable",
+#endif
+#ifdef IPFIREWALL_NAT
+ "enabled",
+#else
+ "loadable",
+#endif
+ default_to_accept ? "accept" : "deny");
+
+ /*
+ * Note: V_xxx variables can be accessed here but the vnet specific
+ * initializer may not have been called yet for the VIMAGE case.
+ * Tuneables will have been processed. We will print out values for
+ * the default vnet.
+ * XXX This should all be rationalized AFTER 8.0
+ */
+ if (V_fw_verbose == 0)
+ printf("disabled\n");
+ else if (V_verbose_limit == 0)
+ printf("unlimited\n");
+ else
+ printf("limited to %d packets/entry by default\n",
+ V_verbose_limit);
+
+ ipfw_log_bpf(1); /* init */
+ return (error);
+}
+
+/*
+ * Called for the removal of the last instance only on module unload.
+ */
+static void
+ipfw_destroy(void)
+{
+
+ ipfw_log_bpf(0); /* uninit */
+ ipfw_dyn_detach();
+ printf("IP firewall unloaded\n");
+}
+
+/*
+ * Stuff that must be initialized for every instance
+ * (including the first of course).
+ */
+static int
+vnet_ipfw_init(const void *unused)
+{
+ int error;
+ struct ip_fw *rule = NULL;
+ struct ip_fw_chain *chain;
+
+ chain = &V_layer3_chain;
+
+ /* First set up some values that are compile time options */
+ V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
+ V_fw_deny_unknown_exthdrs = 1;
+#ifdef IPFIREWALL_VERBOSE
+ V_fw_verbose = 1;
+#endif
+#ifdef IPFIREWALL_VERBOSE_LIMIT
+ V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
+#endif
+#ifdef IPFIREWALL_NAT
+ LIST_INIT(&chain->nat);
+#endif
+
+ /* insert the default rule and create the initial map */
+ chain->n_rules = 1;
+ chain->static_len = sizeof(struct ip_fw);
+ chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO);
+ if (chain->map)
+ rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO);
+ if (rule == NULL) {
+ if (chain->map)
+ free(chain->map, M_IPFW);
+ printf("ipfw2: ENOSPC initializing default rule "
+ "(support disabled)\n");
+ return (ENOSPC);
+ }
+ error = ipfw_init_tables(chain);
+ if (error) {
+ panic("init_tables"); /* XXX Marko fix this ! */
+ }
+
+ /* fill and insert the default rule */
+ rule->act_ofs = 0;
+ rule->rulenum = IPFW_DEFAULT_RULE;
+ rule->cmd_len = 1;
+ rule->set = RESVD_SET;
+ rule->cmd[0].len = 1;
+ rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
+ chain->rules = chain->default_rule = chain->map[0] = rule;
+ chain->id = rule->id = 1;
+
+ IPFW_LOCK_INIT(chain);
+ ipfw_dyn_init();
+
+	/* Mark this vnet instance as ready to process packets. */
+ V_ipfw_vnet_ready = 1; /* Open for business */
+
+ /*
+ * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr)
+ * and pfil hooks for ipv4 and ipv6. Even if the latter two fail
+ * we still keep the module alive because the sockopt and
+ * layer2 paths are still useful.
+ * ipfw[6]_hook return 0 on success, ENOENT on failure,
+ * so we can ignore the exact return value and just set a flag.
+ *
+ * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
+ * changes in the underlying (per-vnet) variables trigger
+ * immediate hook()/unhook() calls.
+ * In layer2 we have the same behaviour, except that V_ether_ipfw
+ * is checked on each packet because there are no pfil hooks.
+ */
+ V_ip_fw_ctl_ptr = ipfw_ctl;
+ V_ip_fw_chk_ptr = ipfw_chk;
+ error = ipfw_attach_hooks(1);
+ return (error);
+}
+
+/*
+ * Called for the removal of each instance.
+ */
+static int
+vnet_ipfw_uninit(const void *unused)
+{
+ struct ip_fw *reap, *rule;
+ struct ip_fw_chain *chain = &V_layer3_chain;
+ int i;
+
+ V_ipfw_vnet_ready = 0; /* tell new callers to go away */
+ /*
+ * disconnect from ipv4, ipv6, layer2 and sockopt.
+ * Then grab, release and grab again the WLOCK so we make
+ * sure the update is propagated and nobody will be in.
+ */
+ (void)ipfw_attach_hooks(0 /* detach */);
+ V_ip_fw_chk_ptr = NULL;
+ V_ip_fw_ctl_ptr = NULL;
+ IPFW_UH_WLOCK(chain);
+ IPFW_UH_WUNLOCK(chain);
+ IPFW_UH_WLOCK(chain);
+
+ IPFW_WLOCK(chain);
+ IPFW_WUNLOCK(chain);
+ IPFW_WLOCK(chain);
+
+ ipfw_dyn_uninit(0); /* run the callout_drain */
+ ipfw_destroy_tables(chain);
+ reap = NULL;
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+ rule->x_next = reap;
+ reap = rule;
+ }
+ if (chain->map)
+ free(chain->map, M_IPFW);
+ IPFW_WUNLOCK(chain);
+ IPFW_UH_WUNLOCK(chain);
+ if (reap != NULL)
+ ipfw_reap_rules(reap);
+ IPFW_LOCK_DESTROY(chain);
+ ipfw_dyn_uninit(1); /* free the remaining parts */
+ return 0;
+}
+
+/*
+ * Module event handler.
+ * In general we have the choice of handling most of these events by the
+ * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to
+ * use the SYSINIT handlers as they are more capable of expressing the
+ * flow of control during module and vnet operations, so this is just
+ * a skeleton. Note there is no SYSINIT equivalent of the module
+ * SHUTDOWN handler, but we don't have anything to do in that case anyhow.
+ */
+static int
+ipfw_modevent(module_t mod, int type, void *unused)
+{
+ int err = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ /* Called once at module load or
+ * system boot if compiled in. */
+ break;
+ case MOD_QUIESCE:
+ /* Called before unload. May veto unloading. */
+ break;
+ case MOD_UNLOAD:
+ /* Called during unload. */
+ break;
+ case MOD_SHUTDOWN:
+ /* Called during system shutdown. */
+ break;
+ default:
+ err = EOPNOTSUPP;
+ break;
+ }
+ return err;
+}
+
+static moduledata_t ipfwmod = {
+ "ipfw",
+ ipfw_modevent,
+ 0
+};
+
+/* Define startup order. */
+#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */
+#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */
+#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */
+
+DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
+MODULE_VERSION(ipfw, 2);
+/* should declare some dependencies here */
+
+/*
+ * Starting up. Done in order after ipfwmod() has been called.
+ * VNET_SYSINIT is also called for each existing vnet and each new vnet.
+ */
+SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
+ ipfw_init, NULL);
+VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
+ vnet_ipfw_init, NULL);
+
+/*
+ * Closing up shop. These are done in REVERSE ORDER, but still
+ * after ipfwmod() has been called. Not called on reboot.
+ * VNET_SYSUNINIT is also called for each exiting vnet as it exits
+ * or when the module is unloaded.
+ */
+SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
+ ipfw_destroy, NULL);
+VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
+ vnet_ipfw_uninit, NULL);
+/* end of file */
diff --git a/freebsd/sys/netinet/ipfw/ip_fw_log.c b/freebsd/sys/netinet/ipfw/ip_fw_log.c
new file mode 100644
index 00000000..0a5cd94c
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_fw_log.c
@@ -0,0 +1,451 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Logging support for ipfw
+ */
+
+#if !defined(KLD_MODULE)
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_ipdivert.h>
+#include <freebsd/local/opt_ipdn.h>
+#include <freebsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#endif
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/net/ethernet.h> /* for ETHERTYPE_IP */
+#include <freebsd/net/if.h>
+#include <freebsd/net/vnet.h>
+#include <freebsd/net/if_types.h> /* for IFT_ETHER */
+#include <freebsd/net/bpf.h> /* for BPF */
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/udp.h>
+
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet/icmp6.h>
+#ifdef INET6
+#include <freebsd/netinet6/in6_var.h> /* ip6_sprintf() */
+#endif
+
+#ifdef MAC
+#include <freebsd/security/mac/mac_framework.h>
+#endif
+
+/*
+ * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T.
+ * The other macros just cast void * into the appropriate type.
+ */
+#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
+#define TCP(p) ((struct tcphdr *)(p))
+#define SCTP(p) ((struct sctphdr *)(p))
+#define UDP(p) ((struct udphdr *)(p))
+#define ICMP(p) ((struct icmphdr *)(p))
+#define ICMP6(p) ((struct icmp6_hdr *)(p))
+
+#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
+#define SNP(buf) buf, sizeof(buf)
+
+#ifdef WITHOUT_BPF
+void
+ipfw_log_bpf(int onoff)
+{
+}
+#else /* !WITHOUT_BPF */
+static struct ifnet *log_if; /* hook to attach to bpf */
+
+/* we use this dummy function for all ifnet callbacks */
+static int
+log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+ return EINVAL;
+}
+
+static int
+ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro)
+{
+ if (m != NULL)
+ m_freem(m);
+ return EINVAL;
+}
+
+static void
+ipfw_log_start(struct ifnet* ifp)
+{
+ panic("ipfw_log_start() must not be called");
+}
+
+static const u_char ipfwbroadcastaddr[6] =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+void
+ipfw_log_bpf(int onoff)
+{
+ struct ifnet *ifp;
+
+ if (onoff) {
+ if (log_if)
+ return;
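+		/* Create a fake ethernet interface ("ipfw0") and attach
+		 * it to bpf so that logged packets can be captured. */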
+ ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL)
+ return;
+ if_initname(ifp, "ipfw", 0);
+ ifp->if_mtu = 65536;
+ ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_init = (void *)log_dummy;
+ ifp->if_ioctl = log_dummy;
+ ifp->if_start = ipfw_log_start;
+ ifp->if_output = ipfw_log_output;
+ ifp->if_addrlen = 6;
+ ifp->if_hdrlen = 14;
+ if_attach(ifp);
+ ifp->if_broadcastaddr = ipfwbroadcastaddr;
+ ifp->if_baudrate = IF_Mbps(10);
+ bpfattach(ifp, DLT_EN10MB, 14);
+ log_if = ifp;
+ } else {
+ if (log_if) {
+ ether_ifdetach(log_if);
+ if_free(log_if);
+ }
+ log_if = NULL;
+ }
+}
+#endif /* !WITHOUT_BPF */
+
+/*
+ * We enter here when we have a rule with O_LOG.
+ * XXX this function alone takes about 2Kbytes of code!
+ */
+void
+ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
+ struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
+ struct ip *ip)
+{
+ char *action;
+ int limit_reached = 0;
+ char action2[40], proto[128], fragment[32];
+
+ if (V_fw_verbose == 0) {
+#ifndef WITHOUT_BPF
+
+ if (log_if == NULL || log_if->if_bpf == NULL)
+ return;
+
+ if (args->eh) /* layer2, use orig hdr */
+ BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m);
+ else
+ /* Add fake header. Later we will store
+ * more info in the header.
+ */
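+			/* "DDDDDDSSSSSS" stands in for the dst/src MAC
+			 * addresses and 0x0800 is ETHERTYPE_IP. */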
+ BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
+#endif /* !WITHOUT_BPF */
+ return;
+ }
+ /* the old 'log' function */
+ fragment[0] = '\0';
+ proto[0] = '\0';
+
+ if (f == NULL) { /* bogus pkt */
+ if (V_verbose_limit != 0 && V_norule_counter >= V_verbose_limit)
+ return;
+ V_norule_counter++;
+ if (V_norule_counter == V_verbose_limit)
+ limit_reached = V_verbose_limit;
+ action = "Refuse";
+ } else { /* O_LOG is the first action, find the real one */
+ ipfw_insn *cmd = ACTION_PTR(f);
+ ipfw_insn_log *l = (ipfw_insn_log *)cmd;
+
+ if (l->max_log != 0 && l->log_left == 0)
+ return;
+ l->log_left--;
+ if (l->log_left == 0)
+ limit_reached = l->max_log;
+ cmd += F_LEN(cmd); /* point to first action */
+ if (cmd->opcode == O_ALTQ) {
+ ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+ snprintf(SNPARGS(action2, 0), "Altq %d",
+ altq->qid);
+ cmd += F_LEN(cmd);
+ }
+ if (cmd->opcode == O_PROB)
+ cmd += F_LEN(cmd);
+
+ if (cmd->opcode == O_TAG)
+ cmd += F_LEN(cmd);
+
+ action = action2;
+ switch (cmd->opcode) {
+ case O_DENY:
+ action = "Deny";
+ break;
+
+ case O_REJECT:
+ if (cmd->arg1==ICMP_REJECT_RST)
+ action = "Reset";
+ else if (cmd->arg1==ICMP_UNREACH_HOST)
+ action = "Reject";
+ else
+ snprintf(SNPARGS(action2, 0), "Unreach %d",
+ cmd->arg1);
+ break;
+
+ case O_UNREACH6:
+ if (cmd->arg1==ICMP6_UNREACH_RST)
+ action = "Reset";
+ else
+ snprintf(SNPARGS(action2, 0), "Unreach %d",
+ cmd->arg1);
+ break;
+
+ case O_ACCEPT:
+ action = "Accept";
+ break;
+ case O_COUNT:
+ action = "Count";
+ break;
+ case O_DIVERT:
+ snprintf(SNPARGS(action2, 0), "Divert %d",
+ cmd->arg1);
+ break;
+ case O_TEE:
+ snprintf(SNPARGS(action2, 0), "Tee %d",
+ cmd->arg1);
+ break;
+ case O_SETFIB:
+ snprintf(SNPARGS(action2, 0), "SetFib %d",
+ cmd->arg1);
+ break;
+ case O_SKIPTO:
+ snprintf(SNPARGS(action2, 0), "SkipTo %d",
+ cmd->arg1);
+ break;
+ case O_PIPE:
+ snprintf(SNPARGS(action2, 0), "Pipe %d",
+ cmd->arg1);
+ break;
+ case O_QUEUE:
+ snprintf(SNPARGS(action2, 0), "Queue %d",
+ cmd->arg1);
+ break;
+ case O_FORWARD_IP: {
+ ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
+ int len;
+ struct in_addr dummyaddr;
+ if (sa->sa.sin_addr.s_addr == INADDR_ANY)
+ dummyaddr.s_addr = htonl(tablearg);
+ else
+ dummyaddr.s_addr = sa->sa.sin_addr.s_addr;
+
+ len = snprintf(SNPARGS(action2, 0), "Forward to %s",
+ inet_ntoa(dummyaddr));
+
+ if (sa->sa.sin_port)
+ snprintf(SNPARGS(action2, len), ":%d",
+ sa->sa.sin_port);
+ }
+ break;
+ case O_NETGRAPH:
+ snprintf(SNPARGS(action2, 0), "Netgraph %d",
+ cmd->arg1);
+ break;
+ case O_NGTEE:
+ snprintf(SNPARGS(action2, 0), "Ngtee %d",
+ cmd->arg1);
+ break;
+ case O_NAT:
+ action = "Nat";
+ break;
+ case O_REASS:
+ action = "Reass";
+ break;
+ default:
+ action = "UNKNOWN";
+ break;
+ }
+ }
+
+ if (hlen == 0) { /* non-ip */
+ snprintf(SNPARGS(proto, 0), "MAC");
+
+ } else {
+ int len;
+#ifdef INET6
+ char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2];
+#else
+ char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
+#endif
+ struct icmphdr *icmp;
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
+ struct icmp6_hdr *icmp6;
+#endif
+ src[0] = '\0';
+ dst[0] = '\0';
+#ifdef INET6
+ if (IS_IP6_FLOW_ID(&(args->f_id))) {
+ char ip6buf[INET6_ADDRSTRLEN];
+ snprintf(src, sizeof(src), "[%s]",
+ ip6_sprintf(ip6buf, &args->f_id.src_ip6));
+ snprintf(dst, sizeof(dst), "[%s]",
+ ip6_sprintf(ip6buf, &args->f_id.dst_ip6));
+
+ ip6 = (struct ip6_hdr *)ip;
+ tcp = (struct tcphdr *)(((char *)ip) + hlen);
+ udp = (struct udphdr *)(((char *)ip) + hlen);
+ } else
+#endif
+ {
+ tcp = L3HDR(struct tcphdr, ip);
+ udp = L3HDR(struct udphdr, ip);
+
+ inet_ntoa_r(ip->ip_src, src);
+ inet_ntoa_r(ip->ip_dst, dst);
+ }
+
+ switch (args->f_id.proto) {
+ case IPPROTO_TCP:
+ len = snprintf(SNPARGS(proto, 0), "TCP %s", src);
+ if (offset == 0)
+ snprintf(SNPARGS(proto, len), ":%d %s:%d",
+ ntohs(tcp->th_sport),
+ dst,
+ ntohs(tcp->th_dport));
+ else
+ snprintf(SNPARGS(proto, len), " %s", dst);
+ break;
+
+ case IPPROTO_UDP:
+ len = snprintf(SNPARGS(proto, 0), "UDP %s", src);
+ if (offset == 0)
+ snprintf(SNPARGS(proto, len), ":%d %s:%d",
+ ntohs(udp->uh_sport),
+ dst,
+ ntohs(udp->uh_dport));
+ else
+ snprintf(SNPARGS(proto, len), " %s", dst);
+ break;
+
+ case IPPROTO_ICMP:
+ icmp = L3HDR(struct icmphdr, ip);
+ if (offset == 0)
+ len = snprintf(SNPARGS(proto, 0),
+ "ICMP:%u.%u ",
+ icmp->icmp_type, icmp->icmp_code);
+ else
+ len = snprintf(SNPARGS(proto, 0), "ICMP ");
+ len += snprintf(SNPARGS(proto, len), "%s", src);
+ snprintf(SNPARGS(proto, len), " %s", dst);
+ break;
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen);
+ if (offset == 0)
+ len = snprintf(SNPARGS(proto, 0),
+ "ICMPv6:%u.%u ",
+ icmp6->icmp6_type, icmp6->icmp6_code);
+ else
+ len = snprintf(SNPARGS(proto, 0), "ICMPv6 ");
+ len += snprintf(SNPARGS(proto, len), "%s", src);
+ snprintf(SNPARGS(proto, len), " %s", dst);
+ break;
+#endif
+ default:
+ len = snprintf(SNPARGS(proto, 0), "P:%d %s",
+ args->f_id.proto, src);
+ snprintf(SNPARGS(proto, len), " %s", dst);
+ break;
+ }
+
+#ifdef INET6
+ if (IS_IP6_FLOW_ID(&(args->f_id))) {
+ if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
+ snprintf(SNPARGS(fragment, 0),
+ " (frag %08x:%d@%d%s)",
+ args->f_id.extra,
+ ntohs(ip6->ip6_plen) - hlen,
+ ntohs(offset & IP6F_OFF_MASK) << 3,
+ (offset & IP6F_MORE_FRAG) ? "+" : "");
+ } else
+#endif
+ {
+ int ipoff, iplen;
+ ipoff = ntohs(ip->ip_off);
+ iplen = ntohs(ip->ip_len);
+ if (ipoff & (IP_MF | IP_OFFMASK))
+ snprintf(SNPARGS(fragment, 0),
+ " (frag %d:%d@%d%s)",
+ ntohs(ip->ip_id), iplen - (ip->ip_hl << 2),
+ offset << 3,
+ (ipoff & IP_MF) ? "+" : "");
+ }
+ }
+#ifdef __FreeBSD__
+ if (oif || m->m_pkthdr.rcvif)
+ log(LOG_SECURITY | LOG_INFO,
+ "ipfw: %d %s %s %s via %s%s\n",
+ f ? f->rulenum : -1,
+ action, proto, oif ? "out" : "in",
+ oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
+ fragment);
+ else
+#endif
+ log(LOG_SECURITY | LOG_INFO,
+ "ipfw: %d %s %s [no if info]%s\n",
+ f ? f->rulenum : -1,
+ action, proto, fragment);
+ if (limit_reached)
+ log(LOG_SECURITY | LOG_NOTICE,
+ "ipfw: limit %d reached on entry %d\n",
+ limit_reached, f ? f->rulenum : -1);
+}
+/* end of file */
diff --git a/freebsd/sys/netinet/ipfw/ip_fw_nat.c b/freebsd/sys/netinet/ipfw/ip_fw_nat.c
new file mode 100644
index 00000000..e6c8bcec
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_fw_nat.c
@@ -0,0 +1,606 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2008 Paolo Pisati
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/eventhandler.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/rwlock.h>
+
+#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */
+
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/udp.h>
+
+#include <freebsd/machine/in_cksum.h> /* XXX for in_cksum */
+
+static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
+#define V_ifaddr_event_tag VNET(ifaddr_event_tag)
+
+static void
+ifaddr_change(void *arg __unused, struct ifnet *ifp)
+{
+ struct cfg_nat *ptr;
+ struct ifaddr *ifa;
+ struct ip_fw_chain *chain;
+
+ chain = &V_layer3_chain;
+ IPFW_WLOCK(chain);
+ /* Check every nat entry... */
+ LIST_FOREACH(ptr, &chain->nat, _next) {
+ /* ...using nic 'ifp->if_xname' as dynamic alias address. */
+ if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0)
+ continue;
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr == NULL)
+ continue;
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ptr->ip = ((struct sockaddr_in *)
+ (ifa->ifa_addr))->sin_addr;
+ LibAliasSetAddress(ptr->lib, ptr->ip);
+ }
+ if_addr_runlock(ifp);
+ }
+ IPFW_WUNLOCK(chain);
+}
+
+/*
+ * delete the pointers for nat entry ix, or all of them if ix < 0
+ */
+static void
+flush_nat_ptrs(struct ip_fw_chain *chain, const int ix)
+{
+ int i;
+ ipfw_insn_nat *cmd;
+
+ IPFW_WLOCK_ASSERT(chain);
+ for (i = 0; i < chain->n_rules; i++) {
+ cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]);
+ /* XXX skip log and the like ? */
+ if (cmd->o.opcode == O_NAT && cmd->nat != NULL &&
+ (ix < 0 || cmd->nat->id == ix))
+ cmd->nat = NULL;
+ }
+}
+
+static void
+del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
+{
+ struct cfg_redir *r, *tmp_r;
+ struct cfg_spool *s, *tmp_s;
+ int i, num;
+
+ LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
+ num = 1; /* Number of alias_link to delete. */
+ switch (r->mode) {
+ case REDIR_PORT:
+ num = r->pport_cnt;
+ /* FALLTHROUGH */
+ case REDIR_ADDR:
+ case REDIR_PROTO:
+			/* Delete all libalias redirect entries. */
+ for (i = 0; i < num; i++)
+ LibAliasRedirectDelete(n->lib, r->alink[i]);
+ /* Del spool cfg if any. */
+ LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
+ LIST_REMOVE(s, _next);
+ free(s, M_IPFW);
+ }
+ free(r->alink, M_IPFW);
+ LIST_REMOVE(r, _next);
+ free(r, M_IPFW);
+ break;
+ default:
+ printf("unknown redirect mode: %u\n", r->mode);
+ /* XXX - panic?!?!? */
+ break;
+ }
+ }
+}
+
+static int
+add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
+{
+ struct cfg_redir *r, *ser_r;
+ struct cfg_spool *s, *ser_s;
+ int cnt, off, i;
+
+ for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
+ ser_r = (struct cfg_redir *)&buf[off];
+ r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
+ memcpy(r, ser_r, SOF_REDIR);
+ LIST_INIT(&r->spool_chain);
+ off += SOF_REDIR;
+ r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
+ M_IPFW, M_WAITOK | M_ZERO);
+ switch (r->mode) {
+ case REDIR_ADDR:
+ r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
+ r->paddr);
+ break;
+ case REDIR_PORT:
+ for (i = 0 ; i < r->pport_cnt; i++) {
+ /* If remotePort is all ports, set it to 0. */
+ u_short remotePortCopy = r->rport + i;
+ if (r->rport_cnt == 1 && r->rport == 0)
+ remotePortCopy = 0;
+ r->alink[i] = LibAliasRedirectPort(ptr->lib,
+ r->laddr, htons(r->lport + i), r->raddr,
+ htons(remotePortCopy), r->paddr,
+ htons(r->pport + i), r->proto);
+ if (r->alink[i] == NULL) {
+ r->alink[0] = NULL;
+ break;
+ }
+ }
+ break;
+ case REDIR_PROTO:
+ r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
+ r->raddr, r->paddr, r->proto);
+ break;
+ default:
+ printf("unknown redirect mode: %u\n", r->mode);
+ break;
+ }
+ /* XXX perhaps return an error instead of panic ? */
+ if (r->alink[0] == NULL)
+ panic("LibAliasRedirect* returned NULL");
+ /* LSNAT handling. */
+ for (i = 0; i < r->spool_cnt; i++) {
+ ser_s = (struct cfg_spool *)&buf[off];
+ s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
+ memcpy(s, ser_s, SOF_SPOOL);
+ LibAliasAddServer(ptr->lib, r->alink[0],
+ s->addr, htons(s->port));
+ off += SOF_SPOOL;
+ /* Hook spool entry. */
+ LIST_INSERT_HEAD(&r->spool_chain, s, _next);
+ }
+ /* And finally hook this redir entry. */
+ LIST_INSERT_HEAD(&ptr->redir_chain, r, _next);
+ }
+ return (1);
+}
+
+static int
+ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
+{
+ struct mbuf *mcl;
+ struct ip *ip;
+ /* XXX - libalias duct tape */
+ int ldt, retval;
+ char *c;
+
+ ldt = 0;
+ retval = 0;
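+	/* libalias operates on a contiguous buffer, so linearize the
+	 * whole packet into a single mbuf first. */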
+ mcl = m_megapullup(m, m->m_pkthdr.len);
+ if (mcl == NULL) {
+ args->m = NULL;
+ return (IP_FW_DENY);
+ }
+ ip = mtod(mcl, struct ip *);
+
+ /*
+ * XXX - Libalias checksum offload 'duct tape':
+ *
+ * locally generated packets have only pseudo-header checksum
+ * calculated and libalias will break it[1], so mark them for
+ * later fix. Moreover there are cases when libalias modifies
+ * tcp packet data[2], mark them for later fix too.
+ *
+ * [1] libalias was never meant to run in kernel, so it does
+ * not have any knowledge about checksum offloading, and
+ * expects a packet with a full internet checksum.
+ * Unfortunately, packets generated locally will have just the
+ * pseudo header calculated, and when libalias tries to adjust
+ * the checksum it will actually compute a wrong value.
+ *
+ * [2] when libalias modifies tcp's data content, full TCP
+ * checksum has to be recomputed: the problem is that
+ * libalias does not have any idea about checksum offloading.
+	 * but only mark the packets in the th_x2 field. If we receive
+	 * a marked packet, we calculate the correct checksum for it,
+	 * taking offloading into account. Why such a terrible hack
+	 * instead of recalculating the checksum for each packet?
+ * recalculating checksum for each packet?
+ * Because the previous checksum was not checked!
+ * Recalculating checksums for EVERY packet will hide ALL
+ * transmission errors. Yes, marked packets still suffer from
+ * this problem. But, sigh, natd(8) has this problem, too.
+ *
+	 * TODO: make libalias mbuf aware (so that it can handle
+	 * delayed checksums and TSO).
+ */
+
+ if (mcl->m_pkthdr.rcvif == NULL &&
+ mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+ ldt = 1;
+
+ c = mtod(mcl, char *);
+ if (args->oif == NULL)
+ retval = LibAliasIn(t->lib, c,
+ mcl->m_len + M_TRAILINGSPACE(mcl));
+ else
+ retval = LibAliasOut(t->lib, c,
+ mcl->m_len + M_TRAILINGSPACE(mcl));
+ if (retval == PKT_ALIAS_RESPOND) {
+ m->m_flags |= M_SKIP_FIREWALL;
+ retval = PKT_ALIAS_OK;
+ }
+ if (retval != PKT_ALIAS_OK &&
+ retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
+ /* XXX - should i add some logging? */
+ m_free(mcl);
+ args->m = NULL;
+ return (IP_FW_DENY);
+ }
+ mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
+
+ /*
+ * XXX - libalias checksum offload
+ * 'duct tape' (see above)
+ */
+
+ if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
+ ip->ip_p == IPPROTO_TCP) {
+ struct tcphdr *th;
+
+ th = (struct tcphdr *)(ip + 1);
+ if (th->th_x2)
+ ldt = 1;
+ }
+
+ if (ldt) {
+ struct tcphdr *th;
+ struct udphdr *uh;
+ u_short cksum;
+
+ ip->ip_len = ntohs(ip->ip_len);
+ cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)));
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)(ip + 1);
+ /*
+ * Maybe it was set in
+ * libalias...
+ */
+ th->th_x2 = 0;
+ th->th_sum = cksum;
+ mcl->m_pkthdr.csum_data =
+ offsetof(struct tcphdr, th_sum);
+ break;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)(ip + 1);
+ uh->uh_sum = cksum;
+ mcl->m_pkthdr.csum_data =
+ offsetof(struct udphdr, uh_sum);
+ break;
+ }
+ /* No hw checksum offloading: do it ourselves */
+ if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) {
+ in_delayed_cksum(mcl);
+ mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+ ip->ip_len = htons(ip->ip_len);
+ }
+ args->m = mcl;
+ return (IP_FW_NAT);
+}
+
+static struct cfg_nat *
+lookup_nat(struct nat_list *l, int nat_id)
+{
+ struct cfg_nat *res;
+
+ LIST_FOREACH(res, l, _next) {
+ if (res->id == nat_id)
+ break;
+ }
+ return res;
+}
+
+static int
+ipfw_nat_cfg(struct sockopt *sopt)
+{
+ struct cfg_nat *ptr, *ser_n;
+ char *buf;
+ struct ip_fw_chain *chain = &V_layer3_chain;
+
+ buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
+ sooptcopyin(sopt, buf, NAT_BUF_LEN, sizeof(struct cfg_nat));
+ ser_n = (struct cfg_nat *)buf;
+
+ /* check valid parameter ser_n->id > 0 ? */
+ /*
+ * Find/create nat rule.
+ */
+ IPFW_WLOCK(chain);
+ ptr = lookup_nat(&chain->nat, ser_n->id);
+ if (ptr == NULL) {
+ /* New rule: allocate and init new instance. */
+ ptr = malloc(sizeof(struct cfg_nat),
+ M_IPFW, M_NOWAIT | M_ZERO);
+ if (ptr == NULL) {
+ IPFW_WUNLOCK(chain);
+ free(buf, M_IPFW);
+ return (ENOSPC);
+ }
+ ptr->lib = LibAliasInit(NULL);
+ if (ptr->lib == NULL) {
+ IPFW_WUNLOCK(chain);
+ free(ptr, M_IPFW);
+ free(buf, M_IPFW);
+ return (EINVAL);
+ }
+ LIST_INIT(&ptr->redir_chain);
+ } else {
+		/* Entry already present: temporarily unhook it. */
+ LIST_REMOVE(ptr, _next);
+ flush_nat_ptrs(chain, ser_n->id);
+ }
+ IPFW_WUNLOCK(chain);
+
+ /*
+ * Basic nat configuration.
+ */
+ ptr->id = ser_n->id;
+ /*
+	 * XXX - what if this rule doesn't NAT any IP and just
+	 * redirects?  Do we set aliasaddress to 0.0.0.0?
+ */
+ ptr->ip = ser_n->ip;
+ ptr->redir_cnt = ser_n->redir_cnt;
+ ptr->mode = ser_n->mode;
+ LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode);
+ LibAliasSetAddress(ptr->lib, ptr->ip);
+ memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE);
+
+ /*
+ * Redir and LSNAT configuration.
+ */
+ /* Delete old cfgs. */
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ /* Add new entries. */
+ add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
+ free(buf, M_IPFW);
+ IPFW_WLOCK(chain);
+ LIST_INSERT_HEAD(&chain->nat, ptr, _next);
+ IPFW_WUNLOCK(chain);
+ return (0);
+}
+
+static int
+ipfw_nat_del(struct sockopt *sopt)
+{
+ struct cfg_nat *ptr;
+ struct ip_fw_chain *chain = &V_layer3_chain;
+ int i;
+
+ sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ /* XXX validate i */
+ IPFW_WLOCK(chain);
+ ptr = lookup_nat(&chain->nat, i);
+ if (ptr == NULL) {
+ IPFW_WUNLOCK(chain);
+ return (EINVAL);
+ }
+ LIST_REMOVE(ptr, _next);
+ flush_nat_ptrs(chain, i);
+ IPFW_WUNLOCK(chain);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ return (0);
+}
+
+static int
+ipfw_nat_get_cfg(struct sockopt *sopt)
+{
+ uint8_t *data;
+ struct cfg_nat *n;
+ struct cfg_redir *r;
+ struct cfg_spool *s;
+ int nat_cnt, off;
+ struct ip_fw_chain *chain;
+ int err = ENOSPC;
+
+ chain = &V_layer3_chain;
+ nat_cnt = 0;
+ off = sizeof(nat_cnt);
+
+ data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
+ IPFW_RLOCK(chain);
+ /* Serialize all the data. */
+ LIST_FOREACH(n, &chain->nat, _next) {
+ nat_cnt++;
+ if (off + SOF_NAT >= NAT_BUF_LEN)
+ goto nospace;
+ bcopy(n, &data[off], SOF_NAT);
+ off += SOF_NAT;
+ LIST_FOREACH(r, &n->redir_chain, _next) {
+ if (off + SOF_REDIR >= NAT_BUF_LEN)
+ goto nospace;
+ bcopy(r, &data[off], SOF_REDIR);
+ off += SOF_REDIR;
+ LIST_FOREACH(s, &r->spool_chain, _next) {
+ if (off + SOF_SPOOL >= NAT_BUF_LEN)
+ goto nospace;
+ bcopy(s, &data[off], SOF_SPOOL);
+ off += SOF_SPOOL;
+ }
+ }
+ }
+ err = 0; /* all good */
+nospace:
+ IPFW_RUNLOCK(chain);
+ if (err == 0) {
+ bcopy(&nat_cnt, data, sizeof(nat_cnt));
+ sooptcopyout(sopt, data, NAT_BUF_LEN);
+ } else {
+		printf("serialized data buffer not big enough: "
+		    "please increase NAT_BUF_LEN\n");
+ }
+ free(data, M_IPFW);
+ return (err);
+}
+
+static int
+ipfw_nat_get_log(struct sockopt *sopt)
+{
+ uint8_t *data;
+ struct cfg_nat *ptr;
+ int i, size;
+ struct ip_fw_chain *chain;
+
+ chain = &V_layer3_chain;
+
+ IPFW_RLOCK(chain);
+ /* one pass to count, one to copy the data */
+ i = 0;
+ LIST_FOREACH(ptr, &chain->nat, _next) {
+ if (ptr->lib->logDesc == NULL)
+ continue;
+ i++;
+ }
+ size = i * (LIBALIAS_BUF_SIZE + sizeof(int));
+ data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO);
+ if (data == NULL) {
+ IPFW_RUNLOCK(chain);
+ return (ENOSPC);
+ }
+ i = 0;
+ LIST_FOREACH(ptr, &chain->nat, _next) {
+ if (ptr->lib->logDesc == NULL)
+ continue;
+ bcopy(&ptr->id, &data[i], sizeof(int));
+ i += sizeof(int);
+ bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE);
+ i += LIBALIAS_BUF_SIZE;
+ }
+ IPFW_RUNLOCK(chain);
+ sooptcopyout(sopt, data, size);
+ free(data, M_IPFW);
+ return(0);
+}
+
+static void
+ipfw_nat_init(void)
+{
+
+ IPFW_WLOCK(&V_layer3_chain);
+ /* init ipfw hooks */
+ ipfw_nat_ptr = ipfw_nat;
+ lookup_nat_ptr = lookup_nat;
+ ipfw_nat_cfg_ptr = ipfw_nat_cfg;
+ ipfw_nat_del_ptr = ipfw_nat_del;
+ ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
+ ipfw_nat_get_log_ptr = ipfw_nat_get_log;
+ IPFW_WUNLOCK(&V_layer3_chain);
+ V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
+ ifaddr_event, ifaddr_change,
+ NULL, EVENTHANDLER_PRI_ANY);
+}
+
+static void
+ipfw_nat_destroy(void)
+{
+ struct cfg_nat *ptr, *ptr_temp;
+ struct ip_fw_chain *chain;
+
+ chain = &V_layer3_chain;
+ IPFW_WLOCK(chain);
+ LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
+ LIST_REMOVE(ptr, _next);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ }
+ EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
+ flush_nat_ptrs(chain, -1 /* flush all */);
+ /* deregister ipfw_nat */
+ ipfw_nat_ptr = NULL;
+ lookup_nat_ptr = NULL;
+ ipfw_nat_cfg_ptr = NULL;
+ ipfw_nat_del_ptr = NULL;
+ ipfw_nat_get_cfg_ptr = NULL;
+ ipfw_nat_get_log_ptr = NULL;
+ IPFW_WUNLOCK(chain);
+}
+
+static int
+ipfw_nat_modevent(module_t mod, int type, void *unused)
+{
+ int err = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ ipfw_nat_init();
+ break;
+
+ case MOD_UNLOAD:
+ ipfw_nat_destroy();
+ break;
+
+	default:
+		return EOPNOTSUPP;
+ }
+ return err;
+}
+
+static moduledata_t ipfw_nat_mod = {
+ "ipfw_nat",
+ ipfw_nat_modevent,
+ 0
+};
+
+DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
+MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
+MODULE_VERSION(ipfw_nat, 1);
+/* end of file */
diff --git a/freebsd/sys/netinet/ipfw/ip_fw_pfil.c b/freebsd/sys/netinet/ipfw/ip_fw_pfil.c
new file mode 100644
index 00000000..8759f409
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_fw_pfil.c
@@ -0,0 +1,417 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#if !defined(KLD_MODULE)
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_ipdn.h>
+#include <freebsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#endif /* KLD_MODULE */
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/pfil.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+#include <freebsd/netgraph/ng_ipfw.h>
+
+#include <freebsd/machine/in_cksum.h>
+
+static VNET_DEFINE(int, fw_enable) = 1;
+#define V_fw_enable VNET(fw_enable)
+
+#ifdef INET6
+static VNET_DEFINE(int, fw6_enable) = 1;
+#define V_fw6_enable VNET(fw6_enable)
+#endif
+
+int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
+
+/* Forward declarations. */
+static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int);
+
+#ifdef SYSCTL_NODE
+
+SYSBEGIN(f1)
+
+SYSCTL_DECL(_net_inet_ip_fw);
+SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0,
+ ipfw_chg_hook, "I", "Enable ipfw");
+#ifdef INET6
+SYSCTL_DECL(_net_inet6_ip6_fw);
+SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0,
+ ipfw_chg_hook, "I", "Enable ipfw+6");
+#endif /* INET6 */
+
+SYSEND
+
+#endif /* SYSCTL_NODE */
+
+/*
+ * The pfilter hook to pass packets to ipfw_chk and then to
+ * dummynet, divert, netgraph or other modules.
+ * The packet may be consumed.
+ */
+int
+ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+ struct ip_fw_args args;
+ struct m_tag *tag;
+ int ipfw;
+ int ret;
+
+ /* all the processing now uses ip_len in net format */
+ if (mtod(*m0, struct ip *)->ip_v == 4)
+ SET_NET_IPLEN(mtod(*m0, struct ip *));
+
+ /* convert dir to IPFW values */
+ dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
+ bzero(&args, sizeof(args));
+
+again:
+ /*
+ * extract and remove the tag if present. If we are left
+ * with onepass, optimize the outgoing path.
+ */
+ tag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
+ if (tag != NULL) {
+ args.rule = *((struct ipfw_rule_ref *)(tag+1));
+ m_tag_delete(*m0, tag);
+ if (args.rule.info & IPFW_ONEPASS) {
+ SET_HOST_IPLEN(mtod(*m0, struct ip *));
+ return 0;
+ }
+ }
+
+ args.m = *m0;
+ args.oif = dir == DIR_OUT ? ifp : NULL;
+ args.inp = inp;
+
+ ipfw = ipfw_chk(&args);
+ *m0 = args.m;
+
+ KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL",
+ __func__));
+
+ /* breaking out of the switch means drop */
+ ret = 0; /* default return value for pass */
+ switch (ipfw) {
+ case IP_FW_PASS:
+ /* next_hop may be set by ipfw_chk */
+ if (args.next_hop == NULL)
+ break; /* pass */
+#ifndef IPFIREWALL_FORWARD
+ ret = EACCES;
+#else
+ {
+ struct m_tag *fwd_tag;
+
+ /* Incoming packets should not be tagged so we do not
+ * m_tag_find. Outgoing packets may be tagged, so we
+ * reuse the tag if present.
+ */
+ fwd_tag = (dir == DIR_IN) ? NULL :
+ m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL);
+ if (fwd_tag != NULL) {
+ m_tag_unlink(*m0, fwd_tag);
+ } else {
+ fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD,
+ sizeof(struct sockaddr_in), M_NOWAIT);
+ if (fwd_tag == NULL) {
+ ret = EACCES;
+ break; /* i.e. drop */
+ }
+ }
+ bcopy(args.next_hop, (fwd_tag+1), sizeof(struct sockaddr_in));
+ m_tag_prepend(*m0, fwd_tag);
+
+ if (in_localip(args.next_hop->sin_addr))
+ (*m0)->m_flags |= M_FASTFWD_OURS;
+ }
+#endif
+ break;
+
+ case IP_FW_DENY:
+ ret = EACCES;
+ break; /* i.e. drop */
+
+ case IP_FW_DUMMYNET:
+ ret = EACCES;
+ if (ip_dn_io_ptr == NULL)
+ break; /* i.e. drop */
+ if (mtod(*m0, struct ip *)->ip_v == 4)
+ ret = ip_dn_io_ptr(m0, dir, &args);
+ else if (mtod(*m0, struct ip *)->ip_v == 6)
+ ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args);
+ else
+ break; /* drop it */
+ /*
+ * XXX should read the return value.
+ * dummynet normally eats the packet and sets *m0=NULL
+ * unless the packet can be sent immediately. In this
+ * case args is updated and we should re-run the
+ * check without clearing args.
+ */
+ if (*m0 != NULL)
+ goto again;
+ break;
+
+ case IP_FW_TEE:
+ case IP_FW_DIVERT:
+ if (ip_divert_ptr == NULL) {
+ ret = EACCES;
+ break; /* i.e. drop */
+ }
+ ret = ipfw_divert(m0, dir, &args.rule,
+ (ipfw == IP_FW_TEE) ? 1 : 0);
+ /* continue processing for the original packet (tee). */
+ if (*m0)
+ goto again;
+ break;
+
+ case IP_FW_NGTEE:
+ case IP_FW_NETGRAPH:
+ if (ng_ipfw_input_p == NULL) {
+ ret = EACCES;
+ break; /* i.e. drop */
+ }
+ ret = ng_ipfw_input_p(m0, dir, &args,
+ (ipfw == IP_FW_NGTEE) ? 1 : 0);
+ if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */
+ goto again; /* continue with packet */
+ break;
+
+ case IP_FW_NAT:
+ /* honor one-pass in case of successful nat */
+ if (V_fw_one_pass)
+ break; /* ret is already 0 */
+ goto again;
+
+ case IP_FW_REASS:
+ goto again; /* continue with packet */
+
+ default:
+ KASSERT(0, ("%s: unknown retval", __func__));
+ }
+
+ if (ret != 0) {
+ if (*m0)
+ FREE_PKT(*m0);
+ *m0 = NULL;
+ }
+ if (*m0 && mtod(*m0, struct ip *)->ip_v == 4)
+ SET_HOST_IPLEN(mtod(*m0, struct ip *));
+ return ret;
+}
+
+/* Do the divert; return 1 on error, 0 on success. */
+static int
+ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
+ int tee)
+{
+ /*
+ * ipfw_chk() has already tagged the packet with the divert tag.
+ * If tee is set, copy packet and return original.
+ * If not tee, consume packet and send it to divert socket.
+ */
+ struct mbuf *clone;
+ struct ip *ip;
+ struct m_tag *tag;
+
+ /* Cloning needed for tee? */
+ if (tee == 0) {
+ clone = *m0; /* use the original mbuf */
+ *m0 = NULL;
+ } else {
+ clone = m_dup(*m0, M_DONTWAIT);
+ /* If we cannot duplicate the mbuf, we sacrifice the divert
+ * chain and continue with the tee-ed packet.
+ */
+ if (clone == NULL)
+ return 1;
+ }
+
+ /*
+ * Divert listeners can normally handle non-fragmented packets,
+	 * but we can only reassemble in the non-tee case.
+ * This means that listeners on a tee rule may get fragments,
+ * and have to live with that.
+ * Note that we now have the 'reass' ipfw option so if we care
+ * we can do it before a 'tee'.
+ */
+ ip = mtod(clone, struct ip *);
+ if (!tee && ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) {
+ int hlen;
+ struct mbuf *reass;
+
+ SET_HOST_IPLEN(ip); /* ip_reass wants host order */
+ reass = ip_reass(clone); /* Reassemble packet. */
+		/* If reass == NULL, the mbuf was consumed by ip_reass(). */
+		if (reass == NULL)
+			return 0; /* not an error */
+ /*
+ * IP header checksum fixup after reassembly and leave header
+ * in network byte order.
+ */
+ ip = mtod(reass, struct ip *);
+ hlen = ip->ip_hl << 2;
+ SET_NET_IPLEN(ip);
+ ip->ip_sum = 0;
+ if (hlen == sizeof(struct ip))
+ ip->ip_sum = in_cksum_hdr(ip);
+ else
+ ip->ip_sum = in_cksum(reass, hlen);
+ clone = reass;
+ }
+ /* attach a tag to the packet with the reinject info */
+ tag = m_tag_alloc(MTAG_IPFW_RULE, 0,
+ sizeof(struct ipfw_rule_ref), M_NOWAIT);
+ if (tag == NULL) {
+ FREE_PKT(clone);
+ return 1;
+ }
+ *((struct ipfw_rule_ref *)(tag+1)) = *rule;
+ m_tag_prepend(clone, tag);
+
+ /* Do the dirty job... */
+ ip_divert_ptr(clone, incoming);
+ return 0;
+}
+
+/*
+ * attach or detach hooks for a given protocol family
+ */
+static int
+ipfw_hook(int onoff, int pf)
+{
+ struct pfil_head *pfh;
+
+ pfh = pfil_head_get(PFIL_TYPE_AF, pf);
+ if (pfh == NULL)
+ return ENOENT;
+
+ (void) (onoff ? pfil_add_hook : pfil_remove_hook)
+ (ipfw_check_hook, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
+
+ return 0;
+}
+
+int
+ipfw_attach_hooks(int arg)
+{
+ int error = 0;
+
+ if (arg == 0) /* detach */
+ ipfw_hook(0, AF_INET);
+ else if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) {
+ error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */
+ printf("ipfw_hook() error\n");
+ }
+#ifdef INET6
+ if (arg == 0) /* detach */
+ ipfw_hook(0, AF_INET6);
+ else if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) {
+ error = ENOENT;
+ printf("ipfw6_hook() error\n");
+ }
+#endif
+ return error;
+}
+
+int
+ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
+{
+ int enable;
+ int oldenable;
+ int error;
+ int af;
+
+ if (arg1 == &VNET_NAME(fw_enable)) {
+ enable = V_fw_enable;
+ af = AF_INET;
+ }
+#ifdef INET6
+ else if (arg1 == &VNET_NAME(fw6_enable)) {
+ enable = V_fw6_enable;
+ af = AF_INET6;
+ }
+#endif
+ else
+ return (EINVAL);
+
+ oldenable = enable;
+
+ error = sysctl_handle_int(oidp, &enable, 0, req);
+
+ if (error)
+ return (error);
+
+ enable = (enable) ? 1 : 0;
+
+ if (enable == oldenable)
+ return (0);
+
+ error = ipfw_hook(enable, af);
+ if (error)
+ return (error);
+ if (af == AF_INET)
+ V_fw_enable = enable;
+#ifdef INET6
+ else if (af == AF_INET6)
+ V_fw6_enable = enable;
+#endif
+
+ return (0);
+}
+/* end of file */
diff --git a/freebsd/sys/netinet/ipfw/ip_fw_private.h b/freebsd/sys/netinet/ipfw/ip_fw_private.h
new file mode 100644
index 00000000..c29ae0ad
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_fw_private.h
@@ -0,0 +1,301 @@
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IPFW2_PRIVATE_H
+#define _IPFW2_PRIVATE_H
+
+/*
+ * Internal constants and data structures used by ipfw components
+ * and not meant to be exported outside the kernel.
+ */
+
+#ifdef _KERNEL
+
+/*
+ * For platforms that do not have SYSCTL support, we wrap the
+ * SYSCTL_* into a function (one per file) to collect the values
+ * into an array at module initialization. The wrapping macros,
+ * SYSBEGIN() and SYSEND, are empty in the default case.
+ */
+#ifndef SYSBEGIN
+#define SYSBEGIN(x)
+#endif
+#ifndef SYSEND
+#define SYSEND
+#endif
+
+/* Return values from ipfw_chk() */
+enum {
+ IP_FW_PASS = 0,
+ IP_FW_DENY,
+ IP_FW_DIVERT,
+ IP_FW_TEE,
+ IP_FW_DUMMYNET,
+ IP_FW_NETGRAPH,
+ IP_FW_NGTEE,
+ IP_FW_NAT,
+ IP_FW_REASS,
+};
+
+/*
+ * Structure for collecting parameters to dummynet for ip6_output forwarding
+ */
+struct _ip6dn_args {
+ struct ip6_pktopts *opt_or;
+ struct route_in6 ro_or;
+ int flags_or;
+ struct ip6_moptions *im6o_or;
+ struct ifnet *origifp_or;
+ struct ifnet *ifp_or;
+ struct sockaddr_in6 dst_or;
+ u_long mtu_or;
+ struct route_in6 ro_pmtu_or;
+};
+
+
+/*
+ * Arguments for calling ipfw_chk() and dummynet_io(). We put them
+ * all into a structure because this way it is easier and more
+ * efficient to pass variables around and extend the interface.
+ */
+struct ip_fw_args {
+ struct mbuf *m; /* the mbuf chain */
+ struct ifnet *oif; /* output interface */
+ struct sockaddr_in *next_hop; /* forward address */
+
+ /*
+ * On return, it points to the matching rule.
+ * On entry, rule.slot > 0 means the info is valid and
+	 * contains the starting rule for an ipfw search.
+ * If chain_id == chain->id && slot >0 then jump to that slot.
+ * Otherwise, we locate the first rule >= rulenum:rule_id
+ */
+ struct ipfw_rule_ref rule; /* match/restart info */
+
+ struct ether_header *eh; /* for bridged packets */
+
+ struct ipfw_flow_id f_id; /* grabbed from IP header */
+ //uint32_t cookie; /* a cookie depending on rule action */
+ struct inpcb *inp;
+
+ struct _ip6dn_args dummypar; /* dummynet->ip6_output */
+ struct sockaddr_in hopstore; /* store here if cannot use a pointer */
+};
+
+MALLOC_DECLARE(M_IPFW);
+
+/*
+ * Hooks sometimes need to know the direction of the packet
+ * (divert, dummynet, netgraph, ...).
+ * We use a generic definition here, with bits 0-1 indicating the
+ * direction, bit 2 indicating layer 2 or 3, and bits 3-4 indicating
+ * the specific protocol (if necessary).
+ */
+enum {
+ DIR_MASK = 0x3,
+ DIR_OUT = 0,
+ DIR_IN = 1,
+ DIR_FWD = 2,
+ DIR_DROP = 3,
+ PROTO_LAYER2 = 0x4, /* set for layer 2 */
+ /* PROTO_DEFAULT = 0, */
+ PROTO_IPV4 = 0x08,
+ PROTO_IPV6 = 0x10,
+ PROTO_IFB = 0x0c, /* layer2 + ifbridge */
+ /* PROTO_OLDBDG = 0x14, unused, old bridge */
+};
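+
+/*
+ * For example, the pfil hook in ip_fw_pfil.c hands an IPv6 packet to
+ * dummynet as
+ *
+ *	ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args);
+ *
+ * where dir has already been set to DIR_IN or DIR_OUT.
+ */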
+
+/* wrapper for freeing a packet, in case we need to do more work */
+#ifndef FREE_PKT
+#if defined(__linux__) || defined(_WIN32)
+#define FREE_PKT(m) netisr_dispatch(-1, m)
+#else
+#define FREE_PKT(m) m_freem(m)
+#endif
+#endif /* !FREE_PKT */
+
+/*
+ * Function definitions.
+ */
+
+/* attach (arg = 1) or detach (arg = 0) hooks */
+int ipfw_attach_hooks(int);
+#ifdef NOTYET
+void ipfw_nat_destroy(void);
+#endif
+
+/* In ip_fw_log.c */
+struct ip;
+void ipfw_log_bpf(int);
+void ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
+ struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
+ struct ip *ip);
+VNET_DECLARE(u_int64_t, norule_counter);
+#define V_norule_counter VNET(norule_counter)
+VNET_DECLARE(int, verbose_limit);
+#define V_verbose_limit VNET(verbose_limit)
+
+/* In ip_fw_dynamic.c */
+
+enum { /* result for matching dynamic rules */
+ MATCH_REVERSE = 0,
+ MATCH_FORWARD,
+ MATCH_NONE,
+ MATCH_UNKNOWN,
+};
+
+/*
+ * The lock for dynamic rules is only used once outside the file,
+ * and only to release the result of lookup_dyn_rule().
+ * Eventually we may implement it with a callback on the function.
+ */
+void ipfw_dyn_unlock(void);
+
+struct tcphdr;
+struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
+ u_int32_t, u_int32_t, int);
+int ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
+ struct ip_fw_args *args, uint32_t tablearg);
+ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt,
+ int *match_direction, struct tcphdr *tcp);
+void ipfw_remove_dyn_children(struct ip_fw *rule);
+void ipfw_get_dynamic(char **bp, const char *ep);
+
+void ipfw_dyn_attach(void); /* uma_zcreate .... */
+void ipfw_dyn_detach(void); /* uma_zdestroy ... */
+void ipfw_dyn_init(void); /* per-vnet initialization */
+void ipfw_dyn_uninit(int); /* per-vnet deinitialization */
+int ipfw_dyn_len(void);
+
+/* common variables */
+VNET_DECLARE(int, fw_one_pass);
+#define V_fw_one_pass VNET(fw_one_pass)
+
+VNET_DECLARE(int, fw_verbose);
+#define V_fw_verbose VNET(fw_verbose)
+
+VNET_DECLARE(struct ip_fw_chain, layer3_chain);
+#define V_layer3_chain VNET(layer3_chain)
+
+VNET_DECLARE(u_int32_t, set_disable);
+#define V_set_disable VNET(set_disable)
+
+VNET_DECLARE(int, autoinc_step);
+#define V_autoinc_step VNET(autoinc_step)
+
+struct ip_fw_chain {
+ struct ip_fw *rules; /* list of rules */
+ struct ip_fw *reap; /* list of rules to reap */
+ struct ip_fw *default_rule;
+ int n_rules; /* number of static rules */
+ int static_len; /* total len of static rules */
+ struct ip_fw **map; /* array of rule ptrs to ease lookup */
+ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
+ struct radix_node_head *tables[IPFW_TABLES_MAX];
+#if defined( __linux__ ) || defined( _WIN32 )
+ spinlock_t rwmtx;
+ spinlock_t uh_lock;
+#else
+ struct rwlock rwmtx;
+ struct rwlock uh_lock; /* lock for upper half */
+#endif
+ uint32_t id; /* ruleset id */
+};
+
+struct sockopt; /* used by tcp_var.h */
+
+/*
+ * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c
+ * so the variable and the macros must be here.
+ */
+
+#define IPFW_LOCK_INIT(_chain) do { \
+ rw_init(&(_chain)->rwmtx, "IPFW static rules"); \
+ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \
+ } while (0)
+
+#define IPFW_LOCK_DESTROY(_chain) do { \
+ rw_destroy(&(_chain)->rwmtx); \
+ rw_destroy(&(_chain)->uh_lock); \
+ } while (0)
+
+#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
+
+#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
+#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
+#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
+
+#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
+#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
+#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
+#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
+
+/* In ip_fw_sockopt.c */
+int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
+int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
+int ipfw_ctl(struct sockopt *sopt);
+int ipfw_chk(struct ip_fw_args *args);
+void ipfw_reap_rules(struct ip_fw *head);
+
+/* In ip_fw_pfil */
+int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
+ struct inpcb *inp);
+
+/* In ip_fw_table.c */
+struct radix_node;
+int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+ uint32_t *val);
+int ipfw_init_tables(struct ip_fw_chain *ch);
+void ipfw_destroy_tables(struct ip_fw_chain *ch);
+int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl);
+int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+ uint8_t mlen, uint32_t value);
+int ipfw_dump_table_entry(struct radix_node *rn, void *arg);
+int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+ uint8_t mlen);
+int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt);
+int ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl);
+
+/* In ip_fw_nat.c -- XXX to be moved to ip_var.h */
+
+extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
+
+typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
+typedef int ipfw_nat_cfg_t(struct sockopt *);
+
+extern ipfw_nat_t *ipfw_nat_ptr;
+#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
+
+extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
+extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
+extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
+extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
+
+#endif /* _KERNEL */
+#endif /* _IPFW2_PRIVATE_H */
diff --git a/freebsd/sys/netinet/ipfw/ip_fw_sockopt.c b/freebsd/sys/netinet/ipfw/ip_fw_sockopt.c
new file mode 100644
index 00000000..6af09905
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_fw_sockopt.c
@@ -0,0 +1,1345 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Supported by: Valeria Paoli
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Sockopt support for ipfw. The routines here implement
+ * the upper half of the ipfw code.
+ */
+
+#if !defined(KLD_MODULE)
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_ipdivert.h>
+#include <freebsd/local/opt_ipdn.h>
+#include <freebsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#endif
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h> /* struct m_tag used by nested headers */
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* hooks */
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+
+#ifdef MAC
+#include <freebsd/security/mac/mac_framework.h>
+#endif
+
+MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
+
+/*
+ * static variables followed by global ones (none in this file)
+ */
+
+/*
+ * Find the smallest rule >= key, id.
+ * We could use bsearch but it is so simple that we code it directly
+ */
+int
+ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
+{
+ int i, lo, hi;
+ struct ip_fw *r;
+
+ for (lo = 0, hi = chain->n_rules - 1; lo < hi;) {
+ i = (lo + hi) / 2;
+ r = chain->map[i];
+ if (r->rulenum < key)
+ lo = i + 1; /* continue from the next one */
+ else if (r->rulenum > key)
+ hi = i; /* this might be good */
+ else if (r->id < id)
+ lo = i + 1; /* continue from the next one */
+ else /* r->id >= id */
+ hi = i; /* this might be good */
+	}
+ return hi;
+}
+
+/*
+ * Allocate a new map; returns with the chain locked (IPFW_UH_WLOCK).
+ * extra is the number of entries to add or delete.
+ */
+static struct ip_fw **
+get_map(struct ip_fw_chain *chain, int extra, int locked)
+{
+
+ for (;;) {
+ struct ip_fw **map;
+ int i;
+
+ i = chain->n_rules + extra;
+ map = malloc(i * sizeof(struct ip_fw *), M_IPFW,
+ locked ? M_NOWAIT : M_WAITOK);
+ if (map == NULL) {
+ printf("%s: cannot allocate map\n", __FUNCTION__);
+ return NULL;
+ }
+ if (!locked)
+ IPFW_UH_WLOCK(chain);
+ if (i >= chain->n_rules + extra) /* good */
+ return map;
+ /* otherwise we lost the race, free and retry */
+ if (!locked)
+ IPFW_UH_WUNLOCK(chain);
+ free(map, M_IPFW);
+ }
+}
+
+/*
+ * Swap the maps. It is supposed to be called with IPFW_UH_WLOCK held.
+ */
+static struct ip_fw **
+swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
+{
+ struct ip_fw **old_map;
+
+ IPFW_WLOCK(chain);
+ chain->id++;
+ chain->n_rules = new_len;
+ old_map = chain->map;
+ chain->map = new_map;
+ IPFW_WUNLOCK(chain);
+ return old_map;
+}
+
+/*
+ * Add a new rule to the list. Copy the rule into a malloc'ed area, then
+ * possibly create a rule number and add the rule to the list.
+ * Update the rule_number in the input struct so the caller knows it as well.
+ * XXX DO NOT USE FOR THE DEFAULT RULE.
+ * Must be called without IPFW_UH held
+ */
+int
+ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
+{
+ struct ip_fw *rule;
+ int i, l, insert_before;
+ struct ip_fw **map; /* the new array of pointers */
+
+ if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
+ return (EINVAL);
+
+ l = RULESIZE(input_rule);
+ rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
+ if (rule == NULL)
+ return (ENOSPC);
+ /* get_map returns with IPFW_UH_WLOCK if successful */
+ map = get_map(chain, 1, 0 /* not locked */);
+ if (map == NULL) {
+ free(rule, M_IPFW);
+ return ENOSPC;
+ }
+
+ bcopy(input_rule, rule, l);
+ /* clear fields not settable from userland */
+ rule->x_next = NULL;
+ rule->next_rule = NULL;
+ rule->pcnt = 0;
+ rule->bcnt = 0;
+ rule->timestamp = 0;
+
+ if (V_autoinc_step < 1)
+ V_autoinc_step = 1;
+ else if (V_autoinc_step > 1000)
+ V_autoinc_step = 1000;
+ /* find the insertion point, we will insert before */
+ insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE;
+ i = ipfw_find_rule(chain, insert_before, 0);
+ /* duplicate first part */
+ if (i > 0)
+ bcopy(chain->map, map, i * sizeof(struct ip_fw *));
+ map[i] = rule;
+ /* duplicate remaining part, we always have the default rule */
+ bcopy(chain->map + i, map + i + 1,
+ sizeof(struct ip_fw *) *(chain->n_rules - i));
+ if (rule->rulenum == 0) {
+ /* write back the number */
+ rule->rulenum = i > 0 ? map[i-1]->rulenum : 0;
+ if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
+ rule->rulenum += V_autoinc_step;
+ input_rule->rulenum = rule->rulenum;
+ }
+
+ rule->id = chain->id + 1;
+ map = swap_map(chain, map, chain->n_rules + 1);
+ chain->static_len += l;
+ IPFW_UH_WUNLOCK(chain);
+ if (map)
+ free(map, M_IPFW);
+ return (0);
+}
+
+/*
+ * Reclaim storage associated with a list of rules. This is
+ * typically the list created using remove_rule.
+ * A NULL pointer on input is handled correctly.
+ */
+void
+ipfw_reap_rules(struct ip_fw *head)
+{
+ struct ip_fw *rule;
+
+ while ((rule = head) != NULL) {
+ head = head->x_next;
+ free(rule, M_IPFW);
+ }
+}
+
+/*
+ * Used by del_entry() to check if a rule should be kept.
+ * Returns 1 if the rule must be kept, 0 otherwise.
+ *
+ * Called with cmd = {0,1,5}.
+ * cmd == 0 matches on rule numbers, excludes rules in RESVD_SET if n == 0 ;
+ * cmd == 1 matches on set numbers only, rule numbers are ignored;
+ * cmd == 5 matches on rule and set numbers.
+ *
+ * n == 0 is a wildcard for rule numbers, there is no wildcard for sets.
+ *
+ * Rules to keep are
+ * (default || reserved || !match_set || !match_number)
+ * where
+ * default ::= (rule->rulenum == IPFW_DEFAULT_RULE)
+ * // the default rule is always protected
+ *
+ * reserved ::= (cmd == 0 && n == 0 && rule->set == RESVD_SET)
+ * // RESVD_SET is protected only if cmd == 0 and n == 0 ("ipfw flush")
+ *
+ * match_set ::= (cmd == 0 || rule->set == set)
+ * // set number is ignored for cmd == 0
+ *
+ * match_number ::= (cmd == 1 || n == 0 || n == rule->rulenum)
+ * // number is ignored for cmd == 1 or n == 0
+ *
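+ * For example, an "ipfw flush" request reaches del_entry() with arg == 0,
+ * i.e. cmd == 0 and n == 0, so every rule is removed except the default
+ * rule and the rules in RESVD_SET.
+ *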
+ */
+static int
+keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n)
+{
+ return
+ (rule->rulenum == IPFW_DEFAULT_RULE) ||
+ (cmd == 0 && n == 0 && rule->set == RESVD_SET) ||
+ !(cmd == 0 || rule->set == set) ||
+ !(cmd == 1 || n == 0 || n == rule->rulenum);
+}
+
+/**
+ * Remove all rules with given number, or do set manipulation.
+ * Assumes chain != NULL && *chain != NULL.
+ *
+ * The argument is a uint32_t. The low 16 bits are the rule or set number;
+ * the next 8 bits are the new set; the top 8 bits indicate the command:
+ *
+ * 0 delete rules numbered "rulenum"
+ * 1 delete rules in set "rulenum"
+ * 2 move rules "rulenum" to set "new_set"
+ * 3 move rules from set "rulenum" to set "new_set"
+ * 4 swap sets "rulenum" and "new_set"
+ * 5 delete rules "rulenum" and set "new_set"
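+ *
+ * For example, with this encoding arg = (2 << 24) | (5 << 16) | 100
+ * moves the rules numbered 100 into set 5, and arg = (1 << 24) | 7
+ * deletes every rule in set 7.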
+ */
+static int
+del_entry(struct ip_fw_chain *chain, uint32_t arg)
+{
+ struct ip_fw *rule;
+ uint32_t num; /* rule number or old_set */
+ uint8_t cmd, new_set;
+ int start, end, i, ofs, n;
+ struct ip_fw **map = NULL;
+ int error = 0;
+
+ num = arg & 0xffff;
+ cmd = (arg >> 24) & 0xff;
+ new_set = (arg >> 16) & 0xff;
+
+ if (cmd > 5 || new_set > RESVD_SET)
+ return EINVAL;
+ if (cmd == 0 || cmd == 2 || cmd == 5) {
+ if (num >= IPFW_DEFAULT_RULE)
+ return EINVAL;
+ } else {
+ if (num > RESVD_SET) /* old_set */
+ return EINVAL;
+ }
+
+ IPFW_UH_WLOCK(chain); /* arbitrate writers */
+ chain->reap = NULL; /* prepare for deletions */
+
+ switch (cmd) {
+ case 0: /* delete rules "num" (num == 0 matches all) */
+ case 1: /* delete all rules in set N */
+ case 5: /* delete rules with number N and set "new_set". */
+
+ /*
+ * Locate first rule to delete (start), the rule after
+ * the last one to delete (end), and count how many
+ * rules to delete (n). Always use keep_rule() to
+ * determine which rules to keep.
+ */
+ n = 0;
+ if (cmd == 1) {
+ /* look for a specific set including RESVD_SET.
+ * Must scan the entire range, ignore num.
+ */
+ new_set = num;
+ for (start = -1, end = i = 0; i < chain->n_rules; i++) {
+ if (keep_rule(chain->map[i], cmd, new_set, 0))
+ continue;
+ if (start < 0)
+ start = i;
+ end = i;
+ n++;
+ }
+ end++; /* first non-matching */
+ } else {
+ /* Optimized search on rule numbers */
+ start = ipfw_find_rule(chain, num, 0);
+ for (end = start; end < chain->n_rules; end++) {
+ rule = chain->map[end];
+ if (num > 0 && rule->rulenum != num)
+ break;
+ if (!keep_rule(rule, cmd, new_set, num))
+ n++;
+ }
+ }
+
+ if (n == 0) {
+			/* A flush request (arg == 0) on an empty ruleset
+			 * returns with no error. By contrast, if there
+			 * is no match on a specific request, we return
+			 * EINVAL.
+ */
+ error = (arg == 0) ? 0 : EINVAL;
+ break;
+ }
+
+ /* We have something to delete. Allocate the new map */
+ map = get_map(chain, -n, 1 /* locked */);
+ if (map == NULL) {
+ error = EINVAL;
+ break;
+ }
+
+ /* 1. bcopy the initial part of the map */
+ if (start > 0)
+ bcopy(chain->map, map, start * sizeof(struct ip_fw *));
+ /* 2. copy active rules between start and end */
+ for (i = ofs = start; i < end; i++) {
+ rule = chain->map[i];
+ if (keep_rule(rule, cmd, new_set, num))
+ map[ofs++] = rule;
+ }
+ /* 3. copy the final part of the map */
+ bcopy(chain->map + end, map + ofs,
+ (chain->n_rules - end) * sizeof(struct ip_fw *));
+ /* 4. swap the maps (under BH_LOCK) */
+ map = swap_map(chain, map, chain->n_rules - n);
+ /* 5. now remove the rules deleted from the old map */
+ for (i = start; i < end; i++) {
+ int l;
+ rule = map[i];
+ if (keep_rule(rule, cmd, new_set, num))
+ continue;
+ l = RULESIZE(rule);
+ chain->static_len -= l;
+ ipfw_remove_dyn_children(rule);
+ rule->x_next = chain->reap;
+ chain->reap = rule;
+ }
+ break;
+
+ /*
+ * In the next 3 cases the loop stops at (n_rules - 1)
+	 * because the default rule is never eligible.
+ */
+
+ case 2: /* move rules with given RULE number to new set */
+ for (i = 0; i < chain->n_rules - 1; i++) {
+ rule = chain->map[i];
+ if (rule->rulenum == num)
+ rule->set = new_set;
+ }
+ break;
+
+ case 3: /* move rules with given SET number to new set */
+ for (i = 0; i < chain->n_rules - 1; i++) {
+ rule = chain->map[i];
+ if (rule->set == num)
+ rule->set = new_set;
+ }
+ break;
+
+ case 4: /* swap two sets */
+ for (i = 0; i < chain->n_rules - 1; i++) {
+ rule = chain->map[i];
+ if (rule->set == num)
+ rule->set = new_set;
+ else if (rule->set == new_set)
+ rule->set = num;
+ }
+ break;
+ }
+
+ rule = chain->reap;
+ chain->reap = NULL;
+ IPFW_UH_WUNLOCK(chain);
+ ipfw_reap_rules(rule);
+ if (map)
+ free(map, M_IPFW);
+ return error;
+}
+
+/*
+ * Clear counters for a specific rule.
+ * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
+ * so we only care that rules do not disappear.
+ */
+static void
+clear_counters(struct ip_fw *rule, int log_only)
+{
+ ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
+
+ if (log_only == 0) {
+ rule->bcnt = rule->pcnt = 0;
+ rule->timestamp = 0;
+ }
+ if (l->o.opcode == O_LOG)
+ l->log_left = l->max_log;
+}
+
+/**
+ * Reset some or all counters on firewall rules.
+ * The argument `arg' is a u_int32_t. The low 16 bits are the rule number,
+ * the next 8 bits are the set number, the top 8 bits are the command:
+ * 0 work with rules from all sets;
+ * 1 work with rules only from the specified set.
+ * The rule number is zero if we want to clear all entries.
+ * log_only is 1 if we only want to reset logs, zero otherwise.
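+ *
+ * For example, with this encoding arg = (1 << 24) | (3 << 16) | 200
+ * clears the counters of rule 200 only if the rule is in set 3, and
+ * arg = 0 clears the counters of every rule.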
+ */
+static int
+zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
+{
+ struct ip_fw *rule;
+ char *msg;
+ int i;
+
+ uint16_t rulenum = arg & 0xffff;
+ uint8_t set = (arg >> 16) & 0xff;
+ uint8_t cmd = (arg >> 24) & 0xff;
+
+ if (cmd > 1)
+ return (EINVAL);
+ if (cmd == 1 && set > RESVD_SET)
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(chain);
+ if (rulenum == 0) {
+ V_norule_counter = 0;
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+ /* Skip rules not in our set. */
+ if (cmd == 1 && rule->set != set)
+ continue;
+ clear_counters(rule, log_only);
+ }
+ msg = log_only ? "All logging counts reset" :
+ "Accounting cleared";
+ } else {
+ int cleared = 0;
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+ if (rule->rulenum == rulenum) {
+ if (cmd == 0 || rule->set == set)
+ clear_counters(rule, log_only);
+ cleared = 1;
+ }
+ if (rule->rulenum > rulenum)
+ break;
+ }
+ if (!cleared) { /* we did not find any matching rules */
+ IPFW_UH_RUNLOCK(chain);
+ return (EINVAL);
+ }
+ msg = log_only ? "logging count reset" : "cleared";
+ }
+ IPFW_UH_RUNLOCK(chain);
+
+ if (V_fw_verbose) {
+ int lev = LOG_SECURITY | LOG_NOTICE;
+
+ if (rulenum)
+ log(lev, "ipfw: Entry %d %s.\n", rulenum, msg);
+ else
+ log(lev, "ipfw: %s.\n", msg);
+ }
+ return (0);
+}
+
+/*
+ * Check validity of the structure before insert.
+ * Rules are simple, so this mostly needs to check rule sizes.
+ */
+static int
+check_ipfw_struct(struct ip_fw *rule, int size)
+{
+ int l, cmdlen = 0;
+ int have_action=0;
+ ipfw_insn *cmd;
+
+ if (size < sizeof(*rule)) {
+ printf("ipfw: rule too short\n");
+ return (EINVAL);
+ }
+ /* first, check for valid size */
+ l = RULESIZE(rule);
+ if (l != size) {
+ printf("ipfw: size mismatch (have %d want %d)\n", size, l);
+ return (EINVAL);
+ }
+ if (rule->act_ofs >= rule->cmd_len) {
+ printf("ipfw: bogus action offset (%u > %u)\n",
+ rule->act_ofs, rule->cmd_len - 1);
+ return (EINVAL);
+ }
+ /*
+ * Now go for the individual checks. Very simple ones, basically only
+ * instruction sizes.
+ */
+ for (l = rule->cmd_len, cmd = rule->cmd ;
+ l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+ if (cmdlen > l) {
+ printf("ipfw: opcode %d size truncated\n",
+ cmd->opcode);
+ return EINVAL;
+ }
+ switch (cmd->opcode) {
+ case O_PROBE_STATE:
+ case O_KEEP_STATE:
+ case O_PROTO:
+ case O_IP_SRC_ME:
+ case O_IP_DST_ME:
+ case O_LAYER2:
+ case O_IN:
+ case O_FRAG:
+ case O_DIVERTED:
+ case O_IPOPT:
+ case O_IPTOS:
+ case O_IPPRECEDENCE:
+ case O_IPVER:
+ case O_TCPWIN:
+ case O_TCPFLAGS:
+ case O_TCPOPTS:
+ case O_ESTAB:
+ case O_VERREVPATH:
+ case O_VERSRCREACH:
+ case O_ANTISPOOF:
+ case O_IPSEC:
+#ifdef INET6
+ case O_IP6_SRC_ME:
+ case O_IP6_DST_ME:
+ case O_EXT_HDR:
+ case O_IP6:
+#endif
+ case O_IP4:
+ case O_TAG:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ break;
+
+ case O_FIB:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ if (cmd->arg1 >= rt_numfibs) {
+ printf("ipfw: invalid fib number %d\n",
+ cmd->arg1);
+ return EINVAL;
+ }
+ break;
+
+ case O_SETFIB:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ if (cmd->arg1 >= rt_numfibs) {
+ printf("ipfw: invalid fib number %d\n",
+ cmd->arg1);
+ return EINVAL;
+ }
+ goto check_action;
+
+ case O_UID:
+ case O_GID:
+ case O_JAIL:
+ case O_IP_SRC:
+ case O_IP_DST:
+ case O_TCPSEQ:
+ case O_TCPACK:
+ case O_PROB:
+ case O_ICMPTYPE:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
+ goto bad_size;
+ break;
+
+ case O_LIMIT:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
+ goto bad_size;
+ break;
+
+ case O_LOG:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
+ goto bad_size;
+
+ ((ipfw_insn_log *)cmd)->log_left =
+ ((ipfw_insn_log *)cmd)->max_log;
+
+ break;
+
+ case O_IP_SRC_MASK:
+ case O_IP_DST_MASK:
+ /* only odd command lengths */
+ if ( !(cmdlen & 1) || cmdlen > 31)
+ goto bad_size;
+ break;
+
+ case O_IP_SRC_SET:
+ case O_IP_DST_SET:
+ if (cmd->arg1 == 0 || cmd->arg1 > 256) {
+ printf("ipfw: invalid set size %d\n",
+ cmd->arg1);
+ return EINVAL;
+ }
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
+ (cmd->arg1+31)/32 )
+ goto bad_size;
+ break;
+
+ case O_IP_SRC_LOOKUP:
+ case O_IP_DST_LOOKUP:
+ if (cmd->arg1 >= IPFW_TABLES_MAX) {
+ printf("ipfw: invalid table number %d\n",
+ cmd->arg1);
+ return (EINVAL);
+ }
+ if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
+ cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
+ cmdlen != F_INSN_SIZE(ipfw_insn_u32))
+ goto bad_size;
+ break;
+
+ case O_MACADDR2:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
+ goto bad_size;
+ break;
+
+ case O_NOP:
+ case O_IPID:
+ case O_IPTTL:
+ case O_IPLEN:
+ case O_TCPDATALEN:
+ case O_TAGGED:
+ if (cmdlen < 1 || cmdlen > 31)
+ goto bad_size;
+ break;
+
+ case O_MAC_TYPE:
+ case O_IP_SRCPORT:
+ case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
+ if (cmdlen < 2 || cmdlen > 31)
+ goto bad_size;
+ break;
+
+ case O_RECV:
+ case O_XMIT:
+ case O_VIA:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
+ goto bad_size;
+ break;
+
+ case O_ALTQ:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
+ goto bad_size;
+ break;
+
+ case O_PIPE:
+ case O_QUEUE:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ goto check_action;
+
+ case O_FORWARD_IP:
+#ifdef IPFIREWALL_FORWARD
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
+ goto bad_size;
+ goto check_action;
+#else
+ return EINVAL;
+#endif
+
+ case O_DIVERT:
+ case O_TEE:
+ if (ip_divert_ptr == NULL)
+ return EINVAL;
+ else
+ goto check_size;
+ case O_NETGRAPH:
+ case O_NGTEE:
+ if (ng_ipfw_input_p == NULL)
+ return EINVAL;
+ else
+ goto check_size;
+ case O_NAT:
+ if (!IPFW_NAT_LOADED)
+ return EINVAL;
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
+ goto bad_size;
+ goto check_action;
+ case O_FORWARD_MAC: /* XXX not implemented yet */
+ case O_CHECK_STATE:
+ case O_COUNT:
+ case O_ACCEPT:
+ case O_DENY:
+ case O_REJECT:
+#ifdef INET6
+ case O_UNREACH6:
+#endif
+ case O_SKIPTO:
+ case O_REASS:
+check_size:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+check_action:
+ if (have_action) {
+ printf("ipfw: opcode %d, multiple actions"
+ " not allowed\n",
+ cmd->opcode);
+ return EINVAL;
+ }
+ have_action = 1;
+ if (l != cmdlen) {
+ printf("ipfw: opcode %d, action must be"
+ " last opcode\n",
+ cmd->opcode);
+ return EINVAL;
+ }
+ break;
+#ifdef INET6
+ case O_IP6_SRC:
+ case O_IP6_DST:
+ if (cmdlen != F_INSN_SIZE(struct in6_addr) +
+ F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ break;
+
+ case O_FLOW6ID:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
+ ((ipfw_insn_u32 *)cmd)->o.arg1)
+ goto bad_size;
+ break;
+
+ case O_IP6_SRC_MASK:
+ case O_IP6_DST_MASK:
+ if ( !(cmdlen & 1) || cmdlen > 127)
+ goto bad_size;
+ break;
+ case O_ICMP6TYPE:
+ if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
+ goto bad_size;
+ break;
+#endif
+
+ default:
+ switch (cmd->opcode) {
+#ifndef INET6
+ case O_IP6_SRC_ME:
+ case O_IP6_DST_ME:
+ case O_EXT_HDR:
+ case O_IP6:
+ case O_UNREACH6:
+ case O_IP6_SRC:
+ case O_IP6_DST:
+ case O_FLOW6ID:
+ case O_IP6_SRC_MASK:
+ case O_IP6_DST_MASK:
+ case O_ICMP6TYPE:
+ printf("ipfw: no IPv6 support in kernel\n");
+ return EPROTONOSUPPORT;
+#endif
+ default:
+ printf("ipfw: opcode %d, unknown opcode\n",
+ cmd->opcode);
+ return EINVAL;
+ }
+ }
+ }
+ if (have_action == 0) {
+ printf("ipfw: missing action\n");
+ return EINVAL;
+ }
+ return 0;
+
+bad_size:
+ printf("ipfw: opcode %d size %d wrong\n",
+ cmd->opcode, cmdlen);
+ return EINVAL;
+}
+
+
+/*
+ * Translation of requests for compatibility with FreeBSD 7.2/8.
+ * a static variable tells us if we have an old client from userland,
+ * and if necessary we translate requests and responses between the
+ * two formats.
+ */
+static int is7 = 0;
+
+struct ip_fw7 {
+ struct ip_fw7 *next; /* linked list of rules */
+ struct ip_fw7 *next_rule; /* ptr to next [skipto] rule */
+ /* 'next_rule' is used to pass up 'set_disable' status */
+
+ uint16_t act_ofs; /* offset of action in 32-bit units */
+ uint16_t cmd_len; /* # of 32-bit words in cmd */
+ uint16_t rulenum; /* rule number */
+ uint8_t set; /* rule set (0..31) */
+ // #define RESVD_SET 31 /* set for default and persistent rules */
+ uint8_t _pad; /* padding */
+ // uint32_t id; /* rule id, only in v.8 */
+ /* These fields are present in all rules. */
+ uint64_t pcnt; /* Packet counter */
+ uint64_t bcnt; /* Byte counter */
+ uint32_t timestamp; /* tv_sec of last match */
+
+ ipfw_insn cmd[1]; /* storage for commands */
+};
+
+int convert_rule_to_7(struct ip_fw *rule);
+int convert_rule_to_8(struct ip_fw *rule);
+
+#ifndef RULESIZE7
+#define RULESIZE7(rule) (sizeof(struct ip_fw7) + \
+ ((struct ip_fw7 *)(rule))->cmd_len * 4 - 4)
+#endif
+
+
+/*
+ * Copy the static and dynamic rules to the supplied buffer
+ * and return the amount of space actually used.
+ * Must be run under IPFW_UH_RLOCK
+ */
+static size_t
+ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
+{
+ char *bp = buf;
+ char *ep = bp + space;
+ struct ip_fw *rule, *dst;
+ int l, i;
+ time_t boot_seconds;
+
+ boot_seconds = boottime.tv_sec;
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+
+ if (is7) {
+			/* Convert rule to FreeBSD 7.2 format */
+ l = RULESIZE7(rule);
+ if (bp + l + sizeof(uint32_t) <= ep) {
+ int error;
+ bcopy(rule, bp, l + sizeof(uint32_t));
+ error = convert_rule_to_7((struct ip_fw *) bp);
+ if (error)
+ return 0; /*XXX correct? */
+ /*
+ * XXX HACK. Store the disable mask in the "next"
+ * pointer in a wild attempt to keep the ABI the same.
+ * Why do we do this on EVERY rule?
+ */
+ bcopy(&V_set_disable,
+ &(((struct ip_fw7 *)bp)->next_rule),
+ sizeof(V_set_disable));
+ if (((struct ip_fw7 *)bp)->timestamp)
+ ((struct ip_fw7 *)bp)->timestamp += boot_seconds;
+ bp += l;
+ }
+ continue; /* go to next rule */
+ }
+
+ /* normal mode, don't touch rules */
+ l = RULESIZE(rule);
+ if (bp + l > ep) { /* should not happen */
+ printf("overflow dumping static rules\n");
+ break;
+ }
+ dst = (struct ip_fw *)bp;
+ bcopy(rule, dst, l);
+ /*
+ * XXX HACK. Store the disable mask in the "next"
+ * pointer in a wild attempt to keep the ABI the same.
+ * Why do we do this on EVERY rule?
+ */
+ bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
+ if (dst->timestamp)
+ dst->timestamp += boot_seconds;
+ bp += l;
+ }
+ ipfw_get_dynamic(&bp, ep); /* protected by the dynamic lock */
+ return (bp - (char *)buf);
+}
+
+
+/**
+ * {set|get}sockopt parser.
+ */
+int
+ipfw_ctl(struct sockopt *sopt)
+{
+#define RULE_MAXSIZE (256*sizeof(u_int32_t))
+ int error;
+ size_t size;
+ struct ip_fw *buf, *rule;
+ struct ip_fw_chain *chain;
+ u_int32_t rulenum[2];
+
+ error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
+ if (error)
+ return (error);
+
+ /*
+ * Disallow modifications in really-really secure mode, but still allow
+ * the logging counters to be reset.
+ */
+ if (sopt->sopt_name == IP_FW_ADD ||
+ (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
+ error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
+ if (error)
+ return (error);
+ }
+
+ chain = &V_layer3_chain;
+ error = 0;
+
+ switch (sopt->sopt_name) {
+ case IP_FW_GET:
+ /*
+ * pass up a copy of the current rules. Static rules
+ * come first (the last of which has number IPFW_DEFAULT_RULE),
+		 * followed by a possibly empty list of dynamic rules.
+ * The last dynamic rule has NULL in the "next" field.
+ *
+ * Note that the calculated size is used to bound the
+ * amount of data returned to the user. The rule set may
+ * change between calculating the size and returning the
+ * data in which case we'll just return what fits.
+ */
+ for (;;) {
+ int len = 0, want;
+
+ size = chain->static_len;
+ size += ipfw_dyn_len();
+ if (size >= sopt->sopt_valsize)
+ break;
+ buf = malloc(size, M_TEMP, M_WAITOK);
+ if (buf == NULL)
+ break;
+ IPFW_UH_RLOCK(chain);
+ /* check again how much space we need */
+ want = chain->static_len + ipfw_dyn_len();
+ if (size >= want)
+ len = ipfw_getrules(chain, buf, size);
+ IPFW_UH_RUNLOCK(chain);
+ if (size >= want)
+ error = sooptcopyout(sopt, buf, len);
+ free(buf, M_TEMP);
+ if (size >= want)
+ break;
+ }
+ break;
+
+ case IP_FW_FLUSH:
+ /* locking is done within del_entry() */
+ error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */
+ break;
+
+ case IP_FW_ADD:
+ rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
+ sizeof(struct ip_fw7) );
+
+ /*
+		 * If the size of the commands equals RULESIZE7 then we
+		 * assume a FreeBSD 7.2 binary is talking to us (set is7=1).
+		 * is7 is persistent so the next 'ipfw list' command
+		 * will use this format.
+		 * NOTE: if the wrong version is guessed (this can happen
+		 * if the first ipfw command is 'ipfw [pipe] list')
+		 * the ipfw binary may crash or loop infinitely...
+ */
+ if (sopt->sopt_valsize == RULESIZE7(rule)) {
+ is7 = 1;
+ error = convert_rule_to_8(rule);
+ if (error)
+ return error;
+ if (error == 0)
+ error = check_ipfw_struct(rule, RULESIZE(rule));
+ } else {
+ is7 = 0;
+ if (error == 0)
+ error = check_ipfw_struct(rule, sopt->sopt_valsize);
+ }
+ if (error == 0) {
+ /* locking is done within ipfw_add_rule() */
+ error = ipfw_add_rule(chain, rule);
+ size = RULESIZE(rule);
+ if (!error && sopt->sopt_dir == SOPT_GET) {
+ if (is7) {
+ error = convert_rule_to_7(rule);
+ size = RULESIZE7(rule);
+ if (error)
+ return error;
+ }
+ error = sooptcopyout(sopt, rule, size);
+ }
+ }
+ free(rule, M_TEMP);
+ break;
+
+ case IP_FW_DEL:
+ /*
+ * IP_FW_DEL is used for deleting single rules or sets,
+ * and (ab)used to atomically manipulate sets. Argument size
+ * is used to distinguish between the two:
+ * sizeof(u_int32_t)
+ * delete single rule or set of rules,
+ * or reassign rules (or sets) to a different set.
+ * 2*sizeof(u_int32_t)
+ * atomic disable/enable sets.
+ * first u_int32_t contains sets to be disabled,
+ * second u_int32_t contains sets to be enabled.
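+		 *
+		 * For example, a single u_int32_t equal to
+		 * (4 << 24) | (2 << 16) | 1 swaps sets 1 and 2, while the
+		 * pair of values { 1 << 3, 1 << 4 } disables set 3 and
+		 * enables set 4.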
+ */
+ error = sooptcopyin(sopt, rulenum,
+ 2*sizeof(u_int32_t), sizeof(u_int32_t));
+ if (error)
+ break;
+ size = sopt->sopt_valsize;
+ if (size == sizeof(u_int32_t) && rulenum[0] != 0) {
+ /* delete or reassign, locking done in del_entry() */
+ error = del_entry(chain, rulenum[0]);
+ } else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */
+ IPFW_UH_WLOCK(chain);
+ V_set_disable =
+ (V_set_disable | rulenum[0]) & ~rulenum[1] &
+ ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
+ IPFW_UH_WUNLOCK(chain);
+ } else
+ error = EINVAL;
+ break;
+
+ case IP_FW_ZERO:
+	case IP_FW_RESETLOG: /* argument is a u_int32_t, the rule number */
+ rulenum[0] = 0;
+ if (sopt->sopt_val != 0) {
+ error = sooptcopyin(sopt, rulenum,
+ sizeof(u_int32_t), sizeof(u_int32_t));
+ if (error)
+ break;
+ }
+ error = zero_entry(chain, rulenum[0],
+ sopt->sopt_name == IP_FW_RESETLOG);
+ break;
+
+ /*--- TABLE manipulations are protected by the IPFW_LOCK ---*/
+ case IP_FW_TABLE_ADD:
+ {
+ ipfw_table_entry ent;
+
+ error = sooptcopyin(sopt, &ent,
+ sizeof(ent), sizeof(ent));
+ if (error)
+ break;
+ error = ipfw_add_table_entry(chain, ent.tbl,
+ ent.addr, ent.masklen, ent.value);
+ }
+ break;
+
+ case IP_FW_TABLE_DEL:
+ {
+ ipfw_table_entry ent;
+
+ error = sooptcopyin(sopt, &ent,
+ sizeof(ent), sizeof(ent));
+ if (error)
+ break;
+ error = ipfw_del_table_entry(chain, ent.tbl,
+ ent.addr, ent.masklen);
+ }
+ break;
+
+ case IP_FW_TABLE_FLUSH:
+ {
+ u_int16_t tbl;
+
+ error = sooptcopyin(sopt, &tbl,
+ sizeof(tbl), sizeof(tbl));
+ if (error)
+ break;
+ IPFW_WLOCK(chain);
+ error = ipfw_flush_table(chain, tbl);
+ IPFW_WUNLOCK(chain);
+ }
+ break;
+
+ case IP_FW_TABLE_GETSIZE:
+ {
+ u_int32_t tbl, cnt;
+
+ if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
+ sizeof(tbl))))
+ break;
+ IPFW_RLOCK(chain);
+ error = ipfw_count_table(chain, tbl, &cnt);
+ IPFW_RUNLOCK(chain);
+ if (error)
+ break;
+ error = sooptcopyout(sopt, &cnt, sizeof(cnt));
+ }
+ break;
+
+ case IP_FW_TABLE_LIST:
+ {
+ ipfw_table *tbl;
+
+ if (sopt->sopt_valsize < sizeof(*tbl)) {
+ error = EINVAL;
+ break;
+ }
+ size = sopt->sopt_valsize;
+ tbl = malloc(size, M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
+ if (error) {
+ free(tbl, M_TEMP);
+ break;
+ }
+ tbl->size = (size - sizeof(*tbl)) /
+ sizeof(ipfw_table_entry);
+ IPFW_RLOCK(chain);
+ error = ipfw_dump_table(chain, tbl);
+ IPFW_RUNLOCK(chain);
+ if (error) {
+ free(tbl, M_TEMP);
+ break;
+ }
+ error = sooptcopyout(sopt, tbl, size);
+ free(tbl, M_TEMP);
+ }
+ break;
+
+ /*--- NAT operations are protected by the IPFW_LOCK ---*/
+ case IP_FW_NAT_CFG:
+ if (IPFW_NAT_LOADED)
+ error = ipfw_nat_cfg_ptr(sopt);
+ else {
+ printf("IP_FW_NAT_CFG: %s\n",
+ "ipfw_nat not present, please load it");
+ error = EINVAL;
+ }
+ break;
+
+ case IP_FW_NAT_DEL:
+ if (IPFW_NAT_LOADED)
+ error = ipfw_nat_del_ptr(sopt);
+ else {
+ printf("IP_FW_NAT_DEL: %s\n",
+ "ipfw_nat not present, please load it");
+ error = EINVAL;
+ }
+ break;
+
+ case IP_FW_NAT_GET_CONFIG:
+ if (IPFW_NAT_LOADED)
+ error = ipfw_nat_get_cfg_ptr(sopt);
+ else {
+ printf("IP_FW_NAT_GET_CFG: %s\n",
+ "ipfw_nat not present, please load it");
+ error = EINVAL;
+ }
+ break;
+
+ case IP_FW_NAT_GET_LOG:
+ if (IPFW_NAT_LOADED)
+ error = ipfw_nat_get_log_ptr(sopt);
+ else {
+ printf("IP_FW_NAT_GET_LOG: %s\n",
+ "ipfw_nat not present, please load it");
+ error = EINVAL;
+ }
+ break;
+
+ default:
+ printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
+ error = EINVAL;
+ }
+
+ return (error);
+#undef RULE_MAXSIZE
+}
+
+
+#define RULE_MAXSIZE (256*sizeof(u_int32_t))
+
+/* Functions to convert rules 7.2 <==> 8.0 */
+int
+convert_rule_to_7(struct ip_fw *rule)
+{
+ /* Used to modify original rule */
+ struct ip_fw7 *rule7 = (struct ip_fw7 *)rule;
+ /* copy of original rule, version 8 */
+ struct ip_fw *tmp;
+
+ /* Used to copy commands */
+ ipfw_insn *ccmd, *dst;
+ int ll = 0, ccmdlen = 0;
+
+ tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
+ if (tmp == NULL) {
+		return (ENOMEM);	/* XXX proper error code */
+ }
+ bcopy(rule, tmp, RULE_MAXSIZE);
+
+ /* Copy fields */
+ rule7->_pad = tmp->_pad;
+ rule7->set = tmp->set;
+ rule7->rulenum = tmp->rulenum;
+ rule7->cmd_len = tmp->cmd_len;
+ rule7->act_ofs = tmp->act_ofs;
+ rule7->next_rule = (struct ip_fw7 *)tmp->next_rule;
+ rule7->next = (struct ip_fw7 *)tmp->x_next;
+ rule7->cmd_len = tmp->cmd_len;
+ rule7->pcnt = tmp->pcnt;
+ rule7->bcnt = tmp->bcnt;
+ rule7->timestamp = tmp->timestamp;
+
+ /* Copy commands */
+ for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd ;
+ ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
+ ccmdlen = F_LEN(ccmd);
+
+ bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t));
+
+ if (dst->opcode > O_NAT)
+			/* O_REASS does not exist in the 7.2 version, so
+			 * decrement opcodes that come after it.
+			 */
+ dst->opcode--;
+
+ if (ccmdlen > ll) {
+ printf("ipfw: opcode %d size truncated\n",
+ ccmd->opcode);
+			free(tmp, M_TEMP);
+			return (EINVAL);
+ }
+ }
+ free(tmp, M_TEMP);
+
+ return 0;
+}
+
+int
+convert_rule_to_8(struct ip_fw *rule)
+{
+ /* Used to modify original rule */
+ struct ip_fw7 *rule7 = (struct ip_fw7 *) rule;
+
+ /* Used to copy commands */
+ ipfw_insn *ccmd, *dst;
+ int ll = 0, ccmdlen = 0;
+
+ /* Copy of original rule */
+ struct ip_fw7 *tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
+ if (tmp == NULL) {
+		return (ENOMEM);	/* XXX proper error code */
+ }
+
+ bcopy(rule7, tmp, RULE_MAXSIZE);
+
+ for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd ;
+ ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
+ ccmdlen = F_LEN(ccmd);
+
+ bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t));
+
+ if (dst->opcode > O_NAT)
+			/* O_REASS does not exist in the 7.2 version, so
+			 * increment opcodes that come after it.
+			 */
+ dst->opcode++;
+
+ if (ccmdlen > ll) {
+ printf("ipfw: opcode %d size truncated\n",
+ ccmd->opcode);
+			free(tmp, M_TEMP);
+			return (EINVAL);
+ }
+ }
+
+ rule->_pad = tmp->_pad;
+ rule->set = tmp->set;
+ rule->rulenum = tmp->rulenum;
+ rule->cmd_len = tmp->cmd_len;
+ rule->act_ofs = tmp->act_ofs;
+ rule->next_rule = (struct ip_fw *)tmp->next_rule;
+ rule->x_next = (struct ip_fw *)tmp->next;
+ rule->cmd_len = tmp->cmd_len;
+	rule->id = 0;		/* XXX check whether 0 is a valid id here */
+ rule->pcnt = tmp->pcnt;
+ rule->bcnt = tmp->bcnt;
+ rule->timestamp = tmp->timestamp;
+
+	free(tmp, M_TEMP);
+ return 0;
+}
+
+/* end of file */
diff --git a/freebsd/sys/netinet/ipfw/ip_fw_table.c b/freebsd/sys/netinet/ipfw/ip_fw_table.c
new file mode 100644
index 00000000..39a1dfcc
--- /dev/null
+++ b/freebsd/sys/netinet/ipfw/ip_fw_table.c
@@ -0,0 +1,288 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Lookup table support for ipfw
+ *
+ * Lookup tables are implemented (at the moment) using the radix
+ * tree used for routing tables. Tables store key-value entries, where
+ * keys are network prefixes (addr/masklen), and values are integers.
+ * As a degenerate case we can interpret keys as 32-bit integers
+ * (with a /32 mask).
+ *
+ * The table is protected by the IPFW lock even for manipulation coming
+ * from userland, because operations are typically fast.
+ */
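+
+/*
+ * Illustrative sketch (hypothetical chain pointer and values, not part of
+ * the original sources): insert 192.168.0.0/16 with value 42 into table 1,
+ * look up a covered host address, then remove the prefix again:
+ *
+ *	uint32_t v;
+ *
+ *	ipfw_add_table_entry(ch, 1, htonl(0xc0a80000), 16, 42);
+ *	if (ipfw_lookup_table(ch, 1, htonl(0xc0a80107), &v))
+ *		printf("matched, value %u\n", v);	returns 1, v == 42
+ *	ipfw_del_table_entry(ch, 1, htonl(0xc0a80000), 16);
+ */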
+
+#if !defined(KLD_MODULE)
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_ipdivert.h>
+#include <freebsd/local/opt_ipdn.h>
+#include <freebsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#endif
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/net/if.h> /* ip_fw.h requires IFNAMSIZ */
+#include <freebsd/net/radix.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip_var.h> /* struct ipfw_rule_ref */
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/sys/queue.h> /* LIST_HEAD */
+#include <freebsd/netinet/ipfw/ip_fw_private.h>
+
+#ifdef MAC
+#include <freebsd/security/mac/mac_framework.h>
+#endif
+
+MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
+
+struct table_entry {
+ struct radix_node rn[2];
+ struct sockaddr_in addr, mask;
+ u_int32_t value;
+};
+
+/*
+ * The radix code expects addr and mask to be array of bytes,
+ * with the first byte being the length of the array. rn_inithead
+ * is called with the offset in bits of the lookup key within the
+ * array. If we use a sockaddr_in as the underlying type,
+ * sin_len is conveniently located at offset 0, sin_addr is at
+ * offset 4 and normally aligned.
+ * But for portability, let's avoid such assumptions and make the code explicit.
+ */
+#define KEY_LEN(v) *((uint8_t *)&(v))
+#define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr))
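+
+/*
+ * Worked example (illustrative only): for the prefix 10.0.0.0/8 the entry
+ * built by ipfw_add_table_entry() below ends up with
+ *	KEY_LEN(addr) = KEY_LEN(mask) = 8		key covers up to sin_addr
+ *	mask.sin_addr.s_addr = htonl(0xff000000)	the /8 netmask
+ *	addr.sin_addr.s_addr = htonl(0x0a000000)	10.0.0.0, pre-masked
+ * and the radix code compares 32 bits starting KEY_OFS bits into the key.
+ */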
+
+int
+ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+ uint8_t mlen, uint32_t value)
+{
+ struct radix_node_head *rnh;
+ struct table_entry *ent;
+ struct radix_node *rn;
+
+ if (tbl >= IPFW_TABLES_MAX)
+ return (EINVAL);
+ rnh = ch->tables[tbl];
+ ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO);
+ if (ent == NULL)
+ return (ENOMEM);
+ ent->value = value;
+ KEY_LEN(ent->addr) = KEY_LEN(ent->mask) = 8;
+ ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
+ ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
+ IPFW_WLOCK(ch);
+ rn = rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent);
+ if (rn == NULL) {
+ IPFW_WUNLOCK(ch);
+ free(ent, M_IPFW_TBL);
+ return (EEXIST);
+ }
+ IPFW_WUNLOCK(ch);
+ return (0);
+}
+
+int
+ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+ uint8_t mlen)
+{
+ struct radix_node_head *rnh;
+ struct table_entry *ent;
+ struct sockaddr_in sa, mask;
+
+ if (tbl >= IPFW_TABLES_MAX)
+ return (EINVAL);
+ rnh = ch->tables[tbl];
+ KEY_LEN(sa) = KEY_LEN(mask) = 8;
+ mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
+ sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
+ IPFW_WLOCK(ch);
+ ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh);
+ if (ent == NULL) {
+ IPFW_WUNLOCK(ch);
+ return (ESRCH);
+ }
+ IPFW_WUNLOCK(ch);
+ free(ent, M_IPFW_TBL);
+ return (0);
+}
+
+static int
+flush_table_entry(struct radix_node *rn, void *arg)
+{
+ struct radix_node_head * const rnh = arg;
+ struct table_entry *ent;
+
+ ent = (struct table_entry *)
+ rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
+ if (ent != NULL)
+ free(ent, M_IPFW_TBL);
+ return (0);
+}
+
+int
+ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl)
+{
+ struct radix_node_head *rnh;
+
+ IPFW_WLOCK_ASSERT(ch);
+
+ if (tbl >= IPFW_TABLES_MAX)
+ return (EINVAL);
+ rnh = ch->tables[tbl];
+ KASSERT(rnh != NULL, ("NULL IPFW table"));
+ rnh->rnh_walktree(rnh, flush_table_entry, rnh);
+ return (0);
+}
+
+void
+ipfw_destroy_tables(struct ip_fw_chain *ch)
+{
+ uint16_t tbl;
+ struct radix_node_head *rnh;
+
+ IPFW_WLOCK_ASSERT(ch);
+
+ for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) {
+ ipfw_flush_table(ch, tbl);
+ rnh = ch->tables[tbl];
+ rn_detachhead((void **)&rnh);
+ }
+}
+
+int
+ipfw_init_tables(struct ip_fw_chain *ch)
+{
+ int i;
+ uint16_t j;
+
+ for (i = 0; i < IPFW_TABLES_MAX; i++) {
+ if (!rn_inithead((void **)&ch->tables[i], KEY_OFS)) {
+ for (j = 0; j < i; j++) {
+ (void) ipfw_flush_table(ch, j);
+ }
+ return (ENOMEM);
+ }
+ }
+ return (0);
+}
+
+int
+ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+ uint32_t *val)
+{
+ struct radix_node_head *rnh;
+ struct table_entry *ent;
+ struct sockaddr_in sa;
+
+ if (tbl >= IPFW_TABLES_MAX)
+ return (0);
+ rnh = ch->tables[tbl];
+ KEY_LEN(sa) = 8;
+ sa.sin_addr.s_addr = addr;
+ ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
+ if (ent != NULL) {
+ *val = ent->value;
+ return (1);
+ }
+ return (0);
+}
+
+static int
+count_table_entry(struct radix_node *rn, void *arg)
+{
+ u_int32_t * const cnt = arg;
+
+ (*cnt)++;
+ return (0);
+}
+
+int
+ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
+{
+ struct radix_node_head *rnh;
+
+ if (tbl >= IPFW_TABLES_MAX)
+ return (EINVAL);
+ rnh = ch->tables[tbl];
+ *cnt = 0;
+ rnh->rnh_walktree(rnh, count_table_entry, cnt);
+ return (0);
+}
+
+static int
+dump_table_entry(struct radix_node *rn, void *arg)
+{
+ struct table_entry * const n = (struct table_entry *)rn;
+ ipfw_table * const tbl = arg;
+ ipfw_table_entry *ent;
+
+ if (tbl->cnt == tbl->size)
+ return (1);
+ ent = &tbl->ent[tbl->cnt];
+ ent->tbl = tbl->tbl;
+ if (in_nullhost(n->mask.sin_addr))
+ ent->masklen = 0;
+ else
+ ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
+ ent->addr = n->addr.sin_addr.s_addr;
+ ent->value = n->value;
+ tbl->cnt++;
+ return (0);
+}
+
+int
+ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
+{
+ struct radix_node_head *rnh;
+
+ if (tbl->tbl >= IPFW_TABLES_MAX)
+ return (EINVAL);
+ rnh = ch->tables[tbl->tbl];
+ tbl->cnt = 0;
+ rnh->rnh_walktree(rnh, dump_table_entry, tbl);
+ return (0);
+}
+/* end of file */
diff --git a/freebsd/sys/netinet/libalias/alias.c b/freebsd/sys/netinet/libalias/alias.c
new file mode 100644
index 00000000..e5c5138d
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias.c
@@ -0,0 +1,1793 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 Charles Mott <cm@linktel.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ Alias.c provides supervisory control for the functions of the
+ packet aliasing software. It consists of routines to monitor
+ TCP connection state, protocol-specific aliasing routines,
+ fragment handling and the following outside world functional
+ interfaces: SaveFragmentPtr, GetFragmentPtr, FragmentAliasIn,
+ PacketAliasIn and PacketAliasOut.
+
+ The other C program files are briefly described. The data
+ structure framework which holds information needed to translate
+ packets is encapsulated in alias_db.c. Data is accessed by
+ function calls, so other segments of the program need not know
+ about the underlying data structures. Alias_ftp.c contains
+ special code for modifying the ftp PORT command used to establish
+ data connections, while alias_irc.c does the same for IRC
+ DCC. Alias_util.c contains a few utility routines.
+
+ Version 1.0 August, 1996 (cjm)
+
+ Version 1.1 August 20, 1996 (cjm)
+ PPP host accepts incoming connections for ports 0 to 1023.
+ (Gary Roberts pointed out the need to handle incoming
+ connections.)
+
+ Version 1.2 September 7, 1996 (cjm)
+ Fragment handling error in alias_db.c corrected.
+ (Tom Torrance helped fix this problem.)
+
+ Version 1.4 September 16, 1996 (cjm)
+ - A more generalized method for handling incoming
+ connections, without the 0-1023 restriction, is
+ implemented in alias_db.c
+ - Improved ICMP support in alias.c. Traceroute
+ packet streams can now be correctly aliased.
+ - TCP connection closing logic simplified in
+ alias.c and now allows for additional 1 minute
+ "grace period" after FIN or RST is observed.
+
+ Version 1.5 September 17, 1996 (cjm)
+ Corrected error in handling incoming UDP packets with 0 checksum.
+ (Tom Torrance helped fix this problem.)
+
+ Version 1.6 September 18, 1996 (cjm)
+ Simplified ICMP aliasing scheme. Should now support
+ traceroute from Win95 as well as FreeBSD.
+
+ Version 1.7 January 9, 1997 (cjm)
+ - Out-of-order fragment handling.
+ - IP checksum error fixed for ftp transfers
+ from aliasing host.
+ - Integer return codes added to all
+ aliasing/de-aliasing functions.
+ - Some obsolete comments cleaned up.
+ - Differential checksum computations for
+ IP header (TCP, UDP and ICMP were already
+ differential).
+
+ Version 2.1 May 1997 (cjm)
+ - Added support for outgoing ICMP error
+ messages.
+ - Added two functions PacketAliasIn2()
+ and PacketAliasOut2() for dynamic address
+ control (e.g. round-robin allocation of
+ incoming packets).
+
+ Version 2.2 July 1997 (cjm)
+ - Rationalized API function names to begin
+ with "PacketAlias..."
+ - Eliminated PacketAliasIn2() and
+ PacketAliasOut2() as poorly conceived.
+
+ Version 2.3 Dec 1998 (dillon)
+ - Major bounds checking additions, see FreeBSD/CVS
+
+ Version 3.1 May, 2000 (salander)
+ - Added hooks to handle PPTP.
+
+ Version 3.2 July, 2000 (salander and satoh)
+ - Added PacketUnaliasOut routine.
+ - Added hooks to handle RTSP/RTP.
+
+ See HISTORY file for additional revisions.
+*/
+
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/sysctl.h>
+#else
+#include <freebsd/sys/types.h>
+#include <freebsd/stdlib.h>
+#include <freebsd/stdio.h>
+#include <freebsd/ctype.h>
+#include <freebsd/dlfcn.h>
+#include <freebsd/errno.h>
+#include <freebsd/string.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/udp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/err.h>
+#include <freebsd/local/alias.h>
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+/*
+ * Define libalias SYSCTL Node
+ */
+#ifdef SYSCTL_NODE
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, alias, CTLFLAG_RW, NULL, "Libalias sysctl API");
+
+#endif
+
+static __inline int
+twowords(void *p)
+{
+ uint8_t *c = p;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint16_t s1 = ((uint16_t)c[1] << 8) + (uint16_t)c[0];
+ uint16_t s2 = ((uint16_t)c[3] << 8) + (uint16_t)c[2];
+#else
+ uint16_t s1 = ((uint16_t)c[0] << 8) + (uint16_t)c[1];
+ uint16_t s2 = ((uint16_t)c[2] << 8) + (uint16_t)c[3];
+#endif
+ return (s1 + s2);
+}
+
+/* TCP Handling Routines
+
+ TcpMonitorIn() -- These routines monitor TCP connections, and
+ TcpMonitorOut() delete a link when a connection is closed.
+
+These routines look for SYN, FIN and RST flags to determine when TCP
+connections open and close. When a TCP connection closes, the data
+structure containing packet aliasing information is deleted after
+a timeout period.
+*/
+
+/* Local prototypes */
+static void TcpMonitorIn(u_char, struct alias_link *);
+
+static void TcpMonitorOut(u_char, struct alias_link *);
+
+
+static void
+TcpMonitorIn(u_char th_flags, struct alias_link *lnk)
+{
+
+ switch (GetStateIn(lnk)) {
+ case ALIAS_TCP_STATE_NOT_CONNECTED:
+ if (th_flags & TH_RST)
+ SetStateIn(lnk, ALIAS_TCP_STATE_DISCONNECTED);
+ else if (th_flags & TH_SYN)
+ SetStateIn(lnk, ALIAS_TCP_STATE_CONNECTED);
+ break;
+ case ALIAS_TCP_STATE_CONNECTED:
+ if (th_flags & (TH_FIN | TH_RST))
+ SetStateIn(lnk, ALIAS_TCP_STATE_DISCONNECTED);
+ break;
+ }
+}
+
+static void
+TcpMonitorOut(u_char th_flags, struct alias_link *lnk)
+{
+
+ switch (GetStateOut(lnk)) {
+ case ALIAS_TCP_STATE_NOT_CONNECTED:
+ if (th_flags & TH_RST)
+ SetStateOut(lnk, ALIAS_TCP_STATE_DISCONNECTED);
+ else if (th_flags & TH_SYN)
+ SetStateOut(lnk, ALIAS_TCP_STATE_CONNECTED);
+ break;
+ case ALIAS_TCP_STATE_CONNECTED:
+ if (th_flags & (TH_FIN | TH_RST))
+ SetStateOut(lnk, ALIAS_TCP_STATE_DISCONNECTED);
+ break;
+ }
+}
+
+
+
+
+
+/* Protocol Specific Packet Aliasing Routines
+
+ IcmpAliasIn(), IcmpAliasIn1(), IcmpAliasIn2()
+ IcmpAliasOut(), IcmpAliasOut1(), IcmpAliasOut2()
+ ProtoAliasIn(), ProtoAliasOut()
+ UdpAliasIn(), UdpAliasOut()
+ TcpAliasIn(), TcpAliasOut()
+
+These routines handle protocol specific details of packet aliasing.
+One may observe a certain amount of repetitive arithmetic in these
+functions, the purpose of which is to compute a revised checksum
+without actually summing over the entire data packet, which could be
+unnecessarily time consuming.
+
+The purpose of the packet aliasing routines is to replace the source
+address of the outgoing packet and then correctly put it back for
+any incoming packets. For TCP and UDP, ports are also re-mapped.
+
+For ICMP echo/timestamp requests and replies, the following scheme
+is used: the ID number is replaced by an alias for the outgoing
+packet.
+
+ICMP error messages are handled by looking at the IP fragment
+in the data section of the message.
+
+For TCP and UDP protocols, a port number is chosen for an outgoing
+packet, and then incoming packets are identified by IP address and
+port numbers. For TCP packets, there is additional logic in the event
+that sequence and ACK numbers have been altered (as in the case for
+FTP data port commands).
+
+The port numbers used by the packet aliasing module are not true
+ports in the Unix sense. No sockets are actually bound to ports.
+They are more correctly thought of as placeholders.
+
+All packets go through the aliasing mechanism, whether they come from
+the gateway machine or other machines on a local area network.
+*/
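+
+/*
+ * Sketch of the differential checksum idea used throughout the routines
+ * below (simplified; the real work is done by the ADJUST_CHECKSUM() macro
+ * and DifferentialChecksum() from alias_local.h): when a 16-bit field of a
+ * checksummed header changes from old_value to new_value, the existing
+ * ones-complement checksum can be patched incrementally:
+ *
+ *	int accumulate;
+ *
+ *	accumulate  = old_value;		add back what is removed
+ *	accumulate -= new_value;		subtract what is inserted
+ *	ADJUST_CHECKSUM(accumulate, cksum);	fold into the stored checksum
+ *
+ * so no re-summation over the whole packet is needed (cf. RFC 1624).
+ */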
+
+
+/* Local prototypes */
+static int IcmpAliasIn1(struct libalias *, struct ip *);
+static int IcmpAliasIn2(struct libalias *, struct ip *);
+static int IcmpAliasIn(struct libalias *, struct ip *);
+
+static int IcmpAliasOut1(struct libalias *, struct ip *, int create);
+static int IcmpAliasOut2(struct libalias *, struct ip *);
+static int IcmpAliasOut(struct libalias *, struct ip *, int create);
+
+static int ProtoAliasIn(struct libalias *la, struct in_addr ip_src,
+ struct in_addr *ip_dst, u_char ip_p, u_short *ip_sum);
+static int ProtoAliasOut(struct libalias *la, struct in_addr *ip_src,
+ struct in_addr ip_dst, u_char ip_p, u_short *ip_sum,
+ int create);
+
+static int UdpAliasIn(struct libalias *, struct ip *);
+static int UdpAliasOut(struct libalias *, struct ip *, int, int create);
+
+static int TcpAliasIn(struct libalias *, struct ip *);
+static int TcpAliasOut(struct libalias *, struct ip *, int, int create);
+
+
+static int
+IcmpAliasIn1(struct libalias *la, struct ip *pip)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+/*
+ De-alias incoming echo and timestamp replies.
+ Alias incoming echo and timestamp requests.
+*/
+ struct alias_link *lnk;
+ struct icmp *ic;
+
+ ic = (struct icmp *)ip_next(pip);
+
+/* Get source address from ICMP data field and restore original data */
+ lnk = FindIcmpIn(la, pip->ip_src, pip->ip_dst, ic->icmp_id, 1);
+ if (lnk != NULL) {
+ u_short original_id;
+ int accumulate;
+
+ original_id = GetOriginalPort(lnk);
+
+/* Adjust ICMP checksum */
+ accumulate = ic->icmp_id;
+ accumulate -= original_id;
+ ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/* Put original sequence number back in */
+ ic->icmp_id = original_id;
+
+/* Put original address back into IP header */
+ {
+ struct in_addr original_address;
+
+ original_address = GetOriginalAddress(lnk);
+ DifferentialChecksum(&pip->ip_sum,
+ &original_address, &pip->ip_dst, 2);
+ pip->ip_dst = original_address;
+ }
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+static int
+IcmpAliasIn2(struct libalias *la, struct ip *pip)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+/*
+ Alias incoming ICMP error messages containing
+ IP header and first 64 bits of datagram.
+*/
+ struct ip *ip;
+ struct icmp *ic, *ic2;
+ struct udphdr *ud;
+ struct tcphdr *tc;
+ struct alias_link *lnk;
+
+ ic = (struct icmp *)ip_next(pip);
+ ip = &ic->icmp_ip;
+
+ ud = (struct udphdr *)ip_next(ip);
+ tc = (struct tcphdr *)ip_next(ip);
+ ic2 = (struct icmp *)ip_next(ip);
+
+ if (ip->ip_p == IPPROTO_UDP)
+ lnk = FindUdpTcpIn(la, ip->ip_dst, ip->ip_src,
+ ud->uh_dport, ud->uh_sport,
+ IPPROTO_UDP, 0);
+ else if (ip->ip_p == IPPROTO_TCP)
+ lnk = FindUdpTcpIn(la, ip->ip_dst, ip->ip_src,
+ tc->th_dport, tc->th_sport,
+ IPPROTO_TCP, 0);
+ else if (ip->ip_p == IPPROTO_ICMP) {
+ if (ic2->icmp_type == ICMP_ECHO || ic2->icmp_type == ICMP_TSTAMP)
+ lnk = FindIcmpIn(la, ip->ip_dst, ip->ip_src, ic2->icmp_id, 0);
+ else
+ lnk = NULL;
+ } else
+ lnk = NULL;
+
+ if (lnk != NULL) {
+ if (ip->ip_p == IPPROTO_UDP || ip->ip_p == IPPROTO_TCP) {
+ int accumulate, accumulate2;
+ struct in_addr original_address;
+ u_short original_port;
+
+ original_address = GetOriginalAddress(lnk);
+ original_port = GetOriginalPort(lnk);
+
+/* Adjust ICMP checksum */
+ accumulate = twowords(&ip->ip_src);
+ accumulate -= twowords(&original_address);
+ accumulate += ud->uh_sport;
+ accumulate -= original_port;
+ accumulate2 = accumulate;
+ accumulate2 += ip->ip_sum;
+ ADJUST_CHECKSUM(accumulate, ip->ip_sum);
+ accumulate2 -= ip->ip_sum;
+ ADJUST_CHECKSUM(accumulate2, ic->icmp_cksum);
+
+/* Un-alias address in IP header */
+ DifferentialChecksum(&pip->ip_sum,
+ &original_address, &pip->ip_dst, 2);
+ pip->ip_dst = original_address;
+
+/* Un-alias address and port number of original IP packet
+fragment contained in ICMP data section */
+ ip->ip_src = original_address;
+ ud->uh_sport = original_port;
+ } else if (ip->ip_p == IPPROTO_ICMP) {
+ int accumulate, accumulate2;
+ struct in_addr original_address;
+ u_short original_id;
+
+ original_address = GetOriginalAddress(lnk);
+ original_id = GetOriginalPort(lnk);
+
+/* Adjust ICMP checksum */
+ accumulate = twowords(&ip->ip_src);
+ accumulate -= twowords(&original_address);
+ accumulate += ic2->icmp_id;
+ accumulate -= original_id;
+ accumulate2 = accumulate;
+ accumulate2 += ip->ip_sum;
+ ADJUST_CHECKSUM(accumulate, ip->ip_sum);
+ accumulate2 -= ip->ip_sum;
+ ADJUST_CHECKSUM(accumulate2, ic->icmp_cksum);
+
+/* Un-alias address in IP header */
+ DifferentialChecksum(&pip->ip_sum,
+ &original_address, &pip->ip_dst, 2);
+ pip->ip_dst = original_address;
+
+/* Un-alias address of original IP packet and sequence number of
+ embedded ICMP datagram */
+ ip->ip_src = original_address;
+ ic2->icmp_id = original_id;
+ }
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+
+static int
+IcmpAliasIn(struct libalias *la, struct ip *pip)
+{
+ int iresult;
+ struct icmp *ic;
+
+ LIBALIAS_LOCK_ASSERT(la);
+/* Return if proxy-only mode is enabled */
+ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+ return (PKT_ALIAS_OK);
+
+ ic = (struct icmp *)ip_next(pip);
+
+ iresult = PKT_ALIAS_IGNORED;
+ switch (ic->icmp_type) {
+ case ICMP_ECHOREPLY:
+ case ICMP_TSTAMPREPLY:
+ if (ic->icmp_code == 0) {
+ iresult = IcmpAliasIn1(la, pip);
+ }
+ break;
+ case ICMP_UNREACH:
+ case ICMP_SOURCEQUENCH:
+ case ICMP_TIMXCEED:
+ case ICMP_PARAMPROB:
+ iresult = IcmpAliasIn2(la, pip);
+ break;
+ case ICMP_ECHO:
+ case ICMP_TSTAMP:
+ iresult = IcmpAliasIn1(la, pip);
+ break;
+ }
+ return (iresult);
+}
+
+
+static int
+IcmpAliasOut1(struct libalias *la, struct ip *pip, int create)
+{
+/*
+ Alias outgoing echo and timestamp requests.
+ De-alias outgoing echo and timestamp replies.
+*/
+ struct alias_link *lnk;
+ struct icmp *ic;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ ic = (struct icmp *)ip_next(pip);
+
+/* Save overwritten data for when echo packet returns */
+ lnk = FindIcmpOut(la, pip->ip_src, pip->ip_dst, ic->icmp_id, create);
+ if (lnk != NULL) {
+ u_short alias_id;
+ int accumulate;
+
+ alias_id = GetAliasPort(lnk);
+
+/* Since data field is being modified, adjust ICMP checksum */
+ accumulate = ic->icmp_id;
+ accumulate -= alias_id;
+ ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/* Alias sequence number */
+ ic->icmp_id = alias_id;
+
+/* Change source address */
+ {
+ struct in_addr alias_address;
+
+ alias_address = GetAliasAddress(lnk);
+ DifferentialChecksum(&pip->ip_sum,
+ &alias_address, &pip->ip_src, 2);
+ pip->ip_src = alias_address;
+ }
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+
+static int
+IcmpAliasOut2(struct libalias *la, struct ip *pip)
+{
+/*
+ Alias outgoing ICMP error messages containing
+ IP header and first 64 bits of datagram.
+*/
+ struct ip *ip;
+ struct icmp *ic, *ic2;
+ struct udphdr *ud;
+ struct tcphdr *tc;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ ic = (struct icmp *)ip_next(pip);
+ ip = &ic->icmp_ip;
+
+ ud = (struct udphdr *)ip_next(ip);
+ tc = (struct tcphdr *)ip_next(ip);
+ ic2 = (struct icmp *)ip_next(ip);
+
+ if (ip->ip_p == IPPROTO_UDP)
+ lnk = FindUdpTcpOut(la, ip->ip_dst, ip->ip_src,
+ ud->uh_dport, ud->uh_sport,
+ IPPROTO_UDP, 0);
+ else if (ip->ip_p == IPPROTO_TCP)
+ lnk = FindUdpTcpOut(la, ip->ip_dst, ip->ip_src,
+ tc->th_dport, tc->th_sport,
+ IPPROTO_TCP, 0);
+ else if (ip->ip_p == IPPROTO_ICMP) {
+ if (ic2->icmp_type == ICMP_ECHO || ic2->icmp_type == ICMP_TSTAMP)
+ lnk = FindIcmpOut(la, ip->ip_dst, ip->ip_src, ic2->icmp_id, 0);
+ else
+ lnk = NULL;
+ } else
+ lnk = NULL;
+
+ if (lnk != NULL) {
+ if (ip->ip_p == IPPROTO_UDP || ip->ip_p == IPPROTO_TCP) {
+ int accumulate;
+ struct in_addr alias_address;
+ u_short alias_port;
+
+ alias_address = GetAliasAddress(lnk);
+ alias_port = GetAliasPort(lnk);
+
+/* Adjust ICMP checksum */
+ accumulate = twowords(&ip->ip_dst);
+ accumulate -= twowords(&alias_address);
+ accumulate += ud->uh_dport;
+ accumulate -= alias_port;
+ ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/*
+ * Alias address in IP header if it comes from the host
+ * the original TCP/UDP packet was destined for.
+ */
+ if (pip->ip_src.s_addr == ip->ip_dst.s_addr) {
+ DifferentialChecksum(&pip->ip_sum,
+ &alias_address, &pip->ip_src, 2);
+ pip->ip_src = alias_address;
+ }
+/* Alias address and port number of original IP packet
+fragment contained in ICMP data section */
+ ip->ip_dst = alias_address;
+ ud->uh_dport = alias_port;
+ } else if (ip->ip_p == IPPROTO_ICMP) {
+ int accumulate;
+ struct in_addr alias_address;
+ u_short alias_id;
+
+ alias_address = GetAliasAddress(lnk);
+ alias_id = GetAliasPort(lnk);
+
+/* Adjust ICMP checksum */
+ accumulate = twowords(&ip->ip_dst);
+ accumulate -= twowords(&alias_address);
+ accumulate += ic2->icmp_id;
+ accumulate -= alias_id;
+ ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/*
+ * Alias address in IP header if it comes from the host
+ * the original ICMP message was destined for.
+ */
+ if (pip->ip_src.s_addr == ip->ip_dst.s_addr) {
+ DifferentialChecksum(&pip->ip_sum,
+ &alias_address, &pip->ip_src, 2);
+ pip->ip_src = alias_address;
+ }
+/* Alias address of original IP packet and sequence number of
+ embedded ICMP datagram */
+ ip->ip_dst = alias_address;
+ ic2->icmp_id = alias_id;
+ }
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+
+static int
+IcmpAliasOut(struct libalias *la, struct ip *pip, int create)
+{
+ int iresult;
+ struct icmp *ic;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ (void)create;
+
+/* Return if proxy-only mode is enabled */
+ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+ return (PKT_ALIAS_OK);
+
+ ic = (struct icmp *)ip_next(pip);
+
+ iresult = PKT_ALIAS_IGNORED;
+ switch (ic->icmp_type) {
+ case ICMP_ECHO:
+ case ICMP_TSTAMP:
+ if (ic->icmp_code == 0) {
+ iresult = IcmpAliasOut1(la, pip, create);
+ }
+ break;
+ case ICMP_UNREACH:
+ case ICMP_SOURCEQUENCH:
+ case ICMP_TIMXCEED:
+ case ICMP_PARAMPROB:
+ iresult = IcmpAliasOut2(la, pip);
+ break;
+ case ICMP_ECHOREPLY:
+ case ICMP_TSTAMPREPLY:
+ iresult = IcmpAliasOut1(la, pip, create);
+ }
+ return (iresult);
+}
+
+static int
+ProtoAliasIn(struct libalias *la, struct in_addr ip_src,
+ struct in_addr *ip_dst, u_char ip_p, u_short *ip_sum)
+{
+/*
+ Handle incoming IP packets. The
+ only thing which is done in this case is to alias
+ the dest IP address of the packet to our inside
+ machine.
+*/
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+/* Return if proxy-only mode is enabled */
+ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+ return (PKT_ALIAS_OK);
+
+ lnk = FindProtoIn(la, ip_src, *ip_dst, ip_p);
+ if (lnk != NULL) {
+ struct in_addr original_address;
+
+ original_address = GetOriginalAddress(lnk);
+
+/* Restore original IP address */
+ DifferentialChecksum(ip_sum,
+ &original_address, ip_dst, 2);
+ *ip_dst = original_address;
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+static int
+ProtoAliasOut(struct libalias *la, struct in_addr *ip_src,
+ struct in_addr ip_dst, u_char ip_p, u_short *ip_sum, int create)
+{
+/*
+ Handle outgoing IP packets. The
+ only thing which is done in this case is to alias
+ the source IP address of the packet.
+*/
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ (void)create;
+
+/* Return if proxy-only mode is enabled */
+ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+ return (PKT_ALIAS_OK);
+
+ lnk = FindProtoOut(la, *ip_src, ip_dst, ip_p);
+ if (lnk != NULL) {
+ struct in_addr alias_address;
+
+ alias_address = GetAliasAddress(lnk);
+
+/* Change source address */
+ DifferentialChecksum(ip_sum,
+ &alias_address, ip_src, 2);
+ *ip_src = alias_address;
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+
+static int
+UdpAliasIn(struct libalias *la, struct ip *pip)
+{
+ struct udphdr *ud;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+
+ ud = (struct udphdr *)ip_next(pip);
+
+ lnk = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst,
+ ud->uh_sport, ud->uh_dport,
+ IPPROTO_UDP, !(la->packetAliasMode & PKT_ALIAS_PROXY_ONLY));
+ if (lnk != NULL) {
+ struct in_addr alias_address;
+ struct in_addr original_address;
+ struct in_addr proxy_address;
+ u_short alias_port;
+ u_short proxy_port;
+ int accumulate;
+ int error;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = &original_address,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
+
+ alias_address = GetAliasAddress(lnk);
+ original_address = GetOriginalAddress(lnk);
+ proxy_address = GetProxyAddress(lnk);
+ alias_port = ud->uh_dport;
+ ud->uh_dport = GetOriginalPort(lnk);
+ proxy_port = GetProxyPort(lnk);
+
+ /* Walk out chain. */
+ error = find_handler(IN, UDP, la, pip, &ad);
+ /* If we cannot figure out the packet, ignore it. */
+ if (error < 0)
+ return (PKT_ALIAS_IGNORED);
+
+/* If UDP checksum is not zero, then adjust since destination port */
+/* is being unaliased and destination address is being altered. */
+ if (ud->uh_sum != 0) {
+ accumulate = alias_port;
+ accumulate -= ud->uh_dport;
+ accumulate += twowords(&alias_address);
+ accumulate -= twowords(&original_address);
+
+/* If this is a proxy packet, modify checksum because of source change.*/
+ if (proxy_port != 0) {
+ accumulate += ud->uh_sport;
+ accumulate -= proxy_port;
+ }
+
+ if (proxy_address.s_addr != 0) {
+ accumulate += twowords(&pip->ip_src);
+ accumulate -= twowords(&proxy_address);
+ }
+
+ ADJUST_CHECKSUM(accumulate, ud->uh_sum);
+ }
+/* XXX: Could the two if's below be concatenated to one ? */
+/* Restore source port and/or address in case of proxying*/
+
+ if (proxy_port != 0)
+ ud->uh_sport = proxy_port;
+
+ if (proxy_address.s_addr != 0) {
+ DifferentialChecksum(&pip->ip_sum,
+ &proxy_address, &pip->ip_src, 2);
+ pip->ip_src = proxy_address;
+ }
+
+/* Restore original IP address */
+ DifferentialChecksum(&pip->ip_sum,
+ &original_address, &pip->ip_dst, 2);
+ pip->ip_dst = original_address;
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+static int
+UdpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create)
+{
+ struct udphdr *ud;
+ struct alias_link *lnk;
+ struct in_addr dest_address;
+ struct in_addr proxy_server_address;
+ u_short dest_port;
+ u_short proxy_server_port;
+ int proxy_type;
+ int error;
+
+ LIBALIAS_LOCK_ASSERT(la);
+
+/* Return if proxy-only mode is enabled and no proxy rule was found. */
+ ud = (struct udphdr *)ip_next(pip);
+ proxy_type = ProxyCheck(la, &proxy_server_address,
+ &proxy_server_port, pip->ip_src, pip->ip_dst,
+ ud->uh_dport, pip->ip_p);
+ if (proxy_type == 0 && (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY))
+ return (PKT_ALIAS_OK);
+
+/* If this is a transparent proxy, save original destination,
+ * then alter the destination and adjust checksums */
+ dest_port = ud->uh_dport;
+ dest_address = pip->ip_dst;
+
+ if (proxy_type != 0) {
+ int accumulate;
+
+ accumulate = twowords(&pip->ip_dst);
+ accumulate -= twowords(&proxy_server_address);
+
+ ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+
+ if (ud->uh_sum != 0) {
+ accumulate = twowords(&pip->ip_dst);
+ accumulate -= twowords(&proxy_server_address);
+ accumulate += ud->uh_dport;
+ accumulate -= proxy_server_port;
+ ADJUST_CHECKSUM(accumulate, ud->uh_sum);
+ }
+ pip->ip_dst = proxy_server_address;
+ ud->uh_dport = proxy_server_port;
+ }
+ lnk = FindUdpTcpOut(la, pip->ip_src, pip->ip_dst,
+ ud->uh_sport, ud->uh_dport,
+ IPPROTO_UDP, create);
+ if (lnk != NULL) {
+ u_short alias_port;
+ struct in_addr alias_address;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
+
+/* Save original destination address, if this is a proxy packet.
+ * Also modify packet to include destination encoding. This may
+ * change the size of IP header. */
+ if (proxy_type != 0) {
+ SetProxyPort(lnk, dest_port);
+ SetProxyAddress(lnk, dest_address);
+ ProxyModify(la, lnk, pip, maxpacketsize, proxy_type);
+ ud = (struct udphdr *)ip_next(pip);
+ }
+
+ alias_address = GetAliasAddress(lnk);
+ alias_port = GetAliasPort(lnk);
+
+ /* Walk out chain. */
+ error = find_handler(OUT, UDP, la, pip, &ad);
+
+/* If UDP checksum is not zero, adjust since source port is */
+/* being aliased and source address is being altered */
+ if (ud->uh_sum != 0) {
+ int accumulate;
+
+ accumulate = ud->uh_sport;
+ accumulate -= alias_port;
+ accumulate += twowords(&pip->ip_src);
+ accumulate -= twowords(&alias_address);
+ ADJUST_CHECKSUM(accumulate, ud->uh_sum);
+ }
+/* Put alias port in UDP header */
+ ud->uh_sport = alias_port;
+
+/* Change source address */
+ DifferentialChecksum(&pip->ip_sum,
+ &alias_address, &pip->ip_src, 2);
+ pip->ip_src = alias_address;
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+
+
+static int
+TcpAliasIn(struct libalias *la, struct ip *pip)
+{
+ struct tcphdr *tc;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ tc = (struct tcphdr *)ip_next(pip);
+
+ lnk = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst,
+ tc->th_sport, tc->th_dport,
+ IPPROTO_TCP,
+ !(la->packetAliasMode & PKT_ALIAS_PROXY_ONLY));
+ if (lnk != NULL) {
+ struct in_addr alias_address;
+ struct in_addr original_address;
+ struct in_addr proxy_address;
+ u_short alias_port;
+ u_short proxy_port;
+ int accumulate, error;
+
+ /*
+		 * Most of the variables are initialized a bit further below,
+		 * but the PPTP handler (aliashandlepptpin) needs the
+		 * destination port as it arrived in the packet, not the
+		 * original one; see [*] below.
+ */
+
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = &tc->th_sport,
+ .dport = &tc->th_dport,
+ .maxpktsize = 0
+ };
+
+ /* Walk out chain. */
+ error = find_handler(IN, TCP, la, pip, &ad);
+
+ alias_address = GetAliasAddress(lnk);
+ original_address = GetOriginalAddress(lnk);
+ proxy_address = GetProxyAddress(lnk);
+ alias_port = tc->th_dport;
+ tc->th_dport = GetOriginalPort(lnk);
+ proxy_port = GetProxyPort(lnk);
+
+ /*
+		 * See the note above: if a find_handler() call is ever added
+		 * AFTER this point, alias_data must be rebuilt as well;
+		 * uncommenting the block below should be enough.
+ */
+#if 0
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = &original_address,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
+
+ /* Walk out chain. */
+ error = find_handler(la, pip, &ad);
+ if (error == EHDNOF)
+ printf("Protocol handler not found\n");
+#endif
+
+/* Adjust TCP checksum since destination port is being unaliased */
+/* and destination port is being altered. */
+ accumulate = alias_port;
+ accumulate -= tc->th_dport;
+ accumulate += twowords(&alias_address);
+ accumulate -= twowords(&original_address);
+
+/* If this is a proxy, then modify the TCP source port and
+ checksum accumulation */
+ if (proxy_port != 0) {
+ accumulate += tc->th_sport;
+ tc->th_sport = proxy_port;
+ accumulate -= tc->th_sport;
+ accumulate += twowords(&pip->ip_src);
+ accumulate -= twowords(&proxy_address);
+ }
+/* See if ACK number needs to be modified */
+ if (GetAckModified(lnk) == 1) {
+ int delta;
+
+ tc = (struct tcphdr *)ip_next(pip);
+ delta = GetDeltaAckIn(tc->th_ack, lnk);
+ if (delta != 0) {
+ accumulate += twowords(&tc->th_ack);
+ tc->th_ack = htonl(ntohl(tc->th_ack) - delta);
+ accumulate -= twowords(&tc->th_ack);
+ }
+ }
+ ADJUST_CHECKSUM(accumulate, tc->th_sum);
+
+/* Restore original IP address */
+ accumulate = twowords(&pip->ip_dst);
+ pip->ip_dst = original_address;
+ accumulate -= twowords(&pip->ip_dst);
+
+/* If this is a transparent proxy packet, then modify the source
+ address */
+ if (proxy_address.s_addr != 0) {
+ accumulate += twowords(&pip->ip_src);
+ pip->ip_src = proxy_address;
+ accumulate -= twowords(&pip->ip_src);
+ }
+ ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+
+/* Monitor TCP connection state */
+ tc = (struct tcphdr *)ip_next(pip);
+ TcpMonitorIn(tc->th_flags, lnk);
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+static int
+TcpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create)
+{
+ int proxy_type, error;
+ u_short dest_port;
+ u_short proxy_server_port;
+ struct in_addr dest_address;
+ struct in_addr proxy_server_address;
+ struct tcphdr *tc;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ tc = (struct tcphdr *)ip_next(pip);
+
+ if (create)
+ proxy_type = ProxyCheck(la, &proxy_server_address,
+ &proxy_server_port, pip->ip_src, pip->ip_dst,
+ tc->th_dport, pip->ip_p);
+ else
+ proxy_type = 0;
+
+ if (proxy_type == 0 && (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY))
+ return (PKT_ALIAS_OK);
+
+/* If this is a transparent proxy, save original destination,
+ then alter the destination and adjust checksums */
+ dest_port = tc->th_dport;
+ dest_address = pip->ip_dst;
+ if (proxy_type != 0) {
+ int accumulate;
+
+ accumulate = tc->th_dport;
+ tc->th_dport = proxy_server_port;
+ accumulate -= tc->th_dport;
+ accumulate += twowords(&pip->ip_dst);
+ accumulate -= twowords(&proxy_server_address);
+ ADJUST_CHECKSUM(accumulate, tc->th_sum);
+
+ accumulate = twowords(&pip->ip_dst);
+ pip->ip_dst = proxy_server_address;
+ accumulate -= twowords(&pip->ip_dst);
+ ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+ }
+ lnk = FindUdpTcpOut(la, pip->ip_src, pip->ip_dst,
+ tc->th_sport, tc->th_dport,
+ IPPROTO_TCP, create);
+	if (lnk != NULL) {
+ u_short alias_port;
+ struct in_addr alias_address;
+ int accumulate;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &tc->th_sport,
+ .dport = &tc->th_dport,
+ .maxpktsize = maxpacketsize
+ };
+
+/* Save original destination address, if this is a proxy packet.
+ Also modify packet to include destination encoding. This may
+ change the size of IP header. */
+ if (proxy_type != 0) {
+ SetProxyPort(lnk, dest_port);
+ SetProxyAddress(lnk, dest_address);
+ ProxyModify(la, lnk, pip, maxpacketsize, proxy_type);
+ tc = (struct tcphdr *)ip_next(pip);
+ }
+/* Get alias address and port */
+ alias_port = GetAliasPort(lnk);
+ alias_address = GetAliasAddress(lnk);
+
+/* Monitor TCP connection state */
+ tc = (struct tcphdr *)ip_next(pip);
+ TcpMonitorOut(tc->th_flags, lnk);
+
+ /* Walk out chain. */
+ error = find_handler(OUT, TCP, la, pip, &ad);
+
+/* Adjust TCP checksum since source port is being aliased */
+/* and source address is being altered */
+ accumulate = tc->th_sport;
+ tc->th_sport = alias_port;
+ accumulate -= tc->th_sport;
+ accumulate += twowords(&pip->ip_src);
+ accumulate -= twowords(&alias_address);
+
+/* Modify sequence number if necessary */
+ if (GetAckModified(lnk) == 1) {
+ int delta;
+
+ tc = (struct tcphdr *)ip_next(pip);
+ delta = GetDeltaSeqOut(tc->th_seq, lnk);
+ if (delta != 0) {
+ accumulate += twowords(&tc->th_seq);
+ tc->th_seq = htonl(ntohl(tc->th_seq) + delta);
+ accumulate -= twowords(&tc->th_seq);
+ }
+ }
+ ADJUST_CHECKSUM(accumulate, tc->th_sum);
+
+/* Change source address */
+ accumulate = twowords(&pip->ip_src);
+ pip->ip_src = alias_address;
+ accumulate -= twowords(&pip->ip_src);
+ ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_IGNORED);
+}
+
+
+
+
+/* Fragment Handling
+
+ FragmentIn()
+ FragmentOut()
+
+The packet aliasing module has a limited ability for handling IP
+fragments. If the ICMP, TCP or UDP header is in the first fragment
+received, then the ID number of the IP packet is saved, and other
+fragments are identified according to their ID number and IP address
+they were sent from. Pointers to unresolved fragments can also be
+saved and recalled when a header fragment is seen.
+*/
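+
+/*
+ * Typical caller workflow (sketch; see the LibAlias*Fragment() entry points
+ * further below):
+ *
+ *	switch (LibAliasIn(la, pkt, maxlen)) {
+ *	case PKT_ALIAS_UNRESOLVED_FRAGMENT:
+ *		LibAliasSaveFragment(la, pkt);		hold until header seen
+ *		break;
+ *	case PKT_ALIAS_FOUND_HEADER_FRAGMENT:
+ *		while ((frag = LibAliasGetFragment(la, pkt)) != NULL)
+ *			LibAliasFragmentIn(la, pkt, frag);	patch stored ones
+ *		break;
+ *	}
+ *
+ * (la, pkt, maxlen and frag are hypothetical caller variables.)
+ */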
+
+/* Local prototypes */
+static int FragmentIn(struct libalias *la, struct in_addr ip_src,
+ struct in_addr *ip_dst, u_short ip_id, u_short *ip_sum);
+static int FragmentOut(struct libalias *, struct in_addr *ip_src,
+ u_short *ip_sum);
+
+static int
+FragmentIn(struct libalias *la, struct in_addr ip_src, struct in_addr *ip_dst,
+ u_short ip_id, u_short *ip_sum)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindFragmentIn2(la, ip_src, *ip_dst, ip_id);
+ if (lnk != NULL) {
+ struct in_addr original_address;
+
+ GetFragmentAddr(lnk, &original_address);
+ DifferentialChecksum(ip_sum,
+ &original_address, ip_dst, 2);
+ *ip_dst = original_address;
+
+ return (PKT_ALIAS_OK);
+ }
+ return (PKT_ALIAS_UNRESOLVED_FRAGMENT);
+}
+
+static int
+FragmentOut(struct libalias *la, struct in_addr *ip_src, u_short *ip_sum)
+{
+ struct in_addr alias_address;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ alias_address = FindAliasAddress(la, *ip_src);
+ DifferentialChecksum(ip_sum,
+ &alias_address, ip_src, 2);
+ *ip_src = alias_address;
+
+ return (PKT_ALIAS_OK);
+}
+
+
+
+
+
+
+/* Outside World Access
+
+ PacketAliasSaveFragment()
+ PacketAliasGetFragment()
+ PacketAliasFragmentIn()
+ PacketAliasIn()
+ PacketAliasOut()
+ PacketUnaliasOut()
+
+(prototypes in alias.h)
+*/
+
+int
+LibAliasSaveFragment(struct libalias *la, char *ptr)
+{
+ int iresult;
+ struct alias_link *lnk;
+ struct ip *pip;
+
+ LIBALIAS_LOCK(la);
+ pip = (struct ip *)ptr;
+ lnk = AddFragmentPtrLink(la, pip->ip_src, pip->ip_id);
+ iresult = PKT_ALIAS_ERROR;
+ if (lnk != NULL) {
+ SetFragmentPtr(lnk, ptr);
+ iresult = PKT_ALIAS_OK;
+ }
+ LIBALIAS_UNLOCK(la);
+ return (iresult);
+}
+
+char *
+LibAliasGetFragment(struct libalias *la, char *ptr)
+{
+ struct alias_link *lnk;
+ char *fptr;
+ struct ip *pip;
+
+ LIBALIAS_LOCK(la);
+ pip = (struct ip *)ptr;
+ lnk = FindFragmentPtr(la, pip->ip_src, pip->ip_id);
+ if (lnk != NULL) {
+ GetFragmentPtr(lnk, &fptr);
+ SetFragmentPtr(lnk, NULL);
+ SetExpire(lnk, 0); /* Deletes link */
+ } else
+ fptr = NULL;
+
+ LIBALIAS_UNLOCK(la);
+ return (fptr);
+}
+
+void
+LibAliasFragmentIn(struct libalias *la, char *ptr, /* Points to correctly
+ * de-aliased header
+ * fragment */
+ char *ptr_fragment /* Points to fragment which must be
+ * de-aliased */
+)
+{
+ struct ip *pip;
+ struct ip *fpip;
+
+ LIBALIAS_LOCK(la);
+ (void)la;
+ pip = (struct ip *)ptr;
+ fpip = (struct ip *)ptr_fragment;
+
+ DifferentialChecksum(&fpip->ip_sum,
+ &pip->ip_dst, &fpip->ip_dst, 2);
+ fpip->ip_dst = pip->ip_dst;
+ LIBALIAS_UNLOCK(la);
+}
+
+/* Local prototypes */
+static int
+LibAliasOutLocked(struct libalias *la, char *ptr,
+ int maxpacketsize, int create);
+static int
+LibAliasInLocked(struct libalias *la, char *ptr,
+ int maxpacketsize);
+
+int
+LibAliasIn(struct libalias *la, char *ptr, int maxpacketsize)
+{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasInLocked(la, ptr, maxpacketsize);
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+static int
+LibAliasInLocked(struct libalias *la, char *ptr, int maxpacketsize)
+{
+ struct in_addr alias_addr;
+ struct ip *pip;
+ int iresult;
+
+ if (la->packetAliasMode & PKT_ALIAS_REVERSE) {
+ la->packetAliasMode &= ~PKT_ALIAS_REVERSE;
+ iresult = LibAliasOutLocked(la, ptr, maxpacketsize, 1);
+ la->packetAliasMode |= PKT_ALIAS_REVERSE;
+ goto getout;
+ }
+ HouseKeeping(la);
+ ClearCheckNewLink(la);
+ pip = (struct ip *)ptr;
+ alias_addr = pip->ip_dst;
+
+ /* Defense against mangled packets */
+ if (ntohs(pip->ip_len) > maxpacketsize
+ || (pip->ip_hl << 2) > maxpacketsize) {
+ iresult = PKT_ALIAS_IGNORED;
+ goto getout;
+ }
+
+ iresult = PKT_ALIAS_IGNORED;
+ if ((ntohs(pip->ip_off) & IP_OFFMASK) == 0) {
+ switch (pip->ip_p) {
+ case IPPROTO_ICMP:
+ iresult = IcmpAliasIn(la, pip);
+ break;
+ case IPPROTO_UDP:
+ iresult = UdpAliasIn(la, pip);
+ break;
+ case IPPROTO_TCP:
+ iresult = TcpAliasIn(la, pip);
+ break;
+#ifdef _KERNEL
+ case IPPROTO_SCTP:
+ iresult = SctpAlias(la, pip, SN_TO_LOCAL);
+ break;
+#endif
+ case IPPROTO_GRE: {
+ int error;
+ struct alias_data ad = {
+ .lnk = NULL,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = NULL,
+ .dport = NULL,
+ .maxpktsize = 0
+ };
+
+ /* Walk out chain. */
+ error = find_handler(IN, IP, la, pip, &ad);
+ if (error == 0)
+ iresult = PKT_ALIAS_OK;
+ else
+ iresult = ProtoAliasIn(la, pip->ip_src,
+ &pip->ip_dst, pip->ip_p, &pip->ip_sum);
+ }
+ break;
+ default:
+ iresult = ProtoAliasIn(la, pip->ip_src, &pip->ip_dst,
+ pip->ip_p, &pip->ip_sum);
+ break;
+ }
+
+ if (ntohs(pip->ip_off) & IP_MF) {
+ struct alias_link *lnk;
+
+ lnk = FindFragmentIn1(la, pip->ip_src, alias_addr, pip->ip_id);
+ if (lnk != NULL) {
+ iresult = PKT_ALIAS_FOUND_HEADER_FRAGMENT;
+ SetFragmentAddr(lnk, pip->ip_dst);
+ } else {
+ iresult = PKT_ALIAS_ERROR;
+ }
+ }
+ } else {
+ iresult = FragmentIn(la, pip->ip_src, &pip->ip_dst, pip->ip_id,
+ &pip->ip_sum);
+ }
+
+getout:
+ return (iresult);
+}
+
+
+
+/* Unregistered address ranges */
+
+/* 10.0.0.0 -> 10.255.255.255 */
+#define UNREG_ADDR_A_LOWER 0x0a000000
+#define UNREG_ADDR_A_UPPER 0x0affffff
+
+/* 172.16.0.0 -> 172.31.255.255 */
+#define UNREG_ADDR_B_LOWER 0xac100000
+#define UNREG_ADDR_B_UPPER 0xac1fffff
+
+/* 192.168.0.0 -> 192.168.255.255 */
+#define UNREG_ADDR_C_LOWER 0xc0a80000
+#define UNREG_ADDR_C_UPPER 0xc0a8ffff
+
+int
+LibAliasOut(struct libalias *la, char *ptr, int maxpacketsize)
+{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasOutLocked(la, ptr, maxpacketsize, 1);
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+int
+LibAliasOutTry(struct libalias *la, char *ptr, int maxpacketsize, int create)
+{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasOutLocked(la, ptr, maxpacketsize, create);
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+static int
+LibAliasOutLocked(struct libalias *la, char *ptr, /* valid IP packet */
+ int maxpacketsize, /* How much the packet data may grow (FTP
+ * and IRC inline changes) */
+ int create /* Create new entries ? */
+)
+{
+ int iresult;
+ struct in_addr addr_save;
+ struct ip *pip;
+
+ if (la->packetAliasMode & PKT_ALIAS_REVERSE) {
+ la->packetAliasMode &= ~PKT_ALIAS_REVERSE;
+ iresult = LibAliasInLocked(la, ptr, maxpacketsize);
+ la->packetAliasMode |= PKT_ALIAS_REVERSE;
+ goto getout;
+ }
+ HouseKeeping(la);
+ ClearCheckNewLink(la);
+ pip = (struct ip *)ptr;
+
+ /* Defense against mangled packets */
+ if (ntohs(pip->ip_len) > maxpacketsize
+ || (pip->ip_hl << 2) > maxpacketsize) {
+ iresult = PKT_ALIAS_IGNORED;
+ goto getout;
+ }
+
+ addr_save = GetDefaultAliasAddress(la);
+ if (la->packetAliasMode & PKT_ALIAS_UNREGISTERED_ONLY) {
+ u_long addr;
+ int iclass;
+
+ iclass = 0;
+ addr = ntohl(pip->ip_src.s_addr);
+ if (addr >= UNREG_ADDR_C_LOWER && addr <= UNREG_ADDR_C_UPPER)
+ iclass = 3;
+ else if (addr >= UNREG_ADDR_B_LOWER && addr <= UNREG_ADDR_B_UPPER)
+ iclass = 2;
+ else if (addr >= UNREG_ADDR_A_LOWER && addr <= UNREG_ADDR_A_UPPER)
+ iclass = 1;
+
+ if (iclass == 0) {
+ SetDefaultAliasAddress(la, pip->ip_src);
+ }
+ } else if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) {
+ SetDefaultAliasAddress(la, pip->ip_src);
+ }
+ iresult = PKT_ALIAS_IGNORED;
+ if ((ntohs(pip->ip_off) & IP_OFFMASK) == 0) {
+ switch (pip->ip_p) {
+ case IPPROTO_ICMP:
+ iresult = IcmpAliasOut(la, pip, create);
+ break;
+ case IPPROTO_UDP:
+ iresult = UdpAliasOut(la, pip, maxpacketsize, create);
+ break;
+ case IPPROTO_TCP:
+ iresult = TcpAliasOut(la, pip, maxpacketsize, create);
+ break;
+#ifdef _KERNEL
+ case IPPROTO_SCTP:
+ iresult = SctpAlias(la, pip, SN_TO_GLOBAL);
+ break;
+#endif
+ case IPPROTO_GRE: {
+ int error;
+ struct alias_data ad = {
+ .lnk = NULL,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = NULL,
+ .dport = NULL,
+ .maxpktsize = 0
+ };
+ /* Walk out chain. */
+ error = find_handler(OUT, IP, la, pip, &ad);
+ if (error == 0)
+ iresult = PKT_ALIAS_OK;
+ else
+ iresult = ProtoAliasOut(la, &pip->ip_src,
+ pip->ip_dst, pip->ip_p, &pip->ip_sum, create);
+ }
+ break;
+ default:
+ iresult = ProtoAliasOut(la, &pip->ip_src,
+ pip->ip_dst, pip->ip_p, &pip->ip_sum, create);
+ break;
+ }
+ } else {
+ iresult = FragmentOut(la, &pip->ip_src, &pip->ip_sum);
+ }
+
+ SetDefaultAliasAddress(la, addr_save);
+getout:
+ return (iresult);
+}
+
+int
+LibAliasUnaliasOut(struct libalias *la, char *ptr, /* valid IP packet */
+ int maxpacketsize /* for error checking */
+)
+{
+ struct ip *pip;
+ struct icmp *ic;
+ struct udphdr *ud;
+ struct tcphdr *tc;
+ struct alias_link *lnk;
+ int iresult = PKT_ALIAS_IGNORED;
+
+ LIBALIAS_LOCK(la);
+ pip = (struct ip *)ptr;
+
+ /* Defense against mangled packets */
+ if (ntohs(pip->ip_len) > maxpacketsize
+ || (pip->ip_hl << 2) > maxpacketsize)
+ goto getout;
+
+ ud = (struct udphdr *)ip_next(pip);
+ tc = (struct tcphdr *)ip_next(pip);
+ ic = (struct icmp *)ip_next(pip);
+
+ /* Find a link */
+ if (pip->ip_p == IPPROTO_UDP)
+ lnk = FindUdpTcpIn(la, pip->ip_dst, pip->ip_src,
+ ud->uh_dport, ud->uh_sport,
+ IPPROTO_UDP, 0);
+ else if (pip->ip_p == IPPROTO_TCP)
+ lnk = FindUdpTcpIn(la, pip->ip_dst, pip->ip_src,
+ tc->th_dport, tc->th_sport,
+ IPPROTO_TCP, 0);
+ else if (pip->ip_p == IPPROTO_ICMP)
+ lnk = FindIcmpIn(la, pip->ip_dst, pip->ip_src, ic->icmp_id, 0);
+ else
+ lnk = NULL;
+
+ /* Change it from an aliased packet to an unaliased packet */
+ if (lnk != NULL) {
+ if (pip->ip_p == IPPROTO_UDP || pip->ip_p == IPPROTO_TCP) {
+ int accumulate;
+ struct in_addr original_address;
+ u_short original_port;
+
+ original_address = GetOriginalAddress(lnk);
+ original_port = GetOriginalPort(lnk);
+
+ /* Adjust TCP/UDP checksum */
+ accumulate = twowords(&pip->ip_src);
+ accumulate -= twowords(&original_address);
+
+ if (pip->ip_p == IPPROTO_UDP) {
+ accumulate += ud->uh_sport;
+ accumulate -= original_port;
+ ADJUST_CHECKSUM(accumulate, ud->uh_sum);
+ } else {
+ accumulate += tc->th_sport;
+ accumulate -= original_port;
+ ADJUST_CHECKSUM(accumulate, tc->th_sum);
+ }
+
+ /* Adjust IP checksum */
+ DifferentialChecksum(&pip->ip_sum,
+ &original_address, &pip->ip_src, 2);
+
+ /* Un-alias source address and port number */
+ pip->ip_src = original_address;
+ if (pip->ip_p == IPPROTO_UDP)
+ ud->uh_sport = original_port;
+ else
+ tc->th_sport = original_port;
+
+ iresult = PKT_ALIAS_OK;
+
+ } else if (pip->ip_p == IPPROTO_ICMP) {
+
+ int accumulate;
+ struct in_addr original_address;
+ u_short original_id;
+
+ original_address = GetOriginalAddress(lnk);
+ original_id = GetOriginalPort(lnk);
+
+ /* Adjust ICMP checksum */
+ accumulate = twowords(&pip->ip_src);
+ accumulate -= twowords(&original_address);
+ accumulate += ic->icmp_id;
+ accumulate -= original_id;
+ ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+ /* Adjust IP checksum */
+ DifferentialChecksum(&pip->ip_sum,
+ &original_address, &pip->ip_src, 2);
+
+ /* Un-alias source address and port number */
+ pip->ip_src = original_address;
+ ic->icmp_id = original_id;
+
+ iresult = PKT_ALIAS_OK;
+ }
+ }
+getout:
+ LIBALIAS_UNLOCK(la);
+ return (iresult);
+
+}
+
+#ifndef _KERNEL
+
+int
+LibAliasRefreshModules(void)
+{
+ char buf[256], conf[] = "/etc/libalias.conf";
+ FILE *fd;
+ int i, len;
+
+ fd = fopen(conf, "r");
+ if (fd == NULL)
+ err(1, "fopen(%s)", conf);
+
+ LibAliasUnLoadAllModule();
+
+ for (;;) {
+ fgets(buf, 256, fd);
+ if (feof(fd))
+ break;
+ len = strlen(buf);
+ if (len > 1) {
+ for (i = 0; i < len; i++)
+ if (!isspace(buf[i]))
+ break;
+ if (buf[i] == '#')
+ continue;
+ buf[len - 1] = '\0';
+ LibAliasLoadModule(buf);
+ }
+ }
+ fclose(fd);
+ return (0);
+}
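+
+/*
+ * The loop above accepts one shared-object path per line; a line whose
+ * first non-blank character is '#' is skipped, and the trailing newline
+ * is stripped before the path is handed to LibAliasLoadModule().  A
+ * hypothetical /etc/libalias.conf might therefore look like:
+ *
+ * # protocol helpers to load on refresh
+ * /usr/lib/libalias_ftp.so
+ * /usr/lib/libalias_irc.so
+ */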
+
+int
+LibAliasLoadModule(char *path)
+{
+ struct dll *t;
+ void *handle;
+ struct proto_handler *m;
+ const char *error;
+ moduledata_t *p;
+
+ handle = dlopen (path, RTLD_LAZY);
+ if (!handle) {
+ fprintf(stderr, "%s\n", dlerror());
+ return (EINVAL);
+ }
+
+ p = dlsym(handle, "alias_mod");
+ if ((error = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", error);
+ return (EINVAL);
+ }
+
+ t = malloc(sizeof(struct dll));
+ if (t == NULL)
+ return (ENOMEM);
+ strncpy(t->name, p->name, DLL_LEN);
+ t->handle = handle;
+ if (attach_dll(t) == EEXIST) {
+ free(t);
+ fprintf(stderr, "dll conflict\n");
+ return (EEXIST);
+ }
+
+ m = dlsym(t->handle, "handlers");
+ if ((error = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", error);
+ return (EINVAL);
+ }
+
+ LibAliasAttachHandlers(m);
+ return (0);
+}
+
+int
+LibAliasUnLoadAllModule(void)
+{
+ struct dll *t;
+ struct proto_handler *p;
+
+ /* Unload all modules then reload everything. */
+ while ((p = first_handler()) != NULL) {
+ detach_handler(p);
+ }
+ while ((t = walk_dll_chain()) != NULL) {
+ dlclose(t->handle);
+ free(t);
+ }
+ return (1);
+}
+
+#endif
+
+#ifdef _KERNEL
+/*
+ * m_megapullup() - this function is a big hack.
+ * Thankfully, it's only used in ng_nat and ipfw+nat.
+ *
+ * It allocates an mbuf with a cluster and copies the specified part of the
+ * chain into the cluster, so that it is all contiguous and can be accessed
+ * via a plain (char *) pointer. This is required because libalias doesn't
+ * know how to handle mbuf chains.
+ *
+ * On success, m_megapullup returns an mbuf (possibly with cluster) containing
+ * the input packet, on failure NULL. The input packet is always consumed.
+ */
+struct mbuf *
+m_megapullup(struct mbuf *m, int len) {
+ struct mbuf *mcl;
+
+ if (len > m->m_pkthdr.len)
+ goto bad;
+
+ /* Do not reallocate the packet if it is contiguous,
+ * writable and has some extra space for expansion.
+ * XXX: The constant of 100 bytes is completely empirical. */
+#define RESERVE 100
+ if (m->m_next == NULL && M_WRITABLE(m) && M_TRAILINGSPACE(m) >= RESERVE)
+ return (m);
+
+ if (len <= MCLBYTES - RESERVE) {
+ mcl = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ } else if (len < MJUM16BYTES) {
+ int size;
+ if (len <= MJUMPAGESIZE - RESERVE) {
+ size = MJUMPAGESIZE;
+ } else if (len <= MJUM9BYTES - RESERVE) {
+ size = MJUM9BYTES;
+ } else {
+ size = MJUM16BYTES;
+ }
+ mcl = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size);
+ } else {
+ goto bad;
+ }
+ if (mcl == NULL)
+ goto bad;
+
+ m_move_pkthdr(mcl, m);
+ m_copydata(m, 0, len, mtod(mcl, caddr_t));
+ mcl->m_len = mcl->m_pkthdr.len = len;
+ m_freem(m);
+
+ return (mcl);
+bad:
+ m_freem(m);
+ return (NULL);
+}
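+
+/*
+ * A sketch of the typical caller pattern (cf. ipfw nat / ng_nat): make
+ * the whole IP packet contiguous, then hand libalias a flat buffer.
+ * The libalias instance "la" is assumed to be configured elsewhere;
+ * the maxpacketsize argument lets libalias grow the packet into the
+ * spare space left in the cluster.  Illustration only, not part of the
+ * libalias API.
+ */
+#if 0
+static int
+example_alias_out(struct libalias *la, struct mbuf **mp)
+{
+ struct mbuf *m;
+
+ m = m_megapullup(*mp, (*mp)->m_pkthdr.len);
+ if (m == NULL) {
+ *mp = NULL; /* the input packet was consumed */
+ return (PKT_ALIAS_ERROR);
+ }
+ *mp = m;
+ return (LibAliasOut(la, mtod(m, char *),
+ m->m_len + M_TRAILINGSPACE(m)));
+}
+#endif /* 0 */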
+#endif
diff --git a/freebsd/sys/netinet/libalias/alias.h b/freebsd/sys/netinet/libalias/alias.h
new file mode 100644
index 00000000..f835e1b7
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias.h
@@ -0,0 +1,232 @@
+/* lint -save -library Flexelint comment for external headers */
+
+/*-
+ * Copyright (c) 2001 Charles Mott <cm@linktel.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Alias.h defines the outside world interfaces for the packet aliasing
+ * software.
+ *
+ * This software is placed into the public domain with no restrictions on its
+ * distribution.
+ */
+
+#ifndef _ALIAS_HH_
+#define _ALIAS_HH_
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+
+#define LIBALIAS_BUF_SIZE 128
+#ifdef _KERNEL
+/*
+ * The kernel version of libalias does not support these features.
+ */
+#define NO_FW_PUNCH
+#define NO_USE_SOCKETS
+#endif
+
+/*
+ * The external interface to libalias, the packet aliasing engine.
+ *
+ * There are two sets of functions:
+ *
+ * PacketAlias*() the old API which doesn't take an instance pointer
+ * and therefore can only have one packet engine at a time.
+ *
+ * LibAlias*() the new API which takes as first argument a pointer to
+ * the instance of the packet aliasing engine.
+ *
+ * The functions otherwise correspond to each other one for one, except
+ * for LibAliasUnaliasOut()/PacketUnaliasOut(), which was misnamed in
+ * the old API.
+ */
+
+/*
+ * The instance structure
+ */
+struct libalias;
+
+/*
+ * An anonymous structure, a pointer to which is returned from
+ * PacketAliasRedirectAddr(), PacketAliasRedirectPort() or
+ * PacketAliasRedirectProto(), passed to PacketAliasAddServer(),
+ * and freed by PacketAliasRedirectDelete().
+ */
+struct alias_link;
+
+/* Initialization and control functions. */
+struct libalias *LibAliasInit(struct libalias *);
+void LibAliasSetAddress(struct libalias *, struct in_addr _addr);
+void LibAliasSetFWBase(struct libalias *, unsigned int _base, unsigned int _num);
+void LibAliasSetSkinnyPort(struct libalias *, unsigned int _port);
+unsigned int
+ LibAliasSetMode(struct libalias *, unsigned int _flags, unsigned int _mask);
+void LibAliasUninit(struct libalias *);
+
+/* Packet Handling functions. */
+int LibAliasIn (struct libalias *, char *_ptr, int _maxpacketsize);
+int LibAliasOut(struct libalias *, char *_ptr, int _maxpacketsize);
+int LibAliasOutTry(struct libalias *, char *_ptr, int _maxpacketsize, int _create);
+int LibAliasUnaliasOut(struct libalias *, char *_ptr, int _maxpacketsize);
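+
+/*
+ * A minimal calling sequence for the instance-based API described
+ * above, for a single outgoing packet; LibAliasIn() is the mirror
+ * image for packets arriving from the outside.  The packet buffer is
+ * assumed to come from elsewhere (a divert socket, for example).
+ * Illustration only:
+ */
+#if 0
+static int
+libalias_out_example(struct in_addr alias_addr, char *pkt, int maxlen)
+{
+ struct libalias *la;
+ int ret;
+
+ la = LibAliasInit(NULL); /* create a fresh instance */
+ LibAliasSetAddress(la, alias_addr); /* public (aliasing) address */
+ ret = LibAliasOut(la, pkt, maxlen); /* rewrite the packet in place */
+ LibAliasUninit(la); /* normally kept for the whole session */
+ return (ret); /* PKT_ALIAS_OK on success */
+}
+#endif /* 0 */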
+
+/* Port and address redirection functions. */
+
+int
+LibAliasAddServer(struct libalias *, struct alias_link *_lnk,
+ struct in_addr _addr, unsigned short _port);
+struct alias_link *
+LibAliasRedirectAddr(struct libalias *, struct in_addr _src_addr,
+ struct in_addr _alias_addr);
+int LibAliasRedirectDynamic(struct libalias *, struct alias_link *_lnk);
+void LibAliasRedirectDelete(struct libalias *, struct alias_link *_lnk);
+struct alias_link *
+LibAliasRedirectPort(struct libalias *, struct in_addr _src_addr,
+ unsigned short _src_port, struct in_addr _dst_addr,
+ unsigned short _dst_port, struct in_addr _alias_addr,
+ unsigned short _alias_port, unsigned char _proto);
+struct alias_link *
+LibAliasRedirectProto(struct libalias *, struct in_addr _src_addr,
+ struct in_addr _dst_addr, struct in_addr _alias_addr,
+ unsigned char _proto);
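+
+/*
+ * A sketch of a static inbound redirection ("redirect_port" in natd
+ * terms): TCP connections arriving for the aliasing address on port
+ * 8080 are forwarded to an internal server on port 80.  Ports are in
+ * network byte order; an unspecified (zero) remote address and port
+ * match any peer.  Illustration only:
+ */
+#if 0
+static struct alias_link *
+redirect_example(struct libalias *la, struct in_addr alias_addr,
+ struct in_addr server_addr)
+{
+ struct in_addr any;
+
+ any.s_addr = INADDR_ANY;
+ return (LibAliasRedirectPort(la,
+ server_addr, htons(80), /* local (unaliased) endpoint */
+ any, 0, /* remote peer: unspecified */
+ alias_addr, htons(8080), /* public endpoint */
+ IPPROTO_TCP));
+}
+#endif /* 0 */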
+
+/* Fragment Handling functions. */
+void LibAliasFragmentIn(struct libalias *, char *_ptr, char *_ptr_fragment);
+char *LibAliasGetFragment(struct libalias *, char *_ptr);
+int LibAliasSaveFragment(struct libalias *, char *_ptr);
+
+/* Miscellaneous functions. */
+int LibAliasCheckNewLink(struct libalias *);
+unsigned short
+ LibAliasInternetChecksum(struct libalias *, unsigned short *_ptr, int _nbytes);
+void LibAliasSetTarget(struct libalias *, struct in_addr _target_addr);
+
+/* Transparent proxying routines. */
+int LibAliasProxyRule(struct libalias *, const char *_cmd);
+
+/* Module handling API */
+int LibAliasLoadModule(char *);
+int LibAliasUnLoadAllModule(void);
+int LibAliasRefreshModules(void);
+
+/* Mbuf helper function. */
+struct mbuf *m_megapullup(struct mbuf *, int);
+
+/*
+ * Mode flags and other constants.
+ */
+
+
+/* Mode flags, set using PacketAliasSetMode() */
+
+/*
+ * If PKT_ALIAS_LOG is set, a message will be printed to /var/log/alias.log
+ * every time a link is created or deleted. This is useful for debugging.
+ */
+#define PKT_ALIAS_LOG 0x01
+
+/*
+ * If PKT_ALIAS_DENY_INCOMING is set, then incoming connections (e.g. to ftp,
+ * telnet or web servers) will be prevented by the aliasing mechanism.
+ */
+#define PKT_ALIAS_DENY_INCOMING 0x02
+
+/*
+ * If PKT_ALIAS_SAME_PORTS is set, an attempt is made to send packets from
+ * the same port they originated on. This allows e.g. rsh to work *99% of
+ * the time*, but _not_ 100% (it will be slightly flaky instead of not
+ * working at all). This mode bit is set by PacketAliasInit(), so it is a
+ * default mode of operation.
+ */
+#define PKT_ALIAS_SAME_PORTS 0x04
+
+/*
+ * If PKT_ALIAS_USE_SOCKETS is set, then for partially specified links (e.g.
+ * when the destination port and/or address is zero), the packet aliasing
+ * engine will attempt to allocate a socket for the aliasing port it chooses.
+ * This will avoid interference with the host machine. Fully specified links
+ * do not
+ * require this. This bit is set after a call to PacketAliasInit(), so it is
+ * a default mode of operation.
+ */
+#ifndef NO_USE_SOCKETS
+#define PKT_ALIAS_USE_SOCKETS 0x08
+#endif
+/*-
+ * If PKT_ALIAS_UNREGISTERED_ONLY is set, then only packets with
+ * unregistered source addresses will be aliased. Unregistered ("private")
+ * addresses are those in the following ranges:
+ *
+ * 10.0.0.0 -> 10.255.255.255
+ * 172.16.0.0 -> 172.31.255.255
+ * 192.168.0.0 -> 192.168.255.255
+ */
+#define PKT_ALIAS_UNREGISTERED_ONLY 0x10
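+
+/*
+ * For illustration, a test equivalent to the three ranges listed above
+ * (this helper is not part of the library API):
+ */
+#if 0
+static int
+addr_is_private(struct in_addr a)
+{
+ u_int32_t ip = ntohl(a.s_addr);
+
+ return ((ip & 0xff000000) == 0x0a000000 || /* 10.0.0.0/8 */
+ (ip & 0xfff00000) == 0xac100000 || /* 172.16.0.0/12 */
+ (ip & 0xffff0000) == 0xc0a80000); /* 192.168.0.0/16 */
+}
+#endif /* 0 */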
+
+/*
+ * If PKT_ALIAS_RESET_ON_ADDR_CHANGE is set, then the table of dynamic
+ * aliasing links will be reset whenever PacketAliasSetAddress() changes the
+ * default aliasing address. If the default aliasing address is left
+ * unchanged by this function call, then the table of dynamic aliasing links
+ * will be left intact. This bit is set after a call to PacketAliasInit().
+ */
+#define PKT_ALIAS_RESET_ON_ADDR_CHANGE 0x20
+
+#ifndef NO_FW_PUNCH
+/*
+ * If PKT_ALIAS_PUNCH_FW is set, active FTP and IRC DCC connections will
+ * create a 'hole' in the firewall to allow the transfers to work. The
+ * ipfw rule number that the hole is created with is controlled by
+ * PacketAliasSetFWBase(). The hole will be attached to that
+ * particular alias_link, so when the link goes away the hole is deleted.
+ */
+#define PKT_ALIAS_PUNCH_FW 0x100
+#endif
+
+/*
+ * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only
+ * transparent proxying is performed.
+ */
+#define PKT_ALIAS_PROXY_ONLY 0x40
+
+/*
+ * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and
+ * PacketAliasOut() are reversed.
+ */
+#define PKT_ALIAS_REVERSE 0x80
+
+/* Function return codes. */
+#define PKT_ALIAS_ERROR -1
+#define PKT_ALIAS_OK 1
+#define PKT_ALIAS_IGNORED 2
+#define PKT_ALIAS_UNRESOLVED_FRAGMENT 3
+#define PKT_ALIAS_FOUND_HEADER_FRAGMENT 4
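+
+/*
+ * The mode bits above are manipulated with LibAliasSetMode(): the
+ * second argument carries the new bit values and the third selects
+ * which bits are affected.  A sketch ("la" being an already
+ * initialized instance), switching PKT_ALIAS_SAME_PORTS on and
+ * PKT_ALIAS_DENY_INCOMING off while leaving all other bits alone:
+ */
+#if 0
+ (void)LibAliasSetMode(la,
+ PKT_ALIAS_SAME_PORTS, /* values for the selected bits */
+ PKT_ALIAS_SAME_PORTS | PKT_ALIAS_DENY_INCOMING); /* bits to change */
+#endif /* 0 */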
+
+#endif /* !_ALIAS_HH_ */
+
+/* lint -restore */
diff --git a/freebsd/sys/netinet/libalias/alias_cuseeme.c b/freebsd/sys/netinet/libalias/alias_cuseeme.c
new file mode 100644
index 00000000..90f2aaae
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_cuseeme.c
@@ -0,0 +1,230 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1998 Brian Somers <brian@Awfulhak.org>
+ * with the aid of code written by
+ * Junichi SATOH <junichi@astec.co.jp> 1996, 1997.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#else
+#include <freebsd/errno.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/stdio.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/udp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+#define CUSEEME_PORT_NUMBER 7648
+
+static void
+AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip,
+ struct alias_link *lnk);
+
+static void
+AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip,
+ struct in_addr original_addr);
+
+static int
+fingerprint(struct libalias *la, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->oaddr == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == CUSEEME_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleCUSeeMeIn(la, pip, *ah->oaddr);
+ return (0);
+}
+
+static int
+protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleCUSeeMeOut(la, pip, ah->lnk);
+ return (0);
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 120,
+ .dir = OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerout
+ },
+ {
+ .pri = 120,
+ .dir = IN,
+ .proto = UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerin
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t
+alias_mod = {
+ "alias_cuseeme", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_cuseeme, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_cuseeme, 1);
+MODULE_DEPEND(alias_cuseeme, libalias, 1, 1, 1);
+#endif
+
+/* CU-SeeMe Data Header */
+struct cu_header {
+ u_int16_t dest_family;
+ u_int16_t dest_port;
+ u_int32_t dest_addr;
+ int16_t family;
+ u_int16_t port;
+ u_int32_t addr;
+ u_int32_t seq;
+ u_int16_t msg;
+ u_int16_t data_type;
+ u_int16_t packet_len;
+};
+
+/* Open Continue Header */
+struct oc_header {
+ u_int16_t client_count; /* Number of client info structs */
+ u_int32_t seq_no;
+ char user_name [20];
+ char reserved [4]; /* flags, version stuff, etc */
+};
+
+/* client info structures */
+struct client_info {
+ u_int32_t address;/* Client address */
+ char reserved [8]; /* Flags, pruning bitfield, packet
+ * counts etc */
+};
+
+static void
+AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip, struct alias_link *lnk)
+{
+ struct udphdr *ud = ip_next(pip);
+
+ if (ntohs(ud->uh_ulen) - sizeof(struct udphdr) >= sizeof(struct cu_header)) {
+ struct cu_header *cu;
+ struct alias_link *cu_lnk;
+
+ cu = udp_next(ud);
+ if (cu->addr)
+ cu->addr = (u_int32_t) GetAliasAddress(lnk).s_addr;
+
+ cu_lnk = FindUdpTcpOut(la, pip->ip_src, GetDestAddress(lnk),
+ ud->uh_dport, 0, IPPROTO_UDP, 1);
+
+#ifndef NO_FW_PUNCH
+ if (cu_lnk)
+ PunchFWHole(cu_lnk);
+#endif
+ }
+}
+
+static void
+AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip, struct in_addr original_addr)
+{
+ struct in_addr alias_addr;
+ struct udphdr *ud;
+ struct cu_header *cu;
+ struct oc_header *oc;
+ struct client_info *ci;
+ char *end;
+ int i;
+
+ (void)la;
+ alias_addr.s_addr = pip->ip_dst.s_addr;
+ ud = ip_next(pip);
+ cu = udp_next(ud);
+ oc = (struct oc_header *)(cu + 1);
+ ci = (struct client_info *)(oc + 1);
+ end = (char *)ud + ntohs(ud->uh_ulen);
+
+ if ((char *)oc <= end) {
+ if (cu->dest_addr)
+ cu->dest_addr = (u_int32_t) original_addr.s_addr;
+ if (ntohs(cu->data_type) == 101)
+ /* Find and change our address */
+ for (i = 0; (char *)(ci + 1) <= end && i < oc->client_count; i++, ci++)
+ if (ci->address == (u_int32_t) alias_addr.s_addr) {
+ ci->address = (u_int32_t) original_addr.s_addr;
+ break;
+ }
+ }
+}
diff --git a/freebsd/sys/netinet/libalias/alias_db.c b/freebsd/sys/netinet/libalias/alias_db.c
new file mode 100644
index 00000000..4b003366
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_db.c
@@ -0,0 +1,2940 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 Charles Mott <cm@linktel.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ Alias_db.c encapsulates all data structures used for storing
+ packet aliasing data. Other parts of the aliasing software
+ access data through functions provided in this file.
+
+ Data storage is based on the notion of a "link", which is
+ established for ICMP echo/reply packets, UDP datagrams and
+ TCP stream connections. A link stores the original source
+ and destination addresses. For UDP and TCP, it also stores
+ source and destination port numbers, as well as an alias
+ port number. Links are also used to store information about
+ fragments.
+
+ There is a facility for sweeping through and deleting old
+ links as new packets are sent through. A simple timeout is
+ used for ICMP and UDP links. TCP links are left alone unless
+ there is an incomplete connection, in which case the link
+ can be deleted after a certain amount of time.
+
+
+ Initial version: August, 1996 (cjm)
+
+ Version 1.4: September 16, 1996 (cjm)
+ Facility for handling incoming links added.
+
+ Version 1.6: September 18, 1996 (cjm)
+ ICMP data handling simplified.
+
+ Version 1.7: January 9, 1997 (cjm)
+ Fragment handling simplified.
+ Saves pointers for unresolved fragments.
+ Permits links for unspecified remote ports
+ or unspecified remote addresses.
+ Fixed bug which did not properly zero port
+ table entries after a link was deleted.
+ Cleaned up some obsolete comments.
+
+ Version 1.8: January 14, 1997 (cjm)
+ Fixed data type error in StartPoint().
+ (This error did not exist prior to v1.7
+ and was discovered and fixed by Ari Suutari)
+
+ Version 1.9: February 1, 1997
+ Optionally, connections initiated from the packet aliasing host
+ machine will not have their port number aliased unless it
+ conflicts with an aliasing port already being used. (cjm)
+
+ All options that were previously #ifdef'ed are now available through
+ a new interface, SetPacketAliasMode(). This allows run-time
+ control (which is now available in PPP+pktAlias through the
+ 'alias' keyword). (ee)
+
+ Added ability to create an alias port without
+ either destination address or port specified.
+ port type = ALIAS_PORT_UNKNOWN_DEST_ALL (ee)
+
+ Removed K&R style function headers
+ and general cleanup. (ee)
+
+ Added packetAliasMode to replace compiler #defines (ee)
+
+ Allocates sockets for partially specified
+ ports if ALIAS_USE_SOCKETS defined. (cjm)
+
+ Version 2.0: March, 1997
+ SetAliasAddress() will now clean up alias links
+ if the aliasing address is changed. (cjm)
+
+ PacketAliasPermanentLink() function added to support permanent
+ links. (J. Fortes suggested the need for this.)
+ Examples:
+
+ (192.168.0.1, port 23) <-> alias port 6002, unknown dest addr/port
+
+ (192.168.0.2, port 21) <-> alias port 3604, known dest addr
+ unknown dest port
+
+ These permanent links allow for incoming connections to
+ machines on the local network. They can be given with a
+ user-chosen amount of specificity, with increasing specificity
+ meaning more security. (cjm)
+
+ Quite a bit of rework to the basic engine. The portTable[]
+ array, which kept track of which ports were in use, was replaced
+ by a table/linked list structure. (cjm)
+
+ SetExpire() function added. (cjm)
+
+ DeleteLink() no longer frees memory associated with a pointer
+ to a fragment (this bug was first recognized by E. Eklund in
+ v1.9).
+
+ Version 2.1: May, 1997 (cjm)
+ Packet aliasing engine reworked so that it can handle
+ multiple external addresses rather than just a single
+ host address.
+
+ PacketAliasRedirectPort() and PacketAliasRedirectAddr()
+ added to the API. The first function is a more generalized
+ version of PacketAliasPermanentLink(). The second function
+ implements static network address translation.
+
+ Version 3.2: July, 2000 (salander and satoh)
+ Added FindNewPortGroup to get contiguous range of port values.
+
+ Added QueryUdpTcpIn and QueryUdpTcpOut to look for an aliasing
+ link but not actually add one.
+
+ Added FindRtspOut, which is closely derived from FindUdpTcpOut,
+ except that the alias port (from FindNewPortGroup) is provided
+ as input.
+
+ See HISTORY file for additional revisions.
+*/
+
+#ifdef _KERNEL
+#include <freebsd/machine/stdarg.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/syslog.h>
+#else
+#include <freebsd/stdarg.h>
+#include <freebsd/stdlib.h>
+#include <freebsd/stdio.h>
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/unistd.h>
+#endif
+
+#include <freebsd/sys/socket.h>
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#include <freebsd/net/if.h>
+#else
+#include <freebsd/local/alias.h>
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+static LIST_HEAD(, libalias) instancehead = LIST_HEAD_INITIALIZER(instancehead);
+
+
+/*
+ Constants (note: constants are also defined
+ near relevant functions or structs)
+*/
+
+/* Parameters used for cleanup of expired links */
+/* NOTE: ALIAS_CLEANUP_INTERVAL_SECS must be less than LINK_TABLE_OUT_SIZE */
+#define ALIAS_CLEANUP_INTERVAL_SECS 64
+#define ALIAS_CLEANUP_MAX_SPOKES (LINK_TABLE_OUT_SIZE/5)
+
+/* Timeouts (in seconds) for different link types */
+#define ICMP_EXPIRE_TIME 60
+#define UDP_EXPIRE_TIME 60
+#define PROTO_EXPIRE_TIME 60
+#define FRAGMENT_ID_EXPIRE_TIME 10
+#define FRAGMENT_PTR_EXPIRE_TIME 30
+
+/* TCP link expire time for different cases */
+/* When the link has been used and closed - minimal grace time to
+ allow ACKs and potential re-connect in FTP (XXX - is this allowed?) */
+#ifndef TCP_EXPIRE_DEAD
+#define TCP_EXPIRE_DEAD 10
+#endif
+
+/* When the link has been used and closed on one side - the other side
+ is allowed to still send data */
+#ifndef TCP_EXPIRE_SINGLEDEAD
+#define TCP_EXPIRE_SINGLEDEAD 90
+#endif
+
+/* When the link isn't yet up */
+#ifndef TCP_EXPIRE_INITIAL
+#define TCP_EXPIRE_INITIAL 300
+#endif
+
+/* When the link is up */
+#ifndef TCP_EXPIRE_CONNECTED
+#define TCP_EXPIRE_CONNECTED 86400
+#endif
+
+
+/* Dummy port number codes used for FindLinkIn/Out() and AddLink().
+ These constants can be anything except zero, which indicates an
+ unknown port number. */
+
+#define NO_DEST_PORT 1
+#define NO_SRC_PORT 1
+
+
+
+/* Data Structures
+
+ The fundamental data structure used in this program is
+ "struct alias_link". Whenever a TCP connection is made,
+ a UDP datagram is sent out, or an ICMP echo request is made,
+ a link record is made (if it has not already been created).
+ The link record is identified by the source address/port
+ and the destination address/port. In the case of an ICMP
+ echo request, the source port is treated as being equivalent
+ to the 16-bit ID number of the ICMP packet.
+
+ The link record also can store some auxiliary data. For
+ TCP connections that have had sequence and acknowledgment
+ modifications, data space is available to track these changes.
+ A state field is used to keep track of changes to the TCP
+ connection state. ID numbers of fragments can also be
+ stored in the auxiliary space. Pointers to unresolved
+ fragments can also be stored.
+
+ The link records support two independent chainings. Lookup
+ tables for input and output hold the initial pointers to
+ the link chains. On input, the lookup table indexes on alias
+ port and link type. On output, the lookup table indexes on
+ source address, destination address, source port, destination
+ port and link type.
+*/
+
+struct ack_data_record { /* used to save changes to ACK/sequence
+ * numbers */
+ u_long ack_old;
+ u_long ack_new;
+ int delta;
+ int active;
+};
+
+struct tcp_state { /* Information about TCP connection */
+ int in; /* State for outside -> inside */
+ int out; /* State for inside -> outside */
+ int index; /* Index to ACK data array */
+ int ack_modified; /* Indicates whether ACK and
+ * sequence numbers have
+ * been modified */
+};
+
+#define N_LINK_TCP_DATA 3 /* Number of distinct ACK number changes
+ * saved for a modified TCP stream */
+struct tcp_dat {
+ struct tcp_state state;
+ struct ack_data_record ack[N_LINK_TCP_DATA];
+ int fwhole; /* Which firewall record is used for this
+ * hole? */
+};
+
+struct server { /* LSNAT server pool (circular list) */
+ struct in_addr addr;
+ u_short port;
+ struct server *next;
+};
+
+struct alias_link { /* Main data structure */
+ struct libalias *la;
+ struct in_addr src_addr; /* Address and port information */
+ struct in_addr dst_addr;
+ struct in_addr alias_addr;
+ struct in_addr proxy_addr;
+ u_short src_port;
+ u_short dst_port;
+ u_short alias_port;
+ u_short proxy_port;
+ struct server *server;
+
+ int link_type; /* Type of link: TCP, UDP, ICMP,
+ * proto, frag */
+
+/* values for link_type */
+#define LINK_ICMP IPPROTO_ICMP
+#define LINK_UDP IPPROTO_UDP
+#define LINK_TCP IPPROTO_TCP
+#define LINK_FRAGMENT_ID (IPPROTO_MAX + 1)
+#define LINK_FRAGMENT_PTR (IPPROTO_MAX + 2)
+#define LINK_ADDR (IPPROTO_MAX + 3)
+#define LINK_PPTP (IPPROTO_MAX + 4)
+
+ int flags; /* indicates special characteristics */
+ int pflags; /* protocol-specific flags */
+
+/* flag bits */
+#define LINK_UNKNOWN_DEST_PORT 0x01
+#define LINK_UNKNOWN_DEST_ADDR 0x02
+#define LINK_PERMANENT 0x04
+#define LINK_PARTIALLY_SPECIFIED 0x03 /* logical-or of first two bits */
+#define LINK_UNFIREWALLED 0x08
+
+ int timestamp; /* Time link was last accessed */
+ int expire_time; /* Expire time for link */
+#ifndef NO_USE_SOCKETS
+ int sockfd; /* socket descriptor */
+#endif
+ LIST_ENTRY (alias_link) list_out; /* Linked list of
+ * pointers for */
+ LIST_ENTRY (alias_link) list_in; /* input and output
+ * lookup tables */
+
+ union { /* Auxiliary data */
+ char *frag_ptr;
+ struct in_addr frag_addr;
+ struct tcp_dat *tcp;
+ } data;
+};
+
+/* Clean up procedure. */
+static void finishoff(void);
+
+/* Kernel module definition. */
+#ifdef _KERNEL
+MALLOC_DEFINE(M_ALIAS, "libalias", "packet aliasing");
+
+MODULE_VERSION(libalias, 1);
+
+static int
+alias_mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ handler_chain_init();
+ break;
+ case MOD_QUIESCE:
+ case MOD_UNLOAD:
+ handler_chain_destroy();
+ finishoff();
+ error = 0;
+ break;
+ default:
+ error = EINVAL;
+ }
+
+ return (error);
+}
+
+static moduledata_t alias_mod = {
+ "alias", alias_mod_handler, NULL
+};
+
+DECLARE_MODULE(alias, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+#endif
+
+/* Internal utility routines (used only in alias_db.c)
+
+Lookup table starting points:
+ StartPointIn() -- link table initial search point for
+ incoming packets
+ StartPointOut() -- link table initial search point for
+ outgoing packets
+
+Miscellaneous:
+ SeqDiff() -- difference between two TCP sequences
+ ShowAliasStats() -- send alias statistics to a monitor file
+*/
+
+
+/* Local prototypes */
+static u_int StartPointIn(struct in_addr, u_short, int);
+
+static u_int
+StartPointOut(struct in_addr, struct in_addr,
+ u_short, u_short, int);
+
+static int SeqDiff(u_long, u_long);
+
+#ifndef NO_FW_PUNCH
+/* Firewall control */
+static void InitPunchFW(struct libalias *);
+static void UninitPunchFW(struct libalias *);
+static void ClearFWHole(struct alias_link *);
+
+#endif
+
+/* Log file control */
+static void ShowAliasStats(struct libalias *);
+static int InitPacketAliasLog(struct libalias *);
+static void UninitPacketAliasLog(struct libalias *);
+
+void SctpShowAliasStats(struct libalias *la);
+
+static u_int
+StartPointIn(struct in_addr alias_addr,
+ u_short alias_port,
+ int link_type)
+{
+ u_int n;
+
+ n = alias_addr.s_addr;
+ if (link_type != LINK_PPTP)
+ n += alias_port;
+ n += link_type;
+ return (n % LINK_TABLE_IN_SIZE);
+}
+
+
+static u_int
+StartPointOut(struct in_addr src_addr, struct in_addr dst_addr,
+ u_short src_port, u_short dst_port, int link_type)
+{
+ u_int n;
+
+ n = src_addr.s_addr;
+ n += dst_addr.s_addr;
+ if (link_type != LINK_PPTP) {
+ n += src_port;
+ n += dst_port;
+ }
+ n += link_type;
+
+ return (n % LINK_TABLE_OUT_SIZE);
+}
+
+
+static int
+SeqDiff(u_long x, u_long y)
+{
+/* Return the difference between two TCP sequence numbers */
+
+/*
+ This function is encapsulated in case there are any unusual
+ arithmetic conditions that need to be considered.
+*/
+
+ return (ntohl(y) - ntohl(x));
+}
+
+#ifdef _KERNEL
+
+static void
+AliasLog(char *str, const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ vsnprintf(str, LIBALIAS_BUF_SIZE, format, ap);
+ va_end(ap);
+}
+#else
+static void
+AliasLog(FILE *stream, const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ vfprintf(stream, format, ap);
+ va_end(ap);
+ fflush(stream);
+}
+#endif
+
+static void
+ShowAliasStats(struct libalias *la)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+/* Used for debugging */
+ if (la->logDesc) {
+ int tot = la->icmpLinkCount + la->udpLinkCount +
+ (la->sctpLinkCount>>1) + /* sctp counts half associations */
+ la->tcpLinkCount + la->pptpLinkCount +
+ la->protoLinkCount + la->fragmentIdLinkCount +
+ la->fragmentPtrLinkCount;
+
+ AliasLog(la->logDesc,
+ "icmp=%u, udp=%u, tcp=%u, sctp=%u, pptp=%u, proto=%u, frag_id=%u frag_ptr=%u / tot=%u",
+ la->icmpLinkCount,
+ la->udpLinkCount,
+ la->tcpLinkCount,
+ la->sctpLinkCount>>1, /* sctp counts half associations */
+ la->pptpLinkCount,
+ la->protoLinkCount,
+ la->fragmentIdLinkCount,
+ la->fragmentPtrLinkCount, tot);
+#ifndef _KERNEL
+ AliasLog(la->logDesc, " (sock=%u)\n", la->sockCount);
+#endif
+ }
+}
+
+void SctpShowAliasStats(struct libalias *la)
+{
+
+ ShowAliasStats(la);
+}
+
+
+/* Internal routines for finding, deleting and adding links
+
+Port Allocation:
+ GetNewPort() -- find and reserve new alias port number
+ GetSocket() -- try to allocate a socket for a given port
+
+Link creation and deletion:
+ CleanupAliasData() - remove all link chains from lookup table
+ IncrementalCleanup() - look for stale links in a single chain
+ DeleteLink() - remove link
+ AddLink() - add link
+ ReLink() - change link
+
+Link search:
+ FindLinkOut() - find link for outgoing packets
+ FindLinkIn() - find link for incoming packets
+
+Port search:
+ FindNewPortGroup() - find an available group of ports
+*/
+
+/* Local prototypes */
+static int GetNewPort(struct libalias *, struct alias_link *, int);
+#ifndef NO_USE_SOCKETS
+static u_short GetSocket(struct libalias *, u_short, int *, int);
+#endif
+static void CleanupAliasData(struct libalias *);
+
+static void IncrementalCleanup(struct libalias *);
+
+static void DeleteLink(struct alias_link *);
+
+static struct alias_link *
+AddLink(struct libalias *, struct in_addr, struct in_addr, struct in_addr,
+ u_short, u_short, int, int);
+
+static struct alias_link *
+ReLink(struct alias_link *,
+ struct in_addr, struct in_addr, struct in_addr,
+ u_short, u_short, int, int);
+
+static struct alias_link *
+ FindLinkOut (struct libalias *, struct in_addr, struct in_addr, u_short, u_short, int, int);
+
+static struct alias_link *
+ FindLinkIn (struct libalias *, struct in_addr, struct in_addr, u_short, u_short, int, int);
+
+
+#define ALIAS_PORT_BASE 0x08000
+#define ALIAS_PORT_MASK 0x07fff
+#define ALIAS_PORT_MASK_EVEN 0x07ffe
+#define GET_NEW_PORT_MAX_ATTEMPTS 20
+
+#define GET_ALIAS_PORT -1
+#define GET_ALIAS_ID GET_ALIAS_PORT
+
+#define FIND_EVEN_ALIAS_BASE 1
+
+/* GetNewPort() allocates port numbers. Note that if a port number
+ is already in use, that does not mean that it cannot be used by
+ another link concurrently. This is because GetNewPort() looks for
+ unused triplets: (dest addr, dest port, alias port). */
+
+static int
+GetNewPort(struct libalias *la, struct alias_link *lnk, int alias_port_param)
+{
+ int i;
+ int max_trials;
+ u_short port_sys;
+ u_short port_net;
+
+ LIBALIAS_LOCK_ASSERT(la);
+/*
+ Description of alias_port_param for GetNewPort(). When
+ this parameter is zero or positive, it precisely specifies
+ the port number. GetNewPort() will return this number
+ without checking whether it is in use.
+
+ When this parameter is GET_ALIAS_PORT, a randomly selected
+ port number is chosen.
+*/
+
+ if (alias_port_param == GET_ALIAS_PORT) {
+ /*
+ * The aliasing port is automatically selected by one of
+ * two methods below:
+ */
+ max_trials = GET_NEW_PORT_MAX_ATTEMPTS;
+
+ if (la->packetAliasMode & PKT_ALIAS_SAME_PORTS) {
+ /*
+ * When the PKT_ALIAS_SAME_PORTS option is chosen,
+ * the first try will be the actual source port. If
+ * this is already in use, the remainder of the
+ * trials will be random.
+ */
+ port_net = lnk->src_port;
+ port_sys = ntohs(port_net);
+ } else {
+ /* First trial and all subsequent are random. */
+ port_sys = arc4random() & ALIAS_PORT_MASK;
+ port_sys += ALIAS_PORT_BASE;
+ port_net = htons(port_sys);
+ }
+ } else if (alias_port_param >= 0 && alias_port_param < 0x10000) {
+ lnk->alias_port = (u_short) alias_port_param;
+ return (0);
+ } else {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/GetNewPort(): ");
+ fprintf(stderr, "input parameter error\n");
+#endif
+ return (-1);
+ }
+
+
+/* Port number search */
+ for (i = 0; i < max_trials; i++) {
+ int go_ahead;
+ struct alias_link *search_result;
+
+ search_result = FindLinkIn(la, lnk->dst_addr, lnk->alias_addr,
+ lnk->dst_port, port_net,
+ lnk->link_type, 0);
+
+ if (search_result == NULL)
+ go_ahead = 1;
+ else if (!(lnk->flags & LINK_PARTIALLY_SPECIFIED)
+ && (search_result->flags & LINK_PARTIALLY_SPECIFIED))
+ go_ahead = 1;
+ else
+ go_ahead = 0;
+
+ if (go_ahead) {
+#ifndef NO_USE_SOCKETS
+ if ((la->packetAliasMode & PKT_ALIAS_USE_SOCKETS)
+ && (lnk->flags & LINK_PARTIALLY_SPECIFIED)
+ && ((lnk->link_type == LINK_TCP) ||
+ (lnk->link_type == LINK_UDP))) {
+ if (GetSocket(la, port_net, &lnk->sockfd, lnk->link_type)) {
+ lnk->alias_port = port_net;
+ return (0);
+ }
+ } else {
+#endif
+ lnk->alias_port = port_net;
+ return (0);
+#ifndef NO_USE_SOCKETS
+ }
+#endif
+ }
+ port_sys = arc4random() & ALIAS_PORT_MASK;
+ port_sys += ALIAS_PORT_BASE;
+ port_net = htons(port_sys);
+ }
+
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/GetnewPort(): ");
+ fprintf(stderr, "could not find free port\n");
+#endif
+
+ return (-1);
+}
+
+#ifndef NO_USE_SOCKETS
+static u_short
+GetSocket(struct libalias *la, u_short port_net, int *sockfd, int link_type)
+{
+ int err;
+ int sock;
+ struct sockaddr_in sock_addr;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ if (link_type == LINK_TCP)
+ sock = socket(AF_INET, SOCK_STREAM, 0);
+ else if (link_type == LINK_UDP)
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ else {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/GetSocket(): ");
+ fprintf(stderr, "incorrect link type\n");
+#endif
+ return (0);
+ }
+
+ if (sock < 0) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/GetSocket(): ");
+ fprintf(stderr, "socket() error %d\n", *sockfd);
+#endif
+ return (0);
+ }
+ sock_addr.sin_family = AF_INET;
+ sock_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ sock_addr.sin_port = port_net;
+
+ err = bind(sock,
+ (struct sockaddr *)&sock_addr,
+ sizeof(sock_addr));
+ if (err == 0) {
+ la->sockCount++;
+ *sockfd = sock;
+ return (1);
+ } else {
+ close(sock);
+ return (0);
+ }
+}
+#endif
+
+/* FindNewPortGroup() returns a base port number for an available
+ range of contiguous port numbers. Note that if a port number
+ is already in use, that does not mean that it cannot be used by
+ another link concurrently. This is because FindNewPortGroup()
+ looks for unused triplets: (dest addr, dest port, alias port). */
+
+int
+FindNewPortGroup(struct libalias *la,
+ struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_short src_port,
+ u_short dst_port,
+ u_short port_count,
+ u_char proto,
+ u_char align)
+{
+ int i, j;
+ int max_trials;
+ u_short port_sys;
+ int link_type;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ /*
+ * Get link_type from protocol
+ */
+
+ switch (proto) {
+ case IPPROTO_UDP:
+ link_type = LINK_UDP;
+ break;
+ case IPPROTO_TCP:
+ link_type = LINK_TCP;
+ break;
+ default:
+ return (0);
+ break;
+ }
+
+ /*
+ * The aliasing port is automatically selected by one of two
+ * methods below:
+ */
+ max_trials = GET_NEW_PORT_MAX_ATTEMPTS;
+
+ if (la->packetAliasMode & PKT_ALIAS_SAME_PORTS) {
+ /*
+ * When the PKT_ALIAS_SAME_PORTS option is chosen, the first
+ * try will be the actual source port. If this is already
+ * in use, the remainder of the trials will be random.
+ */
+ port_sys = ntohs(src_port);
+
+ } else {
+
+ /* First trial and all subsequent are random. */
+ if (align == FIND_EVEN_ALIAS_BASE)
+ port_sys = arc4random() & ALIAS_PORT_MASK_EVEN;
+ else
+ port_sys = arc4random() & ALIAS_PORT_MASK;
+
+ port_sys += ALIAS_PORT_BASE;
+ }
+
+/* Port number search */
+ for (i = 0; i < max_trials; i++) {
+
+ struct alias_link *search_result;
+
+ for (j = 0; j < port_count; j++)
+ if (0 != (search_result = FindLinkIn(la, dst_addr, alias_addr,
+ dst_port, htons(port_sys + j),
+ link_type, 0)))
+ break;
+
+ /* Found a good range, return base */
+ if (j == port_count)
+ return (htons(port_sys));
+
+ /* Find a new base to try */
+ if (align == FIND_EVEN_ALIAS_BASE)
+ port_sys = arc4random() & ALIAS_PORT_MASK_EVEN;
+ else
+ port_sys = arc4random() & ALIAS_PORT_MASK;
+
+ port_sys += ALIAS_PORT_BASE;
+ }
+
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/FindNewPortGroup(): ");
+ fprintf(stderr, "could not find free port(s)\n");
+#endif
+
+ return (0);
+}
+
+static void
+CleanupAliasData(struct libalias *la)
+{
+ struct alias_link *lnk;
+ int i;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ for (i = 0; i < LINK_TABLE_OUT_SIZE; i++) {
+ lnk = LIST_FIRST(&la->linkTableOut[i]);
+ while (lnk != NULL) {
+ struct alias_link *link_next = LIST_NEXT(lnk, list_out);
+ DeleteLink(lnk);
+ lnk = link_next;
+ }
+ }
+
+ la->cleanupIndex = 0;
+}
+
+
+static void
+IncrementalCleanup(struct libalias *la)
+{
+ struct alias_link *lnk, *lnk_tmp;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ LIST_FOREACH_SAFE(lnk, &la->linkTableOut[la->cleanupIndex++],
+ list_out, lnk_tmp) {
+ if (la->timeStamp - lnk->timestamp > lnk->expire_time)
+ DeleteLink(lnk);
+ }
+
+ if (la->cleanupIndex == LINK_TABLE_OUT_SIZE)
+ la->cleanupIndex = 0;
+}
+
+static void
+DeleteLink(struct alias_link *lnk)
+{
+ struct libalias *la = lnk->la;
+
+ LIBALIAS_LOCK_ASSERT(la);
+/* Don't do anything if the link is marked permanent */
+ if (la->deleteAllLinks == 0 && lnk->flags & LINK_PERMANENT)
+ return;
+
+#ifndef NO_FW_PUNCH
+/* Delete associated firewall hole, if any */
+ ClearFWHole(lnk);
+#endif
+
+/* Free memory allocated for LSNAT server pool */
+ if (lnk->server != NULL) {
+ struct server *head, *curr, *next;
+
+ head = curr = lnk->server;
+ do {
+ next = curr->next;
+ free(curr);
+ } while ((curr = next) != head);
+ }
+/* Adjust output table pointers */
+ LIST_REMOVE(lnk, list_out);
+
+/* Adjust input table pointers */
+ LIST_REMOVE(lnk, list_in);
+#ifndef NO_USE_SOCKETS
+/* Close socket, if one has been allocated */
+ if (lnk->sockfd != -1) {
+ la->sockCount--;
+ close(lnk->sockfd);
+ }
+#endif
+/* Link-type dependent cleanup */
+ switch (lnk->link_type) {
+ case LINK_ICMP:
+ la->icmpLinkCount--;
+ break;
+ case LINK_UDP:
+ la->udpLinkCount--;
+ break;
+ case LINK_TCP:
+ la->tcpLinkCount--;
+ free(lnk->data.tcp);
+ break;
+ case LINK_PPTP:
+ la->pptpLinkCount--;
+ break;
+ case LINK_FRAGMENT_ID:
+ la->fragmentIdLinkCount--;
+ break;
+ case LINK_FRAGMENT_PTR:
+ la->fragmentPtrLinkCount--;
+ if (lnk->data.frag_ptr != NULL)
+ free(lnk->data.frag_ptr);
+ break;
+ case LINK_ADDR:
+ break;
+ default:
+ la->protoLinkCount--;
+ break;
+ }
+
+/* Free memory */
+ free(lnk);
+
+/* Write statistics, if logging enabled */
+ if (la->packetAliasMode & PKT_ALIAS_LOG) {
+ ShowAliasStats(la);
+ }
+}
+
+
+static struct alias_link *
+AddLink(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_short src_port,
+ u_short dst_port,
+ int alias_port_param, /* if less than zero, the alias port
+ * is chosen automatically; if zero or
+ * greater, it is used as the alias port */
+ int link_type)
+{
+ u_int start_point;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = malloc(sizeof(struct alias_link));
+ if (lnk != NULL) {
+ /* Basic initialization */
+ lnk->la = la;
+ lnk->src_addr = src_addr;
+ lnk->dst_addr = dst_addr;
+ lnk->alias_addr = alias_addr;
+ lnk->proxy_addr.s_addr = INADDR_ANY;
+ lnk->src_port = src_port;
+ lnk->dst_port = dst_port;
+ lnk->proxy_port = 0;
+ lnk->server = NULL;
+ lnk->link_type = link_type;
+#ifndef NO_USE_SOCKETS
+ lnk->sockfd = -1;
+#endif
+ lnk->flags = 0;
+ lnk->pflags = 0;
+ lnk->timestamp = la->timeStamp;
+
+ /* Expiration time */
+ switch (link_type) {
+ case LINK_ICMP:
+ lnk->expire_time = ICMP_EXPIRE_TIME;
+ break;
+ case LINK_UDP:
+ lnk->expire_time = UDP_EXPIRE_TIME;
+ break;
+ case LINK_TCP:
+ lnk->expire_time = TCP_EXPIRE_INITIAL;
+ break;
+ case LINK_PPTP:
+ lnk->flags |= LINK_PERMANENT; /* no timeout. */
+ break;
+ case LINK_FRAGMENT_ID:
+ lnk->expire_time = FRAGMENT_ID_EXPIRE_TIME;
+ break;
+ case LINK_FRAGMENT_PTR:
+ lnk->expire_time = FRAGMENT_PTR_EXPIRE_TIME;
+ break;
+ case LINK_ADDR:
+ break;
+ default:
+ lnk->expire_time = PROTO_EXPIRE_TIME;
+ break;
+ }
+
+ /* Determine alias flags */
+ if (dst_addr.s_addr == INADDR_ANY)
+ lnk->flags |= LINK_UNKNOWN_DEST_ADDR;
+ if (dst_port == 0)
+ lnk->flags |= LINK_UNKNOWN_DEST_PORT;
+
+ /* Determine alias port */
+ if (GetNewPort(la, lnk, alias_port_param) != 0) {
+ free(lnk);
+ return (NULL);
+ }
+ /* Link-type dependent initialization */
+ switch (link_type) {
+ struct tcp_dat *aux_tcp;
+
+ case LINK_ICMP:
+ la->icmpLinkCount++;
+ break;
+ case LINK_UDP:
+ la->udpLinkCount++;
+ break;
+ case LINK_TCP:
+ aux_tcp = malloc(sizeof(struct tcp_dat));
+ if (aux_tcp != NULL) {
+ int i;
+
+ la->tcpLinkCount++;
+ aux_tcp->state.in = ALIAS_TCP_STATE_NOT_CONNECTED;
+ aux_tcp->state.out = ALIAS_TCP_STATE_NOT_CONNECTED;
+ aux_tcp->state.index = 0;
+ aux_tcp->state.ack_modified = 0;
+ for (i = 0; i < N_LINK_TCP_DATA; i++)
+ aux_tcp->ack[i].active = 0;
+ aux_tcp->fwhole = -1;
+ lnk->data.tcp = aux_tcp;
+ } else {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/AddLink: ");
+ fprintf(stderr, " cannot allocate auxiliary TCP data\n");
+#endif
+ free(lnk);
+ return (NULL);
+ }
+ break;
+ case LINK_PPTP:
+ la->pptpLinkCount++;
+ break;
+ case LINK_FRAGMENT_ID:
+ la->fragmentIdLinkCount++;
+ break;
+ case LINK_FRAGMENT_PTR:
+ la->fragmentPtrLinkCount++;
+ break;
+ case LINK_ADDR:
+ break;
+ default:
+ la->protoLinkCount++;
+ break;
+ }
+
+ /* Set up pointers for output lookup table */
+ start_point = StartPointOut(src_addr, dst_addr,
+ src_port, dst_port, link_type);
+ LIST_INSERT_HEAD(&la->linkTableOut[start_point], lnk, list_out);
+
+ /* Set up pointers for input lookup table */
+ start_point = StartPointIn(alias_addr, lnk->alias_port, link_type);
+ LIST_INSERT_HEAD(&la->linkTableIn[start_point], lnk, list_in);
+ } else {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/AddLink(): ");
+ fprintf(stderr, "malloc() call failed.\n");
+#endif
+ }
+ if (la->packetAliasMode & PKT_ALIAS_LOG) {
+ ShowAliasStats(la);
+ }
+ return (lnk);
+}
+
+static struct alias_link *
+ReLink(struct alias_link *old_lnk,
+ struct in_addr src_addr,
+ struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_short src_port,
+ u_short dst_port,
+ int alias_port_param, /* if less than zero, the alias port
+ * is chosen automatically; if zero or
+ * greater, it is used as the alias port */
+ int link_type)
+{
+ struct alias_link *new_lnk;
+ struct libalias *la = old_lnk->la;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ new_lnk = AddLink(la, src_addr, dst_addr, alias_addr,
+ src_port, dst_port, alias_port_param,
+ link_type);
+#ifndef NO_FW_PUNCH
+ if (new_lnk != NULL &&
+ old_lnk->link_type == LINK_TCP &&
+ old_lnk->data.tcp->fwhole > 0) {
+ PunchFWHole(new_lnk);
+ }
+#endif
+ DeleteLink(old_lnk);
+ return (new_lnk);
+}
+
+static struct alias_link *
+_FindLinkOut(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ u_short src_port,
+ u_short dst_port,
+ int link_type,
+ int replace_partial_links)
+{
+ u_int i;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ i = StartPointOut(src_addr, dst_addr, src_port, dst_port, link_type);
+ LIST_FOREACH(lnk, &la->linkTableOut[i], list_out) {
+ if (lnk->dst_addr.s_addr == dst_addr.s_addr &&
+ lnk->src_addr.s_addr == src_addr.s_addr &&
+ lnk->src_port == src_port &&
+ lnk->dst_port == dst_port &&
+ lnk->link_type == link_type &&
+ lnk->server == NULL) {
+ lnk->timestamp = la->timeStamp;
+ break;
+ }
+ }
+
+/* Search for partially specified links. */
+ if (lnk == NULL && replace_partial_links) {
+ if (dst_port != 0 && dst_addr.s_addr != INADDR_ANY) {
+ lnk = _FindLinkOut(la, src_addr, dst_addr, src_port, 0,
+ link_type, 0);
+ if (lnk == NULL)
+ lnk = _FindLinkOut(la, src_addr, la->nullAddress, src_port,
+ dst_port, link_type, 0);
+ }
+ if (lnk == NULL &&
+ (dst_port != 0 || dst_addr.s_addr != INADDR_ANY)) {
+ lnk = _FindLinkOut(la, src_addr, la->nullAddress, src_port, 0,
+ link_type, 0);
+ }
+ if (lnk != NULL) {
+ lnk = ReLink(lnk,
+ src_addr, dst_addr, lnk->alias_addr,
+ src_port, dst_port, lnk->alias_port,
+ link_type);
+ }
+ }
+ return (lnk);
+}
+
+static struct alias_link *
+FindLinkOut(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ u_short src_port,
+ u_short dst_port,
+ int link_type,
+ int replace_partial_links)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = _FindLinkOut(la, src_addr, dst_addr, src_port, dst_port,
+ link_type, replace_partial_links);
+
+ if (lnk == NULL) {
+ /*
+ * The following allows permanent links to be specified as
+ * using the default source address (i.e. device interface
+ * address) without knowing in advance what that address
+ * is.
+ */
+ if (la->aliasAddress.s_addr != INADDR_ANY &&
+ src_addr.s_addr == la->aliasAddress.s_addr) {
+ lnk = _FindLinkOut(la, la->nullAddress, dst_addr, src_port, dst_port,
+ link_type, replace_partial_links);
+ }
+ }
+ return (lnk);
+}
+
+
+static struct alias_link *
+_FindLinkIn(struct libalias *la, struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_short dst_port,
+ u_short alias_port,
+ int link_type,
+ int replace_partial_links)
+{
+ int flags_in;
+ u_int start_point;
+ struct alias_link *lnk;
+ struct alias_link *lnk_fully_specified;
+ struct alias_link *lnk_unknown_all;
+ struct alias_link *lnk_unknown_dst_addr;
+ struct alias_link *lnk_unknown_dst_port;
+
+ LIBALIAS_LOCK_ASSERT(la);
+/* Initialize pointers */
+ lnk_fully_specified = NULL;
+ lnk_unknown_all = NULL;
+ lnk_unknown_dst_addr = NULL;
+ lnk_unknown_dst_port = NULL;
+
+/* If either the dest addr or port is unknown, the search
+ loop will have to know about this. */
+
+ flags_in = 0;
+ if (dst_addr.s_addr == INADDR_ANY)
+ flags_in |= LINK_UNKNOWN_DEST_ADDR;
+ if (dst_port == 0)
+ flags_in |= LINK_UNKNOWN_DEST_PORT;
+
+/* Search loop */
+ start_point = StartPointIn(alias_addr, alias_port, link_type);
+ LIST_FOREACH(lnk, &la->linkTableIn[start_point], list_in) {
+ int flags;
+
+ flags = flags_in | lnk->flags;
+ if (!(flags & LINK_PARTIALLY_SPECIFIED)) {
+ if (lnk->alias_addr.s_addr == alias_addr.s_addr
+ && lnk->alias_port == alias_port
+ && lnk->dst_addr.s_addr == dst_addr.s_addr
+ && lnk->dst_port == dst_port
+ && lnk->link_type == link_type) {
+ lnk_fully_specified = lnk;
+ break;
+ }
+ } else if ((flags & LINK_UNKNOWN_DEST_ADDR)
+ && (flags & LINK_UNKNOWN_DEST_PORT)) {
+ if (lnk->alias_addr.s_addr == alias_addr.s_addr
+ && lnk->alias_port == alias_port
+ && lnk->link_type == link_type) {
+ if (lnk_unknown_all == NULL)
+ lnk_unknown_all = lnk;
+ }
+ } else if (flags & LINK_UNKNOWN_DEST_ADDR) {
+ if (lnk->alias_addr.s_addr == alias_addr.s_addr
+ && lnk->alias_port == alias_port
+ && lnk->link_type == link_type
+ && lnk->dst_port == dst_port) {
+ if (lnk_unknown_dst_addr == NULL)
+ lnk_unknown_dst_addr = lnk;
+ }
+ } else if (flags & LINK_UNKNOWN_DEST_PORT) {
+ if (lnk->alias_addr.s_addr == alias_addr.s_addr
+ && lnk->alias_port == alias_port
+ && lnk->link_type == link_type
+ && lnk->dst_addr.s_addr == dst_addr.s_addr) {
+ if (lnk_unknown_dst_port == NULL)
+ lnk_unknown_dst_port = lnk;
+ }
+ }
+ }
+
+
+
+ if (lnk_fully_specified != NULL) {
+ lnk_fully_specified->timestamp = la->timeStamp;
+ lnk = lnk_fully_specified;
+ } else if (lnk_unknown_dst_port != NULL)
+ lnk = lnk_unknown_dst_port;
+ else if (lnk_unknown_dst_addr != NULL)
+ lnk = lnk_unknown_dst_addr;
+ else if (lnk_unknown_all != NULL)
+ lnk = lnk_unknown_all;
+ else
+ return (NULL);
+
+ if (replace_partial_links &&
+ (lnk->flags & LINK_PARTIALLY_SPECIFIED || lnk->server != NULL)) {
+ struct in_addr src_addr;
+ u_short src_port;
+
+ if (lnk->server != NULL) { /* LSNAT link */
+ src_addr = lnk->server->addr;
+ src_port = lnk->server->port;
+ lnk->server = lnk->server->next;
+ } else {
+ src_addr = lnk->src_addr;
+ src_port = lnk->src_port;
+ }
+
+ if (link_type == LINK_SCTP) {
+ lnk->src_addr = src_addr;
+ lnk->src_port = src_port;
+ return(lnk);
+ }
+ lnk = ReLink(lnk,
+ src_addr, dst_addr, alias_addr,
+ src_port, dst_port, alias_port,
+ link_type);
+ }
+ return (lnk);
+}
+
+static struct alias_link *
+FindLinkIn(struct libalias *la, struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_short dst_port,
+ u_short alias_port,
+ int link_type,
+ int replace_partial_links)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = _FindLinkIn(la, dst_addr, alias_addr, dst_port, alias_port,
+ link_type, replace_partial_links);
+
+ if (lnk == NULL) {
+ /*
+ * The following allows permanent links to be specified as
+ * using the default aliasing address (i.e. device
+ * interface address) without knowing in advance what that
+ * address is.
+ */
+ if (la->aliasAddress.s_addr != INADDR_ANY &&
+ alias_addr.s_addr == la->aliasAddress.s_addr) {
+ lnk = _FindLinkIn(la, dst_addr, la->nullAddress, dst_port, alias_port,
+ link_type, replace_partial_links);
+ }
+ }
+ return (lnk);
+}
+
+
+
+
+/* External routines for finding/adding links
+
+-- "external" means outside alias_db.c, but within alias*.c --
+
+ FindIcmpIn(), FindIcmpOut()
+ FindFragmentIn1(), FindFragmentIn2()
+ AddFragmentPtrLink(), FindFragmentPtr()
+ FindProtoIn(), FindProtoOut()
+ FindUdpTcpIn(), FindUdpTcpOut()
+ AddPptp(), FindPptpOutByCallId(), FindPptpInByCallId(),
+ FindPptpOutByPeerCallId(), FindPptpInByPeerCallId()
+ FindOriginalAddress(), FindAliasAddress()
+
+(prototypes in alias_local.h)
+*/
+
+
+struct alias_link *
+FindIcmpIn(struct libalias *la, struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_short id_alias,
+ int create)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkIn(la, dst_addr, alias_addr,
+ NO_DEST_PORT, id_alias,
+ LINK_ICMP, 0);
+ if (lnk == NULL && create && !(la->packetAliasMode & PKT_ALIAS_DENY_INCOMING)) {
+ struct in_addr target_addr;
+
+ target_addr = FindOriginalAddress(la, alias_addr);
+ lnk = AddLink(la, target_addr, dst_addr, alias_addr,
+ id_alias, NO_DEST_PORT, id_alias,
+ LINK_ICMP);
+ }
+ return (lnk);
+}
+
+
+struct alias_link *
+FindIcmpOut(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ u_short id,
+ int create)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkOut(la, src_addr, dst_addr,
+ id, NO_DEST_PORT,
+ LINK_ICMP, 0);
+ if (lnk == NULL && create) {
+ struct in_addr alias_addr;
+
+ alias_addr = FindAliasAddress(la, src_addr);
+ lnk = AddLink(la, src_addr, dst_addr, alias_addr,
+ id, NO_DEST_PORT, GET_ALIAS_ID,
+ LINK_ICMP);
+ }
+ return (lnk);
+}
+
+
+struct alias_link *
+FindFragmentIn1(struct libalias *la, struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_short ip_id)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkIn(la, dst_addr, alias_addr,
+ NO_DEST_PORT, ip_id,
+ LINK_FRAGMENT_ID, 0);
+
+ if (lnk == NULL) {
+ lnk = AddLink(la, la->nullAddress, dst_addr, alias_addr,
+ NO_SRC_PORT, NO_DEST_PORT, ip_id,
+ LINK_FRAGMENT_ID);
+ }
+ return (lnk);
+}
+
+
+struct alias_link *
+FindFragmentIn2(struct libalias *la, struct in_addr dst_addr, /* Doesn't add a link
+ * if one is not found. */
+ struct in_addr alias_addr,
+ u_short ip_id)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ return FindLinkIn(la, dst_addr, alias_addr,
+ NO_DEST_PORT, ip_id,
+ LINK_FRAGMENT_ID, 0);
+}
+
+
+struct alias_link *
+AddFragmentPtrLink(struct libalias *la, struct in_addr dst_addr,
+ u_short ip_id)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ return AddLink(la, la->nullAddress, dst_addr, la->nullAddress,
+ NO_SRC_PORT, NO_DEST_PORT, ip_id,
+ LINK_FRAGMENT_PTR);
+}
+
+
+struct alias_link *
+FindFragmentPtr(struct libalias *la, struct in_addr dst_addr,
+ u_short ip_id)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ return FindLinkIn(la, dst_addr, la->nullAddress,
+ NO_DEST_PORT, ip_id,
+ LINK_FRAGMENT_PTR, 0);
+}
+
+
+struct alias_link *
+FindProtoIn(struct libalias *la, struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_char proto)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkIn(la, dst_addr, alias_addr,
+ NO_DEST_PORT, 0,
+ proto, 1);
+
+ if (lnk == NULL && !(la->packetAliasMode & PKT_ALIAS_DENY_INCOMING)) {
+ struct in_addr target_addr;
+
+ target_addr = FindOriginalAddress(la, alias_addr);
+ lnk = AddLink(la, target_addr, dst_addr, alias_addr,
+ NO_SRC_PORT, NO_DEST_PORT, 0,
+ proto);
+ }
+ return (lnk);
+}
+
+
+struct alias_link *
+FindProtoOut(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ u_char proto)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkOut(la, src_addr, dst_addr,
+ NO_SRC_PORT, NO_DEST_PORT,
+ proto, 1);
+
+ if (lnk == NULL) {
+ struct in_addr alias_addr;
+
+ alias_addr = FindAliasAddress(la, src_addr);
+ lnk = AddLink(la, src_addr, dst_addr, alias_addr,
+ NO_SRC_PORT, NO_DEST_PORT, 0,
+ proto);
+ }
+ return (lnk);
+}
+
+
+struct alias_link *
+FindUdpTcpIn(struct libalias *la, struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_short dst_port,
+ u_short alias_port,
+ u_char proto,
+ int create)
+{
+ int link_type;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ switch (proto) {
+ case IPPROTO_UDP:
+ link_type = LINK_UDP;
+ break;
+ case IPPROTO_TCP:
+ link_type = LINK_TCP;
+ break;
+ default:
+ return (NULL);
+ break;
+ }
+
+ lnk = FindLinkIn(la, dst_addr, alias_addr,
+ dst_port, alias_port,
+ link_type, create);
+
+ if (lnk == NULL && create && !(la->packetAliasMode & PKT_ALIAS_DENY_INCOMING)) {
+ struct in_addr target_addr;
+
+ target_addr = FindOriginalAddress(la, alias_addr);
+ lnk = AddLink(la, target_addr, dst_addr, alias_addr,
+ alias_port, dst_port, alias_port,
+ link_type);
+ }
+ return (lnk);
+}
+
+
+struct alias_link *
+FindUdpTcpOut(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ u_short src_port,
+ u_short dst_port,
+ u_char proto,
+ int create)
+{
+ int link_type;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ switch (proto) {
+ case IPPROTO_UDP:
+ link_type = LINK_UDP;
+ break;
+ case IPPROTO_TCP:
+ link_type = LINK_TCP;
+ break;
+ default:
+ return (NULL);
+ break;
+ }
+
+ lnk = FindLinkOut(la, src_addr, dst_addr, src_port, dst_port, link_type, create);
+
+ if (lnk == NULL && create) {
+ struct in_addr alias_addr;
+
+ alias_addr = FindAliasAddress(la, src_addr);
+ lnk = AddLink(la, src_addr, dst_addr, alias_addr,
+ src_port, dst_port, GET_ALIAS_PORT,
+ link_type);
+ }
+ return (lnk);
+}
+
+
+struct alias_link *
+AddPptp(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_int16_t src_call_id)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = AddLink(la, src_addr, dst_addr, alias_addr,
+ src_call_id, 0, GET_ALIAS_PORT,
+ LINK_PPTP);
+
+ return (lnk);
+}
+
+
+struct alias_link *
+FindPptpOutByCallId(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ u_int16_t src_call_id)
+{
+ u_int i;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ i = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP);
+ LIST_FOREACH(lnk, &la->linkTableOut[i], list_out)
+ if (lnk->link_type == LINK_PPTP &&
+ lnk->src_addr.s_addr == src_addr.s_addr &&
+ lnk->dst_addr.s_addr == dst_addr.s_addr &&
+ lnk->src_port == src_call_id)
+ break;
+
+ return (lnk);
+}
+
+
+struct alias_link *
+FindPptpOutByPeerCallId(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ u_int16_t dst_call_id)
+{
+ u_int i;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ i = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP);
+ LIST_FOREACH(lnk, &la->linkTableOut[i], list_out)
+ if (lnk->link_type == LINK_PPTP &&
+ lnk->src_addr.s_addr == src_addr.s_addr &&
+ lnk->dst_addr.s_addr == dst_addr.s_addr &&
+ lnk->dst_port == dst_call_id)
+ break;
+
+ return (lnk);
+}
+
+
+struct alias_link *
+FindPptpInByCallId(struct libalias *la, struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_int16_t dst_call_id)
+{
+ u_int i;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ i = StartPointIn(alias_addr, 0, LINK_PPTP);
+ LIST_FOREACH(lnk, &la->linkTableIn[i], list_in)
+ if (lnk->link_type == LINK_PPTP &&
+ lnk->dst_addr.s_addr == dst_addr.s_addr &&
+ lnk->alias_addr.s_addr == alias_addr.s_addr &&
+ lnk->dst_port == dst_call_id)
+ break;
+
+ return (lnk);
+}
+
+
+struct alias_link *
+FindPptpInByPeerCallId(struct libalias *la, struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_int16_t alias_call_id)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkIn(la, dst_addr, alias_addr,
+ 0 /* any */ , alias_call_id,
+ LINK_PPTP, 0);
+
+
+ return (lnk);
+}
+
+
+struct alias_link *
+FindRtspOut(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ u_short src_port,
+ u_short alias_port,
+ u_char proto)
+{
+ int link_type;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ switch (proto) {
+ case IPPROTO_UDP:
+ link_type = LINK_UDP;
+ break;
+ case IPPROTO_TCP:
+ link_type = LINK_TCP;
+ break;
+ default:
+ return (NULL);
+ break;
+ }
+
+ lnk = FindLinkOut(la, src_addr, dst_addr, src_port, 0, link_type, 1);
+
+ if (lnk == NULL) {
+ struct in_addr alias_addr;
+
+ alias_addr = FindAliasAddress(la, src_addr);
+ lnk = AddLink(la, src_addr, dst_addr, alias_addr,
+ src_port, 0, alias_port,
+ link_type);
+ }
+ return (lnk);
+}
+
+
+struct in_addr
+FindOriginalAddress(struct libalias *la, struct in_addr alias_addr)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkIn(la, la->nullAddress, alias_addr,
+ 0, 0, LINK_ADDR, 0);
+ if (lnk == NULL) {
+ la->newDefaultLink = 1;
+ if (la->targetAddress.s_addr == INADDR_ANY)
+ return (alias_addr);
+ else if (la->targetAddress.s_addr == INADDR_NONE)
+ return (la->aliasAddress.s_addr != INADDR_ANY) ?
+ la->aliasAddress : alias_addr;
+ else
+ return (la->targetAddress);
+ } else {
+ if (lnk->server != NULL) { /* LSNAT link */
+ struct in_addr src_addr;
+
+ src_addr = lnk->server->addr;
+ lnk->server = lnk->server->next;
+ return (src_addr);
+ } else if (lnk->src_addr.s_addr == INADDR_ANY)
+ return (la->aliasAddress.s_addr != INADDR_ANY) ?
+ la->aliasAddress : alias_addr;
+ else
+ return (lnk->src_addr);
+ }
+}
+
+
+struct in_addr
+FindAliasAddress(struct libalias *la, struct in_addr original_addr)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkOut(la, original_addr, la->nullAddress,
+ 0, 0, LINK_ADDR, 0);
+ if (lnk == NULL) {
+ return (la->aliasAddress.s_addr != INADDR_ANY) ?
+ la->aliasAddress : original_addr;
+ } else {
+ if (lnk->alias_addr.s_addr == INADDR_ANY)
+ return (la->aliasAddress.s_addr != INADDR_ANY) ?
+ la->aliasAddress : original_addr;
+ else
+ return (lnk->alias_addr);
+ }
+}
+
+
+/* External routines for getting or changing link data
+ (external to alias_db.c, but internal to alias*.c)
+
+ SetFragmentData(), GetFragmentData()
+ SetFragmentPtr(), GetFragmentPtr()
+ SetStateIn(), SetStateOut(), GetStateIn(), GetStateOut()
+ GetOriginalAddress(), GetDestAddress(), GetAliasAddress()
+ GetOriginalPort(), GetAliasPort()
+ SetAckModified(), GetAckModified()
+ GetDeltaAckIn(), GetDeltaSeqOut(), AddSeq()
+ SetProtocolFlags(), GetProtocolFlags()
+ SetDestCallId()
+*/
+
+
+void
+SetFragmentAddr(struct alias_link *lnk, struct in_addr src_addr)
+{
+ lnk->data.frag_addr = src_addr;
+}
+
+
+void
+GetFragmentAddr(struct alias_link *lnk, struct in_addr *src_addr)
+{
+ *src_addr = lnk->data.frag_addr;
+}
+
+
+void
+SetFragmentPtr(struct alias_link *lnk, char *fptr)
+{
+ lnk->data.frag_ptr = fptr;
+}
+
+
+void
+GetFragmentPtr(struct alias_link *lnk, char **fptr)
+{
+ *fptr = lnk->data.frag_ptr;
+}
+
+
+void
+SetStateIn(struct alias_link *lnk, int state)
+{
+ /* TCP input state */
+ switch (state) {
+ case ALIAS_TCP_STATE_DISCONNECTED:
+ if (lnk->data.tcp->state.out != ALIAS_TCP_STATE_CONNECTED)
+ lnk->expire_time = TCP_EXPIRE_DEAD;
+ else
+ lnk->expire_time = TCP_EXPIRE_SINGLEDEAD;
+ break;
+ case ALIAS_TCP_STATE_CONNECTED:
+ if (lnk->data.tcp->state.out == ALIAS_TCP_STATE_CONNECTED)
+ lnk->expire_time = TCP_EXPIRE_CONNECTED;
+ break;
+ default:
+#ifdef _KERNEL
+ panic("libalias:SetStateIn() unknown state");
+#else
+ abort();
+#endif
+ }
+ lnk->data.tcp->state.in = state;
+}
+
+
+void
+SetStateOut(struct alias_link *lnk, int state)
+{
+ /* TCP output state */
+ switch (state) {
+ case ALIAS_TCP_STATE_DISCONNECTED:
+ if (lnk->data.tcp->state.in != ALIAS_TCP_STATE_CONNECTED)
+ lnk->expire_time = TCP_EXPIRE_DEAD;
+ else
+ lnk->expire_time = TCP_EXPIRE_SINGLEDEAD;
+ break;
+ case ALIAS_TCP_STATE_CONNECTED:
+ if (lnk->data.tcp->state.in == ALIAS_TCP_STATE_CONNECTED)
+ lnk->expire_time = TCP_EXPIRE_CONNECTED;
+ break;
+ default:
+#ifdef _KERNEL
+ panic("libalias:SetStateOut() unknown state");
+#else
+ abort();
+#endif
+ }
+ lnk->data.tcp->state.out = state;
+}
+
+
+int
+GetStateIn(struct alias_link *lnk)
+{
+ /* TCP input state */
+ return (lnk->data.tcp->state.in);
+}
+
+
+int
+GetStateOut(struct alias_link *lnk)
+{
+ /* TCP output state */
+ return (lnk->data.tcp->state.out);
+}
+
+
+struct in_addr
+GetOriginalAddress(struct alias_link *lnk)
+{
+ if (lnk->src_addr.s_addr == INADDR_ANY)
+ return (lnk->la->aliasAddress);
+ else
+ return (lnk->src_addr);
+}
+
+
+struct in_addr
+GetDestAddress(struct alias_link *lnk)
+{
+ return (lnk->dst_addr);
+}
+
+
+struct in_addr
+GetAliasAddress(struct alias_link *lnk)
+{
+ if (lnk->alias_addr.s_addr == INADDR_ANY)
+ return (lnk->la->aliasAddress);
+ else
+ return (lnk->alias_addr);
+}
+
+
+struct in_addr
+GetDefaultAliasAddress(struct libalias *la)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ return (la->aliasAddress);
+}
+
+
+void
+SetDefaultAliasAddress(struct libalias *la, struct in_addr alias_addr)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ la->aliasAddress = alias_addr;
+}
+
+
+u_short
+GetOriginalPort(struct alias_link *lnk)
+{
+ return (lnk->src_port);
+}
+
+
+u_short
+GetAliasPort(struct alias_link *lnk)
+{
+ return (lnk->alias_port);
+}
+
+#ifndef NO_FW_PUNCH
+static u_short
+GetDestPort(struct alias_link *lnk)
+{
+ return (lnk->dst_port);
+}
+
+#endif
+
+void
+SetAckModified(struct alias_link *lnk)
+{
+/* Indicate that ACK numbers have been modified in a TCP connection */
+ lnk->data.tcp->state.ack_modified = 1;
+}
+
+
+struct in_addr
+GetProxyAddress(struct alias_link *lnk)
+{
+ return (lnk->proxy_addr);
+}
+
+
+void
+SetProxyAddress(struct alias_link *lnk, struct in_addr addr)
+{
+ lnk->proxy_addr = addr;
+}
+
+
+u_short
+GetProxyPort(struct alias_link *lnk)
+{
+ return (lnk->proxy_port);
+}
+
+
+void
+SetProxyPort(struct alias_link *lnk, u_short port)
+{
+ lnk->proxy_port = port;
+}
+
+
+int
+GetAckModified(struct alias_link *lnk)
+{
+/* See if ACK numbers have been modified */
+ return (lnk->data.tcp->state.ack_modified);
+}
+
+// XXX ip free
+int
+GetDeltaAckIn(u_long ack, struct alias_link *lnk)
+{
+/*
+Find out how much the ACK number has been altered for an incoming
+TCP packet. To do this, a circular list of ACK numbers where the TCP
+packet size was altered is searched.
+*/
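+/*
+ * Illustration (hypothetical numbers): suppose two records are active,
+ * one with ack_new = 1000 and delta = +5, another with ack_new = 2000
+ * and delta = +8, and the incoming packet acknowledges 2100. Both
+ * records lie at or below the ACK, but the one at 2000 is the closest
+ * preceding size change, so its delta of +8 is the value reported to
+ * the caller.
+ */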
+
+ int i;
+ int delta, ack_diff_min;
+
+ delta = 0;
+ ack_diff_min = -1;
+ for (i = 0; i < N_LINK_TCP_DATA; i++) {
+ struct ack_data_record x;
+
+ x = lnk->data.tcp->ack[i];
+ if (x.active == 1) {
+ int ack_diff;
+
+ ack_diff = SeqDiff(x.ack_new, ack);
+ if (ack_diff >= 0) {
+ if (ack_diff_min >= 0) {
+ if (ack_diff < ack_diff_min) {
+ delta = x.delta;
+ ack_diff_min = ack_diff;
+ }
+ } else {
+ delta = x.delta;
+ ack_diff_min = ack_diff;
+ }
+ }
+ }
+ }
+ return (delta);
+}
+
+// XXX ip free
+int
+GetDeltaSeqOut(u_long seq, struct alias_link *lnk)
+{
+/*
+Find out how much the sequence number has been altered for an outgoing
+TCP packet. To do this, a circular list of ACK numbers where the TCP
+packet size was altered is searched.
+*/
+
+ int i;
+ int delta, seq_diff_min;
+
+ delta = 0;
+ seq_diff_min = -1;
+ for (i = 0; i < N_LINK_TCP_DATA; i++) {
+ struct ack_data_record x;
+
+ x = lnk->data.tcp->ack[i];
+ if (x.active == 1) {
+ int seq_diff;
+
+ seq_diff = SeqDiff(x.ack_old, seq);
+ if (seq_diff >= 0) {
+ if (seq_diff_min >= 0) {
+ if (seq_diff < seq_diff_min) {
+ delta = x.delta;
+ seq_diff_min = seq_diff;
+ }
+ } else {
+ delta = x.delta;
+ seq_diff_min = seq_diff;
+ }
+ }
+ }
+ }
+ return (delta);
+}
+
+// XXX ip free
+void
+AddSeq(struct alias_link *lnk, int delta, u_int ip_hl, u_short ip_len,
+ u_long th_seq, u_int th_off)
+{
+/*
+When a TCP packet has been altered in length, save this
+information in a circular list. If enough packets have
+been altered, then this list will begin to overwrite itself.
+*/
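+/*
+ * Worked example (hypothetical numbers): with ip_hl = 5 and th_off = 5
+ * the headers take (5 + 5) << 2 = 40 bytes; for ip_len = 100 the payload
+ * is 60 bytes. If the payload grew by delta = +3, the record stores
+ * ack_old = th_seq + 60 (end of the original data) and
+ * ack_new = th_seq + 63 (end of the rewritten data).
+ */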
+
+ struct ack_data_record x;
+ int hlen, tlen, dlen;
+ int i;
+
+ hlen = (ip_hl + th_off) << 2;
+ tlen = ntohs(ip_len);
+ dlen = tlen - hlen;
+
+ x.ack_old = htonl(ntohl(th_seq) + dlen);
+ x.ack_new = htonl(ntohl(th_seq) + dlen + delta);
+ x.delta = delta;
+ x.active = 1;
+
+ i = lnk->data.tcp->state.index;
+ lnk->data.tcp->ack[i] = x;
+
+ i++;
+ if (i == N_LINK_TCP_DATA)
+ lnk->data.tcp->state.index = 0;
+ else
+ lnk->data.tcp->state.index = i;
+}
+
+void
+SetExpire(struct alias_link *lnk, int expire)
+{
+ if (expire == 0) {
+ lnk->flags &= ~LINK_PERMANENT;
+ DeleteLink(lnk);
+ } else if (expire == -1) {
+ lnk->flags |= LINK_PERMANENT;
+ } else if (expire > 0) {
+ lnk->expire_time = expire;
+ } else {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/SetExpire(): ");
+ fprintf(stderr, "error in expire parameter\n");
+#endif
+ }
+}
+
+void
+ClearCheckNewLink(struct libalias *la)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ la->newDefaultLink = 0;
+}
+
+void
+SetProtocolFlags(struct alias_link *lnk, int pflags)
+{
+
+ lnk->pflags = pflags;
+}
+
+int
+GetProtocolFlags(struct alias_link *lnk)
+{
+
+ return (lnk->pflags);
+}
+
+void
+SetDestCallId(struct alias_link *lnk, u_int16_t cid)
+{
+ struct libalias *la = lnk->la;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ la->deleteAllLinks = 1;
+ ReLink(lnk, lnk->src_addr, lnk->dst_addr, lnk->alias_addr,
+ lnk->src_port, cid, lnk->alias_port, lnk->link_type);
+ la->deleteAllLinks = 0;
+}
+
+
+/* Miscellaneous Functions
+
+ HouseKeeping()
+ InitPacketAliasLog()
+ UninitPacketAliasLog()
+*/
+
+/*
+ Whenever an outgoing or incoming packet is handled, HouseKeeping()
+ is called to find and remove timed-out aliasing links. Logic exists
+ to sweep through the entire table and linked list structure
+ every 60 seconds.
+
+ (prototype in alias_local.h)
+*/
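+/*
+ * Example of the incremental sweep (hypothetical sizes): if the output
+ * table had 4000 spokes and the cleanup interval were 60 seconds, a call
+ * made 3 seconds after the previous cleanup would visit
+ * 4000 * 3 / 60 = 200 spokes (capped at ALIAS_CLEANUP_MAX_SPOKES), so the
+ * whole table is covered roughly once per interval without long pauses.
+ */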
+
+void
+HouseKeeping(struct libalias *la)
+{
+ int i, n;
+#ifndef _KERNEL
+ struct timeval tv;
+ struct timezone tz;
+#endif
+
+ LIBALIAS_LOCK_ASSERT(la);
+ /*
+ * Save system time (seconds) in global variable timeStamp for use
+ * by other functions. This is done so as not to unnecessarily
+ * waste time by making system calls.
+ */
+#ifdef _KERNEL
+ la->timeStamp = time_uptime;
+#else
+ gettimeofday(&tv, &tz);
+ la->timeStamp = tv.tv_sec;
+#endif
+
+ /* Compute number of spokes (output table link chains) to cover */
+ n = LINK_TABLE_OUT_SIZE * (la->timeStamp - la->lastCleanupTime);
+ n /= ALIAS_CLEANUP_INTERVAL_SECS;
+
+ /* Handle different cases */
+ if (n > 0) {
+ if (n > ALIAS_CLEANUP_MAX_SPOKES)
+ n = ALIAS_CLEANUP_MAX_SPOKES;
+ la->lastCleanupTime = la->timeStamp;
+ for (i = 0; i < n; i++)
+ IncrementalCleanup(la);
+ } else if (n < 0) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAlias/HouseKeeping(): ");
+ fprintf(stderr, "something unexpected in time values\n");
+#endif
+ la->lastCleanupTime = la->timeStamp;
+ }
+}
+
+/* Init the log file and enable logging */
+static int
+InitPacketAliasLog(struct libalias *la)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ if (~la->packetAliasMode & PKT_ALIAS_LOG) {
+#ifdef _KERNEL
+ if ((la->logDesc = malloc(LIBALIAS_BUF_SIZE)))
+ ;
+#else
+ if ((la->logDesc = fopen("/var/log/alias.log", "w")))
+ fprintf(la->logDesc, "PacketAlias/InitPacketAliasLog: Packet alias logging enabled.\n");
+#endif
+ else
+ return (ENOMEM); /* log initialization failed */
+ la->packetAliasMode |= PKT_ALIAS_LOG;
+ }
+
+ return (1);
+}
+
+/* Close the log-file and disable logging. */
+static void
+UninitPacketAliasLog(struct libalias *la)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ if (la->logDesc) {
+#ifdef _KERNEL
+ free(la->logDesc);
+#else
+ fclose(la->logDesc);
+#endif
+ la->logDesc = NULL;
+ }
+ la->packetAliasMode &= ~PKT_ALIAS_LOG;
+}
+
+/* Outside world interfaces
+
+-- "outside world" means other than alias*.c routines --
+
+ PacketAliasRedirectPort()
+ PacketAliasAddServer()
+ PacketAliasRedirectProto()
+ PacketAliasRedirectAddr()
+ PacketAliasRedirectDynamic()
+ PacketAliasRedirectDelete()
+ PacketAliasSetAddress()
+ PacketAliasInit()
+ PacketAliasUninit()
+ PacketAliasSetMode()
+
+(prototypes in alias.h)
+*/
+
+/* Redirection from a specific public addr:port to a
+ private addr:port */
+struct alias_link *
+LibAliasRedirectPort(struct libalias *la, struct in_addr src_addr, u_short src_port,
+ struct in_addr dst_addr, u_short dst_port,
+ struct in_addr alias_addr, u_short alias_port,
+ u_char proto)
+{
+ int link_type;
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK(la);
+ switch (proto) {
+ case IPPROTO_UDP:
+ link_type = LINK_UDP;
+ break;
+ case IPPROTO_TCP:
+ link_type = LINK_TCP;
+ break;
+ case IPPROTO_SCTP:
+ link_type = LINK_SCTP;
+ break;
+ default:
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "PacketAliasRedirectPort(): ");
+ fprintf(stderr, "only SCTP, TCP and UDP protocols allowed\n");
+#endif
+ lnk = NULL;
+ goto getout;
+ }
+
+ lnk = AddLink(la, src_addr, dst_addr, alias_addr,
+ src_port, dst_port, alias_port,
+ link_type);
+
+ if (lnk != NULL) {
+ lnk->flags |= LINK_PERMANENT;
+ }
+#ifdef LIBALIAS_DEBUG
+ else {
+ fprintf(stderr, "PacketAliasRedirectPort(): "
+ "call to AddLink() failed\n");
+ }
+#endif
+
+getout:
+ LIBALIAS_UNLOCK(la);
+ return (lnk);
+}
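+
+/*
+ * Usage sketch (addresses and ports are hypothetical): to forward TCP
+ * connections arriving on the alias address at port 8080 to a private
+ * host's port 80, regardless of the remote peer:
+ *
+ *	struct in_addr priv, any, pub;
+ *	priv.s_addr = inet_addr("10.0.0.5");
+ *	any.s_addr = INADDR_ANY;
+ *	pub.s_addr = inet_addr("192.0.2.1");
+ *	(void)LibAliasRedirectPort(la, priv, htons(80), any, 0,
+ *	    pub, htons(8080), IPPROTO_TCP);
+ */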
+
+/* Add server to the pool of servers */
+int
+LibAliasAddServer(struct libalias *la, struct alias_link *lnk, struct in_addr addr, u_short port)
+{
+ struct server *server;
+ int res;
+
+ LIBALIAS_LOCK(la);
+ (void)la;
+
+ server = malloc(sizeof(struct server));
+
+ if (server != NULL) {
+ struct server *head;
+
+ server->addr = addr;
+ server->port = port;
+
+ head = lnk->server;
+ if (head == NULL)
+ server->next = server;
+ else {
+ struct server *s;
+
+ for (s = head; s->next != head; s = s->next);
+ s->next = server;
+ server->next = head;
+ }
+ lnk->server = server;
+ res = 0;
+ } else
+ res = -1;
+
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
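+
+/*
+ * Sketch of LSNAT use (hypothetical addresses): given a redirection link
+ * lnk obtained from LibAliasRedirectPort() or LibAliasRedirectAddr(),
+ * several real servers can be attached to it; FindOriginalAddress() then
+ * hands incoming connections to the pool in round-robin order:
+ *
+ *	LibAliasAddServer(la, lnk, srv1_addr, htons(80));
+ *	LibAliasAddServer(la, lnk, srv2_addr, htons(80));
+ */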
+
+/* Redirect packets of a given IP protocol from a specific
+ public address to a private address */
+struct alias_link *
+LibAliasRedirectProto(struct libalias *la, struct in_addr src_addr,
+ struct in_addr dst_addr,
+ struct in_addr alias_addr,
+ u_char proto)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK(la);
+ lnk = AddLink(la, src_addr, dst_addr, alias_addr,
+ NO_SRC_PORT, NO_DEST_PORT, 0,
+ proto);
+
+ if (lnk != NULL) {
+ lnk->flags |= LINK_PERMANENT;
+ }
+#ifdef LIBALIAS_DEBUG
+ else {
+ fprintf(stderr, "PacketAliasRedirectProto(): "
+ "call to AddLink() failed\n");
+ }
+#endif
+
+ LIBALIAS_UNLOCK(la);
+ return (lnk);
+}
+
+/* Static address translation */
+struct alias_link *
+LibAliasRedirectAddr(struct libalias *la, struct in_addr src_addr,
+ struct in_addr alias_addr)
+{
+ struct alias_link *lnk;
+
+ LIBALIAS_LOCK(la);
+ lnk = AddLink(la, src_addr, la->nullAddress, alias_addr,
+ 0, 0, 0,
+ LINK_ADDR);
+
+ if (lnk != NULL) {
+ lnk->flags |= LINK_PERMANENT;
+ }
+#ifdef LIBALIAS_DEBUG
+ else {
+ fprintf(stderr, "PacketAliasRedirectAddr(): "
+ "call to AddLink() failed\n");
+ }
+#endif
+
+ LIBALIAS_UNLOCK(la);
+ return (lnk);
+}
+
+
+/* Mark the aliasing link dynamic */
+int
+LibAliasRedirectDynamic(struct libalias *la, struct alias_link *lnk)
+{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ (void)la;
+
+ if (lnk->flags & LINK_PARTIALLY_SPECIFIED)
+ res = -1;
+ else {
+ lnk->flags &= ~LINK_PERMANENT;
+ res = 0;
+ }
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+
+void
+LibAliasRedirectDelete(struct libalias *la, struct alias_link *lnk)
+{
+/* This is a dangerous function to put in the API,
+ because an invalid pointer can crash the program. */
+
+ LIBALIAS_LOCK(la);
+ la->deleteAllLinks = 1;
+ DeleteLink(lnk);
+ la->deleteAllLinks = 0;
+ LIBALIAS_UNLOCK(la);
+}
+
+
+void
+LibAliasSetAddress(struct libalias *la, struct in_addr addr)
+{
+
+ LIBALIAS_LOCK(la);
+ if (la->packetAliasMode & PKT_ALIAS_RESET_ON_ADDR_CHANGE
+ && la->aliasAddress.s_addr != addr.s_addr)
+ CleanupAliasData(la);
+
+ la->aliasAddress = addr;
+ LIBALIAS_UNLOCK(la);
+}
+
+
+void
+LibAliasSetTarget(struct libalias *la, struct in_addr target_addr)
+{
+
+ LIBALIAS_LOCK(la);
+ la->targetAddress = target_addr;
+ LIBALIAS_UNLOCK(la);
+}
+
+static void
+finishoff(void)
+{
+
+ while (!LIST_EMPTY(&instancehead))
+ LibAliasUninit(LIST_FIRST(&instancehead));
+}
+
+struct libalias *
+LibAliasInit(struct libalias *la)
+{
+ int i;
+#ifndef _KERNEL
+ struct timeval tv;
+ struct timezone tz;
+#endif
+
+ if (la == NULL) {
+ la = calloc(sizeof *la, 1);
+ if (la == NULL)
+ return (la);
+
+#ifndef _KERNEL /* kernel cleans up on module unload */
+ if (LIST_EMPTY(&instancehead))
+ atexit(finishoff);
+#endif
+ LIST_INSERT_HEAD(&instancehead, la, instancelist);
+
+#ifdef _KERNEL
+ la->timeStamp = time_uptime;
+ la->lastCleanupTime = time_uptime;
+#else
+ gettimeofday(&tv, &tz);
+ la->timeStamp = tv.tv_sec;
+ la->lastCleanupTime = tv.tv_sec;
+#endif
+
+ for (i = 0; i < LINK_TABLE_OUT_SIZE; i++)
+ LIST_INIT(&la->linkTableOut[i]);
+ for (i = 0; i < LINK_TABLE_IN_SIZE; i++)
+ LIST_INIT(&la->linkTableIn[i]);
+#ifdef _KERNEL
+ AliasSctpInit(la);
+#endif
+ LIBALIAS_LOCK_INIT(la);
+ LIBALIAS_LOCK(la);
+ } else {
+ LIBALIAS_LOCK(la);
+ la->deleteAllLinks = 1;
+ CleanupAliasData(la);
+ la->deleteAllLinks = 0;
+#ifdef _KERNEL
+ AliasSctpTerm(la);
+ AliasSctpInit(la);
+#endif
+ }
+
+ la->aliasAddress.s_addr = INADDR_ANY;
+ la->targetAddress.s_addr = INADDR_ANY;
+
+ la->icmpLinkCount = 0;
+ la->udpLinkCount = 0;
+ la->tcpLinkCount = 0;
+ la->sctpLinkCount = 0;
+ la->pptpLinkCount = 0;
+ la->protoLinkCount = 0;
+ la->fragmentIdLinkCount = 0;
+ la->fragmentPtrLinkCount = 0;
+ la->sockCount = 0;
+
+ la->cleanupIndex = 0;
+
+ la->packetAliasMode = PKT_ALIAS_SAME_PORTS
+#ifndef NO_USE_SOCKETS
+ | PKT_ALIAS_USE_SOCKETS
+#endif
+ | PKT_ALIAS_RESET_ON_ADDR_CHANGE;
+#ifndef NO_FW_PUNCH
+ la->fireWallFD = -1;
+#endif
+#ifndef _KERNEL
+ LibAliasRefreshModules();
+#endif
+ LIBALIAS_UNLOCK(la);
+ return (la);
+}
+
+void
+LibAliasUninit(struct libalias *la)
+{
+
+ LIBALIAS_LOCK(la);
+#ifdef _KERNEL
+ AliasSctpTerm(la);
+#endif
+ la->deleteAllLinks = 1;
+ CleanupAliasData(la);
+ la->deleteAllLinks = 0;
+ UninitPacketAliasLog(la);
+#ifndef NO_FW_PUNCH
+ UninitPunchFW(la);
+#endif
+ LIST_REMOVE(la, instancelist);
+ LIBALIAS_UNLOCK(la);
+ LIBALIAS_LOCK_DESTROY(la);
+ free(la);
+}
+
+/* Change mode for some operations */
+unsigned int
+LibAliasSetMode(
+ struct libalias *la,
+ unsigned int flags, /* Which state to bring flags to */
+ unsigned int mask /* Mask of which flags to affect (use 0 to
+ * do a probe for flag values) */
+)
+{
+ int res = -1;
+
+ LIBALIAS_LOCK(la);
+/* Enable logging? */
+ if (flags & mask & PKT_ALIAS_LOG) {
+ /* Do the enable */
+ if (InitPacketAliasLog(la) == ENOMEM)
+ goto getout;
+ } else
+/* _Disable_ logging? */
+ if (~flags & mask & PKT_ALIAS_LOG) {
+ UninitPacketAliasLog(la);
+ }
+#ifndef NO_FW_PUNCH
+/* Start punching holes in the firewall? */
+ if (flags & mask & PKT_ALIAS_PUNCH_FW) {
+ InitPunchFW(la);
+ } else
+/* Stop punching holes in the firewall? */
+ if (~flags & mask & PKT_ALIAS_PUNCH_FW) {
+ UninitPunchFW(la);
+ }
+#endif
+
+/* Other flags can be set/cleared without special action */
+ la->packetAliasMode = (flags & mask) | (la->packetAliasMode & ~mask);
+ res = la->packetAliasMode;
+getout:
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
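+
+/*
+ * Usage sketch: a zero mask only probes the current mode word, while
+ * passing the same bit in both flags and mask sets it, e.g. to turn on
+ * packet logging:
+ *
+ *	mode = LibAliasSetMode(la, 0, 0);
+ *	LibAliasSetMode(la, PKT_ALIAS_LOG, PKT_ALIAS_LOG);
+ */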
+
+
+int
+LibAliasCheckNewLink(struct libalias *la)
+{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = la->newDefaultLink;
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+
+#ifndef NO_FW_PUNCH
+
+/*****************
+ Code to support firewall punching. This shouldn't really be in this
+ file, but making variables global is evil too.
+ ****************/
+
+/* Firewall include files */
+#include <freebsd/net/if.h>
+#include <freebsd/netinet/ip_fw.h>
+#include <freebsd/string.h>
+#include <freebsd/err.h>
+
+/*
+ * Helper function: advances the cmd pointer past the current command
+ * (by its encoded length) and clears the first word of the next
+ * command in case it has been clobbered before.
+ */
+static ipfw_insn *
+next_cmd(ipfw_insn * cmd)
+{
+ cmd += F_LEN(cmd);
+ bzero(cmd, sizeof(*cmd));
+ return (cmd);
+}
+
+/*
+ * A function to fill simple commands of size 1.
+ * Existing flags are preserved.
+ */
+static ipfw_insn *
+fill_cmd(ipfw_insn * cmd, enum ipfw_opcodes opcode, int size,
+ int flags, u_int16_t arg)
+{
+ cmd->opcode = opcode;
+ cmd->len = ((cmd->len | flags) & (F_NOT | F_OR)) | (size & F_LEN_MASK);
+ cmd->arg1 = arg;
+ return next_cmd(cmd);
+}
+
+static ipfw_insn *
+fill_ip(ipfw_insn * cmd1, enum ipfw_opcodes opcode, u_int32_t addr)
+{
+ ipfw_insn_ip *cmd = (ipfw_insn_ip *) cmd1;
+
+ cmd->addr.s_addr = addr;
+ return fill_cmd(cmd1, opcode, F_INSN_SIZE(ipfw_insn_u32), 0, 0);
+}
+
+static ipfw_insn *
+fill_one_port(ipfw_insn * cmd1, enum ipfw_opcodes opcode, u_int16_t port)
+{
+ ipfw_insn_u16 *cmd = (ipfw_insn_u16 *) cmd1;
+
+ cmd->ports[0] = cmd->ports[1] = port;
+ return fill_cmd(cmd1, opcode, F_INSN_SIZE(ipfw_insn_u16), 0, 0);
+}
+
+static int
+fill_rule(void *buf, int bufsize, int rulenum,
+ enum ipfw_opcodes action, int proto,
+ struct in_addr sa, u_int16_t sp, struct in_addr da, u_int16_t dp)
+{
+ struct ip_fw *rule = (struct ip_fw *)buf;
+ ipfw_insn *cmd = (ipfw_insn *) rule->cmd;
+
+ bzero(buf, bufsize);
+ rule->rulenum = rulenum;
+
+ cmd = fill_cmd(cmd, O_PROTO, F_INSN_SIZE(ipfw_insn), 0, proto);
+ cmd = fill_ip(cmd, O_IP_SRC, sa.s_addr);
+ cmd = fill_one_port(cmd, O_IP_SRCPORT, sp);
+ cmd = fill_ip(cmd, O_IP_DST, da.s_addr);
+ cmd = fill_one_port(cmd, O_IP_DSTPORT, dp);
+
+ rule->act_ofs = (u_int32_t *) cmd - (u_int32_t *) rule->cmd;
+ cmd = fill_cmd(cmd, action, F_INSN_SIZE(ipfw_insn), 0, 0);
+
+ rule->cmd_len = (u_int32_t *) cmd - (u_int32_t *) rule->cmd;
+
+ return ((char *)cmd - (char *)buf);
+}
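+
+/*
+ * The buffer produced above holds a single ipfw rule whose match part is
+ * the instruction sequence O_PROTO, O_IP_SRC, O_IP_SRCPORT, O_IP_DST,
+ * O_IP_DSTPORT, followed by the action opcode; act_ofs and cmd_len are
+ * expressed in 32-bit words, as the ipfw interface expects.
+ */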
+
+static void ClearAllFWHoles(struct libalias *la);
+
+
+#define fw_setfield(la, field, num) \
+do { \
+ (field)[(num) - la->fireWallBaseNum] = 1; \
+} /*lint -save -e717 */ while(0)/* lint -restore */
+
+#define fw_clrfield(la, field, num) \
+do { \
+ (field)[(num) - la->fireWallBaseNum] = 0; \
+} /*lint -save -e717 */ while(0)/* lint -restore */
+
+#define fw_tstfield(la, field, num) ((field)[(num) - la->fireWallBaseNum])
+
+static void
+InitPunchFW(struct libalias *la)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ la->fireWallField = malloc(la->fireWallNumNums);
+ if (la->fireWallField) {
+ memset(la->fireWallField, 0, la->fireWallNumNums);
+ if (la->fireWallFD < 0) {
+ la->fireWallFD = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+ }
+ ClearAllFWHoles(la);
+ la->fireWallActiveNum = la->fireWallBaseNum;
+ }
+}
+
+static void
+UninitPunchFW(struct libalias *la)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ ClearAllFWHoles(la);
+ if (la->fireWallFD >= 0)
+ close(la->fireWallFD);
+ la->fireWallFD = -1;
+ if (la->fireWallField)
+ free(la->fireWallField);
+ la->fireWallField = NULL;
+ la->packetAliasMode &= ~PKT_ALIAS_PUNCH_FW;
+}
+
+/* Make a certain link go through the firewall */
+void
+PunchFWHole(struct alias_link *lnk)
+{
+ struct libalias *la;
+ int r; /* Result code */
+ struct ip_fw rule; /* On-the-fly built rule */
+ int fwhole; /* Where to punch hole */
+
+ la = lnk->la;
+ LIBALIAS_LOCK_ASSERT(la);
+
+/* Don't do anything unless we are asked to */
+ if (!(la->packetAliasMode & PKT_ALIAS_PUNCH_FW) ||
+ la->fireWallFD < 0 ||
+ lnk->link_type != LINK_TCP)
+ return;
+
+ memset(&rule, 0, sizeof rule);
+
+/** Build rule **/
+
+ /* Find empty slot */
+ for (fwhole = la->fireWallActiveNum;
+ fwhole < la->fireWallBaseNum + la->fireWallNumNums &&
+ fw_tstfield(la, la->fireWallField, fwhole);
+ fwhole++);
+ if (fwhole == la->fireWallBaseNum + la->fireWallNumNums) {
+ for (fwhole = la->fireWallBaseNum;
+ fwhole < la->fireWallActiveNum &&
+ fw_tstfield(la, la->fireWallField, fwhole);
+ fwhole++);
+ if (fwhole == la->fireWallActiveNum) {
+ /* No rule point empty - we can't punch more holes. */
+ la->fireWallActiveNum = la->fireWallBaseNum;
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr, "libalias: Unable to create firewall hole!\n");
+#endif
+ return;
+ }
+ }
+ /* Start next search at next position */
+ la->fireWallActiveNum = fwhole + 1;
+
+ /*
+ * generate two rules of the form
+ *
+ * add fwhole accept tcp from OAddr OPort to DAddr DPort
+ * add fwhole accept tcp from DAddr DPort to OAddr OPort
+ */
+ if (GetOriginalPort(lnk) != 0 && GetDestPort(lnk) != 0) {
+ u_int32_t rulebuf[255];
+ int i;
+
+ i = fill_rule(rulebuf, sizeof(rulebuf), fwhole,
+ O_ACCEPT, IPPROTO_TCP,
+ GetOriginalAddress(lnk), ntohs(GetOriginalPort(lnk)),
+ GetDestAddress(lnk), ntohs(GetDestPort(lnk)));
+ r = setsockopt(la->fireWallFD, IPPROTO_IP, IP_FW_ADD, rulebuf, i);
+ if (r)
+ err(1, "alias punch inbound(1) setsockopt(IP_FW_ADD)");
+
+ i = fill_rule(rulebuf, sizeof(rulebuf), fwhole,
+ O_ACCEPT, IPPROTO_TCP,
+ GetDestAddress(lnk), ntohs(GetDestPort(lnk)),
+ GetOriginalAddress(lnk), ntohs(GetOriginalPort(lnk)));
+ r = setsockopt(la->fireWallFD, IPPROTO_IP, IP_FW_ADD, rulebuf, i);
+ if (r)
+ err(1, "alias punch inbound(2) setsockopt(IP_FW_ADD)");
+ }
+
+/* Indicate hole applied */
+ lnk->data.tcp->fwhole = fwhole;
+ fw_setfield(la, la->fireWallField, fwhole);
+}
+
+/* Remove a hole in a firewall associated with a particular alias
+ lnk. Calling this too often is harmless. */
+static void
+ClearFWHole(struct alias_link *lnk)
+{
+ struct libalias *la;
+
+ la = lnk->la;
+ LIBALIAS_LOCK_ASSERT(la);
+ if (lnk->link_type == LINK_TCP) {
+ int fwhole = lnk->data.tcp->fwhole; /* Where is the firewall
+ * hole? */
+ struct ip_fw rule;
+
+ if (fwhole < 0)
+ return;
+
+ memset(&rule, 0, sizeof rule); /* useless for ipfw2 */
+ while (!setsockopt(la->fireWallFD, IPPROTO_IP, IP_FW_DEL,
+ &fwhole, sizeof fwhole));
+ fw_clrfield(la, la->fireWallField, fwhole);
+ lnk->data.tcp->fwhole = -1;
+ }
+}
+
+/* Clear out the entire range dedicated to firewall holes. */
+static void
+ClearAllFWHoles(struct libalias *la)
+{
+ struct ip_fw rule; /* On-the-fly built rule */
+ int i;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ if (la->fireWallFD < 0)
+ return;
+
+ memset(&rule, 0, sizeof rule);
+ for (i = la->fireWallBaseNum; i < la->fireWallBaseNum + la->fireWallNumNums; i++) {
+ int r = i;
+
+ while (!setsockopt(la->fireWallFD, IPPROTO_IP, IP_FW_DEL, &r, sizeof r));
+ }
+ /* XXX: third arg correct here ? /phk */
+ memset(la->fireWallField, 0, la->fireWallNumNums);
+}
+
+#endif
+
+void
+LibAliasSetFWBase(struct libalias *la, unsigned int base, unsigned int num)
+{
+
+ LIBALIAS_LOCK(la);
+#ifndef NO_FW_PUNCH
+ la->fireWallBaseNum = base;
+ la->fireWallNumNums = num;
+#endif
+ LIBALIAS_UNLOCK(la);
+}
+
+void
+LibAliasSetSkinnyPort(struct libalias *la, unsigned int port)
+{
+
+ LIBALIAS_LOCK(la);
+ la->skinnyPort = port;
+ LIBALIAS_UNLOCK(la);
+}
+
+/*
+ * Find the address to redirect incoming packets
+ */
+struct in_addr
+FindSctpRedirectAddress(struct libalias *la, struct sctp_nat_msg *sm)
+{
+ struct alias_link *lnk;
+ struct in_addr redir;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ lnk = FindLinkIn(la, sm->ip_hdr->ip_src, sm->ip_hdr->ip_dst,
+ sm->sctp_hdr->dest_port,sm->sctp_hdr->dest_port, LINK_SCTP, 1);
+ if (lnk != NULL) {
+ return(lnk->src_addr); /* port redirect */
+ } else {
+ redir = FindOriginalAddress(la,sm->ip_hdr->ip_dst);
+ if (redir.s_addr == la->aliasAddress.s_addr ||
+ redir.s_addr == la->targetAddress.s_addr) { /* No address found */
+ lnk = FindLinkIn(la, sm->ip_hdr->ip_src, sm->ip_hdr->ip_dst,
+ NO_DEST_PORT, 0, LINK_SCTP, 1);
+ if (lnk != NULL)
+ return(lnk->src_addr); /* redirect proto */
+ }
+ return(redir); /* address redirect */
+ }
+}
diff --git a/freebsd/sys/netinet/libalias/alias_dummy.c b/freebsd/sys/netinet/libalias/alias_dummy.c
new file mode 100644
index 00000000..c5a316d4
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_dummy.c
@@ -0,0 +1,155 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Alias_dummy is just an empty skeleton used to demonstrate how to write
+ * a module for libalias that will run unaltered in userland or in
+ * kernel land.
+ */
+
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#else
+#include <freebsd/errno.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/stdio.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/udp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+static void
+AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah);
+
+static int
+fingerprint(struct libalias *la, struct alias_data *ah)
+{
+
+ /*
+ * Check here all the data that will be used later; if any field
+ * is empty/NULL, return -1.
+ */
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ /*
+ * Fingerprint the incoming packet, if it matches any conditions
+ * return an OK value.
+ */
+ if (ntohs(*ah->dport) == 123
+ || ntohs(*ah->sport) == 456)
+ return (0); /* I know how to handle it. */
+ return (-1); /* I don't recognize this packet. */
+}
+
+/*
+ * Wrap in this general purpose function, the real function used to alias the
+ * packets.
+ */
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleDummy(la, pip, ah);
+ return (0);
+}
+
+/*
+ * NOTA BENE: the next variable MUST NOT be renamed in any case if you want
+ * your module to work in userland, because it is used to find and use all
+ * the protocol handlers present in every module.
+ * So WATCH OUT: your module needs this variable, and it needs it with
+ * ITS EXACT NAME: handlers.
+ */
+
+struct proto_handler handlers [] = {
+ {
+ .pri = 666,
+ .dir = IN|OUT,
+ .proto = UDP|TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_dummy", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_dummy, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_dummy, 1);
+MODULE_DEPEND(alias_dummy, libalias, 1, 1, 1);
+#endif
+
+static void
+AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah)
+{
+ ; /* Dummy. */
+}
+
diff --git a/freebsd/sys/netinet/libalias/alias_ftp.c b/freebsd/sys/netinet/libalias/alias_ftp.c
new file mode 100644
index 00000000..4e8b7177
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_ftp.c
@@ -0,0 +1,696 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 Charles Mott <cm@linktel.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ Alias_ftp.c performs special processing for FTP sessions under
+ TCP. Specifically, when a PORT/EPRT command from the client
+ side or 227/229 reply from the server is sent, it is intercepted
+ and modified. The address is changed to the gateway machine
+ and an aliasing port is used.
+
+ For this routine to work, the message must fit entirely into a
+ single TCP packet. This is typically the case, but exceptions
+ can easily be envisioned under the actual specifications.
+
+ Probably the most troubling aspect of the approach taken here is
+ that the new message will typically be a different length, and
+ this causes a certain amount of bookkeeping to keep track of the
+ changes of sequence and acknowledgment numbers, since the client
+ machine is totally unaware of the modification to the TCP stream.
+
+
+ References: RFC 959, RFC 2428.
+
+ Initial version: August, 1996 (cjm)
+
+ Version 1.6
+ Brian Somers and Martin Renters identified an IP checksum
+ error for modified IP packets.
+
+ Version 1.7: January 9, 1996 (cjm)
+ Differential checksum computation for change
+ in IP packet length.
+
+ Version 2.1: May, 1997 (cjm)
+ Very minor changes to conform with
+ local/global/function naming conventions
+ within the packet aliasing module.
+
+ Version 3.1: May, 2000 (eds)
+ Add support for passive mode, alias the 227 replies.
+
+ See HISTORY file for record of revisions.
+*/
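+
+/*
+ * Illustration (hypothetical addresses and ports): a client behind the
+ * aliasing host that sends "PORT 10,0,0,5,4,1" (host 10.0.0.5, port
+ * 4 * 256 + 1 = 1025) has the line rewritten to carry the aliasing
+ * address and an aliased data port, e.g. "PORT 192,0,2,1,234,17"
+ * (192.0.2.1, port 234 * 256 + 17 = 59921). Any change in line length
+ * is recorded with AddSeq() so later sequence/ACK numbers can be fixed up.
+ */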
+
+/* Includes */
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/ctype.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#else
+#include <freebsd/ctype.h>
+#include <freebsd/errno.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/stdio.h>
+#include <freebsd/string.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+#define FTP_CONTROL_PORT_NUMBER 21
+
+static void
+AliasHandleFtpOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+
+static int
+fingerprint(struct libalias *la, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == FTP_CONTROL_PORT_NUMBER
+ || ntohs(*ah->sport) == FTP_CONTROL_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleFtpOut(la, pip, ah->lnk, ah->maxpktsize);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 80,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_ftp", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_ftp, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_ftp, 1);
+MODULE_DEPEND(alias_ftp, libalias, 1, 1, 1);
+#endif
+
+#define FTP_CONTROL_PORT_NUMBER 21
+#define MAX_MESSAGE_SIZE 128
+
+/* FTP protocol flags. */
+#define WAIT_CRLF 0x01
+
+enum ftp_message_type {
+ FTP_PORT_COMMAND,
+ FTP_EPRT_COMMAND,
+ FTP_227_REPLY,
+ FTP_229_REPLY,
+ FTP_UNKNOWN_MESSAGE
+};
+
+static int ParseFtpPortCommand(struct libalias *la, char *, int);
+static int ParseFtpEprtCommand(struct libalias *la, char *, int);
+static int ParseFtp227Reply(struct libalias *la, char *, int);
+static int ParseFtp229Reply(struct libalias *la, char *, int);
+static void NewFtpMessage(struct libalias *la, struct ip *, struct alias_link *, int, int);
+
+static void
+AliasHandleFtpOut(
+ struct libalias *la,
+ struct ip *pip, /* IP packet to examine/patch */
+ struct alias_link *lnk, /* The link to go through (aliased port) */
+ int maxpacketsize /* The maximum size this packet can grow to
+ (including headers) */ )
+{
+ int hlen, tlen, dlen, pflags;
+ char *sptr;
+ struct tcphdr *tc;
+ int ftp_message_type;
+
+/* Calculate data length of TCP packet */
+ tc = (struct tcphdr *)ip_next(pip);
+ hlen = (pip->ip_hl + tc->th_off) << 2;
+ tlen = ntohs(pip->ip_len);
+ dlen = tlen - hlen;
+
+/* Place string pointer and beginning of data */
+ sptr = (char *)pip;
+ sptr += hlen;
+
+/*
+ * Check that data length is not too long and previous message was
+ * properly terminated with CRLF.
+ */
+ pflags = GetProtocolFlags(lnk);
+ if (dlen <= MAX_MESSAGE_SIZE && !(pflags & WAIT_CRLF)) {
+ ftp_message_type = FTP_UNKNOWN_MESSAGE;
+
+ if (ntohs(tc->th_dport) == FTP_CONTROL_PORT_NUMBER) {
+/*
+ * When aliasing a client, check for the PORT/EPRT command.
+ */
+ if (ParseFtpPortCommand(la, sptr, dlen))
+ ftp_message_type = FTP_PORT_COMMAND;
+ else if (ParseFtpEprtCommand(la, sptr, dlen))
+ ftp_message_type = FTP_EPRT_COMMAND;
+ } else {
+/*
+ * When aliasing a server, check for the 227/229 reply.
+ */
+ if (ParseFtp227Reply(la, sptr, dlen))
+ ftp_message_type = FTP_227_REPLY;
+ else if (ParseFtp229Reply(la, sptr, dlen)) {
+ ftp_message_type = FTP_229_REPLY;
+ la->true_addr.s_addr = pip->ip_src.s_addr;
+ }
+ }
+
+ if (ftp_message_type != FTP_UNKNOWN_MESSAGE)
+ NewFtpMessage(la, pip, lnk, maxpacketsize, ftp_message_type);
+ }
+/* Track the msgs which are CRLF term'd for PORT/PASV FW breach */
+
+ if (dlen) { /* only if there's data */
+ sptr = (char *)pip; /* start over at beginning */
+ tlen = ntohs(pip->ip_len); /* recalc tlen, pkt may
+ * have grown */
+ if (sptr[tlen - 2] == '\r' && sptr[tlen - 1] == '\n')
+ pflags &= ~WAIT_CRLF;
+ else
+ pflags |= WAIT_CRLF;
+ SetProtocolFlags(lnk, pflags);
+ }
+}
+
+static int
+ParseFtpPortCommand(struct libalias *la, char *sptr, int dlen)
+{
+ char ch;
+ int i, state;
+ u_int32_t addr;
+ u_short port;
+ u_int8_t octet;
+
+ /* Format: "PORT A,D,D,R,PO,RT". */
+
+ /* Return if data length is too short. */
+ if (dlen < 18)
+ return (0);
+
+ if (strncasecmp("PORT ", sptr, 5))
+ return (0);
+
+ addr = port = octet = 0;
+ state = 0;
+ for (i = 5; i < dlen; i++) {
+ ch = sptr[i];
+ switch (state) {
+ case 0:
+ if (isspace(ch))
+ break;
+ else
+ state++;
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ case 9:
+ case 11:
+ if (isdigit(ch)) {
+ octet = ch - '0';
+ state++;
+ } else
+ return (0);
+ break;
+ case 2:
+ case 4:
+ case 6:
+ case 8:
+ if (isdigit(ch))
+ octet = 10 * octet + ch - '0';
+ else if (ch == ',') {
+ addr = (addr << 8) + octet;
+ state++;
+ } else
+ return (0);
+ break;
+ case 10:
+ case 12:
+ if (isdigit(ch))
+ octet = 10 * octet + ch - '0';
+ else if (ch == ',' || state == 12) {
+ port = (port << 8) + octet;
+ state++;
+ } else
+ return (0);
+ break;
+ }
+ }
+
+ if (state == 13) {
+ la->true_addr.s_addr = htonl(addr);
+ la->true_port = port;
+ return (1);
+ } else
+ return (0);
+}
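+
+/*
+ * Worked example (hypothetical command): for "PORT 192,168,1,9,7,138" the
+ * state machine above accumulates addr = 0xc0a80109 (192.168.1.9) from the
+ * first four comma-separated octets and port = 7 * 256 + 138 = 1930 from
+ * the last two; once the trailing CR/LF is seen it finishes in state 13
+ * with true_addr and true_port filled in.
+ */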
+
+static int
+ParseFtpEprtCommand(struct libalias *la, char *sptr, int dlen)
+{
+ char ch, delim;
+ int i, state;
+ u_int32_t addr;
+ u_short port;
+ u_int8_t octet;
+
+ /* Format: "EPRT |1|A.D.D.R|PORT|". */
+
+ /* Return if data length is too short. */
+ if (dlen < 18)
+ return (0);
+
+ if (strncasecmp("EPRT ", sptr, 5))
+ return (0);
+
+ addr = port = octet = 0;
+ delim = '|'; /* XXX gcc -Wuninitialized */
+ state = 0;
+ for (i = 5; i < dlen; i++) {
+ ch = sptr[i];
+ switch (state) {
+ case 0:
+ if (!isspace(ch)) {
+ delim = ch;
+ state++;
+ }
+ break;
+ case 1:
+ if (ch == '1') /* IPv4 address */
+ state++;
+ else
+ return (0);
+ break;
+ case 2:
+ if (ch == delim)
+ state++;
+ else
+ return (0);
+ break;
+ case 3:
+ case 5:
+ case 7:
+ case 9:
+ if (isdigit(ch)) {
+ octet = ch - '0';
+ state++;
+ } else
+ return (0);
+ break;
+ case 4:
+ case 6:
+ case 8:
+ case 10:
+ if (isdigit(ch))
+ octet = 10 * octet + ch - '0';
+ else if (ch == '.' || state == 10) {
+ addr = (addr << 8) + octet;
+ state++;
+ } else
+ return (0);
+ break;
+ case 11:
+ if (isdigit(ch)) {
+ port = ch - '0';
+ state++;
+ } else
+ return (0);
+ break;
+ case 12:
+ if (isdigit(ch))
+ port = 10 * port + ch - '0';
+ else if (ch == delim)
+ state++;
+ else
+ return (0);
+ break;
+ }
+ }
+
+ if (state == 13) {
+ la->true_addr.s_addr = htonl(addr);
+ la->true_port = port;
+ return (1);
+ } else
+ return (0);
+}
+
+static int
+ParseFtp227Reply(struct libalias *la, char *sptr, int dlen)
+{
+ char ch;
+ int i, state;
+ u_int32_t addr;
+ u_short port;
+ u_int8_t octet;
+
+ /* Format: "227 Entering Passive Mode (A,D,D,R,PO,RT)" */
+
+ /* Return if data length is too short. */
+ if (dlen < 17)
+ return (0);
+
+ if (strncmp("227 ", sptr, 4))
+ return (0);
+
+ addr = port = octet = 0;
+
+ state = 0;
+ for (i = 4; i < dlen; i++) {
+ ch = sptr[i];
+ switch (state) {
+ case 0:
+ if (ch == '(')
+ state++;
+ break;
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ case 9:
+ case 11:
+ if (isdigit(ch)) {
+ octet = ch - '0';
+ state++;
+ } else
+ return (0);
+ break;
+ case 2:
+ case 4:
+ case 6:
+ case 8:
+ if (isdigit(ch))
+ octet = 10 * octet + ch - '0';
+ else if (ch == ',') {
+ addr = (addr << 8) + octet;
+ state++;
+ } else
+ return (0);
+ break;
+ case 10:
+ case 12:
+ if (isdigit(ch))
+ octet = 10 * octet + ch - '0';
+ else if (ch == ',' || (state == 12 && ch == ')')) {
+ port = (port << 8) + octet;
+ state++;
+ } else
+ return (0);
+ break;
+ }
+ }
+
+ if (state == 13) {
+ la->true_port = port;
+ la->true_addr.s_addr = htonl(addr);
+ return (1);
+ } else
+ return (0);
+}
+
+static int
+ParseFtp229Reply(struct libalias *la, char *sptr, int dlen)
+{
+ char ch, delim;
+ int i, state;
+ u_short port;
+
+ /* Format: "229 Entering Extended Passive Mode (|||PORT|)" */
+
+ /* Return if data length is too short. */
+ if (dlen < 11)
+ return (0);
+
+ if (strncmp("229 ", sptr, 4))
+ return (0);
+
+ port = 0;
+ delim = '|'; /* XXX gcc -Wuninitialized */
+
+ state = 0;
+ for (i = 4; i < dlen; i++) {
+ ch = sptr[i];
+ switch (state) {
+ case 0:
+ if (ch == '(')
+ state++;
+ break;
+ case 1:
+ delim = ch;
+ state++;
+ break;
+ case 2:
+ case 3:
+ if (ch == delim)
+ state++;
+ else
+ return (0);
+ break;
+ case 4:
+ if (isdigit(ch)) {
+ port = ch - '0';
+ state++;
+ } else
+ return (0);
+ break;
+ case 5:
+ if (isdigit(ch))
+ port = 10 * port + ch - '0';
+ else if (ch == delim)
+ state++;
+ else
+ return (0);
+ break;
+ case 6:
+ if (ch == ')')
+ state++;
+ else
+ return (0);
+ break;
+ }
+ }
+
+ if (state == 7) {
+ la->true_port = port;
+ return (1);
+ } else
+ return (0);
+}
+
+static void
+NewFtpMessage(struct libalias *la, struct ip *pip,
+ struct alias_link *lnk,
+ int maxpacketsize,
+ int ftp_message_type)
+{
+ struct alias_link *ftp_lnk;
+
+/* Security checks. */
+ if (pip->ip_src.s_addr != la->true_addr.s_addr)
+ return;
+
+ if (la->true_port < IPPORT_RESERVED)
+ return;
+
+/* Establish link to address and port found in FTP control message. */
+ ftp_lnk = FindUdpTcpOut(la, la->true_addr, GetDestAddress(lnk),
+ htons(la->true_port), 0, IPPROTO_TCP, 1);
+
+ if (ftp_lnk != NULL) {
+ int slen, hlen, tlen, dlen;
+ struct tcphdr *tc;
+
+#ifndef NO_FW_PUNCH
+ /* Punch hole in firewall */
+ PunchFWHole(ftp_lnk);
+#endif
+
+/* Calculate data length of TCP packet */
+ tc = (struct tcphdr *)ip_next(pip);
+ hlen = (pip->ip_hl + tc->th_off) << 2;
+ tlen = ntohs(pip->ip_len);
+ dlen = tlen - hlen;
+
+/* Create new FTP message. */
+ {
+ char stemp[MAX_MESSAGE_SIZE + 1];
+ char *sptr;
+ u_short alias_port;
+ u_char *ptr;
+ int a1, a2, a3, a4, p1, p2;
+ struct in_addr alias_address;
+
+/* Decompose alias address into quad format */
+ alias_address = GetAliasAddress(lnk);
+ ptr = (u_char *) & alias_address.s_addr;
+ a1 = *ptr++;
+ a2 = *ptr++;
+ a3 = *ptr++;
+ a4 = *ptr;
+
+ alias_port = GetAliasPort(ftp_lnk);
+
+/* Prepare new command */
+ switch (ftp_message_type) {
+ case FTP_PORT_COMMAND:
+ case FTP_227_REPLY:
+ /* Decompose alias port into pair format. */
+ ptr = (char *)&alias_port;
+ p1 = *ptr++;
+ p2 = *ptr;
+
+ if (ftp_message_type == FTP_PORT_COMMAND) {
+ /* Generate PORT command string. */
+ sprintf(stemp, "PORT %d,%d,%d,%d,%d,%d\r\n",
+ a1, a2, a3, a4, p1, p2);
+ } else {
+ /* Generate 227 reply string. */
+ sprintf(stemp,
+ "227 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n",
+ a1, a2, a3, a4, p1, p2);
+ }
+ break;
+ case FTP_EPRT_COMMAND:
+ /* Generate EPRT command string. */
+ sprintf(stemp, "EPRT |1|%d.%d.%d.%d|%d|\r\n",
+ a1, a2, a3, a4, ntohs(alias_port));
+ break;
+ case FTP_229_REPLY:
+ /* Generate 229 reply string. */
+ sprintf(stemp, "229 Entering Extended Passive Mode (|||%d|)\r\n",
+ ntohs(alias_port));
+ break;
+ }
+
+/* Save string length for IP header modification */
+ slen = strlen(stemp);
+
+/* Copy modified buffer into IP packet. */
+ sptr = (char *)pip;
+ sptr += hlen;
+ strncpy(sptr, stemp, maxpacketsize - hlen);
+ }
+
+/* Save information regarding modified seq and ack numbers */
+ {
+ int delta;
+
+ SetAckModified(lnk);
+ tc = (struct tcphdr *)ip_next(pip);
+ delta = GetDeltaSeqOut(tc->th_seq, lnk);
+ AddSeq(lnk, delta + slen - dlen, pip->ip_hl,
+ pip->ip_len, tc->th_seq, tc->th_off);
+ }
+
+/* Revise IP header */
+ {
+ u_short new_len;
+
+ new_len = htons(hlen + slen);
+ DifferentialChecksum(&pip->ip_sum,
+ &new_len,
+ &pip->ip_len,
+ 1);
+ pip->ip_len = new_len;
+ }
+
+/* Compute TCP checksum for revised packet */
+ tc->th_sum = 0;
+#ifdef _KERNEL
+ tc->th_x2 = 1;
+#else
+ tc->th_sum = TcpChecksum(pip);
+#endif
+ } else {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/HandleFtpOut: Cannot allocate FTP data port\n");
+#endif
+ }
+}
diff --git a/freebsd/sys/netinet/libalias/alias_irc.c b/freebsd/sys/netinet/libalias/alias_irc.c
new file mode 100644
index 00000000..05db0f4f
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_irc.c
@@ -0,0 +1,490 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 Charles Mott <cm@linktel.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/* Alias_irc.c intercepts packets containing IRC CTCP commands, and
+ changes DCC commands to export a port on the aliasing host instead
+ of an aliased host.
+
+ For this routine to work, the DCC command must fit entirely into a
+ single TCP packet. This will usually happen, but is not
+ guaranteed.
+
+ The interception is likely to change the length of the packet.
+ The handling of this is copied more-or-less verbatim from
+ ftp_alias.c
+
+ Initial version: Eivind Eklund <perhaps@yes.no> (ee) 97-01-29
+
+ Version 2.1: May, 1997 (cjm)
+ Very minor changes to conform with
+ local/global/function naming conventions
+ within the packet aliasing module.
+*/
+
+/* Includes */
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/ctype.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#else
+#include <freebsd/ctype.h>
+#include <freebsd/errno.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/stdio.h>
+#include <freebsd/stdlib.h>
+#include <freebsd/string.h>
+#include <freebsd/limits.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+#define IRC_CONTROL_PORT_NUMBER_1 6667
+#define IRC_CONTROL_PORT_NUMBER_2 6668
+
+#define PKTSIZE (IP_MAXPACKET + 1)
+char *newpacket;
+
+/* Local defines */
+#define DBprintf(a)
+
+static void
+AliasHandleIrcOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+
+static int
+fingerprint(struct libalias *la, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_2)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ newpacket = malloc(PKTSIZE);
+ if (newpacket) {
+ AliasHandleIrcOut(la, pip, ah->lnk, ah->maxpktsize);
+ free(newpacket);
+ }
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 90,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_irc", mod_handler, NULL
+};
+
+/* Kernel module definition. */
+#ifdef _KERNEL
+DECLARE_MODULE(alias_irc, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_irc, 1);
+MODULE_DEPEND(alias_irc, libalias, 1, 1, 1);
+#endif
+
+static void
+AliasHandleIrcOut(struct libalias *la,
+ struct ip *pip, /* IP packet to examine */
+ struct alias_link *lnk, /* Which link are we on? */
+ int maxsize /* Maximum size of IP packet including
+ * headers */
+)
+{
+ int hlen, tlen, dlen;
+ struct in_addr true_addr;
+ u_short true_port;
+ char *sptr;
+ struct tcphdr *tc;
+ int i; /* Iterator through the source */
+
+/* Calculate data length of TCP packet */
+ tc = (struct tcphdr *)ip_next(pip);
+ hlen = (pip->ip_hl + tc->th_off) << 2;
+ tlen = ntohs(pip->ip_len);
+ dlen = tlen - hlen;
+
+ /*
+ * Return if data length is too short - assume an entire PRIVMSG in
+ * each packet.
+ */
+ if (dlen < (int)sizeof(":A!a@n.n PRIVMSG A :aDCC 1 1a") - 1)
+ return;
+
+/* Place string pointer at beginning of data */
+ sptr = (char *)pip;
+ sptr += hlen;
+ maxsize -= hlen; /* We're interested in maximum size of
+ * data, not packet */
+
+ /* Search for a CTCP command [Note 1] */
+ for (i = 0; i < dlen; i++) {
+ if (sptr[i] == '\001')
+ goto lFOUND_CTCP;
+ }
+	return;			/* No CTCP commands in packet */
+ /* Handle CTCP commands - the buffer may have to be copied */
+lFOUND_CTCP:
+ {
+ unsigned int copyat = i;
+ unsigned int iCopy = 0; /* How much data have we written to
+ * copy-back string? */
+ unsigned long org_addr; /* Original IP address */
+ unsigned short org_port; /* Original source port
+ * address */
+
+lCTCP_START:
+ if (i >= dlen || iCopy >= PKTSIZE)
+ goto lPACKET_DONE;
+ newpacket[iCopy++] = sptr[i++]; /* Copy the CTCP start
+ * character */
+ /* Start of a CTCP */
+ if (i + 4 >= dlen) /* Too short for DCC */
+ goto lBAD_CTCP;
+ if (sptr[i + 0] != 'D')
+ goto lBAD_CTCP;
+ if (sptr[i + 1] != 'C')
+ goto lBAD_CTCP;
+ if (sptr[i + 2] != 'C')
+ goto lBAD_CTCP;
+ if (sptr[i + 3] != ' ')
+ goto lBAD_CTCP;
+ /* We have a DCC command - handle it! */
+ i += 4; /* Skip "DCC " */
+ if (iCopy + 4 > PKTSIZE)
+ goto lPACKET_DONE;
+ newpacket[iCopy++] = 'D';
+ newpacket[iCopy++] = 'C';
+ newpacket[iCopy++] = 'C';
+ newpacket[iCopy++] = ' ';
+
+ DBprintf(("Found DCC\n"));
+ /*
+ * Skip any extra spaces (should not occur according to
+		 * protocol, but DCC breaks the CTCP protocol anyway)
+ */
+ while (sptr[i] == ' ') {
+ if (++i >= dlen) {
+ DBprintf(("DCC packet terminated in just spaces\n"));
+ goto lPACKET_DONE;
+ }
+ }
+
+ DBprintf(("Transferring command...\n"));
+ while (sptr[i] != ' ') {
+ newpacket[iCopy++] = sptr[i];
+ if (++i >= dlen || iCopy >= PKTSIZE) {
+ DBprintf(("DCC packet terminated during command\n"));
+ goto lPACKET_DONE;
+ }
+ }
+ /* Copy _one_ space */
+ if (i + 1 < dlen && iCopy < PKTSIZE)
+ newpacket[iCopy++] = sptr[i++];
+
+ DBprintf(("Done command - removing spaces\n"));
+ /*
+ * Skip any extra spaces (should not occur according to
+		 * protocol, but DCC breaks the CTCP protocol anyway)
+ */
+ while (sptr[i] == ' ') {
+ if (++i >= dlen) {
+ DBprintf(("DCC packet terminated in just spaces (post-command)\n"));
+ goto lPACKET_DONE;
+ }
+ }
+
+ DBprintf(("Transferring filename...\n"));
+ while (sptr[i] != ' ') {
+ newpacket[iCopy++] = sptr[i];
+ if (++i >= dlen || iCopy >= PKTSIZE) {
+ DBprintf(("DCC packet terminated during filename\n"));
+ goto lPACKET_DONE;
+ }
+ }
+ /* Copy _one_ space */
+ if (i + 1 < dlen && iCopy < PKTSIZE)
+ newpacket[iCopy++] = sptr[i++];
+
+ DBprintf(("Done filename - removing spaces\n"));
+ /*
+ * Skip any extra spaces (should not occur according to
+		 * protocol, but DCC breaks the CTCP protocol anyway)
+ */
+ while (sptr[i] == ' ') {
+ if (++i >= dlen) {
+ DBprintf(("DCC packet terminated in just spaces (post-filename)\n"));
+ goto lPACKET_DONE;
+ }
+ }
+
+ DBprintf(("Fetching IP address\n"));
+ /* Fetch IP address */
+ org_addr = 0;
+ while (i < dlen && isdigit(sptr[i])) {
+ if (org_addr > ULONG_MAX / 10UL) { /* Terminate on overflow */
+ DBprintf(("DCC Address overflow (org_addr == 0x%08lx, next char %c\n", org_addr, sptr[i]));
+ goto lBAD_CTCP;
+ }
+ org_addr *= 10;
+ org_addr += sptr[i++] - '0';
+ }
+ DBprintf(("Skipping space\n"));
+ if (i + 1 >= dlen || sptr[i] != ' ') {
+ DBprintf(("Overflow (%d >= %d) or bad character (%02x) terminating IP address\n", i + 1, dlen, sptr[i]));
+ goto lBAD_CTCP;
+ }
+ /*
+ * Skip any extra spaces (should not occur according to
+ * protocol, but DCC breaks CTCP protocol anyway, so we
+		 * might as well play it safe)
+ */
+ while (sptr[i] == ' ') {
+ if (++i >= dlen) {
+ DBprintf(("Packet failure - space overflow.\n"));
+ goto lPACKET_DONE;
+ }
+ }
+ DBprintf(("Fetching port number\n"));
+ /* Fetch source port */
+ org_port = 0;
+ while (i < dlen && isdigit(sptr[i])) {
+ if (org_port > 6554) { /* Terminate on overflow
+							 * (65536/10 rounded up) */
+ DBprintf(("DCC: port number overflow\n"));
+ goto lBAD_CTCP;
+ }
+ org_port *= 10;
+ org_port += sptr[i++] - '0';
+ }
+ /* Skip illegal addresses (or early termination) */
+ if (i >= dlen || (sptr[i] != '\001' && sptr[i] != ' ')) {
+ DBprintf(("Bad port termination\n"));
+ goto lBAD_CTCP;
+ }
+ DBprintf(("Got IP %lu and port %u\n", org_addr, (unsigned)org_port));
+
+ /* We've got the address and port - now alias it */
+ {
+ struct alias_link *dcc_lnk;
+ struct in_addr destaddr;
+
+
+ true_port = htons(org_port);
+ true_addr.s_addr = htonl(org_addr);
+ destaddr.s_addr = 0;
+
+ /* Sanity/Security checking */
+ if (!org_addr || !org_port ||
+ pip->ip_src.s_addr != true_addr.s_addr ||
+ org_port < IPPORT_RESERVED)
+ goto lBAD_CTCP;
+
+ /*
+ * Steal the FTP_DATA_PORT - it doesn't really
+ * matter, and this would probably allow it through
+ * at least _some_ firewalls.
+ */
+ dcc_lnk = FindUdpTcpOut(la, true_addr, destaddr,
+ true_port, 0,
+ IPPROTO_TCP, 1);
+ DBprintf(("Got a DCC link\n"));
+ if (dcc_lnk) {
+ struct in_addr alias_address; /* Address from aliasing */
+ u_short alias_port; /* Port given by
+ * aliasing */
+ int n;
+
+#ifndef NO_FW_PUNCH
+ /* Generate firewall hole as appropriate */
+ PunchFWHole(dcc_lnk);
+#endif
+
+ alias_address = GetAliasAddress(lnk);
+ n = snprintf(&newpacket[iCopy],
+ PKTSIZE - iCopy,
+ "%lu ", (u_long) htonl(alias_address.s_addr));
+ if (n < 0) {
+ DBprintf(("DCC packet construct failure.\n"));
+ goto lBAD_CTCP;
+ }
+ if ((iCopy += n) >= PKTSIZE) { /* Truncated/fit exactly
+ * - bad news */
+ DBprintf(("DCC constructed packet overflow.\n"));
+ goto lBAD_CTCP;
+ }
+ alias_port = GetAliasPort(dcc_lnk);
+ n = snprintf(&newpacket[iCopy],
+ PKTSIZE - iCopy,
+ "%u", htons(alias_port));
+ if (n < 0) {
+ DBprintf(("DCC packet construct failure.\n"));
+ goto lBAD_CTCP;
+ }
+ iCopy += n;
+ /*
+ * Done - truncated cases will be taken
+ * care of by lBAD_CTCP
+ */
+ DBprintf(("Aliased IP %lu and port %u\n", alias_address.s_addr, (unsigned)alias_port));
+ }
+ }
+ /*
+ * An uninteresting CTCP - state entered right after '\001'
+ * has been pushed. Also used to copy the rest of a DCC,
+ * after IP address and port has been handled
+ */
+lBAD_CTCP:
+ for (; i < dlen && iCopy < PKTSIZE; i++, iCopy++) {
+ newpacket[iCopy] = sptr[i]; /* Copy CTCP unchanged */
+ if (sptr[i] == '\001') {
+ goto lNORMAL_TEXT;
+ }
+ }
+ goto lPACKET_DONE;
+ /* Normal text */
+lNORMAL_TEXT:
+ for (; i < dlen && iCopy < PKTSIZE; i++, iCopy++) {
+ newpacket[iCopy] = sptr[i]; /* Copy CTCP unchanged */
+ if (sptr[i] == '\001') {
+ goto lCTCP_START;
+ }
+ }
+ /* Handle the end of a packet */
+lPACKET_DONE:
+ iCopy = iCopy > maxsize - copyat ? maxsize - copyat : iCopy;
+ memcpy(sptr + copyat, newpacket, iCopy);
+
+/* Save information regarding modified seq and ack numbers */
+ {
+ int delta;
+
+ SetAckModified(lnk);
+ tc = (struct tcphdr *)ip_next(pip);
+ delta = GetDeltaSeqOut(tc->th_seq, lnk);
+ AddSeq(lnk, delta + copyat + iCopy - dlen, pip->ip_hl,
+ pip->ip_len, tc->th_seq, tc->th_off);
+ }
+
+ /* Revise IP header */
+ {
+ u_short new_len;
+
+ new_len = htons(hlen + iCopy + copyat);
+ DifferentialChecksum(&pip->ip_sum,
+ &new_len,
+ &pip->ip_len,
+ 1);
+ pip->ip_len = new_len;
+ }
+
+ /* Compute TCP checksum for revised packet */
+ tc->th_sum = 0;
+#ifdef _KERNEL
+ tc->th_x2 = 1;
+#else
+ tc->th_sum = TcpChecksum(pip);
+#endif
+ return;
+ }
+}
+
+/* Notes:
+ [Note 1]
+ The initial search will most often fail; it could be replaced with a 32-bit specific search.
+ Such a search would be done for 32-bit unsigned value V:
+ V ^= 0x01010101; (Search is for null bytes)
+ if( ((V-0x01010101)^V) & 0x80808080 ) {
+	(found a null byte, which was a 0x01 byte before the XOR)
+ }
+ To assert that the processor is 32-bits, do
+ extern int ircdccar[32]; (32 bits)
+ extern int ircdccar[CHAR_BIT*sizeof(unsigned int)];
+ which will generate a type-error on all but 32-bit machines.
+
+ [Note 2] This routine really ought to be replaced with one that
+ creates a transparent proxy on the aliasing host, to allow arbitrary
+ changes in the TCP stream. This should not be too difficult given
+ this base; I (ee) will try to do this some time later.
+ */
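As a rough illustration (not part of the patch), the word-at-a-time scan sketched in [Note 1] is easier to follow as compilable code. The sketch below uses the exact zero-byte test (v - 0x01010101) & ~v & 0x80808080 rather than the approximate XOR form in the note; the function name and the byte-wise tail loop are illustrative.

#include <limits.h>
#include <stddef.h>
#include <string.h>

/* Compile-time check, as in [Note 1], that unsigned int is 32 bits wide. */
typedef char ctcp_scan_requires_32bit_uint[
    (CHAR_BIT * sizeof(unsigned int) == 32) ? 1 : -1];

static int
has_ctcp_marker(const unsigned char *buf, size_t len)
{
	size_t i;
	unsigned int v;

	for (i = 0; i + sizeof(v) <= len; i += sizeof(v)) {
		memcpy(&v, buf + i, sizeof(v));	/* avoid alignment assumptions */
		v ^= 0x01010101u;		/* 0x01 bytes become 0x00 */
		if ((v - 0x01010101u) & ~v & 0x80808080u)
			return (1);		/* some byte in this word was 0x01 */
	}
	for (; i < len; i++)			/* leftover tail bytes */
		if (buf[i] == '\001')
			return (1);
	return (0);
}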
diff --git a/freebsd/sys/netinet/libalias/alias_local.h b/freebsd/sys/netinet/libalias/alias_local.h
new file mode 100644
index 00000000..e24ece49
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_local.h
@@ -0,0 +1,397 @@
+/*-
+ * Copyright (c) 2001 Charles Mott <cm@linktel.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Alias_local.h contains the function prototypes for alias.c,
+ * alias_db.c, alias_util.c, alias_ftp.c and alias_irc.c (as well
+ * as any future add-ons). It also includes macros, globals and
+ * struct definitions shared by more than one alias*.c file.
+ *
+ * This include file is intended to be used only within the aliasing
+ * software. Outside world interfaces are defined in alias.h
+ *
+ * This software is placed into the public domain with no restrictions
+ * on its distribution.
+ *
+ * Initial version: August, 1996 (cjm)
+ *
+ * <updated several times by original author and Eivind Eklund>
+ */
+
+#ifndef _ALIAS_LOCAL_HH_
+#define _ALIAS_LOCAL_HH_
+
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/sysctl.h>
+
+#ifdef _KERNEL
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+
+/* XXX: LibAliasSetTarget() uses this constant. */
+#define INADDR_NONE 0xffffffff
+
+#include <freebsd/netinet/libalias/alias_sctp.h>
+#else
+#include <freebsd/local/alias_sctp.h>
+#endif
+
+/* Sizes of input and output link tables */
+#define LINK_TABLE_OUT_SIZE 4001
+#define LINK_TABLE_IN_SIZE 4001
+
+struct proxy_entry;
+
+struct libalias {
+ LIST_ENTRY(libalias) instancelist;
+
+ int packetAliasMode; /* Mode flags */
+ /* - documented in alias.h */
+
+ struct in_addr aliasAddress; /* Address written onto source */
+ /* field of IP packet. */
+
+ struct in_addr targetAddress; /* IP address incoming packets */
+ /* are sent to if no aliasing */
+ /* link already exists */
+
+ struct in_addr nullAddress; /* Used as a dummy parameter for */
+ /* some function calls */
+
+ LIST_HEAD (, alias_link) linkTableOut[LINK_TABLE_OUT_SIZE];
+ /* Lookup table of pointers to */
+ /* chains of link records. Each */
+
+ LIST_HEAD (, alias_link) linkTableIn[LINK_TABLE_IN_SIZE];
+ /* link record is doubly indexed */
+ /* into input and output lookup */
+ /* tables. */
+
+ /* Link statistics */
+ int icmpLinkCount;
+ int udpLinkCount;
+ int tcpLinkCount;
+ int pptpLinkCount;
+ int protoLinkCount;
+ int fragmentIdLinkCount;
+ int fragmentPtrLinkCount;
+ int sockCount;
+
+ int cleanupIndex; /* Index to chain of link table */
+ /* being inspected for old links */
+
+ int timeStamp; /* System time in seconds for */
+ /* current packet */
+
+ int lastCleanupTime; /* Last time
+ * IncrementalCleanup() */
+ /* was called */
+
+ int deleteAllLinks; /* If equal to zero, DeleteLink() */
+ /* will not remove permanent links */
+
+ /* log descriptor */
+#ifdef _KERNEL
+ char *logDesc;
+#else
+ FILE *logDesc;
+#endif
+ /* statistics monitoring */
+
+ int newDefaultLink; /* Indicates if a new aliasing */
+ /* link has been created after a */
+ /* call to PacketAliasIn/Out(). */
+
+#ifndef NO_FW_PUNCH
+ int fireWallFD; /* File descriptor to be able to */
+ /* control firewall. Opened by */
+ /* PacketAliasSetMode on first */
+ /* setting the PKT_ALIAS_PUNCH_FW */
+ /* flag. */
+ int fireWallBaseNum; /* The first firewall entry
+ * free for our use */
+ int fireWallNumNums; /* How many entries can we
+ * use? */
+ int fireWallActiveNum; /* Which entry did we last
+ * use? */
+ char *fireWallField; /* bool array for entries */
+#endif
+
+ unsigned int skinnyPort; /* TCP port used by the Skinny */
+ /* protocol. */
+
+ struct proxy_entry *proxyList;
+
+ struct in_addr true_addr; /* in network byte order. */
+ u_short true_port; /* in host byte order. */
+
+ /*
+ * sctp code support
+ */
+
+ /* counts associations that have progressed to UP and not yet removed */
+ int sctpLinkCount;
+#ifdef _KERNEL
+ /* timing queue for keeping track of association timeouts */
+ struct sctp_nat_timer sctpNatTimer;
+
+ /* size of hash table used in this instance */
+ u_int sctpNatTableSize;
+
+/*
+ * local look up table sorted by l_vtag/l_port
+ */
+ LIST_HEAD(sctpNatTableL, sctp_nat_assoc) *sctpTableLocal;
+/*
+ * global look up table sorted by g_vtag/g_port
+ */
+ LIST_HEAD(sctpNatTableG, sctp_nat_assoc) *sctpTableGlobal;
+
+ /*
+ * avoid races in libalias: every public function has to use it.
+ */
+ struct mtx mutex;
+#endif
+};
+
+/* Macros */
+
+#ifdef _KERNEL
+#define LIBALIAS_LOCK_INIT(l) \
+ mtx_init(&l->mutex, "per-instance libalias mutex", NULL, MTX_DEF)
+#define LIBALIAS_LOCK_ASSERT(l) mtx_assert(&l->mutex, MA_OWNED)
+#define LIBALIAS_LOCK(l) mtx_lock(&l->mutex)
+#define LIBALIAS_UNLOCK(l) mtx_unlock(&l->mutex)
+#define LIBALIAS_LOCK_DESTROY(l) mtx_destroy(&l->mutex)
+#else
+#define LIBALIAS_LOCK_INIT(l)
+#define LIBALIAS_LOCK_ASSERT(l)
+#define LIBALIAS_LOCK(l)
+#define LIBALIAS_UNLOCK(l)
+#define LIBALIAS_LOCK_DESTROY(l)
+#endif
+
+/*
+ * The following macro is used to update an
+ * internet checksum. "delta" is a 32-bit
+ * accumulation of all the changes to the
+ * checksum (adding in new 16-bit words and
+ * subtracting out old words), and "cksum"
+ * is the checksum value to be updated.
+ */
+#define ADJUST_CHECKSUM(acc, cksum) \
+ do { \
+ acc += cksum; \
+ if (acc < 0) { \
+ acc = -acc; \
+ acc = (acc >> 16) + (acc & 0xffff); \
+ acc += acc >> 16; \
+ cksum = (u_short) ~acc; \
+ } else { \
+ acc = (acc >> 16) + (acc & 0xffff); \
+ acc += acc >> 16; \
+ cksum = (u_short) acc; \
+ } \
+ } while (0)
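As an illustration (not part of the patch) of how this macro is driven by the handlers later in this patch, the sketch below folds a 16-bit field rewrite into a UDP checksum. The accumulator carries the old value minus the new value, matching the NetBIOS handler's usage; struct udphdr is assumed to come from <netinet/udp.h> and the function name is hypothetical.

#include <netinet/udp.h>

static void
rewrite_udp_sport(struct udphdr *uh, u_short new_sport)
{
	int acc;

	acc = uh->uh_sport;		/* add the 16-bit word being removed */
	acc -= new_sport;		/* subtract the word replacing it */
	uh->uh_sport = new_sport;
	if (uh->uh_sum != 0)		/* 0 means no UDP checksum in use */
		ADJUST_CHECKSUM(acc, uh->uh_sum);
}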
+
+
+/* Prototypes */
+
+/*
+ * SctpFunction prototypes
+ *
+ */
+void AliasSctpInit(struct libalias *la);
+void AliasSctpTerm(struct libalias *la);
+int SctpAlias(struct libalias *la, struct ip *ip, int direction);
+
+/*
+ * We do not calculate TCP checksums when libalias is a kernel
+ * module, since it has no idea about checksum offloading.
+ * If TCP data has changed, then we just set checksum to zero,
+ * and the caller must recalculate it.
+ * If libalias ever edits UDP data, the same approach
+ * should be used.
+ */
+#ifndef _KERNEL
+u_short IpChecksum(struct ip *_pip);
+u_short TcpChecksum(struct ip *_pip);
+#endif
+void
+DifferentialChecksum(u_short * _cksum, void * _new, void * _old, int _n);
+
+/* Internal data access */
+struct alias_link *
+FindIcmpIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr,
+ u_short _id_alias, int _create);
+struct alias_link *
+FindIcmpOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr,
+ u_short _id, int _create);
+struct alias_link *
+FindFragmentIn1(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr,
+ u_short _ip_id);
+struct alias_link *
+FindFragmentIn2(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr,
+ u_short _ip_id);
+struct alias_link *
+ AddFragmentPtrLink(struct libalias *la, struct in_addr _dst_addr, u_short _ip_id);
+struct alias_link *
+ FindFragmentPtr(struct libalias *la, struct in_addr _dst_addr, u_short _ip_id);
+struct alias_link *
+FindProtoIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr,
+ u_char _proto);
+struct alias_link *
+FindProtoOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr,
+ u_char _proto);
+struct alias_link *
+FindUdpTcpIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr,
+ u_short _dst_port, u_short _alias_port, u_char _proto, int _create);
+struct alias_link *
+FindUdpTcpOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr,
+ u_short _src_port, u_short _dst_port, u_char _proto, int _create);
+struct alias_link *
+AddPptp(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr,
+ struct in_addr _alias_addr, u_int16_t _src_call_id);
+struct alias_link *
+FindPptpOutByCallId(struct libalias *la, struct in_addr _src_addr,
+ struct in_addr _dst_addr, u_int16_t _src_call_id);
+struct alias_link *
+FindPptpInByCallId(struct libalias *la, struct in_addr _dst_addr,
+ struct in_addr _alias_addr, u_int16_t _dst_call_id);
+struct alias_link *
+FindPptpOutByPeerCallId(struct libalias *la, struct in_addr _src_addr,
+ struct in_addr _dst_addr, u_int16_t _dst_call_id);
+struct alias_link *
+FindPptpInByPeerCallId(struct libalias *la, struct in_addr _dst_addr,
+ struct in_addr _alias_addr, u_int16_t _alias_call_id);
+struct alias_link *
+FindRtspOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr,
+ u_short _src_port, u_short _alias_port, u_char _proto);
+struct in_addr
+ FindOriginalAddress(struct libalias *la, struct in_addr _alias_addr);
+struct in_addr
+ FindAliasAddress(struct libalias *la, struct in_addr _original_addr);
+struct in_addr
+FindSctpRedirectAddress(struct libalias *la, struct sctp_nat_msg *sm);
+
+/* External data access/modification */
+int
+FindNewPortGroup(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr,
+ u_short _src_port, u_short _dst_port, u_short _port_count,
+ u_char _proto, u_char _align);
+void GetFragmentAddr(struct alias_link *_lnk, struct in_addr *_src_addr);
+void SetFragmentAddr(struct alias_link *_lnk, struct in_addr _src_addr);
+void GetFragmentPtr(struct alias_link *_lnk, char **_fptr);
+void SetFragmentPtr(struct alias_link *_lnk, char *fptr);
+void SetStateIn(struct alias_link *_lnk, int _state);
+void SetStateOut(struct alias_link *_lnk, int _state);
+int GetStateIn (struct alias_link *_lnk);
+int GetStateOut(struct alias_link *_lnk);
+struct in_addr
+ GetOriginalAddress(struct alias_link *_lnk);
+struct in_addr
+ GetDestAddress(struct alias_link *_lnk);
+struct in_addr
+ GetAliasAddress(struct alias_link *_lnk);
+struct in_addr
+ GetDefaultAliasAddress(struct libalias *la);
+void SetDefaultAliasAddress(struct libalias *la, struct in_addr _alias_addr);
+u_short GetOriginalPort(struct alias_link *_lnk);
+u_short GetAliasPort(struct alias_link *_lnk);
+struct in_addr
+ GetProxyAddress(struct alias_link *_lnk);
+void SetProxyAddress(struct alias_link *_lnk, struct in_addr _addr);
+u_short GetProxyPort(struct alias_link *_lnk);
+void SetProxyPort(struct alias_link *_lnk, u_short _port);
+void SetAckModified(struct alias_link *_lnk);
+int GetAckModified(struct alias_link *_lnk);
+int GetDeltaAckIn(u_long, struct alias_link *_lnk);
+int GetDeltaSeqOut(u_long, struct alias_link *lnk);
+void AddSeq(struct alias_link *lnk, int delta, u_int ip_hl,
+ u_short ip_len, u_long th_seq, u_int th_off);
+void SetExpire (struct alias_link *_lnk, int _expire);
+void ClearCheckNewLink(struct libalias *la);
+void SetProtocolFlags(struct alias_link *_lnk, int _pflags);
+int GetProtocolFlags(struct alias_link *_lnk);
+void SetDestCallId(struct alias_link *_lnk, u_int16_t _cid);
+
+#ifndef NO_FW_PUNCH
+void PunchFWHole(struct alias_link *_lnk);
+
+#endif
+
+/* Housekeeping function */
+void HouseKeeping(struct libalias *);
+
+/* TCP specific routines */
+/* lint -save -library Suppress flexelint warnings */
+
+/* Transparent proxy routines */
+int
+ProxyCheck(struct libalias *la, struct in_addr *proxy_server_addr,
+ u_short * proxy_server_port, struct in_addr src_addr,
+ struct in_addr dst_addr, u_short dst_port, u_char ip_p);
+void
+ProxyModify(struct libalias *la, struct alias_link *_lnk, struct ip *_pip,
+ int _maxpacketsize, int _proxy_type);
+
+enum alias_tcp_state {
+ ALIAS_TCP_STATE_NOT_CONNECTED,
+ ALIAS_TCP_STATE_CONNECTED,
+ ALIAS_TCP_STATE_DISCONNECTED
+};
+
+#if defined(_NETINET_IP_HH_)
+static __inline void *
+ip_next(struct ip *iphdr)
+{
+ char *p = (char *)iphdr;
+ return (&p[iphdr->ip_hl * 4]);
+}
+#endif
+
+#if defined(_NETINET_TCP_HH_)
+static __inline void *
+tcp_next(struct tcphdr *tcphdr)
+{
+ char *p = (char *)tcphdr;
+ return (&p[tcphdr->th_off * 4]);
+}
+#endif
+
+#if defined(_NETINET_UDP_HH_)
+static __inline void *
+udp_next(struct udphdr *udphdr)
+{
+ return ((void *)(udphdr + 1));
+}
+#endif
+
+#endif /* !_ALIAS_LOCAL_HH_ */
diff --git a/freebsd/sys/netinet/libalias/alias_mod.c b/freebsd/sys/netinet/libalias/alias_mod.c
new file mode 100644
index 00000000..fa15b2e4
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_mod.c
@@ -0,0 +1,292 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef _KERNEL
+#include <freebsd/sys/libkern.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rwlock.h>
+#else
+#include <freebsd/stdio.h>
+#include <freebsd/string.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/errno.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+/* Protocol and userland module handler chains. */
+LIST_HEAD(handler_chain, proto_handler) handler_chain = LIST_HEAD_INITIALIZER(handler_chain);
+#ifdef _KERNEL
+struct rwlock handler_rw;
+#endif
+SLIST_HEAD(dll_chain, dll) dll_chain = SLIST_HEAD_INITIALIZER(dll_chain);
+
+#ifdef _KERNEL
+
+#define LIBALIAS_RWLOCK_INIT() \
+ rw_init(&handler_rw, "Libalias_modules_rwlock")
+#define LIBALIAS_RWLOCK_DESTROY() rw_destroy(&handler_rw)
+#define LIBALIAS_WLOCK_ASSERT() \
+ rw_assert(&handler_rw, RA_WLOCKED)
+
+static __inline void
+LIBALIAS_RLOCK(void)
+{
+ rw_rlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_RUNLOCK(void)
+{
+ rw_runlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_WLOCK(void)
+{
+ rw_wlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_WUNLOCK(void)
+{
+ rw_wunlock(&handler_rw);
+}
+
+static void
+_handler_chain_init(void)
+{
+
+ if (!rw_initialized(&handler_rw))
+ LIBALIAS_RWLOCK_INIT();
+}
+
+static void
+_handler_chain_destroy(void)
+{
+
+ if (rw_initialized(&handler_rw))
+ LIBALIAS_RWLOCK_DESTROY();
+}
+
+#else
+#define LIBALIAS_RWLOCK_INIT() ;
+#define LIBALIAS_RWLOCK_DESTROY() ;
+#define LIBALIAS_WLOCK_ASSERT() ;
+#define LIBALIAS_RLOCK() ;
+#define LIBALIAS_RUNLOCK() ;
+#define LIBALIAS_WLOCK() ;
+#define LIBALIAS_WUNLOCK() ;
+#define _handler_chain_init() ;
+#define _handler_chain_destroy() ;
+#endif
+
+void
+handler_chain_init(void)
+{
+ _handler_chain_init();
+}
+
+void
+handler_chain_destroy(void)
+{
+ _handler_chain_destroy();
+}
+
+static int
+_attach_handler(struct proto_handler *p)
+{
+ struct proto_handler *b;
+
+ LIBALIAS_WLOCK_ASSERT();
+ b = NULL;
+ LIST_FOREACH(b, &handler_chain, entries) {
+ if ((b->pri == p->pri) &&
+ (b->dir == p->dir) &&
+ (b->proto == p->proto))
+ return (EEXIST); /* Priority conflict. */
+ if (b->pri > p->pri) {
+ LIST_INSERT_BEFORE(b, p, entries);
+ return (0);
+ }
+ }
+	/* End of list or found the right position, insert here. */
+ if (b)
+ LIST_INSERT_AFTER(b, p, entries);
+ else
+ LIST_INSERT_HEAD(&handler_chain, p, entries);
+ return (0);
+}
+
+static int
+_detach_handler(struct proto_handler *p)
+{
+ struct proto_handler *b, *b_tmp;
+
+ LIBALIAS_WLOCK_ASSERT();
+ LIST_FOREACH_SAFE(b, &handler_chain, entries, b_tmp) {
+ if (b == p) {
+ LIST_REMOVE(b, entries);
+ return (0);
+ }
+ }
+ return (ENOENT); /* Handler not found. */
+}
+
+int
+LibAliasAttachHandlers(struct proto_handler *_p)
+{
+ int i, error;
+
+ LIBALIAS_WLOCK();
+ error = -1;
+ for (i = 0; 1; i++) {
+ if (*((int *)&_p[i]) == EOH)
+ break;
+ error = _attach_handler(&_p[i]);
+ if (error != 0)
+ break;
+ }
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+LibAliasDetachHandlers(struct proto_handler *_p)
+{
+ int i, error;
+
+ LIBALIAS_WLOCK();
+ error = -1;
+ for (i = 0; 1; i++) {
+ if (*((int *)&_p[i]) == EOH)
+ break;
+ error = _detach_handler(&_p[i]);
+ if (error != 0)
+ break;
+ }
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+detach_handler(struct proto_handler *_p)
+{
+ int error;
+
+ LIBALIAS_WLOCK();
+ error = -1;
+ error = _detach_handler(_p);
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+find_handler(int8_t dir, int8_t proto, struct libalias *la, __unused struct ip *pip,
+ struct alias_data *ad)
+{
+ struct proto_handler *p;
+ int error;
+
+ LIBALIAS_RLOCK();
+ error = ENOENT;
+ LIST_FOREACH(p, &handler_chain, entries) {
+ if ((p->dir & dir) && (p->proto & proto))
+ if (p->fingerprint(la, ad) == 0) {
+ error = p->protohandler(la, pip, ad);
+ break;
+ }
+ }
+ LIBALIAS_RUNLOCK();
+ return (error);
+}
+
+struct proto_handler *
+first_handler(void)
+{
+
+ return (LIST_FIRST(&handler_chain));
+}
+
+/* Dll manipulation code - this code is not thread safe... */
+
+int
+attach_dll(struct dll *p)
+{
+ struct dll *b;
+
+ SLIST_FOREACH(b, &dll_chain, next) {
+ if (!strncmp(b->name, p->name, DLL_LEN))
+ return (EEXIST); /* Dll name conflict. */
+ }
+ SLIST_INSERT_HEAD(&dll_chain, p, next);
+ return (0);
+}
+
+void *
+detach_dll(char *p)
+{
+ struct dll *b, *b_tmp;
+ void *error;
+
+ b = NULL;
+ error = NULL;
+ SLIST_FOREACH_SAFE(b, &dll_chain, next, b_tmp)
+ if (!strncmp(b->name, p, DLL_LEN)) {
+ SLIST_REMOVE(&dll_chain, b, dll, next);
+ error = b;
+ break;
+ }
+ return (error);
+}
+
+struct dll *
+walk_dll_chain(void)
+{
+ struct dll *t;
+
+ t = SLIST_FIRST(&dll_chain);
+ if (t == NULL)
+ return (NULL);
+ SLIST_REMOVE_HEAD(&dll_chain, next);
+ return (t);
+}
diff --git a/freebsd/sys/netinet/libalias/alias_mod.h b/freebsd/sys/netinet/libalias/alias_mod.h
new file mode 100644
index 00000000..f5f98cc3
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_mod.h
@@ -0,0 +1,163 @@
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Alias_mod.h defines the outside world interfaces for the packet aliasing
+ * modular framework
+ */
+
+#ifndef _ALIAS_MOD_HH_
+#define _ALIAS_MOD_HH_
+
+#ifdef _KERNEL
+MALLOC_DECLARE(M_ALIAS);
+
+/* Use kernel allocator. */
+#if defined(_SYS_MALLOC_HH_)
+#ifndef __rtems__
+#define malloc(x) malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
+#define calloc(x, n) malloc(x*n)
+#define free(x) free(x, M_ALIAS)
+#else /* __rtems__ */
+#define malloc(x) _bsd_malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
+#define calloc(x, n) malloc(x*n)
+#define free(x) _bsd_free(x, M_ALIAS)
+#endif /* __rtems__ */
+#endif
+#endif
+
+/* Protocol handlers struct & function. */
+
+/* Packet flow direction. */
+#define IN 1
+#define OUT 2
+
+/* Working protocol. */
+#define IP 1
+#define TCP 2
+#define UDP 4
+
+/*
+ * Data passed to the protocol handler module; it must be filled in
+ * right before calling find_handler() to determine which
+ * module is eligible to be called.
+ */
+
+struct alias_data {
+ struct alias_link *lnk;
+ struct in_addr *oaddr; /* Original address. */
+ struct in_addr *aaddr; /* Alias address. */
+ uint16_t *aport; /* Alias port. */
+ uint16_t *sport, *dport; /* Source & destination port */
+ uint16_t maxpktsize; /* Max packet size. */
+};
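A minimal sketch (not part of the patch) of the caller side described by the comment above: fill a struct alias_data from an already-parsed outgoing TCP packet and hand it to find_handler(), which is declared further down in this header. The function name is hypothetical, the NULL fields are simply ones this direction does not need, and ip_next()/struct tcphdr are assumed from alias_local.h and <netinet/tcp.h>; the real callers live in alias.c.

static int
dispatch_tcp_out(struct libalias *la, struct ip *pip,
    struct alias_link *lnk, int maxpacketsize)
{
	struct tcphdr *tc = (struct tcphdr *)ip_next(pip);
	struct alias_data ad;

	ad.lnk = lnk;
	ad.oaddr = NULL;			/* unused for this direction */
	ad.aaddr = NULL;
	ad.aport = NULL;
	ad.sport = &tc->th_sport;
	ad.dport = &tc->th_dport;
	ad.maxpktsize = maxpacketsize;

	/* The first handler whose fingerprint() accepts the packet runs. */
	return (find_handler(OUT, TCP, la, pip, &ad));
}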
+
+/*
+ * This structure contains all the information necessary to make
+ * a protocol handler work correctly.
+ */
+
+struct proto_handler {
+ u_int pri; /* Handler priority. */
+ int16_t dir; /* Flow direction. */
+ uint8_t proto; /* Working protocol. */
+	int (*fingerprint)(struct libalias *,	/* Fingerprint function. */
+ struct alias_data *);
+	int (*protohandler)(struct libalias *,	/* Aliasing function. */
+ struct ip *, struct alias_data *);
+ LIST_ENTRY(proto_handler) entries;
+};
+
+
+/*
+ * Used only in userland when libalias needs to keep track of all
+ * loaded modules. In kernel land (kld mode) we don't need to care
+ * about libalias modules because the kld framework does it for us.
+ */
+
+#define DLL_LEN 32
+struct dll {
+ char name[DLL_LEN]; /* Name of module. */
+ void *handle; /*
+ * Ptr to shared obj obtained through
+ * dlopen() - use this ptr to get access
+ * to any symbols from a loaded module
+ * via dlsym().
+ */
+ SLIST_ENTRY(dll) next;
+};
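A sketch (not part of the patch) of how a userland consumer could populate this bookkeeping record, per the dlopen()/dlsym() comment above, and register it with attach_dll(), declared below. The module path handling, function name and error strategy are illustrative; the real loader lives in the userland side of libalias.

#include <dlfcn.h>
#include <stdlib.h>
#include <string.h>

static int
load_alias_module(const char *path, const char *name)
{
	struct dll *d;

	d = calloc(1, sizeof(*d));
	if (d == NULL)
		return (-1);
	strncpy(d->name, name, sizeof(d->name) - 1);
	d->handle = dlopen(path, RTLD_LAZY);	/* shared object handle */
	if (d->handle == NULL) {
		free(d);
		return (-1);
	}
	if (attach_dll(d) != 0) {		/* EEXIST on duplicate name */
		dlclose(d->handle);
		free(d);
		return (-1);
	}
	/* Symbols (e.g. the handlers array) can now be fetched via dlsym(). */
	return (0);
}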
+
+/* Functions used with protocol handlers. */
+
+void handler_chain_init(void);
+void handler_chain_destroy(void);
+int LibAliasAttachHandlers(struct proto_handler *);
+int LibAliasDetachHandlers(struct proto_handler *);
+int detach_handler(struct proto_handler *);
+int find_handler(int8_t, int8_t, struct libalias *,
+ struct ip *, struct alias_data *);
+struct proto_handler *first_handler(void);
+
+/* Functions used with dll module. */
+
+void dll_chain_init(void);
+void dll_chain_destroy(void);
+int attach_dll(struct dll *);
+void *detach_dll(char *);
+struct dll *walk_dll_chain(void);
+
+/* End of handlers. */
+#define EOH -1
+
+/*
+ * Some defines borrowed from sys/module.h used to compile a kld
+ * in userland as a shared lib.
+ */
+
+#ifndef _KERNEL
+typedef enum modeventtype {
+ MOD_LOAD,
+ MOD_UNLOAD,
+ MOD_SHUTDOWN,
+ MOD_QUIESCE
+} modeventtype_t;
+
+typedef struct module *module_t;
+typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *);
+
+/*
+ * Struct for registering modules statically via SYSINIT.
+ */
+typedef struct moduledata {
+ const char *name; /* module name */
+ modeventhand_t evhand; /* event handler */
+ void *priv; /* extra data */
+} moduledata_t;
+#endif
+
+#endif /* !_ALIAS_MOD_HH_ */
diff --git a/freebsd/sys/netinet/libalias/alias_nbt.c b/freebsd/sys/netinet/libalias/alias_nbt.c
new file mode 100644
index 00000000..31ee0006
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_nbt.c
@@ -0,0 +1,855 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Written by Atsushi Murai <amurai@spec.co.jp>
+ * Copyright (c) 1998, System Planning and Engineering Co.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * TODO:
+ * oClean up.
+ * oConsider word alignment for other platforms.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ alias_nbt.c performs special processing for NetBIOS over TCP/IP
+ sessions carried over UDP.
+
+ Initial version: May, 1998 (Atsushi Murai <amurai@spec.co.jp>)
+
+ See HISTORY file for record of revisions.
+*/
+
+/* Includes */
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#else
+#include <freebsd/errno.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/stdio.h>
+#include <freebsd/strings.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/udp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+#define NETBIOS_NS_PORT_NUMBER 137
+#define NETBIOS_DGM_PORT_NUMBER 138
+
+static int
+AliasHandleUdpNbt(struct libalias *, struct ip *, struct alias_link *,
+ struct in_addr *, u_short);
+
+static int
+AliasHandleUdpNbtNS(struct libalias *, struct ip *, struct alias_link *,
+ struct in_addr *, u_short *, struct in_addr *, u_short *);
+static int
+fingerprint1(struct libalias *la, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->aaddr == NULL || ah->aport == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == NETBIOS_DGM_PORT_NUMBER
+ || ntohs(*ah->sport) == NETBIOS_DGM_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler1(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ return (AliasHandleUdpNbt(la, pip, ah->lnk, ah->aaddr, *ah->aport));
+}
+
+static int
+fingerprint2(struct libalias *la, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->aaddr == NULL || ah->aport == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == NETBIOS_NS_PORT_NUMBER
+ || ntohs(*ah->sport) == NETBIOS_NS_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler2in(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleUdpNbtNS(la, pip, ah->lnk, ah->aaddr, ah->aport,
+ ah->oaddr, ah->dport);
+ return (0);
+}
+
+static int
+protohandler2out(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ return (AliasHandleUdpNbtNS(la, pip, ah->lnk, &pip->ip_src, ah->sport,
+ ah->aaddr, ah->aport));
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 130,
+ .dir = IN|OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint1,
+ .protohandler = &protohandler1
+ },
+ {
+ .pri = 140,
+ .dir = IN,
+ .proto = UDP,
+ .fingerprint = &fingerprint2,
+ .protohandler = &protohandler2in
+ },
+ {
+ .pri = 140,
+ .dir = OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint2,
+ .protohandler = &protohandler2out
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_nbt", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_nbt, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_nbt, 1);
+MODULE_DEPEND(alias_nbt, libalias, 1, 1, 1);
+#endif
+
+typedef struct {
+ struct in_addr oldaddr;
+ u_short oldport;
+ struct in_addr newaddr;
+ u_short newport;
+ u_short *uh_sum;
+} NBTArguments;
+
+typedef struct {
+ unsigned char type;
+ unsigned char flags;
+ u_short id;
+ struct in_addr source_ip;
+ u_short source_port;
+ u_short len;
+ u_short offset;
+} NbtDataHeader;
+
+#define OpQuery 0
+#define OpUnknown 4
+#define OpRegist 5
+#define OpRelease 6
+#define OpWACK 7
+#define OpRefresh 8
+typedef struct {
+ u_short nametrid;
+ u_short dir: 1, opcode:4, nmflags:7, rcode:4;
+ u_short qdcount;
+ u_short ancount;
+ u_short nscount;
+ u_short arcount;
+} NbtNSHeader;
+
+#define FMT_ERR 0x1
+#define SRV_ERR 0x2
+#define IMP_ERR 0x4
+#define RFS_ERR 0x5
+#define ACT_ERR 0x6
+#define CFT_ERR 0x7
+
+
+#ifdef LIBALIAS_DEBUG
+static void
+PrintRcode(u_char rcode)
+{
+
+	switch (rcode) {
+	case FMT_ERR:
+		printf("\nFormat Error.\n");
+		break;
+	case SRV_ERR:
+		printf("\nServer failure.\n");
+		break;
+	case IMP_ERR:
+		printf("\nUnsupported request error.\n");
+		break;
+	case RFS_ERR:
+		printf("\nRefused error.\n");
+		break;
+	case ACT_ERR:
+		printf("\nActive error.\n");
+		break;
+	case CFT_ERR:
+		printf("\nName in conflict error.\n");
+		break;
+	default:
+		printf("\n?%c?=%0x\n", '?', rcode);
+		break;
+	}
+}
+
+#endif
+
+
+/* Handling Name field */
+static u_char *
+AliasHandleName(u_char * p, char *pmax)
+{
+
+ u_char *s;
+ u_char c;
+ int compress;
+
+ /* Following length field */
+
+ if (p == NULL || (char *)p >= pmax)
+ return (NULL);
+
+ if (*p & 0xc0) {
+ p = p + 2;
+ if ((char *)p > pmax)
+ return (NULL);
+ return ((u_char *) p);
+ }
+ while ((*p & 0x3f) != 0x00) {
+ s = p + 1;
+ if (*p == 0x20)
+ compress = 1;
+ else
+ compress = 0;
+
+ /* Get next length field */
+ p = (u_char *) (p + (*p & 0x3f) + 1);
+ if ((char *)p > pmax) {
+ p = NULL;
+ break;
+ }
+#ifdef LIBALIAS_DEBUG
+ printf(":");
+#endif
+ while (s < p) {
+ if (compress == 1) {
+ c = (u_char) (((((*s & 0x0f) << 4) | (*(s + 1) & 0x0f)) - 0x11));
+#ifdef LIBALIAS_DEBUG
+ if (isprint(c))
+ printf("%c", c);
+ else
+ printf("<0x%02x>", c);
+#endif
+ s += 2;
+ } else {
+#ifdef LIBALIAS_DEBUG
+ printf("%c", *s);
+#endif
+ s++;
+ }
+ }
+#ifdef LIBALIAS_DEBUG
+ printf(":");
+ fflush(stdout);
+#endif
+ }
+
+	/* Advance past the Name field */
+ if (p == NULL || (char *)p >= pmax)
+ p = NULL;
+ else
+ p++;
+ return ((u_char *) p);
+}
+
+/*
+ * NetBios Datagram Handler (IP/UDP)
+ */
+#define DGM_DIRECT_UNIQ 0x10
+#define DGM_DIRECT_GROUP 0x11
+#define DGM_BROADCAST 0x12
+#define DGM_ERROR 0x13
+#define DGM_QUERY 0x14
+#define DGM_POSITIVE_RES 0x15
+#define DGM_NEGATIVE_RES 0x16
+
+static int
+AliasHandleUdpNbt(
+ struct libalias *la,
+ struct ip *pip, /* IP packet to examine/patch */
+ struct alias_link *lnk,
+ struct in_addr *alias_address,
+ u_short alias_port
+)
+{
+ struct udphdr *uh;
+ NbtDataHeader *ndh;
+ u_char *p = NULL;
+ char *pmax;
+
+ (void)la;
+ (void)lnk;
+
+ /* Calculate data length of UDP packet */
+ uh = (struct udphdr *)ip_next(pip);
+ pmax = (char *)uh + ntohs(uh->uh_ulen);
+
+ ndh = (NbtDataHeader *)udp_next(uh);
+ if ((char *)(ndh + 1) > pmax)
+ return (-1);
+#ifdef LIBALIAS_DEBUG
+ printf("\nType=%02x,", ndh->type);
+#endif
+ switch (ndh->type) {
+ case DGM_DIRECT_UNIQ:
+ case DGM_DIRECT_GROUP:
+ case DGM_BROADCAST:
+ p = (u_char *) ndh + 14;
+ p = AliasHandleName(p, pmax); /* Source Name */
+ p = AliasHandleName(p, pmax); /* Destination Name */
+ break;
+ case DGM_ERROR:
+ p = (u_char *) ndh + 11;
+ break;
+ case DGM_QUERY:
+ case DGM_POSITIVE_RES:
+ case DGM_NEGATIVE_RES:
+ p = (u_char *) ndh + 10;
+ p = AliasHandleName(p, pmax); /* Destination Name */
+ break;
+ }
+ if (p == NULL || (char *)p > pmax)
+ p = NULL;
+#ifdef LIBALIAS_DEBUG
+ printf("%s:%d-->", inet_ntoa(ndh->source_ip), ntohs(ndh->source_port));
+#endif
+ /* Doing an IP address and Port number Translation */
+ if (uh->uh_sum != 0) {
+ int acc;
+ u_short *sptr;
+
+ acc = ndh->source_port;
+ acc -= alias_port;
+ sptr = (u_short *) & (ndh->source_ip);
+ acc += *sptr++;
+ acc += *sptr;
+ sptr = (u_short *) alias_address;
+ acc -= *sptr++;
+ acc -= *sptr;
+ ADJUST_CHECKSUM(acc, uh->uh_sum);
+ }
+ ndh->source_ip = *alias_address;
+ ndh->source_port = alias_port;
+#ifdef LIBALIAS_DEBUG
+ printf("%s:%d\n", inet_ntoa(ndh->source_ip), ntohs(ndh->source_port));
+ fflush(stdout);
+#endif
+ return ((p == NULL) ? -1 : 0);
+}
+
+/* Question Section */
+#define QS_TYPE_NB 0x0020
+#define QS_TYPE_NBSTAT 0x0021
+#define QS_CLAS_IN 0x0001
+typedef struct {
+ u_short type; /* The type of Request */
+ u_short class; /* The class of Request */
+} NBTNsQuestion;
+
+static u_char *
+AliasHandleQuestion(
+ u_short count,
+ NBTNsQuestion * q,
+ char *pmax,
+ NBTArguments * nbtarg)
+{
+
+ (void)nbtarg;
+
+ while (count != 0) {
+		/* Name Field */
+ q = (NBTNsQuestion *) AliasHandleName((u_char *) q, pmax);
+
+ if (q == NULL || (char *)(q + 1) > pmax) {
+ q = NULL;
+ break;
+ }
+		/* Type and Class field */
+ switch (ntohs(q->type)) {
+ case QS_TYPE_NB:
+ case QS_TYPE_NBSTAT:
+ q = q + 1;
+ break;
+ default:
+#ifdef LIBALIAS_DEBUG
+ printf("\nUnknown Type on Question %0x\n", ntohs(q->type));
+#endif
+ break;
+ }
+ count--;
+ }
+
+	/* Advance past the Question Section */
+ return ((u_char *) q);
+}
+
+/* Resource Record */
+#define RR_TYPE_A 0x0001
+#define RR_TYPE_NS 0x0002
+#define RR_TYPE_NULL 0x000a
+#define RR_TYPE_NB 0x0020
+#define RR_TYPE_NBSTAT 0x0021
+#define RR_CLAS_IN 0x0001
+#define SizeOfNsResource 8
+typedef struct {
+ u_short type;
+ u_short class;
+ unsigned int ttl;
+ u_short rdlen;
+} NBTNsResource;
+
+#define SizeOfNsRNB 6
+typedef struct {
+ u_short g: 1 , ont:2, resv:13;
+ struct in_addr addr;
+} NBTNsRNB;
+
+static u_char *
+AliasHandleResourceNB(
+ NBTNsResource * q,
+ char *pmax,
+ NBTArguments * nbtarg)
+{
+ NBTNsRNB *nb;
+ u_short bcount;
+
+ if (q == NULL || (char *)(q + 1) > pmax)
+ return (NULL);
+ /* Check out a length */
+ bcount = ntohs(q->rdlen);
+
+ /* Forward to Resource NB position */
+ nb = (NBTNsRNB *) ((u_char *) q + SizeOfNsResource);
+
+ /* Processing all in_addr array */
+#ifdef LIBALIAS_DEBUG
+ printf("NB rec[%s", inet_ntoa(nbtarg->oldaddr));
+ printf("->%s, %dbytes] ", inet_ntoa(nbtarg->newaddr), bcount);
+#endif
+ while (nb != NULL && bcount != 0) {
+ if ((char *)(nb + 1) > pmax) {
+ nb = NULL;
+ break;
+ }
+#ifdef LIBALIAS_DEBUG
+ printf("<%s>", inet_ntoa(nb->addr));
+#endif
+ if (!bcmp(&nbtarg->oldaddr, &nb->addr, sizeof(struct in_addr))) {
+ if (*nbtarg->uh_sum != 0) {
+ int acc;
+ u_short *sptr;
+
+ sptr = (u_short *) & (nb->addr);
+ acc = *sptr++;
+ acc += *sptr;
+ sptr = (u_short *) & (nbtarg->newaddr);
+ acc -= *sptr++;
+ acc -= *sptr;
+ ADJUST_CHECKSUM(acc, *nbtarg->uh_sum);
+ }
+ nb->addr = nbtarg->newaddr;
+#ifdef LIBALIAS_DEBUG
+ printf("O");
+#endif
+ }
+#ifdef LIBALIAS_DEBUG
+ else {
+ printf(".");
+ }
+#endif
+ nb = (NBTNsRNB *) ((u_char *) nb + SizeOfNsRNB);
+ bcount -= SizeOfNsRNB;
+ }
+ if (nb == NULL || (char *)(nb + 1) > pmax) {
+ nb = NULL;
+ }
+ return ((u_char *) nb);
+}
+
+#define SizeOfResourceA 6
+typedef struct {
+ struct in_addr addr;
+} NBTNsResourceA;
+
+static u_char *
+AliasHandleResourceA(
+ NBTNsResource * q,
+ char *pmax,
+ NBTArguments * nbtarg)
+{
+ NBTNsResourceA *a;
+ u_short bcount;
+
+ if (q == NULL || (char *)(q + 1) > pmax)
+ return (NULL);
+
+ /* Forward to Resource A position */
+ a = (NBTNsResourceA *) ((u_char *) q + sizeof(NBTNsResource));
+
+ /* Check out of length */
+ bcount = ntohs(q->rdlen);
+
+ /* Processing all in_addr array */
+#ifdef LIBALIAS_DEBUG
+ printf("Arec [%s", inet_ntoa(nbtarg->oldaddr));
+ printf("->%s]", inet_ntoa(nbtarg->newaddr));
+#endif
+ while (bcount != 0) {
+ if (a == NULL || (char *)(a + 1) > pmax)
+ return (NULL);
+#ifdef LIBALIAS_DEBUG
+ printf("..%s", inet_ntoa(a->addr));
+#endif
+ if (!bcmp(&nbtarg->oldaddr, &a->addr, sizeof(struct in_addr))) {
+ if (*nbtarg->uh_sum != 0) {
+ int acc;
+ u_short *sptr;
+
+ sptr = (u_short *) & (a->addr); /* Old */
+ acc = *sptr++;
+ acc += *sptr;
+ sptr = (u_short *) & nbtarg->newaddr; /* New */
+ acc -= *sptr++;
+ acc -= *sptr;
+ ADJUST_CHECKSUM(acc, *nbtarg->uh_sum);
+ }
+ a->addr = nbtarg->newaddr;
+ }
+ a++; /* XXXX */
+ bcount -= SizeOfResourceA;
+ }
+ if (a == NULL || (char *)(a + 1) > pmax)
+ a = NULL;
+ return ((u_char *) a);
+}
+
+typedef struct {
+ u_short opcode:4, flags:8, resv:4;
+} NBTNsResourceNULL;
+
+static u_char *
+AliasHandleResourceNULL(
+ NBTNsResource * q,
+ char *pmax,
+ NBTArguments * nbtarg)
+{
+ NBTNsResourceNULL *n;
+ u_short bcount;
+
+ (void)nbtarg;
+
+ if (q == NULL || (char *)(q + 1) > pmax)
+ return (NULL);
+
+ /* Forward to Resource NULL position */
+ n = (NBTNsResourceNULL *) ((u_char *) q + sizeof(NBTNsResource));
+
+ /* Check out of length */
+ bcount = ntohs(q->rdlen);
+
+ /* Processing all in_addr array */
+ while (bcount != 0) {
+ if ((char *)(n + 1) > pmax) {
+ n = NULL;
+ break;
+ }
+ n++;
+ bcount -= sizeof(NBTNsResourceNULL);
+ }
+ if ((char *)(n + 1) > pmax)
+ n = NULL;
+
+ return ((u_char *) n);
+}
+
+static u_char *
+AliasHandleResourceNS(
+ NBTNsResource * q,
+ char *pmax,
+ NBTArguments * nbtarg)
+{
+ NBTNsResourceNULL *n;
+ u_short bcount;
+
+ (void)nbtarg;
+
+ if (q == NULL || (char *)(q + 1) > pmax)
+ return (NULL);
+
+ /* Forward to Resource NULL position */
+ n = (NBTNsResourceNULL *) ((u_char *) q + sizeof(NBTNsResource));
+
+ /* Check out of length */
+ bcount = ntohs(q->rdlen);
+
+	/* Resource Record Name Field */
+ q = (NBTNsResource *) AliasHandleName((u_char *) n, pmax); /* XXX */
+
+ if (q == NULL || (char *)((u_char *) n + bcount) > pmax)
+ return (NULL);
+ else
+ return ((u_char *) n + bcount);
+}
+
+typedef struct {
+ u_short numnames;
+} NBTNsResourceNBSTAT;
+
+static u_char *
+AliasHandleResourceNBSTAT(
+ NBTNsResource * q,
+ char *pmax,
+ NBTArguments * nbtarg)
+{
+ NBTNsResourceNBSTAT *n;
+ u_short bcount;
+
+ (void)nbtarg;
+
+ if (q == NULL || (char *)(q + 1) > pmax)
+ return (NULL);
+
+ /* Forward to Resource NBSTAT position */
+ n = (NBTNsResourceNBSTAT *) ((u_char *) q + sizeof(NBTNsResource));
+
+ /* Check out of length */
+ bcount = ntohs(q->rdlen);
+
+ if (q == NULL || (char *)((u_char *) n + bcount) > pmax)
+ return (NULL);
+ else
+ return ((u_char *) n + bcount);
+}
+
+static u_char *
+AliasHandleResource(
+ u_short count,
+ NBTNsResource * q,
+ char *pmax,
+ NBTArguments
+ * nbtarg)
+{
+ while (count != 0) {
+		/* Resource Record Name Field */
+ q = (NBTNsResource *) AliasHandleName((u_char *) q, pmax);
+
+ if (q == NULL || (char *)(q + 1) > pmax)
+ break;
+#ifdef LIBALIAS_DEBUG
+ printf("type=%02x, count=%d\n", ntohs(q->type), count);
+#endif
+
+		/* Type and Class field */
+ switch (ntohs(q->type)) {
+ case RR_TYPE_NB:
+ q = (NBTNsResource *) AliasHandleResourceNB(
+ q,
+ pmax,
+ nbtarg
+ );
+ break;
+ case RR_TYPE_A:
+ q = (NBTNsResource *) AliasHandleResourceA(
+ q,
+ pmax,
+ nbtarg
+ );
+ break;
+ case RR_TYPE_NS:
+ q = (NBTNsResource *) AliasHandleResourceNS(
+ q,
+ pmax,
+ nbtarg
+ );
+ break;
+ case RR_TYPE_NULL:
+ q = (NBTNsResource *) AliasHandleResourceNULL(
+ q,
+ pmax,
+ nbtarg
+ );
+ break;
+ case RR_TYPE_NBSTAT:
+ q = (NBTNsResource *) AliasHandleResourceNBSTAT(
+ q,
+ pmax,
+ nbtarg
+ );
+ break;
+ default:
+#ifdef LIBALIAS_DEBUG
+ printf(
+ "\nUnknown Type of Resource %0x\n",
+ ntohs(q->type)
+ );
+ fflush(stdout);
+#endif
+ break;
+ }
+ count--;
+ }
+ return ((u_char *) q);
+}
+
+static int
+AliasHandleUdpNbtNS(
+ struct libalias *la,
+ struct ip *pip, /* IP packet to examine/patch */
+ struct alias_link *lnk,
+ struct in_addr *alias_address,
+ u_short * alias_port,
+ struct in_addr *original_address,
+ u_short * original_port)
+{
+ struct udphdr *uh;
+ NbtNSHeader *nsh;
+ u_char *p;
+ char *pmax;
+ NBTArguments nbtarg;
+
+ (void)la;
+ (void)lnk;
+
+ /* Set up Common Parameter */
+ nbtarg.oldaddr = *alias_address;
+ nbtarg.oldport = *alias_port;
+ nbtarg.newaddr = *original_address;
+ nbtarg.newport = *original_port;
+
+ /* Calculate data length of UDP packet */
+ uh = (struct udphdr *)ip_next(pip);
+ nbtarg.uh_sum = &(uh->uh_sum);
+ nsh = (NbtNSHeader *)udp_next(uh);
+ p = (u_char *) (nsh + 1);
+ pmax = (char *)uh + ntohs(uh->uh_ulen);
+
+ if ((char *)(nsh + 1) > pmax)
+ return (-1);
+
+#ifdef LIBALIAS_DEBUG
+ printf(" [%s] ID=%02x, op=%01x, flag=%02x, rcode=%01x, qd=%04x"
+ ", an=%04x, ns=%04x, ar=%04x, [%d]-->",
+ nsh->dir ? "Response" : "Request",
+ nsh->nametrid,
+ nsh->opcode,
+ nsh->nmflags,
+ nsh->rcode,
+ ntohs(nsh->qdcount),
+ ntohs(nsh->ancount),
+ ntohs(nsh->nscount),
+ ntohs(nsh->arcount),
+ (u_char *) p - (u_char *) nsh
+ );
+#endif
+
+ /* Question Entries */
+ if (ntohs(nsh->qdcount) != 0) {
+ p = AliasHandleQuestion(
+ ntohs(nsh->qdcount),
+ (NBTNsQuestion *) p,
+ pmax,
+ &nbtarg
+ );
+ }
+ /* Answer Resource Records */
+ if (ntohs(nsh->ancount) != 0) {
+ p = AliasHandleResource(
+ ntohs(nsh->ancount),
+ (NBTNsResource *) p,
+ pmax,
+ &nbtarg
+ );
+ }
+	/* Authority Resource Records */
+ if (ntohs(nsh->nscount) != 0) {
+ p = AliasHandleResource(
+ ntohs(nsh->nscount),
+ (NBTNsResource *) p,
+ pmax,
+ &nbtarg
+ );
+ }
+	/* Additional Resource Records */
+ if (ntohs(nsh->arcount) != 0) {
+ p = AliasHandleResource(
+ ntohs(nsh->arcount),
+ (NBTNsResource *) p,
+ pmax,
+ &nbtarg
+ );
+ }
+#ifdef LIBALIAS_DEBUG
+ PrintRcode(nsh->rcode);
+#endif
+ return ((p == NULL) ? -1 : 0);
+}
diff --git a/freebsd/sys/netinet/libalias/alias_pptp.c b/freebsd/sys/netinet/libalias/alias_pptp.c
new file mode 100644
index 00000000..f6c7f199
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_pptp.c
@@ -0,0 +1,525 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * alias_pptp.c
+ *
+ * Copyright (c) 2000 Whistle Communications, Inc.
+ * All rights reserved.
+ *
+ * Subject to the following obligations and disclaimer of warranty, use and
+ * redistribution of this software, in source or object code forms, with or
+ * without modifications are expressly permitted by Whistle Communications;
+ * provided, however, that:
+ * 1. Any and all reproductions of the source or object code must include the
+ * copyright notice above and the following disclaimer of warranties; and
+ * 2. No rights are granted, in any manner or form, to use Whistle
+ * Communications, Inc. trademarks, including the mark "WHISTLE
+ * COMMUNICATIONS" on advertising, endorsements, or otherwise except as
+ * such appears in the above copyright notice or in the software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
+ * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
+ * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
+ * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
+ * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
+ * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
+ * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
+ * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * Author: Erik Salander <erik@whistle.com>
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/* Includes */
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#else
+#include <freebsd/errno.h>
+#include <freebsd/limits.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/stdio.h>
+#endif
+
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias.h>
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+#define PPTP_CONTROL_PORT_NUMBER 1723
+
+static void
+AliasHandlePptpOut(struct libalias *, struct ip *, struct alias_link *);
+
+static void
+AliasHandlePptpIn(struct libalias *, struct ip *, struct alias_link *);
+
+static int
+AliasHandlePptpGreOut(struct libalias *, struct ip *);
+
+static int
+AliasHandlePptpGreIn(struct libalias *, struct ip *);
+
+static int
+fingerprint(struct libalias *la, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == PPTP_CONTROL_PORT_NUMBER
+ || ntohs(*ah->sport) == PPTP_CONTROL_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+fingerprintgre(struct libalias *la, struct alias_data *ah)
+{
+
+ return (0);
+}
+
+static int
+protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandlePptpIn(la, pip, ah->lnk);
+ return (0);
+}
+
+static int
+protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandlePptpOut(la, pip, ah->lnk);
+ return (0);
+}
+
+static int
+protohandlergrein(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY ||
+ AliasHandlePptpGreIn(la, pip) == 0)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandlergreout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (AliasHandlePptpGreOut(la, pip) == 0)
+ return (0);
+ return (-1);
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 200,
+ .dir = IN,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerin
+ },
+ {
+ .pri = 210,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerout
+ },
+/*
+ * WATCH OUT!!! These 2 handlers NEED a priority of INT_MAX (the highest
+ * possible) because they will ALWAYS process packets, so they must be the
+ * last ones in the chain; see fingerprintgre() above.
+ */
+ {
+ .pri = INT_MAX,
+ .dir = IN,
+ .proto = IP,
+ .fingerprint = &fingerprintgre,
+ .protohandler = &protohandlergrein
+ },
+ {
+ .pri = INT_MAX,
+ .dir = OUT,
+ .proto = IP,
+ .fingerprint = &fingerprintgre,
+ .protohandler = &protohandlergreout
+ },
+ { EOH }
+};
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_pptp", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_pptp, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_pptp, 1);
+MODULE_DEPEND(alias_pptp, libalias, 1, 1, 1);
+#endif
+
+/*
+ Alias_pptp.c performs special processing for PPTP sessions under TCP.
+ Specifically, watch PPTP control messages and alias the Call ID or the
+ Peer's Call ID in the appropriate messages. Note that PPTP requires
+ "de-aliasing" of incoming packets; this differs from the other TCP
+ applications that are currently aliased (i.e. FTP, IRC and RTSP).
+
+ For Call IDs encountered for the first time, a PPTP alias link is created.
+ The PPTP alias link uses the Call ID in place of the original port number.
+ An alias Call ID is created.
+
+ For this routine to work, the PPTP control messages must fit entirely
+ into a single TCP packet. This is typically the case, but is not
+ required by the spec.
+
+ Unlike some of the other TCP applications that are aliased (i.e. FTP,
+ IRC and RTSP), the PPTP control messages that need to be aliased are
+ guaranteed to remain the same length. The aliased Call ID is a fixed
+ length field.
+
+ Reference: RFC 2637
+
+ Initial version: May, 2000 (eds)
+
+*/
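+
+/*
+   Illustrative example (all addresses and Call IDs below are hypothetical):
+   suppose a local client 192.168.0.10 opens a PPTP call to a server at
+   198.51.100.9 through a NAT whose alias address is 203.0.113.2, using
+   Call ID 7 in its OutCallRequest.  AliasHandlePptpOut() then creates a
+   PPTP link with AddPptp(la, original address, destination address,
+   alias address, original Call ID), rewrites the Call ID in the outgoing
+   control message to GetAliasPort() of that link, and folds the 16-bit
+   difference into the TCP checksum with ADJUST_CHECKSUM().  Subsequent GRE
+   data packets are matched to the same link by Call ID rather than by a
+   port number.
+*/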
+
+/*
+ * PPTP definitions
+ */
+
+struct grehdr { /* Enhanced GRE header. */
+ u_int16_t gh_flags; /* Flags. */
+ u_int16_t gh_protocol; /* Protocol type. */
+ u_int16_t gh_length; /* Payload length. */
+ u_int16_t gh_call_id; /* Call ID. */
+ u_int32_t gh_seq_no; /* Sequence number (optional). */
+ u_int32_t gh_ack_no; /* Acknowledgment number
+ * (optional). */
+};
+typedef struct grehdr GreHdr;
+
+/* The PPTP protocol ID used in the GRE 'proto' field. */
+#define PPTP_GRE_PROTO 0x880b
+
+/* Bits that must be set a certain way in all PPTP/GRE packets. */
+#define PPTP_INIT_VALUE ((0x2001 << 16) | PPTP_GRE_PROTO)
+#define PPTP_INIT_MASK 0xef7fffff
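+
+/*
+ * That is, PPTP_INIT_VALUE requires the K (Key present) flag, GRE version 1
+ * and the PPTP protocol type, while PPTP_INIT_MASK ignores only the S
+ * (sequence number present) and A (acknowledgment number present) flag
+ * bits, which legitimately vary from packet to packet.
+ */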
+
+#define PPTP_MAGIC 0x1a2b3c4d
+#define PPTP_CTRL_MSG_TYPE 1
+
+enum {
+ PPTP_StartCtrlConnRequest = 1,
+ PPTP_StartCtrlConnReply = 2,
+ PPTP_StopCtrlConnRequest = 3,
+ PPTP_StopCtrlConnReply = 4,
+ PPTP_EchoRequest = 5,
+ PPTP_EchoReply = 6,
+ PPTP_OutCallRequest = 7,
+ PPTP_OutCallReply = 8,
+ PPTP_InCallRequest = 9,
+ PPTP_InCallReply = 10,
+ PPTP_InCallConn = 11,
+ PPTP_CallClearRequest = 12,
+ PPTP_CallDiscNotify = 13,
+ PPTP_WanErrorNotify = 14,
+ PPTP_SetLinkInfo = 15
+};
+
+ /* Message structures */
+struct pptpMsgHead {
+ u_int16_t length; /* total length */
+ u_int16_t msgType;/* PPTP message type */
+ u_int32_t magic; /* magic cookie */
+ u_int16_t type; /* control message type */
+ u_int16_t resv0; /* reserved */
+};
+typedef struct pptpMsgHead *PptpMsgHead;
+
+struct pptpCodes {
+ u_int8_t resCode;/* Result Code */
+ u_int8_t errCode;/* Error Code */
+};
+typedef struct pptpCodes *PptpCode;
+
+struct pptpCallIds {
+ u_int16_t cid1; /* Call ID field #1 */
+ u_int16_t cid2; /* Call ID field #2 */
+};
+typedef struct pptpCallIds *PptpCallId;
+
+static PptpCallId AliasVerifyPptp(struct ip *, u_int16_t *);
+
+
+static void
+AliasHandlePptpOut(struct libalias *la,
+ struct ip *pip, /* IP packet to examine/patch */
+ struct alias_link *lnk)
+{ /* The PPTP control link */
+ struct alias_link *pptp_lnk;
+ PptpCallId cptr;
+ PptpCode codes;
+ u_int16_t ctl_type; /* control message type */
+ struct tcphdr *tc;
+
+ /* Verify valid PPTP control message */
+ if ((cptr = AliasVerifyPptp(pip, &ctl_type)) == NULL)
+ return;
+
+ /* Modify certain PPTP messages */
+ switch (ctl_type) {
+ case PPTP_OutCallRequest:
+ case PPTP_OutCallReply:
+ case PPTP_InCallRequest:
+ case PPTP_InCallReply:
+ /*
+ * Establish PPTP link for address and Call ID found in
+ * control message.
+ */
+ pptp_lnk = AddPptp(la, GetOriginalAddress(lnk), GetDestAddress(lnk),
+ GetAliasAddress(lnk), cptr->cid1);
+ break;
+ case PPTP_CallClearRequest:
+ case PPTP_CallDiscNotify:
+ /*
+ * Find PPTP link for address and Call ID found in control
+ * message.
+ */
+ pptp_lnk = FindPptpOutByCallId(la, GetOriginalAddress(lnk),
+ GetDestAddress(lnk),
+ cptr->cid1);
+ break;
+ default:
+ return;
+ }
+
+ if (pptp_lnk != NULL) {
+ int accumulate = cptr->cid1;
+
+ /* alias the Call Id */
+ cptr->cid1 = GetAliasPort(pptp_lnk);
+
+ /* Compute TCP checksum for revised packet */
+ tc = (struct tcphdr *)ip_next(pip);
+ accumulate -= cptr->cid1;
+ ADJUST_CHECKSUM(accumulate, tc->th_sum);
+
+ switch (ctl_type) {
+ case PPTP_OutCallReply:
+ case PPTP_InCallReply:
+ codes = (PptpCode) (cptr + 1);
+ if (codes->resCode == 1) /* Connection
+ * established, */
+ SetDestCallId(pptp_lnk, /* note the Peer's Call
+ * ID. */
+ cptr->cid2);
+ else
+ SetExpire(pptp_lnk, 0); /* Connection refused. */
+ break;
+ case PPTP_CallDiscNotify: /* Connection closed. */
+ SetExpire(pptp_lnk, 0);
+ break;
+ }
+ }
+}
+
+static void
+AliasHandlePptpIn(struct libalias *la,
+ struct ip *pip, /* IP packet to examine/patch */
+ struct alias_link *lnk)
+{ /* The PPTP control link */
+ struct alias_link *pptp_lnk;
+ PptpCallId cptr;
+ u_int16_t *pcall_id;
+ u_int16_t ctl_type; /* control message type */
+ struct tcphdr *tc;
+
+ /* Verify valid PPTP control message */
+ if ((cptr = AliasVerifyPptp(pip, &ctl_type)) == NULL)
+ return;
+
+ /* Modify certain PPTP messages */
+ switch (ctl_type) {
+ case PPTP_InCallConn:
+ case PPTP_WanErrorNotify:
+ case PPTP_SetLinkInfo:
+ pcall_id = &cptr->cid1;
+ break;
+ case PPTP_OutCallReply:
+ case PPTP_InCallReply:
+ pcall_id = &cptr->cid2;
+ break;
+ case PPTP_CallDiscNotify: /* Connection closed. */
+ pptp_lnk = FindPptpInByCallId(la, GetDestAddress(lnk),
+ GetAliasAddress(lnk),
+ cptr->cid1);
+ if (pptp_lnk != NULL)
+ SetExpire(pptp_lnk, 0);
+ return;
+ default:
+ return;
+ }
+
+ /* Find PPTP link for address and Call ID found in PPTP Control Msg */
+ pptp_lnk = FindPptpInByPeerCallId(la, GetDestAddress(lnk),
+ GetAliasAddress(lnk),
+ *pcall_id);
+
+ if (pptp_lnk != NULL) {
+ int accumulate = *pcall_id;
+
+ /* De-alias the Peer's Call Id. */
+ *pcall_id = GetOriginalPort(pptp_lnk);
+
+ /* Compute TCP checksum for modified packet */
+ tc = (struct tcphdr *)ip_next(pip);
+ accumulate -= *pcall_id;
+ ADJUST_CHECKSUM(accumulate, tc->th_sum);
+
+ if (ctl_type == PPTP_OutCallReply || ctl_type == PPTP_InCallReply) {
+ PptpCode codes = (PptpCode) (cptr + 1);
+
+ if (codes->resCode == 1) /* Connection
+ * established, */
+ SetDestCallId(pptp_lnk, /* note the Call ID. */
+ cptr->cid1);
+ else
+ SetExpire(pptp_lnk, 0); /* Connection refused. */
+ }
+ }
+}
+
+static PptpCallId
+AliasVerifyPptp(struct ip *pip, u_int16_t * ptype)
+{ /* IP packet to examine/patch */
+ int hlen, tlen, dlen;
+ PptpMsgHead hptr;
+ struct tcphdr *tc;
+
+ /* Calculate some lengths */
+ tc = (struct tcphdr *)ip_next(pip);
+ hlen = (pip->ip_hl + tc->th_off) << 2;
+ tlen = ntohs(pip->ip_len);
+ dlen = tlen - hlen;
+
+ /* Verify data length */
+ if (dlen < (int)(sizeof(struct pptpMsgHead) + sizeof(struct pptpCallIds)))
+ return (NULL);
+
+ /* Move up to PPTP message header */
+ hptr = (PptpMsgHead) tcp_next(tc);
+
+ /* Return the control message type */
+ *ptype = ntohs(hptr->type);
+
+ /* Verify PPTP Control Message */
+ if ((ntohs(hptr->msgType) != PPTP_CTRL_MSG_TYPE) ||
+ (ntohl(hptr->magic) != PPTP_MAGIC))
+ return (NULL);
+
+ /* Verify data length. */
+ if ((*ptype == PPTP_OutCallReply || *ptype == PPTP_InCallReply) &&
+ (dlen < (int)(sizeof(struct pptpMsgHead) + sizeof(struct pptpCallIds) +
+ sizeof(struct pptpCodes))))
+ return (NULL);
+ else
+ return (PptpCallId) (hptr + 1);
+}
+
+static int
+AliasHandlePptpGreOut(struct libalias *la, struct ip *pip)
+{
+ GreHdr *gr;
+ struct alias_link *lnk;
+
+ gr = (GreHdr *) ip_next(pip);
+
+ /* Check GRE header bits. */
+ if ((ntohl(*((u_int32_t *) gr)) & PPTP_INIT_MASK) != PPTP_INIT_VALUE)
+ return (-1);
+
+ lnk = FindPptpOutByPeerCallId(la, pip->ip_src, pip->ip_dst, gr->gh_call_id);
+ if (lnk != NULL) {
+ struct in_addr alias_addr = GetAliasAddress(lnk);
+
+ /* Change source IP address. */
+ DifferentialChecksum(&pip->ip_sum,
+ &alias_addr, &pip->ip_src, 2);
+ pip->ip_src = alias_addr;
+ }
+ return (0);
+}
+
+static int
+AliasHandlePptpGreIn(struct libalias *la, struct ip *pip)
+{
+ GreHdr *gr;
+ struct alias_link *lnk;
+
+ gr = (GreHdr *) ip_next(pip);
+
+ /* Check GRE header bits. */
+ if ((ntohl(*((u_int32_t *) gr)) & PPTP_INIT_MASK) != PPTP_INIT_VALUE)
+ return (-1);
+
+ lnk = FindPptpInByPeerCallId(la, pip->ip_src, pip->ip_dst, gr->gh_call_id);
+ if (lnk != NULL) {
+ struct in_addr src_addr = GetOriginalAddress(lnk);
+
+ /* De-alias the Peer's Call Id. */
+ gr->gh_call_id = GetOriginalPort(lnk);
+
+ /* Restore original IP address. */
+ DifferentialChecksum(&pip->ip_sum,
+ &src_addr, &pip->ip_dst, 2);
+ pip->ip_dst = src_addr;
+ }
+ return (0);
+}
diff --git a/freebsd/sys/netinet/libalias/alias_proxy.c b/freebsd/sys/netinet/libalias/alias_proxy.c
new file mode 100644
index 00000000..f4f2b643
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_proxy.c
@@ -0,0 +1,870 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 Charles Mott <cm@linktel.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/* file: alias_proxy.c
+
+ This file encapsulates special operations related to transparent
+ proxy redirection. This is where packets with a particular destination,
+ usually tcp port 80, are redirected to a proxy server.
+
+ When packets are proxied, the destination address and port are
+ modified. In certain cases, it is necessary to somehow encode
+ the original address/port info into the packet. Two methods are
+ presently supported: addition of a [DEST addr port] string at the
+ beginning of a tcp stream, or inclusion of an optional field
+ in the IP header.
+
+ There is one public API function:
+
+ PacketAliasProxyRule() -- Adds and deletes proxy
+ rules.
+
+ Rules are stored in a linear linked list, so lookup efficiency
+ won't be too good for large lists.
+
+
+ Initial development: April, 1998 (cjm)
+*/
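+
+/*
+   For illustration only (the addresses, port and rule number are made up,
+   and la is a libalias instance obtained from LibAliasInit()): a rule that
+   redirects outbound HTTP to a transparent proxy at 203.0.113.10:3128,
+   encoding the original destination in an IP option, could be installed
+   through the LibAliasProxyRule() entry point defined below:
+
+	LibAliasProxyRule(la,
+	    "server 203.0.113.10:3128 port 80 proto tcp type encode_ip_hdr");
+
+   and removed again with
+
+	LibAliasProxyRule(la, "delete 0");
+
+   (0 being the default rule number when no "rule n" clause is given).
+*/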
+
+
+/* System includes */
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/ctype.h>
+#include <freebsd/sys/libkern.h>
+#include <freebsd/sys/limits.h>
+#else
+#include <freebsd/sys/types.h>
+#include <freebsd/ctype.h>
+#include <freebsd/stdio.h>
+#include <freebsd/stdlib.h>
+#include <freebsd/netdb.h>
+#include <freebsd/string.h>
+#endif
+
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/arpa/inet.h>
+#include <freebsd/local/alias.h> /* Public API functions for libalias */
+#include <freebsd/local/alias_local.h> /* Functions used by alias*.c */
+#endif
+
+/*
+ Data structures
+ */
+
+/*
+ * A linked list of arbitrary length, based on struct proxy_entry is
+ * used to store proxy rules.
+ */
+struct proxy_entry {
+ struct libalias *la;
+#define PROXY_TYPE_ENCODE_NONE 1
+#define PROXY_TYPE_ENCODE_TCPSTREAM 2
+#define PROXY_TYPE_ENCODE_IPHDR 3
+ int rule_index;
+ int proxy_type;
+ u_char proto;
+ u_short proxy_port;
+ u_short server_port;
+
+ struct in_addr server_addr;
+
+ struct in_addr src_addr;
+ struct in_addr src_mask;
+
+ struct in_addr dst_addr;
+ struct in_addr dst_mask;
+
+ struct proxy_entry *next;
+ struct proxy_entry *last;
+};
+
+
+
+/*
+ File scope variables
+*/
+
+
+
+/* Local (static) functions:
+
+ IpMask() -- Utility function for creating IP
+ masks from integer (1-32) specification.
+ IpAddr() -- Utility function for converting string
+ to IP address
+ IpPort() -- Utility function for converting string
+ to port number
+ RuleAdd() -- Adds an element to the rule list.
+ RuleDelete() -- Removes an element from the rule list.
+ RuleNumberDelete() -- Removes all elements from the rule list
+ having a certain rule number.
+ ProxyEncodeTcpStream() -- Adds [DEST x.x.x.x xxxx] to the beginning
+ of a TCP stream.
+ ProxyEncodeIpHeader() -- Adds an IP option indicating the true
+ destination of a proxied IP packet
+*/
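+
+/*
+   For example, IpMask(24, &mask) produces the network-order mask
+   255.255.255.0 (htonl(0xffffff00)), while IpMask(0, &mask) produces
+   0.0.0.0, which matches any address.
+*/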
+
+static int IpMask(int, struct in_addr *);
+static int IpAddr(char *, struct in_addr *);
+static int IpPort(char *, int, int *);
+static void RuleAdd(struct libalias *la, struct proxy_entry *);
+static void RuleDelete(struct proxy_entry *);
+static int RuleNumberDelete(struct libalias *la, int);
+static void ProxyEncodeTcpStream(struct alias_link *, struct ip *, int);
+static void ProxyEncodeIpHeader(struct ip *, int);
+
+static int
+IpMask(int nbits, struct in_addr *mask)
+{
+ int i;
+ u_int imask;
+
+ if (nbits < 0 || nbits > 32)
+ return (-1);
+
+ imask = 0;
+ for (i = 0; i < nbits; i++)
+ imask = (imask >> 1) + 0x80000000;
+ mask->s_addr = htonl(imask);
+
+ return (0);
+}
+
+static int
+IpAddr(char *s, struct in_addr *addr)
+{
+ if (inet_aton(s, addr) == 0)
+ return (-1);
+ else
+ return (0);
+}
+
+static int
+IpPort(char *s, int proto, int *port)
+{
+ int n;
+
+ n = sscanf(s, "%d", port);
+ if (n != 1)
+#ifndef _KERNEL /* XXX: we accept only numeric ports in kernel */
+ {
+ struct servent *se;
+
+ if (proto == IPPROTO_TCP)
+ se = getservbyname(s, "tcp");
+ else if (proto == IPPROTO_UDP)
+ se = getservbyname(s, "udp");
+ else
+ return (-1);
+
+ if (se == NULL)
+ return (-1);
+
+ *port = (u_int) ntohs(se->s_port);
+ }
+#else
+ return (-1);
+#endif
+ return (0);
+}
+
+static void
+RuleAdd(struct libalias *la, struct proxy_entry *entry)
+{
+ int rule_index;
+ struct proxy_entry *ptr;
+ struct proxy_entry *ptr_last;
+
+ LIBALIAS_LOCK_ASSERT(la);
+
+	entry->la = la;
+	if (la->proxyList == NULL) {
+		la->proxyList = entry;
+		entry->last = NULL;
+		entry->next = NULL;
+		return;
+	}
+
+ rule_index = entry->rule_index;
+ ptr = la->proxyList;
+ ptr_last = NULL;
+ while (ptr != NULL) {
+ if (ptr->rule_index >= rule_index) {
+ if (ptr_last == NULL) {
+ entry->next = la->proxyList;
+ entry->last = NULL;
+ la->proxyList->last = entry;
+ la->proxyList = entry;
+ return;
+ }
+ ptr_last->next = entry;
+ ptr->last = entry;
+			entry->last = ptr_last;
+ entry->next = ptr;
+ return;
+ }
+ ptr_last = ptr;
+ ptr = ptr->next;
+ }
+
+ ptr_last->next = entry;
+ entry->last = ptr_last;
+ entry->next = NULL;
+}
+
+static void
+RuleDelete(struct proxy_entry *entry)
+{
+ struct libalias *la;
+
+ la = entry->la;
+ LIBALIAS_LOCK_ASSERT(la);
+ if (entry->last != NULL)
+ entry->last->next = entry->next;
+ else
+ la->proxyList = entry->next;
+
+ if (entry->next != NULL)
+ entry->next->last = entry->last;
+
+ free(entry);
+}
+
+static int
+RuleNumberDelete(struct libalias *la, int rule_index)
+{
+ int err;
+ struct proxy_entry *ptr;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ err = -1;
+ ptr = la->proxyList;
+ while (ptr != NULL) {
+ struct proxy_entry *ptr_next;
+
+ ptr_next = ptr->next;
+ if (ptr->rule_index == rule_index) {
+ err = 0;
+ RuleDelete(ptr);
+ }
+ ptr = ptr_next;
+ }
+
+ return (err);
+}
+
+static void
+ProxyEncodeTcpStream(struct alias_link *lnk,
+ struct ip *pip,
+ int maxpacketsize)
+{
+ int slen;
+ char buffer[40];
+ struct tcphdr *tc;
+
+/* Compute pointer to tcp header */
+ tc = (struct tcphdr *)ip_next(pip);
+
+/* Don't modify if once already modified */
+
+ if (GetAckModified(lnk))
+ return;
+
+/* Translate destination address and port to string form */
+ snprintf(buffer, sizeof(buffer) - 2, "[DEST %s %d]",
+ inet_ntoa(GetProxyAddress(lnk)), (u_int) ntohs(GetProxyPort(lnk)));
+
+/* Pad string out to a multiple of two in length */
+ slen = strlen(buffer);
+ switch (slen % 2) {
+ case 0:
+ strcat(buffer, " \n");
+ slen += 2;
+ break;
+ case 1:
+ strcat(buffer, "\n");
+ slen += 1;
+ }
+
+/* Check for packet overflow */
+ if ((int)(ntohs(pip->ip_len) + strlen(buffer)) > maxpacketsize)
+ return;
+
+/* Shift existing TCP data and insert destination string */
+ {
+ int dlen;
+ int hlen;
+ char *p;
+
+ hlen = (pip->ip_hl + tc->th_off) << 2;
+ dlen = ntohs(pip->ip_len) - hlen;
+
+/* Modify first packet that has data in it */
+
+ if (dlen == 0)
+ return;
+
+ p = (char *)pip;
+ p += hlen;
+
+ bcopy(p, p + slen, dlen);
+ memcpy(p, buffer, slen);
+ }
+
+/* Save information about modified sequence number */
+ {
+ int delta;
+
+ SetAckModified(lnk);
+ tc = (struct tcphdr *)ip_next(pip);
+ delta = GetDeltaSeqOut(tc->th_seq, lnk);
+ AddSeq(lnk, delta + slen, pip->ip_hl, pip->ip_len, tc->th_seq,
+ tc->th_off);
+ }
+
+/* Update IP header packet length and checksum */
+ {
+ int accumulate;
+
+ accumulate = pip->ip_len;
+ pip->ip_len = htons(ntohs(pip->ip_len) + slen);
+ accumulate -= pip->ip_len;
+
+ ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+ }
+
+/* Update TCP checksum; use TcpChecksum() since so many things have
+   already changed. */
+
+ tc->th_sum = 0;
+#ifdef _KERNEL
+ tc->th_x2 = 1;
+#else
+ tc->th_sum = TcpChecksum(pip);
+#endif
+}
+
+static void
+ProxyEncodeIpHeader(struct ip *pip,
+ int maxpacketsize)
+{
+#define OPTION_LEN_BYTES 8
+#define OPTION_LEN_INT16 4
+#define OPTION_LEN_INT32 2
+ u_char option[OPTION_LEN_BYTES];
+
+#ifdef LIBALIAS_DEBUG
+ fprintf(stdout, " ip cksum 1 = %x\n", (u_int) IpChecksum(pip));
+ fprintf(stdout, "tcp cksum 1 = %x\n", (u_int) TcpChecksum(pip));
+#endif
+
+ (void)maxpacketsize;
+
+/* Check to see that there is room to add an IP option */
+ if (pip->ip_hl > (0x0f - OPTION_LEN_INT32))
+ return;
+
+/* Build option and copy into packet */
+ {
+ u_char *ptr;
+ struct tcphdr *tc;
+
+ ptr = (u_char *) pip;
+ ptr += 20;
+ memcpy(ptr + OPTION_LEN_BYTES, ptr, ntohs(pip->ip_len) - 20);
+
+ option[0] = 0x64; /* class: 3 (reserved), option 4 */
+ option[1] = OPTION_LEN_BYTES;
+
+ memcpy(&option[2], (u_char *) & pip->ip_dst, 4);
+
+ tc = (struct tcphdr *)ip_next(pip);
+ memcpy(&option[6], (u_char *) & tc->th_sport, 2);
+
+ memcpy(ptr, option, 8);
+ }
+
+/* Update checksum, header length and packet length */
+ {
+ int i;
+ int accumulate;
+ u_short *sptr;
+
+ sptr = (u_short *) option;
+ accumulate = 0;
+ for (i = 0; i < OPTION_LEN_INT16; i++)
+ accumulate -= *(sptr++);
+
+ sptr = (u_short *) pip;
+ accumulate += *sptr;
+ pip->ip_hl += OPTION_LEN_INT32;
+ accumulate -= *sptr;
+
+ accumulate += pip->ip_len;
+ pip->ip_len = htons(ntohs(pip->ip_len) + OPTION_LEN_BYTES);
+ accumulate -= pip->ip_len;
+
+ ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+ }
+#undef OPTION_LEN_BYTES
+#undef OPTION_LEN_INT16
+#undef OPTION_LEN_INT32
+#ifdef LIBALIAS_DEBUG
+ fprintf(stdout, " ip cksum 2 = %x\n", (u_int) IpChecksum(pip));
+ fprintf(stdout, "tcp cksum 2 = %x\n", (u_int) TcpChecksum(pip));
+#endif
+}
+
+
+/* Functions used by other packet alias source files
+
+ ProxyCheck() -- Checks whether an outgoing packet should
+ be proxied.
+ ProxyModify() -- Encodes the original destination address/port
+ for a packet which is to be redirected to
+ a proxy server.
+*/
+
+int
+ProxyCheck(struct libalias *la, struct in_addr *proxy_server_addr,
+ u_short * proxy_server_port, struct in_addr src_addr,
+ struct in_addr dst_addr, u_short dst_port, u_char ip_p)
+{
+ struct proxy_entry *ptr;
+
+ LIBALIAS_LOCK_ASSERT(la);
+
+ ptr = la->proxyList;
+ while (ptr != NULL) {
+ u_short proxy_port;
+
+ proxy_port = ptr->proxy_port;
+ if ((dst_port == proxy_port || proxy_port == 0)
+ && ip_p == ptr->proto
+ && src_addr.s_addr != ptr->server_addr.s_addr) {
+ struct in_addr src_addr_masked;
+ struct in_addr dst_addr_masked;
+
+ src_addr_masked.s_addr = src_addr.s_addr & ptr->src_mask.s_addr;
+ dst_addr_masked.s_addr = dst_addr.s_addr & ptr->dst_mask.s_addr;
+
+ if ((src_addr_masked.s_addr == ptr->src_addr.s_addr)
+ && (dst_addr_masked.s_addr == ptr->dst_addr.s_addr)) {
+ if ((*proxy_server_port = ptr->server_port) == 0)
+ *proxy_server_port = dst_port;
+ *proxy_server_addr = ptr->server_addr;
+ return (ptr->proxy_type);
+ }
+ }
+ ptr = ptr->next;
+ }
+
+ return (0);
+}
+
+void
+ProxyModify(struct libalias *la, struct alias_link *lnk,
+ struct ip *pip,
+ int maxpacketsize,
+ int proxy_type)
+{
+
+ LIBALIAS_LOCK_ASSERT(la);
+ (void)la;
+
+ switch (proxy_type) {
+ case PROXY_TYPE_ENCODE_IPHDR:
+ ProxyEncodeIpHeader(pip, maxpacketsize);
+ break;
+
+ case PROXY_TYPE_ENCODE_TCPSTREAM:
+ ProxyEncodeTcpStream(lnk, pip, maxpacketsize);
+ break;
+ }
+}
+
+
+/*
+ Public API functions
+*/
+
+int
+LibAliasProxyRule(struct libalias *la, const char *cmd)
+{
+/*
+ * This function takes command strings of the form:
+ *
+ * server <addr>[:<port>]
+ * [port <port>]
+ * [rule n]
+ * [proto tcp|udp]
+ * [src <addr>[/n]]
+ * [dst <addr>[/n]]
+ * [type encode_tcp_stream|encode_ip_hdr|no_encode]
+ *
+ * delete <rule number>
+ *
+ * Subfields can be in arbitrary order. Port numbers and addresses
+ * must be in either numeric or symbolic form. An optional rule number
+ * is used to control the order in which rules are searched. If two
+ * rules have the same number, then search order cannot be guaranteed,
+ * and the rules should be disjoint. If no rule number is specified,
+ * then 0 is used, and group 0 rules are always checked before any
+ * others.
+ */
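+/*
+ * Illustrative command strings (addresses and rule numbers are examples
+ * only):
+ *
+ *	"rule 10 server 203.0.113.10:3128 port 80 proto tcp src 192.168.0.0/24"
+ *	"delete 10"
+ */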
+ int i, n, len, ret;
+ int cmd_len;
+ int token_count;
+ int state;
+ char *token;
+ char buffer[256];
+ char str_port[sizeof(buffer)];
+ char str_server_port[sizeof(buffer)];
+ char *res = buffer;
+
+ int rule_index;
+ int proto;
+ int proxy_type;
+ int proxy_port;
+ int server_port;
+ struct in_addr server_addr;
+ struct in_addr src_addr, src_mask;
+ struct in_addr dst_addr, dst_mask;
+ struct proxy_entry *proxy_entry;
+
+ LIBALIAS_LOCK(la);
+ ret = 0;
+/* Copy command line into a buffer */
+ cmd += strspn(cmd, " \t");
+ cmd_len = strlen(cmd);
+ if (cmd_len > (int)(sizeof(buffer) - 1)) {
+ ret = -1;
+ goto getout;
+ }
+ strcpy(buffer, cmd);
+
+/* Convert to lower case */
+ len = strlen(buffer);
+ for (i = 0; i < len; i++)
+ buffer[i] = tolower((unsigned char)buffer[i]);
+
+/* Set default proxy type */
+
+/* Set up default values */
+ rule_index = 0;
+ proxy_type = PROXY_TYPE_ENCODE_NONE;
+ proto = IPPROTO_TCP;
+ proxy_port = 0;
+ server_addr.s_addr = 0;
+ server_port = 0;
+ src_addr.s_addr = 0;
+ IpMask(0, &src_mask);
+ dst_addr.s_addr = 0;
+ IpMask(0, &dst_mask);
+
+ str_port[0] = 0;
+ str_server_port[0] = 0;
+
+/* Parse command string with state machine */
+#define STATE_READ_KEYWORD 0
+#define STATE_READ_TYPE 1
+#define STATE_READ_PORT 2
+#define STATE_READ_SERVER 3
+#define STATE_READ_RULE 4
+#define STATE_READ_DELETE 5
+#define STATE_READ_PROTO 6
+#define STATE_READ_SRC 7
+#define STATE_READ_DST 8
+ state = STATE_READ_KEYWORD;
+ token = strsep(&res, " \t");
+ token_count = 0;
+ while (token != NULL) {
+ token_count++;
+ switch (state) {
+ case STATE_READ_KEYWORD:
+ if (strcmp(token, "type") == 0)
+ state = STATE_READ_TYPE;
+ else if (strcmp(token, "port") == 0)
+ state = STATE_READ_PORT;
+ else if (strcmp(token, "server") == 0)
+ state = STATE_READ_SERVER;
+ else if (strcmp(token, "rule") == 0)
+ state = STATE_READ_RULE;
+ else if (strcmp(token, "delete") == 0)
+ state = STATE_READ_DELETE;
+ else if (strcmp(token, "proto") == 0)
+ state = STATE_READ_PROTO;
+ else if (strcmp(token, "src") == 0)
+ state = STATE_READ_SRC;
+ else if (strcmp(token, "dst") == 0)
+ state = STATE_READ_DST;
+ else {
+ ret = -1;
+ goto getout;
+ }
+ break;
+
+ case STATE_READ_TYPE:
+ if (strcmp(token, "encode_ip_hdr") == 0)
+ proxy_type = PROXY_TYPE_ENCODE_IPHDR;
+ else if (strcmp(token, "encode_tcp_stream") == 0)
+ proxy_type = PROXY_TYPE_ENCODE_TCPSTREAM;
+ else if (strcmp(token, "no_encode") == 0)
+ proxy_type = PROXY_TYPE_ENCODE_NONE;
+ else {
+ ret = -1;
+ goto getout;
+ }
+ state = STATE_READ_KEYWORD;
+ break;
+
+ case STATE_READ_PORT:
+ strcpy(str_port, token);
+ state = STATE_READ_KEYWORD;
+ break;
+
+ case STATE_READ_SERVER:
+ {
+ int err;
+ char *p;
+ char s[sizeof(buffer)];
+
+ p = token;
+ while (*p != ':' && *p != 0)
+ p++;
+
+ if (*p != ':') {
+ err = IpAddr(token, &server_addr);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
+ } else {
+ *p = ' ';
+
+ n = sscanf(token, "%s %s", s, str_server_port);
+ if (n != 2) {
+ ret = -1;
+ goto getout;
+ }
+
+ err = IpAddr(s, &server_addr);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
+ }
+ }
+ state = STATE_READ_KEYWORD;
+ break;
+
+ case STATE_READ_RULE:
+ n = sscanf(token, "%d", &rule_index);
+ if (n != 1 || rule_index < 0) {
+ ret = -1;
+ goto getout;
+ }
+ state = STATE_READ_KEYWORD;
+ break;
+
+ case STATE_READ_DELETE:
+ {
+ int err;
+ int rule_to_delete;
+
+ if (token_count != 2) {
+ ret = -1;
+ goto getout;
+ }
+
+ n = sscanf(token, "%d", &rule_to_delete);
+ if (n != 1) {
+ ret = -1;
+ goto getout;
+ }
+			err = RuleNumberDelete(la, rule_to_delete);
+			if (err)
+				ret = -1;
+			else
+				ret = 0;
+			goto getout;
+ }
+
+ case STATE_READ_PROTO:
+ if (strcmp(token, "tcp") == 0)
+ proto = IPPROTO_TCP;
+ else if (strcmp(token, "udp") == 0)
+ proto = IPPROTO_UDP;
+ else {
+ ret = -1;
+ goto getout;
+ }
+ state = STATE_READ_KEYWORD;
+ break;
+
+ case STATE_READ_SRC:
+ case STATE_READ_DST:
+ {
+ int err;
+ char *p;
+ struct in_addr mask;
+ struct in_addr addr;
+
+ p = token;
+ while (*p != '/' && *p != 0)
+ p++;
+
+ if (*p != '/') {
+ IpMask(32, &mask);
+ err = IpAddr(token, &addr);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
+ } else {
+ int nbits;
+ char s[sizeof(buffer)];
+
+ *p = ' ';
+ n = sscanf(token, "%s %d", s, &nbits);
+ if (n != 2) {
+ ret = -1;
+ goto getout;
+ }
+
+ err = IpAddr(s, &addr);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
+
+ err = IpMask(nbits, &mask);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
+ }
+
+ if (state == STATE_READ_SRC) {
+ src_addr = addr;
+ src_mask = mask;
+ } else {
+ dst_addr = addr;
+ dst_mask = mask;
+ }
+ }
+ state = STATE_READ_KEYWORD;
+ break;
+
+ default:
+ ret = -1;
+ goto getout;
+ break;
+ }
+
+ do {
+ token = strsep(&res, " \t");
+ } while (token != NULL && !*token);
+ }
+#undef STATE_READ_KEYWORD
+#undef STATE_READ_TYPE
+#undef STATE_READ_PORT
+#undef STATE_READ_SERVER
+#undef STATE_READ_RULE
+#undef STATE_READ_DELETE
+#undef STATE_READ_PROTO
+#undef STATE_READ_SRC
+#undef STATE_READ_DST
+
+/* Convert port strings to numbers. This needs to be done after
+   the string is parsed, because the protocol might not be specified
+   before the ports (which might be symbolic entries in /etc/services). */
+
+ if (strlen(str_port) != 0) {
+ int err;
+
+ err = IpPort(str_port, proto, &proxy_port);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
+ } else {
+ proxy_port = 0;
+ }
+
+ if (strlen(str_server_port) != 0) {
+ int err;
+
+ err = IpPort(str_server_port, proto, &server_port);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
+ } else {
+ server_port = 0;
+ }
+
+/* Check that at least the server address has been defined */
+ if (server_addr.s_addr == 0) {
+ ret = -1;
+ goto getout;
+ }
+
+/* Add to linked list */
+ proxy_entry = malloc(sizeof(struct proxy_entry));
+ if (proxy_entry == NULL) {
+ ret = -1;
+ goto getout;
+ }
+
+ proxy_entry->proxy_type = proxy_type;
+ proxy_entry->rule_index = rule_index;
+ proxy_entry->proto = proto;
+ proxy_entry->proxy_port = htons(proxy_port);
+ proxy_entry->server_port = htons(server_port);
+ proxy_entry->server_addr = server_addr;
+ proxy_entry->src_addr.s_addr = src_addr.s_addr & src_mask.s_addr;
+ proxy_entry->dst_addr.s_addr = dst_addr.s_addr & dst_mask.s_addr;
+ proxy_entry->src_mask = src_mask;
+ proxy_entry->dst_mask = dst_mask;
+
+ RuleAdd(la, proxy_entry);
+
+getout:
+ LIBALIAS_UNLOCK(la);
+ return (ret);
+}
diff --git a/freebsd/sys/netinet/libalias/alias_sctp.c b/freebsd/sys/netinet/libalias/alias_sctp.c
new file mode 100644
index 00000000..cdec258c
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_sctp.c
@@ -0,0 +1,2700 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2008
+ * Swinburne University of Technology, Melbourne, Australia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Alias_sctp forms part of the libalias kernel module to handle
+ * Network Address Translation (NAT) for the SCTP protocol.
+ *
+ * This software was developed by David A. Hayes and Jason But
+ *
+ * The design is outlined in CAIA technical report number 080618A
+ * (D. Hayes and J. But, "Alias_sctp Version 0.1: SCTP NAT implementation in IPFW")
+ *
+ * Development is part of the CAIA SONATA project,
+ * proposed by Jason But and Grenville Armitage:
+ * http://caia.swin.edu.au/urp/sonata/
+ *
+ *
+ * This project has been made possible in part by a grant from
+ * the Cisco University Research Program Fund at Community
+ * Foundation Silicon Valley.
+ *
+ */
+/** @mainpage
+ * Alias_sctp is part of the SONATA (http://caia.swin.edu.au/urp/sonata) project
+ * to develop and release a BSD licensed implementation of a Network Address
+ * Translation (NAT) module that supports the Stream Control Transmission
+ * Protocol (SCTP).
+ *
+ * Traditional address and port number look ups are inadequate for SCTP's
+ * operation due to both processing requirements and issues with multi-homing.
+ * Alias_sctp integrates with FreeBSD's ipfw/libalias NAT system.
+ *
+ * Version 0.2 features include:
+ * - Support for global multi-homing
+ * - Support for ASCONF modification from Internet Draft
+ * (draft-stewart-behave-sctpnat-04, R. Stewart and M. Tuexen, "Stream control
+ * transmission protocol (SCTP) network address translation," Jul. 2008) to
+ * provide support for multi-homed privately addressed hosts
+ * - Support for forwarding of T-flagged packets
+ * - Generation and delivery of AbortM/ErrorM packets upon detection of NAT
+ * collisions
+ * - Per-port forwarding rules
+ * - Dynamically controllable logging and statistics
+ * - Dynamic management of timers
+ * - Dynamic control of hash-table size
+ */
+
+/* $FreeBSD$ */
+
+#ifdef _KERNEL
+#include <freebsd/machine/stdarg.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/netinet/libalias/alias_sctp.h>
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/sctp_crc32.h>
+#include <freebsd/machine/in_cksum.h>
+#else
+#include <freebsd/local/alias_sctp.h>
+#include <freebsd/arpa/inet.h>
+#include <freebsd/local/alias.h>
+#include <freebsd/local/alias_local.h>
+#include <freebsd/machine/in_cksum.h>
+#include <freebsd/sys/libkern.h>
+#endif //#ifdef _KERNEL
+
+/* ----------------------------------------------------------------------
+ * FUNCTION PROTOTYPES
+ * ----------------------------------------------------------------------
+ */
+/* Packet Parsing Functions */
+static int sctp_PktParser(struct libalias *la, int direction, struct ip *pip,
+ struct sctp_nat_msg *sm, struct sctp_nat_assoc **passoc);
+static int GetAsconfVtags(struct libalias *la, struct sctp_nat_msg *sm,
+ uint32_t *l_vtag, uint32_t *g_vtag, int direction);
+static int IsASCONFack(struct libalias *la, struct sctp_nat_msg *sm, int direction);
+
+static void AddGlobalIPAddresses(struct sctp_nat_msg *sm, struct sctp_nat_assoc *assoc, int direction);
+static int Add_Global_Address_to_List(struct sctp_nat_assoc *assoc, struct sctp_GlobalAddress *G_addr);
+static void RmGlobalIPAddresses(struct sctp_nat_msg *sm, struct sctp_nat_assoc *assoc, int direction);
+static int IsADDorDEL(struct libalias *la, struct sctp_nat_msg *sm, int direction);
+
+/* State Machine Functions */
+static int ProcessSctpMsg(struct libalias *la, int direction, \
+ struct sctp_nat_msg *sm, struct sctp_nat_assoc *assoc);
+
+static int ID_process(struct libalias *la, int direction,\
+ struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm);
+static int INi_process(struct libalias *la, int direction,\
+ struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm);
+static int INa_process(struct libalias *la, int direction,\
+ struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm);
+static int UP_process(struct libalias *la, int direction,\
+ struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm);
+static int CL_process(struct libalias *la, int direction,\
+ struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm);
+static void TxAbortErrorM(struct libalias *la, struct sctp_nat_msg *sm,\
+ struct sctp_nat_assoc *assoc, int sndrply, int direction);
+
+/* Hash Table Functions */
+static struct sctp_nat_assoc*
+FindSctpLocal(struct libalias *la, struct in_addr l_addr, struct in_addr g_addr, uint32_t l_vtag, uint16_t l_port, uint16_t g_port);
+static struct sctp_nat_assoc*
+FindSctpGlobal(struct libalias *la, struct in_addr g_addr, uint32_t g_vtag, uint16_t g_port, uint16_t l_port, int *partial_match);
+static struct sctp_nat_assoc*
+FindSctpGlobalClash(struct libalias *la, struct sctp_nat_assoc *Cassoc);
+static struct sctp_nat_assoc*
+FindSctpLocalT(struct libalias *la, struct in_addr g_addr, uint32_t l_vtag, uint16_t g_port, uint16_t l_port);
+static struct sctp_nat_assoc*
+FindSctpGlobalT(struct libalias *la, struct in_addr g_addr, uint32_t g_vtag, uint16_t l_port, uint16_t g_port);
+
+static int AddSctpAssocLocal(struct libalias *la, struct sctp_nat_assoc *assoc, struct in_addr g_addr);
+static int AddSctpAssocGlobal(struct libalias *la, struct sctp_nat_assoc *assoc);
+static void RmSctpAssoc(struct libalias *la, struct sctp_nat_assoc *assoc);
+static void freeGlobalAddressList(struct sctp_nat_assoc *assoc);
+
+/* Timer Queue Functions */
+static void sctp_AddTimeOut(struct libalias *la, struct sctp_nat_assoc *assoc);
+static void sctp_RmTimeOut(struct libalias *la, struct sctp_nat_assoc *assoc);
+static void sctp_ResetTimeOut(struct libalias *la, struct sctp_nat_assoc *assoc, int newexp);
+void sctp_CheckTimers(struct libalias *la);
+
+
+/* Logging Functions */
+static void logsctperror(char* errormsg, uint32_t vtag, int error, int direction);
+static void logsctpparse(int direction, struct sctp_nat_msg *sm);
+static void logsctpassoc(struct sctp_nat_assoc *assoc, char *s);
+static void logTimerQ(struct libalias *la);
+static void logSctpGlobal(struct libalias *la);
+static void logSctpLocal(struct libalias *la);
+#ifdef _KERNEL
+static void SctpAliasLog(const char *format, ...);
+#endif
+
+/** @defgroup external External code changes and modifications
+ *
+ * Some changes have been made to files external to alias_sctp.(c|h). These
+ * changes are primarily due to code needing to call static functions within
+ * those files or to perform extra functionality that can only be performed
+ * within these files.
+ */
+/** @ingroup external
+ * @brief Log current statistics for the libalias instance
+ *
+ * This function is defined in alias_db.c, since it calls static functions in
+ * this file
+ *
+ * Calls the higher level ShowAliasStats() in alias_db.c which logs all current
+ * statistics about the libalias instance - including SCTP statistics
+ *
+ * @param la Pointer to the libalias instance
+ */
+void SctpShowAliasStats(struct libalias *la);
+
+#ifdef _KERNEL
+
+MALLOC_DEFINE(M_SCTPNAT, "sctpnat", "sctp nat dbs");
+/* Use kernel allocator. */
+#ifdef _SYS_MALLOC_HH_
+#define sn_malloc(x) malloc(x, M_SCTPNAT, M_NOWAIT|M_ZERO)
+#define sn_calloc(n,x) sn_malloc(x * n)
+#define sn_free(x) free(x, M_SCTPNAT)
+#endif// #ifdef _SYS_MALLOC_HH_
+
+#else //#ifdef _KERNEL
+#define sn_malloc(x) malloc(x)
+#define sn_calloc(n, x) calloc(n, x)
+#define sn_free(x) free(x)
+
+#endif //#ifdef _KERNEL
+
+/** @defgroup packet_parser SCTP Packet Parsing
+ *
+ * Macros to:
+ * - Return pointers to the first and next SCTP chunks within an SCTP Packet
+ * - Define possible return values of the packet parsing process
+ * - SCTP message types for storing in the sctp_nat_msg structure @{
+ */
+
+#define SN_SCTP_FIRSTCHUNK(sctphead) (struct sctp_chunkhdr *)(((char *)sctphead) + sizeof(struct sctphdr))
+/**< Returns a pointer to the first chunk in an SCTP packet given a pointer to the SCTP header */
+
+#define SN_SCTP_NEXTCHUNK(chunkhead) (struct sctp_chunkhdr *)(((char *)chunkhead) + SCTP_SIZE32(ntohs(chunkhead->chunk_length)))
+/**< Returns a pointer to the next chunk in an SCTP packet given a pointer to the current chunk */
+
+#define SN_SCTP_NEXTPARAM(param) (struct sctp_paramhdr *)(((char *)param) + SCTP_SIZE32(ntohs(param->param_length)))
+/**< Returns a pointer to the next parameter in an SCTP packet given a pointer to the current parameter */
+
+#define SN_MIN_CHUNK_SIZE 4 /**< Smallest possible SCTP chunk size in bytes */
+#define SN_MIN_PARAM_SIZE 4 /**< Smallest possible SCTP param size in bytes */
+#define SN_VTAG_PARAM_SIZE 12 /**< Size of SCTP ASCONF vtag param in bytes */
+#define SN_ASCONFACK_PARAM_SIZE 8 /**< Size of SCTP ASCONF ACK param in bytes */
+
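+/*
+   A minimal illustrative sketch of how these macros are typically combined
+   to walk the chunks of a packet, with sysctl_chunk_proc_limit bounding the
+   search:
+
+	struct sctphdr *sctp_hdr = (struct sctphdr *)ip_next(pip);
+	struct sctp_chunkhdr *chunk_hdr = SN_SCTP_FIRSTCHUNK(sctp_hdr);
+	int chunk_count = 1;
+
+	while (IS_WITHIN_PACKET(chunk_hdr) &&
+	    chunk_count <= sysctl_chunk_proc_limit) {
+		... examine chunk_hdr->chunk_type ...
+		chunk_hdr = SN_SCTP_NEXTCHUNK(chunk_hdr);
+		chunk_count++;
+	}
+
+   where IS_WITHIN_PACKET is only a placeholder for the bounds check against
+   the end of the IP payload (the real parser below also enforces
+   SN_MIN_CHUNK_SIZE).
+*/
+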
+/* Packet parsing return codes */
+#define SN_PARSE_OK 0 /**< Packet parsed for SCTP messages */
+#define SN_PARSE_ERROR_IPSHL 1 /**< Packet parsing error - IP and SCTP common header len */
+#define SN_PARSE_ERROR_AS_MALLOC 2 /**< Packet parsing error - assoc malloc */
+#define SN_PARSE_ERROR_CHHL 3 /**< Packet parsing error - Chunk header len */
+#define SN_PARSE_ERROR_DIR 4 /**< Packet parsing error - Direction */
+#define SN_PARSE_ERROR_VTAG 5 /**< Packet parsing error - Vtag */
+#define SN_PARSE_ERROR_CHUNK 6 /**< Packet parsing error - Chunk */
+#define SN_PARSE_ERROR_PORT 7 /**< Packet parsing error - Port=0 */
+#define SN_PARSE_ERROR_LOOKUP 8 /**< Packet parsing error - Lookup */
+#define SN_PARSE_ERROR_PARTIALLOOKUP 9 /**< Packet parsing error - partial lookup only found */
+#define SN_PARSE_ERROR_LOOKUP_ABORT 10 /**< Packet parsing error - Lookup - but abort packet */
+
+/* Alias_sctp performs its processing based on a number of key messages */
+#define SN_SCTP_ABORT 0x0000 /**< a packet containing an ABORT chunk */
+#define SN_SCTP_INIT 0x0001 /**< a packet containing an INIT chunk */
+#define SN_SCTP_INITACK 0x0002 /**< a packet containing an INIT-ACK chunk */
+#define SN_SCTP_SHUTCOMP 0x0010 /**< a packet containing a SHUTDOWN-COMPLETE chunk */
+#define SN_SCTP_SHUTACK 0x0020 /**< a packet containing a SHUTDOWN-ACK chunk */
+#define SN_SCTP_ASCONF 0x0100 /**< a packet containing an ASCONF chunk */
+#define SN_SCTP_ASCONFACK 0x0200 /**< a packet containing an ASCONF-ACK chunk */
+#define SN_SCTP_OTHER 0xFFFF /**< a packet containing a chunk that is not of interest */
+
+/** @}
+ * @defgroup state_machine SCTP NAT State Machine
+ *
+ * Defines the various states an association can be within the NAT @{
+ */
+#define SN_ID 0x0000 /**< Idle state */
+#define SN_INi 0x0010 /**< Initialising, waiting for InitAck state */
+#define SN_INa 0x0020 /**< Initialising, waiting for AddIpAck state */
+#define SN_UP 0x0100 /**< Association in UP state */
+#define SN_CL 0x1000 /**< Closing state */
+#define SN_RM 0x2000 /**< Removing state */
+
+/** @}
+ * @defgroup Logging Logging Functionality
+ *
+ * Define various log levels and a macro to call specified log functions only if
+ * the current log level (sysctl_log_level) matches the specified level @{
+ */
+#define SN_LOG_LOW 0
+#define SN_LOG_EVENT 1
+#define SN_LOG_INFO 2
+#define SN_LOG_DETAIL 3
+#define SN_LOG_DEBUG 4
+#define SN_LOG_DEBUG_MAX 5
+
+#define SN_LOG(level, action) if (sysctl_log_level >= level) { action; } /**< Perform log action ONLY if the current log level meets the specified log level */
+
+/** @}
+ * @defgroup Hash Hash Table Macros and Functions
+ *
+ * Defines minimum/maximum/default values for the hash table size @{
+ */
+#define SN_MIN_HASH_SIZE 101 /**< Minimum hash table size (set to stop users choosing stupid values) */
+#define SN_MAX_HASH_SIZE 1000001 /**< Maximum hash table size (NB must be less than max int) */
+#define SN_DEFAULT_HASH_SIZE 2003 /**< A reasonable default size for the hash tables */
+
+#define SN_LOCAL_TBL 0x01 /**< assoc in local table */
+#define SN_GLOBAL_TBL 0x02 /**< assoc in global table */
+#define SN_BOTH_TBL 0x03 /**< assoc in both tables */
+#define SN_WAIT_TOLOCAL 0x10 /**< assoc waiting for TOLOCAL asconf ACK*/
+#define SN_WAIT_TOGLOBAL 0x20 /**< assoc waiting for TOGLOBAL asconf ACK*/
+#define SN_NULL_TBL 0x00 /**< assoc in No table */
+#define SN_MAX_GLOBAL_ADDRESSES 100 /**< absolute maximum global address count*/
+
+#define SN_ADD_OK 0 /**< Association added to the table */
+#define SN_ADD_CLASH 1 /**< Clash when trying to add the assoc. info to the table */
+
+#define SN_TABLE_HASH(vtag, port, size) (((u_int) vtag + (u_int) port) % (u_int) size) /**< Calculate the hash table lookup position */
+
+/** @}
+ * @defgroup Timer Timer Queue Macros and Functions
+ *
+ * Timer macros set minimum/maximum timeout values and calculate timer expiry
+ * times for the provided libalias instance @{
+ */
+#define SN_MIN_TIMER 1
+#define SN_MAX_TIMER 600
+#define SN_TIMER_QUEUE_SIZE SN_MAX_TIMER+2
+
+#define SN_I_T(la) (la->timeStamp + sysctl_init_timer) /**< INIT State expiration time in seconds */
+#define SN_U_T(la) (la->timeStamp + sysctl_up_timer) /**< UP State expiration time in seconds */
+#define SN_C_T(la) (la->timeStamp + sysctl_shutdown_timer) /**< CL State expiration time in seconds */
+#define SN_X_T(la) (la->timeStamp + sysctl_holddown_timer) /**< Wait after a shutdown complete in seconds */
+
+/** @}
+ * @defgroup sysctl SysCtl Variable and callback function declarations
+ *
+ * Sysctl variables to modify NAT functionality in real-time along with associated functions
+ * to manage modifications to the sysctl variables @{
+ */
+
+/* Callbacks */
+int sysctl_chg_loglevel(SYSCTL_HANDLER_ARGS);
+int sysctl_chg_timer(SYSCTL_HANDLER_ARGS);
+int sysctl_chg_hashtable_size(SYSCTL_HANDLER_ARGS);
+int sysctl_chg_error_on_ootb(SYSCTL_HANDLER_ARGS);
+int sysctl_chg_accept_global_ootb_addip(SYSCTL_HANDLER_ARGS);
+int sysctl_chg_initialising_chunk_proc_limit(SYSCTL_HANDLER_ARGS);
+int sysctl_chg_chunk_proc_limit(SYSCTL_HANDLER_ARGS);
+int sysctl_chg_param_proc_limit(SYSCTL_HANDLER_ARGS);
+int sysctl_chg_track_global_addresses(SYSCTL_HANDLER_ARGS);
+
+/* Sysctl variables */
+/** @brief net.inet.ip.alias.sctp.log_level */
+static u_int sysctl_log_level = 0; /**< Stores the current level of logging */
+/** @brief net.inet.ip.alias.sctp.init_timer */
+static u_int sysctl_init_timer = 15; /**< Seconds to hold an association in the table waiting for an INIT-ACK or AddIP-ACK */
+/** @brief net.inet.ip.alias.sctp.up_timer */
+static u_int sysctl_up_timer = 300; /**< Seconds to hold an association in the table while no packets are transmitted */
+/** @brief net.inet.ip.alias.sctp.shutdown_timer */
+static u_int sysctl_shutdown_timer = 15; /**< Seconds to hold an association in the table waiting for a SHUTDOWN-COMPLETE */
+/** @brief net.inet.ip.alias.sctp.holddown_timer */
+static u_int sysctl_holddown_timer = 0; /**< Seconds to hold an association in the table after it has been shutdown (to allow for lost SHUTDOWN-COMPLETEs) */
+/** @brief net.inet.ip.alias.sctp.hashtable_size */
+static u_int sysctl_hashtable_size = SN_DEFAULT_HASH_SIZE; /**< Sets the hash table size for any NEW NAT instances (existing instances retain their existing hash table) */
+/** @brief net.inet.ip.alias.sctp.error_on_ootb */
+static u_int sysctl_error_on_ootb = 1; /**< NAT response to receipt of OOTB packet
+ (0 - No response, 1 - NAT will send ErrorM only to local side,
+ 2 - NAT will send local ErrorM and global ErrorM if there was a partial association match
+ 3 - NAT will send ErrorM to both local and global) */
+/** @brief net.inet.ip.alias.sctp.accept_global_ootb_addip */
+static u_int sysctl_accept_global_ootb_addip = 0; /**< NAT response to receipt of global OOTB AddIP (0 - No response, 1 - NAT will accept OOTB global AddIP messages for processing (Security risk)) */
+/** @brief net.inet.ip.alias.sctp.initialising_chunk_proc_limit */
+static u_int sysctl_initialising_chunk_proc_limit = 2; /**< A limit on the number of chunks that should be searched if there is no matching association (DoS prevention) */
+/** @brief net.inet.ip.alias.sctp.chunk_proc_limit */
+static u_int sysctl_chunk_proc_limit = 5; /**< A limit on the number of chunks that should be searched (DoS prevention) */
+/** @brief net.inet.ip.alias.sctp.param_proc_limit */
+static u_int sysctl_param_proc_limit = 25; /**< A limit on the number of parameters (in chunks) that should be searched (DoS prevention) */
+/** @brief net.inet.ip.alias.sctp.track_global_addresses */
+static u_int sysctl_track_global_addresses = 0; /**< Configures the global address tracking option within the NAT (0 - Global tracking is disabled, > 0 - enables tracking but limits the number of global IP addresses to this value)
+ If set to >=1 the NAT will track that many global IP addresses. This may reduce look up table conflicts, but increases processing */
+
+#define SN_NO_ERROR_ON_OOTB 0 /**< Send no errorM on out of the blue packets */
+#define SN_LOCAL_ERROR_ON_OOTB 1 /**< Send only local errorM on out of the blue packets */
+#define SN_LOCALandPARTIAL_ERROR_ON_OOTB 2 /**< Send local errorM and global errorM for out of the blue packets only if partial match found */
+#define SN_ERROR_ON_OOTB 3 /**< Send errorM on out of the blue packets */
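+
+/*
+ * When built with sysctl support (see the SYSCTL_NODE block below), these
+ * knobs appear under net.inet.ip.alias.sctp and can be tuned at run time,
+ * for example (illustrative):
+ *
+ *	sysctl net.inet.ip.alias.sctp.log_level=2
+ *	sysctl net.inet.ip.alias.sctp.track_global_addresses=5
+ */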
+
+#ifdef SYSCTL_NODE
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_alias);
+
+SYSCTL_NODE(_net_inet_ip_alias, OID_AUTO, sctp, CTLFLAG_RW, NULL, "SCTP NAT");
+
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, log_level, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_log_level, 0, sysctl_chg_loglevel, "IU",
+ "Level of detail (0 - default, 1 - event, 2 - info, 3 - detail, 4 - debug, 5 - max debug)");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, init_timer, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_init_timer, 0, sysctl_chg_timer, "IU",
+ "Timeout value (s) while waiting for (INIT-ACK|AddIP-ACK)");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, up_timer, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_up_timer, 0, sysctl_chg_timer, "IU",
+ "Timeout value (s) to keep an association up with no traffic");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, shutdown_timer, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_shutdown_timer, 0, sysctl_chg_timer, "IU",
+ "Timeout value (s) while waiting for SHUTDOWN-COMPLETE");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, holddown_timer, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_holddown_timer, 0, sysctl_chg_timer, "IU",
+ "Hold association in table for this many seconds after receiving a SHUTDOWN-COMPLETE");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, hashtable_size, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_hashtable_size, 0, sysctl_chg_hashtable_size, "IU",
+    "Size of hash tables used for NAT lookups (100 < prime_number < 1000001)");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, error_on_ootb, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_error_on_ootb, 0, sysctl_chg_error_on_ootb, "IU",
+ "ErrorM sent on receipt of ootb packet:\n\t0 - none,\n\t1 - to local only,\n\t2 - to local and global if a partial association match,\n\t3 - to local and global (DoS risk)");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, accept_global_ootb_addip, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_accept_global_ootb_addip, 0, sysctl_chg_accept_global_ootb_addip, "IU",
+ "NAT response to receipt of global OOTB AddIP:\n\t0 - No response,\n\t1 - NAT will accept OOTB global AddIP messages for processing (Security risk)");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, initialising_chunk_proc_limit, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_initialising_chunk_proc_limit, 0, sysctl_chg_initialising_chunk_proc_limit, "IU",
+ "Number of chunks that should be processed if there is no current association found:\n\t > 0 (A high value is a DoS risk)");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, chunk_proc_limit, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_chunk_proc_limit, 0, sysctl_chg_chunk_proc_limit, "IU",
+ "Number of chunks that should be processed to find key chunk:\n\t>= initialising_chunk_proc_limit (A high value is a DoS risk)");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, param_proc_limit, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_param_proc_limit, 0, sysctl_chg_param_proc_limit, "IU",
+ "Number of parameters (in a chunk) that should be processed to find key parameters:\n\t> 1 (A high value is a DoS risk)");
+SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, track_global_addresses, CTLTYPE_UINT | CTLFLAG_RW,
+ &sysctl_track_global_addresses, 0, sysctl_chg_track_global_addresses, "IU",
+ "Configures the global address tracking option within the NAT:\n\t0 - Global tracking is disabled,\n\t> 0 - enables tracking but limits the number of global IP addresses to this value");
+
+#endif /* SYSCTL_NODE */
+
+/** @}
+ * @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.log_level
+ *
+ * Updates the variable sysctl_log_level to the provided value and ensures
+ * it is in the valid range (SN_LOG_LOW -> SN_LOG_DEBUG_MAX)
+ */
+int sysctl_chg_loglevel(SYSCTL_HANDLER_ARGS)
+{
+ u_int level = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &level, 0, req);
+ if (error) return (error);
+
+  level = (level > SN_LOG_DEBUG_MAX)?(SN_LOG_DEBUG_MAX):(level);
+  sysctl_log_level = (level < SN_LOG_LOW)?(SN_LOG_LOW):(level);
+
+ return (0);
+}
+
+/** @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.(init_timer|up_timer|shutdown_timer)
+ *
+ * Updates the timer-based sysctl variables. The new values are sanity-checked
+ * to make sure that they are within the range SN_MIN_TIMER-SN_MAX_TIMER. The
+ * holddown timer is allowed to be 0
+ */
+int sysctl_chg_timer(SYSCTL_HANDLER_ARGS)
+{
+ u_int timer = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &timer, 0, req);
+ if (error) return (error);
+
+ timer = (timer > SN_MAX_TIMER)?(SN_MAX_TIMER):(timer);
+
+ if (((u_int *)arg1) != &sysctl_holddown_timer)
+ {
+ timer = (timer < SN_MIN_TIMER)?(SN_MIN_TIMER):(timer);
+ }
+
+ *(u_int *)arg1 = timer;
+
+ return (0);
+}
+
+/** @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.hashtable_size
+ *
+ * Updates the hashtable_size sysctl variable. The new value should be a prime
+ * number. We sanity check to ensure that the size is within the range
+ * SN_MIN_HASH_SIZE-SN_MAX_HASH_SIZE. We then check the provided number to see
+ * if it is prime. We approximate by checking that (2,3,5,7,11) are not factors,
+ * incrementing the user provided value until we find a suitable number.
+ */
+int sysctl_chg_hashtable_size(SYSCTL_HANDLER_ARGS)
+{
+ u_int size = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &size, 0, req);
+ if (error) return (error);
+
+ size = (size < SN_MIN_HASH_SIZE)?(SN_MIN_HASH_SIZE):((size > SN_MAX_HASH_SIZE)?(SN_MAX_HASH_SIZE):(size));
+
+ size |= 0x00000001; /* make odd */
+
+ for(;(((size % 3) == 0) || ((size % 5) == 0) || ((size % 7) == 0) || ((size % 11) == 0)); size+=2);
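+	/*
+	 * For example (illustrative): a requested size of 2000 is first made odd
+	 * (2001), which is divisible by 3, so the loop advances to 2003, which has
+	 * none of 3, 5, 7 or 11 as factors and is accepted; 2003 is also the
+	 * default SN_DEFAULT_HASH_SIZE.
+	 */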
+ sysctl_hashtable_size = size;
+
+ return (0);
+}
+
+/** @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.error_on_ootb
+ *
+ * Updates the error_on_ootb sysctl variable.
+ * If set to 0, no ErrorM will be sent if there is a look up table clash
+ * If set to 1, an ErrorM is sent only to the local side
+ * If set to 2, an ErrorM is sent to the local side and global side if there is
+ * a partial association match
+ * If set to 3, an ErrorM is sent to both local and global sides (DoS risk).
+ */
+int sysctl_chg_error_on_ootb(SYSCTL_HANDLER_ARGS)
+{
+ u_int flag = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &flag, 0, req);
+ if (error) return (error);
+
+ sysctl_error_on_ootb = (flag > SN_ERROR_ON_OOTB) ? SN_ERROR_ON_OOTB: flag;
+
+ return (0);
+}
+
+/** @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.accept_global_ootb_addip
+ *
+ * If set to 1 the NAT will accept ootb global addip messages for processing (Security risk)
+ * Default is 0, only responding to local ootb AddIP messages
+ */
+int sysctl_chg_accept_global_ootb_addip(SYSCTL_HANDLER_ARGS)
+{
+ u_int flag = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &flag, 0, req);
+ if (error) return (error);
+
+ sysctl_accept_global_ootb_addip = (flag == 1) ? 1: 0;
+
+ return (0);
+}
+
+/** @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.initialising_chunk_proc_limit
+ *
+ * Updates the initialising_chunk_proc_limit sysctl variable. Number of chunks
+ * that should be processed if there is no current association found: > 0 (A
+ * high value is a DoS risk)
+ */
+int sysctl_chg_initialising_chunk_proc_limit(SYSCTL_HANDLER_ARGS)
+{
+ u_int proclimit = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &proclimit, 0, req);
+ if (error) return (error);
+
+ sysctl_initialising_chunk_proc_limit = (proclimit < 1) ? 1: proclimit;
+ sysctl_chunk_proc_limit =
+ (sysctl_chunk_proc_limit < sysctl_initialising_chunk_proc_limit) ? sysctl_initialising_chunk_proc_limit : sysctl_chunk_proc_limit;
+
+ return (0);
+}
+
+/** @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.chunk_proc_limit
+ *
+ * Updates the chunk_proc_limit sysctl variable.
+ * Number of chunks that should be processed to find key chunk:
+ * >= initialising_chunk_proc_limit (A high value is a DoS risk)
+ */
+int sysctl_chg_chunk_proc_limit(SYSCTL_HANDLER_ARGS)
+{
+ u_int proclimit = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &proclimit, 0, req);
+ if (error) return (error);
+
+ sysctl_chunk_proc_limit =
+ (proclimit < sysctl_initialising_chunk_proc_limit) ? sysctl_initialising_chunk_proc_limit : proclimit;
+
+ return (0);
+}
+
+
+/** @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.param_proc_limit
+ *
+ * Updates the param_proc_limit sysctl variable.
+ * Number of parameters that should be processed to find key parameters:
+ * > 1 (A high value is a DoS risk)
+ */
+int sysctl_chg_param_proc_limit(SYSCTL_HANDLER_ARGS)
+{
+ u_int proclimit = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &proclimit, 0, req);
+ if (error) return (error);
+
+ sysctl_param_proc_limit =
+ (proclimit < 2) ? 2 : proclimit;
+
+ return (0);
+}
+
+/** @ingroup sysctl
+ * @brief sysctl callback for changing net.inet.ip.alias.sctp.track_global_addresses
+ *
+ * Configures the global address tracking option within the NAT (0 - Global
+ * tracking is disabled, > 0 - enables tracking but limits the number of global
+ * IP addresses to this value)
+ */
+int sysctl_chg_track_global_addresses(SYSCTL_HANDLER_ARGS)
+{
+ u_int num_to_track = *(u_int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &num_to_track, 0, req);
+ if (error) return (error);
+
+ sysctl_track_global_addresses = (num_to_track > SN_MAX_GLOBAL_ADDRESSES) ? SN_MAX_GLOBAL_ADDRESSES : num_to_track;
+
+ return (0);
+}
+
+
+/* ----------------------------------------------------------------------
+ * CODE BEGINS HERE
+ * ----------------------------------------------------------------------
+ */
+/**
+ * @brief Initialises the SCTP NAT Implementation
+ *
+ * Creates the look-up tables and the timer queue and initialises all state
+ * variables
+ *
+ * @param la Pointer to the relevant libalias instance
+ */
+void AliasSctpInit(struct libalias *la)
+{
+ /* Initialise association tables*/
+ int i;
+ la->sctpNatTableSize = sysctl_hashtable_size;
+ SN_LOG(SN_LOG_EVENT,
+ SctpAliasLog("Initialising SCTP NAT Instance (hash_table_size:%d)\n", la->sctpNatTableSize));
+ la->sctpTableLocal = sn_calloc(la->sctpNatTableSize, sizeof(struct sctpNatTableL));
+ la->sctpTableGlobal = sn_calloc(la->sctpNatTableSize, sizeof(struct sctpNatTableG));
+ la->sctpNatTimer.TimerQ = sn_calloc(SN_TIMER_QUEUE_SIZE, sizeof(struct sctpTimerQ));
+ /* Initialise hash table */
+ for (i = 0; i < la->sctpNatTableSize; i++) {
+ LIST_INIT(&la->sctpTableLocal[i]);
+ LIST_INIT(&la->sctpTableGlobal[i]);
+ }
+
+ /* Initialise circular timer Q*/
+ for (i = 0; i < SN_TIMER_QUEUE_SIZE; i++)
+ LIST_INIT(&la->sctpNatTimer.TimerQ[i]);
+#ifdef _KERNEL
+ la->sctpNatTimer.loc_time=time_uptime; /* la->timeStamp is not set yet */
+#else
+ la->sctpNatTimer.loc_time=la->timeStamp;
+#endif
+ la->sctpNatTimer.cur_loc = 0;
+ la->sctpLinkCount = 0;
+}
+
+/**
+ * @brief Cleans-up the SCTP NAT Implementation prior to unloading
+ *
+ * Removes all entries from the timer queue, freeing associations as it goes.
+ * We then free memory allocated to the look-up tables and the time queue
+ *
+ * NOTE: We do not need to traverse the look-up tables as each association
+ * will always have an entry in the timer queue; freeing this memory
+ * once will free all memory allocated to entries in the look-up tables.
+ *
+ * @param la Pointer to the relevant libalias instance
+ */
+void AliasSctpTerm(struct libalias *la)
+{
+ struct sctp_nat_assoc *assoc1, *assoc2;
+ int i;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ SN_LOG(SN_LOG_EVENT,
+ SctpAliasLog("Removing SCTP NAT Instance\n"));
+ for (i = 0; i < SN_TIMER_QUEUE_SIZE; i++) {
+ assoc1 = LIST_FIRST(&la->sctpNatTimer.TimerQ[i]);
+ while (assoc1 != NULL) {
+ freeGlobalAddressList(assoc1);
+ assoc2 = LIST_NEXT(assoc1, timer_Q);
+ sn_free(assoc1);
+ assoc1 = assoc2;
+ }
+ }
+
+ sn_free(la->sctpTableLocal);
+ sn_free(la->sctpTableGlobal);
+ sn_free(la->sctpNatTimer.TimerQ);
+}
+
+/**
+ * @brief Handles SCTP packets passed from libalias
+ *
+ * This function needs to actually NAT/drop packets and possibly create and
+ * send AbortM or ErrorM packets in response. The process involves:
+ * - Validating the direction parameter passed by the caller
+ * - Checking and handling any expired timers for the NAT
+ * - Calling sctp_PktParser() to parse the packet
+ * - Call ProcessSctpMsg() to decide the appropriate outcome and to update
+ * the NAT tables
+ * - Based on the return code either:
+ * - NAT the packet
+ * - Construct and send an ErrorM|AbortM packet
+ * - Mark the association for removal from the tables
+ * - Potentially remove the association from all lookup tables
+ * - Return the appropriate result to libalias
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param pip Pointer to IP packet to process
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ *
+ * @return PKT_ALIAS_OK | PKT_ALIAS_IGNORE | PKT_ALIAS_ERROR
+ */
+int
+SctpAlias(struct libalias *la, struct ip *pip, int direction)
+{
+ int rtnval;
+ struct sctp_nat_msg msg;
+ struct sctp_nat_assoc *assoc = NULL;
+
+ if ((direction != SN_TO_LOCAL) && (direction != SN_TO_GLOBAL)) {
+ SctpAliasLog("ERROR: Invalid direction\n");
+ return(PKT_ALIAS_ERROR);
+ }
+
+ sctp_CheckTimers(la); /* Check timers */
+
+ /* Parse the packet */
+ rtnval = sctp_PktParser(la, direction, pip, &msg, &assoc); //using *char (change to mbuf when get code from paolo)
+ switch (rtnval) {
+ case SN_PARSE_OK:
+ break;
+ case SN_PARSE_ERROR_CHHL:
+ /* Not an error if there is a chunk length parsing error and this is a fragmented packet */
+ if (ntohs(pip->ip_off) & IP_MF) {
+ rtnval = SN_PARSE_OK;
+ break;
+ }
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("SN_PARSE_ERROR", msg.sctp_hdr->v_tag, rtnval, direction));
+ return(PKT_ALIAS_ERROR);
+ case SN_PARSE_ERROR_PARTIALLOOKUP:
+ if (sysctl_error_on_ootb > SN_LOCALandPARTIAL_ERROR_ON_OOTB) {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("SN_PARSE_ERROR", msg.sctp_hdr->v_tag, rtnval, direction));
+ return(PKT_ALIAS_ERROR);
+ }
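+ /* FALLTHROUGH: otherwise treat the partial match as a failed lookup */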
+ case SN_PARSE_ERROR_LOOKUP:
+ if (sysctl_error_on_ootb == SN_ERROR_ON_OOTB ||
+ (sysctl_error_on_ootb == SN_LOCALandPARTIAL_ERROR_ON_OOTB && direction == SN_TO_LOCAL) ||
+ (sysctl_error_on_ootb == SN_LOCAL_ERROR_ON_OOTB && direction == SN_TO_GLOBAL)) {
+ TxAbortErrorM(la, &msg, assoc, SN_REFLECT_ERROR, direction); /*NB assoc=NULL */
+ return(PKT_ALIAS_RESPOND);
+ }
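+ /* FALLTHROUGH: otherwise log the parse error and drop the packet (default case) */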
+ default:
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("SN_PARSE_ERROR", msg.sctp_hdr->v_tag, rtnval, direction));
+ return(PKT_ALIAS_ERROR);
+ }
+
+ SN_LOG(SN_LOG_DETAIL,
+ logsctpassoc(assoc, "*");
+ logsctpparse(direction, &msg);
+ );
+
+ /* Process the SCTP message */
+ rtnval = ProcessSctpMsg(la, direction, &msg, assoc);
+
+ SN_LOG(SN_LOG_DEBUG_MAX,
+ logsctpassoc(assoc, "-");
+ logSctpLocal(la);
+ logSctpGlobal(la);
+ );
+ SN_LOG(SN_LOG_DEBUG, logTimerQ(la));
+
+ switch(rtnval){
+ case SN_NAT_PKT:
+ switch(direction) {
+ case SN_TO_LOCAL:
+ DifferentialChecksum(&(msg.ip_hdr->ip_sum),
+ &(assoc->l_addr), &(msg.ip_hdr->ip_dst), 2);
+ msg.ip_hdr->ip_dst = assoc->l_addr; /* change dst address to local address*/
+ break;
+ case SN_TO_GLOBAL:
+ DifferentialChecksum(&(msg.ip_hdr->ip_sum),
+ &(assoc->a_addr), &(msg.ip_hdr->ip_src), 2);
+ msg.ip_hdr->ip_src = assoc->a_addr; /* change src to alias addr*/
+ break;
+ default:
+ rtnval = SN_DROP_PKT; /* shouldn't get here, but if it does drop packet */
+ SN_LOG(SN_LOG_LOW, logsctperror("ERROR: Invalid direction", msg.sctp_hdr->v_tag, rtnval, direction));
+ break;
+ }
+ break;
+ case SN_DROP_PKT:
+ SN_LOG(SN_LOG_DETAIL, logsctperror("SN_DROP_PKT", msg.sctp_hdr->v_tag, rtnval, direction));
+ break;
+ case SN_REPLY_ABORT:
+ case SN_REPLY_ERROR:
+ case SN_SEND_ABORT:
+ TxAbortErrorM(la, &msg, assoc, rtnval, direction);
+ break;
+ default:
+ // big error, remove association and go to idle and write log messages
+ SN_LOG(SN_LOG_LOW, logsctperror("SN_PROCESSING_ERROR", msg.sctp_hdr->v_tag, rtnval, direction));
+ assoc->state=SN_RM;/* Mark for removal*/
+ break;
+ }
+
+ /* Remove association if tagged for removal */
+ if (assoc->state == SN_RM) {
+ if (assoc->TableRegister) {
+ sctp_RmTimeOut(la, assoc);
+ RmSctpAssoc(la, assoc);
+ }
+ LIBALIAS_LOCK_ASSERT(la);
+ freeGlobalAddressList(assoc);
+ sn_free(assoc);
+ }
+ switch(rtnval) {
+ case SN_NAT_PKT:
+ return(PKT_ALIAS_OK);
+ case SN_SEND_ABORT:
+ return(PKT_ALIAS_OK);
+ case SN_REPLY_ABORT:
+ case SN_REPLY_ERROR:
+ case SN_REFLECT_ERROR:
+ return(PKT_ALIAS_RESPOND);
+ case SN_DROP_PKT:
+ default:
+ return(PKT_ALIAS_ERROR);
+ }
+}
+
+/**
+ * @brief Send an AbortM or ErrorM
+ *
+ * We construct the new SCTP packet to send in place of the existing packet we
+ * have been asked to NAT. This function can only be called if the original
+ * packet was successfully parsed as a valid SCTP packet.
+ *
+ * An AbortM (without cause) packet is the smallest SCTP packet available and as
+ * such there is always space in the existing packet buffer to fit the AbortM
+ * packet. An ErrorM packet is 4 bytes longer than an AbortM (the error cause is
+ * not optional). An ErrorM is sent in response to an AddIP when the Vtag/address
+ * combination, if added, will produce a conflict in the association look up
+ * tables. It may also be used for an unexpected packet - a packet with no
+ * matching association in the NAT table and we are requesting an AddIP so we
+ * can add it. The smallest valid SCTP packet while the association is in an
+ * up-state is a Heartbeat packet, which is big enough to be transformed to an
+ * ErrorM.
+ *
+ * We create a temporary character array to store the packet as we are constructing
+ * it. We then populate the array with appropriate values based on:
+ * - Packet type (AbortM | ErrorM)
+ * - Initial packet direction (SN_TO_LOCAL | SN_TO_GLOBAL)
+ * - NAT response (Send packet | Reply packet)
+ *
+ * Once complete, we copy the contents of the temporary packet over the original
+ * SCTP packet we were asked to NAT
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to current association details
+ * @param sndrply SN_SEND_ABORT | SN_REPLY_ABORT | SN_REPLY_ERROR
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ */
+static uint32_t
+local_sctp_finalize_crc32(uint32_t crc32c)
+{
+ /* This routine is duplicated from SCTP
+ * we need to do that since it MAY be that SCTP
+ * is NOT compiled into the kernel. The CRC32C routines
+ * however are always available in libkern.
+ */
+ uint32_t result;
+#if BYTE_ORDER == BIG_ENDIAN
+ uint8_t byte0, byte1, byte2, byte3;
+
+#endif
+ /* Complement the result */
+ result = ~crc32c;
+#if BYTE_ORDER == BIG_ENDIAN
+ /*
+ * For BIG-ENDIAN.. aka Motorola byte order the result is in
+ * little-endian form. So we must manually swap the bytes. Then we
+ * can call htonl() which does nothing...
+ */
+ byte0 = result & 0x000000ff;
+ byte1 = (result >> 8) & 0x000000ff;
+ byte2 = (result >> 16) & 0x000000ff;
+ byte3 = (result >> 24) & 0x000000ff;
+ crc32c = ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3);
+#else
+ /*
+ * For INTEL platforms the result comes out in network order. No
+ * htonl is required or the swap above. So we optimize out both the
+ * htonl and the manual swap above.
+ */
+ crc32c = result;
+#endif
+ return (crc32c);
+}
+
+static void
+TxAbortErrorM(struct libalias *la, struct sctp_nat_msg *sm, struct sctp_nat_assoc *assoc, int sndrply, int direction)
+{
+ int sctp_size = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_error_cause);
+ int ip_size = sizeof(struct ip) + sctp_size;
+ int include_error_cause = 1;
+ char tmp_ip[ip_size];
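+ /* variable-length buffer sized for the largest reply (an ErrorM carrying an error cause) */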
+
+ if (ntohs(sm->ip_hdr->ip_len) < ip_size) { /* short packet, cannot send error cause */
+ include_error_cause = 0;
+ ip_size = ip_size - sizeof(struct sctp_error_cause);
+ sctp_size = sctp_size - sizeof(struct sctp_error_cause);
+ }
+ /* Assign header pointers into the temporary packet */
+ struct ip* ip = (struct ip *) tmp_ip;
+ struct sctphdr* sctp_hdr = (struct sctphdr *) ((char *) ip + sizeof(*ip));
+ struct sctp_chunkhdr* chunk_hdr = (struct sctp_chunkhdr *) ((char *) sctp_hdr + sizeof(*sctp_hdr));
+ struct sctp_error_cause* error_cause = (struct sctp_error_cause *) ((char *) chunk_hdr + sizeof(*chunk_hdr));
+
+ /* construct ip header */
+ ip->ip_v = sm->ip_hdr->ip_v;
+ ip->ip_hl = 5; /* 5*32 bit words */
+ ip->ip_tos = 0;
+ ip->ip_len = htons(ip_size);
+ ip->ip_id = sm->ip_hdr->ip_id;
+ ip->ip_off = 0;
+ ip->ip_ttl = 255;
+ ip->ip_p = IPPROTO_SCTP;
+ /*
+ The definitions below should be removed when they make it into the SCTP stack
+ */
+#define SCTP_MIDDLEBOX_FLAG 0x02
+#define SCTP_NAT_TABLE_COLLISION 0x00b0
+#define SCTP_MISSING_NAT 0x00b1
+ chunk_hdr->chunk_type = (sndrply & SN_TX_ABORT) ? SCTP_ABORT_ASSOCIATION : SCTP_OPERATION_ERROR;
+ chunk_hdr->chunk_flags = SCTP_MIDDLEBOX_FLAG;
+ if (include_error_cause) {
+ error_cause->code = htons((sndrply & SN_REFLECT_ERROR) ? SCTP_MISSING_NAT : SCTP_NAT_TABLE_COLLISION);
+ error_cause->length = htons(sizeof(struct sctp_error_cause));
+ chunk_hdr->chunk_length = htons(sizeof(*chunk_hdr) + sizeof(struct sctp_error_cause));
+ } else {
+ chunk_hdr->chunk_length = htons(sizeof(*chunk_hdr));
+ }
+
+ /* set specific values */
+ switch(sndrply) {
+ case SN_REFLECT_ERROR:
+ chunk_hdr->chunk_flags |= SCTP_HAD_NO_TCB; /* set Tbit */
+ sctp_hdr->v_tag = sm->sctp_hdr->v_tag;
+ break;
+ case SN_REPLY_ERROR:
+ sctp_hdr->v_tag = (direction == SN_TO_LOCAL) ? assoc->g_vtag : assoc->l_vtag ;
+ break;
+ case SN_SEND_ABORT:
+ sctp_hdr->v_tag = sm->sctp_hdr->v_tag;
+ break;
+ case SN_REPLY_ABORT:
+ sctp_hdr->v_tag = sm->sctpchnk.Init->initiate_tag;
+ break;
+ }
+
+ /* Set send/reply values */
+ if (sndrply == SN_SEND_ABORT) { /*pass through NAT */
+ ip->ip_src = (direction == SN_TO_LOCAL) ? sm->ip_hdr->ip_src : assoc->a_addr;
+ ip->ip_dst = (direction == SN_TO_LOCAL) ? assoc->l_addr : sm->ip_hdr->ip_dst;
+ sctp_hdr->src_port = sm->sctp_hdr->src_port;
+ sctp_hdr->dest_port = sm->sctp_hdr->dest_port;
+ } else { /* reply and reflect */
+ ip->ip_src = sm->ip_hdr->ip_dst;
+ ip->ip_dst = sm->ip_hdr->ip_src;
+ sctp_hdr->src_port = sm->sctp_hdr->dest_port;
+ sctp_hdr->dest_port = sm->sctp_hdr->src_port;
+ }
+
+ /* Calculate IP header checksum */
+ ip->ip_sum = in_cksum_hdr(ip);
+
+ /* calculate SCTP header CRC32 */
+ sctp_hdr->checksum = 0;
+ sctp_hdr->checksum = local_sctp_finalize_crc32(calculate_crc32c(0xffffffff, (unsigned char *) sctp_hdr, sctp_size));
+
+ memcpy(sm->ip_hdr, ip, ip_size);
+
+ SN_LOG(SN_LOG_EVENT,SctpAliasLog("%s %s 0x%x (->%s:%u vtag=0x%x crc=0x%x)\n",
+ ((sndrply == SN_SEND_ABORT) ? "Sending" : "Replying"),
+ ((sndrply & SN_TX_ERROR) ? "ErrorM" : "AbortM"),
+ (include_error_cause ? ntohs(error_cause->code) : 0),
+ inet_ntoa(ip->ip_dst),ntohs(sctp_hdr->dest_port),
+ ntohl(sctp_hdr->v_tag), ntohl(sctp_hdr->checksum)));
+}
+
+/* ----------------------------------------------------------------------
+ * PACKET PARSER CODE
+ * ----------------------------------------------------------------------
+ */
+/** @addtogroup packet_parser
+ *
+ * These functions parse the SCTP packet and fill a sctp_nat_msg structure
+ * with the parsed contents.
+ */
+/** @ingroup packet_parser
+ * @brief Parses SCTP packets for the key SCTP chunk that will be processed
+ *
+ * This module parses SCTP packets for the key SCTP chunk that will be processed
+ * The module completes the sctp_nat_msg structure and either retrieves the
+ * relevant (existing) stored association from the Hash Tables or creates a new
+ * association entity with state SN_ID
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ * @param pip Pointer to the IP packet to process
+ * @param sm Pointer to sctp message information
+ * @param passoc Pointer to the association this SCTP Message belongs to
+ *
+ * @return SN_PARSE_OK | SN_PARSE_ERROR_*
+ */
+static int
+sctp_PktParser(struct libalias *la, int direction, struct ip *pip,
+ struct sctp_nat_msg *sm, struct sctp_nat_assoc **passoc)
+//sctp_PktParser(int direction, struct mbuf *ipak, int ip_hdr_len,struct sctp_nat_msg *sm, struct sctp_nat_assoc *assoc)
+{
+ struct sctphdr *sctp_hdr;
+ struct sctp_chunkhdr *chunk_hdr;
+ struct sctp_paramhdr *param_hdr;
+ struct in_addr ipv4addr;
+ int bytes_left; /* bytes left in ip packet */
+ int chunk_length;
+ int chunk_count;
+ int partial_match = 0;
+ // mbuf *mp;
+ // int mlen;
+
+ // mlen = SCTP_HEADER_LEN(i_pak);
+ // mp = SCTP_HEADER_TO_CHAIN(i_pak); /* does nothing in bsd since header and chain not separate */
+
+ /*
+ * Note, that if the VTag is zero, it must be an INIT
+ * Also, I am only interested in the content of INIT and ADDIP chunks
+ */
+
+ // no mbuf stuff from Paolo yet so ...
+ sm->ip_hdr = pip;
+ /* remove ip header length from the bytes_left */
+ bytes_left = ntohs(pip->ip_len) - (pip->ip_hl << 2);
+
+ /* Check SCTP header length and move to first chunk */
+ if (bytes_left < sizeof(struct sctphdr)) {
+ sm->sctp_hdr = NULL;
+ return(SN_PARSE_ERROR_IPSHL); /* packet not long enough*/
+ }
+
+ sm->sctp_hdr = sctp_hdr = (struct sctphdr *) ip_next(pip);
+ bytes_left -= sizeof(struct sctphdr);
+
+ /* Check for valid ports (zero valued ports would find partially initialised associations) */
+ if (sctp_hdr->src_port == 0 || sctp_hdr->dest_port == 0)
+ return(SN_PARSE_ERROR_PORT);
+
+ /* Check length of first chunk */
+ if (bytes_left < SN_MIN_CHUNK_SIZE) /* malformed chunk - could cause endless loop*/
+ return(SN_PARSE_ERROR_CHHL); /* packet not long enough for this chunk */
+
+ /* First chunk */
+ chunk_hdr = SN_SCTP_FIRSTCHUNK(sctp_hdr);
+
+ chunk_length = SCTP_SIZE32(ntohs(chunk_hdr->chunk_length));
+ if ((chunk_length < SN_MIN_CHUNK_SIZE) || (chunk_length > bytes_left)) /* malformed chunk - could cause endless loop*/
+ return(SN_PARSE_ERROR_CHHL);
+
+ if ((chunk_hdr->chunk_flags & SCTP_HAD_NO_TCB) &&
+ ((chunk_hdr->chunk_type == SCTP_ABORT_ASSOCIATION) ||
+ (chunk_hdr->chunk_type == SCTP_SHUTDOWN_COMPLETE))) {
+ /* T-Bit set */
+ if (direction == SN_TO_LOCAL)
+ *passoc = FindSctpGlobalT(la, pip->ip_src, sctp_hdr->v_tag, sctp_hdr->dest_port, sctp_hdr->src_port);
+ else
+ *passoc = FindSctpLocalT(la, pip->ip_dst, sctp_hdr->v_tag, sctp_hdr->dest_port, sctp_hdr->src_port);
+ } else {
+ /* Proper v_tag settings */
+ if (direction == SN_TO_LOCAL)
+ *passoc = FindSctpGlobal(la, pip->ip_src, sctp_hdr->v_tag, sctp_hdr->src_port, sctp_hdr->dest_port, &partial_match);
+ else
+ *passoc = FindSctpLocal(la, pip->ip_src, pip->ip_dst, sctp_hdr->v_tag, sctp_hdr->src_port, sctp_hdr->dest_port);
+ }
+
+ chunk_count = 1;
+ /* Real packet parsing occurs below */
+ sm->msg = SN_SCTP_OTHER;/* Initialise to largest value*/
+ sm->chunk_length = 0; /* only care about length for key chunks */
+ while (IS_SCTP_CONTROL(chunk_hdr)) {
+ switch(chunk_hdr->chunk_type) {
+ case SCTP_INITIATION:
+ if (chunk_length < sizeof(struct sctp_init_chunk)) /* malformed chunk*/
+ return(SN_PARSE_ERROR_CHHL);
+ sm->msg = SN_SCTP_INIT;
+ sm->sctpchnk.Init = (struct sctp_init *) ((char *) chunk_hdr + sizeof(struct sctp_chunkhdr));
+ sm->chunk_length = chunk_length;
+ /* if no existing association, create a new one */
+ if (*passoc == NULL) {
+ if (sctp_hdr->v_tag == 0){ //Init requires vtag=0
+ *passoc = (struct sctp_nat_assoc *) sn_malloc(sizeof(struct sctp_nat_assoc));
+ if (*passoc == NULL) {/* out of resources */
+ return(SN_PARSE_ERROR_AS_MALLOC);
+ }
+ /* Initialise association - malloc initialises memory to zeros */
+ (*passoc)->state = SN_ID;
+ LIST_INIT(&((*passoc)->Gaddr)); /* always initialise to avoid memory problems */
+ (*passoc)->TableRegister = SN_NULL_TBL;
+ return(SN_PARSE_OK);
+ }
+ return(SN_PARSE_ERROR_VTAG);
+ }
+ return(SN_PARSE_ERROR_LOOKUP);
+ case SCTP_INITIATION_ACK:
+ if (chunk_length < sizeof(struct sctp_init_ack_chunk)) /* malformed chunk*/
+ return(SN_PARSE_ERROR_CHHL);
+ sm->msg = SN_SCTP_INITACK;
+ sm->sctpchnk.InitAck = (struct sctp_init_ack *) ((char *) chunk_hdr + sizeof(struct sctp_chunkhdr));
+ sm->chunk_length = chunk_length;
+ return ((*passoc == NULL)?(SN_PARSE_ERROR_LOOKUP):(SN_PARSE_OK));
+ case SCTP_ABORT_ASSOCIATION: /* access only minimum sized chunk */
+ sm->msg = SN_SCTP_ABORT;
+ sm->chunk_length = chunk_length;
+ return ((*passoc == NULL)?(SN_PARSE_ERROR_LOOKUP_ABORT):(SN_PARSE_OK));
+ case SCTP_SHUTDOWN_ACK:
+ if (chunk_length < sizeof(struct sctp_shutdown_ack_chunk)) /* malformed chunk*/
+ return(SN_PARSE_ERROR_CHHL);
+ if (sm->msg > SN_SCTP_SHUTACK) {
+ sm->msg = SN_SCTP_SHUTACK;
+ sm->chunk_length = chunk_length;
+ }
+ break;
+ case SCTP_SHUTDOWN_COMPLETE: /* minimum sized chunk */
+ if (sm->msg > SN_SCTP_SHUTCOMP) {
+ sm->msg = SN_SCTP_SHUTCOMP;
+ sm->chunk_length = chunk_length;
+ }
+ return ((*passoc == NULL)?(SN_PARSE_ERROR_LOOKUP):(SN_PARSE_OK));
+ case SCTP_ASCONF:
+ if (sm->msg > SN_SCTP_ASCONF) {
+ if (chunk_length < (sizeof(struct sctp_asconf_chunk) + sizeof(struct sctp_ipv4addr_param))) /* malformed chunk*/
+ return(SN_PARSE_ERROR_CHHL);
+ //leave parameter searching to later, if required
+ param_hdr = (struct sctp_paramhdr *) ((char *) chunk_hdr + sizeof(struct sctp_asconf_chunk)); /*compulsory IP parameter*/
+ if (ntohs(param_hdr->param_type) == SCTP_IPV4_ADDRESS) {
+ if ((*passoc == NULL) && (direction == SN_TO_LOCAL)) { /* AddIP with no association */
+ /* try look up with the ASCONF packet's alternative address */
+ ipv4addr.s_addr = ((struct sctp_ipv4addr_param *) param_hdr)->addr;
+ *passoc = FindSctpGlobal(la, ipv4addr, sctp_hdr->v_tag, sctp_hdr->src_port, sctp_hdr->dest_port, &partial_match);
+ }
+ param_hdr = (struct sctp_paramhdr *)
+ ((char *) param_hdr + sizeof(struct sctp_ipv4addr_param)); /*asconf's compulsory address parameter */
+ sm->chunk_length = chunk_length - sizeof(struct sctp_asconf_chunk) - sizeof(struct sctp_ipv4addr_param); /* rest of chunk */
+ } else {
+ if (chunk_length < (sizeof(struct sctp_asconf_chunk) + sizeof(struct sctp_ipv6addr_param))) /* malformed chunk*/
+ return(SN_PARSE_ERROR_CHHL);
+ param_hdr = (struct sctp_paramhdr *)
+ ((char *) param_hdr + sizeof(struct sctp_ipv6addr_param)); /*asconf's compulsory address parameter */
+ sm->chunk_length = chunk_length - sizeof(struct sctp_asconf_chunk) - sizeof(struct sctp_ipv6addr_param); /* rest of chunk */
+ }
+ sm->msg = SN_SCTP_ASCONF;
+ sm->sctpchnk.Asconf = param_hdr;
+
+ if (*passoc == NULL) { /* AddIP with no association */
+ *passoc = (struct sctp_nat_assoc *) sn_malloc(sizeof(struct sctp_nat_assoc));
+ if (*passoc == NULL) {/* out of resources */
+ return(SN_PARSE_ERROR_AS_MALLOC);
+ }
+ /* Initialise association - malloc initialises memory to zeros */
+ (*passoc)->state = SN_ID;
+ LIST_INIT(&((*passoc)->Gaddr)); /* always initialise to avoid memory problems */
+ (*passoc)->TableRegister = SN_NULL_TBL;
+ return(SN_PARSE_OK);
+ }
+ }
+ break;
+ case SCTP_ASCONF_ACK:
+ if (sm->msg > SN_SCTP_ASCONFACK) {
+ if (chunk_length < sizeof(struct sctp_asconf_ack_chunk)) /* malformed chunk*/
+ return(SN_PARSE_ERROR_CHHL);
+ //leave parameter searching to later, if required
+ param_hdr = (struct sctp_paramhdr *) ((char *) chunk_hdr
+ + sizeof(struct sctp_asconf_ack_chunk));
+ sm->msg = SN_SCTP_ASCONFACK;
+ sm->sctpchnk.Asconf = param_hdr;
+ sm->chunk_length = chunk_length - sizeof(struct sctp_asconf_ack_chunk);
+ }
+ break;
+ default:
+ break; /* do nothing*/
+ }
+
+ /* if no association is found exit - we need to find an Init or AddIP within sysctl_initialising_chunk_proc_limit */
+ if ((*passoc == NULL) && (chunk_count >= sysctl_initialising_chunk_proc_limit))
+ return(SN_PARSE_ERROR_LOOKUP);
+
+ /* finished with this chunk, on to the next chunk*/
+ bytes_left-= chunk_length;
+
+ /* Is this the end of the packet ? */
+ if (bytes_left == 0)
+ return (*passoc == NULL)?(SN_PARSE_ERROR_LOOKUP):(SN_PARSE_OK);
+
+ /* Are there enough bytes in packet to at least retrieve length of next chunk ? */
+ if (bytes_left < SN_MIN_CHUNK_SIZE)
+ return(SN_PARSE_ERROR_CHHL);
+
+ chunk_hdr = SN_SCTP_NEXTCHUNK(chunk_hdr);
+
+ /* Is the chunk long enough to not cause an endless loop and are there enough bytes in the packet to read the chunk ? */
+ chunk_length = SCTP_SIZE32(ntohs(chunk_hdr->chunk_length));
+ if ((chunk_length < SN_MIN_CHUNK_SIZE) || (chunk_length > bytes_left))
+ return(SN_PARSE_ERROR_CHHL);
+ if(++chunk_count > sysctl_chunk_proc_limit)
+ return(SN_PARSE_OK); /* limit for processing chunks, take what we get */
+ }
+
+ if (*passoc == NULL)
+ return (partial_match)?(SN_PARSE_ERROR_PARTIALLOOKUP):(SN_PARSE_ERROR_LOOKUP);
+ else
+ return(SN_PARSE_OK);
+}
+
+/** @ingroup packet_parser
+ * @brief Extract Vtags from Asconf Chunk
+ *
+ * GetAsconfVtags scans an Asconf Chunk for the vtags parameter, and then
+ * extracts the vtags.
+ *
+ * GetAsconfVtags is not called from within sctp_PktParser. It is called only
+ * from within ID_process when an AddIP has been received.
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param sm Pointer to sctp message information
+ * @param l_vtag Pointer to the local vtag in the association this SCTP Message belongs to
+ * @param g_vtag Pointer to the global vtag in the association this SCTP Message belongs to
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ *
+ * @return 1 - success | 0 - fail
+ */
+static int
+GetAsconfVtags(struct libalias *la, struct sctp_nat_msg *sm, uint32_t *l_vtag, uint32_t *g_vtag, int direction)
+{
+ /* To be removed when information is in the sctp headers */
+#define SCTP_VTAG_PARAM 0xC007
+ struct sctp_vtag_param {
+ struct sctp_paramhdr ph;/* type=SCTP_VTAG_PARAM */
+ uint32_t local_vtag;
+ uint32_t remote_vtag;
+ } __attribute__((packed));
+
+ struct sctp_vtag_param *vtag_param;
+ struct sctp_paramhdr *param;
+ int bytes_left;
+ int param_size;
+ int param_count;
+
+ param_count = 1;
+ param = sm->sctpchnk.Asconf;
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ bytes_left = sm->chunk_length;
+ /* step through Asconf parameters */
+ while((bytes_left >= param_size) && (bytes_left >= SN_VTAG_PARAM_SIZE)) {
+ if (ntohs(param->param_type) == SCTP_VTAG_PARAM) {
+ vtag_param = (struct sctp_vtag_param *) param;
+ switch(direction) {
+ /* The Internet draft is a little ambiguous as to the order of these vtags.
+ We think it is this way around. If we are wrong, the order will need
+ to be changed. */
+ case SN_TO_GLOBAL:
+ *g_vtag = vtag_param->local_vtag;
+ *l_vtag = vtag_param->remote_vtag;
+ break;
+ case SN_TO_LOCAL:
+ *g_vtag = vtag_param->remote_vtag;
+ *l_vtag = vtag_param->local_vtag;
+ break;
+ }
+ return(1); /* found */
+ }
+
+ bytes_left -= param_size;
+ if (bytes_left < SN_MIN_PARAM_SIZE) return(0);
+
+ param = SN_SCTP_NEXTPARAM(param);
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ if (++param_count > sysctl_param_proc_limit) {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("Parameter parse limit exceeded (GetAsconfVtags)",
+ sm->sctp_hdr->v_tag, sysctl_param_proc_limit, direction));
+ return(0); /* not found limit exceeded*/
+ }
+ }
+ return(0); /* not found */
+}
+
+/** @ingroup packet_parser
+ * @brief AddGlobalIPAddresses from Init, InitAck, or AddIP packets
+ *
+ * AddGlobalIPAddresses scans an SCTP chunk (in sm) for Global IP addresses, and
+ * adds them.
+ *
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ *
+ */
+static void
+AddGlobalIPAddresses(struct sctp_nat_msg *sm, struct sctp_nat_assoc *assoc, int direction)
+{
+ struct sctp_ipv4addr_param *ipv4_param;
+ struct sctp_paramhdr *param = NULL;
+ struct sctp_GlobalAddress *G_Addr;
+ struct in_addr g_addr = {0};
+ int bytes_left = 0;
+ int param_size;
+ int param_count, addr_param_count = 0;
+
+ switch(direction) {
+ case SN_TO_GLOBAL: /* does not contain global addresses */
+ g_addr = sm->ip_hdr->ip_dst;
+ bytes_left = 0; /* force exit */
+ break;
+ case SN_TO_LOCAL:
+ g_addr = sm->ip_hdr->ip_src;
+ param_count = 1;
+ switch(sm->msg) {
+ case SN_SCTP_INIT:
+ bytes_left = sm->chunk_length - sizeof(struct sctp_init_chunk);
+ param = (struct sctp_paramhdr *)((char *)sm->sctpchnk.Init + sizeof(struct sctp_init));
+ break;
+ case SN_SCTP_INITACK:
+ bytes_left = sm->chunk_length - sizeof(struct sctp_init_ack_chunk);
+ param = (struct sctp_paramhdr *)((char *)sm->sctpchnk.InitAck + sizeof(struct sctp_init_ack));
+ break;
+ case SN_SCTP_ASCONF:
+ bytes_left = sm->chunk_length;
+ param = sm->sctpchnk.Asconf;
+ break;
+ }
+ }
+ if (bytes_left >= SN_MIN_PARAM_SIZE)
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ else
+ param_size = bytes_left+1; /* force skip loop */
+
+ if ((assoc->state == SN_ID) && ((sm->msg == SN_SCTP_INIT) || (bytes_left < SN_MIN_PARAM_SIZE))) {/* add pkt address */
+ G_Addr = (struct sctp_GlobalAddress *) sn_malloc(sizeof(struct sctp_GlobalAddress));
+ if (G_Addr == NULL) {/* out of resources */
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("AddGlobalIPAddress: No resources for adding global address - revert to no tracking",
+ sm->sctp_hdr->v_tag, 0, direction));
+ assoc->num_Gaddr = 0; /* don't track any more for this assoc*/
+ sysctl_track_global_addresses=0;
+ return;
+ }
+ G_Addr->g_addr = g_addr;
+ if (!Add_Global_Address_to_List(assoc, G_Addr))
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("AddGlobalIPAddress: Address already in list",
+ sm->sctp_hdr->v_tag, assoc->num_Gaddr, direction));
+ }
+
+ /* step through parameters */
+ while((bytes_left >= param_size) && (bytes_left >= sizeof(struct sctp_ipv4addr_param))) {
+ if (assoc->num_Gaddr >= sysctl_track_global_addresses) {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("AddGlobalIPAddress: Maximum Number of addresses reached",
+ sm->sctp_hdr->v_tag, sysctl_track_global_addresses, direction));
+ return;
+ }
+ switch(ntohs(param->param_type)) {
+ case SCTP_ADD_IP_ADDRESS:
+ /* skip to address parameter - leave param_size so bytes left will be calculated properly*/
+ param = (struct sctp_paramhdr *) &((struct sctp_asconf_addrv4_param *) param)->addrp;
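+ /* FALLTHROUGH: process the embedded address as SCTP_IPV4_ADDRESS */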
+ case SCTP_IPV4_ADDRESS:
+ ipv4_param = (struct sctp_ipv4addr_param *) param;
+ /* add addresses to association */
+ G_Addr = (struct sctp_GlobalAddress *) sn_malloc(sizeof(struct sctp_GlobalAddress));
+ if (G_Addr == NULL) {/* out of resources */
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("AddGlobalIPAddress: No resources for adding global address - revert to no tracking",
+ sm->sctp_hdr->v_tag, 0, direction));
+ assoc->num_Gaddr = 0; /* don't track any more for this assoc*/
+ sysctl_track_global_addresses=0;
+ return;
+ }
+ /* add address */
+ addr_param_count++;
+ if ((sm->msg == SN_SCTP_ASCONF) && (ipv4_param->addr == INADDR_ANY)) { /* use packet address */
+ G_Addr->g_addr = g_addr;
+ if (!Add_Global_Address_to_List(assoc, G_Addr))
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("AddGlobalIPAddress: Address already in list",
+ sm->sctp_hdr->v_tag, assoc->num_Gaddr, direction));
+ return; /*shouldn't be any other addresses if the zero address is given*/
+ } else {
+ G_Addr->g_addr.s_addr = ipv4_param->addr;
+ if (!Add_Global_Address_to_List(assoc, G_Addr))
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("AddGlobalIPAddress: Address already in list",
+ sm->sctp_hdr->v_tag, assoc->num_Gaddr, direction));
+ }
+ }
+
+ bytes_left -= param_size;
+ if (bytes_left < SN_MIN_PARAM_SIZE)
+ break;
+
+ param = SN_SCTP_NEXTPARAM(param);
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ if (++param_count > sysctl_param_proc_limit) {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("Parameter parse limit exceeded (AddGlobalIPAddress)",
+ sm->sctp_hdr->v_tag, sysctl_param_proc_limit, direction));
+ break; /* limit exceeded*/
+ }
+ }
+ if (addr_param_count == 0) {
+ SN_LOG(SN_LOG_DETAIL,
+ logsctperror("AddGlobalIPAddress: no address parameters to add",
+ sm->sctp_hdr->v_tag, assoc->num_Gaddr, direction));
+ }
+}
+
+/**
+ * @brief Add_Global_Address_to_List
+ *
+ * Adds a global IP address to an associations address list, if it is not
+ * already there. The first address added is usually the packet's address, and
+ * is most likely to be used, so it is added at the beginning. Subsequent
+ * addresses are added after this one.
+ *
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ * @param G_addr Pointer to the global address to add
+ *
+ * @return 1 - success | 0 - fail
+ */
+static int Add_Global_Address_to_List(struct sctp_nat_assoc *assoc, struct sctp_GlobalAddress *G_addr)
+{
+ struct sctp_GlobalAddress *iter_G_Addr = NULL, *first_G_Addr = NULL;
+ first_G_Addr = LIST_FIRST(&(assoc->Gaddr));
+ if (first_G_Addr == NULL) {
+ LIST_INSERT_HEAD(&(assoc->Gaddr), G_addr, list_Gaddr); /* add new address to beginning of list*/
+ } else {
+ LIST_FOREACH(iter_G_Addr, &(assoc->Gaddr), list_Gaddr) {
+ if (G_addr->g_addr.s_addr == iter_G_Addr->g_addr.s_addr)
+ return(0); /* already exists, so don't add */
+ }
+ LIST_INSERT_AFTER(first_G_Addr, G_addr, list_Gaddr); /* add new address after the first (packet) address*/
+ }
+ assoc->num_Gaddr++;
+ return(1); /* success */
+}
+
+/** @ingroup packet_parser
+ * @brief RmGlobalIPAddresses from DelIP packets
+ *
+ * RmGlobalIPAddresses scans an ASCONF chunk for DelIP parameters to remove the
+ * given Global IP addresses from the association. It will not delete an
+ * address if it is the only address in the list.
+ *
+ *
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ *
+ */
+static void
+RmGlobalIPAddresses(struct sctp_nat_msg *sm, struct sctp_nat_assoc *assoc, int direction)
+{
+ struct sctp_asconf_addrv4_param *asconf_ipv4_param;
+ struct sctp_paramhdr *param;
+ struct sctp_GlobalAddress *G_Addr, *G_Addr_tmp;
+ struct in_addr g_addr;
+ int bytes_left;
+ int param_size;
+ int param_count;
+
+ if(direction == SN_TO_GLOBAL)
+ g_addr = sm->ip_hdr->ip_dst;
+ else
+ g_addr = sm->ip_hdr->ip_src;
+
+ bytes_left = sm->chunk_length;
+ param_count = 1;
+ param = sm->sctpchnk.Asconf;
+ if (bytes_left >= SN_MIN_PARAM_SIZE) {
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ } else {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("RmGlobalIPAddress: truncated packet - cannot remove IP addresses",
+ sm->sctp_hdr->v_tag, sysctl_track_global_addresses, direction));
+ return;
+ }
+
+ /* step through Asconf parameters */
+ while((bytes_left >= param_size) && (bytes_left >= sizeof(struct sctp_ipv4addr_param))) {
+ if (ntohs(param->param_type) == SCTP_DEL_IP_ADDRESS) {
+ asconf_ipv4_param = (struct sctp_asconf_addrv4_param *) param;
+ if (asconf_ipv4_param->addrp.addr == INADDR_ANY) { /* remove all bar pkt address */
+ LIST_FOREACH_SAFE(G_Addr, &(assoc->Gaddr), list_Gaddr, G_Addr_tmp) {
+ if(G_Addr->g_addr.s_addr != sm->ip_hdr->ip_src.s_addr) {
+ if (assoc->num_Gaddr > 1) { /* only delete if more than one */
+ LIST_REMOVE(G_Addr, list_Gaddr);
+ sn_free(G_Addr);
+ assoc->num_Gaddr--;
+ } else {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("RmGlobalIPAddress: Request to remove last IP address (didn't)",
+ sm->sctp_hdr->v_tag, assoc->num_Gaddr, direction));
+ }
+ }
+ }
+ return; /*shouldn't be any other addresses if the zero address is given*/
+ } else {
+ LIST_FOREACH_SAFE(G_Addr, &(assoc->Gaddr), list_Gaddr, G_Addr_tmp) {
+ if(G_Addr->g_addr.s_addr == asconf_ipv4_param->addrp.addr) {
+ if (assoc->num_Gaddr > 1) { /* only delete if more than one */
+ LIST_REMOVE(G_Addr, list_Gaddr);
+ sn_free(G_Addr);
+ assoc->num_Gaddr--;
+ break; /* Since add only adds new addresses, there should be no double entries */
+ } else {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("RmGlobalIPAddress: Request to remove last IP address (didn't)",
+ sm->sctp_hdr->v_tag, assoc->num_Gaddr, direction));
+ }
+ }
+ }
+ }
+ }
+ bytes_left -= param_size;
+ if (bytes_left == 0) return;
+ else if (bytes_left < SN_MIN_PARAM_SIZE) {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("RmGlobalIPAddress: truncated packet - may not have removed all IP addresses",
+ sm->sctp_hdr->v_tag, sysctl_track_global_addresses, direction));
+ return;
+ }
+
+ param = SN_SCTP_NEXTPARAM(param);
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ if (++param_count > sysctl_param_proc_limit) {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("Parameter parse limit exceeded (RmGlobalIPAddress)",
+ sm->sctp_hdr->v_tag, sysctl_param_proc_limit, direction));
+ return; /* limit exceeded*/
+ }
+ }
+}
+
+/** @ingroup packet_parser
+ * @brief Check that ASCONF was successful
+ *
+ * Each ASCONF configuration parameter carries a correlation ID which should be
+ * matched with an ASCONFack. This is difficult for a NAT, since every
+ * association could potentially have a number of outstanding ASCONF
+ * configuration parameters, which should only be activated on receipt of the
+ * ACK.
+ *
+ * Currently we only look for an ACK when the NAT is setting up a new
+ * association (ie AddIP for a connection that the NAT does not know about
+ * because the original Init went through a public interface or another NAT)
+ * Since there is currently no connection on this path, there should be no other
+ * ASCONF configuration parameters outstanding, so we presume that if there is
+ * an ACK that it is responding to the AddIP and activate the new association.
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param sm Pointer to sctp message information
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ *
+ * @return 1 - success | 0 - fail
+ */
+static int
+IsASCONFack(struct libalias *la, struct sctp_nat_msg *sm, int direction)
+{
+ struct sctp_paramhdr *param;
+ int bytes_left;
+ int param_size;
+ int param_count;
+
+ param_count = 1;
+ param = sm->sctpchnk.Asconf;
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ if (param_size == 8)
+ return(1); /*success - default acknowledgement of everything */
+
+ bytes_left = sm->chunk_length;
+ if (bytes_left < param_size)
+ return(0); /* not found */
+ /* step through Asconf parameters */
+ while(bytes_left >= SN_ASCONFACK_PARAM_SIZE) {
+ if (ntohs(param->param_type) == SCTP_SUCCESS_REPORT)
+ return(1); /* success - but can't match correlation IDs - should only be one */
+ /* check others just in case */
+ bytes_left -= param_size;
+ if (bytes_left >= SN_MIN_PARAM_SIZE) {
+ param = SN_SCTP_NEXTPARAM(param);
+ } else {
+ return(0);
+ }
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ if (bytes_left < param_size) return(0);
+
+ if (++param_count > sysctl_param_proc_limit) {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("Parameter parse limit exceeded (IsASCONFack)",
+ sm->sctp_hdr->v_tag, sysctl_param_proc_limit, direction));
+ return(0); /* not found limit exceeded*/
+ }
+ }
+ return(0); /* not success */
+}
+
+/** @ingroup packet_parser
+ * @brief Check to see if ASCONF contains an Add IP or Del IP parameter
+ *
+ * IsADDorDEL scans an ASCONF packet to see if it contains an AddIP or DelIP
+ * parameter
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param sm Pointer to sctp message information
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ *
+ * @return SCTP_ADD_IP_ADDRESS | SCTP_DEL_IP_ADDRESS | 0 - fail
+ */
+static int
+IsADDorDEL(struct libalias *la, struct sctp_nat_msg *sm, int direction)
+{
+ struct sctp_paramhdr *param;
+ int bytes_left;
+ int param_size;
+ int param_count;
+
+ param_count = 1;
+ param = sm->sctpchnk.Asconf;
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+
+ bytes_left = sm->chunk_length;
+ if (bytes_left < param_size)
+ return(0); /* not found */
+ /* step through Asconf parameters */
+ while(bytes_left >= SN_ASCONFACK_PARAM_SIZE) {
+ if (ntohs(param->param_type) == SCTP_ADD_IP_ADDRESS)
+ return(SCTP_ADD_IP_ADDRESS);
+ else if (ntohs(param->param_type) == SCTP_DEL_IP_ADDRESS)
+ return(SCTP_DEL_IP_ADDRESS);
+ /* check others just in case */
+ bytes_left -= param_size;
+ if (bytes_left >= SN_MIN_PARAM_SIZE) {
+ param = SN_SCTP_NEXTPARAM(param);
+ } else {
+ return(0); /*Neither found */
+ }
+ param_size = SCTP_SIZE32(ntohs(param->param_length));
+ if (bytes_left < param_size) return(0);
+
+ if (++param_count > sysctl_param_proc_limit) {
+ SN_LOG(SN_LOG_EVENT,
+ logsctperror("Parameter parse limit exceeded IsADDorDEL)",
+ sm->sctp_hdr->v_tag, sysctl_param_proc_limit, direction));
+ return(0); /* not found limit exceeded*/
+ }
+ }
+ return(0); /*Neither found */
+}
+
+/* ----------------------------------------------------------------------
+ * STATE MACHINE CODE
+ * ----------------------------------------------------------------------
+ */
+/** @addtogroup state_machine
+ *
+ * The SCTP NAT State Machine functions will:
+ * - Process an already parsed packet
+ * - Use the existing NAT Hash Tables
+ * - Determine the next state for the association
+ * - Update the NAT Hash Tables and Timer Queues
+ * - Return the appropriate action to take with the packet
+ */
+/** @ingroup state_machine
+ * @brief Process SCTP message
+ *
+ * This function is the base state machine. It calls the processing engine for
+ * each state.
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ *
+ * @return SN_DROP_PKT | SN_NAT_PKT | SN_REPLY_ABORT | SN_REPLY_ERROR | SN_PROCESSING_ERROR
+ */
+static int
+ProcessSctpMsg(struct libalias *la, int direction, struct sctp_nat_msg *sm, struct sctp_nat_assoc *assoc)
+{
+ int rtnval;
+
+ switch (assoc->state) {
+ case SN_ID: /* Idle */
+ rtnval = ID_process(la, direction, assoc, sm);
+ if (rtnval != SN_NAT_PKT) {
+ assoc->state = SN_RM;/* Mark for removal*/
+ }
+ return(rtnval);
+ case SN_INi: /* Initialising - Init */
+ return(INi_process(la, direction, assoc, sm));
+ case SN_INa: /* Initialising - AddIP */
+ return(INa_process(la, direction, assoc, sm));
+ case SN_UP: /* Association UP */
+ return(UP_process(la, direction, assoc, sm));
+ case SN_CL: /* Association Closing */
+ return(CL_process(la, direction, assoc, sm));
+ }
+ return(SN_PROCESSING_ERROR);
+}
+
+/** @ingroup state_machine
+ * @brief Process SCTP message while in the Idle state
+ *
+ * This function looks for an Incoming INIT or AddIP message.
+ *
+ * All other SCTP messages are invalid when in SN_ID, and are dropped.
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ *
+ * @return SN_NAT_PKT | SN_DROP_PKT | SN_REPLY_ABORT | SN_REPLY_ERROR
+ */
+static int
+ID_process(struct libalias *la, int direction, struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm)
+{
+ switch(sm->msg) {
+ case SN_SCTP_ASCONF: /* a packet containing an ASCONF chunk with ADDIP */
+ if (!sysctl_accept_global_ootb_addip && (direction == SN_TO_LOCAL))
+ return(SN_DROP_PKT);
+ /* if this Asconf packet does not contain the Vtag parameters it is of no use in Idle state */
+ if (!GetAsconfVtags(la, sm, &(assoc->l_vtag), &(assoc->g_vtag), direction))
+ return(SN_DROP_PKT);
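+ /* FALLTHROUGH: a valid AddIP in the Idle state is handled like an Init */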
+ case SN_SCTP_INIT: /* a packet containing an INIT chunk or an ASCONF AddIP */
+ if (sysctl_track_global_addresses)
+ AddGlobalIPAddresses(sm, assoc, direction);
+ switch(direction){
+ case SN_TO_GLOBAL:
+ assoc->l_addr = sm->ip_hdr->ip_src;
+ assoc->a_addr = FindAliasAddress(la, assoc->l_addr);
+ assoc->l_port = sm->sctp_hdr->src_port;
+ assoc->g_port = sm->sctp_hdr->dest_port;
+ if(sm->msg == SN_SCTP_INIT)
+ assoc->g_vtag = sm->sctpchnk.Init->initiate_tag;
+ if (AddSctpAssocGlobal(la, assoc)) /* DB clash *///**** need to add dst address
+ return((sm->msg == SN_SCTP_INIT) ? SN_REPLY_ABORT : SN_REPLY_ERROR);
+ if(sm->msg == SN_SCTP_ASCONF) {
+ if (AddSctpAssocLocal(la, assoc, sm->ip_hdr->ip_dst)) /* DB clash */
+ return(SN_REPLY_ERROR);
+ assoc->TableRegister |= SN_WAIT_TOLOCAL; /* wait for tolocal ack */
+ }
+ break;
+ case SN_TO_LOCAL:
+ assoc->l_addr = FindSctpRedirectAddress(la, sm);
+ assoc->a_addr = sm->ip_hdr->ip_dst;
+ assoc->l_port = sm->sctp_hdr->dest_port;
+ assoc->g_port = sm->sctp_hdr->src_port;
+ if(sm->msg == SN_SCTP_INIT)
+ assoc->l_vtag = sm->sctpchnk.Init->initiate_tag;
+ if (AddSctpAssocLocal(la, assoc, sm->ip_hdr->ip_src)) /* DB clash */
+ return((sm->msg == SN_SCTP_INIT) ? SN_REPLY_ABORT : SN_REPLY_ERROR);
+ if(sm->msg == SN_SCTP_ASCONF) {
+ if (AddSctpAssocGlobal(la, assoc)) /* DB clash */ //**** need to add src address
+ return(SN_REPLY_ERROR);
+ assoc->TableRegister |= SN_WAIT_TOGLOBAL; /* wait for toglobal ack */
+ }
+ break;
+ }
+ assoc->state = (sm->msg == SN_SCTP_INIT) ? SN_INi : SN_INa;
+ assoc->exp = SN_I_T(la);
+ sctp_AddTimeOut(la,assoc);
+ return(SN_NAT_PKT);
+ default: /* Any other type of SCTP message is not valid in Idle */
+ return(SN_DROP_PKT);
+ }
+return(SN_DROP_PKT);/* shouldn't get here very bad: log, drop and hope for the best */
+}
+
+/** @ingroup state_machine
+ * @brief Process SCTP message while waiting for an INIT-ACK message
+ *
+ * Only an INIT-ACK, resent INIT, or an ABORT SCTP packet are valid in this
+ * state, all other packets are dropped.
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ *
+ * @return SN_NAT_PKT | SN_DROP_PKT | SN_REPLY_ABORT
+ */
+static int
+INi_process(struct libalias *la, int direction, struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm)
+{
+ switch(sm->msg) {
+ case SN_SCTP_INIT: /* a packet containing a retransmitted INIT chunk */
+ sctp_ResetTimeOut(la, assoc, SN_I_T(la));
+ return(SN_NAT_PKT);
+ case SN_SCTP_INITACK: /* a packet containing an INIT-ACK chunk */
+ switch(direction){
+ case SN_TO_LOCAL:
+ if (assoc->num_Gaddr) /*If tracking global addresses for this association */
+ AddGlobalIPAddresses(sm, assoc, direction);
+ assoc->l_vtag = sm->sctpchnk.Init->initiate_tag;
+ if (AddSctpAssocLocal(la, assoc, sm->ip_hdr->ip_src)) { /* DB clash */
+ assoc->state = SN_RM;/* Mark for removal*/
+ return(SN_SEND_ABORT);
+ }
+ break;
+ case SN_TO_GLOBAL:
+ assoc->l_addr = sm->ip_hdr->ip_src; // Only if not set in Init! *
+ assoc->g_vtag = sm->sctpchnk.Init->initiate_tag;
+ if (AddSctpAssocGlobal(la, assoc)) { /* DB clash */
+ assoc->state = SN_RM;/* Mark for removal*/
+ return(SN_SEND_ABORT);
+ }
+ break;
+ }
+ assoc->state = SN_UP;/* association established for NAT */
+ sctp_ResetTimeOut(la,assoc, SN_U_T(la));
+ return(SN_NAT_PKT);
+ case SN_SCTP_ABORT: /* a packet containing an ABORT chunk */
+ assoc->state = SN_RM;/* Mark for removal*/
+ return(SN_NAT_PKT);
+ default:
+ return(SN_DROP_PKT);
+ }
+ return(SN_DROP_PKT);/* shouldn't get here very bad: log, drop and hope for the best */
+}
+
+/** @ingroup state_machine
+ * @brief Process SCTP message while waiting for an AddIp-ACK message
+ *
+ * Only an AddIP-ACK, resent AddIP, or an ABORT message are valid, all other
+ * SCTP packets are dropped
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ *
+ * @return SN_NAT_PKT | SN_DROP_PKT
+ */
+static int
+INa_process(struct libalias *la, int direction,struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm)
+{
+ switch(sm->msg) {
+ case SN_SCTP_ASCONF: /* a packet containing an ASCONF chunk*/
+ sctp_ResetTimeOut(la,assoc, SN_I_T(la));
+ return(SN_NAT_PKT);
+ case SN_SCTP_ASCONFACK: /* a packet containing an ASCONF chunk with a ADDIP-ACK */
+ switch(direction){
+ case SN_TO_LOCAL:
+ if (!(assoc->TableRegister & SN_WAIT_TOLOCAL)) /* wrong direction */
+ return(SN_DROP_PKT);
+ break;
+ case SN_TO_GLOBAL:
+ if (!(assoc->TableRegister & SN_WAIT_TOGLOBAL)) /* wrong direction */
+ return(SN_DROP_PKT);
+ }
+ if (IsASCONFack(la,sm,direction)) {
+ assoc->TableRegister &= SN_BOTH_TBL; /* remove wait flags */
+ assoc->state = SN_UP; /* association established for NAT */
+ sctp_ResetTimeOut(la,assoc, SN_U_T(la));
+ return(SN_NAT_PKT);
+ } else {
+ assoc->state = SN_RM;/* Mark for removal*/
+ return(SN_NAT_PKT);
+ }
+ case SN_SCTP_ABORT: /* a packet containing an ABORT chunk */
+ assoc->state = SN_RM;/* Mark for removal*/
+ return(SN_NAT_PKT);
+ default:
+ return(SN_DROP_PKT);
+ }
+ return(SN_DROP_PKT);/* shouldn't get here very bad: log, drop and hope for the best */
+}
+
+/** @ingroup state_machine
+ * @brief Process SCTP messages while association is UP redirecting packets
+ *
+ * While in the SN_UP state, all packets for the particular association
+ * are passed. Only a SHUT-ACK or an ABORT will cause a change of state.
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ *
+ * @return SN_NAT_PKT | SN_DROP_PKT
+ */
+static int
+UP_process(struct libalias *la, int direction, struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm)
+{
+ switch(sm->msg) {
+ case SN_SCTP_SHUTACK: /* a packet containing a SHUTDOWN-ACK chunk */
+ assoc->state = SN_CL;
+ sctp_ResetTimeOut(la,assoc, SN_C_T(la));
+ return(SN_NAT_PKT);
+ case SN_SCTP_ABORT: /* a packet containing an ABORT chunk */
+ assoc->state = SN_RM;/* Mark for removal*/
+ return(SN_NAT_PKT);
+ case SN_SCTP_ASCONF: /* a packet containing an ASCONF chunk*/
+ if ((direction == SN_TO_LOCAL) && assoc->num_Gaddr) /*If tracking global addresses for this association & from global side */
+ switch(IsADDorDEL(la,sm,direction)) {
+ case SCTP_ADD_IP_ADDRESS:
+ AddGlobalIPAddresses(sm, assoc, direction);
+ break;
+ case SCTP_DEL_IP_ADDRESS:
+ RmGlobalIPAddresses(sm, assoc, direction);
+ break;
+ } /* fall through to default */
+ default:
+ sctp_ResetTimeOut(la,assoc, SN_U_T(la));
+ return(SN_NAT_PKT); /* forward packet */
+ }
+ return(SN_DROP_PKT);/* shouldn't get here very bad: log, drop and hope for the best */
+}
+
+/** @ingroup state_machine
+ * @brief Process SCTP message while association is in the process of closing
+ *
+ * This function waits for a SHUT-COMP to close the association. Depending on
+ * the the setting of sysctl_holddown_timer it may not remove the association
+ * immediately, but leave it up until SN_X_T(la). Only SHUT-COMP, SHUT-ACK, and
+ * ABORT packets are permitted in this state. All other packets are dropped.
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param direction SN_TO_LOCAL | SN_TO_GLOBAL
+ * @param sm Pointer to sctp message information
+ * @param assoc Pointer to the association this SCTP Message belongs to
+ *
+ * @return SN_NAT_PKT | SN_DROP_PKT
+ */
+static int
+CL_process(struct libalias *la, int direction,struct sctp_nat_assoc *assoc, struct sctp_nat_msg *sm)
+{
+ switch(sm->msg) {
+ case SN_SCTP_SHUTCOMP: /* a packet containing a SHUTDOWN-COMPLETE chunk */
+ assoc->state = SN_CL; /* Stay in Close state until timeout */
+ if (sysctl_holddown_timer > 0)
+ sctp_ResetTimeOut(la, assoc, SN_X_T(la));/* allow to stay open for Tbit packets*/
+ else
+ assoc->state = SN_RM;/* Mark for removal*/
+ return(SN_NAT_PKT);
+ case SN_SCTP_SHUTACK: /* a packet containing a SHUTDOWN-ACK chunk */
+ assoc->state = SN_CL; /* Stay in Close state until timeout */
+ sctp_ResetTimeOut(la, assoc, SN_C_T(la));
+ return(SN_NAT_PKT);
+ case SN_SCTP_ABORT: /* a packet containing an ABORT chunk */
+ assoc->state = SN_RM;/* Mark for removal*/
+ return(SN_NAT_PKT);
+ default:
+ return(SN_DROP_PKT);
+ }
+ return(SN_DROP_PKT);/* shouldn't get here very bad: log, drop and hope for the best */
+}
+
+/* ----------------------------------------------------------------------
+ * HASH TABLE CODE
+ * ----------------------------------------------------------------------
+ */
+/** @addtogroup Hash
+ *
+ * The Hash functions facilitate searching the NAT Hash Tables for associations
+ * as well as adding/removing associations from the table(s).
+ */
+/** @ingroup Hash
+ * @brief Find the SCTP association given the local address, port and vtag
+ *
+ * Searches the local look-up table for the association entry matching the
+ * provided local <address:ports:vtag> tuple
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param l_addr local address
+ * @param g_addr global address
+ * @param l_vtag local Vtag
+ * @param l_port local Port
+ * @param g_port global Port
+ *
+ * @return pointer to association or NULL
+ */
+static struct sctp_nat_assoc*
+FindSctpLocal(struct libalias *la, struct in_addr l_addr, struct in_addr g_addr, uint32_t l_vtag, uint16_t l_port, uint16_t g_port)
+{
+ u_int i;
+ struct sctp_nat_assoc *assoc = NULL;
+ struct sctp_GlobalAddress *G_Addr = NULL;
+
+ if (l_vtag != 0) { /* an init packet, vtag==0 */
+ i = SN_TABLE_HASH(l_vtag, l_port, la->sctpNatTableSize);
+ LIST_FOREACH(assoc, &la->sctpTableLocal[i], list_L) {
+ if ((assoc->l_vtag == l_vtag) && (assoc->l_port == l_port) && (assoc->g_port == g_port)\
+ && (assoc->l_addr.s_addr == l_addr.s_addr)) {
+ if (assoc->num_Gaddr) {
+ LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) {
+ if(G_Addr->g_addr.s_addr == g_addr.s_addr)
+ return(assoc);
+ }
+ } else {
+ return(assoc);
+ }
+ }
+ }
+ }
+ return(NULL);
+}
+
+/** @ingroup Hash
+ * @brief Check for Global Clash
+ *
+ * Searches the global look-up table for the association entry matching the
+ * provided global <(addresses):ports:vtag> tuple
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param Cassoc association being checked for a clash
+ *
+ * @return pointer to association or NULL
+ */
+static struct sctp_nat_assoc*
+FindSctpGlobalClash(struct libalias *la, struct sctp_nat_assoc *Cassoc)
+{
+ u_int i;
+ struct sctp_nat_assoc *assoc = NULL;
+ struct sctp_GlobalAddress *G_Addr = NULL;
+ struct sctp_GlobalAddress *G_AddrC = NULL;
+
+ if (Cassoc->g_vtag != 0) { /* an init packet, vtag==0 */
+ i = SN_TABLE_HASH(Cassoc->g_vtag, Cassoc->g_port, la->sctpNatTableSize);
+ LIST_FOREACH(assoc, &la->sctpTableGlobal[i], list_G) {
+ if ((assoc->g_vtag == Cassoc->g_vtag) && (assoc->g_port == Cassoc->g_port) && (assoc->l_port == Cassoc->l_port)) {
+ if (assoc->num_Gaddr) {
+ LIST_FOREACH(G_AddrC, &(Cassoc->Gaddr), list_Gaddr) {
+ LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) {
+ if(G_Addr->g_addr.s_addr == G_AddrC->g_addr.s_addr)
+ return(assoc);
+ }
+ }
+ } else {
+ return(assoc);
+ }
+ }
+ }
+ }
+ return(NULL);
+}
+
+/** @ingroup Hash
+ * @brief Find the SCTP association given the global port and vtag
+ *
+ * Searches the global look-up table for the association entry matching the
+ * provided global <address:ports:vtag> tuple
+ *
+ * If all but the global address match it sets partial_match to 1 to indicate a
+ * partial match. If the NAT is tracking global IP addresses for this
+ * association, the NAT may respond with an ERRORM to request the missing
+ * address to be added.
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param g_addr global address
+ * @param g_vtag global vtag
+ * @param g_port global port
+ * @param l_port local port
+ *
+ * @return pointer to association or NULL
+ */
+static struct sctp_nat_assoc*
+FindSctpGlobal(struct libalias *la, struct in_addr g_addr, uint32_t g_vtag, uint16_t g_port, uint16_t l_port, int *partial_match)
+{
+ u_int i;
+ struct sctp_nat_assoc *assoc = NULL;
+ struct sctp_GlobalAddress *G_Addr = NULL;
+
+ *partial_match = 0;
+ if (g_vtag != 0) { /* an init packet, vtag==0 */
+ i = SN_TABLE_HASH(g_vtag, g_port, la->sctpNatTableSize);
+ LIST_FOREACH(assoc, &la->sctpTableGlobal[i], list_G) {
+ if ((assoc->g_vtag == g_vtag) && (assoc->g_port == g_port) && (assoc->l_port == l_port)) {
+ *partial_match = 1;
+ if (assoc->num_Gaddr) {
+ LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) {
+ if(G_Addr->g_addr.s_addr == g_addr.s_addr)
+ return(assoc);
+ }
+ } else {
+ return(assoc);
+ }
+ }
+ }
+ }
+ return(NULL);
+}
+
+/** @ingroup Hash
+ * @brief Find the SCTP association for a T-Flag message (given the global port and local vtag)
+ *
+ * Searches the local look-up table for a unique association entry matching the
+ * provided global port and local vtag information
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param g_addr global address
+ * @param l_vtag local Vtag
+ * @param g_port global Port
+ * @param l_port local Port
+ *
+ * @return pointer to association or NULL
+ */
+static struct sctp_nat_assoc*
+FindSctpLocalT(struct libalias *la, struct in_addr g_addr, uint32_t l_vtag, uint16_t g_port, uint16_t l_port)
+{
+ u_int i;
+ struct sctp_nat_assoc *assoc = NULL, *lastmatch = NULL;
+ struct sctp_GlobalAddress *G_Addr = NULL;
+ int cnt = 0;
+
+ if (l_vtag != 0) { /* an init packet, vtag==0 */
+ i = SN_TABLE_HASH(l_vtag, g_port, la->sctpNatTableSize);
+ LIST_FOREACH(assoc, &la->sctpTableGlobal[i], list_G) {
+ if ((assoc->g_vtag == l_vtag) && (assoc->g_port == g_port) && (assoc->l_port == l_port)) {
+ if (assoc->num_Gaddr) {
+ LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) {
+ if(G_Addr->g_addr.s_addr == g_addr.s_addr)
+ return(assoc); /* full match */
+ }
+ } else {
+ if (++cnt > 1) return(NULL);
+ lastmatch = assoc;
+ }
+ }
+ }
+ }
+ /* If there is more than one match we do not know which local address to send to */
+ return( cnt ? lastmatch : NULL );
+}
+
+/** @ingroup Hash
+ * @brief Find the SCTP association for a T-Flag message (given the local port and global vtag)
+ *
+ * Searches the global look-up table for a unique association entry matching the
+ * provided local port and global vtag information
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param g_addr global address
+ * @param g_vtag global vtag
+ * @param l_port local port
+ * @param g_port global port
+ *
+ * @return pointer to association or NULL
+ */
+static struct sctp_nat_assoc*
+FindSctpGlobalT(struct libalias *la, struct in_addr g_addr, uint32_t g_vtag, uint16_t l_port, uint16_t g_port)
+{
+ u_int i;
+ struct sctp_nat_assoc *assoc = NULL;
+ struct sctp_GlobalAddress *G_Addr = NULL;
+
+ if (g_vtag != 0) { /* an init packet, vtag==0 */
+ i = SN_TABLE_HASH(g_vtag, l_port, la->sctpNatTableSize);
+ LIST_FOREACH(assoc, &la->sctpTableLocal[i], list_L) {
+ if ((assoc->l_vtag == g_vtag) && (assoc->l_port == l_port) && (assoc->g_port == g_port)) {
+ if (assoc->num_Gaddr) {
+ LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) {
+ if(G_Addr->g_addr.s_addr == g_addr.s_addr)
+ return(assoc);
+ }
+ } else {
+ return(assoc);
+ }
+ }
+ }
+ }
+ return(NULL);
+}
+
+/** @ingroup Hash
+ * @brief Add the sctp association information to the local look up table
+ *
+ * Searches the local look-up table for an existing association with the same
+ * details. If a match exists and is ONLY in the local look-up table then this
+ * is a repeated INIT packet, we need to remove this association from the
+ * look-up table and add the new association
+ *
+ * The new association is added to the head of the list and state is updated
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param assoc pointer to sctp association
+ * @param g_addr global address
+ *
+ * @return SN_ADD_OK | SN_ADD_CLASH
+ */
+static int
+AddSctpAssocLocal(struct libalias *la, struct sctp_nat_assoc *assoc, struct in_addr g_addr)
+{
+ struct sctp_nat_assoc *found;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ found = FindSctpLocal(la, assoc->l_addr, g_addr, assoc->l_vtag, assoc->l_port, assoc->g_port);
+ /*
+ * Note that if a different global address initiated this Init,
+ * i.e. it wasn't resent as presumed:
+ * - the local receiver if receiving it for the first time will establish
+ * an association with the new global host
+ * - if receiving an init from a different global address after sending a
+ * lost initack it will send an initack to the new global host, the first
+ * association attempt will then be blocked if retried.
+ */
+ if (found != NULL) {
+ if ((found->TableRegister == SN_LOCAL_TBL) && (found->g_port == assoc->g_port)) { /* resent message */
+ RmSctpAssoc(la, found);
+ sctp_RmTimeOut(la, found);
+ freeGlobalAddressList(found);
+ sn_free(found);
+ } else
+ return(SN_ADD_CLASH);
+ }
+
+ LIST_INSERT_HEAD(&la->sctpTableLocal[SN_TABLE_HASH(assoc->l_vtag, assoc->l_port, la->sctpNatTableSize)],
+ assoc, list_L);
+ assoc->TableRegister |= SN_LOCAL_TBL;
+ la->sctpLinkCount++; //increment link count
+
+ if (assoc->TableRegister == SN_BOTH_TBL) {
+ /* libalias log -- controlled by libalias */
+ if (la->packetAliasMode & PKT_ALIAS_LOG)
+ SctpShowAliasStats(la);
+
+ SN_LOG(SN_LOG_INFO, logsctpassoc(assoc, "^"));
+ }
+
+ return(SN_ADD_OK);
+}
+
+/** @ingroup Hash
+ * @brief Add the sctp association information to the global look up table
+ *
+ * Searches the global look-up table for an existing association with the same
+ * details. If a match exists and is ONLY in the global look-up table then this
+ * is a repeated INIT packet, we need to remove this association from the
+ * look-up table and add the new association
+ *
+ * The new association is added to the head of the list and state is updated
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param assoc pointer to sctp association
+ *
+ * @return SN_ADD_OK | SN_ADD_CLASH
+ */
+static int
+AddSctpAssocGlobal(struct libalias *la, struct sctp_nat_assoc *assoc)
+{
+ struct sctp_nat_assoc *found;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ found = FindSctpGlobalClash(la, assoc);
+ if (found != NULL) {
+ if ((found->TableRegister == SN_GLOBAL_TBL) && \
+ (found->l_addr.s_addr == assoc->l_addr.s_addr) && (found->l_port == assoc->l_port)) { /* resent message */
+ RmSctpAssoc(la, found);
+ sctp_RmTimeOut(la, found);
+ freeGlobalAddressList(found);
+ sn_free(found);
+ } else
+ return(SN_ADD_CLASH);
+ }
+
+ LIST_INSERT_HEAD(&la->sctpTableGlobal[SN_TABLE_HASH(assoc->g_vtag, assoc->g_port, la->sctpNatTableSize)],
+ assoc, list_G);
+ assoc->TableRegister |= SN_GLOBAL_TBL;
+ la->sctpLinkCount++; //increment link count
+
+ if (assoc->TableRegister == SN_BOTH_TBL) {
+ /* libalias log -- controlled by libalias */
+ if (la->packetAliasMode & PKT_ALIAS_LOG)
+ SctpShowAliasStats(la);
+
+ SN_LOG(SN_LOG_INFO, logsctpassoc(assoc, "^"));
+ }
+
+ return(SN_ADD_OK);
+}
+
+/** @ingroup Hash
+ * @brief Remove the sctp association information from the look up table
+ *
+ * For each of the two (local/global) look-up tables, remove the association
+ * from that table IF it has been registered in that table.
+ *
+ * NOTE: The calling code is responsible for freeing memory allocated to the
+ * association structure itself
+ *
+ * NOTE: The association is NOT removed from the timer queue
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param assoc pointer to sctp association
+ */
+static void
+RmSctpAssoc(struct libalias *la, struct sctp_nat_assoc *assoc)
+{
+ // struct sctp_nat_assoc *found;
+ if (assoc == NULL) {
+ /* very bad, log and die*/
+ SN_LOG(SN_LOG_LOW,
+ logsctperror("ERROR: alias_sctp:RmSctpAssoc(NULL)\n", 0, 0, SN_TO_NODIR));
+ return;
+ }
+ /* log if association is fully up and now closing */
+ if (assoc->TableRegister == SN_BOTH_TBL) {
+ SN_LOG(SN_LOG_INFO, logsctpassoc(assoc, "$"));
+ }
+ LIBALIAS_LOCK_ASSERT(la);
+ if (assoc->TableRegister & SN_LOCAL_TBL) {
+ assoc->TableRegister ^= SN_LOCAL_TBL;
+ la->sctpLinkCount--; //decrement link count
+ LIST_REMOVE(assoc, list_L);
+ }
+
+ if (assoc->TableRegister & SN_GLOBAL_TBL) {
+ assoc->TableRegister ^= SN_GLOBAL_TBL;
+ la->sctpLinkCount--; //decrement link count
+ LIST_REMOVE(assoc, list_G);
+ }
+ // sn_free(assoc); //Don't remove now, remove if needed later
+ /* libalias logging -- controlled by libalias log definition */
+ if (la->packetAliasMode & PKT_ALIAS_LOG)
+ SctpShowAliasStats(la);
+}
+
+/**
+ * @ingroup Hash
+ * @brief free the Global Address List memory
+ *
+ * freeGlobalAddressList deletes all global IP addresses in an associations
+ * global IP address list.
+ *
+ * @param assoc
+ */
+static void freeGlobalAddressList(struct sctp_nat_assoc *assoc)
+{
+ struct sctp_GlobalAddress *gaddr1=NULL,*gaddr2=NULL;
+ /*free global address list*/
+ gaddr1 = LIST_FIRST(&(assoc->Gaddr));
+ while (gaddr1 != NULL) {
+ gaddr2 = LIST_NEXT(gaddr1, list_Gaddr);
+ sn_free(gaddr1);
+ gaddr1 = gaddr2;
+ }
+}
+/* ----------------------------------------------------------------------
+ * TIMER QUEUE CODE
+ * ----------------------------------------------------------------------
+ */
+/** @addtogroup Timer
+ *
+ * The timer queue management functions are designed to operate efficiently with
+ * a minimum of interaction with the queues.
+ *
+ * Once a timeout is set in the queue it will not be altered in the queue unless
+ * it has to be changed to a shorter time (usually only for aborts and closing).
+ * On a queue timeout, the real expiry time is checked, and if it has not yet
+ * been reached the entry is requeued (O(1)) at its later time. This is especially important
+ * for normal packets sent during an association. When a timer expires, it is
+ * updated to its new expiration time if necessary, or processed as a
+ * timeout. This means that while in UP state, the timing queue is only altered
+ * every U_T (every few minutes) for a particular association.
+ */
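+/*
+ * A minimal sketch of the lazy-requeue pattern described above, in terms of
+ * the helpers defined below (newexp here stands for whatever recomputed
+ * expiry time the caller has in hand, e.g. la->timeStamp plus the UP-state
+ * timeout U_T):
+ *
+ *	sctp_ResetTimeOut(la, assoc, newexp);	// touches the queue only when
+ *						// newexp is earlier than assoc->exp
+ *
+ *	// later, when the association's queue slot fires (see sctp_CheckTimers()):
+ *	if (la->timeStamp >= assoc->exp)
+ *		RmSctpAssoc(la, assoc);		// really expired
+ *	else
+ *		sctp_AddTimeOut(la, assoc);	// requeued at its correct, later slot
+ */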
+/** @ingroup Timer
+ * @brief Add an association timeout to the timer queue
+ *
+ * Determine the location in the queue to add the timeout and insert the
+ * association into the list at that queue position
+ *
+ * @param la
+ * @param assoc
+ */
+static void
+sctp_AddTimeOut(struct libalias *la, struct sctp_nat_assoc *assoc)
+{
+ int add_loc;
+ LIBALIAS_LOCK_ASSERT(la);
+ add_loc = assoc->exp - la->sctpNatTimer.loc_time + la->sctpNatTimer.cur_loc;
+ if (add_loc >= SN_TIMER_QUEUE_SIZE)
+ add_loc -= SN_TIMER_QUEUE_SIZE;
+ LIST_INSERT_HEAD(&la->sctpNatTimer.TimerQ[add_loc], assoc, timer_Q);
+ assoc->exp_loc = add_loc;
+}
+
+/** @ingroup Timer
+ * @brief Remove an association from timer queue
+ *
+ * This is an O(1) operation to remove the association pointer from its
+ * current position in the timer queue
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param assoc pointer to sctp association
+ */
+static void
+sctp_RmTimeOut(struct libalias *la, struct sctp_nat_assoc *assoc)
+{
+ LIBALIAS_LOCK_ASSERT(la);
+ LIST_REMOVE(assoc, timer_Q);/* Note this is O(1) */
+}
+
+
+/** @ingroup Timer
+ * @brief Reset timer in timer queue
+ *
+ * Reset the actual timeout for the specified association. If it is earlier than
+ * the existing timeout, then remove and re-install the association into the
+ * queue
+ *
+ * @param la Pointer to the relevant libalias instance
+ * @param assoc pointer to sctp association
+ * @param newexp New expiration time
+ */
+static void
+sctp_ResetTimeOut(struct libalias *la, struct sctp_nat_assoc *assoc, int newexp)
+{
+ if (newexp < assoc->exp) {
+ sctp_RmTimeOut(la, assoc);
+ assoc->exp = newexp;
+ sctp_AddTimeOut(la, assoc);
+ } else {
+ assoc->exp = newexp;
+ }
+}
+
+/** @ingroup Timer
+ * @brief Check timer Q against current time
+ *
+ * Loop through each entry in the timer queue since the last time we processed
+ * the timer queue until now (the current time). For each association in the
+ * event list, we remove it from that position in the timer queue and check if
+ * it has really expired. If so we:
+ * - Log the timer expiry
+ * - Remove the association from the NAT tables
+ * - Release the memory used by the association
+ *
+ * If the timer hasn't really expired we place the association into its new
+ * correct position in the timer queue.
+ *
+ * @param la Pointer to the relevant libalias instance
+ */
+void
+sctp_CheckTimers(struct libalias *la)
+{
+ struct sctp_nat_assoc *assoc;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ while(la->timeStamp >= la->sctpNatTimer.loc_time) {
+ while (!LIST_EMPTY(&la->sctpNatTimer.TimerQ[la->sctpNatTimer.cur_loc])) {
+ assoc = LIST_FIRST(&la->sctpNatTimer.TimerQ[la->sctpNatTimer.cur_loc]);
+ //SLIST_REMOVE_HEAD(&la->sctpNatTimer.TimerQ[la->sctpNatTimer.cur_loc], timer_Q);
+ LIST_REMOVE(assoc, timer_Q);
+ if (la->timeStamp >= assoc->exp) { /* state expired */
+ SN_LOG(((assoc->state == SN_CL)?(SN_LOG_DEBUG):(SN_LOG_INFO)),
+ logsctperror("Timer Expired", assoc->g_vtag, assoc->state, SN_TO_NODIR));
+ RmSctpAssoc(la, assoc);
+ freeGlobalAddressList(assoc);
+ sn_free(assoc);
+ } else {/* state not expired, reschedule timer*/
+ sctp_AddTimeOut(la, assoc);
+ }
+ }
+ /* Goto next location in the timer queue*/
+ ++la->sctpNatTimer.loc_time;
+ if (++la->sctpNatTimer.cur_loc >= SN_TIMER_QUEUE_SIZE)
+ la->sctpNatTimer.cur_loc = 0;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ * LOGGING CODE
+ * ----------------------------------------------------------------------
+ */
+/** @addtogroup Logging
+ *
+ * The logging functions provide logging of different items ranging from logging
+ * a simple message, through logging an association details to logging the
+ * current state of the NAT tables
+ */
+/** @ingroup Logging
+ * @brief Log sctp nat errors
+ *
+ * @param errormsg Error message to be logged
+ * @param vtag Current Vtag
+ * @param error Error number
+ * @param direction Direction of packet
+ */
+static void
+logsctperror(char* errormsg, uint32_t vtag, int error, int direction)
+{
+ char dir;
+ switch(direction) {
+ case SN_TO_LOCAL:
+ dir = 'L';
+ break;
+ case SN_TO_GLOBAL:
+ dir = 'G';
+ break;
+ default:
+ dir = '*';
+ break;
+ }
+ SctpAliasLog("->%c %s (vt=%u) %d\n", dir, errormsg, ntohl(vtag), error);
+}
+
+/** @ingroup Logging
+ * @brief Log what the parser parsed
+ *
+ * @param direction Direction of packet
+ * @param sm Pointer to sctp message information
+ */
+static void
+logsctpparse(int direction, struct sctp_nat_msg *sm)
+{
+ char *ploc, *pstate;
+ switch(direction) {
+ case SN_TO_LOCAL:
+ ploc = "TO_LOCAL -";
+ break;
+ case SN_TO_GLOBAL:
+ ploc = "TO_GLOBAL -";
+ break;
+ default:
+ ploc = "";
+ }
+ switch(sm->msg) {
+ case SN_SCTP_INIT:
+ pstate = "Init";
+ break;
+ case SN_SCTP_INITACK:
+ pstate = "InitAck";
+ break;
+ case SN_SCTP_ABORT:
+ pstate = "Abort";
+ break;
+ case SN_SCTP_SHUTACK:
+ pstate = "ShutAck";
+ break;
+ case SN_SCTP_SHUTCOMP:
+ pstate = "ShutComp";
+ break;
+ case SN_SCTP_ASCONF:
+ pstate = "Asconf";
+ break;
+ case SN_SCTP_ASCONFACK:
+ pstate = "AsconfAck";
+ break;
+ case SN_SCTP_OTHER:
+ pstate = "Other";
+ break;
+ default:
+ pstate = "***ERROR***";
+ break;
+ }
+ SctpAliasLog("Parsed: %s %s\n", ploc, pstate);
+}
+
+/** @ingroup Logging
+ * @brief Log an SCTP association's details
+ *
+ * @param assoc pointer to sctp association
+ * @param s Character that indicates the state of processing for this packet
+ */
+static void logsctpassoc(struct sctp_nat_assoc *assoc, char* s)
+{
+ struct sctp_GlobalAddress *G_Addr = NULL;
+ char *sp;
+ switch(assoc->state) {
+ case SN_ID:
+ sp = "ID ";
+ break;
+ case SN_INi:
+ sp = "INi ";
+ break;
+ case SN_INa:
+ sp = "INa ";
+ break;
+ case SN_UP:
+ sp = "UP ";
+ break;
+ case SN_CL:
+ sp = "CL ";
+ break;
+ case SN_RM:
+ sp = "RM ";
+ break;
+ default:
+ sp = "***ERROR***";
+ break;
+ }
+ SctpAliasLog("%sAssoc: %s exp=%u la=%s lv=%u lp=%u gv=%u gp=%u tbl=%d\n",
+ s, sp, assoc->exp, inet_ntoa(assoc->l_addr), ntohl(assoc->l_vtag),
+ ntohs(assoc->l_port), ntohl(assoc->g_vtag), ntohs(assoc->g_port),
+ assoc->TableRegister);
+ /* list global addresses */
+ LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) {
+ SctpAliasLog("\t\tga=%s\n",inet_ntoa(G_Addr->g_addr));
+ }
+}
+
+/** @ingroup Logging
+ * @brief Output Global table to log
+ *
+ * @param la Pointer to the relevant libalias instance
+ */
+static void logSctpGlobal(struct libalias *la)
+{
+ u_int i;
+ struct sctp_nat_assoc *assoc = NULL;
+
+ SctpAliasLog("G->\n");
+ for (i=0; i < la->sctpNatTableSize; i++) {
+ LIST_FOREACH(assoc, &la->sctpTableGlobal[i], list_G) {
+ logsctpassoc(assoc, " ");
+ }
+ }
+}
+
+/** @ingroup Logging
+ * @brief Output Local table to log
+ *
+ * @param la Pointer to the relevant libalias instance
+ */
+static void logSctpLocal(struct libalias *la)
+{
+ u_int i;
+ struct sctp_nat_assoc *assoc = NULL;
+
+ SctpAliasLog("L->\n");
+ for (i=0; i < la->sctpNatTableSize; i++) {
+ LIST_FOREACH(assoc, &la->sctpTableLocal[i], list_L) {
+ logsctpassoc(assoc, " ");
+ }
+ }
+}
+
+/** @ingroup Logging
+ * @brief Output timer queue to log
+ *
+ * @param la Pointer to the relevant libalias instance
+ */
+static void logTimerQ(struct libalias *la)
+{
+ static char buf[50];
+ u_int i;
+ struct sctp_nat_assoc *assoc = NULL;
+
+ SctpAliasLog("t->\n");
+ for (i=0; i < SN_TIMER_QUEUE_SIZE; i++) {
+ LIST_FOREACH(assoc, &la->sctpNatTimer.TimerQ[i], timer_Q) {
+ snprintf(buf, 50, " l=%u ",i);
+ //SctpAliasLog(la->logDesc," l=%d ",i);
+ logsctpassoc(assoc, buf);
+ }
+ }
+}
+
+/** @ingroup Logging
+ * @brief Sctp NAT logging function
+ *
+ * This function is based on a similar function in alias_db.c
+ *
+ * @param str/stream logging descriptor
+ * @param format printf type string
+ */
+#ifdef _KERNEL
+static void
+SctpAliasLog(const char *format, ...)
+{
+ char buffer[LIBALIAS_BUF_SIZE];
+ va_list ap;
+ va_start(ap, format);
+ vsnprintf(buffer, LIBALIAS_BUF_SIZE, format, ap);
+ va_end(ap);
+ log(LOG_SECURITY | LOG_INFO,
+ "alias_sctp: %s", buffer);
+}
+#else
+static void
+SctpAliasLog(FILE *stream, const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ vfprintf(stream, format, ap);
+ va_end(ap);
+ fflush(stream);
+}
+#endif
diff --git a/freebsd/sys/netinet/libalias/alias_sctp.h b/freebsd/sys/netinet/libalias/alias_sctp.h
new file mode 100644
index 00000000..9ea21959
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_sctp.h
@@ -0,0 +1,201 @@
+/*-
+ * Copyright (c) 2008
+ * Swinburne University of Technology, Melbourne, Australia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Alias_sctp forms part of the libalias kernel module to handle
+ * Network Address Translation (NAT) for the SCTP protocol.
+ *
+ * This software was developed by David A. Hayes
+ * with leadership and advice from Jason But
+ *
+ * The design is outlined in CAIA technical report number 080618A
+ * (D. Hayes and J. But, "Alias_sctp Version 0.1: SCTP NAT implementation in IPFW")
+ *
+ * Development is part of the CAIA SONATA project,
+ * proposed by Jason But and Grenville Armitage:
+ * http://caia.swin.edu.au/urp/sonata/
+ *
+ *
+ * This project has been made possible in part by a grant from
+ * the Cisco University Research Program Fund at Community
+ * Foundation Silicon Valley.
+ *
+ */
+
+/* $FreeBSD$ */
+
+#ifndef _ALIAS_SCTP_HH_
+#define _ALIAS_SCTP_HH_
+
+#include <freebsd/sys/param.h>
+#ifdef _KERNEL
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/module.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/syslog.h>
+#endif // #ifdef _KERNEL
+#include <freebsd/sys/types.h>
+
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/time.h>
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+
+/**
+ * These are defined in sctp_os_bsd.h, but it can't be included due to its local file
+ * inclusion, so I'm defining them here.
+ *
+ */
+#include <freebsd/machine/cpufunc.h>
+#include <freebsd/machine/cpu.h>
+/* The packed define for 64 bit platforms */
+#ifndef SCTP_PACKED
+#define SCTP_PACKED __attribute__((packed))
+#endif //#ifndef SCTP_PACKED
+#ifndef SCTP_UNUSED
+#define SCTP_UNUSED __attribute__((unused))
+#endif //#ifndef SCTP_UNUSED
+
+
+#include <freebsd/netinet/sctp.h>
+//#include <freebsd/netinet/sctp_os_bsd.h> --might be needed later for mbuf stuff
+#include <freebsd/netinet/sctp_header.h>
+
+#ifndef _KERNEL
+#include <freebsd/stdlib.h>
+#include <freebsd/stdio.h>
+#include <freebsd/curses.h>
+#endif //#ifdef _KERNEL
+
+
+#define LINK_SCTP IPPROTO_SCTP
+
+
+#define SN_TO_LOCAL 0 /**< packet traveling from global to local */
+#define SN_TO_GLOBAL 1 /**< packet traveling from local to global */
+#define SN_TO_NODIR 99 /**< used where direction is not important */
+
+#define SN_NAT_PKT 0x0000 /**< Network Address Translate packet */
+#define SN_DROP_PKT 0x0001 /**< drop packet (don't forward it) */
+#define SN_PROCESSING_ERROR 0x0003 /**< Packet processing error */
+#define SN_REPLY_ABORT 0x0010 /**< Reply with ABORT to sender (don't forward it) */
+#define SN_SEND_ABORT 0x0020 /**< Send ABORT to destination */
+#define SN_TX_ABORT 0x0030 /**< mask for transmitting abort */
+#define SN_REFLECT_ERROR 0x0100 /**< Reply with ERROR to sender on OOTB packet Tbit set */
+#define SN_REPLY_ERROR 0x0200 /**< Reply with ERROR to sender on ASCONF clash */
+#define SN_TX_ERROR 0x0300 /**< mask for transmitting error */
+
+
+#define PKT_ALIAS_RESPOND 0x1000 /**< Signal to libalias that there is a response packet to send */
+/*
+ * Data structures
+ */
+
+/**
+ * @brief sctp association information
+ *
+ * Structure that contains information about a particular sctp association
+ * currently under Network Address Translation.
+ * Information is stored in network byte order (as is libalias)***
+ */
+struct sctp_nat_assoc {
+ uint32_t l_vtag; /**< local side verification tag */
+ uint16_t l_port; /**< local side port number */
+ uint32_t g_vtag; /**< global side verification tag */
+ uint16_t g_port; /**< global side port number */
+ struct in_addr l_addr; /**< local ip address */
+ struct in_addr a_addr; /**< alias ip address */
+ int state; /**< current state of NAT association */
+ int TableRegister; /**< stores which look up tables association is registered in */
+ int exp; /**< timer expiration in seconds from uptime */
+ int exp_loc; /**< current location in timer_Q */
+ int num_Gaddr; /**< number of global IP addresses in the list */
+ LIST_HEAD(sctpGlobalAddresshead,sctp_GlobalAddress) Gaddr; /**< List of global addresses */
+ LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
+ LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
+ LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
+//Using libalias locking
+};
+
+struct sctp_GlobalAddress {
+ struct in_addr g_addr;
+ LIST_ENTRY (sctp_GlobalAddress) list_Gaddr; /**< Linked list of pointers for Global table */
+};
+
+/**
+ * @brief SCTP chunk of interest
+ *
+ * The only chunks whose contents are of any interest are the INIT and ASCONF_AddIP
+ */
+union sctpChunkOfInt {
+ struct sctp_init *Init; /**< Pointer to Init Chunk */
+ struct sctp_init_ack *InitAck; /**< Pointer to Init Chunk */
+ struct sctp_paramhdr *Asconf; /**< Pointer to ASCONF chunk */
+};
+
+
+/**
+ * @brief SCTP message
+ *
+ * Structure containing the relevant information from the SCTP message
+ */
+struct sctp_nat_msg {
+ uint16_t msg; /**< one of the key messages defined above */
+#ifndef __rtems__
+#ifdef INET6
+ // struct ip6_hdr *ip_hdr; /**< pointer to ip packet header */ /*no inet6 support yet*/
+#else
+ struct ip *ip_hdr; /**< pointer to ip packet header */
+#endif //#ifdef INET6
+#else //__rtems__
+ struct ip *ip_hdr; /**< pointer to ip packet header */
+#endif //__rtems__
+ struct sctphdr *sctp_hdr; /**< pointer to sctp common header */
+ union sctpChunkOfInt sctpchnk; /**< union of pointers to the chunk of interest */
+ int chunk_length; /**< length of chunk of interest */
+};
+
+
+/**
+ * @brief sctp nat timer queue structure
+ *
+ */
+
+struct sctp_nat_timer {
+ int loc_time; /**< time in seconds for the current location in the queue */
+ int cur_loc; /**< index of the current location in the circular queue */
+ LIST_HEAD(sctpTimerQ,sctp_nat_assoc) *TimerQ; /**< List of associations at this position in the timer Q */
+};
+
+
+
+#endif //#ifndef _ALIAS_SCTP_HH_
diff --git a/freebsd/sys/netinet/libalias/alias_skinny.c b/freebsd/sys/netinet/libalias/alias_skinny.c
new file mode 100644
index 00000000..4d311efe
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_skinny.c
@@ -0,0 +1,449 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * alias_skinny.c
+ *
+ * Copyright (c) 2002, 2003 MarcusCom, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Joe Marcus Clarke <marcus@FreeBSD.org>
+ *
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#else
+#include <freebsd/errno.h>
+#include <freebsd/stdio.h>
+#include <freebsd/unistd.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+static void
+AliasHandleSkinny(struct libalias *, struct ip *, struct alias_link *);
+
+static int
+fingerprint(struct libalias *la, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL)
+ return (-1);
+ if (la->skinnyPort != 0 && (ntohs(*ah->sport) == la->skinnyPort ||
+ ntohs(*ah->dport) == la->skinnyPort))
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleSkinny(la, pip, ah->lnk);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 110,
+ .dir = IN|OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_skinny", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_skinny, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_skinny, 1);
+MODULE_DEPEND(alias_skinny, libalias, 1, 1, 1);
+#endif
+
+/*
+ * alias_skinny.c handles the translation for the Cisco Skinny Station
+ * protocol. Skinny typically uses TCP port 2000 to set up calls between
+ * a Cisco Call Manager and a Cisco IP phone. When a phone comes on line,
+ * it first needs to register with the Call Manager. To do this it sends
+ * a registration message. This message contains the IP address of the
+ * IP phone. This message must then be translated to reflect our global
+ * IP address. Along with the registration message (and usually in the
+ * same packet), the phone sends an IP port message. This message indicates
+ * the TCP port over which it will communicate.
+ *
+ * When a call is placed from the phone, the Call Manager will send an
+ * Open Receive Channel message to the phone to let the caller know someone
+ * has answered. The phone then sends back an Open Receive Channel
+ * Acknowledgement. In this packet, the phone sends its IP address again,
+ * and the UDP port over which the voice traffic should flow. These values
+ * need translation. Right after the Open Receive Channel Acknowledgement,
+ * the Call Manager sends a Start Media Transmission message indicating the
+ * call is connected. This message contains the IP address and UDP port
+ * number of the remote (called) party. Once this message is translated, the
+ * call can commence. The called party sends the first UDP packet to the
+ * calling phone at the pre-arranged UDP port in the Open Receive Channel
+ * Acknowledgement.
+ *
+ * Skinny is a Cisco-proprietary protocol and is a trademark of Cisco Systems,
+ * Inc. All rights reserved.
+*/
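+
+/*
+ * A rough sketch of the framing AliasHandleSkinny() below walks, assuming (as
+ * the pointer arithmetic at the bottom of that function suggests) that the
+ * "len" field counts the msgId word plus the payload:
+ *
+ *	| len (4) | reserved (4) | msgId (4) | payload (len - 4) | len (4) | ...
+ *
+ * A single TCP segment may therefore carry, for example, a RegisterMessage
+ * immediately followed by an IpPortMessage, and the parser steps
+ * len + sizeof(len) + sizeof(reserved) bytes from one message to the next.
+ */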
+
+/* #define LIBALIAS_DEBUG 1 */
+
+/* Message types that need translating */
+#define REG_MSG 0x00000001
+#define IP_PORT_MSG 0x00000002
+#define OPNRCVCH_ACK 0x00000022
+#define START_MEDIATX 0x0000008a
+
+struct skinny_header {
+ u_int32_t len;
+ u_int32_t reserved;
+ u_int32_t msgId;
+};
+
+struct RegisterMessage {
+ u_int32_t msgId;
+ char devName [16];
+ u_int32_t uid;
+ u_int32_t instance;
+ u_int32_t ipAddr;
+ u_char devType;
+ u_int32_t maxStreams;
+};
+
+struct IpPortMessage {
+ u_int32_t msgId;
+ u_int32_t stationIpPort; /* Note: Skinny uses 32-bit port
+ * numbers */
+};
+
+struct OpenReceiveChannelAck {
+ u_int32_t msgId;
+ u_int32_t status;
+ u_int32_t ipAddr;
+ u_int32_t port;
+ u_int32_t passThruPartyID;
+};
+
+struct StartMediaTransmission {
+ u_int32_t msgId;
+ u_int32_t conferenceID;
+ u_int32_t passThruPartyID;
+ u_int32_t remoteIpAddr;
+ u_int32_t remotePort;
+ u_int32_t MSPacket;
+ u_int32_t payloadCap;
+ u_int32_t precedence;
+ u_int32_t silenceSuppression;
+ u_short maxFramesPerPacket;
+ u_int32_t G723BitRate;
+};
+
+typedef enum {
+ ClientToServer = 0,
+ ServerToClient = 1
+} ConvDirection;
+
+
+static int
+alias_skinny_reg_msg(struct RegisterMessage *reg_msg, struct ip *pip,
+ struct tcphdr *tc, struct alias_link *lnk,
+ ConvDirection direction)
+{
+ (void)direction;
+
+ reg_msg->ipAddr = (u_int32_t) GetAliasAddress(lnk).s_addr;
+
+ tc->th_sum = 0;
+#ifdef _KERNEL
+ tc->th_x2 = 1;
+#else
+ tc->th_sum = TcpChecksum(pip);
+#endif
+
+ return (0);
+}
+
+static int
+alias_skinny_startmedia(struct StartMediaTransmission *start_media,
+ struct ip *pip, struct tcphdr *tc,
+ struct alias_link *lnk, u_int32_t localIpAddr,
+ ConvDirection direction)
+{
+ struct in_addr dst, src;
+
+ (void)pip;
+ (void)tc;
+ (void)lnk;
+ (void)direction;
+
+ dst.s_addr = start_media->remoteIpAddr;
+ src.s_addr = localIpAddr;
+
+ /*
+ * XXX I should probably handle in bound global translations as
+ * well.
+ */
+
+ return (0);
+}
+
+static int
+alias_skinny_port_msg(struct IpPortMessage *port_msg, struct ip *pip,
+ struct tcphdr *tc, struct alias_link *lnk,
+ ConvDirection direction)
+{
+ (void)direction;
+
+ port_msg->stationIpPort = (u_int32_t) ntohs(GetAliasPort(lnk));
+
+ tc->th_sum = 0;
+#ifdef _KERNEL
+ tc->th_x2 = 1;
+#else
+ tc->th_sum = TcpChecksum(pip);
+#endif
+ return (0);
+}
+
+static int
+alias_skinny_opnrcvch_ack(struct libalias *la, struct OpenReceiveChannelAck *opnrcvch_ack,
+ struct ip *pip, struct tcphdr *tc,
+ struct alias_link *lnk, u_int32_t * localIpAddr,
+ ConvDirection direction)
+{
+ struct in_addr null_addr;
+ struct alias_link *opnrcv_lnk;
+ u_int32_t localPort;
+
+ (void)lnk;
+ (void)direction;
+
+ *localIpAddr = (u_int32_t) opnrcvch_ack->ipAddr;
+ localPort = opnrcvch_ack->port;
+
+ null_addr.s_addr = INADDR_ANY;
+ opnrcv_lnk = FindUdpTcpOut(la, pip->ip_src, null_addr,
+ htons((u_short) opnrcvch_ack->port), 0,
+ IPPROTO_UDP, 1);
+ opnrcvch_ack->ipAddr = (u_int32_t) GetAliasAddress(opnrcv_lnk).s_addr;
+ opnrcvch_ack->port = (u_int32_t) ntohs(GetAliasPort(opnrcv_lnk));
+
+ tc->th_sum = 0;
+#ifdef _KERNEL
+ tc->th_x2 = 1;
+#else
+ tc->th_sum = TcpChecksum(pip);
+#endif
+ return (0);
+}
+
+static void
+AliasHandleSkinny(struct libalias *la, struct ip *pip, struct alias_link *lnk)
+{
+ size_t hlen, tlen, dlen;
+ struct tcphdr *tc;
+ u_int32_t msgId, t, len, lip;
+ struct skinny_header *sd;
+ size_t orig_len, skinny_hdr_len = sizeof(struct skinny_header);
+ ConvDirection direction;
+
+ lip = -1;
+ tc = (struct tcphdr *)ip_next(pip);
+ hlen = (pip->ip_hl + tc->th_off) << 2;
+ tlen = ntohs(pip->ip_len);
+ dlen = tlen - hlen;
+
+ sd = (struct skinny_header *)tcp_next(tc);
+
+ /*
+ * XXX This direction is reserved for future use. I still need to
+ * handle the scenario where the call manager is on the inside, and
+ * the calling phone is on the global outside.
+ */
+ if (ntohs(tc->th_dport) == la->skinnyPort) {
+ direction = ClientToServer;
+ } else if (ntohs(tc->th_sport) == la->skinnyPort) {
+ direction = ServerToClient;
+ } else {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Invalid port number, not a Skinny packet\n");
+#endif
+ return;
+ }
+
+ orig_len = dlen;
+ /*
+ * Skinny packets can contain many messages. We need to loop
+ * through the packet using len to determine message boundaries.
+ * This comes into play big time with port messages being in the
+ * same packet as register messages. Also, open receive channel
+ * acks are usually buried in a packet some 400 bytes long.
+ */
+ while (dlen >= skinny_hdr_len) {
+ len = (sd->len);
+ msgId = (sd->msgId);
+ t = len;
+
+ if (t > orig_len || t > dlen) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Not a skinny packet, invalid length \n");
+#endif
+ return;
+ }
+ switch (msgId) {
+ case REG_MSG: {
+ struct RegisterMessage *reg_mesg;
+
+ if (len < (int)sizeof(struct RegisterMessage)) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Not a skinny packet, bad registration message\n");
+#endif
+ return;
+ }
+ reg_mesg = (struct RegisterMessage *)&sd->msgId;
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Received a register message");
+#endif
+ alias_skinny_reg_msg(reg_mesg, pip, tc, lnk, direction);
+ break;
+ }
+ case IP_PORT_MSG: {
+ struct IpPortMessage *port_mesg;
+
+ if (len < (int)sizeof(struct IpPortMessage)) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Not a skinny packet, port message\n");
+#endif
+ return;
+ }
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Received ipport message\n");
+#endif
+ port_mesg = (struct IpPortMessage *)&sd->msgId;
+ alias_skinny_port_msg(port_mesg, pip, tc, lnk, direction);
+ break;
+ }
+ case OPNRCVCH_ACK: {
+ struct OpenReceiveChannelAck *opnrcvchn_ack;
+
+ if (len < (int)sizeof(struct OpenReceiveChannelAck)) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Not a skinny packet, packet,OpnRcvChnAckMsg\n");
+#endif
+ return;
+ }
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Received open rcv channel msg\n");
+#endif
+ opnrcvchn_ack = (struct OpenReceiveChannelAck *)&sd->msgId;
+ alias_skinny_opnrcvch_ack(la, opnrcvchn_ack, pip, tc, lnk, &lip, direction);
+ break;
+ }
+ case START_MEDIATX: {
+ struct StartMediaTransmission *startmedia_tx;
+
+ if (len < (int)sizeof(struct StartMediaTransmission)) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Not a skinny packet,StartMediaTx Message\n");
+#endif
+ return;
+ }
+ if (lip == -1) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: received a"
+ " packet,StartMediaTx Message before"
+ " packet,OpnRcvChnAckMsg\n"
+#endif
+ return;
+ }
+
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: Received start media trans msg\n");
+#endif
+ startmedia_tx = (struct StartMediaTransmission *)&sd->msgId;
+ alias_skinny_startmedia(startmedia_tx, pip, tc, lnk, lip, direction);
+ break;
+ }
+ default:
+ break;
+ }
+ /* Place the pointer at the next message in the packet. */
+ dlen -= len + (skinny_hdr_len - sizeof(msgId));
+ sd = (struct skinny_header *)(((char *)&sd->msgId) + len);
+ }
+}
diff --git a/freebsd/sys/netinet/libalias/alias_smedia.c b/freebsd/sys/netinet/libalias/alias_smedia.c
new file mode 100644
index 00000000..3d558a94
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_smedia.c
@@ -0,0 +1,551 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*
+ * alias_smedia.c
+ *
+ * Copyright (c) 2000 Whistle Communications, Inc.
+ * All rights reserved.
+ *
+ * Subject to the following obligations and disclaimer of warranty, use and
+ * redistribution of this software, in source or object code forms, with or
+ * without modifications are expressly permitted by Whistle Communications;
+ * provided, however, that:
+ * 1. Any and all reproductions of the source or object code must include the
+ * copyright notice above and the following disclaimer of warranties; and
+ * 2. No rights are granted, in any manner or form, to use Whistle
+ * Communications, Inc. trademarks, including the mark "WHISTLE
+ * COMMUNICATIONS" on advertising, endorsements, or otherwise except as
+ * such appears in the above copyright notice or in the software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
+ * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
+ * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
+ * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
+ * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
+ * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
+ * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
+ * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * Copyright (c) 2000 Junichi SATOH <junichi@astec.co.jp>
+ * <junichi@junichi.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors: Erik Salander <erik@whistle.com>
+ * Junichi SATOH <junichi@astec.co.jp>
+ * <junichi@junichi.org>
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ Alias_smedia.c is meant to contain the aliasing code for streaming media
+ protocols. It performs special processing for RTSP sessions under TCP.
+ Specifically, when a SETUP request is sent by a client, or a 200 reply
+ is sent by a server, it is intercepted and modified. The address is
+ changed to the gateway machine and an aliasing port is used.
+
+ More specifically, the "client_port" configuration parameter is
+ parsed for SETUP requests. The "server_port" configuration parameter is
+ parsed for 200 replies emanating from a server. This is intended to handle
+ the unicast case.
+
+ RTSP also allows a redirection of a stream to another client by using the
+ "destination" configuration parameter. The destination config parm would
+ indicate a different IP address. This function is NOT supported by the
+ RTSP translation code below.
+
+ The RTSP multicast functions without any address translation intervention.
+
+ For this routine to work, the SETUP/200 must fit entirely
+ into a single TCP packet. This is typically the case, but exceptions
+ can easily be envisioned under the actual specifications.
+
+ Probably the most troubling aspect of the approach taken here is
+ that the new SETUP/200 will typically be a different length, and
+ this causes a certain amount of bookkeeping to keep track of the
+ changes of sequence and acknowledgment numbers, since the client
+ machine is totally unaware of the modification to the TCP stream.
+
+ Initial version: May, 2000 (eds)
+*/
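+
+/*
+ * A worked example of the rewrite performed below (the port numbers are
+ * purely illustrative): an outgoing SETUP carrying the header line
+ *
+ *	Transport: RTP/AVP;unicast;client_port=5000-5001
+ *
+ * has its client_port range replaced with an aliased pair obtained via
+ * FindNewPortGroup()/FindRtspOut(), e.g.
+ *
+ *	Transport: RTP/AVP;unicast;client_port=8000-8001
+ *
+ * and, since the rewritten line may change the segment length, the sequence
+ * number bookkeeping mentioned above is done with SetAckModified()/AddSeq().
+ */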
+
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/module.h>
+#else
+#include <freebsd/errno.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/stdio.h>
+#include <freebsd/string.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#include <freebsd/netinet/libalias/alias_mod.h>
+#else
+#include <freebsd/local/alias_local.h>
+#include <freebsd/local/alias_mod.h>
+#endif
+
+#define RTSP_CONTROL_PORT_NUMBER_1 554
+#define RTSP_CONTROL_PORT_NUMBER_2 7070
+#define TFTP_PORT_NUMBER 69
+
+static void
+AliasHandleRtspOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+static int
+fingerprint(struct libalias *la, struct alias_data *ah)
+{
+
+ if (ah->dport != NULL && ah->aport != NULL && ah->sport != NULL &&
+ ntohs(*ah->dport) == TFTP_PORT_NUMBER)
+ return (0);
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->sport) == RTSP_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_2
+ || ntohs(*ah->sport) == RTSP_CONTROL_PORT_NUMBER_2)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ntohs(*ah->dport) == TFTP_PORT_NUMBER)
+ FindRtspOut(la, pip->ip_src, pip->ip_dst,
+ *ah->sport, *ah->aport, IPPROTO_UDP);
+ else AliasHandleRtspOut(la, pip, ah->lnk, ah->maxpktsize);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 100,
+ .dir = OUT,
+ .proto = TCP|UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_smedia", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_smedia, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_smedia, 1);
+MODULE_DEPEND(alias_smedia, libalias, 1, 1, 1);
+#endif
+
+#define RTSP_CONTROL_PORT_NUMBER_1 554
+#define RTSP_CONTROL_PORT_NUMBER_2 7070
+#define RTSP_PORT_GROUP 2
+
+#define ISDIGIT(a) (((a) >= '0') && ((a) <= '9'))
+
+static int
+search_string(char *data, int dlen, const char *search_str)
+{
+ int i, j, k;
+ int search_str_len;
+
+ search_str_len = strlen(search_str);
+ for (i = 0; i < dlen - search_str_len; i++) {
+ for (j = i, k = 0; j < dlen - search_str_len; j++, k++) {
+ if (data[j] != search_str[k] &&
+ data[j] != search_str[k] - ('a' - 'A')) {
+ break;
+ }
+ if (k == search_str_len - 1) {
+ return (j + 1);
+ }
+ }
+ }
+ return (-1);
+}
+
+static int
+alias_rtsp_out(struct libalias *la, struct ip *pip,
+ struct alias_link *lnk,
+ char *data,
+ const char *port_str)
+{
+ int hlen, tlen, dlen;
+ struct tcphdr *tc;
+ int i, j, pos, state, port_dlen, new_dlen, delta;
+ u_short p[2], new_len;
+ u_short sport, eport, base_port;
+ u_short salias = 0, ealias = 0, base_alias = 0;
+ const char *transport_str = "transport:";
+ char newdata[2048], *port_data, *port_newdata, stemp[80];
+ int links_created = 0, pkt_updated = 0;
+ struct alias_link *rtsp_lnk = NULL;
+ struct in_addr null_addr;
+
+ /* Calculate data length of TCP packet */
+ tc = (struct tcphdr *)ip_next(pip);
+ hlen = (pip->ip_hl + tc->th_off) << 2;
+ tlen = ntohs(pip->ip_len);
+ dlen = tlen - hlen;
+
+ /* Find keyword, "Transport: " */
+ pos = search_string(data, dlen, transport_str);
+ if (pos < 0) {
+ return (-1);
+ }
+ port_data = data + pos;
+ port_dlen = dlen - pos;
+
+ memcpy(newdata, data, pos);
+ port_newdata = newdata + pos;
+
+ while (port_dlen > (int)strlen(port_str)) {
+ /* Find keyword, appropriate port string */
+ pos = search_string(port_data, port_dlen, port_str);
+ if (pos < 0) {
+ break;
+ }
+ memcpy(port_newdata, port_data, pos + 1);
+ port_newdata += (pos + 1);
+
+ p[0] = p[1] = 0;
+ sport = eport = 0;
+ state = 0;
+ for (i = pos; i < port_dlen; i++) {
+ switch (state) {
+ case 0:
+ if (port_data[i] == '=') {
+ state++;
+ }
+ break;
+ case 1:
+ if (ISDIGIT(port_data[i])) {
+ p[0] = p[0] * 10 + port_data[i] - '0';
+ } else {
+ if (port_data[i] == ';') {
+ state = 3;
+ }
+ if (port_data[i] == '-') {
+ state++;
+ }
+ }
+ break;
+ case 2:
+ if (ISDIGIT(port_data[i])) {
+ p[1] = p[1] * 10 + port_data[i] - '0';
+ } else {
+ state++;
+ }
+ break;
+ case 3:
+ base_port = p[0];
+ sport = htons(p[0]);
+ eport = htons(p[1]);
+
+ if (!links_created) {
+
+ links_created = 1;
+ /*
+ * Find an even numbered port
+ * number base that satisfies the
+ * contiguous number of ports we
+ * need
+ */
+ null_addr.s_addr = 0;
+ if (0 == (salias = FindNewPortGroup(la, null_addr,
+ FindAliasAddress(la, pip->ip_src),
+ sport, 0,
+ RTSP_PORT_GROUP,
+ IPPROTO_UDP, 1))) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/RTSP: Cannot find contiguous RTSP data ports\n");
+#endif
+ } else {
+
+ base_alias = ntohs(salias);
+ for (j = 0; j < RTSP_PORT_GROUP; j++) {
+ /*
+ * Establish link
+ * to port found in
+ * RTSP packet
+ */
+ rtsp_lnk = FindRtspOut(la, GetOriginalAddress(lnk), null_addr,
+ htons(base_port + j), htons(base_alias + j),
+ IPPROTO_UDP);
+ if (rtsp_lnk != NULL) {
+#ifndef NO_FW_PUNCH
+ /*
+ * Punch
+ * hole in
+ * firewall
+ */
+ PunchFWHole(rtsp_lnk);
+#endif
+ } else {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/RTSP: Cannot allocate RTSP data ports\n");
+#endif
+ break;
+ }
+ }
+ }
+ ealias = htons(base_alias + (RTSP_PORT_GROUP - 1));
+ }
+ if (salias && rtsp_lnk) {
+
+ pkt_updated = 1;
+
+ /* Copy into IP packet */
+ sprintf(stemp, "%d", ntohs(salias));
+ memcpy(port_newdata, stemp, strlen(stemp));
+ port_newdata += strlen(stemp);
+
+ if (eport != 0) {
+ *port_newdata = '-';
+ port_newdata++;
+
+ /* Copy into IP packet */
+ sprintf(stemp, "%d", ntohs(ealias));
+ memcpy(port_newdata, stemp, strlen(stemp));
+ port_newdata += strlen(stemp);
+ }
+ *port_newdata = ';';
+ port_newdata++;
+ }
+ state++;
+ break;
+ }
+ if (state > 3) {
+ break;
+ }
+ }
+ port_data += i;
+ port_dlen -= i;
+ }
+
+ if (!pkt_updated)
+ return (-1);
+
+ memcpy(port_newdata, port_data, port_dlen);
+ port_newdata += port_dlen;
+ *port_newdata = '\0';
+
+ /* Create new packet */
+ new_dlen = port_newdata - newdata;
+ memcpy(data, newdata, new_dlen);
+
+ SetAckModified(lnk);
+ tc = (struct tcphdr *)ip_next(pip);
+ delta = GetDeltaSeqOut(tc->th_seq, lnk);
+ AddSeq(lnk, delta + new_dlen - dlen, pip->ip_hl, pip->ip_len,
+ tc->th_seq, tc->th_off);
+
+ new_len = htons(hlen + new_dlen);
+ DifferentialChecksum(&pip->ip_sum,
+ &new_len,
+ &pip->ip_len,
+ 1);
+ pip->ip_len = new_len;
+
+ tc->th_sum = 0;
+#ifdef _KERNEL
+ tc->th_x2 = 1;
+#else
+ tc->th_sum = TcpChecksum(pip);
+#endif
+ return (0);
+}
+
+/* Support the protocol used by early versions of RealPlayer */
+
+static int
+alias_pna_out(struct libalias *la, struct ip *pip,
+ struct alias_link *lnk,
+ char *data,
+ int dlen)
+{
+ struct alias_link *pna_links;
+ u_short msg_id, msg_len;
+ char *work;
+ u_short alias_port, port;
+ struct tcphdr *tc;
+
+ work = data;
+ work += 5;
+ while (work + 4 < data + dlen) {
+ memcpy(&msg_id, work, 2);
+ work += 2;
+ memcpy(&msg_len, work, 2);
+ work += 2;
+ if (ntohs(msg_id) == 0) {
+ /* end of options */
+ return (0);
+ }
+ if ((ntohs(msg_id) == 1) || (ntohs(msg_id) == 7)) {
+ memcpy(&port, work, 2);
+ pna_links = FindUdpTcpOut(la, pip->ip_src, GetDestAddress(lnk),
+ port, 0, IPPROTO_UDP, 1);
+ if (pna_links != NULL) {
+#ifndef NO_FW_PUNCH
+ /* Punch hole in firewall */
+ PunchFWHole(pna_links);
+#endif
+ tc = (struct tcphdr *)ip_next(pip);
+ alias_port = GetAliasPort(pna_links);
+ memcpy(work, &alias_port, 2);
+
+ /* Compute TCP checksum for revised packet */
+ tc->th_sum = 0;
+#ifdef _KERNEL
+ tc->th_x2 = 1;
+#else
+ tc->th_sum = TcpChecksum(pip);
+#endif
+ }
+ }
+ work += ntohs(msg_len);
+ }
+
+ return (0);
+}
+
+static void
+AliasHandleRtspOut(struct libalias *la, struct ip *pip, struct alias_link *lnk, int maxpacketsize)
+{
+ int hlen, tlen, dlen;
+ struct tcphdr *tc;
+ char *data;
+ const char *setup = "SETUP", *pna = "PNA", *str200 = "200";
+ const char *okstr = "OK", *client_port_str = "client_port";
+ const char *server_port_str = "server_port";
+ int i, parseOk;
+
+ (void)maxpacketsize;
+
+ tc = (struct tcphdr *)ip_next(pip);
+ hlen = (pip->ip_hl + tc->th_off) << 2;
+ tlen = ntohs(pip->ip_len);
+ dlen = tlen - hlen;
+
+ data = (char *)pip;
+ data += hlen;
+
+ /* When aliasing a client, check for the SETUP request */
+ if ((ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_1) ||
+ (ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_2)) {
+
+ if (dlen >= (int)strlen(setup)) {
+ if (memcmp(data, setup, strlen(setup)) == 0) {
+ alias_rtsp_out(la, pip, lnk, data, client_port_str);
+ return;
+ }
+ }
+ if (dlen >= (int)strlen(pna)) {
+ if (memcmp(data, pna, strlen(pna)) == 0) {
+ alias_pna_out(la, pip, lnk, data, dlen);
+ }
+ }
+ } else {
+
+ /*
+ * When aliasing a server, check for the 200 reply
+ * Accommodate a varying number of blanks between 200 & OK
+ */
+
+ if (dlen >= (int)strlen(str200)) {
+
+ for (parseOk = 0, i = 0;
+ i <= dlen - (int)strlen(str200);
+ i++) {
+ if (memcmp(&data[i], str200, strlen(str200)) == 0) {
+ parseOk = 1;
+ break;
+ }
+ }
+ if (parseOk) {
+
+ i += strlen(str200); /* skip string found */
+ while (data[i] == ' ') /* skip blank(s) */
+ i++;
+
+ if ((dlen - i) >= (int)strlen(okstr)) {
+
+ if (memcmp(&data[i], okstr, strlen(okstr)) == 0)
+ alias_rtsp_out(la, pip, lnk, data, server_port_str);
+
+ }
+ }
+ }
+ }
+}
diff --git a/freebsd/sys/netinet/libalias/alias_util.c b/freebsd/sys/netinet/libalias/alias_util.c
new file mode 100644
index 00000000..1e0c95ae
--- /dev/null
+++ b/freebsd/sys/netinet/libalias/alias_util.c
@@ -0,0 +1,178 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 Charles Mott <cm@linktel.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+
+/*
+ Alias_util.c contains general utilities used by other functions
+ in the packet aliasing module. At the moment, there are functions
+ for computing IP header and TCP packet checksums.
+
+ The checksum routines are based upon example code in a Unix networking
+ text written by Stevens (sorry, I can't remember the title -- but
+ at least this is a good author).
+
+ Initial Version: August, 1996 (cjm)
+
+ Version 1.7: January 9, 1997
+ Added differential checksum update function.
+*/
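+
+/*
+ * A minimal usage sketch for the differential update below, mirroring how the
+ * protocol modules in this directory call it when a single 16-bit header
+ * field changes (the last argument counts 16-bit words; new_len is whatever
+ * value the caller computed for the rewritten packet):
+ *
+ *	u_short new_len = htons(hlen + new_dlen);
+ *
+ *	DifferentialChecksum(&pip->ip_sum, &new_len, &pip->ip_len, 1);
+ *	pip->ip_len = new_len;
+ */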
+
+#ifdef _KERNEL
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/proc.h>
+#else
+#include <freebsd/sys/types.h>
+#include <freebsd/stdio.h>
+#endif
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/netinet/libalias/alias.h>
+#include <freebsd/netinet/libalias/alias_local.h>
+#else
+#include <freebsd/local/alias.h>
+#include <freebsd/local/alias_local.h>
+#endif
+
+/*
+ * Note: the checksum routines assume that the actual checksum word has
+ * been zeroed out. If the checksum word is filled with the proper value,
+ * then these routines will give a result of zero (useful for testing
+ * purposes);
+ */
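+
+/*
+ * For example, the userland protocol handlers in this directory recompute a
+ * TCP checksum by zeroing the field first and then calling TcpChecksum()
+ * (defined further below):
+ *
+ *	tc->th_sum = 0;
+ *	tc->th_sum = TcpChecksum(pip);
+ */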
+u_short
+LibAliasInternetChecksum(struct libalias *la __unused, u_short * ptr,
+ int nbytes)
+{
+ int sum, oddbyte;
+
+ LIBALIAS_LOCK(la);
+ sum = 0;
+ while (nbytes > 1) {
+ sum += *ptr++;
+ nbytes -= 2;
+ }
+ if (nbytes == 1) {
+ oddbyte = 0;
+ ((u_char *) & oddbyte)[0] = *(u_char *) ptr;
+ ((u_char *) & oddbyte)[1] = 0;
+ sum += oddbyte;
+ }
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16);
+ LIBALIAS_UNLOCK(la);
+ return (~sum);
+}
+
+#ifndef _KERNEL
+u_short
+IpChecksum(struct ip *pip)
+{
+ return (LibAliasInternetChecksum(NULL, (u_short *) pip,
+ (pip->ip_hl << 2)));
+
+}
+
+u_short
+TcpChecksum(struct ip *pip)
+{
+ u_short *ptr;
+ struct tcphdr *tc;
+ int nhdr, ntcp, nbytes;
+ int sum, oddbyte;
+
+ nhdr = pip->ip_hl << 2;
+ ntcp = ntohs(pip->ip_len) - nhdr;
+
+ tc = (struct tcphdr *)ip_next(pip);
+ ptr = (u_short *) tc;
+
+/* Add up TCP header and data */
+ nbytes = ntcp;
+ sum = 0;
+ while (nbytes > 1) {
+ sum += *ptr++;
+ nbytes -= 2;
+ }
+ if (nbytes == 1) {
+ oddbyte = 0;
+ ((u_char *) & oddbyte)[0] = *(u_char *) ptr;
+ ((u_char *) & oddbyte)[1] = 0;
+ sum += oddbyte;
+ }
+/* "Pseudo-header" data */
+ ptr = (void *)&pip->ip_dst;
+ sum += *ptr++;
+ sum += *ptr;
+ ptr = (void *)&pip->ip_src;
+ sum += *ptr++;
+ sum += *ptr;
+ sum += htons((u_short) ntcp);
+ sum += htons((u_short) pip->ip_p);
+
+/* Roll over carry bits */
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16);
+
+/* Return checksum */
+ return ((u_short) ~ sum);
+}
+#endif /* not _KERNEL */
+
+void
+DifferentialChecksum(u_short * cksum, void *newp, void *oldp, int n)
+{
+ int i;
+ int accumulate;
+ u_short *new = newp;
+ u_short *old = oldp;
+
+ accumulate = *cksum;
+ for (i = 0; i < n; i++) {
+ accumulate -= *new++;
+ accumulate += *old++;
+ }
+
+ if (accumulate < 0) {
+ accumulate = -accumulate;
+ accumulate = (accumulate >> 16) + (accumulate & 0xffff);
+ accumulate += accumulate >> 16;
+ *cksum = (u_short) ~ accumulate;
+ } else {
+ accumulate = (accumulate >> 16) + (accumulate & 0xffff);
+ accumulate += accumulate >> 16;
+ *cksum = (u_short) accumulate;
+ }
+}
diff --git a/freebsd/sys/netinet/pim.h b/freebsd/sys/netinet/pim.h
new file mode 100644
index 00000000..2f887cc2
--- /dev/null
+++ b/freebsd/sys/netinet/pim.h
@@ -0,0 +1,119 @@
+/*-
+ * Copyright (c) 1996-2000
+ * University of Southern California/Information Sciences Institute.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_PIM_HH_
+#define _NETINET_PIM_HH_
+
+/*
+ * Protocol Independent Multicast (PIM) definitions.
+ * RFC 2362, June 1998.
+ *
+ * Written by Ahmed Helmy, USC/SGI, July 1996.
+ * Modified by George Edmond Eddy (Rusty), ISI, February 1998.
+ * Modified by Pavlin Radoslavov, USC/ISI, May 1998, October 2000.
+ */
+
+#include <freebsd/sys/types.h>
+
+#ifndef _PIM_VT
+#ifndef BYTE_ORDER
+# error BYTE_ORDER is not defined!
+#endif
+#if (BYTE_ORDER != BIG_ENDIAN) && (BYTE_ORDER != LITTLE_ENDIAN)
+# error BYTE_ORDER must be defined to either BIG_ENDIAN or LITTLE_ENDIAN
+#endif
+#endif /* ! _PIM_VT */
+
+/*
+ * PIM packet header
+ */
+struct pim {
+#ifdef _PIM_VT
+ uint8_t pim_vt; /* PIM version and message type */
+#else /* ! _PIM_VT */
+#if BYTE_ORDER == BIG_ENDIAN
+ u_int pim_vers:4, /* PIM protocol version */
+ pim_type:4; /* PIM message type */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_int pim_type:4, /* PIM message type */
+ pim_vers:4; /* PIM protocol version */
+#endif
+#endif /* ! _PIM_VT */
+ uint8_t pim_reserved; /* Reserved */
+ uint16_t pim_cksum; /* IP-style checksum */
+};
+/* KAME-related name backward compatibility */
+#define pim_ver pim_vers
+#define pim_rsv pim_reserved
+
+#ifdef _PIM_VT
+#define PIM_MAKE_VT(v, t) (0xff & (((v) << 4) | (0x0f & (t))))
+#define PIM_VT_V(x) (((x) >> 4) & 0x0f)
+#define PIM_VT_T(x) ((x) & 0x0f)
+#endif /* _PIM_VT */
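+
+/*
+ * Example (illustrative): with _PIM_VT defined, a version-2 Register
+ * message would carry pim_vt = PIM_MAKE_VT(2, 1) == 0x21, and the fields
+ * are recovered with PIM_VT_V(0x21) == 2 and PIM_VT_T(0x21) == 1.
+ */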
+
+#define PIM_VERSION 2
+#define PIM_MINLEN 8 /* PIM message min. length */
+#define PIM_REG_MINLEN (PIM_MINLEN+20) /* PIM Register hdr + inner IPv4 hdr */
+#define PIM6_REG_MINLEN (PIM_MINLEN+40) /* PIM Register hdr + inner IPv6 hdr */
+
+/*
+ * PIM message types
+ */
+#define PIM_HELLO 0x0 /* PIM-SM and PIM-DM */
+#define PIM_REGISTER 0x1 /* PIM-SM only */
+#define PIM_REGISTER_STOP 0x2 /* PIM-SM only */
+#define PIM_JOIN_PRUNE 0x3 /* PIM-SM and PIM-DM */
+#define PIM_BOOTSTRAP 0x4 /* PIM-SM only */
+#define PIM_ASSERT 0x5 /* PIM-SM and PIM-DM */
+#define PIM_GRAFT 0x6 /* PIM-DM only */
+#define PIM_GRAFT_ACK 0x7 /* PIM-DM only */
+#define PIM_CAND_RP_ADV 0x8 /* PIM-SM only */
+#define PIM_ALL_DF_ELECTION 0xa /* Bidir-PIM-SM only */
+
+/*
+ * PIM-Register message flags
+ */
+#define PIM_BORDER_REGISTER 0x80000000U /* The Border bit (host-order) */
+#define PIM_NULL_REGISTER 0x40000000U /* The Null-Register bit (host-order)*/
+
+/*
+ * All-PIM-Routers IPv4 and IPv6 multicast addresses
+ */
+#define INADDR_ALLPIM_ROUTERS_GROUP (uint32_t)0xe000000dU /* 224.0.0.13 */
+#define IN6ADDR_LINKLOCAL_ALLPIM_ROUTERS "ff02::d"
+#define IN6ADDR_LINKLOCAL_ALLPIM_ROUTERS_INIT \
+ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d }}}
+
+#endif /* _NETINET_PIM_HH_ */
diff --git a/freebsd/sys/netinet/pim_var.h b/freebsd/sys/netinet/pim_var.h
new file mode 100644
index 00000000..9d80bbb2
--- /dev/null
+++ b/freebsd/sys/netinet/pim_var.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 1998-2000
+ * University of Southern California/Information Sciences Institute.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_PIM_VAR_HH_
+#define _NETINET_PIM_VAR_HH_
+
+/*
+ * Protocol Independent Multicast (PIM),
+ * kernel variables and implementation-specific definitions.
+ *
+ * Written by George Edmond Eddy (Rusty), ISI, February 1998.
+ * Modified by Pavlin Radoslavov, USC/ISI, May 1998, Aug 1999, October 2000.
+ * Modified by Hitoshi Asaeda, WIDE, August 1998.
+ */
+
+/*
+ * PIM statistics kept in the kernel
+ */
+struct pimstat {
+ u_quad_t pims_rcv_total_msgs; /* total PIM messages received */
+ u_quad_t pims_rcv_total_bytes; /* total PIM bytes received */
+ u_quad_t pims_rcv_tooshort; /* rcvd with too few bytes */
+ u_quad_t pims_rcv_badsum; /* rcvd with bad checksum */
+ u_quad_t pims_rcv_badversion; /* rcvd bad PIM version */
+ u_quad_t pims_rcv_registers_msgs; /* rcvd regs. msgs (data only) */
+ u_quad_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only) */
+ u_quad_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif */
+ u_quad_t pims_rcv_badregisters; /* rcvd invalid registers */
+ u_quad_t pims_snd_registers_msgs; /* sent regs. msgs (data only) */
+ u_quad_t pims_snd_registers_bytes; /* sent regs. bytes (data only) */
+};
+
+#ifdef _KERNEL
+#define PIMSTAT_ADD(name, val) V_pimstat.name += (val)
+#define PIMSTAT_INC(name) PIMSTAT_ADD(name, 1)
+#endif
+
+/*
+ * Names for PIM sysctl objects
+ */
+#define PIMCTL_STATS 1 /* statistics (read-only) */
+#define PIMCTL_MAXID 2
+
+#define PIMCTL_NAMES { \
+ { 0, 0 }, \
+ { "stats", CTLTYPE_STRUCT }, \
+}
+
+#ifdef _KERNEL
+
+void pim_input(struct mbuf *, int);
+SYSCTL_DECL(_net_inet_pim);
+#endif
+
+#endif /* _NETINET_PIM_VAR_HH_ */
diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c
new file mode 100644
index 00000000..fb90880f
--- /dev/null
+++ b/freebsd/sys/netinet/raw_ip.c
@@ -0,0 +1,1116 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_mroute.h>
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#endif /*IPSEC*/
+
+#include <freebsd/security/mac/mac_framework.h>
+
+VNET_DEFINE(struct inpcbhead, ripcb);
+VNET_DEFINE(struct inpcbinfo, ripcbinfo);
+
+#define V_ripcb VNET(ripcb)
+#define V_ripcbinfo VNET(ripcbinfo)
+
+/*
+ * Control and data hooks for ipfw, dummynet, divert and so on.
+ * The data hooks are not used here but it is convenient
+ * to keep them all in one place.
+ */
+VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
+VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
+
+int (*ip_dn_ctl_ptr)(struct sockopt *);
+int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
+void (*ip_divert_ptr)(struct mbuf *, int);
+int (*ng_ipfw_input_p)(struct mbuf **, int,
+ struct ip_fw_args *, int);
+
+/*
+ * Hooks for multicast routing. They all default to NULL, so leave them not
+ * initialized and rely on BSS being set to 0.
+ */
+
+/*
+ * The socket used to communicate with the multicast routing daemon.
+ */
+VNET_DEFINE(struct socket *, ip_mrouter);
+
+/*
+ * The various mrouter and rsvp functions.
+ */
+int (*ip_mrouter_set)(struct socket *, struct sockopt *);
+int (*ip_mrouter_get)(struct socket *, struct sockopt *);
+int (*ip_mrouter_done)(void);
+int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *);
+int (*mrt_ioctl)(u_long, caddr_t, int);
+int (*legal_vif_num)(int);
+u_long (*ip_mcast_src)(int);
+
+void (*rsvp_input_p)(struct mbuf *m, int off);
+int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
+void (*ip_rsvp_force_done)(struct socket *);
+
+/*
+ * Hash functions
+ */
+
+#define INP_PCBHASH_RAW_SIZE 256
+#define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
+ (((proto) + (laddr) + (faddr)) % (mask) + 1)
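+/*
+ * The raw hash spreads fully specified PCBs (protocol, local and foreign
+ * address all set) over buckets 1..mask; bucket 0 is reserved for PCBs
+ * with a wildcard in any of those fields (see rip_inshash() below), so
+ * rip_input() only has to scan the matching bucket and bucket 0.
+ */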
+
+static void
+rip_inshash(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+ struct inpcbhead *pcbhash;
+ int hash;
+
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ if (inp->inp_ip_p != 0 &&
+ inp->inp_laddr.s_addr != INADDR_ANY &&
+ inp->inp_faddr.s_addr != INADDR_ANY) {
+ hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
+ inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask);
+ } else
+ hash = 0;
+ pcbhash = &pcbinfo->ipi_hashbase[hash];
+ LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
+}
+
+static void
+rip_delhash(struct inpcb *inp)
+{
+
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ LIST_REMOVE(inp, inp_hash);
+}
+
+/*
+ * Raw interface to IP protocol.
+ */
+
+/*
+ * Initialize raw connection block q.
+ */
+static void
+rip_zone_change(void *tag)
+{
+
+ uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
+}
+
+static int
+rip_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "rawinp");
+ return (0);
+}
+
+void
+rip_init(void)
+{
+
+ INP_INFO_LOCK_INIT(&V_ripcbinfo, "rip");
+ LIST_INIT(&V_ripcb);
+#ifdef VIMAGE
+ V_ripcbinfo.ipi_vnet = curvnet;
+#endif
+ V_ripcbinfo.ipi_listhead = &V_ripcb;
+ V_ripcbinfo.ipi_hashbase =
+ hashinit(INP_PCBHASH_RAW_SIZE, M_PCB, &V_ripcbinfo.ipi_hashmask);
+ V_ripcbinfo.ipi_porthashbase =
+ hashinit(1, M_PCB, &V_ripcbinfo.ipi_porthashmask);
+ V_ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
+ NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
+ EVENTHANDLER_PRI_ANY);
+}
+
+#ifdef VIMAGE
+void
+rip_destroy(void)
+{
+
+ hashdestroy(V_ripcbinfo.ipi_hashbase, M_PCB,
+ V_ripcbinfo.ipi_hashmask);
+ hashdestroy(V_ripcbinfo.ipi_porthashbase, M_PCB,
+ V_ripcbinfo.ipi_porthashmask);
+}
+#endif
+
+static int
+rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
+ struct sockaddr_in *ripsrc)
+{
+ int policyfail = 0;
+
+ INP_RLOCK_ASSERT(last);
+
+#ifdef IPSEC
+ /* check AH/ESP integrity. */
+ if (ipsec4_in_reject(n, last)) {
+ policyfail = 1;
+ }
+#endif /* IPSEC */
+#ifdef MAC
+ if (!policyfail && mac_inpcb_check_deliver(last, n) != 0)
+ policyfail = 1;
+#endif
+ /* Check the minimum TTL for socket. */
+ if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl)
+ policyfail = 1;
+ if (!policyfail) {
+ struct mbuf *opts = NULL;
+ struct socket *so;
+
+ so = last->inp_socket;
+ if ((last->inp_flags & INP_CONTROLOPTS) ||
+ (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
+ ip_savecontrol(last, &opts, ip, n);
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (sbappendaddr_locked(&so->so_rcv,
+ (struct sockaddr *)ripsrc, n, opts) == 0) {
+ /* should notify about lost packet */
+ m_freem(n);
+ if (opts)
+ m_freem(opts);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ } else
+ sorwakeup_locked(so);
+ } else
+ m_freem(n);
+ return (policyfail);
+}
+
+/*
+ * Set up generic address and protocol structures for the raw_input routine,
+ * then pass them along with the mbuf chain.
+ */
+void
+rip_input(struct mbuf *m, int off)
+{
+ struct ifnet *ifp;
+ struct ip *ip = mtod(m, struct ip *);
+ int proto = ip->ip_p;
+ struct inpcb *inp, *last;
+ struct sockaddr_in ripsrc;
+ int hash;
+
+ bzero(&ripsrc, sizeof(ripsrc));
+ ripsrc.sin_len = sizeof(ripsrc);
+ ripsrc.sin_family = AF_INET;
+ ripsrc.sin_addr = ip->ip_src;
+ last = NULL;
+
+ ifp = m->m_pkthdr.rcvif;
+
+ hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
+ INP_INFO_RLOCK(&V_ripcbinfo);
+ LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
+ if (inp->inp_ip_p != proto)
+ continue;
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
+ continue;
+ if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
+ continue;
+ if (jailed_without_vnet(inp->inp_cred)) {
+ /*
+ * XXX: If faddr was bound to multicast group,
+ * jailed raw socket will drop datagram.
+ */
+ if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
+ continue;
+ }
+ if (last != NULL) {
+ struct mbuf *n;
+
+ n = m_copy(m, 0, (int)M_COPYALL);
+ if (n != NULL)
+ (void) rip_append(last, ip, n, &ripsrc);
+ /* XXX count dropped packet */
+ INP_RUNLOCK(last);
+ }
+ INP_RLOCK(inp);
+ last = inp;
+ }
+ LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
+ if (inp->inp_ip_p && inp->inp_ip_p != proto)
+ continue;
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (!in_nullhost(inp->inp_laddr) &&
+ !in_hosteq(inp->inp_laddr, ip->ip_dst))
+ continue;
+ if (!in_nullhost(inp->inp_faddr) &&
+ !in_hosteq(inp->inp_faddr, ip->ip_src))
+ continue;
+ if (jailed_without_vnet(inp->inp_cred)) {
+ /*
+ * Allow raw socket in jail to receive multicast;
+ * assume process had PRIV_NETINET_RAW at attach,
+ * and fall through into normal filter path if so.
+ */
+ if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
+ prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
+ continue;
+ }
+ /*
+ * If this raw socket has multicast state, and we
+ * have received a multicast, check if this socket
+ * should receive it, as multicast filtering is now
+ * the responsibility of the transport layer.
+ */
+ if (inp->inp_moptions != NULL &&
+ IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ /*
+ * If the incoming datagram is for IGMP, allow it
+ * through unconditionally to the raw socket.
+ *
+ * In the case of IGMPv2, we may not have explicitly
+ * joined the group, and may have set IFF_ALLMULTI
+ * on the interface. imo_multi_filter() may discard
+ * control traffic we actually need to see.
+ *
+			 * Userland multicast routing daemons should continue
+			 * to filter the control traffic appropriately.
+ */
+ int blocked;
+
+ blocked = MCAST_PASS;
+ if (proto != IPPROTO_IGMP) {
+ struct sockaddr_in group;
+
+ bzero(&group, sizeof(struct sockaddr_in));
+ group.sin_len = sizeof(struct sockaddr_in);
+ group.sin_family = AF_INET;
+ group.sin_addr = ip->ip_dst;
+
+ blocked = imo_multi_filter(inp->inp_moptions,
+ ifp,
+ (struct sockaddr *)&group,
+ (struct sockaddr *)&ripsrc);
+ }
+
+ if (blocked != MCAST_PASS) {
+ IPSTAT_INC(ips_notmember);
+ continue;
+ }
+ }
+ if (last != NULL) {
+ struct mbuf *n;
+
+ n = m_copy(m, 0, (int)M_COPYALL);
+ if (n != NULL)
+ (void) rip_append(last, ip, n, &ripsrc);
+ /* XXX count dropped packet */
+ INP_RUNLOCK(last);
+ }
+ INP_RLOCK(inp);
+ last = inp;
+ }
+ INP_INFO_RUNLOCK(&V_ripcbinfo);
+ if (last != NULL) {
+ if (rip_append(last, ip, m, &ripsrc) != 0)
+ IPSTAT_INC(ips_delivered);
+ INP_RUNLOCK(last);
+ } else {
+ m_freem(m);
+ IPSTAT_INC(ips_noproto);
+ IPSTAT_DEC(ips_delivered);
+ }
+}
+
+/*
+ * Generate IP header and pass packet to ip_output. Tack on options user may
+ * have set up with a control call.
+ */
+int
+rip_output(struct mbuf *m, struct socket *so, u_long dst)
+{
+ struct ip *ip;
+ int error;
+ struct inpcb *inp = sotoinpcb(so);
+ int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
+ IP_ALLOWBROADCAST;
+
+ /*
+ * If the user handed us a complete IP packet, use it. Otherwise,
+ * allocate an mbuf for a header and fill it in.
+ */
+ if ((inp->inp_flags & INP_HDRINCL) == 0) {
+ if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
+ m_freem(m);
+ return(EMSGSIZE);
+ }
+ M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+ if (m == NULL)
+ return(ENOBUFS);
+
+ INP_RLOCK(inp);
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = inp->inp_ip_tos;
+ if (inp->inp_flags & INP_DONTFRAG)
+ ip->ip_off = IP_DF;
+ else
+ ip->ip_off = 0;
+ ip->ip_p = inp->inp_ip_p;
+ ip->ip_len = m->m_pkthdr.len;
+ ip->ip_src = inp->inp_laddr;
+ if (jailed(inp->inp_cred)) {
+ /*
+ * prison_local_ip4() would be good enough but would
+ * let a source of INADDR_ANY pass, which we do not
+ * want to see from jails. We do not go through the
+ * pain of in_pcbladdr() for raw sockets.
+ */
+ if (ip->ip_src.s_addr == INADDR_ANY)
+ error = prison_get_ip4(inp->inp_cred,
+ &ip->ip_src);
+ else
+ error = prison_local_ip4(inp->inp_cred,
+ &ip->ip_src);
+ if (error != 0) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (error);
+ }
+ }
+ ip->ip_dst.s_addr = dst;
+ ip->ip_ttl = inp->inp_ip_ttl;
+ } else {
+ if (m->m_pkthdr.len > IP_MAXPACKET) {
+ m_freem(m);
+ return(EMSGSIZE);
+ }
+ INP_RLOCK(inp);
+ ip = mtod(m, struct ip *);
+ error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
+ if (error != 0) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (error);
+ }
+
+ /*
+ * Don't allow both user specified and setsockopt options,
+ * and don't allow packet length sizes that will crash.
+ */
+ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
+ || (ip->ip_len > m->m_pkthdr.len)
+ || (ip->ip_len < (ip->ip_hl << 2))) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (EINVAL);
+ }
+ if (ip->ip_id == 0)
+ ip->ip_id = ip_newid();
+
+ /*
+ * XXX prevent ip_output from overwriting header fields.
+ */
+ flags |= IP_RAWOUTPUT;
+ IPSTAT_INC(ips_rawout);
+ }
+
+ if (inp->inp_flags & INP_ONESBCAST)
+ flags |= IP_SENDONES;
+
+#ifdef MAC
+ mac_inpcb_create_mbuf(inp, m);
+#endif
+
+ error = ip_output(m, inp->inp_options, NULL, flags,
+ inp->inp_moptions, inp);
+ INP_RUNLOCK(inp);
+ return (error);
+}
+
+/*
+ * Raw IP socket option processing.
+ *
+ * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
+ * only be created by a privileged process, and as such, socket option
+ * operations to manage system properties on any raw socket were allowed to
+ * take place without explicit additional access control checks. However,
+ * raw sockets can now also be created in jail(), and therefore explicit
+ * checks are now required. Likewise, raw sockets can be used by a process
+ * after it gives up privilege, so some caution is required. For options
+ * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
+ * performed in ip_ctloutput() and therefore no check occurs here.
+ * Unilaterally checking priv_check() here breaks normal IP socket option
+ * operations on raw sockets.
+ *
+ * When adding new socket options here, make sure to add access control
+ * checks here as necessary.
+ */
+int
+rip_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ struct inpcb *inp = sotoinpcb(so);
+ int error, optval;
+
+ if (sopt->sopt_level != IPPROTO_IP) {
+ if ((sopt->sopt_level == SOL_SOCKET) &&
+ (sopt->sopt_name == SO_SETFIB)) {
+ inp->inp_inc.inc_fibnum = so->so_fibnum;
+ return (0);
+ }
+ return (EINVAL);
+ }
+
+ error = 0;
+ switch (sopt->sopt_dir) {
+ case SOPT_GET:
+ switch (sopt->sopt_name) {
+ case IP_HDRINCL:
+ optval = inp->inp_flags & INP_HDRINCL;
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+
+ case IP_FW3: /* generic ipfw v.3 functions */
+ case IP_FW_ADD: /* ADD actually returns the body... */
+ case IP_FW_GET:
+ case IP_FW_TABLE_GETSIZE:
+ case IP_FW_TABLE_LIST:
+ case IP_FW_NAT_GET_CONFIG:
+ case IP_FW_NAT_GET_LOG:
+ if (V_ip_fw_ctl_ptr != NULL)
+ error = V_ip_fw_ctl_ptr(sopt);
+ else
+ error = ENOPROTOOPT;
+ break;
+
+ case IP_DUMMYNET3: /* generic dummynet v.3 functions */
+ case IP_DUMMYNET_GET:
+ if (ip_dn_ctl_ptr != NULL)
+ error = ip_dn_ctl_ptr(sopt);
+ else
+ error = ENOPROTOOPT;
+ break ;
+
+ case MRT_INIT:
+ case MRT_DONE:
+ case MRT_ADD_VIF:
+ case MRT_DEL_VIF:
+ case MRT_ADD_MFC:
+ case MRT_DEL_MFC:
+ case MRT_VERSION:
+ case MRT_ASSERT:
+ case MRT_API_SUPPORT:
+ case MRT_API_CONFIG:
+ case MRT_ADD_BW_UPCALL:
+ case MRT_DEL_BW_UPCALL:
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
+ if (error != 0)
+ return (error);
+ error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
+ EOPNOTSUPP;
+ break;
+
+ default:
+ error = ip_ctloutput(so, sopt);
+ break;
+ }
+ break;
+
+ case SOPT_SET:
+ switch (sopt->sopt_name) {
+ case IP_HDRINCL:
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ break;
+ if (optval)
+ inp->inp_flags |= INP_HDRINCL;
+ else
+ inp->inp_flags &= ~INP_HDRINCL;
+ break;
+
+ case IP_FW3: /* generic ipfw v.3 functions */
+ case IP_FW_ADD:
+ case IP_FW_DEL:
+ case IP_FW_FLUSH:
+ case IP_FW_ZERO:
+ case IP_FW_RESETLOG:
+ case IP_FW_TABLE_ADD:
+ case IP_FW_TABLE_DEL:
+ case IP_FW_TABLE_FLUSH:
+ case IP_FW_NAT_CFG:
+ case IP_FW_NAT_DEL:
+ if (V_ip_fw_ctl_ptr != NULL)
+ error = V_ip_fw_ctl_ptr(sopt);
+ else
+ error = ENOPROTOOPT;
+ break;
+
+ case IP_DUMMYNET3: /* generic dummynet v.3 functions */
+ case IP_DUMMYNET_CONFIGURE:
+ case IP_DUMMYNET_DEL:
+ case IP_DUMMYNET_FLUSH:
+ if (ip_dn_ctl_ptr != NULL)
+ error = ip_dn_ctl_ptr(sopt);
+ else
+ error = ENOPROTOOPT ;
+ break ;
+
+ case IP_RSVP_ON:
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
+ if (error != 0)
+ return (error);
+ error = ip_rsvp_init(so);
+ break;
+
+ case IP_RSVP_OFF:
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
+ if (error != 0)
+ return (error);
+ error = ip_rsvp_done();
+ break;
+
+ case IP_RSVP_VIF_ON:
+ case IP_RSVP_VIF_OFF:
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
+ if (error != 0)
+ return (error);
+ error = ip_rsvp_vif ?
+ ip_rsvp_vif(so, sopt) : EINVAL;
+ break;
+
+ case MRT_INIT:
+ case MRT_DONE:
+ case MRT_ADD_VIF:
+ case MRT_DEL_VIF:
+ case MRT_ADD_MFC:
+ case MRT_DEL_MFC:
+ case MRT_VERSION:
+ case MRT_ASSERT:
+ case MRT_API_SUPPORT:
+ case MRT_API_CONFIG:
+ case MRT_ADD_BW_UPCALL:
+ case MRT_DEL_BW_UPCALL:
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
+ if (error != 0)
+ return (error);
+ error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
+ EOPNOTSUPP;
+ break;
+
+ default:
+ error = ip_ctloutput(so, sopt);
+ break;
+ }
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * This function exists solely to receive the PRC_IFDOWN messages which are
+ * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls
+ * in_ifadown() to remove all routes corresponding to that address. It also
+ * receives the PRC_IFUP messages from if_up() and reinstalls the interface
+ * routes.
+ */
+void
+rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+ struct in_ifaddr *ia;
+ struct ifnet *ifp;
+ int err;
+ int flags;
+
+ switch (cmd) {
+ case PRC_IFDOWN:
+ IN_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if (ia->ia_ifa.ifa_addr == sa
+ && (ia->ia_flags & IFA_ROUTE)) {
+ ifa_ref(&ia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+ /*
+ * in_ifscrub kills the interface route.
+ */
+ in_ifscrub(ia->ia_ifp, ia);
+ /*
+ * in_ifadown gets rid of all the rest of the
+ * routes. This is not quite the right thing
+ * to do, but at least if we are running a
+ * routing process they will come back.
+ */
+ in_ifadown(&ia->ia_ifa, 0);
+ ifa_free(&ia->ia_ifa);
+ break;
+ }
+ }
+ if (ia == NULL) /* If ia matched, already unlocked. */
+ IN_IFADDR_RUNLOCK();
+ break;
+
+ case PRC_IFUP:
+ IN_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if (ia->ia_ifa.ifa_addr == sa)
+ break;
+ }
+ if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) {
+ IN_IFADDR_RUNLOCK();
+ return;
+ }
+ ifa_ref(&ia->ia_ifa);
+ IN_IFADDR_RUNLOCK();
+ flags = RTF_UP;
+ ifp = ia->ia_ifa.ifa_ifp;
+
+ if ((ifp->if_flags & IFF_LOOPBACK)
+ || (ifp->if_flags & IFF_POINTOPOINT))
+ flags |= RTF_HOST;
+
+ err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
+ if (err == 0)
+ ia->ia_flags |= IFA_ROUTE;
+ err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
+ ifa_free(&ia->ia_ifa);
+ break;
+ }
+}
+
+u_long rip_sendspace = 9216;
+u_long rip_recvspace = 9216;
+
+SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
+ &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
+SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
+ &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
+
+static int
+rip_attach(struct socket *so, int proto, struct thread *td)
+{
+ struct inpcb *inp;
+ int error;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
+
+ error = priv_check(td, PRIV_NETINET_RAW);
+ if (error)
+ return (error);
+ if (proto >= IPPROTO_MAX || proto < 0)
+ return EPROTONOSUPPORT;
+ error = soreserve(so, rip_sendspace, rip_recvspace);
+ if (error)
+ return (error);
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ error = in_pcballoc(so, &V_ripcbinfo);
+ if (error) {
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+ return (error);
+ }
+ inp = (struct inpcb *)so->so_pcb;
+ inp->inp_vflag |= INP_IPV4;
+ inp->inp_ip_p = proto;
+ inp->inp_ip_ttl = V_ip_defttl;
+ rip_inshash(inp);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+ INP_WUNLOCK(inp);
+ return (0);
+}
+
+static void
+rip_detach(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
+ KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
+ ("rip_detach: not closed"));
+
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ INP_WLOCK(inp);
+ rip_delhash(inp);
+ if (so == V_ip_mrouter && ip_mrouter_done)
+ ip_mrouter_done();
+ if (ip_rsvp_force_done)
+ ip_rsvp_force_done(so);
+ if (so == V_ip_rsvpd)
+ ip_rsvp_done();
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+}
+
+static void
+rip_dodisconnect(struct socket *so, struct inpcb *inp)
+{
+
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ rip_delhash(inp);
+ inp->inp_faddr.s_addr = INADDR_ANY;
+ rip_inshash(inp);
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_ISCONNECTED;
+ SOCK_UNLOCK(so);
+}
+
+static void
+rip_abort(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
+
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ INP_WLOCK(inp);
+ rip_dodisconnect(so, inp);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+}
+
+static void
+rip_close(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_close: inp == NULL"));
+
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ INP_WLOCK(inp);
+ rip_dodisconnect(so, inp);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+}
+
+static int
+rip_disconnect(struct socket *so)
+{
+ struct inpcb *inp;
+
+ if ((so->so_state & SS_ISCONNECTED) == 0)
+ return (ENOTCONN);
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
+
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ INP_WLOCK(inp);
+ rip_dodisconnect(so, inp);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+ return (0);
+}
+
+static int
+rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *)nam;
+ struct inpcb *inp;
+ int error;
+
+ if (nam->sa_len != sizeof(*addr))
+ return (EINVAL);
+
+ error = prison_check_ip4(td->td_ucred, &addr->sin_addr);
+ if (error != 0)
+ return (error);
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
+
+ if (TAILQ_EMPTY(&V_ifnet) ||
+ (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
+ (addr->sin_addr.s_addr &&
+ (inp->inp_flags & INP_BINDANY) == 0 &&
+ ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
+ return (EADDRNOTAVAIL);
+
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ INP_WLOCK(inp);
+ rip_delhash(inp);
+ inp->inp_laddr = addr->sin_addr;
+ rip_inshash(inp);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+ return (0);
+}
+
+static int
+rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *)nam;
+ struct inpcb *inp;
+
+ if (nam->sa_len != sizeof(*addr))
+ return (EINVAL);
+ if (TAILQ_EMPTY(&V_ifnet))
+ return (EADDRNOTAVAIL);
+ if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
+ return (EAFNOSUPPORT);
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
+
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ INP_WLOCK(inp);
+ rip_delhash(inp);
+ inp->inp_faddr = addr->sin_addr;
+ rip_inshash(inp);
+ soisconnected(so);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+ return (0);
+}
+
+static int
+rip_shutdown(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
+
+ INP_WLOCK(inp);
+ socantsendmore(so);
+ INP_WUNLOCK(inp);
+ return (0);
+}
+
+static int
+rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
+ struct mbuf *control, struct thread *td)
+{
+ struct inpcb *inp;
+ u_long dst;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_send: inp == NULL"));
+
+ /*
+ * Note: 'dst' reads below are unlocked.
+ */
+ if (so->so_state & SS_ISCONNECTED) {
+ if (nam) {
+ m_freem(m);
+ return (EISCONN);
+ }
+ dst = inp->inp_faddr.s_addr; /* Unlocked read. */
+ } else {
+ if (nam == NULL) {
+ m_freem(m);
+ return (ENOTCONN);
+ }
+ dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
+ }
+ return (rip_output(m, so, dst));
+}
+
+static int
+rip_pcblist(SYSCTL_HANDLER_ARGS)
+{
+ int error, i, n;
+ struct inpcb *inp, **inp_list;
+ inp_gen_t gencnt;
+ struct xinpgen xig;
+
+ /*
+ * The process of preparing the TCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == 0) {
+ n = V_ripcbinfo.ipi_count;
+ n += imax(n / 8, 10);
+ req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
+ return (0);
+ }
+
+ if (req->newptr != 0)
+ return (EPERM);
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ INP_INFO_RLOCK(&V_ripcbinfo);
+ gencnt = V_ripcbinfo.ipi_gencnt;
+ n = V_ripcbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&V_ripcbinfo);
+
+ xig.xig_len = sizeof xig;
+ xig.xig_count = n;
+ xig.xig_gen = gencnt;
+ xig.xig_sogen = so_gencnt;
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ if (error)
+ return (error);
+
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == 0)
+ return (ENOMEM);
+
+ INP_INFO_RLOCK(&V_ripcbinfo);
+ for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = LIST_NEXT(inp, inp_list)) {
+ INP_WLOCK(inp);
+ if (inp->inp_gencnt <= gencnt &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
+ in_pcbref(inp);
+ inp_list[i++] = inp;
+ }
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(&V_ripcbinfo);
+ n = i;
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (inp->inp_gencnt <= gencnt) {
+ struct xinpcb xi;
+
+ bzero(&xi, sizeof(xi));
+ xi.xi_len = sizeof xi;
+ /* XXX should avoid extra copy */
+ bcopy(inp, &xi.xi_inp, sizeof *inp);
+ if (inp->inp_socket)
+ sotoxsocket(inp->inp_socket, &xi.xi_socket);
+ INP_RUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xi, sizeof xi);
+ } else
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_WLOCK(inp);
+ if (!in_pcbrele(inp))
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
+
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state. If the
+ * generation differs from what we told her before, she knows
+ * that something happened while we were processing this
+ * request, and it might be necessary to retry.
+ */
+ INP_INFO_RLOCK(&V_ripcbinfo);
+ xig.xig_gen = V_ripcbinfo.ipi_gencnt;
+ xig.xig_sogen = so_gencnt;
+ xig.xig_count = V_ripcbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&V_ripcbinfo);
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ }
+ free(inp_list, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
+ rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
+
+struct pr_usrreqs rip_usrreqs = {
+ .pru_abort = rip_abort,
+ .pru_attach = rip_attach,
+ .pru_bind = rip_bind,
+ .pru_connect = rip_connect,
+ .pru_control = in_control,
+ .pru_detach = rip_detach,
+ .pru_disconnect = rip_disconnect,
+ .pru_peeraddr = in_getpeeraddr,
+ .pru_send = rip_send,
+ .pru_shutdown = rip_shutdown,
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = rip_close,
+};
diff --git a/freebsd/sys/netinet/sctp.h b/freebsd/sys/netinet/sctp.h
new file mode 100644
index 00000000..bf188a23
--- /dev/null
+++ b/freebsd/sys/netinet/sctp.h
@@ -0,0 +1,549 @@
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* $KAME: sctp.h,v 1.18 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef _NETINET_SCTP_HH_
+#define _NETINET_SCTP_HH_
+
+#include <freebsd/sys/types.h>
+
+
+#define SCTP_PACKED __attribute__((packed))
+
+/*
+ * SCTP protocol - RFC2960.
+ */
+struct sctphdr {
+ uint16_t src_port; /* source port */
+ uint16_t dest_port; /* destination port */
+ uint32_t v_tag; /* verification tag of packet */
+ uint32_t checksum; /* Adler32 C-Sum */
+ /* chunks follow... */
+} SCTP_PACKED;
+
+/*
+ * SCTP Chunks
+ */
+struct sctp_chunkhdr {
+ uint8_t chunk_type; /* chunk type */
+ uint8_t chunk_flags; /* chunk flags */
+ uint16_t chunk_length; /* chunk length */
+ /* optional params follow */
+} SCTP_PACKED;
+
+/*
+ * SCTP chunk parameters
+ */
+struct sctp_paramhdr {
+ uint16_t param_type; /* parameter type */
+ uint16_t param_length; /* parameter length */
+} SCTP_PACKED;
+
+/*
+ * user socket options: socket API defined
+ */
+/*
+ * read-write options
+ */
+#define SCTP_RTOINFO 0x00000001
+#define SCTP_ASSOCINFO 0x00000002
+#define SCTP_INITMSG 0x00000003
+#define SCTP_NODELAY 0x00000004
+#define SCTP_AUTOCLOSE 0x00000005
+#define SCTP_SET_PEER_PRIMARY_ADDR 0x00000006
+#define SCTP_PRIMARY_ADDR 0x00000007
+#define SCTP_ADAPTATION_LAYER 0x00000008
+/* same as above */
+#define SCTP_ADAPTION_LAYER 0x00000008
+#define SCTP_DISABLE_FRAGMENTS 0x00000009
+#define SCTP_PEER_ADDR_PARAMS 0x0000000a
+#define SCTP_DEFAULT_SEND_PARAM 0x0000000b
+/* ancillary data/notification interest options */
+#define SCTP_EVENTS 0x0000000c
+/* Without this applied we will give V4 and V6 addresses on a V6 socket */
+#define SCTP_I_WANT_MAPPED_V4_ADDR 0x0000000d
+#define SCTP_MAXSEG 0x0000000e
+#define SCTP_DELAYED_SACK 0x0000000f
+#define SCTP_FRAGMENT_INTERLEAVE 0x00000010
+#define SCTP_PARTIAL_DELIVERY_POINT 0x00000011
+/* authentication support */
+#define SCTP_AUTH_CHUNK 0x00000012
+#define SCTP_AUTH_KEY 0x00000013
+#define SCTP_HMAC_IDENT 0x00000014
+#define SCTP_AUTH_ACTIVE_KEY 0x00000015
+#define SCTP_AUTH_DELETE_KEY 0x00000016
+#define SCTP_USE_EXT_RCVINFO 0x00000017
+#define SCTP_AUTO_ASCONF 0x00000018 /* rw */
+#define SCTP_MAXBURST 0x00000019 /* rw */
+#define SCTP_MAX_BURST 0x00000019 /* rw */
+/* assoc level context */
+#define SCTP_CONTEXT 0x0000001a /* rw */
+/* explicit EOR signalling */
+#define SCTP_EXPLICIT_EOR 0x0000001b
+#define SCTP_REUSE_PORT 0x0000001c /* rw */
+#define SCTP_AUTH_DEACTIVATE_KEY 0x0000001d
+
+/*
+ * read-only options
+ */
+#define SCTP_STATUS 0x00000100
+#define SCTP_GET_PEER_ADDR_INFO 0x00000101
+/* authentication support */
+#define SCTP_PEER_AUTH_CHUNKS 0x00000102
+#define SCTP_LOCAL_AUTH_CHUNKS 0x00000103
+#define SCTP_GET_ASSOC_NUMBER 0x00000104 /* ro */
+#define SCTP_GET_ASSOC_ID_LIST 0x00000105 /* ro */
+#define SCTP_TIMEOUTS 0x00000106
+
+/*
+ * user socket options: BSD implementation specific
+ */
+/*
+ * Blocking I/O is enabled on any TCP-type socket by default. For the UDP
+ * model, if this option is turned on, the socket send buffer is shared
+ * amongst all associations. The default for the UDP model is as if
+ * SS_NBIO were set: every association has a separate send limit, but it
+ * will never block; instead you get EAGAIN back if you try to send too
+ * much. If you want blocking semantics, you set this option at the cost
+ * of sharing one socket send buffer size amongst all associations.
+ * Peeled-off sockets turn this option off and block, but since both TCP
+ * and peeled-off sockets have only one association per socket this is
+ * fine. It probably does NOT make sense to set this option on a TCP-model
+ * or peeled-off UDP-model socket, but we do allow you to do so. You just
+ * use the normal syscall to toggle SS_NBIO the way you want.
+ *
+ * Blocking I/O is controlled by the SS_NBIO flag on the socket state so_state
+ * field.
+ */
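+
+/*
+ * Sketch (illustrative, not part of the options above): SS_NBIO is toggled
+ * with the usual non-blocking I/O interfaces, e.g. from userland:
+ *
+ *      int fl = fcntl(sd, F_GETFL, 0);
+ *      fcntl(sd, F_SETFL, fl | O_NONBLOCK);  (set; clear with fl & ~O_NONBLOCK)
+ */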
+
+/* these should probably go into sockets API */
+#define SCTP_RESET_STREAMS 0x00001004 /* wo */
+
+
+/* here on down are more implementation specific */
+#define SCTP_SET_DEBUG_LEVEL 0x00001005
+#define SCTP_CLR_STAT_LOG 0x00001007
+/* CMT ON/OFF socket option */
+#define SCTP_CMT_ON_OFF 0x00001200
+#define SCTP_CMT_USE_DAC 0x00001201
+/* JRS - Pluggable Congestion Control Socket option */
+#define SCTP_PLUGGABLE_CC 0x00001202
+
+/* read only */
+#define SCTP_GET_SNDBUF_USE 0x00001101
+#define SCTP_GET_STAT_LOG 0x00001103
+#define SCTP_PCB_STATUS 0x00001104
+#define SCTP_GET_NONCE_VALUES 0x00001105
+
+
+/* Special hook for dynamically setting the primary for all assocs;
+ * this is a write-only option that requires root privilege.
+ */
+#define SCTP_SET_DYNAMIC_PRIMARY 0x00002001
+
+/* VRF (virtual router feature) and multi-VRF support
+ * options. VRF's provide splits within a router
+ * that give the views of multiple routers. A
+ * standard host, without VRF support, is just
+ * a single VRF. If VRF's are supported then
+ * the transport must be VRF aware. This means
+ * that every socket call coming in must be directed
+ * within the endpoint to one of the VRF's it belongs
+ * to. The endpoint, before binding, may select
+ * the "default" VRF it is in by using a set socket
+ * option with SCTP_VRF_ID. This will also
+ * get propagated to the default VRF. Once the
+ * endpoint binds an address then it CANNOT add
+ * additional VRF's to become a Multi-VRF endpoint.
+ *
+ * Before BINDING, additional VRF's can be added with
+ * the SCTP_ADD_VRF_ID call or deleted with
+ * SCTP_DEL_VRF_ID.
+ *
+ * Associations are ALWAYS contained inside a single
+ * VRF. They cannot reside in two (or more) VRF's. Incoming
+ * packets, assuming the router is VRF aware, can always
+ * tell us what VRF they arrived on. A host not supporting
+ * any VRF's will find that the packets always arrived on the
+ * single VRF that the host has.
+ *
+ */
+
+#define SCTP_VRF_ID 0x00003001
+#define SCTP_ADD_VRF_ID 0x00003002
+#define SCTP_GET_VRF_IDS 0x00003003
+#define SCTP_GET_ASOC_VRF 0x00003004
+#define SCTP_DEL_VRF_ID 0x00003005
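+
+/*
+ * Sketch (illustrative; assumes the option value is a 32-bit VRF id):
+ * selecting the endpoint's default VRF before binding, as described above,
+ * would look roughly like:
+ *
+ *      uint32_t vrf_id = 1;    (hypothetical VRF id)
+ *      setsockopt(sd, IPPROTO_SCTP, SCTP_VRF_ID, &vrf_id, sizeof(vrf_id));
+ */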
+
+/*
+ * If you enable packet logging you can get
+ * a poor man's ethereal output in binary
+ * form. Note this is a compile option to
+ * the kernel, SCTP_PACKET_LOGGING, and
+ * without it in your kernel you
+ * will get an EOPNOTSUPP error.
+ */
+#define SCTP_GET_PACKET_LOG 0x00004001
+
+/*
+ * hidden implementation-specific options; these are NOT user visible (they
+ * should move out of sctp.h)
+ */
+/* sctp_bindx() flags as hidden socket options */
+#define SCTP_BINDX_ADD_ADDR 0x00008001
+#define SCTP_BINDX_REM_ADDR 0x00008002
+/* Hidden socket option that gets the addresses */
+#define SCTP_GET_PEER_ADDRESSES 0x00008003
+#define SCTP_GET_LOCAL_ADDRESSES 0x00008004
+/* return the total count in bytes needed to hold all local addresses bound */
+#define SCTP_GET_LOCAL_ADDR_SIZE 0x00008005
+/* Return the total count in bytes needed to hold the remote address */
+#define SCTP_GET_REMOTE_ADDR_SIZE 0x00008006
+/* hidden option for connectx */
+#define SCTP_CONNECT_X 0x00008007
+/* hidden option for connectx_delayed, part of sendx */
+#define SCTP_CONNECT_X_DELAYED 0x00008008
+#define SCTP_CONNECT_X_COMPLETE 0x00008009
+/* hidden socket option based sctp_peeloff */
+#define SCTP_PEELOFF 0x0000800a
+/* the real worker for sctp_getaddrlen() */
+#define SCTP_GET_ADDR_LEN 0x0000800b
+/* temporary workaround for Apple listen() issue, no args used */
+#define SCTP_LISTEN_FIX 0x0000800c
+/* Debug things that need to be purged */
+#define SCTP_SET_INITIAL_DBG_SEQ 0x00009f00
+
+/* JRS - Supported congestion control modules for pluggable
+ * congestion control
+ */
+/* Standard TCP Congestion Control */
+#define SCTP_CC_RFC2581 0x00000000
+/* High Speed TCP Congestion Control (Floyd) */
+#define SCTP_CC_HSTCP 0x00000001
+/* HTCP Congestion Control */
+#define SCTP_CC_HTCP 0x00000002
+
+
+/* fragment interleave constants
+ * the setting must be one of these or
+ * EINVAL is returned.
+ */
+#define SCTP_FRAG_LEVEL_0 0x00000000
+#define SCTP_FRAG_LEVEL_1 0x00000001
+#define SCTP_FRAG_LEVEL_2 0x00000002
+
+/*
+ * user state values
+ */
+#define SCTP_CLOSED 0x0000
+#define SCTP_BOUND 0x1000
+#define SCTP_LISTEN 0x2000
+#define SCTP_COOKIE_WAIT 0x0002
+#define SCTP_COOKIE_ECHOED 0x0004
+#define SCTP_ESTABLISHED 0x0008
+#define SCTP_SHUTDOWN_SENT 0x0010
+#define SCTP_SHUTDOWN_RECEIVED 0x0020
+#define SCTP_SHUTDOWN_ACK_SENT 0x0040
+#define SCTP_SHUTDOWN_PENDING 0x0080
+
+/*
+ * SCTP operational error codes (user visible)
+ */
+#define SCTP_CAUSE_NO_ERROR 0x0000
+#define SCTP_CAUSE_INVALID_STREAM 0x0001
+#define SCTP_CAUSE_MISSING_PARAM 0x0002
+#define SCTP_CAUSE_STALE_COOKIE 0x0003
+#define SCTP_CAUSE_OUT_OF_RESC 0x0004
+#define SCTP_CAUSE_UNRESOLVABLE_ADDR 0x0005
+#define SCTP_CAUSE_UNRECOG_CHUNK 0x0006
+#define SCTP_CAUSE_INVALID_PARAM 0x0007
+#define SCTP_CAUSE_UNRECOG_PARAM 0x0008
+#define SCTP_CAUSE_NO_USER_DATA 0x0009
+#define SCTP_CAUSE_COOKIE_IN_SHUTDOWN 0x000a
+#define SCTP_CAUSE_RESTART_W_NEWADDR 0x000b
+#define SCTP_CAUSE_USER_INITIATED_ABT 0x000c
+#define SCTP_CAUSE_PROTOCOL_VIOLATION 0x000d
+
+/* Error causes from RFC5061 */
+#define SCTP_CAUSE_DELETING_LAST_ADDR 0x00a0
+#define SCTP_CAUSE_RESOURCE_SHORTAGE 0x00a1
+#define SCTP_CAUSE_DELETING_SRC_ADDR 0x00a2
+#define SCTP_CAUSE_ILLEGAL_ASCONF_ACK 0x00a3
+#define SCTP_CAUSE_REQUEST_REFUSED 0x00a4
+
+/* Error causes from nat-draft */
+#define SCTP_CAUSE_NAT_COLLIDING_STATE 0x00b0
+#define SCTP_CAUSE_NAT_MISSING_STATE 0x00b1
+
+/* Error causes from RFC4895 */
+#define SCTP_CAUSE_UNSUPPORTED_HMACID 0x0105
+
+/*
+ * error cause parameters (user visible)
+ */
+struct sctp_error_cause {
+ uint16_t code;
+ uint16_t length;
+ /* optional cause-specific info may follow */
+} SCTP_PACKED;
+
+struct sctp_error_invalid_stream {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_INVALID_STREAM */
+ uint16_t stream_id; /* stream id of the DATA in error */
+ uint16_t reserved;
+} SCTP_PACKED;
+
+struct sctp_error_missing_param {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_MISSING_PARAM */
+ uint32_t num_missing_params; /* number of missing parameters */
+ /* uint16_t param_type's follow */
+} SCTP_PACKED;
+
+struct sctp_error_stale_cookie {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_STALE_COOKIE */
+ uint32_t stale_time; /* time in usec of staleness */
+} SCTP_PACKED;
+
+struct sctp_error_out_of_resource {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_OUT_OF_RESOURCES */
+} SCTP_PACKED;
+
+struct sctp_error_unresolv_addr {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRESOLVABLE_ADDR */
+
+} SCTP_PACKED;
+
+struct sctp_error_unrecognized_chunk {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRECOG_CHUNK */
+ struct sctp_chunkhdr ch;/* header from chunk in error */
+} SCTP_PACKED;
+
+/*
+ * Main SCTP chunk types we place these here so natd and f/w's in user land
+ * can find them.
+ */
+/************0x00 series ***********/
+#define SCTP_DATA 0x00
+#define SCTP_INITIATION 0x01
+#define SCTP_INITIATION_ACK 0x02
+#define SCTP_SELECTIVE_ACK 0x03
+#define SCTP_HEARTBEAT_REQUEST 0x04
+#define SCTP_HEARTBEAT_ACK 0x05
+#define SCTP_ABORT_ASSOCIATION 0x06
+#define SCTP_SHUTDOWN 0x07
+#define SCTP_SHUTDOWN_ACK 0x08
+#define SCTP_OPERATION_ERROR 0x09
+#define SCTP_COOKIE_ECHO 0x0a
+#define SCTP_COOKIE_ACK 0x0b
+#define SCTP_ECN_ECHO 0x0c
+#define SCTP_ECN_CWR 0x0d
+#define SCTP_SHUTDOWN_COMPLETE 0x0e
+/* RFC4895 */
+#define SCTP_AUTHENTICATION 0x0f
+/* EY nr_sack chunk id*/
+#define SCTP_NR_SELECTIVE_ACK 0x10
+/************0x40 series ***********/
+/************0x80 series ***********/
+/* RFC5061 */
+#define SCTP_ASCONF_ACK 0x80
+/* draft-ietf-stewart-pktdrpsctp */
+#define SCTP_PACKET_DROPPED 0x81
+/* draft-ietf-stewart-strreset-xxx */
+#define SCTP_STREAM_RESET 0x82
+
+/* RFC4820 */
+#define SCTP_PAD_CHUNK 0x84
+/************0xc0 series ***********/
+/* RFC3758 */
+#define SCTP_FORWARD_CUM_TSN 0xc0
+/* RFC5061 */
+#define SCTP_ASCONF 0xc1
+
+
+/* ABORT and SHUTDOWN COMPLETE FLAG */
+#define SCTP_HAD_NO_TCB 0x01
+
+/* Packet dropped flags */
+#define SCTP_FROM_MIDDLE_BOX SCTP_HAD_NO_TCB
+#define SCTP_BADCRC 0x02
+#define SCTP_PACKET_TRUNCATED 0x04
+
+#define SCTP_SAT_NETWORK_MIN 400 /* min ms for RTT to set satellite
+ * time */
+#define SCTP_SAT_NETWORK_BURST_INCR 2 /* how many times to multiply maxburst
+ * in sat */
+
+/* Data Chunk Specific Flags */
+#define SCTP_DATA_FRAG_MASK 0x03
+#define SCTP_DATA_MIDDLE_FRAG 0x00
+#define SCTP_DATA_LAST_FRAG 0x01
+#define SCTP_DATA_FIRST_FRAG 0x02
+#define SCTP_DATA_NOT_FRAG 0x03
+#define SCTP_DATA_UNORDERED 0x04
+#define SCTP_DATA_SACK_IMMEDIATELY 0x08
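+/*
+ * For example, an unfragmented DATA chunk carries both the B and E bits
+ * (SCTP_DATA_NOT_FRAG, 0x03); the first piece of a fragmented user message
+ * carries SCTP_DATA_FIRST_FRAG (0x02), the last piece SCTP_DATA_LAST_FRAG
+ * (0x01), and any piece in between SCTP_DATA_MIDDLE_FRAG (0x00).
+ */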
+/* ECN Nonce: SACK Chunk Specific Flags */
+#define SCTP_SACK_NONCE_SUM 0x01
+
+/* CMT DAC algorithm SACK flag */
+#define SCTP_SACK_CMT_DAC 0x80
+
+/*
+ * PCB flags (in sctp_flags bitmask).
+ * Note the features and flags are meant
+ * for use by netstat.
+ */
+#define SCTP_PCB_FLAGS_UDPTYPE 0x00000001
+#define SCTP_PCB_FLAGS_TCPTYPE 0x00000002
+#define SCTP_PCB_FLAGS_BOUNDALL 0x00000004
+#define SCTP_PCB_FLAGS_ACCEPTING 0x00000008
+#define SCTP_PCB_FLAGS_UNBOUND 0x00000010
+#define SCTP_PCB_FLAGS_CLOSE_IP 0x00040000
+#define SCTP_PCB_FLAGS_WAS_CONNECTED 0x00080000
+#define SCTP_PCB_FLAGS_WAS_ABORTED 0x00100000
+/* TCP model support */
+
+#define SCTP_PCB_FLAGS_CONNECTED 0x00200000
+#define SCTP_PCB_FLAGS_IN_TCPPOOL 0x00400000
+#define SCTP_PCB_FLAGS_DONT_WAKE 0x00800000
+#define SCTP_PCB_FLAGS_WAKEOUTPUT 0x01000000
+#define SCTP_PCB_FLAGS_WAKEINPUT 0x02000000
+#define SCTP_PCB_FLAGS_BOUND_V6 0x04000000
+#define SCTP_PCB_FLAGS_BLOCKING_IO 0x08000000
+#define SCTP_PCB_FLAGS_SOCKET_GONE 0x10000000
+#define SCTP_PCB_FLAGS_SOCKET_ALLGONE 0x20000000
+#define SCTP_PCB_FLAGS_SOCKET_CANT_READ 0x40000000
+/* flags to copy to new PCB */
+#define SCTP_PCB_COPY_FLAGS (SCTP_PCB_FLAGS_BOUNDALL|\
+ SCTP_PCB_FLAGS_WAKEINPUT|\
+ SCTP_PCB_FLAGS_BOUND_V6)
+
+
+/*
+ * PCB Features (in sctp_features bitmask)
+ */
+#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002
+#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004
+#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008
+#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010
+#define SCTP_PCB_FLAGS_DO_ASCONF 0x00000020
+#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x00000040
+#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x00000080
+/* socket options */
+#define SCTP_PCB_FLAGS_NODELAY 0x00000100
+#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200
+#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400
+#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800
+#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000
+#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000
+#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x00004000
+#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x00008000
+#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x00010000
+#define SCTP_PCB_FLAGS_PDAPIEVNT 0x00020000
+#define SCTP_PCB_FLAGS_AUTHEVNT 0x00040000
+#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x00080000
+#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x00100000
+#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x00400000
+#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x00800000
+#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x01000000
+#define SCTP_PCB_FLAGS_PORTREUSE 0x02000000
+#define SCTP_PCB_FLAGS_DRYEVNT 0x04000000
+/*-
+ * mobility_features parameters (by micchie). Note
+ * these features are applied against the
+ * sctp_mobility_features flags, not the sctp_features
+ * flags.
+ */
+#define SCTP_MOBILITY_BASE 0x00000001
+#define SCTP_MOBILITY_FASTHANDOFF 0x00000002
+#define SCTP_MOBILITY_PRIM_DELETED 0x00000004
+
+
+#define SCTP_SMALLEST_PMTU 512 /* smallest pmtu allowed when disabling PMTU
+ * discovery */
+
+#include <freebsd/netinet/sctp_uio.h>
+
+/* This dictates the size of the packet
+ * collection buffer. This only applies
+ * if SCTP_PACKET_LOGGING is enabled in
+ * your config.
+ */
+#define SCTP_PACKET_LOG_SIZE 65536
+
+/* Maximum delays and such a user can set for options that
+ * take ms.
+ */
+#define SCTP_MAX_SACK_DELAY 500 /* per RFC4960 */
+#define SCTP_MAX_HB_INTERVAL 14400000 /* 4 hours in ms */
+#define SCTP_MAX_COOKIE_LIFE 3600000 /* 1 hour in ms */
+
+
+/* Types of logging/KTR tracing that can be enabled via the
+ * sysctl net.inet.sctp.sctp_logging. You must also enable
+ * SUBSYS tracing.
+ * Note that you must have the SCTP option in the kernel
+ * to enable these as well.
+ */
+#define SCTP_BLK_LOGGING_ENABLE 0x00000001
+#define SCTP_CWND_MONITOR_ENABLE 0x00000002
+#define SCTP_CWND_LOGGING_ENABLE 0x00000004
+#define SCTP_EARLYFR_LOGGING_ENABLE 0x00000010
+#define SCTP_FLIGHT_LOGGING_ENABLE 0x00000020
+#define SCTP_FR_LOGGING_ENABLE 0x00000040
+#define SCTP_LOCK_LOGGING_ENABLE 0x00000080
+#define SCTP_MAP_LOGGING_ENABLE 0x00000100
+#define SCTP_MBCNT_LOGGING_ENABLE 0x00000200
+#define SCTP_MBUF_LOGGING_ENABLE 0x00000400
+#define SCTP_NAGLE_LOGGING_ENABLE 0x00000800
+#define SCTP_RECV_RWND_LOGGING_ENABLE 0x00001000
+#define SCTP_RTTVAR_LOGGING_ENABLE 0x00002000
+#define SCTP_SACK_LOGGING_ENABLE 0x00004000
+#define SCTP_SACK_RWND_LOGGING_ENABLE 0x00008000
+#define SCTP_SB_LOGGING_ENABLE 0x00010000
+#define SCTP_STR_LOGGING_ENABLE 0x00020000
+#define SCTP_WAKE_LOGGING_ENABLE 0x00040000
+#define SCTP_LOG_MAXBURST_ENABLE 0x00080000
+#define SCTP_LOG_RWND_ENABLE 0x00100000
+#define SCTP_LOG_SACK_ARRIVALS_ENABLE 0x00200000
+#define SCTP_LTRACE_CHUNK_ENABLE 0x00400000
+#define SCTP_LTRACE_ERROR_ENABLE 0x00800000
+#define SCTP_LAST_PACKET_TRACING 0x01000000
+#define SCTP_THRESHOLD_LOGGING 0x02000000
+#define SCTP_LOG_AT_SEND_2_SCTP 0x04000000
+#define SCTP_LOG_AT_SEND_2_OUTQ 0x08000000
+#define SCTP_LOG_TRY_ADVANCE 0x10000000
+
+
+#undef SCTP_PACKED
+
+#endif /* !_NETINET_SCTP_HH_ */
diff --git a/freebsd/sys/netinet/sctp_asconf.c b/freebsd/sys/netinet/sctp_asconf.c
new file mode 100644
index 00000000..206cf600
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_asconf.c
@@ -0,0 +1,3397 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_asconf.c,v 1.24 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctp_timer.h>
+
+/*
+ * debug flags:
+ * SCTP_DEBUG_ASCONF1: protocol info, general info and errors
+ * SCTP_DEBUG_ASCONF2: detailed info
+ */
+#ifdef SCTP_DEBUG
+#endif /* SCTP_DEBUG */
+
+
+static void
+sctp_asconf_get_source_ip(struct mbuf *m, struct sockaddr *sa)
+{
+ struct ip *iph;
+ struct sockaddr_in *sin;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+
+#endif
+
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* IPv4 source */
+ sin = (struct sockaddr_in *)sa;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = 0;
+ sin->sin_addr.s_addr = iph->ip_src.s_addr;
+ return;
+ }
+#ifdef INET6
+ else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* IPv6 source */
+ struct ip6_hdr *ip6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = 0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6->sin6_addr = ip6->ip6_src;
+ return;
+ }
+#endif /* INET6 */
+ else
+ return;
+}
+
+/*
+ * draft-ietf-tsvwg-addip-sctp
+ *
+ * An ASCONF parameter queue exists per asoc which holds the pending address
+ * operations. Lists are updated upon receipt of ASCONF-ACK.
+ *
+ * A restricted_addrs list exists per assoc to hold local addresses that are
+ * not (yet) usable by the assoc as a source address. These addresses are
+ * either pending an ASCONF operation (and exist on the ASCONF parameter
+ * queue), or they are permanently restricted (the peer has returned an
+ * ERROR indication to an ASCONF(ADD), or the peer does not support ASCONF).
+ *
+ * Deleted addresses are always immediately removed from the lists as they will
+ * (shortly) no longer exist in the kernel. We send ASCONFs as a courtesy,
+ * only if allowed.
+ */
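+
+/*
+ * Illustrative lifecycle (a rough sketch of the flow implemented below):
+ * when a local address is added, an ASCONF(ADD) parameter is queued and
+ * the address is placed on the restricted list; once the peer's
+ * ASCONF-ACK reports success, the parameter is dequeued and the address
+ * is removed from the restricted list, making it usable as a source
+ * address. A failure indication leaves the address on the restricted
+ * list.
+ */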
+
+/*
+ * ASCONF parameter processing.
+ * response_required: set if a reply is required (eg. SUCCESS_REPORT).
+ * returns a mbuf to an "error" response parameter or NULL/"success" if ok.
+ * FIX: allocating this many mbufs on the fly is pretty inefficient...
+ */
+static struct mbuf *
+sctp_asconf_success_response(uint32_t id)
+{
+ struct mbuf *m_reply = NULL;
+ struct sctp_asconf_paramhdr *aph;
+
+ m_reply = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_reply == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_success_response: couldn't get mbuf!\n");
+ return NULL;
+ }
+ aph = mtod(m_reply, struct sctp_asconf_paramhdr *);
+ aph->correlation_id = id;
+ aph->ph.param_type = htons(SCTP_SUCCESS_REPORT);
+ aph->ph.param_length = sizeof(struct sctp_asconf_paramhdr);
+ SCTP_BUF_LEN(m_reply) = aph->ph.param_length;
+ aph->ph.param_length = htons(aph->ph.param_length);
+
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_asconf_error_response(uint32_t id, uint16_t cause, uint8_t * error_tlv,
+ uint16_t tlv_length)
+{
+ struct mbuf *m_reply = NULL;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_error_cause *error;
+ uint8_t *tlv;
+
+ m_reply = sctp_get_mbuf_for_msg((sizeof(struct sctp_asconf_paramhdr) +
+ tlv_length +
+ sizeof(struct sctp_error_cause)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_reply == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_error_response: couldn't get mbuf!\n");
+ return NULL;
+ }
+ aph = mtod(m_reply, struct sctp_asconf_paramhdr *);
+ error = (struct sctp_error_cause *)(aph + 1);
+
+ aph->correlation_id = id;
+ aph->ph.param_type = htons(SCTP_ERROR_CAUSE_IND);
+ error->code = htons(cause);
+ error->length = tlv_length + sizeof(struct sctp_error_cause);
+ aph->ph.param_length = error->length +
+ sizeof(struct sctp_asconf_paramhdr);
+
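+	/*
+	 * Length nesting, for illustration (hypothetical tlv_length of 20
+	 * bytes): with the 8-byte sctp_asconf_paramhdr and the 4-byte
+	 * sctp_error_cause, error->length becomes 24 and the enclosing
+	 * param_length becomes 32; both are converted to network byte
+	 * order just before returning.
+	 */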
+ if (aph->ph.param_length > MLEN) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_error_response: tlv_length (%xh) too big\n",
+ tlv_length);
+ sctp_m_freem(m_reply); /* discard */
+ return NULL;
+ }
+ if (error_tlv != NULL) {
+ tlv = (uint8_t *) (error + 1);
+ memcpy(tlv, error_tlv, tlv_length);
+ }
+ SCTP_BUF_LEN(m_reply) = aph->ph.param_length;
+ error->length = htons(error->length);
+ aph->ph.param_length = htons(aph->ph.param_length);
+
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = stcb->rport;
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: adding ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+#ifdef INET6
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = stcb->rport;
+ memcpy((caddr_t)&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: adding ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! */
+ /* FIX ME: currently sends back an invalid param error */
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_INVALID_PARAM, (uint8_t *) aph, aparam_length);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: v6 disabled- skipping ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ return m_reply;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ } /* end switch */
+
+ /* if 0.0.0.0/::0, add the source address instead */
+ if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
+ sa = (struct sockaddr *)&sa_source;
+ sctp_asconf_get_source_ip(m, sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: using source addr ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ }
+ /* add the address */
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE,
+ SCTP_ADDR_DYNAMIC_ADDED) != 0) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: error adding address\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_RESOURCE_SHORTAGE, (uint8_t *) aph,
+ aparam_length);
+ } else {
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_ADD_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+ if (response_required) {
+ m_reply =
+ sctp_asconf_success_response(aph->correlation_id);
+ }
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb,
+ NULL, SCTP_FROM_SCTP_ASCONF + SCTP_LOC_1);
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, NULL);
+ }
+
+ return m_reply;
+}
+
+static int
+sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src)
+{
+ struct sctp_nets *src_net, *net;
+
+ /* make sure the source address exists as a destination net */
+ src_net = sctp_findnet(stcb, src);
+ if (src_net == NULL) {
+ /* not found */
+ return -1;
+ }
+ /* delete all destination addresses except the source */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net != src_net) {
+ /* delete this address */
+ sctp_remove_net(stcb, net);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_del_remote_addrs_except: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1,
+ (struct sockaddr *)&net->ro._l_addr);
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_DELETE_IP, stcb, 0,
+ (struct sockaddr *)&net->ro._l_addr, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ return 0;
+}
+
+static struct mbuf *
+sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+ int result;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ /* get the source IP address for src and 0.0.0.0/::0 delete checks */
+ sctp_asconf_get_source_ip(m, (struct sockaddr *)&sa_source);
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = stcb->rport;
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+#ifdef INET6
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = stcb->rport;
+ memcpy(&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! No "action" needed; just ack it */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: v6 disabled- ignoring: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ /* just respond with a "success" ASCONF-ACK */
+ return NULL;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+
+ /* make sure the source address is not being deleted */
+ if (sctp_cmpaddr(sa, (struct sockaddr *)&sa_source)) {
+ /* trying to delete the source address! */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete source addr\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_DELETING_SRC_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+ /* if deleting 0.0.0.0/::0, delete all addresses except src addr */
+ if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
+ result = sctp_asconf_del_remote_addrs_except(stcb,
+ (struct sockaddr *)&sa_source);
+
+ if (result) {
+ /* src address did not exist? */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: src addr does not exist?\n");
+ /* what error to reply with?? */
+ m_reply =
+ sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_REQUEST_REFUSED, (uint8_t *) aph,
+ aparam_length);
+ } else if (response_required) {
+ m_reply =
+ sctp_asconf_success_response(aph->correlation_id);
+ }
+ return m_reply;
+ }
+ /* delete the address */
+ result = sctp_del_remote_addr(stcb, sa);
+ /*
+ * note if result == -2, the address doesn't exist in the asoc but
+	 * since it's being deleted anyway, we just ack the delete -- but
+ * this probably means something has already gone awry
+ */
+ if (result == -1) {
+ /* only one address in the asoc */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete last IP addr!\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_DELETING_LAST_ADDR, (uint8_t *) aph,
+ aparam_length);
+ } else {
+ if (response_required) {
+ m_reply = sctp_asconf_success_response(aph->correlation_id);
+ }
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_DELETE_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+ }
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_process_asconf_set_primary(struct mbuf *m,
+ struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+#ifdef INET6
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ memcpy((caddr_t)&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! No "action" needed; just ack it */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: v6 disabled- ignoring: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ /* just respond with a "success" ASCONF-ACK */
+ return NULL;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+
+ /* if 0.0.0.0/::0, use the source address instead */
+ if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
+ sa = (struct sockaddr *)&sa_source;
+ sctp_asconf_get_source_ip(m, sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: using source addr ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ }
+ /* set the primary address */
+ if (sctp_set_primary_addr(stcb, sa, NULL) == 0) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: primary address set\n");
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_SET_PRIMARY, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+
+ if (response_required) {
+ m_reply = sctp_asconf_success_response(aph->correlation_id);
+ }
+ /*
+		 * Mobility adaptation. Ideally, on reception of a SET
+		 * PRIMARY together with a DELETE IP ADDRESS of the previous
+		 * primary destination, unacknowledged DATA is retransmitted
+		 * immediately to the new primary destination for seamless
+		 * handover. If the destination is UNCONFIRMED and marked
+		 * REQ_PRIM, the retransmission happens on reception of the
+		 * HEARTBEAT-ACK. (See sctp_handle_heartbeat_ack in
+		 * sctp_input.c.) Also, once the primary destination has
+		 * changed, it is better that all subsequent new DATA, as
+		 * well as already queued DATA, is transmitted to the new
+		 * primary destination. (by micchie)
+ */
+ if ((sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED) &&
+ (stcb->asoc.primary_destination->dest_state &
+ SCTP_ADDR_UNCONFIRMED) == 0) {
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_assoc_immediate_retrans(stcb,
+ stcb->asoc.primary_destination);
+ }
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE)) {
+ sctp_move_chunks_from_net(stcb,
+ stcb->asoc.deleted_primary);
+ }
+ sctp_delete_prim_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ }
+ } else {
+ /* couldn't set the requested primary address! */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: set primary failed!\n");
+ /* must have been an invalid address, so report */
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ }
+
+ return m_reply;
+}
+
+/*
+ * handles an ASCONF chunk.
+ * if all parameters are processed ok, send a plain (empty) ASCONF-ACK
+ */
+void
+sctp_handle_asconf(struct mbuf *m, unsigned int offset,
+ struct sctp_asconf_chunk *cp, struct sctp_tcb *stcb,
+ int first)
+{
+ struct sctp_association *asoc;
+ uint32_t serial_num;
+ struct mbuf *n, *m_ack, *m_result, *m_tail;
+ struct sctp_asconf_ack_chunk *ack_cp;
+ struct sctp_asconf_paramhdr *aph, *ack_aph;
+ struct sctp_ipv6addr_param *p_addr;
+ unsigned int asconf_limit;
+ int error = 0; /* did an error occur? */
+
+ /* asconf param buffer */
+ uint8_t aparam_buf[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_asconf_ack *ack, *ack_next;
+
+ /* verify minimum length */
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_asconf_chunk)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: chunk too small = %xh\n",
+ ntohs(cp->ch.chunk_length));
+ return;
+ }
+ asoc = &stcb->asoc;
+ serial_num = ntohl(cp->serial_number);
+
+ if (compare_with_wrap(asoc->asconf_seq_in, serial_num, MAX_SEQ) ||
+ serial_num == asoc->asconf_seq_in) {
+ /* got a duplicate ASCONF */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: got duplicate serial number = %xh\n",
+ serial_num);
+ return;
+ } else if (serial_num != (asoc->asconf_seq_in + 1)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: incorrect serial number = %xh (expected next = %xh)\n",
+ serial_num, asoc->asconf_seq_in + 1);
+ return;
+ }
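+	/*
+	 * E.g. (illustrative): with asconf_seq_in == 5, a serial number of
+	 * 5 or below is treated as a duplicate above, 6 is the expected
+	 * next number and is processed below, and 7 or above is out of
+	 * order and dropped.
+	 */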
+ /* it's the expected "next" sequence number, so process it */
+ asoc->asconf_seq_in = serial_num; /* update sequence */
+ /* get length of all the param's in the ASCONF */
+ asconf_limit = offset + ntohs(cp->ch.chunk_length);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: asconf_limit=%u, sequence=%xh\n",
+ asconf_limit, serial_num);
+
+ if (first) {
+ /* delete old cache */
+		SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: Now processing first ASCONF. Try to delete old cache\n");
+
+ ack = TAILQ_FIRST(&stcb->asoc.asconf_ack_sent);
+ while (ack != NULL) {
+ ack_next = TAILQ_NEXT(ack, next);
+ if (ack->serial_number == serial_num)
+ break;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: delete old(%u) < first(%u)\n",
+ ack->serial_number, serial_num);
+ TAILQ_REMOVE(&stcb->asoc.asconf_ack_sent, ack, next);
+ if (ack->data != NULL) {
+ sctp_m_freem(ack->data);
+ }
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asconf_ack), ack);
+ ack = ack_next;
+ }
+ }
+ m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0,
+ M_DONTWAIT, 1, MT_DATA);
+ if (m_ack == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: couldn't get mbuf!\n");
+ return;
+ }
+ m_tail = m_ack; /* current reply chain's tail */
+
+ /* fill in ASCONF-ACK header */
+ ack_cp = mtod(m_ack, struct sctp_asconf_ack_chunk *);
+ ack_cp->ch.chunk_type = SCTP_ASCONF_ACK;
+ ack_cp->ch.chunk_flags = 0;
+ ack_cp->serial_number = htonl(serial_num);
+ /* set initial lengths (eg. just an ASCONF-ACK), ntohx at the end! */
+ SCTP_BUF_LEN(m_ack) = sizeof(struct sctp_asconf_ack_chunk);
+ ack_cp->ch.chunk_length = sizeof(struct sctp_asconf_ack_chunk);
+
+ /* skip the lookup address parameter */
+ offset += sizeof(struct sctp_asconf_chunk);
+ p_addr = (struct sctp_ipv6addr_param *)sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr), (uint8_t *) & aparam_buf);
+ if (p_addr == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: couldn't get lookup addr!\n");
+ /* respond with a missing/invalid mandatory parameter error */
+ return;
+ }
+ /* param_length is already validated in process_control... */
+ offset += ntohs(p_addr->ph.param_length); /* skip lookup addr */
+
+ /* get pointer to first asconf param in ASCONF-ACK */
+ ack_aph = (struct sctp_asconf_paramhdr *)(mtod(m_ack, caddr_t)+sizeof(struct sctp_asconf_ack_chunk));
+ if (ack_aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Gak in asconf2\n");
+ return;
+ }
+ /* get pointer to first asconf param in ASCONF */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_asconf_paramhdr), (uint8_t *) & aparam_buf);
+ if (aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Empty ASCONF received?\n");
+ goto send_reply;
+ }
+ /* process through all parameters */
+ while (aph != NULL) {
+ unsigned int param_length, param_type;
+
+ param_type = ntohs(aph->ph.param_type);
+ param_length = ntohs(aph->ph.param_length);
+ if (offset + param_length > asconf_limit) {
+ /* parameter goes beyond end of chunk! */
+ sctp_m_freem(m_ack);
+ return;
+ }
+ m_result = NULL;
+
+ if (param_length > sizeof(aparam_buf)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: param length (%u) larger than buffer size!\n", param_length);
+ sctp_m_freem(m_ack);
+ return;
+ }
+		if (param_length <= sizeof(struct sctp_paramhdr)) {
+			SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: param length (%u) too short\n", param_length);
+			/* discard the reply and stop; m_ack was just freed */
+			sctp_m_freem(m_ack);
+			return;
+		}
+ /* get the entire parameter */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, param_length, aparam_buf);
+ if (aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: couldn't get entire param\n");
+ sctp_m_freem(m_ack);
+ return;
+ }
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_add_ip(m, aph, stcb,
+ error);
+ break;
+ case SCTP_DEL_IP_ADDRESS:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_delete_ip(m, aph, stcb,
+ error);
+ break;
+ case SCTP_ERROR_CAUSE_IND:
+ /* not valid in an ASCONF chunk */
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_set_primary(m, aph,
+ stcb, error);
+ break;
+ case SCTP_NAT_VTAGS:
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: sees a NAT VTAG state parameter\n");
+ break;
+ case SCTP_SUCCESS_REPORT:
+ /* not valid in an ASCONF chunk */
+ break;
+ case SCTP_ULP_ADAPTATION:
+ /* FIX */
+ break;
+ default:
+ if ((param_type & 0x8000) == 0) {
+ /* Been told to STOP at this param */
+ asconf_limit = offset;
+ /*
+ * FIX FIX - We need to call
+ * sctp_arethere_unrecognized_parameters()
+				 * to get an operr and send it for any
+				 * params with the 0x4000 bit set OR do it
+ * here ourselves... note we still must STOP
+ * if the 0x8000 bit is clear.
+ */
+ }
+ /* unknown/invalid param type */
+ break;
+ } /* switch */
+
+ /* add any (error) result to the reply mbuf chain */
+ if (m_result != NULL) {
+ SCTP_BUF_NEXT(m_tail) = m_result;
+ m_tail = m_result;
+ /* update lengths, make sure it's aligned too */
+ SCTP_BUF_LEN(m_result) = SCTP_SIZE32(SCTP_BUF_LEN(m_result));
+ ack_cp->ch.chunk_length += SCTP_BUF_LEN(m_result);
+ /* set flag to force success reports */
+ error = 1;
+ }
+ offset += SCTP_SIZE32(param_length);
+ /* update remaining ASCONF message length to process */
+ if (offset >= asconf_limit) {
+ /* no more data in the mbuf chain */
+ break;
+ }
+ /* get pointer to next asconf param */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_asconf_paramhdr),
+ (uint8_t *) & aparam_buf);
+ if (aph == NULL) {
+ /* can't get an asconf paramhdr */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: can't get asconf param hdr!\n");
+ /* FIX ME - add error here... */
+ }
+ }
+
+send_reply:
+ ack_cp->ch.chunk_length = htons(ack_cp->ch.chunk_length);
+ /* save the ASCONF-ACK reply */
+ ack = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_asconf_ack),
+ struct sctp_asconf_ack);
+ if (ack == NULL) {
+ sctp_m_freem(m_ack);
+ return;
+ }
+ ack->serial_number = serial_num;
+ ack->last_sent_to = NULL;
+ ack->data = m_ack;
+ ack->len = 0;
+ n = m_ack;
+ while (n) {
+ ack->len += SCTP_BUF_LEN(n);
+ n = SCTP_BUF_NEXT(n);
+ }
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_ack_sent, ack, next);
+
+ /* see if last_control_chunk_from is set properly (use IP src addr) */
+ if (stcb->asoc.last_control_chunk_from == NULL) {
+ /*
+ * this could happen if the source address was just newly
+ * added
+ */
+ struct ip *iph;
+ struct sctphdr *sh;
+ struct sockaddr_storage from_store;
+ struct sockaddr *from = (struct sockaddr *)&from_store;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: looking up net for IP source address\n");
+ /* pullup already done, IP options already stripped */
+ iph = mtod(m, struct ip *);
+ sh = (struct sctphdr *)((caddr_t)iph + sizeof(*iph));
+ switch (iph->ip_v) {
+ case IPVERSION:
+ {
+ struct sockaddr_in *from4;
+
+ from4 = (struct sockaddr_in *)&from_store;
+ bzero(from4, sizeof(*from4));
+ from4->sin_family = AF_INET;
+ from4->sin_len = sizeof(struct sockaddr_in);
+ from4->sin_addr.s_addr = iph->ip_src.s_addr;
+ from4->sin_port = sh->src_port;
+ break;
+ }
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *from6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ from6 = (struct sockaddr_in6 *)&from_store;
+ bzero(from6, sizeof(*from6));
+ from6->sin6_family = AF_INET6;
+ from6->sin6_len = sizeof(struct sockaddr_in6);
+ from6->sin6_addr = ip6->ip6_src;
+ from6->sin6_port = sh->src_port;
+ /*
+ * Get the scopes in properly to the sin6
+ * addr's
+ */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(from6);
+ sa6_embedscope(from6,
+ MODULE_GLOBAL(ip6_use_defzone));
+
+ break;
+ }
+#endif
+ default:
+ /* unknown address type */
+ from = NULL;
+ }
+ if (from != NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, from);
+ /* look up the from address */
+ stcb->asoc.last_control_chunk_from = sctp_findnet(stcb, from);
+#ifdef SCTP_DEBUG
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: IP source address not found?!\n");
+#endif
+ }
+ }
+}
+
+/*
+ * does the address match? returns 0 if not, 1 if so
+ */
+static uint32_t
+sctp_asconf_addr_match(struct sctp_asconf_addr *aa, struct sockaddr *sa)
+{
+#ifdef INET6
+ if (sa->sa_family == AF_INET6) {
+ /* IPv6 sa address */
+ /* XXX scopeid */
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
+
+ if ((aa->ap.addrp.ph.param_type == SCTP_IPV6_ADDRESS) &&
+ (memcmp(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr)) == 0)) {
+ return (1);
+ }
+ } else
+#endif /* INET6 */
+ if (sa->sa_family == AF_INET) {
+ /* IPv4 sa address */
+ struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+ if ((aa->ap.addrp.ph.param_type == SCTP_IPV4_ADDRESS) &&
+ (memcmp(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr)) == 0)) {
+ return (1);
+ }
+ }
+ return (0);
+}
+
+/*
+ * does the address match? returns 0 if not, 1 if so
+ */
+static uint32_t
+sctp_addr_match(
+ struct sctp_ipv6addr_param *v6addr,
+ struct sockaddr *sa)
+{
+ uint16_t param_type, param_length;
+ struct sctp_ipv4addr_param *v4addr = (struct sctp_ipv4addr_param *)v6addr;
+
+#ifdef INET6
+ if (sa->sa_family == AF_INET6) {
+ /* IPv6 sa address */
+ /* XXX scopeid */
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
+
+ param_type = ntohs(v6addr->ph.param_type);
+ param_length = ntohs(v6addr->ph.param_length);
+
+ if ((param_type == SCTP_IPV6_ADDRESS) &&
+ param_length == sizeof(struct sctp_ipv6addr_param) &&
+ (memcmp(&v6addr->addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr)) == 0)) {
+ return (1);
+ }
+ }
+#endif
+ if (sa->sa_family == AF_INET) {
+ /* IPv4 sa address */
+ struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ if ((param_type == SCTP_IPV4_ADDRESS) &&
+ param_length == sizeof(struct sctp_ipv4addr_param) &&
+ (memcmp(&v4addr->addr, &sin->sin_addr,
+ sizeof(struct in_addr)) == 0)) {
+ return (1);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Cleanup for non-responded/OP ERR'd ASCONF
+ */
+void
+sctp_asconf_cleanup(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* mark peer as ASCONF incapable */
+ stcb->asoc.peer_supports_asconf = 0;
+ /*
+ * clear out any existing asconfs going out
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_2);
+ stcb->asoc.asconf_seq_out_acked = stcb->asoc.asconf_seq_out;
+ /* remove the old ASCONF on our outbound queue */
+ sctp_toss_old_asconf(stcb);
+}
+
+/*
+ * cleanup any cached source addresses that may be topologically
+ * incorrect after a new address has been added to this interface.
+ */
+static void
+sctp_asconf_nets_cleanup(struct sctp_tcb *stcb, struct sctp_ifn *ifn)
+{
+ struct sctp_nets *net;
+
+ /*
+ * Ideally, we want to only clear cached routes and source addresses
+ * that are topologically incorrect. But since there is no easy way
+ * to know whether the newly added address on the ifn would cause a
+ * routing change (i.e. a new egress interface would be chosen)
+ * without doing a new routing lookup and source address selection,
+ * we will (for now) just flush any cached route using a different
+ * ifn (and cached source addrs) and let output re-choose them
+ * during the next send on that net.
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /*
+ * clear any cached route (and cached source address) if the
+ * route's interface is NOT the same as the address change.
+ * If it's the same interface, just clear the cached source
+ * address.
+ */
+ if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro) &&
+ ((ifn == NULL) ||
+ (SCTP_GET_IF_INDEX_FROM_ROUTE(&net->ro) != ifn->ifn_index))) {
+ /* clear any cached route */
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ /* clear any cached source address */
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+}
+
+
+void
+sctp_assoc_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *dstnet)
+{
+ int error;
+
+ if (dstnet->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ return;
+ }
+ if (stcb->asoc.deleted_primary == NULL) {
+ return;
+ }
+ if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "assoc_immediate_retrans: Deleted primary is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Current Primary is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.primary_destination->ro._l_addr.sa);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_8);
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ error = sctp_t3rxt_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ if (error) {
+ SCTP_INP_DECR_REF(stcb->sctp_ep);
+ return;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, stcb->sctp_ep, stcb, stcb->asoc.deleted_primary);
+#endif
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((stcb->asoc.num_send_timers_up == 0) &&
+ (stcb->asoc.sent_queue_cnt > 0)) {
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, chk->whoTo);
+ }
+ }
+ return;
+}
+
+static int
+ sctp_asconf_queue_mgmt(struct sctp_tcb *, struct sctp_ifa *, uint16_t);
+
+void
+sctp_net_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_tmit_chunk *chk;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "net_immediate_retrans: RTO is %d\n", net->RTO);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_5);
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+ net->error_count = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->whoTo == net) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ net->marked_retrans++;
+ stcb->asoc.marked_retrans++;
+ }
+ }
+ }
+ if (net->marked_retrans) {
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ }
+}
+
+static void
+sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
+{
+ struct sctp_nets *net;
+ int addrnum, changed;
+
+ /*
+	 * If the number of valid local addresses is 1, that address is
+	 * probably the newly added one. With several valid addresses in
+	 * this association, the source address need not change.
+	 * Additionally, they can be configured on the same interface as
+	 * "alias" addresses. (by micchie)
+ */
+ addrnum = sctp_local_addr_count(stcb);
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "p_check_react(): %d local addresses\n",
+ addrnum);
+ if (addrnum == 1) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* clear any cached route and source address */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ /* Retransmit unacknowledged DATA chunks immediately */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_net_immediate_retrans(stcb, net);
+ }
+ /* also, SET PRIMARY is maybe already sent */
+ }
+ return;
+ }
+	/* Multiple local addresses exist in the association. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* clear any cached route and source address */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ /*
+		 * Check whether the nexthop corresponds to the new address.
+		 * If the new address corresponds to the current nexthop,
+		 * the path will be changed; if it does NOT correspond to
+		 * the current nexthop, the path will not be changed.
+ */
+ SCTP_RTALLOC((sctp_route_t *) & net->ro,
+ stcb->sctp_ep->def_vrf_id);
+ if (net->ro.ro_rt == NULL)
+ continue;
+
+ changed = 0;
+ if (net->ro._l_addr.sa.sa_family == AF_INET) {
+ if (sctp_v4src_match_nexthop(newifa, (sctp_route_t *) & net->ro))
+ changed = 1;
+ }
+#ifdef INET6
+ if (net->ro._l_addr.sa.sa_family == AF_INET6) {
+ if (sctp_v6src_match_nexthop(
+ &newifa->address.sin6, (sctp_route_t *) & net->ro))
+ changed = 1;
+ }
+#endif
+ /*
+		 * if the newly added address does not affect the routing
+		 * (nexthop) for this net, skip it.
+ */
+ if (changed == 0)
+ continue;
+ /* Retransmit unacknowledged DATA chunks immediately */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_net_immediate_retrans(stcb, net);
+ }
+ /* Send SET PRIMARY for this new address */
+ if (net == stcb->asoc.primary_destination) {
+ (void)sctp_asconf_queue_mgmt(stcb, newifa,
+ SCTP_SET_PRIM_ADDR);
+ }
+ }
+}
+
+/*
+ * process an ADD/DELETE IP ack from peer.
+ * addr: corresponding sctp_ifa to the address being added/deleted.
+ * type: SCTP_ADD_IP_ADDRESS or SCTP_DEL_IP_ADDRESS.
+ * flag: 1=success, 0=failure.
+ */
+static void
+sctp_asconf_addr_mgmt_ack(struct sctp_tcb *stcb, struct sctp_ifa *addr,
+ uint16_t type, uint32_t flag)
+{
+ /*
+ * do the necessary asoc list work- if we get a failure indication,
+ * leave the address on the assoc's restricted list. If we get a
+ * success indication, remove the address from the restricted list.
+ */
+ /*
+ * Note: this will only occur for ADD_IP_ADDRESS, since
+ * DEL_IP_ADDRESS is never actually added to the list...
+ */
+ if (flag) {
+ /* success case, so remove from the restricted list */
+ sctp_del_local_addr_restricted(stcb, addr);
+
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_path_check_and_react(stcb, addr);
+ return;
+ }
+ /* clear any cached/topologically incorrect source addresses */
+ sctp_asconf_nets_cleanup(stcb, addr->ifn_p);
+ }
+ /* else, leave it on the list */
+}
+
+/*
+ * add an asconf add/delete/set primary IP address parameter to the queue.
+ * type = SCTP_ADD_IP_ADDRESS, SCTP_DEL_IP_ADDRESS, SCTP_SET_PRIM_ADDR.
+ * returns 0 if queued, -1 if not queued/removed.
+ * NOTE: if adding, but a delete for the same address is already scheduled
+ * (and not yet sent out), simply remove it from queue. Same for deleting
+ * an address already scheduled for add. If a duplicate operation is found,
+ * ignore the new one.
+ */
+static int
+sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
+ uint16_t type)
+{
+ struct sctp_asconf_addr *aa, *aa_next;
+ struct sockaddr *sa;
+
+ /* make sure the request isn't already in the queue */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ /* address match? */
+ if (sctp_asconf_addr_match(aa, &ifa->address.sa) == 0)
+ continue;
+ /*
+		 * Is the same request already queued, but not yet sent?
+		 * Requests that were already sent are skipped here, to
+		 * handle the following sequence: 1. an ADD arrives and is
+		 * sent; 2. a DEL arrives (we cannot remove the ADD that is
+		 * already in flight); 3. another ADD arrives.
+ */
+ if (aa->ap.aph.ph.param_type == type && aa->sent == 0) {
+ return (-1);
+ }
+ /* is the negative request already in queue, and not sent */
+ if ((aa->sent == 0) && (type == SCTP_ADD_IP_ADDRESS) &&
+ (aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS)) {
+ /* add requested, delete already queued */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ /* remove the ifa from the restricted list */
+ sctp_del_local_addr_restricted(stcb, ifa);
+ /* free the asconf param */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: add removes queued entry\n");
+ return (-1);
+ }
+ if ((aa->sent == 0) && (type == SCTP_DEL_IP_ADDRESS) &&
+ (aa->ap.aph.ph.param_type == SCTP_ADD_IP_ADDRESS)) {
+ /* delete requested, add already queued */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ /* remove the aa->ifa from the restricted list */
+ sctp_del_local_addr_restricted(stcb, aa->ifa);
+ /* free the asconf param */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: delete removes queued entry\n");
+ return (-1);
+ }
+ } /* for each aa */
+
+ /* adding new request to the queue */
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "asconf_queue_mgmt: failed to get memory!\n");
+ return (-1);
+ }
+ aa->special_del = 0;
+ /* fill in asconf address parameter fields */
+ /* top level elements are "networked" during send */
+ aa->ap.aph.ph.param_type = type;
+ aa->ifa = ifa;
+ atomic_add_int(&ifa->refcount, 1);
+ /* correlation_id filled in during send routine later... */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* IPv6 address */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sa;
+ sa = (struct sockaddr *)sin6;
+ aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
+ sizeof(struct sctp_ipv6addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ /* IPv4 address */
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sa;
+ sa = (struct sockaddr *)sin;
+ aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
+ sizeof(struct sctp_ipv4addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr));
+ } else {
+ /* invalid family! */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ sctp_free_ifa(ifa);
+ return (-1);
+ }
+ aa->sent = 0; /* clear sent flag */
+
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+#ifdef SCTP_DEBUG
+ if (SCTP_BASE_SYSCTL(sctp_debug_on) && SCTP_DEBUG_ASCONF2) {
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ SCTP_PRINTF("asconf_queue_mgmt: inserted asconf ADD_IP_ADDRESS: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ SCTP_PRINTF("asconf_queue_mgmt: appended asconf DEL_IP_ADDRESS: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ } else {
+ SCTP_PRINTF("asconf_queue_mgmt: appended asconf SET_PRIM_ADDR: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ }
+ }
+#endif
+
+ return (0);
+}
+
+
+/*
+ * add an asconf operation for the given ifa and type.
+ * type = SCTP_ADD_IP_ADDRESS, SCTP_DEL_IP_ADDRESS, SCTP_SET_PRIM_ADDR.
+ * returns 0 if completed, -1 if not completed, 1 if immediate send is
+ * advisable.
+ */
+static int
+sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
+ uint16_t type)
+{
+ uint32_t status;
+ int pending_delete_queued = 0;
+
+ /* see if peer supports ASCONF */
+ if (stcb->asoc.peer_supports_asconf == 0) {
+ return (-1);
+ }
+ /*
+ * if this is deleting the last address from the assoc, mark it as
+ * pending.
+ */
+ if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending &&
+ (sctp_local_addr_count(stcb) < 2)) {
+ /* set the pending delete info only */
+ stcb->asoc.asconf_del_pending = 1;
+ stcb->asoc.asconf_addr_del_pending = ifa;
+ atomic_add_int(&ifa->refcount, 1);
+ SCTPDBG(SCTP_DEBUG_ASCONF2,
+ "asconf_queue_add: mark delete last address pending\n");
+ return (-1);
+ }
+ /* queue an asconf parameter */
+ status = sctp_asconf_queue_mgmt(stcb, ifa, type);
+
+ /*
+ * if this is an add, and there is a delete also pending (i.e. the
+ * last local address is being changed), queue the pending delete
+ * too.
+ */
+ if ((type == SCTP_ADD_IP_ADDRESS) && stcb->asoc.asconf_del_pending && (status == 0)) {
+ /* queue in the pending delete */
+ if (sctp_asconf_queue_mgmt(stcb,
+ stcb->asoc.asconf_addr_del_pending,
+ SCTP_DEL_IP_ADDRESS) == 0) {
+			SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_add: queuing pending delete\n");
+ pending_delete_queued = 1;
+ /* clear out the pending delete info */
+ stcb->asoc.asconf_del_pending = 0;
+ sctp_free_ifa(stcb->asoc.asconf_addr_del_pending);
+ stcb->asoc.asconf_addr_del_pending = NULL;
+ }
+ }
+ if (pending_delete_queued) {
+ struct sctp_nets *net;
+
+ /*
+ * since we know that the only/last address is now being
+ * changed in this case, reset the cwnd/rto on all nets to
+ * start as a new address and path. Also clear the error
+ * counts to give the assoc the best chance to complete the
+ * address change.
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb,
+ net);
+ net->RTO = 0;
+ net->error_count = 0;
+ }
+ stcb->asoc.overall_error_count = 0;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_ASCONF,
+ __LINE__);
+ }
+ /* queue in an advisory set primary too */
+ (void)sctp_asconf_queue_mgmt(stcb, ifa, SCTP_SET_PRIM_ADDR);
+ /* let caller know we should send this out immediately */
+ status = 1;
+ }
+ return (status);
+}
+
+/*-
+ * add an asconf delete IP address parameter to the queue by sockaddr and
+ * possibly with no sctp_ifa available. This is only called by the routine
+ * that checks the addresses in an INIT-ACK against the current address list.
+ * returns 0 if completed, non-zero if not completed.
+ * NOTE: if an add is already scheduled (and not yet sent out), simply
+ * remove it from queue. If a duplicate operation is found, ignore the
+ * new one.
+ */
+static int
+sctp_asconf_queue_sa_delete(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ struct sctp_ifa *ifa;
+ struct sctp_asconf_addr *aa, *aa_next;
+ uint32_t vrf_id;
+
+ if (stcb == NULL) {
+ return (-1);
+ }
+ /* see if peer supports ASCONF */
+ if (stcb->asoc.peer_supports_asconf == 0) {
+ return (-1);
+ }
+ /* make sure the request isn't already in the queue */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ /* address match? */
+ if (sctp_asconf_addr_match(aa, sa) == 0)
+ continue;
+ /* is the request already in queue (sent or not) */
+ if (aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS) {
+ return (-1);
+ }
+ /* is the negative request already in queue, and not sent */
+ if (aa->sent == 1)
+ continue;
+ if (aa->ap.aph.ph.param_type == SCTP_ADD_IP_ADDRESS) {
+ /* add already queued, so remove existing entry */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ sctp_del_local_addr_restricted(stcb, aa->ifa);
+ /* free the entry */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ return (-1);
+ }
+ } /* for each aa */
+
+ /* find any existing ifa-- NOTE ifa CAN be allowed to be NULL */
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = SCTP_DEFAULT_VRFID;
+ }
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+
+ /* adding new request to the queue */
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_queue_sa_delete: failed to get memory!\n");
+ return (-1);
+ }
+ aa->special_del = 0;
+ /* fill in asconf address parameter fields */
+ /* top level elements are "networked" during send */
+ aa->ap.aph.ph.param_type = SCTP_DEL_IP_ADDRESS;
+ aa->ifa = ifa;
+ if (ifa)
+ atomic_add_int(&ifa->refcount, 1);
+ /* correlation_id filled in during send routine later... */
+ if (sa->sa_family == AF_INET6) {
+ /* IPv6 address */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv6addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ } else if (sa->sa_family == AF_INET) {
+ /* IPv4 address */
+ struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+ aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv4addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr));
+ } else {
+ /* invalid family! */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ if (ifa)
+ sctp_free_ifa(ifa);
+ return (-1);
+ }
+ aa->sent = 0; /* clear sent flag */
+
+ /* delete goes to the back of the queue */
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+
+ /* sa_ignore MEMLEAK {memory is put on the tailq} */
+ return (0);
+}
+
+/*
+ * find a specific asconf param on our "sent" queue
+ */
+static struct sctp_asconf_addr *
+sctp_asconf_find_param(struct sctp_tcb *stcb, uint32_t correlation_id)
+{
+ struct sctp_asconf_addr *aa;
+
+ TAILQ_FOREACH(aa, &stcb->asoc.asconf_queue, next) {
+ if (aa->ap.aph.correlation_id == correlation_id &&
+ aa->sent == 1) {
+ /* found it */
+ return (aa);
+ }
+ }
+ /* didn't find it */
+ return (NULL);
+}
+
+/*
+ * process an SCTP_ERROR_CAUSE_IND for a ASCONF-ACK parameter and do
+ * notifications based on the error response
+ */
+static void
+sctp_asconf_process_error(struct sctp_tcb *stcb,
+ struct sctp_asconf_paramhdr *aph)
+{
+ struct sctp_error_cause *eh;
+ struct sctp_paramhdr *ph;
+ uint16_t param_type;
+ uint16_t error_code;
+
+ eh = (struct sctp_error_cause *)(aph + 1);
+ ph = (struct sctp_paramhdr *)(eh + 1);
+ /* validate lengths */
+ if (htons(eh->length) + sizeof(struct sctp_error_cause) >
+ htons(aph->ph.param_length)) {
+ /* invalid error cause length */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_process_error: cause element too long\n");
+ return;
+ }
+ if (htons(ph->param_length) + sizeof(struct sctp_paramhdr) >
+ htons(eh->length)) {
+ /* invalid included TLV length */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_process_error: included TLV too long\n");
+ return;
+ }
+ /* which error code ? */
+ error_code = ntohs(eh->code);
+ param_type = ntohs(aph->ph.param_type);
+ /* FIX: this should go back up the REMOTE_ERROR ULP notify */
+ switch (error_code) {
+ case SCTP_CAUSE_RESOURCE_SHORTAGE:
+ /* we allow ourselves to "try again" for this error */
+ break;
+ default:
+ /* peer can't handle it... */
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ case SCTP_DEL_IP_ADDRESS:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/*
+ * process an asconf queue param.
+ * aparam: parameter to process, will be removed from the queue.
+ * flag: 1=success case, 0=failure case
+ */
+static void
+sctp_asconf_process_param_ack(struct sctp_tcb *stcb,
+ struct sctp_asconf_addr *aparam, uint32_t flag)
+{
+ uint16_t param_type;
+
+ /* process this param */
+ param_type = aparam->ap.aph.ph.param_type;
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_param_ack: added IP address\n");
+ sctp_asconf_addr_mgmt_ack(stcb, aparam->ifa, param_type, flag);
+ break;
+ case SCTP_DEL_IP_ADDRESS:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_param_ack: deleted IP address\n");
+ /* nothing really to do... lists already updated */
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_param_ack: set primary IP address\n");
+ /* nothing to do... peer may start using this addr */
+ if (flag == 0)
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ /* should NEVER happen */
+ break;
+ }
+
+ /* remove the param and free it */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aparam, next);
+ if (aparam->ifa)
+ sctp_free_ifa(aparam->ifa);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+}
+
+/*
+ * cleanup from a bad asconf ack parameter
+ */
+static void
+sctp_asconf_ack_clear(struct sctp_tcb *stcb)
+{
+ /* assume peer doesn't really know how to do asconfs */
+ stcb->asoc.peer_supports_asconf = 0;
+ /* XXX we could free the pending queue here */
+}
+
+void
+sctp_handle_asconf_ack(struct mbuf *m, int offset,
+ struct sctp_asconf_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock)
+{
+ struct sctp_association *asoc;
+ uint32_t serial_num;
+ uint16_t ack_length;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_asconf_addr *aa, *aa_next;
+ uint32_t last_error_id = 0; /* last error correlation id */
+ uint32_t id;
+ struct sctp_asconf_addr *ap;
+
+ /* asconf param buffer */
+ uint8_t aparam_buf[SCTP_PARAM_BUFFER_SIZE];
+
+ /* verify minimum length */
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_asconf_ack_chunk)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf_ack: chunk too small = %xh\n",
+ ntohs(cp->ch.chunk_length));
+ return;
+ }
+ asoc = &stcb->asoc;
+ serial_num = ntohl(cp->serial_number);
+
+ /*
+ * NOTE: we may want to handle this differently- currently, we will
+ * abort when we get an ack for the expected serial number + 1 (eg.
+ * we didn't send it), process an ack normally if it is the expected
+ * serial number, and re-send the previous ack for *ALL* other
+ * serial numbers
+ */
+
+ /*
+ * if the serial number is the next expected, but I didn't send it,
+ * abort the asoc, since someone probably just hijacked us...
+ */
+ if (serial_num == (asoc->asconf_seq_out + 1)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got unexpected next serial number! Aborting asoc!\n");
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_CAUSE_ILLEGAL_ASCONF_ACK, NULL, SCTP_SO_NOT_LOCKED);
+ *abort_no_unlock = 1;
+ return;
+ }
+ if (serial_num != asoc->asconf_seq_out_acked + 1) {
+ /* got a duplicate/unexpected ASCONF-ACK */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got duplicate/unexpected serial number = %xh (expected = %xh)\n",
+ serial_num, asoc->asconf_seq_out_acked + 1);
+ return;
+ }
+ if (serial_num == asoc->asconf_seq_out - 1) {
+ /* stop our timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
+ }
+ /* process the ASCONF-ACK contents */
+ ack_length = ntohs(cp->ch.chunk_length) -
+ sizeof(struct sctp_asconf_ack_chunk);
+ offset += sizeof(struct sctp_asconf_ack_chunk);
+ /* process through all parameters */
+ while (ack_length >= sizeof(struct sctp_asconf_paramhdr)) {
+ unsigned int param_length, param_type;
+
+ /* get pointer to next asconf parameter */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_asconf_paramhdr), aparam_buf);
+ if (aph == NULL) {
+ /* can't get an asconf paramhdr */
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ param_type = ntohs(aph->ph.param_type);
+ param_length = ntohs(aph->ph.param_length);
+ if (param_length > ack_length) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ if (param_length < sizeof(struct sctp_paramhdr)) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ /* get the complete parameter... */
+ if (param_length > sizeof(aparam_buf)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "param length (%u) larger than buffer size!\n", param_length);
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, param_length, aparam_buf);
+ if (aph == NULL) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ /* correlation_id is transparent to peer, no ntohl needed */
+ id = aph->correlation_id;
+
+ switch (param_type) {
+ case SCTP_ERROR_CAUSE_IND:
+ last_error_id = id;
+ /* find the corresponding asconf param in our queue */
+ ap = sctp_asconf_find_param(stcb, id);
+ if (ap == NULL) {
+ /* hmm... can't find this in our queue! */
+ break;
+ }
+ /* process the parameter, failed flag */
+ sctp_asconf_process_param_ack(stcb, ap, 0);
+ /* process the error response */
+ sctp_asconf_process_error(stcb, aph);
+ break;
+ case SCTP_SUCCESS_REPORT:
+ /* find the corresponding asconf param in our queue */
+ ap = sctp_asconf_find_param(stcb, id);
+ if (ap == NULL) {
+ /* hmm... can't find this in our queue! */
+ break;
+ }
+ /* process the parameter, success flag */
+ sctp_asconf_process_param_ack(stcb, ap, 1);
+ break;
+ default:
+ break;
+ } /* switch */
+
+ /* update remaining ASCONF-ACK message length to process */
+ ack_length -= SCTP_SIZE32(param_length);
+ if (ack_length <= 0) {
+ /* no more data in the mbuf chain */
+ break;
+ }
+ offset += SCTP_SIZE32(param_length);
+ } /* while */
+
+ /*
+ * if there are any "sent" params still on the queue, these are
+ * implicitly "success", or "failed" (if we got an error back) ...
+ * so process these appropriately
+ *
+ * we assume that the correlation_id's are monotonically increasing
+ * beginning from 1 and that we don't have *that* many outstanding
+ * at any given time
+ */
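+	/*
+	 * Worked example (hypothetical ids): if parameters with
+	 * correlation_ids 1..4 were sent and the only error reported was
+	 * for id 3, then ids 1 and 2 (below last_error_id) are implicit
+	 * successes and id 4 is treated as a failure; with no error at
+	 * all, last_error_id wraps to the maximum value and every
+	 * remaining sent parameter is an implicit success.
+	 */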
+ if (last_error_id == 0)
+ last_error_id--;/* set to "max" value */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ if (aa->sent == 1) {
+ /*
+ * implicitly successful or failed if correlation_id
+ * < last_error_id, then success else, failure
+ */
+ if (aa->ap.aph.correlation_id < last_error_id)
+ sctp_asconf_process_param_ack(stcb, aa, 1);
+ else
+ sctp_asconf_process_param_ack(stcb, aa, 0);
+ } else {
+ /*
+ * since we always process in order (FIFO queue) if
+ * we reach one that hasn't been sent, the rest
+ * should not have been sent either. so, we're
+ * done...
+ */
+ break;
+ }
+ }
+
+ /* update the next sequence number to use */
+ asoc->asconf_seq_out_acked++;
+ /* remove the old ASCONF on our outbound queue */
+ sctp_toss_old_asconf(stcb);
+ if (!TAILQ_EMPTY(&stcb->asoc.asconf_queue)) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ /* we have more params, so restart our timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep,
+ stcb, net);
+#else
+ /* we have more params, so send out more */
+ sctp_send_asconf(stcb, net, SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+}
+
+#ifdef INET6
+static uint32_t
+sctp_is_scopeid_in_nets(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ struct sockaddr_in6 *sin6, *net6;
+ struct sctp_nets *net;
+
+ if (sa->sa_family != AF_INET6) {
+ /* wrong family */
+ return (0);
+ }
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) == 0) {
+ /* not link local address */
+ return (0);
+ }
+ /* hunt through our destination nets list for this scope_id */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (((struct sockaddr *)(&net->ro._l_addr))->sa_family !=
+ AF_INET6)
+ continue;
+ net6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ if (IN6_IS_ADDR_LINKLOCAL(&net6->sin6_addr) == 0)
+ continue;
+ if (sctp_is_same_scope(sin6, net6)) {
+ /* found one */
+ return (1);
+ }
+ }
+ /* didn't find one */
+ return (0);
+}
+
+#endif
+
+/*
+ * address management functions
+ */
+static void
+sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_ifa *ifa, uint16_t type, int addr_locked)
+{
+ int status;
+
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0 &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* subset bound, no ASCONF allowed case, so ignore */
+ return;
+ }
+	/*
+	 * note: we know this is not the "subset bound, no ASCONF allowed"
+	 * case; i.e. this is either boundall or subset bound with ASCONF
+	 * allowed
+	 */
+
+ /* first, make sure it's a good address family */
+ if (ifa->address.sa.sa_family != AF_INET6 &&
+ ifa->address.sa.sa_family != AF_INET) {
+ return;
+ }
+ /* make sure we're "allowed" to add this type of addr */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* invalid if we're not a v6 endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0)
+ return;
+ /* is the v6 addr really valid ? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return;
+ }
+ }
+ /* put this address on the "pending/do not use yet" list */
+ sctp_add_local_addr_restricted(stcb, ifa);
+ /*
+ * check address scope if address is out of scope, don't queue
+ * anything... note: this would leave the address on both inp and
+ * asoc lists
+ */
+ switch (ifa->address.sa.sa_family) {
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+				/* we skip unspecified addresses */
+ return;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (stcb->asoc.local_scope == 0) {
+ return;
+ }
+ /* is it the right link local scope? */
+ if (sctp_is_scopeid_in_nets(stcb, &ifa->address.sa) == 0) {
+ return;
+ }
+ }
+ if (stcb->asoc.site_scope == 0 &&
+ IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ return;
+ }
+ break;
+ }
+#endif
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+ /* invalid if we are a v6 only endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6))
+ return;
+
+ sin = (struct sockaddr_in *)&ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+				/* we skip unspecified addresses */
+ return;
+ }
+ if (stcb->asoc.ipv4_local_scope == 0 &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ return;
+ }
+ break;
+ }
+ default:
+ /* else, not AF_INET or AF_INET6, so skip */
+ return;
+ }
+
+ /* queue an asconf for this address add/delete */
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* does the peer do asconf? */
+ if (stcb->asoc.peer_supports_asconf) {
+ /* queue an asconf for this addr */
+ status = sctp_asconf_queue_add(stcb, ifa, type);
+
+ /*
+ * if queued ok, and in the open state, send out the
+ * ASCONF. If in the non-open state, these will be
+ * sent when the state goes open.
+ */
+ if (status == 0 &&
+ SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
+ stcb, stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ addr_locked);
+#endif
+ }
+ }
+ }
+}
+
+
+int
+sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l;
+ int cnt_invalid = 0;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* invalid if we're not a v6 endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ /* invalid if we are a v6 only endpoint */
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6)) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ } else {
+ /* invalid address family */
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ }
+ return (0);
+}
+
+static int
+sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val)
+{
+ struct sctp_ifa *ifa;
+ struct sctp_asconf_iterator *asc;
+ struct sctp_laddr *laddr, *nladdr, *l;
+
+ /* Only for specific case not bound all */
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ if (l->action == SCTP_ADD_IP_ADDRESS) {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ laddr->action = 0;
+ break;
+ }
+ }
+ } else if (l->action == SCTP_DEL_IP_ADDRESS) {
+ laddr = LIST_FIRST(&inp->sctp_addr_list);
+ while (laddr) {
+ nladdr = LIST_NEXT(laddr, sctp_nxt_addr);
+ /* remove only after all guys are done */
+ if (laddr->ifa == ifa) {
+ sctp_del_local_addr_ep(inp, ifa);
+ }
+ laddr = nladdr;
+ }
+ }
+ }
+ return (0);
+}
+
+void
+sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l;
+ int cnt_invalid = 0;
+ int type, status;
+ int num_queued = 0;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ type = l->action;
+
+ /* address's vrf_id must be the vrf_id of the assoc */
+ if (ifa->vrf_id != stcb->asoc.vrf_id) {
+ continue;
+ }
+ /* Same checks again for assoc */
+ switch (ifa->address.sa.sa_family) {
+#ifdef INET6
+ case AF_INET6:
+ {
+ /* invalid if we're not a v6 endpoint */
+ struct sockaddr_in6 *sin6;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return;
+ else
+ continue;
+ }
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+					/* we skip unspecified addresses */
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (stcb->asoc.local_scope == 0) {
+ continue;
+ }
+ /* is it the right link local scope? */
+ if (sctp_is_scopeid_in_nets(stcb, &ifa->address.sa) == 0) {
+ continue;
+ }
+ }
+ break;
+ }
+#endif
+ case AF_INET:
+ {
+				/* invalid if we are a v6 only endpoint */
+				struct in6pcb *inp6;
+				struct sockaddr_in *sin;
+
+				inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+				    SCTP_IPV6_V6ONLY(inp6)) {
+					cnt_invalid++;
+					if (asc->cnt == cnt_invalid)
+						return;
+					else
+						continue;
+				}
+				sin = (struct sockaddr_in *)&ifa->address.sa;
+				if (sin->sin_addr.s_addr == 0) {
+					/* we skip unspecified addresses */
+					continue;
+				}
+				if (stcb->asoc.ipv4_local_scope == 0 &&
+				    IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+					continue;
+				}
+ break;
+ }
+ default:
+ /* invalid address family */
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return;
+ else
+ continue;
+ break;
+ }
+
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ /* prevent this address from being used as a source */
+ sctp_add_local_addr_restricted(stcb, ifa);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_nets *net;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_rtentry_t *rt;
+
+ /* delete this address if cached */
+ if (net->ro._s_addr == ifa) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ rt = net->ro.ro_rt;
+ if (rt) {
+ RTFREE(rt);
+ net->ro.ro_rt = NULL;
+ }
+ /*
+ * Now we deleted our src address,
+ * should we not also now reset the
+ * cwnd/rto to start as if its a new
+ * address?
+ */
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+ net->RTO = 0;
+
+ }
+ }
+ } else if (type == SCTP_SET_PRIM_ADDR) {
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* must validate the ifa is in the ep */
+ if (sctp_is_addr_in_ep(stcb->sctp_ep, ifa) == 0) {
+ continue;
+ }
+ } else {
+ /* Need to check scopes for this guy */
+ if (sctp_is_address_in_scope(ifa,
+ stcb->asoc.ipv4_addr_legal,
+ stcb->asoc.ipv6_addr_legal,
+ stcb->asoc.loopback_scope,
+ stcb->asoc.ipv4_local_scope,
+ stcb->asoc.local_scope,
+ stcb->asoc.site_scope, 0) == 0) {
+ continue;
+ }
+ }
+ }
+ /* queue an asconf for this address add/delete */
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF) &&
+ stcb->asoc.peer_supports_asconf) {
+ /* queue an asconf for this addr */
+ status = sctp_asconf_queue_add(stcb, ifa, type);
+ /*
+ * if queued ok, and in the open state, update the
+ * count of queued params. If in the non-open
+ * state, these get sent when the assoc goes open.
+ */
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if (status >= 0) {
+ num_queued++;
+ }
+ }
+ }
+ }
+ /*
+ * If we have queued params in the open state, send out an ASCONF.
+ */
+ if (num_queued > 0) {
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+ }
+}
+
+void
+sctp_asconf_iterator_end(void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l, *l_next;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ l = LIST_FIRST(&asc->list_of_work);
+ while (l != NULL) {
+ l_next = LIST_NEXT(l, sctp_nxt_addr);
+ ifa = l->ifa;
+ if (l->action == SCTP_ADD_IP_ADDRESS) {
+ /* Clear the defer use flag */
+ ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ sctp_free_ifa(ifa);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), l);
+ SCTP_DECR_LADDR_COUNT();
+ l = l_next;
+ }
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+}
+
+/*
+ * sa is the sockaddr to ask the peer to set primary to.
+ * returns: 0 = completed, -1 = error
+ */
+int32_t
+sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ uint32_t vrf_id;
+ struct sctp_ifa *ifa;
+
+ /* find the ifa for the desired set primary */
+ vrf_id = stcb->asoc.vrf_id;
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (ifa == NULL) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* queue an ASCONF:SET_PRIM_ADDR to be sent */
+ if (!sctp_asconf_queue_add(stcb, ifa, SCTP_SET_PRIM_ADDR)) {
+ /* set primary queuing succeeded */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "set_primary_ip_address_sa: queued on tcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ } else {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address_sa: failed to add to queue on tcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ return (-1);
+ }
+ return (0);
+}
+
+void
+sctp_set_primary_ip_address(struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+
+ /* go through all our PCB's */
+ LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
+ struct sctp_tcb *stcb;
+
+ /* process for all associations for this endpoint */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ /* queue an ASCONF:SET_PRIM_ADDR to be sent */
+ if (!sctp_asconf_queue_add(stcb, ifa,
+ SCTP_SET_PRIM_ADDR)) {
+ /* set primary queuing succeeded */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address: queued on stcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &ifa->address.sa);
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ } /* for each stcb */
+ } /* for each inp */
+}
+
+int
+sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa)
+{
+ struct sctp_tmit_chunk *chk, *nchk;
+ unsigned int offset, asconf_limit;
+ struct sctp_asconf_chunk *acp;
+ struct sctp_asconf_paramhdr *aph;
+ uint8_t aparam_buf[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_ipv6addr_param *p_addr;
+ int add_cnt, del_cnt;
+ uint16_t last_param_type;
+
+ add_cnt = del_cnt = 0;
+ last_param_type = 0;
+ for (chk = TAILQ_FIRST(&stcb->asoc.asconf_send_queue); chk != NULL;
+ chk = nchk) {
+ /* get next chk */
+ nchk = TAILQ_NEXT(chk, sctp_next);
+
+ if (chk->data == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: No mbuf data?\n");
+ continue;
+ }
+ offset = 0;
+ acp = mtod(chk->data, struct sctp_asconf_chunk *);
+ offset += sizeof(struct sctp_asconf_chunk);
+ asconf_limit = ntohs(acp->ch.chunk_length);
+ p_addr = (struct sctp_ipv6addr_param *)sctp_m_getptr(chk->data, offset, sizeof(struct sctp_paramhdr), aparam_buf);
+ if (p_addr == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: couldn't get lookup addr!\n");
+ continue;
+ }
+ offset += ntohs(p_addr->ph.param_length);
+
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(chk->data, offset, sizeof(struct sctp_asconf_paramhdr), aparam_buf);
+ if (aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: Empty ASCONF will be sent?\n");
+ continue;
+ }
+ while (aph != NULL) {
+ unsigned int param_length, param_type;
+
+ param_type = ntohs(aph->ph.param_type);
+ param_length = ntohs(aph->ph.param_length);
+ if (offset + param_length > asconf_limit) {
+ /* parameter goes beyond end of chunk! */
+ break;
+ }
+ if (param_length > sizeof(aparam_buf)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: param length (%u) larger than buffer size!\n", param_length);
+ break;
+ }
+ if (param_length <= sizeof(struct sctp_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: param length(%u) too short\n", param_length);
+ break;
+ }
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(chk->data, offset, param_length, aparam_buf);
+ if (aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: couldn't get entire param\n");
+ break;
+ }
+ p_addr = (struct sctp_ipv6addr_param *)(aph + 1);
+ if (sctp_addr_match(p_addr, &sctp_ifa->address.sa) != 0) {
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ add_cnt++;
+ break;
+ case SCTP_DEL_IP_ADDRESS:
+ del_cnt++;
+ break;
+ default:
+ break;
+ }
+ last_param_type = param_type;
+ }
+ offset += SCTP_SIZE32(param_length);
+ if (offset >= asconf_limit) {
+ /* no more data in the mbuf chain */
+ break;
+ }
+ /* get pointer to next asconf param */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(chk->data, offset, sizeof(struct sctp_asconf_paramhdr), aparam_buf);
+ }
+ }
+
+ /*
+ * we want to find the sequences which consist of ADD -> DEL -> ADD
+ * or DEL -> ADD
+ */
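+	/*
+	 * for example: a pending ADD followed by a DEL and another ADD for
+	 * this address gives add_cnt = 2, del_cnt = 1, so the address is
+	 * reported as pending (return 1); a lone pending DEL gives
+	 * add_cnt = 0, del_cnt = 1 and returns 0.
+	 */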
+ if (add_cnt > del_cnt ||
+ (add_cnt == del_cnt && last_param_type == SCTP_ADD_IP_ADDRESS)) {
+ return 1;
+ }
+ return 0;
+}
+
+static struct sockaddr *
+sctp_find_valid_localaddr(struct sctp_tcb *stcb, int addr_locked)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+ if (vrf == NULL) {
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (stcb->asoc.loopback_scope == 0 &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* Skip if loopback_scope not set */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (sctp_ifa->address.sa.sa_family == AF_INET &&
+ stcb->asoc.ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+					/* skip unspecified addresses */
+ continue;
+ }
+ if (stcb->asoc.ipv4_local_scope == 0 &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))
+ continue;
+
+ if (sctp_is_addr_restricted(stcb, sctp_ifa) &&
+ (!sctp_is_addr_pending(stcb, sctp_ifa)))
+ continue;
+ /* found a valid local v4 address to use */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (&sctp_ifa->address.sa);
+ } else if (sctp_ifa->address.sa.sa_family == AF_INET6 &&
+ stcb->asoc.ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ continue;
+ }
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+					/* we skip unspecified addresses */
+ continue;
+ }
+ if (stcb->asoc.local_scope == 0 &&
+ IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+ continue;
+ if (stcb->asoc.site_scope == 0 &&
+ IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))
+ continue;
+
+ if (sctp_is_addr_restricted(stcb, sctp_ifa) &&
+ (!sctp_is_addr_pending(stcb, sctp_ifa)))
+ continue;
+ /* found a valid local v6 address to use */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (&sctp_ifa->address.sa);
+ }
+ }
+ }
+ /* no valid addresses found */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+}
+
+static struct sockaddr *
+sctp_find_valid_localaddr_ep(struct sctp_tcb *stcb)
+{
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ /* is the address restricted ? */
+ if (sctp_is_addr_restricted(stcb, laddr->ifa) &&
+ (!sctp_is_addr_pending(stcb, laddr->ifa)))
+ continue;
+
+ /* found a valid local address to use */
+ return (&laddr->ifa->address.sa);
+ }
+ /* no valid addresses found */
+ return (NULL);
+}
+
+/*
+ * builds an ASCONF chunk from queued ASCONF params.
+ * returns NULL on error (no mbuf, no ASCONF params queued, etc).
+ */
+struct mbuf *
+sctp_compose_asconf(struct sctp_tcb *stcb, int *retlen, int addr_locked)
+{
+ struct mbuf *m_asconf, *m_asconf_chk;
+ struct sctp_asconf_addr *aa;
+ struct sctp_asconf_chunk *acp;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_asconf_addr_param *aap;
+ uint32_t p_length;
+ uint32_t correlation_id = 1; /* 0 is reserved... */
+ caddr_t ptr, lookup_ptr;
+ uint8_t lookup_used = 0;
+
+ /* are there any asconf params to send? */
+ TAILQ_FOREACH(aa, &stcb->asoc.asconf_queue, next) {
+ if (aa->sent == 0)
+ break;
+ }
+ if (aa == NULL)
+ return (NULL);
+
+ /*
+ * get a chunk header mbuf and a cluster for the asconf params since
+ * it's simpler to fill in the asconf chunk header lookup address on
+ * the fly
+ */
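+	/*
+	 * the chunk ends up laid out as: the ASCONF chunk header (with the
+	 * serial number), a single address parameter used as the lookup
+	 * address, then the queued ASCONF parameters; the header and lookup
+	 * address go in m_asconf_chk, the parameters in m_asconf, and the
+	 * two mbufs are chained together at the end of this function
+	 */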
+ m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_asconf_chk == NULL) {
+ /* no mbuf's */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: couldn't get chunk mbuf!\n");
+ return (NULL);
+ }
+ m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_asconf == NULL) {
+ /* no mbuf's */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: couldn't get mbuf!\n");
+ sctp_m_freem(m_asconf_chk);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(m_asconf_chk) = sizeof(struct sctp_asconf_chunk);
+ SCTP_BUF_LEN(m_asconf) = 0;
+ acp = mtod(m_asconf_chk, struct sctp_asconf_chunk *);
+ bzero(acp, sizeof(struct sctp_asconf_chunk));
+ /* save pointers to lookup address and asconf params */
+ lookup_ptr = (caddr_t)(acp + 1); /* after the header */
+ ptr = mtod(m_asconf, caddr_t); /* beginning of cluster */
+
+ /* fill in chunk header info */
+ acp->ch.chunk_type = SCTP_ASCONF;
+ acp->ch.chunk_flags = 0;
+ acp->serial_number = htonl(stcb->asoc.asconf_seq_out);
+ stcb->asoc.asconf_seq_out++;
+
+ /* add parameters... up to smallest MTU allowed */
+ TAILQ_FOREACH(aa, &stcb->asoc.asconf_queue, next) {
+ if (aa->sent)
+ continue;
+ /* get the parameter length */
+ p_length = SCTP_SIZE32(aa->ap.aph.ph.param_length);
+ /* will it fit in current chunk? */
+ if (SCTP_BUF_LEN(m_asconf) + p_length > stcb->asoc.smallest_mtu) {
+ /* won't fit, so we're done with this chunk */
+ break;
+ }
+ /* assign (and store) a correlation id */
+ aa->ap.aph.correlation_id = correlation_id++;
+
+ /*
+		 * fill in the address if we're doing a delete; this is a
+		 * simple way for us to fill in the correlation address,
+		 * which should only be used by the peer if we're deleting
+		 * our source address and adding a new address (e.g. the
+		 * renumbering case)
+ */
+ if (lookup_used == 0 &&
+ (aa->special_del == 0) &&
+ aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_ipv6addr_param *lookup;
+ uint16_t p_size, addr_size;
+
+ lookup = (struct sctp_ipv6addr_param *)lookup_ptr;
+ lookup->ph.param_type =
+ htons(aa->ap.addrp.ph.param_type);
+ if (aa->ap.addrp.ph.param_type == SCTP_IPV6_ADDRESS) {
+ /* copy IPv6 address */
+ p_size = sizeof(struct sctp_ipv6addr_param);
+ addr_size = sizeof(struct in6_addr);
+ } else {
+ /* copy IPv4 address */
+ p_size = sizeof(struct sctp_ipv4addr_param);
+ addr_size = sizeof(struct in_addr);
+ }
+ lookup->ph.param_length = htons(SCTP_SIZE32(p_size));
+ memcpy(lookup->addr, &aa->ap.addrp.addr, addr_size);
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(p_size);
+ lookup_used = 1;
+ }
+ /* copy into current space */
+ memcpy(ptr, &aa->ap, p_length);
+
+ /* network elements and update lengths */
+ aph = (struct sctp_asconf_paramhdr *)ptr;
+ aap = (struct sctp_asconf_addr_param *)ptr;
+ /* correlation_id is transparent to peer, no htonl needed */
+ aph->ph.param_type = htons(aph->ph.param_type);
+ aph->ph.param_length = htons(aph->ph.param_length);
+ aap->addrp.ph.param_type = htons(aap->addrp.ph.param_type);
+ aap->addrp.ph.param_length = htons(aap->addrp.ph.param_length);
+
+ SCTP_BUF_LEN(m_asconf) += SCTP_SIZE32(p_length);
+ ptr += SCTP_SIZE32(p_length);
+
+ /*
+ * these params are removed off the pending list upon
+ * getting an ASCONF-ACK back from the peer, just set flag
+ */
+ aa->sent = 1;
+ }
+ /* check to see if the lookup addr has been populated yet */
+ if (lookup_used == 0) {
+ /* NOTE: if the address param is optional, can skip this... */
+ /* add any valid (existing) address... */
+ struct sctp_ipv6addr_param *lookup;
+ uint16_t p_size, addr_size;
+ struct sockaddr *found_addr;
+ caddr_t addr_ptr;
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL)
+ found_addr = sctp_find_valid_localaddr(stcb,
+ addr_locked);
+ else
+ found_addr = sctp_find_valid_localaddr_ep(stcb);
+
+ lookup = (struct sctp_ipv6addr_param *)lookup_ptr;
+ if (found_addr != NULL) {
+ if (found_addr->sa_family == AF_INET6) {
+ /* copy IPv6 address */
+ lookup->ph.param_type =
+ htons(SCTP_IPV6_ADDRESS);
+ p_size = sizeof(struct sctp_ipv6addr_param);
+ addr_size = sizeof(struct in6_addr);
+ addr_ptr = (caddr_t)&((struct sockaddr_in6 *)
+ found_addr)->sin6_addr;
+ } else {
+ /* copy IPv4 address */
+ lookup->ph.param_type =
+ htons(SCTP_IPV4_ADDRESS);
+ p_size = sizeof(struct sctp_ipv4addr_param);
+ addr_size = sizeof(struct in_addr);
+ addr_ptr = (caddr_t)&((struct sockaddr_in *)
+ found_addr)->sin_addr;
+ }
+ lookup->ph.param_length = htons(SCTP_SIZE32(p_size));
+ memcpy(lookup->addr, addr_ptr, addr_size);
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(p_size);
+ lookup_used = 1;
+ } else {
+ /* uh oh... don't have any address?? */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: no lookup addr!\n");
+			/* for now, we send an IPv4 address of 0.0.0.0 */
+ lookup->ph.param_type = htons(SCTP_IPV4_ADDRESS);
+ lookup->ph.param_length = htons(SCTP_SIZE32(sizeof(struct sctp_ipv4addr_param)));
+ bzero(lookup->addr, sizeof(struct in_addr));
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(sizeof(struct sctp_ipv4addr_param));
+ lookup_used = 1;
+ }
+ }
+ /* chain it all together */
+ SCTP_BUF_NEXT(m_asconf_chk) = m_asconf;
+ *retlen = SCTP_BUF_LEN(m_asconf_chk) + SCTP_BUF_LEN(m_asconf);
+	acp->ch.chunk_length = htons(*retlen);
+
+ return (m_asconf_chk);
+}
+
+/*
+ * section to handle address changes before an association is up, e.g.
+ * changes during the INIT/INIT-ACK/COOKIE-ECHO handshake
+ */
+
+/*
+ * processes the (local) addresses in the INIT-ACK chunk
+ */
+static void
+sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
+ unsigned int offset, unsigned int length)
+{
+ struct sctp_paramhdr tmp_param, *ph;
+ uint16_t plen, ptype;
+ struct sctp_ifa *sctp_ifa;
+ struct sctp_ipv6addr_param addr_store;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sockaddr *sa;
+ uint32_t vrf_id;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "processing init-ack addresses\n");
+	if (stcb == NULL) /* Unneeded check for SA */
+ return;
+
+ /* convert to upper bound */
+ length += offset;
+
+ if ((offset + sizeof(struct sctp_paramhdr)) > length) {
+ return;
+ }
+ /* init the addresses */
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_port = stcb->rport;
+
+ bzero(&sin, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_port = stcb->rport;
+
+ /* go through the addresses in the init-ack */
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (ph != NULL) {
+ ptype = ntohs(ph->param_type);
+ plen = ntohs(ph->param_length);
+ if (ptype == SCTP_IPV6_ADDRESS) {
+ struct sctp_ipv6addr_param *a6p;
+
+ /* get the entire IPv6 address param */
+ a6p = (struct sctp_ipv6addr_param *)
+ sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv6addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ a6p == NULL) {
+ return;
+ }
+ memcpy(&sin6.sin6_addr, a6p->addr,
+ sizeof(struct in6_addr));
+ sa = (struct sockaddr *)&sin6;
+ } else if (ptype == SCTP_IPV4_ADDRESS) {
+ struct sctp_ipv4addr_param *a4p;
+
+ /* get the entire IPv4 address param */
+ a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv4addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ a4p == NULL) {
+ return;
+ }
+ sin.sin_addr.s_addr = a4p->addr;
+ sa = (struct sockaddr *)&sin;
+ } else {
+ goto next_addr;
+ }
+
+ /* see if this address really (still) exists */
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = SCTP_DEFAULT_VRFID;
+ }
+ sctp_ifa = sctp_find_ifa_by_addr(sa, vrf_id,
+ SCTP_ADDR_NOT_LOCKED);
+ if (sctp_ifa == NULL) {
+ /* address doesn't exist anymore */
+ int status;
+
+ /* are ASCONFs allowed ? */
+ if ((sctp_is_feature_on(stcb->sctp_ep,
+ SCTP_PCB_FLAGS_DO_ASCONF)) &&
+ stcb->asoc.peer_supports_asconf) {
+ /* queue an ASCONF DEL_IP_ADDRESS */
+ status = sctp_asconf_queue_sa_delete(stcb, sa);
+ /*
+ * if queued ok, and in correct state, send
+ * out the ASCONF.
+ */
+ if (status == 0 &&
+ SCTP_GET_STATE(&stcb->asoc) ==
+ SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ }
+next_addr:
+ /*
+ * Sanity check: Make sure the length isn't 0, otherwise
+ * we'll be stuck in this loop for a long time...
+ */
+ if (SCTP_SIZE32(plen) == 0) {
+ SCTP_PRINTF("process_initack_addrs: bad len (%d) type=%xh\n",
+ plen, ptype);
+ return;
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if ((offset + sizeof(struct sctp_paramhdr)) > length)
+ return;
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ } /* while */
+}
+
+/* FIX ME: need to verify return result for v6 address type if v6 disabled */
+/*
+ * checks to see if a specific address is in the initack address list returns
+ * 1 if found, 0 if not
+ */
+static uint32_t
+sctp_addr_in_initack(struct sctp_tcb *stcb, struct mbuf *m, uint32_t offset,
+ uint32_t length, struct sockaddr *sa)
+{
+ struct sctp_paramhdr tmp_param, *ph;
+ uint16_t plen, ptype;
+ struct sctp_ipv6addr_param addr_store;
+ struct sockaddr_in *sin;
+ struct sctp_ipv4addr_param *a4p;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *a6p;
+ struct sockaddr_in6 sin6_tmp;
+
+#endif /* INET6 */
+
+ if (
+#ifdef INET6
+ (sa->sa_family != AF_INET6) &&
+#endif /* INET6 */
+ (sa->sa_family != AF_INET))
+ return (0);
+
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "find_initack_addr: starting search for ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ /* convert to upper bound */
+ length += offset;
+
+ if ((offset + sizeof(struct sctp_paramhdr)) > length) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "find_initack_addr: invalid offset?\n");
+ return (0);
+ }
+ /* go through the addresses in the init-ack */
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (ph != NULL) {
+ ptype = ntohs(ph->param_type);
+ plen = ntohs(ph->param_length);
+#ifdef INET6
+ if (ptype == SCTP_IPV6_ADDRESS && sa->sa_family == AF_INET6) {
+ /* get the entire IPv6 address param */
+ a6p = (struct sctp_ipv6addr_param *)
+ sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv6addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ (ph == NULL) ||
+ (a6p == NULL)) {
+ return (0);
+ }
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
+ /* create a copy and clear scope */
+ memcpy(&sin6_tmp, sin6,
+ sizeof(struct sockaddr_in6));
+ sin6 = &sin6_tmp;
+ in6_clearscope(&sin6->sin6_addr);
+ }
+ if (memcmp(&sin6->sin6_addr, a6p->addr,
+ sizeof(struct in6_addr)) == 0) {
+ /* found it */
+ return (1);
+ }
+ } else
+#endif /* INET6 */
+
+ if (ptype == SCTP_IPV4_ADDRESS &&
+ sa->sa_family == AF_INET) {
+ /* get the entire IPv4 address param */
+ a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m,
+ offset, sizeof(struct sctp_ipv4addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ (ph == NULL) ||
+ (a4p == NULL)) {
+ return (0);
+ }
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr == a4p->addr) {
+ /* found it */
+ return (1);
+ }
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if (offset + sizeof(struct sctp_paramhdr) > length)
+ return (0);
+ ph = (struct sctp_paramhdr *)
+ sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
+ (uint8_t *) & tmp_param);
+ } /* while */
+ /* not found! */
+ return (0);
+}
+
+/*
+ * makes sure that the current endpoint local addr list is consistent with
+ * the new association (e.g. subset bound, ASCONF allowed); adds addresses
+ * as necessary
+ */
+static void
+sctp_check_address_list_ep(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr)
+{
+ struct sctp_laddr *laddr;
+
+ /* go through the endpoint list */
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+		/* be paranoid and validate the laddr */
+		if (laddr->ifa == NULL) {
+			SCTPDBG(SCTP_DEBUG_ASCONF1,
+			    "check_addr_list_ep: laddr->ifa is NULL\n");
+			continue;
+		}
+ /* do i have it implicitly? */
+ if (sctp_cmpaddr(&laddr->ifa->address.sa, init_addr)) {
+ continue;
+ }
+ /* check to see if in the init-ack */
+ if (!sctp_addr_in_initack(stcb, m, offset, length,
+ &laddr->ifa->address.sa)) {
+ /* try to add it */
+ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb, laddr->ifa,
+ SCTP_ADD_IP_ADDRESS, SCTP_ADDR_NOT_LOCKED);
+ }
+ }
+}
+
+/*
+ * makes sure that the current kernel address list is consistent with the
+ * new association (with all addrs bound); adds addresses as necessary
+ */
+static void
+sctp_check_address_list_all(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr,
+ uint16_t local_scope, uint16_t site_scope,
+ uint16_t ipv4_scope, uint16_t loopback_scope)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ uint32_t vrf_id;
+
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ return;
+ }
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return;
+ }
+ /* go through all our known interfaces */
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (loopback_scope == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* skip loopback interface */
+ continue;
+ }
+ /* go through each interface address */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ /* do i have it implicitly? */
+ if (sctp_cmpaddr(&sctp_ifa->address.sa, init_addr)) {
+ continue;
+ }
+ /* check to see if in the init-ack */
+ if (!sctp_addr_in_initack(stcb, m, offset, length,
+ &sctp_ifa->address.sa)) {
+ /* try to add it */
+ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb,
+ sctp_ifa, SCTP_ADD_IP_ADDRESS,
+ SCTP_ADDR_LOCKED);
+ }
+ } /* end foreach ifa */
+ } /* end foreach ifn */
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+/*
+ * validates an init-ack chunk (from a cookie-echo) against the current
+ * addresses: adds addresses from the init-ack into our local address list
+ * if needed, queues asconf adds/deletes as needed, and makes the
+ * appropriate list changes for source address selection.
+ * m, offset: points to the start of the address list in an init-ack chunk
+ * length: total length of the address params only
+ * init_addr: address where my INIT-ACK was sent from
+ */
+void
+sctp_check_address_list(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr,
+ uint16_t local_scope, uint16_t site_scope,
+ uint16_t ipv4_scope, uint16_t loopback_scope)
+{
+ /* process the local addresses in the initack */
+ sctp_process_initack_addresses(stcb, m, offset, length);
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* bound all case */
+ sctp_check_address_list_all(stcb, m, offset, length, init_addr,
+ local_scope, site_scope, ipv4_scope, loopback_scope);
+ } else {
+ /* subset bound case */
+ if (sctp_is_feature_on(stcb->sctp_ep,
+ SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* asconf's allowed */
+ sctp_check_address_list_ep(stcb, m, offset, length,
+ init_addr);
+ }
+ /* else, no asconfs allowed, so what we sent is what we get */
+ }
+}
+
+/*
+ * sctp_bindx() support
+ */
+uint32_t
+sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa,
+ uint32_t type, uint32_t vrf_id, struct sctp_ifa *sctp_ifap)
+{
+ struct sctp_ifa *ifa;
+
+ if (sa->sa_len == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EINVAL);
+ return (EINVAL);
+ }
+ if (sctp_ifap) {
+ ifa = sctp_ifap;
+ } else if (type == SCTP_ADD_IP_ADDRESS) {
+ /* For an add the address MUST be on the system */
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ /* For a delete we need to find it in the inp */
+ ifa = sctp_find_ifa_in_ep(inp, sa, SCTP_ADDR_NOT_LOCKED);
+ } else {
+ ifa = NULL;
+ }
+ if (ifa != NULL) {
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ sctp_add_local_addr_ep(inp, ifa, type);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_laddr *laddr;
+
+ if (inp->laddr_count < 2) {
+ /* can't delete the last local address */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EINVAL);
+ return (EINVAL);
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (ifa == laddr->ifa) {
+ /* Mark in the delete */
+ laddr->action = type;
+ }
+ }
+ }
+ if (!LIST_EMPTY(&inp->sctp_asoc_list)) {
+ /*
+ * There is no need to start the iterator if the inp
+ * has no associations.
+ */
+ struct sctp_asconf_iterator *asc;
+ struct sctp_laddr *wi;
+
+ SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
+ sizeof(struct sctp_asconf_iterator),
+ SCTP_M_ASC_IT);
+ if (asc == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, ENOMEM);
+ return (ENOMEM);
+ }
+ wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+ if (wi == NULL) {
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, ENOMEM);
+ return (ENOMEM);
+ }
+ LIST_INIT(&asc->list_of_work);
+ asc->cnt = 1;
+ SCTP_INCR_LADDR_COUNT();
+ wi->ifa = ifa;
+ wi->action = type;
+ atomic_add_int(&ifa->refcount, 1);
+ LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
+ (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ sctp_asconf_iterator_stcb,
+ sctp_asconf_iterator_ep_end,
+ SCTP_PCB_ANY_FLAGS,
+ SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)asc, 0,
+ sctp_asconf_iterator_end, inp, 0);
+ }
+ return (0);
+ } else {
+ /* invalid address! */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+}
+
+void
+sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_asconf_addr *aa;
+ struct sctp_ifa *sctp_ifap;
+ struct sctp_asconf_tag_param *vtag;
+ struct sockaddr_in *to;
+
+#ifdef INET6
+ struct sockaddr_in6 *to6;
+
+#endif
+ if (net == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "sctp_asconf_send_nat_state_update: Missing net\n");
+ return;
+ }
+ if (stcb == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "sctp_asconf_send_nat_state_update: Missing stcb\n");
+ return;
+ }
+ /*
+ * Need to have in the asconf: - vtagparam(my_vtag/peer_vtag) -
+ * add(0.0.0.0) - del(0.0.0.0) - Any global addresses add(addr)
+ */
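+	/*
+	 * per RFC 5061 the wildcard address stands for the source address
+	 * of the packet carrying the ASCONF: the ADD asks the peer to add
+	 * our (possibly NAT-rewritten) source address and the DELETE asks it
+	 * to drop every address of ours except that source address, after
+	 * which any global addresses we still own are re-added explicitly
+	 * below
+	 */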
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_send_nat_state_update: failed to get memory!\n");
+ return;
+ }
+ aa->special_del = 0;
+ /* fill in asconf address parameter fields */
+ /* top level elements are "networked" during send */
+ aa->ifa = NULL;
+ aa->sent = 0; /* clear sent flag */
+ vtag = (struct sctp_asconf_tag_param *)&aa->ap.aph;
+ vtag->aph.ph.param_type = SCTP_NAT_VTAGS;
+ vtag->aph.ph.param_length = sizeof(struct sctp_asconf_tag_param);
+ vtag->local_vtag = htonl(stcb->asoc.my_vtag);
+ vtag->remote_vtag = htonl(stcb->asoc.peer_vtag);
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_send_nat_state_update: failed to get memory!\n");
+ return;
+ }
+ memset(aa, 0, sizeof(struct sctp_asconf_addr));
+ /* fill in asconf address parameter fields */
+ /* ADD(0.0.0.0) */
+ if (net->ro._l_addr.sa.sa_family == AF_INET) {
+ aa->ap.aph.ph.param_type = SCTP_ADD_IP_ADDRESS;
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_addrv4_param);
+ aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
+ aa->ap.addrp.ph.param_length = sizeof(struct sctp_ipv4addr_param);
+ /* No need to add an address, we are using 0.0.0.0 */
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+ }
+#ifdef INET6
+ else if (net->ro._l_addr.sa.sa_family == AF_INET6) {
+ aa->ap.aph.ph.param_type = SCTP_ADD_IP_ADDRESS;
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_addr_param);
+ aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
+ aa->ap.addrp.ph.param_length = sizeof(struct sctp_ipv6addr_param);
+ /* No need to add an address, we are using 0.0.0.0 */
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+ }
+#endif /* INET6 */
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_send_nat_state_update: failed to get memory!\n");
+ return;
+ }
+ memset(aa, 0, sizeof(struct sctp_asconf_addr));
+ /* fill in asconf address parameter fields */
+	/* DEL(0.0.0.0) */
+	if (net->ro._l_addr.sa.sa_family == AF_INET) {
+		aa->ap.aph.ph.param_type = SCTP_DEL_IP_ADDRESS;
+		aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_addrv4_param);
+		aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
+		aa->ap.addrp.ph.param_length = sizeof(struct sctp_ipv4addr_param);
+		/* No need to fill in an address, we are using 0.0.0.0 */
+		TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+	}
+#ifdef INET6
+ else if (net->ro._l_addr.sa.sa_family == AF_INET6) {
+ aa->ap.aph.ph.param_type = SCTP_DEL_IP_ADDRESS;
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_addr_param);
+ aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
+ aa->ap.addrp.ph.param_length = sizeof(struct sctp_ipv6addr_param);
+ /* No need to add an address, we are using 0.0.0.0 */
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+ }
+#endif /* INET6 */
+ /* Now we must hunt the addresses and add all global addresses */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_ifn *sctp_ifnp;
+ uint32_t vrf_id;
+
+ vrf_id = stcb->sctp_ep->def_vrf_id;
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ goto skip_rest;
+ }
+ SCTP_IPI_ADDR_RLOCK();
+ LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+ LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+ if (sctp_ifap->address.sa.sa_family == AF_INET) {
+ to = &sctp_ifap->address.sin;
+
+ if (IN4_ISPRIVATE_ADDRESS(&to->sin_addr)) {
+ continue;
+ }
+ if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) {
+ continue;
+ }
+ }
+#ifdef INET6
+ else if (sctp_ifap->address.sa.sa_family == AF_INET6) {
+ to6 = &sctp_ifap->address.sin6;
+ if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) {
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) {
+ continue;
+ }
+ }
+#endif
+ sctp_asconf_queue_mgmt(stcb, sctp_ifap, SCTP_ADD_IP_ADDRESS);
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+			if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+				/*
+				 * Address being deleted by the system, don't
+				 * list.
+				 */
+				continue;
+			}
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /*
+ * Address being deleted on this ep don't
+ * list.
+ */
+ continue;
+ }
+ sctp_ifap = laddr->ifa;
+ if (sctp_ifap->address.sa.sa_family == AF_INET) {
+ to = &sctp_ifap->address.sin;
+
+ if (IN4_ISPRIVATE_ADDRESS(&to->sin_addr)) {
+ continue;
+ }
+ if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) {
+ continue;
+ }
+ }
+#ifdef INET6
+ else if (sctp_ifap->address.sa.sa_family == AF_INET6) {
+ to6 = &sctp_ifap->address.sin6;
+ if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) {
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) {
+ continue;
+ }
+ }
+#endif
+ sctp_asconf_queue_mgmt(stcb, sctp_ifap, SCTP_ADD_IP_ADDRESS);
+ }
+ }
+skip_rest:
+ /* Now we must send the asconf into the queue */
+ sctp_send_asconf(stcb, net, 0);
+}
diff --git a/freebsd/sys/netinet/sctp_asconf.h b/freebsd/sys/netinet/sctp_asconf.h
new file mode 100644
index 00000000..ff8cf378
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_asconf.h
@@ -0,0 +1,96 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_asconf.h,v 1.8 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef _NETINET_SCTP_ASCONF_HH_
+#define _NETINET_SCTP_ASCONF_HH_
+
+#if defined(_KERNEL) || defined(__Userspace__)
+
+/*
+ * function prototypes
+ */
+extern void sctp_asconf_cleanup(struct sctp_tcb *, struct sctp_nets *);
+
+extern struct mbuf *sctp_compose_asconf(struct sctp_tcb *, int *, int);
+
+extern void
+sctp_handle_asconf(struct mbuf *, unsigned int, struct sctp_asconf_chunk *,
+ struct sctp_tcb *, int i);
+
+extern void
+sctp_handle_asconf_ack(struct mbuf *, int, struct sctp_asconf_ack_chunk *,
+ struct sctp_tcb *, struct sctp_nets *, int *);
+
+extern uint32_t
+sctp_addr_mgmt_ep_sa(struct sctp_inpcb *, struct sockaddr *,
+ uint32_t, uint32_t, struct sctp_ifa *);
+
+
+extern int
+sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr,
+ uint32_t val);
+extern void
+sctp_asconf_iterator_stcb(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ void *ptr, uint32_t type);
+extern void sctp_asconf_iterator_end(void *ptr, uint32_t val);
+
+
+extern int32_t
+sctp_set_primary_ip_address_sa(struct sctp_tcb *,
+ struct sockaddr *);
+
+extern void
+ sctp_set_primary_ip_address(struct sctp_ifa *ifa);
+
+extern void
+sctp_check_address_list(struct sctp_tcb *, struct mbuf *, int, int,
+ struct sockaddr *, uint16_t, uint16_t, uint16_t, uint16_t);
+
+extern void
+ sctp_assoc_immediate_retrans(struct sctp_tcb *, struct sctp_nets *);
+extern void
+ sctp_net_immediate_retrans(struct sctp_tcb *, struct sctp_nets *);
+
+extern void
+sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+extern int
+ sctp_is_addr_pending(struct sctp_tcb *, struct sctp_ifa *);
+
+#endif /* _KERNEL || __Userspace__ */
+
+#endif /* !_NETINET_SCTP_ASCONF_HH_ */
diff --git a/freebsd/sys/netinet/sctp_auth.c b/freebsd/sys/netinet/sctp_auth.c
new file mode 100644
index 00000000..6c2bf908
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_auth.c
@@ -0,0 +1,2128 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_indata.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_auth.h>
+
+#ifdef SCTP_DEBUG
+#define SCTP_AUTH_DEBUG (SCTP_BASE_SYSCTL(sctp_debug_on) & SCTP_DEBUG_AUTH1)
+#define SCTP_AUTH_DEBUG2 (SCTP_BASE_SYSCTL(sctp_debug_on) & SCTP_DEBUG_AUTH2)
+#endif /* SCTP_DEBUG */
+
+
+void
+sctp_clear_chunklist(sctp_auth_chklist_t * chklist)
+{
+ bzero(chklist, sizeof(*chklist));
+ /* chklist->num_chunks = 0; */
+}
+
+sctp_auth_chklist_t *
+sctp_alloc_chunklist(void)
+{
+ sctp_auth_chklist_t *chklist;
+
+ SCTP_MALLOC(chklist, sctp_auth_chklist_t *, sizeof(*chklist),
+ SCTP_M_AUTH_CL);
+ if (chklist == NULL) {
+ SCTPDBG(SCTP_DEBUG_AUTH1, "sctp_alloc_chunklist: failed to get memory!\n");
+ } else {
+ sctp_clear_chunklist(chklist);
+ }
+ return (chklist);
+}
+
+void
+sctp_free_chunklist(sctp_auth_chklist_t * list)
+{
+ if (list != NULL)
+ SCTP_FREE(list, SCTP_M_AUTH_CL);
+}
+
+sctp_auth_chklist_t *
+sctp_copy_chunklist(sctp_auth_chklist_t * list)
+{
+ sctp_auth_chklist_t *new_list;
+
+ if (list == NULL)
+ return (NULL);
+
+ /* get a new list */
+ new_list = sctp_alloc_chunklist();
+ if (new_list == NULL)
+ return (NULL);
+ /* copy it */
+ bcopy(list, new_list, sizeof(*new_list));
+
+ return (new_list);
+}
+
+
+/*
+ * add a chunk to the required chunks list
+ */
+int
+sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (-1);
+
+ /* is chunk restricted? */
+ if ((chunk == SCTP_INITIATION) ||
+ (chunk == SCTP_INITIATION_ACK) ||
+ (chunk == SCTP_SHUTDOWN_COMPLETE) ||
+ (chunk == SCTP_AUTHENTICATION)) {
+ return (-1);
+ }
+ if (list->chunks[chunk] == 0) {
+ list->chunks[chunk] = 1;
+ list->num_chunks++;
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: added chunk %u (0x%02x) to Auth list\n",
+ chunk, chunk);
+ }
+ return (0);
+}
+
+/*
+ * delete a chunk from the required chunks list
+ */
+int
+sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (-1);
+
+ /* is chunk restricted? */
+ if ((chunk == SCTP_ASCONF) ||
+ (chunk == SCTP_ASCONF_ACK)) {
+ return (-1);
+ }
+ if (list->chunks[chunk] == 1) {
+ list->chunks[chunk] = 0;
+ list->num_chunks--;
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: deleted chunk %u (0x%02x) from Auth list\n",
+ chunk, chunk);
+ }
+ return (0);
+}
+
+size_t
+sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (0);
+ else
+ return (list->num_chunks);
+}
+
+/*
+ * set the default list of chunks requiring AUTH
+ */
+void
+sctp_auth_set_default_chunks(sctp_auth_chklist_t * list)
+{
+ (void)sctp_auth_add_chunk(SCTP_ASCONF, list);
+ (void)sctp_auth_add_chunk(SCTP_ASCONF_ACK, list);
+}
+
+/*
+ * return the current number and list of required chunks; the caller must
+ * guarantee that ptr has space for up to 256 bytes
+ */
+int
+sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr)
+{
+ int i, count = 0;
+
+ if (list == NULL)
+ return (0);
+
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ *ptr++ = i;
+ count++;
+ }
+ }
+ return (count);
+}
+
+int
+sctp_pack_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr)
+{
+ int i, size = 0;
+
+ if (list == NULL)
+ return (0);
+
+ if (list->num_chunks <= 32) {
+ /* just list them, one byte each */
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ *ptr++ = i;
+ size++;
+ }
+ }
+ } else {
+ int index, offset;
+
+ /* pack into a 32 byte bitfield */
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ index = i / 8;
+ offset = i % 8;
+ ptr[index] |= (1 << offset);
+ }
+ }
+ size = 32;
+ }
+ return (size);
+}
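+
+/*
+ * example of the two encodings used by sctp_pack_auth_chunks() and
+ * sctp_unpack_auth_chunks(): with only ASCONF (0xc1) and ASCONF-ACK (0x80)
+ * required, num_chunks is 2 and the types are emitted as the two bytes
+ * 0x80 0xc1; with more than 32 chunk types the ASCONF bit instead lands in
+ * the 32-byte bitfield as ptr[24] |= 0x02 (0xc1 / 8 = 24, 0xc1 % 8 = 1).
+ */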
+
+int
+sctp_unpack_auth_chunks(const uint8_t * ptr, uint8_t num_chunks,
+ sctp_auth_chklist_t * list)
+{
+ int i;
+ int size;
+
+ if (list == NULL)
+ return (0);
+
+ if (num_chunks <= 32) {
+ /* just pull them, one byte each */
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(*ptr++, list);
+ }
+ size = num_chunks;
+ } else {
+ int index, offset;
+
+ /* unpack from a 32 byte bitfield */
+ for (index = 0; index < 32; index++) {
+ for (offset = 0; offset < 8; offset++) {
+ if (ptr[index] & (1 << offset)) {
+ (void)sctp_auth_add_chunk((index * 8) + offset, list);
+ }
+ }
+ }
+ size = 32;
+ }
+ return (size);
+}
+
+
+/*
+ * allocate structure space for a key of length keylen
+ */
+sctp_key_t *
+sctp_alloc_key(uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ SCTP_MALLOC(new_key, sctp_key_t *, sizeof(*new_key) + keylen,
+ SCTP_M_AUTH_KY);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keylen = keylen;
+ return (new_key);
+}
+
+void
+sctp_free_key(sctp_key_t * key)
+{
+ if (key != NULL)
+ SCTP_FREE(key, SCTP_M_AUTH_KY);
+}
+
+void
+sctp_print_key(sctp_key_t * key, const char *str)
+{
+ uint32_t i;
+
+ if (key == NULL) {
+ printf("%s: [Null key]\n", str);
+ return;
+ }
+ printf("%s: len %u, ", str, key->keylen);
+ if (key->keylen) {
+ for (i = 0; i < key->keylen; i++)
+ printf("%02x", key->key[i]);
+ printf("\n");
+ } else {
+ printf("[Null key]\n");
+ }
+}
+
+void
+sctp_show_key(sctp_key_t * key, const char *str)
+{
+ uint32_t i;
+
+ if (key == NULL) {
+ printf("%s: [Null key]\n", str);
+ return;
+ }
+ printf("%s: len %u, ", str, key->keylen);
+ if (key->keylen) {
+ for (i = 0; i < key->keylen; i++)
+ printf("%02x", key->key[i]);
+ printf("\n");
+ } else {
+ printf("[Null key]\n");
+ }
+}
+
+static uint32_t
+sctp_get_keylen(sctp_key_t * key)
+{
+ if (key != NULL)
+ return (key->keylen);
+ else
+ return (0);
+}
+
+/*
+ * generate a new random key of length 'keylen'
+ */
+sctp_key_t *
+sctp_generate_random_key(uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ /* validate keylen */
+ if (keylen > SCTP_AUTH_RANDOM_SIZE_MAX)
+ keylen = SCTP_AUTH_RANDOM_SIZE_MAX;
+
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ SCTP_READ_RANDOM(new_key->key, keylen);
+ new_key->keylen = keylen;
+ return (new_key);
+}
+
+sctp_key_t *
+sctp_set_key(uint8_t * key, uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ bcopy(key, new_key->key, keylen);
+ return (new_key);
+}
+
+/*-
+ * given two keys of variable size, compute which key is "larger/smaller"
+ * returns: 1 if key1 > key2
+ * -1 if key1 < key2
+ * 0 if key1 = key2
+ */
+static int
+sctp_compare_key(sctp_key_t * key1, sctp_key_t * key2)
+{
+ uint32_t maxlen;
+ uint32_t i;
+ uint32_t key1len, key2len;
+ uint8_t *key_1, *key_2;
+ uint8_t temp[SCTP_AUTH_RANDOM_SIZE_MAX];
+
+ /* sanity/length check */
+ key1len = sctp_get_keylen(key1);
+ key2len = sctp_get_keylen(key2);
+ if ((key1len == 0) && (key2len == 0))
+ return (0);
+ else if (key1len == 0)
+ return (-1);
+ else if (key2len == 0)
+ return (1);
+
+ if (key1len != key2len) {
+ if (key1len >= key2len)
+ maxlen = key1len;
+ else
+ maxlen = key2len;
+ bzero(temp, maxlen);
+ if (key1len < maxlen) {
+ /* prepend zeroes to key1 */
+ bcopy(key1->key, temp + (maxlen - key1len), key1len);
+ key_1 = temp;
+ key_2 = key2->key;
+ } else {
+ /* prepend zeroes to key2 */
+ bcopy(key2->key, temp + (maxlen - key2len), key2len);
+ key_1 = key1->key;
+ key_2 = temp;
+ }
+ } else {
+ maxlen = key1len;
+ key_1 = key1->key;
+ key_2 = key2->key;
+ }
+
+ for (i = 0; i < maxlen; i++) {
+ if (*key_1 > *key_2)
+ return (1);
+ else if (*key_1 < *key_2)
+ return (-1);
+ key_1++;
+ key_2++;
+ }
+
+ /* keys are equal value, so check lengths */
+ if (key1len == key2len)
+ return (0);
+ else if (key1len < key2len)
+ return (-1);
+ else
+ return (1);
+}
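+
+/*-
+ * Example of the comparison above: key1 = { 0x01 } and key2 = { 0x00, 0x01 }
+ * compare as equal byte strings once key1 is left-padded with zeroes to
+ * { 0x00, 0x01 }, so the tie is broken by length and -1 (key1 < key2) is
+ * returned.
+ */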
+
+/*
+ * generate the concatenated keying material based on the two keys and the
+ * shared key (if available). draft-ietf-tsvwg-auth specifies the order
+ * of concatenation
+ */
+sctp_key_t *
+sctp_compute_hashkey(sctp_key_t * key1, sctp_key_t * key2, sctp_key_t * shared)
+{
+ uint32_t keylen;
+ sctp_key_t *new_key;
+ uint8_t *key_ptr;
+
+ keylen = sctp_get_keylen(key1) + sctp_get_keylen(key2) +
+ sctp_get_keylen(shared);
+
+ if (keylen > 0) {
+ /* get space for the new key */
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keylen = keylen;
+ key_ptr = new_key->key;
+ } else {
+ /* all keys empty/null?! */
+ return (NULL);
+ }
+
+ /* concatenate the keys */
+ if (sctp_compare_key(key1, key2) <= 0) {
+ /* key is shared + key1 + key2 */
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+ } else {
+ /* key is shared + key2 + key1 */
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+ }
+ return (new_key);
+}
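+
+/*-
+ * Example of the ordering above: with key1 = { 0x42 } and key2 = { 0x41 },
+ * sctp_compare_key(key1, key2) returns 1, so the else branch produces
+ * shared + key2 + key1.  The numerically smaller of the two random keys is
+ * therefore always concatenated first, after any shared key, which lets
+ * both endpoints derive the same association key regardless of which
+ * random value they consider local.
+ */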
+
+
+sctp_sharedkey_t *
+sctp_alloc_sharedkey(void)
+{
+ sctp_sharedkey_t *new_key;
+
+ SCTP_MALLOC(new_key, sctp_sharedkey_t *, sizeof(*new_key),
+ SCTP_M_AUTH_KY);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keyid = 0;
+ new_key->key = NULL;
+ new_key->refcount = 1;
+ new_key->deactivated = 0;
+ return (new_key);
+}
+
+void
+sctp_free_sharedkey(sctp_sharedkey_t * skey)
+{
+ if (skey == NULL)
+ return;
+
+ if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&skey->refcount)) {
+ if (skey->key != NULL)
+ sctp_free_key(skey->key);
+ SCTP_FREE(skey, SCTP_M_AUTH_KY);
+ }
+}
+
+sctp_sharedkey_t *
+sctp_find_sharedkey(struct sctp_keyhead *shared_keys, uint16_t key_id)
+{
+ sctp_sharedkey_t *skey;
+
+ LIST_FOREACH(skey, shared_keys, next) {
+ if (skey->keyid == key_id)
+ return (skey);
+ }
+ return (NULL);
+}
+
+int
+sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
+ sctp_sharedkey_t * new_skey)
+{
+ sctp_sharedkey_t *skey;
+
+ if ((shared_keys == NULL) || (new_skey == NULL))
+ return (EINVAL);
+
+ /* insert into an empty list? */
+ if (LIST_EMPTY(shared_keys)) {
+ LIST_INSERT_HEAD(shared_keys, new_skey, next);
+ return (0);
+ }
+ /* insert into the existing list, ordered by key id */
+ LIST_FOREACH(skey, shared_keys, next) {
+ if (new_skey->keyid < skey->keyid) {
+ /* insert it before here */
+ LIST_INSERT_BEFORE(skey, new_skey, next);
+ return (0);
+ } else if (new_skey->keyid == skey->keyid) {
+ /* replace the existing key */
+ /* verify this key *can* be replaced */
+ if ((skey->deactivated) && (skey->refcount > 1)) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "can't replace shared key id %u\n",
+ new_skey->keyid);
+ return (EBUSY);
+ }
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "replacing shared key id %u\n",
+ new_skey->keyid);
+ LIST_INSERT_BEFORE(skey, new_skey, next);
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey);
+ return (0);
+ }
+ if (LIST_NEXT(skey, next) == NULL) {
+ /* belongs at the end of the list */
+ LIST_INSERT_AFTER(skey, new_skey, next);
+ return (0);
+ }
+ }
+ /* shouldn't reach here */
+ return (0);
+}
+
+void
+sctp_auth_key_acquire(struct sctp_tcb *stcb, uint16_t key_id)
+{
+ sctp_sharedkey_t *skey;
+
+ /* find the shared key */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, key_id);
+
+ /* bump the ref count */
+ if (skey) {
+ atomic_add_int(&skey->refcount, 1);
+ SCTPDBG(SCTP_DEBUG_AUTH2,
+ "%s: stcb %p key %u refcount acquire to %d\n",
+ __FUNCTION__, stcb, key_id, skey->refcount);
+ }
+}
+
+void
+sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t key_id)
+{
+ sctp_sharedkey_t *skey;
+
+ /* find the shared key */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, key_id);
+
+ /* decrement the ref count */
+ if (skey) {
+ sctp_free_sharedkey(skey);
+ SCTPDBG(SCTP_DEBUG_AUTH2,
+ "%s: stcb %p key %u refcount release to %d\n",
+ __FUNCTION__, stcb, key_id, skey->refcount);
+
+ /* see if a notification should be generated */
+ if ((skey->refcount <= 1) && (skey->deactivated)) {
+ /* notify ULP that key is no longer used */
+ sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb,
+ key_id, 0, SCTP_SO_LOCKED);
+ SCTPDBG(SCTP_DEBUG_AUTH2,
+ "%s: stcb %p key %u no longer used, %d\n",
+ __FUNCTION__, stcb, key_id, skey->refcount);
+ }
+ }
+}
+
+static sctp_sharedkey_t *
+sctp_copy_sharedkey(const sctp_sharedkey_t * skey)
+{
+ sctp_sharedkey_t *new_skey;
+
+ if (skey == NULL)
+ return (NULL);
+ new_skey = sctp_alloc_sharedkey();
+ if (new_skey == NULL)
+ return (NULL);
+ if (skey->key != NULL)
+ new_skey->key = sctp_set_key(skey->key->key, skey->key->keylen);
+ else
+ new_skey->key = NULL;
+ new_skey->keyid = skey->keyid;
+ return (new_skey);
+}
+
+int
+sctp_copy_skeylist(const struct sctp_keyhead *src, struct sctp_keyhead *dest)
+{
+ sctp_sharedkey_t *skey, *new_skey;
+ int count = 0;
+
+ if ((src == NULL) || (dest == NULL))
+ return (0);
+ LIST_FOREACH(skey, src, next) {
+ new_skey = sctp_copy_sharedkey(skey);
+ if (new_skey != NULL) {
+ (void)sctp_insert_sharedkey(dest, new_skey);
+ count++;
+ }
+ }
+ return (count);
+}
+
+
+sctp_hmaclist_t *
+sctp_alloc_hmaclist(uint8_t num_hmacs)
+{
+ sctp_hmaclist_t *new_list;
+ int alloc_size;
+
+ alloc_size = sizeof(*new_list) + num_hmacs * sizeof(new_list->hmac[0]);
+ SCTP_MALLOC(new_list, sctp_hmaclist_t *, alloc_size,
+ SCTP_M_AUTH_HL);
+ if (new_list == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_list->max_algo = num_hmacs;
+ new_list->num_algo = 0;
+ return (new_list);
+}
+
+void
+sctp_free_hmaclist(sctp_hmaclist_t * list)
+{
+ if (list != NULL) {
+ SCTP_FREE(list, SCTP_M_AUTH_HL);
+ list = NULL;
+ }
+}
+
+int
+sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id)
+{
+ int i;
+
+ if (list == NULL)
+ return (-1);
+ if (list->num_algo == list->max_algo) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: HMAC id list full, ignoring add %u\n", hmac_id);
+ return (-1);
+ }
+ if ((hmac_id != SCTP_AUTH_HMAC_ID_SHA1) &&
+#ifdef HAVE_SHA224
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA224) &&
+#endif
+#ifdef HAVE_SHA2
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA256) &&
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA384) &&
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA512) &&
+#endif
+ 1) {
+ return (-1);
+ }
+ /* Now is it already in the list */
+ for (i = 0; i < list->num_algo; i++) {
+ if (list->hmac[i] == hmac_id) {
+ /* already in list */
+ return (-1);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: add HMAC id %u to list\n", hmac_id);
+ list->hmac[list->num_algo++] = hmac_id;
+ return (0);
+}
+
+sctp_hmaclist_t *
+sctp_copy_hmaclist(sctp_hmaclist_t * list)
+{
+ sctp_hmaclist_t *new_list;
+ int i;
+
+ if (list == NULL)
+ return (NULL);
+ /* get a new list */
+ new_list = sctp_alloc_hmaclist(list->max_algo);
+ if (new_list == NULL)
+ return (NULL);
+ /* copy it */
+ new_list->max_algo = list->max_algo;
+ new_list->num_algo = list->num_algo;
+ for (i = 0; i < list->num_algo; i++)
+ new_list->hmac[i] = list->hmac[i];
+ return (new_list);
+}
+
+sctp_hmaclist_t *
+sctp_default_supported_hmaclist(void)
+{
+ sctp_hmaclist_t *new_list;
+
+ new_list = sctp_alloc_hmaclist(2);
+ if (new_list == NULL)
+ return (NULL);
+ (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA1);
+ (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA256);
+ return (new_list);
+}
+
+/*-
+ * HMAC algos are listed in priority/preference order
+ * find the best HMAC id to use for the peer based on local support
+ */
+uint16_t
+sctp_negotiate_hmacid(sctp_hmaclist_t * peer, sctp_hmaclist_t * local)
+{
+ int i, j;
+
+ if ((local == NULL) || (peer == NULL))
+ return (SCTP_AUTH_HMAC_ID_RSVD);
+
+ for (i = 0; i < peer->num_algo; i++) {
+ for (j = 0; j < local->num_algo; j++) {
+ if (peer->hmac[i] == local->hmac[j]) {
+ /* found the "best" one */
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: negotiated peer HMAC id %u\n",
+ peer->hmac[i]);
+ return (peer->hmac[i]);
+ }
+ }
+ }
+ /* didn't find one! */
+ return (SCTP_AUTH_HMAC_ID_RSVD);
+}
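+
+/*-
+ * Example of the negotiation above: the peer's list is walked in the
+ * peer's preference order, so with peer = { SHA-256, SHA-1 } and
+ * local = { SHA-1, SHA-256 } the result is SCTP_AUTH_HMAC_ID_SHA256,
+ * the first peer entry that the local list also supports.
+ */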
+
+/*-
+ * serialize the HMAC algo list and return space used
+ * caller must guarantee ptr has appropriate space
+ */
+int
+sctp_serialize_hmaclist(sctp_hmaclist_t * list, uint8_t * ptr)
+{
+ int i;
+ uint16_t hmac_id;
+
+ if (list == NULL)
+ return (0);
+
+ for (i = 0; i < list->num_algo; i++) {
+ hmac_id = htons(list->hmac[i]);
+ bcopy(&hmac_id, ptr, sizeof(hmac_id));
+ ptr += sizeof(hmac_id);
+ }
+ return (list->num_algo * sizeof(hmac_id));
+}
+
+int
+sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs, uint32_t num_hmacs)
+{
+ uint32_t i;
+ uint16_t hmac_id;
+ uint32_t sha1_supported = 0;
+
+ for (i = 0; i < num_hmacs; i++) {
+ hmac_id = ntohs(hmacs->hmac_ids[i]);
+ if (hmac_id == SCTP_AUTH_HMAC_ID_SHA1)
+ sha1_supported = 1;
+ }
+ /* SHA-1 support is required; reject the list if it is missing */
+ if (sha1_supported == 0)
+ return (-1);
+ else
+ return (0);
+}
+
+sctp_authinfo_t *
+sctp_alloc_authinfo(void)
+{
+ sctp_authinfo_t *new_authinfo;
+
+ SCTP_MALLOC(new_authinfo, sctp_authinfo_t *, sizeof(*new_authinfo),
+ SCTP_M_AUTH_IF);
+
+ if (new_authinfo == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ bzero(new_authinfo, sizeof(*new_authinfo));
+ return (new_authinfo);
+}
+
+void
+sctp_free_authinfo(sctp_authinfo_t * authinfo)
+{
+ if (authinfo == NULL)
+ return;
+
+ if (authinfo->random != NULL)
+ sctp_free_key(authinfo->random);
+ if (authinfo->peer_random != NULL)
+ sctp_free_key(authinfo->peer_random);
+ if (authinfo->assoc_key != NULL)
+ sctp_free_key(authinfo->assoc_key);
+ if (authinfo->recv_key != NULL)
+ sctp_free_key(authinfo->recv_key);
+
+ /* We are NOT dynamically allocating authinfo's right now... */
+ /* SCTP_FREE(authinfo, SCTP_M_AUTH_??); */
+}
+
+
+uint32_t
+sctp_get_auth_chunk_len(uint16_t hmac_algo)
+{
+ int size;
+
+ size = sizeof(struct sctp_auth_chunk) + sctp_get_hmac_digest_len(hmac_algo);
+ return (SCTP_SIZE32(size));
+}
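+
+/*-
+ * Example, assuming the 8-byte AUTH chunk header defined by RFC 4895
+ * (type, flags, length, shared key id, HMAC id): SHA-1 yields
+ * 8 + 20 = 28 bytes and SHA-256 yields 8 + 32 = 40 bytes; both are
+ * already multiples of 4, so SCTP_SIZE32() leaves them unchanged.
+ */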
+
+uint32_t
+sctp_get_hmac_digest_len(uint16_t hmac_algo)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ return (SCTP_AUTH_DIGEST_LEN_SHA1);
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ return (SCTP_AUTH_DIGEST_LEN_SHA224);
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ return (SCTP_AUTH_DIGEST_LEN_SHA256);
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ return (SCTP_AUTH_DIGEST_LEN_SHA384);
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ return (SCTP_AUTH_DIGEST_LEN_SHA512);
+#endif
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return (0);
+ } /* end switch */
+}
+
+static inline int
+sctp_get_hmac_block_len(uint16_t hmac_algo)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+#endif
+ return (64);
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ return (64);
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ return (128);
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return (0);
+ } /* end switch */
+}
+
+static void
+sctp_hmac_init(uint16_t hmac_algo, sctp_hash_context_t * ctx)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Init(&ctx->sha1);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Init(&ctx->sha256);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ SHA384_Init(&ctx->sha384);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Init(&ctx->sha512);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+static void
+sctp_hmac_update(uint16_t hmac_algo, sctp_hash_context_t * ctx,
+ uint8_t * text, uint32_t textlen)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Update(&ctx->sha1, text, textlen);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Update(&ctx->sha256, text, textlen);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ SHA384_Update(&ctx->sha384, text, textlen);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Update(&ctx->sha512, text, textlen);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+static void
+sctp_hmac_final(uint16_t hmac_algo, sctp_hash_context_t * ctx,
+ uint8_t * digest)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Final(digest, &ctx->sha1);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Final(digest, &ctx->sha256);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ /* SHA384 is truncated SHA512 */
+ SHA384_Final(digest, &ctx->sha384);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Final(digest, &ctx->sha512);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+/*-
+ * Keyed-Hashing for Message Authentication: FIPS 198 (RFC 2104)
+ *
+ * Compute the HMAC digest using the desired hash key, text, and HMAC
+ * algorithm. Resulting digest is placed in 'digest' and digest length
+ * is returned, if the HMAC was performed.
+ *
+ * WARNING: it is up to the caller to supply sufficient space to hold the
+ * resultant digest.
+ */
+uint32_t
+sctp_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t ipad[128], opad[128]; /* keyed hash inner/outer pads */
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint32_t i;
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) || (text == NULL) ||
+ (textlen == 0) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key, keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* set the hashed key as the key */
+ keylen = digestlen;
+ key = temp;
+ }
+ /* initialize the inner/outer pads with the key and "append" zeroes */
+ bzero(ipad, blocklen);
+ bzero(opad, blocklen);
+ bcopy(key, ipad, keylen);
+ bcopy(key, opad, keylen);
+
+ /* XOR the key with ipad and opad values */
+ for (i = 0; i < blocklen; i++) {
+ ipad[i] ^= 0x36;
+ opad[i] ^= 0x5c;
+ }
+
+ /* perform inner hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, ipad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, text, textlen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+
+ /* perform outer hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, opad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, temp, digestlen);
+ sctp_hmac_final(hmac_algo, &ctx, digest);
+
+ return (digestlen);
+}
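+
+/*-
+ * The computation above is the standard RFC 2104 construction:
+ *   HMAC(K, text) = H((K' XOR opad) || H((K' XOR ipad) || text))
+ * where K' is the key zero-padded to the hash block length (or first
+ * hashed down to the digest length if it is longer than a block),
+ * ipad is the byte 0x36 repeated and opad is the byte 0x5c repeated.
+ */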
+
+/* mbuf version */
+uint32_t
+sctp_hmac_m(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ struct mbuf *m, uint32_t m_offset, uint8_t * digest, uint32_t trailer)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t ipad[128], opad[128]; /* keyed hash inner/outer pads */
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint32_t i;
+ struct mbuf *m_tmp;
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) || (m == NULL) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key, keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* set the hashed key as the key */
+ keylen = digestlen;
+ key = temp;
+ }
+ /* initialize the inner/outer pads with the key and "append" zeroes */
+ bzero(ipad, blocklen);
+ bzero(opad, blocklen);
+ bcopy(key, ipad, keylen);
+ bcopy(key, opad, keylen);
+
+ /* XOR the key with ipad and opad values */
+ for (i = 0; i < blocklen; i++) {
+ ipad[i] ^= 0x36;
+ opad[i] ^= 0x5c;
+ }
+
+ /* perform inner hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, ipad, blocklen);
+ /* find the correct starting mbuf and offset (get start of text) */
+ m_tmp = m;
+ while ((m_tmp != NULL) && (m_offset >= (uint32_t) SCTP_BUF_LEN(m_tmp))) {
+ m_offset -= SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ /* now use the rest of the mbuf chain for the text */
+ while (m_tmp != NULL) {
+ if ((SCTP_BUF_NEXT(m_tmp) == NULL) && trailer) {
+ sctp_hmac_update(hmac_algo, &ctx, mtod(m_tmp, uint8_t *) + m_offset,
+ SCTP_BUF_LEN(m_tmp) - (trailer + m_offset));
+ } else {
+ sctp_hmac_update(hmac_algo, &ctx, mtod(m_tmp, uint8_t *) + m_offset,
+ SCTP_BUF_LEN(m_tmp) - m_offset);
+ }
+
+ /* clear the offset since it's only for the first mbuf */
+ m_offset = 0;
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+
+ /* perform outer hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, opad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, temp, digestlen);
+ sctp_hmac_final(hmac_algo, &ctx, digest);
+
+ return (digestlen);
+}
+
+/*-
+ * verify the HMAC digest using the desired hash key, text, and HMAC
+ * algorithm.
+ * Returns -1 on error, 0 on success.
+ */
+int
+sctp_verify_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen,
+ uint8_t * digest, uint32_t digestlen)
+{
+ uint32_t len;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) ||
+ (text == NULL) || (textlen == 0) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest */
+ return (-1);
+ }
+ len = sctp_get_hmac_digest_len(hmac_algo);
+ if ((len == 0) || (digestlen != len))
+ return (-1);
+
+ /* compute the expected hash */
+ if (sctp_hmac(hmac_algo, key, keylen, text, textlen, temp) != len)
+ return (-1);
+
+ if (memcmp(digest, temp, digestlen) != 0)
+ return (-1);
+ else
+ return (0);
+}
+
+
+/*
+ * computes the requested HMAC using a key struct (which may be modified if
+ * the keylen exceeds the HMAC block len).
+ */
+uint32_t
+sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t * key, uint8_t * text,
+ uint32_t textlen, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check */
+ if ((key == NULL) || (text == NULL) || (textlen == 0) ||
+ (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (key->keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key->key, key->keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* save the hashed key as the new key */
+ key->keylen = digestlen;
+ bcopy(temp, key->key, key->keylen);
+ }
+ return (sctp_hmac(hmac_algo, key->key, key->keylen, text, textlen,
+ digest));
+}
+
+/* mbuf version */
+uint32_t
+sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t * key, struct mbuf *m,
+ uint32_t m_offset, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check */
+ if ((key == NULL) || (m == NULL) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (key->keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key->key, key->keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* save the hashed key as the new key */
+ key->keylen = digestlen;
+ bcopy(temp, key->key, key->keylen);
+ }
+ return (sctp_hmac_m(hmac_algo, key->key, key->keylen, m, m_offset, digest, 0));
+}
+
+int
+sctp_auth_is_supported_hmac(sctp_hmaclist_t * list, uint16_t id)
+{
+ int i;
+
+ if ((list == NULL) || (id == SCTP_AUTH_HMAC_ID_RSVD))
+ return (0);
+
+ for (i = 0; i < list->num_algo; i++)
+ if (list->hmac[i] == id)
+ return (1);
+
+ /* not in the list */
+ return (0);
+}
+
+
+/*-
+ * clear any cached key(s) if they match the given key id on an association.
+ * the cached key(s) will be recomputed and re-cached at next use.
+ * ASSUMES TCB_LOCK is already held
+ */
+void
+sctp_clear_cachedkeys(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ if (stcb == NULL)
+ return;
+
+ if (keyid == stcb->asoc.authinfo.assoc_keyid) {
+ sctp_free_key(stcb->asoc.authinfo.assoc_key);
+ stcb->asoc.authinfo.assoc_key = NULL;
+ }
+ if (keyid == stcb->asoc.authinfo.recv_keyid) {
+ sctp_free_key(stcb->asoc.authinfo.recv_key);
+ stcb->asoc.authinfo.recv_key = NULL;
+ }
+}
+
+/*-
+ * clear any cached key(s) if they match the given key id for all assocs on
+ * an endpoint.
+ * ASSUMES INP_WLOCK is already held
+ */
+void
+sctp_clear_cachedkeys_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ struct sctp_tcb *stcb;
+
+ if (inp == NULL)
+ return;
+
+ /* clear the cached keys on all assocs on this instance */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ sctp_clear_cachedkeys(stcb, keyid);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+}
+
+/*-
+ * delete a shared key from an association
+ * ASSUMES TCB_LOCK is already held
+ */
+int
+sctp_delete_sharedkey(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ if (stcb == NULL)
+ return (-1);
+
+ /* is the keyid the assoc active sending key */
+ if (keyid == stcb->asoc.authinfo.active_keyid)
+ return (-1);
+
+ /* does the key exist? */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid);
+ if (skey == NULL)
+ return (-1);
+
+ /* are there other refcount holders on the key? */
+ if (skey->refcount > 1)
+ return (-1);
+
+ /* remove it */
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey); /* frees skey->key as well */
+
+ /* clear any cached keys */
+ sctp_clear_cachedkeys(stcb, keyid);
+ return (0);
+}
+
+/*-
+ * deletes a shared key from the endpoint
+ * ASSUMES INP_WLOCK is already held
+ */
+int
+sctp_delete_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ if (inp == NULL)
+ return (-1);
+
+ /* is the keyid the active sending key on the endpoint */
+ if (keyid == inp->sctp_ep.default_keyid)
+ return (-1);
+
+ /* does the key exist? */
+ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid);
+ if (skey == NULL)
+ return (-1);
+
+ /* endpoint keys are not refcounted */
+
+ /* remove it */
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey); /* frees skey->key as well */
+
+ /* clear any cached keys */
+ sctp_clear_cachedkeys_ep(inp, keyid);
+ return (0);
+}
+
+/*-
+ * set the active key on an association
+ * ASSUMES TCB_LOCK is already held
+ */
+int
+sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey = NULL;
+
+ /* find the key on the assoc */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid);
+ if (skey == NULL) {
+ /* that key doesn't exist */
+ return (-1);
+ }
+ if ((skey->deactivated) && (skey->refcount > 1)) {
+ /* can't reactivate a deactivated key with other refcounts */
+ return (-1);
+ }
+ /* set the (new) active key */
+ stcb->asoc.authinfo.active_keyid = keyid;
+ /* reset the deactivated flag */
+ skey->deactivated = 0;
+
+ return (0);
+}
+
+/*-
+ * set the active key on an endpoint
+ * ASSUMES INP_WLOCK is already held
+ */
+int
+sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ /* find the key */
+ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid);
+ if (skey == NULL) {
+ /* that key doesn't exist */
+ return (-1);
+ }
+ inp->sctp_ep.default_keyid = keyid;
+ return (0);
+}
+
+/*-
+ * deactivates a shared key from the association
+ * ASSUMES TCB_LOCK is already held
+ */
+int
+sctp_deact_sharedkey(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ if (stcb == NULL)
+ return (-1);
+
+ /* is the keyid the assoc active sending key */
+ if (keyid == stcb->asoc.authinfo.active_keyid)
+ return (-1);
+
+ /* does the key exist? */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid);
+ if (skey == NULL)
+ return (-1);
+
+ /* are there other refcount holders on the key? */
+ if (skey->refcount == 1) {
+ /* no other users, send a notification for this key */
+ sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb, keyid, 0,
+ SCTP_SO_LOCKED);
+ }
+ /* mark the key as deactivated */
+ skey->deactivated = 1;
+
+ return (0);
+}
+
+/*-
+ * deactivates a shared key from the endpoint
+ * ASSUMES INP_WLOCK is already held
+ */
+int
+sctp_deact_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ if (inp == NULL)
+ return (-1);
+
+ /* is the keyid the active sending key on the endpoint */
+ if (keyid == inp->sctp_ep.default_keyid)
+ return (-1);
+
+ /* does the key exist? */
+ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid);
+ if (skey == NULL)
+ return (-1);
+
+ /* endpoint keys are not refcounted */
+
+ /* remove it */
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey); /* frees skey->key as well */
+
+ return (0);
+}
+
+/*
+ * get local authentication parameters from cookie (from INIT-ACK)
+ */
+void
+sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
+ uint32_t offset, uint32_t length)
+{
+ struct sctp_paramhdr *phdr, tmp_param;
+ uint16_t plen, ptype;
+ uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_random *p_random = NULL;
+ uint16_t random_len = 0;
+ uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs = NULL;
+ uint16_t hmacs_len = 0;
+ uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_chunk_list *chunks = NULL;
+ uint16_t num_chunks = 0;
+ sctp_key_t *new_key;
+ uint32_t keylen;
+
+ /* convert to upper bound */
+ length += offset;
+
+ phdr = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (phdr != NULL) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+
+ if ((plen == 0) || (offset + plen > length))
+ break;
+
+ if (ptype == SCTP_RANDOM) {
+ if (plen > sizeof(random_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)random_store, min(plen, sizeof(random_store)));
+ if (phdr == NULL)
+ return;
+ /* save the random and length for the key */
+ p_random = (struct sctp_auth_random *)phdr;
+ random_len = plen - sizeof(*p_random);
+ } else if (ptype == SCTP_HMAC_LIST) {
+ int num_hmacs;
+ int i;
+
+ if (plen > sizeof(hmacs_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)hmacs_store, min(plen, sizeof(hmacs_store)));
+ if (phdr == NULL)
+ return;
+ /* save the hmacs list and num for the key */
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ hmacs_len = plen - sizeof(*hmacs);
+ num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]);
+ if (stcb->asoc.local_hmacs != NULL)
+ sctp_free_hmaclist(stcb->asoc.local_hmacs);
+ stcb->asoc.local_hmacs = sctp_alloc_hmaclist(num_hmacs);
+ if (stcb->asoc.local_hmacs != NULL) {
+ for (i = 0; i < num_hmacs; i++) {
+ (void)sctp_auth_add_hmacid(stcb->asoc.local_hmacs,
+ ntohs(hmacs->hmac_ids[i]));
+ }
+ }
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i;
+
+ if (plen > sizeof(chunks_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store, min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return;
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ /* save chunks list and num for the key */
+ if (stcb->asoc.local_auth_chunks != NULL)
+ sctp_clear_chunklist(stcb->asoc.local_auth_chunks);
+ else
+ stcb->asoc.local_auth_chunks = sctp_alloc_chunklist();
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(chunks->chunk_types[i],
+ stcb->asoc.local_auth_chunks);
+ }
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if (offset + sizeof(struct sctp_paramhdr) > length)
+ break;
+ phdr = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
+ (uint8_t *) & tmp_param);
+ }
+ /* concatenate the full random key */
+ keylen = sizeof(*p_random) + random_len + sizeof(*hmacs) + hmacs_len;
+ if (chunks != NULL) {
+ keylen += sizeof(*chunks) + num_chunks;
+ }
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL) {
+ keylen = sizeof(*p_random) + random_len;
+ bcopy(p_random, new_key->key, keylen);
+ }
+ /* append in the AUTH chunks */
+ if (chunks != NULL) {
+ bcopy(chunks, new_key->key + keylen,
+ sizeof(*chunks) + num_chunks);
+ keylen += sizeof(*chunks) + num_chunks;
+ }
+ /* append in the HMACs */
+ if (hmacs != NULL) {
+ bcopy(hmacs, new_key->key + keylen,
+ sizeof(*hmacs) + hmacs_len);
+ }
+ }
+ if (stcb->asoc.authinfo.random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.random);
+ stcb->asoc.authinfo.random = new_key;
+ stcb->asoc.authinfo.random_len = random_len;
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid);
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid);
+
+ /* negotiate what HMAC to use for the peer */
+ stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
+ stcb->asoc.local_hmacs);
+
+ /* copy defaults from the endpoint */
+ /* FIX ME: put in cookie? */
+ stcb->asoc.authinfo.active_keyid = stcb->sctp_ep->sctp_ep.default_keyid;
+ /* copy out the shared key list (by reference) from the endpoint */
+ (void)sctp_copy_skeylist(&stcb->sctp_ep->sctp_ep.shared_keys,
+ &stcb->asoc.shared_keys);
+}
+
+/*
+ * compute and fill in the HMAC digest for a packet
+ */
+void
+sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
+ struct sctp_auth_chunk *auth, struct sctp_tcb *stcb, uint16_t keyid)
+{
+ uint32_t digestlen;
+ sctp_sharedkey_t *skey;
+ sctp_key_t *key;
+
+ if ((stcb == NULL) || (auth == NULL))
+ return;
+
+ /* zero the digest + chunk padding */
+ digestlen = sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id);
+ bzero(auth->hmac, SCTP_SIZE32(digestlen));
+
+ /* is the desired key cached? */
+ if ((keyid != stcb->asoc.authinfo.assoc_keyid) ||
+ (stcb->asoc.authinfo.assoc_key == NULL)) {
+ if (stcb->asoc.authinfo.assoc_key != NULL) {
+ /* free the old cached key */
+ sctp_free_key(stcb->asoc.authinfo.assoc_key);
+ }
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid);
+ /* the only way skey is NULL is if null key id 0 is used */
+ if (skey != NULL)
+ key = skey->key;
+ else
+ key = NULL;
+ /* compute a new assoc key and cache it */
+ stcb->asoc.authinfo.assoc_key =
+ sctp_compute_hashkey(stcb->asoc.authinfo.random,
+ stcb->asoc.authinfo.peer_random, key);
+ stcb->asoc.authinfo.assoc_keyid = keyid;
+ SCTPDBG(SCTP_DEBUG_AUTH1, "caching key id %u\n",
+ stcb->asoc.authinfo.assoc_keyid);
+#ifdef SCTP_DEBUG
+ if (SCTP_AUTH_DEBUG)
+ sctp_print_key(stcb->asoc.authinfo.assoc_key,
+ "Assoc Key");
+#endif
+ }
+ /* set in the active key id */
+ auth->shared_key_id = htons(keyid);
+
+ /* compute and fill in the digest */
+ (void)sctp_compute_hmac_m(stcb->asoc.peer_hmac_id, stcb->asoc.authinfo.assoc_key,
+ m, auth_offset, auth->hmac);
+}
+
+
+static void
+sctp_bzero_m(struct mbuf *m, uint32_t m_offset, uint32_t size)
+{
+ struct mbuf *m_tmp;
+ uint8_t *data;
+
+ /* sanity check */
+ if (m == NULL)
+ return;
+
+ /* find the correct starting mbuf and offset (get start position) */
+ m_tmp = m;
+ while ((m_tmp != NULL) && (m_offset >= (uint32_t) SCTP_BUF_LEN(m_tmp))) {
+ m_offset -= SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ /* now use the rest of the mbuf chain */
+ while ((m_tmp != NULL) && (size > 0)) {
+ data = mtod(m_tmp, uint8_t *) + m_offset;
+ if (size > (uint32_t) SCTP_BUF_LEN(m_tmp)) {
+ bzero(data, SCTP_BUF_LEN(m_tmp));
+ size -= SCTP_BUF_LEN(m_tmp);
+ } else {
+ bzero(data, size);
+ size = 0;
+ }
+ /* clear the offset since it's only for the first mbuf */
+ m_offset = 0;
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+}
+
+/*-
+ * process the incoming Authentication chunk
+ * return codes:
+ * -1 on any authentication error
+ * 0 on authentication verification
+ */
+int
+sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth,
+ struct mbuf *m, uint32_t offset)
+{
+ uint16_t chunklen;
+ uint16_t shared_key_id;
+ uint16_t hmac_id;
+ sctp_sharedkey_t *skey;
+ uint32_t digestlen;
+ uint8_t digest[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint8_t computed_digest[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* auth is checked for NULL by caller */
+ chunklen = ntohs(auth->ch.chunk_length);
+ if (chunklen < sizeof(*auth)) {
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ return (-1);
+ }
+ SCTP_STAT_INCR(sctps_recvauth);
+
+ /* get the auth params */
+ shared_key_id = ntohs(auth->shared_key_id);
+ hmac_id = ntohs(auth->hmac_id);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP AUTH Chunk: shared key %u, HMAC id %u\n",
+ shared_key_id, hmac_id);
+
+ /* is the indicated HMAC supported? */
+ if (!sctp_auth_is_supported_hmac(stcb->asoc.local_hmacs, hmac_id)) {
+ struct mbuf *m_err;
+ struct sctp_auth_invalid_hmac *err;
+
+ SCTP_STAT_INCR(sctps_recvivalhmacid);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: unsupported HMAC id %u\n",
+ hmac_id);
+ /*
+ * report this in an Error Chunk: Unsupported HMAC
+ * Identifier
+ */
+ m_err = sctp_get_mbuf_for_msg(sizeof(*err), 0, M_DONTWAIT,
+ 1, MT_HEADER);
+ if (m_err != NULL) {
+ /* pre-reserve some space */
+ SCTP_BUF_RESV_UF(m_err, sizeof(struct sctp_chunkhdr));
+ /* fill in the error */
+ err = mtod(m_err, struct sctp_auth_invalid_hmac *);
+ bzero(err, sizeof(*err));
+ err->ph.param_type = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
+ err->ph.param_length = htons(sizeof(*err));
+ err->hmac_id = ntohs(hmac_id);
+ SCTP_BUF_LEN(m_err) = sizeof(*err);
+ /* queue it */
+ sctp_queue_op_err(stcb, m_err);
+ }
+ return (-1);
+ }
+ /* get the indicated shared key, if available */
+ if ((stcb->asoc.authinfo.recv_key == NULL) ||
+ (stcb->asoc.authinfo.recv_keyid != shared_key_id)) {
+ /* find the shared key on the assoc first */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys,
+ shared_key_id);
+ /* if the shared key isn't found, discard the chunk */
+ if (skey == NULL) {
+ SCTP_STAT_INCR(sctps_recvivalkeyid);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: unknown key id %u\n",
+ shared_key_id);
+ return (-1);
+ }
+ /* generate a notification if this is a new key id */
+ if (stcb->asoc.authinfo.recv_keyid != shared_key_id)
+ /*
+ * sctp_ulp_notify(SCTP_NOTIFY_AUTH_NEW_KEY, stcb,
+ * shared_key_id, (void
+ * *)stcb->asoc.authinfo.recv_keyid);
+ */
+ sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY,
+ shared_key_id, stcb->asoc.authinfo.recv_keyid,
+ SCTP_SO_NOT_LOCKED);
+ /* compute a new recv assoc key and cache it */
+ if (stcb->asoc.authinfo.recv_key != NULL)
+ sctp_free_key(stcb->asoc.authinfo.recv_key);
+ stcb->asoc.authinfo.recv_key =
+ sctp_compute_hashkey(stcb->asoc.authinfo.random,
+ stcb->asoc.authinfo.peer_random, skey->key);
+ stcb->asoc.authinfo.recv_keyid = shared_key_id;
+#ifdef SCTP_DEBUG
+ if (SCTP_AUTH_DEBUG)
+ sctp_print_key(stcb->asoc.authinfo.recv_key, "Recv Key");
+#endif
+ }
+ /* validate the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_id);
+ if (chunklen < (sizeof(*auth) + digestlen)) {
+ /* invalid digest length */
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: chunk too short for HMAC\n");
+ return (-1);
+ }
+ /* save a copy of the digest, zero the pseudo header, and validate */
+ bcopy(auth->hmac, digest, digestlen);
+ sctp_bzero_m(m, offset + sizeof(*auth), SCTP_SIZE32(digestlen));
+ (void)sctp_compute_hmac_m(hmac_id, stcb->asoc.authinfo.recv_key,
+ m, offset, computed_digest);
+
+ /* compare the computed digest with the one in the AUTH chunk */
+ if (memcmp(digest, computed_digest, digestlen) != 0) {
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: HMAC digest check failed\n");
+ return (-1);
+ }
+ return (0);
+}
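+
+/*-
+ * Note on the verification above: the HMAC is always computed with the
+ * digest field zeroed, which is why sctp_fill_hmac_digest_m() clears
+ * auth->hmac before computing on the send side, and why the received
+ * digest is copied out and zeroed (including its 32-bit padding) before
+ * the local recomputation and memcmp() here.
+ */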
+
+/*
+ * Generate NOTIFICATION
+ */
+void
+sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication,
+ uint16_t keyid, uint16_t alt_keyid, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_authkey_event *auth;
+ struct sctp_queued_to_read *control;
+
+ if ((stcb == NULL) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)
+ ) {
+ /* If the socket is gone we are out of here */
+ return;
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTHEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_authkey_event),
+ 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+
+ SCTP_BUF_LEN(m_notify) = 0;
+ auth = mtod(m_notify, struct sctp_authkey_event *);
+ auth->auth_type = SCTP_AUTHENTICATION_EVENT;
+ auth->auth_flags = 0;
+ auth->auth_length = sizeof(*auth);
+ auth->auth_keynumber = keyid;
+ auth->auth_altkeynumber = alt_keyid;
+ auth->auth_indication = indication;
+ auth->auth_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(*auth);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0, m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not strictly needed, but record the tail mbuf anyway */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb, control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, so_locked);
+}
+
+
+/*-
+ * validates the AUTHentication related parameters in an INIT/INIT-ACK
+ * Note: currently only used for INIT as INIT-ACK is handled inline
+ * with sctp_load_addresses_from_init()
+ */
+int
+sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit)
+{
+ struct sctp_paramhdr *phdr, parm_buf;
+ uint16_t ptype, plen;
+ int peer_supports_asconf = 0;
+ int peer_supports_auth = 0;
+ int got_random = 0, got_hmacs = 0, got_chklist = 0;
+ uint8_t saw_asconf = 0;
+ uint8_t saw_asconf_ack = 0;
+
+ /* go through each of the params. */
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+
+ if (offset + plen > limit) {
+ break;
+ }
+ if (plen < sizeof(struct sctp_paramhdr)) {
+ break;
+ }
+ if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
+ /* A supported extension chunk */
+ struct sctp_supported_chunk_types_param *pr_supported;
+ uint8_t local_store[SCTP_PARAM_BUFFER_SIZE];
+ int num_ent, i;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&local_store, min(plen, sizeof(local_store)));
+ if (phdr == NULL) {
+ return (-1);
+ }
+ pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
+ num_ent = plen - sizeof(struct sctp_paramhdr);
+ for (i = 0; i < num_ent; i++) {
+ switch (pr_supported->chunk_types[i]) {
+ case SCTP_ASCONF:
+ case SCTP_ASCONF_ACK:
+ peer_supports_asconf = 1;
+ break;
+ case SCTP_AUTHENTICATION:
+ peer_supports_auth = 1;
+ break;
+ default:
+ /* one we don't care about */
+ break;
+ }
+ }
+ } else if (ptype == SCTP_RANDOM) {
+ got_random = 1;
+ /* enforce the random length */
+ if (plen != (sizeof(struct sctp_auth_random) +
+ SCTP_AUTH_RANDOM_SIZE_REQUIRED)) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: invalid RANDOM len\n");
+ return (-1);
+ }
+ } else if (ptype == SCTP_HMAC_LIST) {
+ uint8_t store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs;
+ int num_hmacs;
+
+ if (plen > sizeof(store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)store, min(plen, sizeof(store)));
+ if (phdr == NULL)
+ return (-1);
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ num_hmacs = (plen - sizeof(*hmacs)) /
+ sizeof(hmacs->hmac_ids[0]);
+ /* validate the hmac list */
+ if (sctp_verify_hmac_param(hmacs, num_hmacs)) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: invalid HMAC param\n");
+ return (-1);
+ }
+ got_hmacs = 1;
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i, num_chunks;
+ uint8_t chunks_store[SCTP_SMALL_CHUNK_STORE];
+
+ /* did the peer send a non-empty chunk list? */
+ struct sctp_auth_chunk_list *chunks = NULL;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store,
+ min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return (-1);
+
+ /*-
+ * Flip through the list and mark that the
+ * peer supports asconf/asconf_ack.
+ */
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ for (i = 0; i < num_chunks; i++) {
+ /* record asconf/asconf-ack if listed */
+ if (chunks->chunk_types[i] == SCTP_ASCONF)
+ saw_asconf = 1;
+ if (chunks->chunk_types[i] == SCTP_ASCONF_ACK)
+ saw_asconf_ack = 1;
+
+ }
+ if (num_chunks)
+ got_chklist = 1;
+ }
+ offset += SCTP_SIZE32(plen);
+ if (offset >= limit) {
+ break;
+ }
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ /* validate authentication required parameters */
+ if (got_random && got_hmacs) {
+ peer_supports_auth = 1;
+ } else {
+ peer_supports_auth = 0;
+ }
+ if (!peer_supports_auth && got_chklist) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: peer sent chunk list w/o AUTH\n");
+ return (-1);
+ }
+ if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && peer_supports_asconf &&
+ !peer_supports_auth) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: peer supports ASCONF but not AUTH\n");
+ return (-1);
+ } else if ((peer_supports_asconf) && (peer_supports_auth) &&
+ ((saw_asconf == 0) || (saw_asconf_ack == 0))) {
+ return (-2);
+ }
+ return (0);
+}
+
+void
+sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ uint16_t chunks_len = 0;
+ uint16_t hmacs_len = 0;
+ uint16_t random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
+ sctp_key_t *new_key;
+ uint16_t keylen;
+
+ /* initialize hmac list from endpoint */
+ stcb->asoc.local_hmacs = sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (stcb->asoc.local_hmacs != NULL) {
+ hmacs_len = stcb->asoc.local_hmacs->num_algo *
+ sizeof(stcb->asoc.local_hmacs->hmac[0]);
+ }
+ /* initialize auth chunks list from endpoint */
+ stcb->asoc.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ if (stcb->asoc.local_auth_chunks != NULL) {
+ int i;
+
+ for (i = 0; i < 256; i++) {
+ if (stcb->asoc.local_auth_chunks->chunks[i])
+ chunks_len++;
+ }
+ }
+ /* copy defaults from the endpoint */
+ stcb->asoc.authinfo.active_keyid = inp->sctp_ep.default_keyid;
+
+ /* copy out the shared key list (by reference) from the endpoint */
+ (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
+ &stcb->asoc.shared_keys);
+
+ /* now set the concatenated key (random + chunks + hmacs) */
+ /* key includes parameter headers */
+ keylen = (3 * sizeof(struct sctp_paramhdr)) + random_len + chunks_len +
+ hmacs_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ struct sctp_paramhdr *ph;
+ int plen;
+
+ /* generate and copy in the RANDOM */
+ ph = (struct sctp_paramhdr *)new_key->key;
+ ph->param_type = htons(SCTP_RANDOM);
+ plen = sizeof(*ph) + random_len;
+ ph->param_length = htons(plen);
+ SCTP_READ_RANDOM(new_key->key + sizeof(*ph), random_len);
+ keylen = plen;
+
+ /* append in the AUTH chunks */
+ /* NOTE: currently we always have chunks to list */
+ ph = (struct sctp_paramhdr *)(new_key->key + keylen);
+ ph->param_type = htons(SCTP_CHUNK_LIST);
+ plen = sizeof(*ph) + chunks_len;
+ ph->param_length = htons(plen);
+ keylen += sizeof(*ph);
+ if (stcb->asoc.local_auth_chunks) {
+ int i;
+
+ for (i = 0; i < 256; i++) {
+ if (stcb->asoc.local_auth_chunks->chunks[i])
+ new_key->key[keylen++] = i;
+ }
+ }
+ /* append in the HMACs */
+ ph = (struct sctp_paramhdr *)(new_key->key + keylen);
+ ph->param_type = htons(SCTP_HMAC_LIST);
+ plen = sizeof(*ph) + hmacs_len;
+ ph->param_length = htons(plen);
+ keylen += sizeof(*ph);
+ (void)sctp_serialize_hmaclist(stcb->asoc.local_hmacs,
+ new_key->key + keylen);
+ }
+ if (stcb->asoc.authinfo.random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.random);
+ stcb->asoc.authinfo.random = new_key;
+ stcb->asoc.authinfo.random_len = random_len;
+}
diff --git a/freebsd/sys/netinet/sctp_auth.h b/freebsd/sys/netinet/sctp_auth.h
new file mode 100644
index 00000000..da4dc09b
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_auth.h
@@ -0,0 +1,235 @@
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __SCTP_AUTH_HH__
+#define __SCTP_AUTH_HH__
+
+
+/* digest lengths */
+#define SCTP_AUTH_DIGEST_LEN_SHA1 20
+#define SCTP_AUTH_DIGEST_LEN_SHA224 28
+#define SCTP_AUTH_DIGEST_LEN_SHA256 32
+#define SCTP_AUTH_DIGEST_LEN_SHA384 48
+#define SCTP_AUTH_DIGEST_LEN_SHA512 64
+#define SCTP_AUTH_DIGEST_LEN_MAX 64
+
+/* random sizes */
+#define SCTP_AUTH_RANDOM_SIZE_DEFAULT 32
+#define SCTP_AUTH_RANDOM_SIZE_REQUIRED 32
+#define SCTP_AUTH_RANDOM_SIZE_MAX 256
+
+/* union of all supported HMAC algorithm contexts */
+typedef union sctp_hash_context {
+ SHA1_CTX sha1;
+#ifdef HAVE_SHA2
+ SHA256_CTX sha256;
+ SHA384_CTX sha384;
+ SHA512_CTX sha512;
+#endif
+} sctp_hash_context_t;
+
+typedef struct sctp_key {
+ uint32_t keylen;
+ uint8_t key[];
+} sctp_key_t;
+
+typedef struct sctp_shared_key {
+ LIST_ENTRY(sctp_shared_key) next;
+ sctp_key_t *key; /* key text */
+ uint32_t refcount; /* reference count */
+ uint16_t keyid; /* shared key ID */
+ uint8_t deactivated; /* key is deactivated */
+} sctp_sharedkey_t;
+
+LIST_HEAD(sctp_keyhead, sctp_shared_key);
+
+/* authentication chunks list */
+typedef struct sctp_auth_chklist {
+ uint8_t chunks[256];
+ uint8_t num_chunks;
+} sctp_auth_chklist_t;
+
+/* hmac algos supported list */
+typedef struct sctp_hmaclist {
+ uint16_t max_algo; /* max algorithms allocated */
+ uint16_t num_algo; /* num algorithms used */
+ uint16_t hmac[];
+} sctp_hmaclist_t;
+
+/* authentication info */
+typedef struct sctp_authinfo {
+ sctp_key_t *random; /* local random key (concatenated) */
+ uint32_t random_len; /* local random number length for param */
+ sctp_key_t *peer_random;/* peer's random key (concatenated) */
+ sctp_key_t *assoc_key; /* cached concatenated send key */
+ sctp_key_t *recv_key; /* cached concatenated recv key */
+ uint16_t active_keyid; /* active send keyid */
+ uint16_t assoc_keyid; /* current send keyid (cached) */
+ uint16_t recv_keyid; /* last recv keyid (cached) */
+} sctp_authinfo_t;
+
+
+
+/*
+ * Macros
+ */
+#define sctp_auth_is_required_chunk(chunk, list) ((list == NULL) ? (0) : (list->chunks[chunk] != 0))
+
+/*
+ * function prototypes
+ */
+
+/* socket option api functions */
+extern sctp_auth_chklist_t *sctp_alloc_chunklist(void);
+extern void sctp_free_chunklist(sctp_auth_chklist_t * chklist);
+extern void sctp_clear_chunklist(sctp_auth_chklist_t * chklist);
+extern sctp_auth_chklist_t *sctp_copy_chunklist(sctp_auth_chklist_t * chklist);
+extern int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
+extern int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
+extern size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list);
+extern void sctp_auth_set_default_chunks(sctp_auth_chklist_t * list);
+extern int
+sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list,
+ uint8_t * ptr);
+extern int
+sctp_pack_auth_chunks(const sctp_auth_chklist_t * list,
+ uint8_t * ptr);
+extern int
+sctp_unpack_auth_chunks(const uint8_t * ptr, uint8_t num_chunks,
+ sctp_auth_chklist_t * list);
+
+/* key handling */
+extern sctp_key_t *sctp_alloc_key(uint32_t keylen);
+extern void sctp_free_key(sctp_key_t * key);
+extern void sctp_print_key(sctp_key_t * key, const char *str);
+extern void sctp_show_key(sctp_key_t * key, const char *str);
+extern sctp_key_t *sctp_generate_random_key(uint32_t keylen);
+extern sctp_key_t *sctp_set_key(uint8_t * key, uint32_t keylen);
+extern sctp_key_t *
+sctp_compute_hashkey(sctp_key_t * key1, sctp_key_t * key2,
+ sctp_key_t * shared);
+
+/* shared key handling */
+extern sctp_sharedkey_t *sctp_alloc_sharedkey(void);
+extern void sctp_free_sharedkey(sctp_sharedkey_t * skey);
+extern sctp_sharedkey_t *
+sctp_find_sharedkey(struct sctp_keyhead *shared_keys,
+ uint16_t key_id);
+extern int
+sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
+ sctp_sharedkey_t * new_skey);
+extern int
+sctp_copy_skeylist(const struct sctp_keyhead *src,
+ struct sctp_keyhead *dest);
+
+/* ref counts on shared keys, by key id */
+extern void sctp_auth_key_acquire(struct sctp_tcb *stcb, uint16_t keyid);
+extern void sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t keyid);
+
+
+/* hmac list handling */
+extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint8_t num_hmacs);
+extern void sctp_free_hmaclist(sctp_hmaclist_t * list);
+extern int sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id);
+extern sctp_hmaclist_t *sctp_copy_hmaclist(sctp_hmaclist_t * list);
+extern sctp_hmaclist_t *sctp_default_supported_hmaclist(void);
+extern uint16_t
+sctp_negotiate_hmacid(sctp_hmaclist_t * peer,
+ sctp_hmaclist_t * local);
+extern int sctp_serialize_hmaclist(sctp_hmaclist_t * list, uint8_t * ptr);
+extern int
+sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs,
+ uint32_t num_hmacs);
+
+extern sctp_authinfo_t *sctp_alloc_authinfo(void);
+extern void sctp_free_authinfo(sctp_authinfo_t * authinfo);
+
+/* keyed-HMAC functions */
+extern uint32_t sctp_get_auth_chunk_len(uint16_t hmac_algo);
+extern uint32_t sctp_get_hmac_digest_len(uint16_t hmac_algo);
+extern uint32_t
+sctp_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest);
+extern int
+sctp_verify_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest, uint32_t digestlen);
+extern uint32_t
+sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t * key,
+ uint8_t * text, uint32_t textlen, uint8_t * digest);
+extern int sctp_auth_is_supported_hmac(sctp_hmaclist_t * list, uint16_t id);
+
+/* mbuf versions */
+extern uint32_t
+sctp_hmac_m(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ struct mbuf *m, uint32_t m_offset, uint8_t * digest, uint32_t trailer);
+extern uint32_t
+sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t * key,
+ struct mbuf *m, uint32_t m_offset, uint8_t * digest);
+
+/*
+ * authentication routines
+ */
+extern void sctp_clear_cachedkeys(struct sctp_tcb *stcb, uint16_t keyid);
+extern void sctp_clear_cachedkeys_ep(struct sctp_inpcb *inp, uint16_t keyid);
+extern int sctp_delete_sharedkey(struct sctp_tcb *stcb, uint16_t keyid);
+extern int sctp_delete_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid);
+extern int sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid);
+extern int sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid);
+extern int sctp_deact_sharedkey(struct sctp_tcb *stcb, uint16_t keyid);
+extern int sctp_deact_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid);
+
+extern void
+sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
+ uint32_t offset, uint32_t length);
+extern void
+sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
+ struct sctp_auth_chunk *auth, struct sctp_tcb *stcb, uint16_t key_id);
+extern struct mbuf *
+sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
+ struct sctp_auth_chunk **auth_ret, uint32_t * offset,
+ struct sctp_tcb *stcb, uint8_t chunk);
+extern int
+sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *ch,
+ struct mbuf *m, uint32_t offset);
+extern void
+sctp_notify_authentication(struct sctp_tcb *stcb,
+ uint32_t indication, uint16_t keyid, uint16_t alt_keyid, int so_locked);
+extern int
+sctp_validate_init_auth_params(struct mbuf *m, int offset,
+ int limit);
+extern void
+sctp_initialize_auth_params(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb);
+
+/* test functions */
+#endif /* __SCTP_AUTH_HH__ */
diff --git a/freebsd/sys/netinet/sctp_bsd_addr.c b/freebsd/sys/netinet/sctp_bsd_addr.c
new file mode 100644
index 00000000..8782e681
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_bsd_addr.c
@@ -0,0 +1,562 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_bsd_addr.h>
+#include <freebsd/netinet/sctp_uio.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_timer.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_indata.h>
+#include <freebsd/sys/unistd.h>
+
+/* Declare all of our malloc named types */
+MALLOC_DEFINE(SCTP_M_MAP, "sctp_map", "sctp asoc map descriptor");
+MALLOC_DEFINE(SCTP_M_STRMI, "sctp_stri", "sctp stream in array");
+MALLOC_DEFINE(SCTP_M_STRMO, "sctp_stro", "sctp stream out array");
+MALLOC_DEFINE(SCTP_M_ASC_ADDR, "sctp_aadr", "sctp asconf address");
+MALLOC_DEFINE(SCTP_M_ASC_IT, "sctp_a_it", "sctp asconf iterator");
+MALLOC_DEFINE(SCTP_M_AUTH_CL, "sctp_atcl", "sctp auth chunklist");
+MALLOC_DEFINE(SCTP_M_AUTH_KY, "sctp_atky", "sctp auth key");
+MALLOC_DEFINE(SCTP_M_AUTH_HL, "sctp_athm", "sctp auth hmac list");
+MALLOC_DEFINE(SCTP_M_AUTH_IF, "sctp_athi", "sctp auth info");
+MALLOC_DEFINE(SCTP_M_STRESET, "sctp_stre", "sctp stream reset");
+MALLOC_DEFINE(SCTP_M_CMSG, "sctp_cmsg", "sctp CMSG buffer");
+MALLOC_DEFINE(SCTP_M_COPYAL, "sctp_cpal", "sctp copy all");
+MALLOC_DEFINE(SCTP_M_VRF, "sctp_vrf", "sctp vrf struct");
+MALLOC_DEFINE(SCTP_M_IFA, "sctp_ifa", "sctp ifa struct");
+MALLOC_DEFINE(SCTP_M_IFN, "sctp_ifn", "sctp ifn struct");
+MALLOC_DEFINE(SCTP_M_TIMW, "sctp_timw", "sctp time block");
+MALLOC_DEFINE(SCTP_M_MVRF, "sctp_mvrf", "sctp mvrf pcb list");
+MALLOC_DEFINE(SCTP_M_ITER, "sctp_iter", "sctp iterator control");
+MALLOC_DEFINE(SCTP_M_SOCKOPT, "sctp_socko", "sctp socket option");
+
+/* Global NON-VNET structure that controls the iterator */
+struct iterator_control sctp_it_ctl;
+static int __sctp_thread_based_iterator_started = 0;
+
+
+static void
+sctp_cleanup_itqueue(void)
+{
+ struct sctp_iterator *it;
+
+ while ((it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead)) != NULL) {
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
+ SCTP_FREE(it, SCTP_M_ITER);
+ }
+}
+
+
+void
+sctp_wakeup_iterator(void)
+{
+ wakeup(&sctp_it_ctl.iterator_running);
+}
+
+static void
+sctp_iterator_thread(void *v)
+{
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ while (1) {
+ msleep(&sctp_it_ctl.iterator_running,
+ &sctp_it_ctl.ipi_iterator_wq_mtx,
+ 0, "waiting_for_work", 0);
+ if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_MUST_EXIT) {
+ SCTP_IPI_ITERATOR_WQ_DESTROY();
+ SCTP_ITERATOR_LOCK_DESTROY();
+ sctp_cleanup_itqueue();
+ __sctp_thread_based_iterator_started = 0;
+ kthread_exit();
+ }
+ sctp_iterator_worker();
+ }
+}
+
+void
+sctp_startup_iterator(void)
+{
+ if (__sctp_thread_based_iterator_started) {
+ /* You only get one */
+ return;
+ }
+ /* init the iterator head */
+ __sctp_thread_based_iterator_started = 1;
+ sctp_it_ctl.iterator_running = 0;
+ sctp_it_ctl.iterator_flags = 0;
+ sctp_it_ctl.cur_it = NULL;
+ SCTP_ITERATOR_LOCK_INIT();
+ SCTP_IPI_ITERATOR_WQ_INIT();
+ TAILQ_INIT(&sctp_it_ctl.iteratorhead);
+
+ int ret;
+
+ ret = kproc_create(sctp_iterator_thread,
+ (void *)NULL,
+ &sctp_it_ctl.thread_proc,
+ RFPROC,
+ SCTP_KTHREAD_PAGES,
+ SCTP_KTRHEAD_NAME);
+}
+
+#ifdef INET6
+
+void
+sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa)
+{
+ struct in6_ifaddr *ifa6;
+
+ ifa6 = (struct in6_ifaddr *)ifa->ifa;
+ ifa->flags = ifa6->ia6_flags;
+ if (!MODULE_GLOBAL(ip6_use_deprecated)) {
+ if (ifa->flags &
+ IN6_IFF_DEPRECATED) {
+ ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+ if (ifa->flags &
+ (IN6_IFF_DETACHED |
+ IN6_IFF_ANYCAST |
+ IN6_IFF_NOTREADY)) {
+ ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+}
+
+#endif /* INET6 */
+
+
+static uint32_t
+sctp_is_desired_interface_type(struct ifaddr *ifa)
+{
+ int result;
+
+ /* check the interface type to see if it's one we care about */
+ switch (ifa->ifa_ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_ISO88023:
+ case IFT_ISO88024:
+ case IFT_ISO88025:
+ case IFT_ISO88026:
+ case IFT_STARLAN:
+ case IFT_P10:
+ case IFT_P80:
+ case IFT_HY:
+ case IFT_FDDI:
+ case IFT_XETHER:
+ case IFT_ISDNBASIC:
+ case IFT_ISDNPRIMARY:
+ case IFT_PTPSERIAL:
+ case IFT_OTHER:
+ case IFT_PPP:
+ case IFT_LOOP:
+ case IFT_SLIP:
+ case IFT_GIF:
+ case IFT_L2VLAN:
+ case IFT_IP:
+ case IFT_IPOVERCDLC:
+ case IFT_IPOVERCLAW:
+ case IFT_VIRTUALIPADDRESS:
+ result = 1;
+ break;
+ default:
+ result = 0;
+ }
+
+ return (result);
+}
+
+
+
+
+static void
+sctp_init_ifns_for_vrf(int vrfid)
+{
+ /*
+ * Here we must apply ANY locks needed by the IFN we access and also
+ * make sure we lock any IFA that exists as we float through the
+ * list of IFA's
+ */
+ struct ifnet *ifn;
+ struct ifaddr *ifa;
+ struct in6_ifaddr *ifa6;
+ struct sctp_ifa *sctp_ifa;
+ uint32_t ifa_flags;
+
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifn, &MODULE_GLOBAL(ifnet), if_list) {
+ IF_ADDR_LOCK(ifn);
+ TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) {
+ if (ifa->ifa_addr == NULL) {
+ continue;
+ }
+ if ((ifa->ifa_addr->sa_family != AF_INET) && (ifa->ifa_addr->sa_family != AF_INET6)) {
+ /* non inet/inet6 skip */
+ continue;
+ }
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) {
+ /* skip unspecified addresses */
+ continue;
+ }
+ } else {
+ if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) {
+ continue;
+ }
+ }
+ if (sctp_is_desired_interface_type(ifa) == 0) {
+ /* non desired type */
+ continue;
+ }
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ ifa6 = (struct in6_ifaddr *)ifa;
+ ifa_flags = ifa6->ia6_flags;
+ } else {
+ ifa_flags = 0;
+ }
+ sctp_ifa = sctp_add_addr_to_vrf(vrfid,
+ (void *)ifn,
+ ifn->if_index,
+ ifn->if_type,
+ ifn->if_xname,
+ (void *)ifa,
+ ifa->ifa_addr,
+ ifa_flags,
+ 0);
+ if (sctp_ifa) {
+ sctp_ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ }
+ IF_ADDR_UNLOCK(ifn);
+ }
+ IFNET_RUNLOCK();
+}
+
+void
+sctp_init_vrf_list(int vrfid)
+{
+ if (vrfid > SCTP_MAX_VRF_ID)
+ /* can't do that */
+ return;
+
+ /* Don't care about return here */
+ (void)sctp_allocate_vrf(vrfid);
+
+ /*
+ * Now we need to build all the ifn's for this vrf and their
+ * addresses
+ */
+ sctp_init_ifns_for_vrf(vrfid);
+}
+
+void
+sctp_addr_change(struct ifaddr *ifa, int cmd)
+{
+ uint32_t ifa_flags = 0;
+
+ /*
+ * BSD only has one VRF. If this changes, we will need to hook in the
+ * right things here to get the id to pass to the address management
+ * routine.
+ */
+ if (SCTP_BASE_VAR(first_time) == 0) {
+ /* Special test to see if my ::1 will show up with this */
+ SCTP_BASE_VAR(first_time) = 1;
+ sctp_init_ifns_for_vrf(SCTP_DEFAULT_VRFID);
+ }
+ if ((cmd != RTM_ADD) && (cmd != RTM_DELETE)) {
+ /* don't know what to do with this */
+ return;
+ }
+ if (ifa->ifa_addr == NULL) {
+ return;
+ }
+ if ((ifa->ifa_addr->sa_family != AF_INET) && (ifa->ifa_addr->sa_family != AF_INET6)) {
+ /* non inet/inet6 skip */
+ return;
+ }
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags;
+ if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) {
+ /* skip unspecified addresses */
+ return;
+ }
+ } else {
+ if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) {
+ return;
+ }
+ }
+
+ if (sctp_is_desired_interface_type(ifa) == 0) {
+ /* non desired type */
+ return;
+ }
+ if (cmd == RTM_ADD) {
+ (void)sctp_add_addr_to_vrf(SCTP_DEFAULT_VRFID, (void *)ifa->ifa_ifp,
+ ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type,
+ ifa->ifa_ifp->if_xname,
+ (void *)ifa, ifa->ifa_addr, ifa_flags, 1);
+ } else {
+
+ sctp_del_addr_from_vrf(SCTP_DEFAULT_VRFID, ifa->ifa_addr,
+ ifa->ifa_ifp->if_index,
+ ifa->ifa_ifp->if_xname
+ );
+ /*
+ * We don't bump refcount here so when it completes the
+ * final delete will happen.
+ */
+ }
+}
+
+void
+sctp_add_or_del_interfaces(int (*pred) (struct ifnet *), int add)
+{
+ struct ifnet *ifn;
+ struct ifaddr *ifa;
+
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifn, &MODULE_GLOBAL(ifnet), if_list) {
+ if (!(*pred) (ifn)) {
+ continue;
+ }
+ TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) {
+ sctp_addr_change(ifa, add ? RTM_ADD : RTM_DELETE);
+ }
+ }
+ IFNET_RUNLOCK();
+}
+
+struct mbuf *
+sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header,
+ int how, int allonebuf, int type)
+{
+ struct mbuf *m = NULL;
+
+ m = m_getm2(NULL, space_needed, how, type, want_header ? M_PKTHDR : 0);
+ if (m == NULL) {
+ /* bad, no memory */
+ return (m);
+ }
+ if (allonebuf) {
+ int siz;
+
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ siz = SCTP_BUF_EXTEND_SIZE(m);
+ } else {
+ if (want_header)
+ siz = MHLEN;
+ else
+ siz = MLEN;
+ }
+ if (siz < space_needed) {
+ m_freem(m);
+ return (NULL);
+ }
+ }
+ if (SCTP_BUF_NEXT(m)) {
+ sctp_m_freem(SCTP_BUF_NEXT(m));
+ SCTP_BUF_NEXT(m) = NULL;
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_log_mb(m, SCTP_MBUF_IALLOC);
+ }
+ }
+#endif
+ return (m);
+}
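
As a rough usage sketch only (the call below is illustrative and its parameter values are assumptions, not taken from this patch): callers typically ask this helper for a single contiguous mbuf and check for NULL before touching it.

	struct mbuf *chk_m;

	/* one contiguous buffer (allonebuf = 1) with a packet header attached */
	chk_m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 1,
	    M_DONTWAIT, 1, MT_DATA);
	if (chk_m == NULL) {
		/* no memory, nothing was allocated */
	}
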
+
+
+#ifdef SCTP_PACKET_LOGGING
+void
+sctp_packet_log(struct mbuf *m, int length)
+{
+ int *lenat, thisone;
+ void *copyto;
+ uint32_t *tick_tock;
+ int total_len;
+ int grabbed_lock = 0;
+ int value, newval, thisend, thisbegin;
+
+ /*
+ * Buffer layout:
+ *  - sizeof this entry (total_len)
+ *  - previous end (value)
+ *  - ticks of log (ticks)
+ *  - the IP packet, as logged
+ *  - where this started (thisbegin)
+ *  x <-- end points here
+ */
+ total_len = SCTP_SIZE32((length + (4 * sizeof(int))));
+ /* Log a packet to the buffer. */
+ if (total_len > SCTP_PACKET_LOG_SIZE) {
+ /* Can't log this packet, the buffer is not big enough */
+ return;
+ }
+ if (length < (int)(SCTP_MIN_V4_OVERHEAD + sizeof(struct sctp_cookie_ack_chunk))) {
+ return;
+ }
+ atomic_add_int(&SCTP_BASE_VAR(packet_log_writers), 1);
+try_again:
+ if (SCTP_BASE_VAR(packet_log_writers) > SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ SCTP_IP_PKTLOG_LOCK();
+ grabbed_lock = 1;
+again_locked:
+ value = SCTP_BASE_VAR(packet_log_end);
+ newval = SCTP_BASE_VAR(packet_log_end) + total_len;
+ if (newval >= SCTP_PACKET_LOG_SIZE) {
+ /* we wrapped */
+ thisbegin = 0;
+ thisend = total_len;
+ } else {
+ thisbegin = SCTP_BASE_VAR(packet_log_end);
+ thisend = newval;
+ }
+ if (!(atomic_cmpset_int(&SCTP_BASE_VAR(packet_log_end), value, thisend))) {
+ goto again_locked;
+ }
+ } else {
+ value = SCTP_BASE_VAR(packet_log_end);
+ newval = SCTP_BASE_VAR(packet_log_end) + total_len;
+ if (newval >= SCTP_PACKET_LOG_SIZE) {
+ /* we wrapped */
+ thisbegin = 0;
+ thisend = total_len;
+ } else {
+ thisbegin = SCTP_BASE_VAR(packet_log_end);
+ thisend = newval;
+ }
+ if (!(atomic_cmpset_int(&SCTP_BASE_VAR(packet_log_end), value, thisend))) {
+ goto try_again;
+ }
+ }
+ /* Sanity check */
+ if (thisend >= SCTP_PACKET_LOG_SIZE) {
+ printf("Insanity stops a log thisbegin:%d thisend:%d writers:%d lock:%d end:%d\n",
+ thisbegin,
+ thisend,
+ SCTP_BASE_VAR(packet_log_writers),
+ grabbed_lock,
+ SCTP_BASE_VAR(packet_log_end));
+ SCTP_BASE_VAR(packet_log_end) = 0;
+ goto no_log;
+
+ }
+ lenat = (int *)&SCTP_BASE_VAR(packet_log_buffer)[thisbegin];
+ *lenat = total_len;
+ lenat++;
+ *lenat = value;
+ lenat++;
+ tick_tock = (uint32_t *) lenat;
+ lenat++;
+ *tick_tock = sctp_get_tick_count();
+ copyto = (void *)lenat;
+ thisone = thisend - sizeof(int);
+ lenat = (int *)&SCTP_BASE_VAR(packet_log_buffer)[thisone];
+ *lenat = thisbegin;
+ if (grabbed_lock) {
+ SCTP_IP_PKTLOG_UNLOCK();
+ grabbed_lock = 0;
+ }
+ m_copydata(m, 0, length, (caddr_t)copyto);
+no_log:
+ if (grabbed_lock) {
+ SCTP_IP_PKTLOG_UNLOCK();
+ }
+ atomic_subtract_int(&SCTP_BASE_VAR(packet_log_writers), 1);
+}
+
+
+int
+sctp_copy_out_packet_log(uint8_t * target, int length)
+{
+ /*
+ * We wind through the packet log starting at the beginning, copying up to
+ * length bytes out. We return the number of bytes copied.
+ */
+ int tocopy, this_copy;
+ int *lenat;
+ int did_delay = 0;
+
+ tocopy = length;
+ if (length < (int)(2 * sizeof(int))) {
+ /* not enough room */
+ return (0);
+ }
+ if (SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ atomic_add_int(&SCTP_BASE_VAR(packet_log_writers), SCTP_PKTLOG_WRITERS_NEED_LOCK);
+again:
+ if ((did_delay == 0) && (SCTP_BASE_VAR(packet_log_writers) != SCTP_PKTLOG_WRITERS_NEED_LOCK)) {
+ /*
+ * we delay here for just a moment hoping the
+ * writer(s) that were present when we entered will
+ * have left and we only have locking ones that will
+ * contend with us for the lock. This does not
+ * assure 100% access, but it's good enough for a
+ * logging facility like this.
+ */
+ did_delay = 1;
+ DELAY(10);
+ goto again;
+ }
+ }
+ SCTP_IP_PKTLOG_LOCK();
+ lenat = (int *)target;
+ *lenat = SCTP_BASE_VAR(packet_log_end);
+ lenat++;
+ this_copy = min((length - sizeof(int)), SCTP_PACKET_LOG_SIZE);
+ memcpy((void *)lenat, (void *)SCTP_BASE_VAR(packet_log_buffer), this_copy);
+ if (SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ atomic_subtract_int(&SCTP_BASE_VAR(packet_log_writers),
+ SCTP_PKTLOG_WRITERS_NEED_LOCK);
+ }
+ SCTP_IP_PKTLOG_UNLOCK();
+ return (this_copy + sizeof(int));
+}
+
+#endif
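
For orientation, here is a minimal decoding sketch derived only from the entry layout described in sctp_packet_log() above; the helper name and the assumption that the buffer was just filled by sctp_copy_out_packet_log() are hypothetical, not part of the file.

	/*
	 * Hypothetical example: decode the newest entry from a buffer that
	 * sctp_copy_out_packet_log() filled in (first int = packet_log_end,
	 * followed by the raw log bytes).
	 */
	static void
	example_decode_newest_entry(uint8_t *buf)
	{
		uint8_t *log = buf + sizeof(int);
		int end, begin, total_len, prev_end;
		uint32_t ticks;
		uint8_t *pkt;

		end = *(int *)buf;                           /* packet_log_end at copy time */
		begin = *(int *)(log + end - sizeof(int));   /* trailing "thisbegin" field */
		total_len = *(int *)(log + begin);
		prev_end = *(int *)(log + begin + sizeof(int));
		ticks = *(uint32_t *)(log + begin + 2 * sizeof(int));
		pkt = log + begin + 3 * sizeof(int);
		/* the packet data occupies total_len - 4 * sizeof(int) bytes,
		 * padded up to a 32-bit boundary by SCTP_SIZE32() */
		(void)prev_end; (void)ticks; (void)pkt;
	}
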
diff --git a/freebsd/sys/netinet/sctp_bsd_addr.h b/freebsd/sys/netinet/sctp_bsd_addr.h
new file mode 100644
index 00000000..67d65dc6
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_bsd_addr.h
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_bsd_addr_h__
+#define __sctp_bsd_addr_h__
+#include <freebsd/netinet/sctp_pcb.h>
+
+#if defined(_KERNEL) || defined(__Userspace__)
+
+extern struct iterator_control sctp_it_ctl;
+void sctp_wakeup_iterator(void);
+
+void sctp_startup_iterator(void);
+
+
+#ifdef INET6
+void sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa);
+
+#endif
+
+#ifdef SCTP_PACKET_LOGGING
+
+void sctp_packet_log(struct mbuf *m, int length);
+int sctp_copy_out_packet_log(uint8_t * target, int length);
+
+#endif
+
+void sctp_addr_change(struct ifaddr *ifa, int cmd);
+
+void sctp_add_or_del_interfaces(int (*pred) (struct ifnet *), int add);
+
+#endif
+#endif
diff --git a/freebsd/sys/netinet/sctp_cc_functions.c b/freebsd/sys/netinet/sctp_cc_functions.c
new file mode 100644
index 00000000..668fd673
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_cc_functions.c
@@ -0,0 +1,1565 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_input.h>
+#include <freebsd/netinet/sctp_indata.h>
+#include <freebsd/netinet/sctp_uio.h>
+#include <freebsd/netinet/sctp_timer.h>
+#include <freebsd/netinet/sctp_auth.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctp_cc_functions.h>
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+void
+sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *assoc;
+ uint32_t cwnd_in_mtu;
+
+ assoc = &stcb->asoc;
+ /*
+ * We take the minimum of the burst limit and the initial congestion
+ * window. The initial congestion window is at least two times the
+ * MTU.
+ */
+ cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
+ if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
+ cwnd_in_mtu = assoc->max_burst;
+ net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
+ net->ssthresh = assoc->peers_rwnd;
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) &
+ (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
+ }
+}
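
To make the arithmetic above concrete (all numbers chosen purely for illustration): with a path MTU of 1500 bytes the 12-byte common SCTP header leaves 1488 bytes per MTU; if sctp_initial_cwnd is 3 and the association's max_burst is 4, cwnd_in_mtu stays at 3, giving an initial cwnd of 3 * 1488 = 4464 bytes, while ssthresh starts out at the peer's advertised receive window.
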
+
+void
+sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*-
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) ||
+ (asoc->sctp_cmt_on_off == 1)) {
+ /* out of an RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * to them? If so, what we need to do is
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+ int old_cwnd = net->cwnd;
+
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < (net->mtu * 2)) {
+ net->ssthresh = 2 * net->mtu;
+ }
+ net->cwnd = net->ssthresh;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_FR);
+ }
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
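
A small worked example of the adjustment above (numbers invented): a destination with cwnd = 20000 bytes and MTU = 1500 that enters fast recovery gets ssthresh = cwnd / 2 = 10000 bytes and cwnd pulled down to that value; with a cwnd of only 2500 bytes the 2 * MTU floor applies instead, leaving ssthresh = cwnd = 3000 bytes.
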
+
+void
+sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
+ /*
+ * So, first of all, do we need to have an Early FR
+ * timer running?
+ */
+ if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if it's
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to elicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if it's running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ (asoc->sctp_cmt_pf > 0) &&
+ ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * asoc->sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery &&
+ (will_exit == 0) &&
+ (asoc->sctp_cmt_on_off == 0)) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved ||
+ ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
+ /* If the cumulative ack moved we can proceed */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >= net->cwnd) {
+ if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
+ net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ /* We are in congestion avoidance */
+ /*
+ * Add to pba
+ */
+ net->partial_bytes_acked += net->net_ack;
+
+ if ((net->flight_size + net->net_ack >= net->cwnd) &&
+ (net->partial_bytes_acked >= net->cwnd)) {
+ net->partial_bytes_acked -= net->cwnd;
+ net->cwnd += net->mtu;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+ }
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule do we need to restore the
+ * RTO timer back? Check our net_ack2. If not set then we
+ * have an ambiguity, i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
+
+void
+sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int old_cwnd = net->cwnd;
+
+ net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
+ net->cwnd = net->mtu;
+ net->partial_bytes_acked = 0;
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+}
+
+void
+sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int old_cwnd = net->cwnd;
+
+ SCTP_STAT_INCR(sctps_ecnereducedcwnd);
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < net->mtu) {
+ net->ssthresh = net->mtu;
+ /* here back off the timer as well, to slow us down */
+ net->RTO <<= 1;
+ }
+ net->cwnd = net->ssthresh;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
+ }
+}
+
+void
+sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue)
+{
+ uint32_t bw_avail;
+ int rtt, incr;
+ int old_cwnd = net->cwnd;
+
+ /* need real RTT for this calc */
+ rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ /* get bottle neck bw */
+ *bottle_bw = ntohl(cp->bottle_bw);
+ /* and what's on the queue */
+ *on_queue = ntohl(cp->current_onq);
+ /*
+ * adjust the on-queue value if our flight is more; it could be that the
+ * router has not yet gotten data "in-flight" to it
+ */
+ if (*on_queue < net->flight_size)
+ *on_queue = net->flight_size;
+ /* calculate the available space */
+ bw_avail = (*bottle_bw * rtt) / 1000;
+ if (bw_avail > *bottle_bw) {
+ /*
+ * Cap the growth to no more than the bottle neck. This can
+ * happen as RTT slides up due to queues. It also means if
+ * you have more than a 1 second RTT with an empty queue you
+ * will be limited to the bottle_bw per second no matter if
+ * other points have 1/2 the RTT and you could get more
+ * out...
+ */
+ bw_avail = *bottle_bw;
+ }
+ if (*on_queue > bw_avail) {
+ /*
+ * No room for anything else, so don't allow anything else to be
+ * "added to the fire".
+ */
+ int seg_inflight, seg_onqueue, my_portion;
+
+ net->partial_bytes_acked = 0;
+
+ /* how much are we over queue size? */
+ incr = *on_queue - bw_avail;
+ if (stcb->asoc.seen_a_sack_this_pkt) {
+ /*
+ * undo any cwnd adjustment that the sack might have
+ * made
+ */
+ net->cwnd = net->prev_cwnd;
+ }
+ /* Now how much of that is mine? */
+ seg_inflight = net->flight_size / net->mtu;
+ seg_onqueue = *on_queue / net->mtu;
+ my_portion = (incr * seg_inflight) / seg_onqueue;
+
+ /* Have I made an adjustment already */
+ if (net->cwnd > net->flight_size) {
+ /*
+ * for this flight I made an adjustment, so we need to
+ * decrease the portion by a share of our previous
+ * adjustment.
+ */
+ int diff_adj;
+
+ diff_adj = net->cwnd - net->flight_size;
+ if (diff_adj > my_portion)
+ my_portion = 0;
+ else
+ my_portion -= diff_adj;
+ }
+ /*
+ * back down to the previous cwnd (assume we have had a sack
+ * before this packet), minus whatever portion of the
+ * overage is my fault.
+ */
+ net->cwnd -= my_portion;
+
+ /* we will NOT back down more than 1 MTU */
+ if (net->cwnd <= net->mtu) {
+ net->cwnd = net->mtu;
+ }
+ /* force into CA */
+ net->ssthresh = net->cwnd - 1;
+ } else {
+ /*
+ * Take 1/4 of the space left or the max burst, whichever
+ * is less.
+ */
+ incr = min((bw_avail - *on_queue) >> 2,
+ stcb->asoc.max_burst * net->mtu);
+ net->cwnd += incr;
+ }
+ if (net->cwnd > bw_avail) {
+ /* We can't exceed the pipe size */
+ net->cwnd = bw_avail;
+ }
+ if (net->cwnd < net->mtu) {
+ /* We always have 1 MTU */
+ net->cwnd = net->mtu;
+ }
+ if (net->cwnd - old_cwnd != 0) {
+ /* log only changes */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_SAT);
+ }
+ }
+}
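
A rough worked example of the packet-drop calculation above (numbers invented, assuming bottle_bw is reported in bytes per second and the RTT term is in milliseconds): with bottle_bw = 1000000 and rtt = 100, bw_avail is 100000 bytes. If 120000 bytes are reported on queue while our flight size is 60000 bytes, we are 20000 bytes over; our 40 MTU-sized segments in flight are half of the 80 segments queued (MTU 1500), so my_portion = 20000 * 40 / 80 = 10000 bytes, cwnd is backed off by that amount (never below one MTU), and ssthresh is set just under cwnd to force congestion avoidance.
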
+
+void
+sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit)
+{
+ int old_cwnd = net->cwnd;
+
+ if (net->ssthresh < net->cwnd)
+ net->ssthresh = net->cwnd;
+ net->cwnd = (net->flight_size + (burst_limit * net->mtu));
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
+ }
+}
+
+void
+sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int old_cwnd = net->cwnd;
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ /*
+ * make a small adjustment to cwnd and force to CA.
+ */
+ if (net->cwnd > net->mtu)
+ /* drop down one MTU after sending */
+ net->cwnd -= net->mtu;
+ if (net->cwnd < net->ssthresh)
+ /* still in SS move to CA */
+ net->ssthresh = net->cwnd - 1;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+struct sctp_hs_raise_drop {
+ int32_t cwnd;
+ int32_t increase;
+ int32_t drop_percent;
+};
+
+#define SCTP_HS_TABLE_SIZE 73
+
+struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
+ {38, 1, 50}, /* 0 */
+ {118, 2, 44}, /* 1 */
+ {221, 3, 41}, /* 2 */
+ {347, 4, 38}, /* 3 */
+ {495, 5, 37}, /* 4 */
+ {663, 6, 35}, /* 5 */
+ {851, 7, 34}, /* 6 */
+ {1058, 8, 33}, /* 7 */
+ {1284, 9, 32}, /* 8 */
+ {1529, 10, 31}, /* 9 */
+ {1793, 11, 30}, /* 10 */
+ {2076, 12, 29}, /* 11 */
+ {2378, 13, 28}, /* 12 */
+ {2699, 14, 28}, /* 13 */
+ {3039, 15, 27}, /* 14 */
+ {3399, 16, 27}, /* 15 */
+ {3778, 17, 26}, /* 16 */
+ {4177, 18, 26}, /* 17 */
+ {4596, 19, 25}, /* 18 */
+ {5036, 20, 25}, /* 19 */
+ {5497, 21, 24}, /* 20 */
+ {5979, 22, 24}, /* 21 */
+ {6483, 23, 23}, /* 22 */
+ {7009, 24, 23}, /* 23 */
+ {7558, 25, 22}, /* 24 */
+ {8130, 26, 22}, /* 25 */
+ {8726, 27, 22}, /* 26 */
+ {9346, 28, 21}, /* 27 */
+ {9991, 29, 21}, /* 28 */
+ {10661, 30, 21}, /* 29 */
+ {11358, 31, 20}, /* 30 */
+ {12082, 32, 20}, /* 31 */
+ {12834, 33, 20}, /* 32 */
+ {13614, 34, 19}, /* 33 */
+ {14424, 35, 19}, /* 34 */
+ {15265, 36, 19}, /* 35 */
+ {16137, 37, 19}, /* 36 */
+ {17042, 38, 18}, /* 37 */
+ {17981, 39, 18}, /* 38 */
+ {18955, 40, 18}, /* 39 */
+ {19965, 41, 17}, /* 40 */
+ {21013, 42, 17}, /* 41 */
+ {22101, 43, 17}, /* 42 */
+ {23230, 44, 17}, /* 43 */
+ {24402, 45, 16}, /* 44 */
+ {25618, 46, 16}, /* 45 */
+ {26881, 47, 16}, /* 46 */
+ {28193, 48, 16}, /* 47 */
+ {29557, 49, 15}, /* 48 */
+ {30975, 50, 15}, /* 49 */
+ {32450, 51, 15}, /* 50 */
+ {33986, 52, 15}, /* 51 */
+ {35586, 53, 14}, /* 52 */
+ {37253, 54, 14}, /* 53 */
+ {38992, 55, 14}, /* 54 */
+ {40808, 56, 14}, /* 55 */
+ {42707, 57, 13}, /* 56 */
+ {44694, 58, 13}, /* 57 */
+ {46776, 59, 13}, /* 58 */
+ {48961, 60, 13}, /* 59 */
+ {51258, 61, 13}, /* 60 */
+ {53677, 62, 12}, /* 61 */
+ {56230, 63, 12}, /* 62 */
+ {58932, 64, 12}, /* 63 */
+ {61799, 65, 12}, /* 64 */
+ {64851, 66, 11}, /* 65 */
+ {68113, 67, 11}, /* 66 */
+ {71617, 68, 11}, /* 67 */
+ {75401, 69, 10}, /* 68 */
+ {79517, 70, 10}, /* 69 */
+ {84035, 71, 10}, /* 70 */
+ {89053, 72, 10}, /* 71 */
+ {94717, 73, 9} /* 72 */
+};
+
+static void
+sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int cur_val, i, indx, incr;
+
+ cur_val = net->cwnd >> 10;
+ indx = SCTP_HS_TABLE_SIZE - 1;
+#ifdef SCTP_DEBUG
+ printf("HS CC called.\n");
+#endif
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* normal mode */
+ if (net->net_ack > net->mtu) {
+ net->cwnd += net->mtu;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
+ if (cur_val < sctp_cwnd_adjust[i].cwnd) {
+ indx = i;
+ break;
+ }
+ }
+ net->last_hs_used = indx;
+ incr = ((sctp_cwnd_adjust[indx].increase) << 10);
+ net->cwnd += incr;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+}
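
To illustrate the table lookup above (values picked arbitrarily): cur_val is the cwnd expressed in 1024-byte units, so a cwnd of 5242880 bytes gives cur_val = 5120; scanning from last_hs_used stops at row 20 ({5497, 21, 24}), so a qualifying SACK grows the window by 21 << 10 = 21504 bytes, and a later fast retransmit handled by sctp_hs_cwnd_decrease() would cut it by that row's drop_percent, i.e. 24 percent.
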
+
+static void
+sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int cur_val, i, indx;
+ int old_cwnd = net->cwnd;
+
+ cur_val = net->cwnd >> 10;
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* normal mode */
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < (net->mtu * 2)) {
+ net->ssthresh = 2 * net->mtu;
+ }
+ net->cwnd = net->ssthresh;
+ } else {
+ /* drop by the proper amount */
+ net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
+ sctp_cwnd_adjust[net->last_hs_used].drop_percent);
+ net->cwnd = net->ssthresh;
+ /* now where are we */
+ indx = net->last_hs_used;
+ cur_val = net->cwnd >> 10;
+ /* reset where we are in the table */
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* fell out of hs */
+ net->last_hs_used = 0;
+ } else {
+ for (i = indx; i >= 1; i--) {
+ if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
+ break;
+ }
+ }
+ net->last_hs_used = indx;
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+void
+sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) ||
+ (asoc->sctp_cmt_on_off == 1)) {
+ /* out of an RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * to them? If so, what we need to do is
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+
+ sctp_hs_cwnd_decrease(stcb, net);
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
+
+void
+sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
+ /*
+ * So, first of all, do we need to have an Early FR
+ * timer running?
+ */
+ if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if it's
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to elicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if it's running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ (asoc->sctp_cmt_pf > 0) &&
+ ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * asoc->sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery &&
+ (will_exit == 0) &&
+ (asoc->sctp_cmt_on_off == 0)) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved ||
+ ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
+ /* If the cumulative ack moved we can proceed */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >= net->cwnd) {
+
+ sctp_hs_cwnd_increase(stcb, net);
+
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ /* We are in congestion avoidance */
+ net->partial_bytes_acked += net->net_ack;
+ if ((net->flight_size + net->net_ack >= net->cwnd) &&
+ (net->partial_bytes_acked >= net->cwnd)) {
+ net->partial_bytes_acked -= net->cwnd;
+ net->cwnd += net->mtu;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+ }
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule do we need to restore the
+ * RTO timer back? Check our net_ack2. If not set then we
+ * have an ambiguity, i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
+
+
+/*
+ * H-TCP congestion control. The algorithm is detailed in:
+ * R.N.Shorten, D.J.Leith:
+ * "H-TCP: TCP for high-speed and long-distance networks"
+ * Proc. PFLDnet, Argonne, 2004.
+ * http://www.hamilton.ie/net/htcp3.pdf
+ */
+
+
+static int use_rtt_scaling = 1;
+static int use_bandwidth_switch = 1;
+
+static inline int
+between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
+{
+ return seq3 - seq2 >= seq1 - seq2;
+}
+
+static inline uint32_t
+htcp_cong_time(struct htcp *ca)
+{
+ return sctp_get_tick_count() - ca->last_cong;
+}
+
+static inline uint32_t
+htcp_ccount(struct htcp *ca)
+{
+ return htcp_cong_time(ca) / ca->minRTT;
+}
+
+static inline void
+htcp_reset(struct htcp *ca)
+{
+ ca->undo_last_cong = ca->last_cong;
+ ca->undo_maxRTT = ca->maxRTT;
+ ca->undo_old_maxB = ca->old_maxB;
+ ca->last_cong = sctp_get_tick_count();
+}
+
+#ifdef SCTP_NOT_USED
+
+static uint32_t
+htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong;
+ net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT;
+ net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB;
+ return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu);
+}
+
+#endif
+
+static inline void
+measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t srtt = net->lastsa >> 3;
+
+ /* keep track of minimum RTT seen so far, minRTT is zero at first */
+ if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT)
+ net->htcp_ca.minRTT = srtt;
+
+ /* max RTT */
+ if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) {
+ if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT)
+ net->htcp_ca.maxRTT = net->htcp_ca.minRTT;
+ if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20))
+ net->htcp_ca.maxRTT = srtt;
+ }
+}
+
+static void
+measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t now = sctp_get_tick_count();
+
+ if (net->fast_retran_ip == 0)
+ net->htcp_ca.bytes_acked = net->net_ack;
+
+ if (!use_bandwidth_switch)
+ return;
+
+ /* achieved throughput calculations */
+ /* JRS - not 100% sure of this statement */
+ if (net->fast_retran_ip == 1) {
+ net->htcp_ca.bytecount = 0;
+ net->htcp_ca.lasttime = now;
+ return;
+ }
+ net->htcp_ca.bytecount += net->net_ack;
+
+ if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu)
+ && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT
+ && net->htcp_ca.minRTT > 0) {
+ uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime);
+
+ if (htcp_ccount(&net->htcp_ca) <= 3) {
+ /* just after backoff */
+ net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi;
+ } else {
+ net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4;
+ if (net->htcp_ca.Bi > net->htcp_ca.maxB)
+ net->htcp_ca.maxB = net->htcp_ca.Bi;
+ if (net->htcp_ca.minB > net->htcp_ca.maxB)
+ net->htcp_ca.minB = net->htcp_ca.maxB;
+ }
+ net->htcp_ca.bytecount = 0;
+ net->htcp_ca.lasttime = now;
+ }
+}
+
+static inline void
+htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
+{
+ if (use_bandwidth_switch) {
+ uint32_t maxB = ca->maxB;
+ uint32_t old_maxB = ca->old_maxB;
+
+ ca->old_maxB = ca->maxB;
+
+ if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
+ ca->beta = BETA_MIN;
+ ca->modeswitch = 0;
+ return;
+ }
+ }
+ if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
+ ca->beta = (minRTT << 7) / maxRTT;
+ if (ca->beta < BETA_MIN)
+ ca->beta = BETA_MIN;
+ else if (ca->beta > BETA_MAX)
+ ca->beta = BETA_MAX;
+ } else {
+ ca->beta = BETA_MIN;
+ ca->modeswitch = 1;
+ }
+}
+
+static inline void
+htcp_alpha_update(struct htcp *ca)
+{
+ uint32_t minRTT = ca->minRTT;
+ uint32_t factor = 1;
+ uint32_t diff = htcp_cong_time(ca);
+
+ if (diff > (uint32_t) hz) {
+ diff -= hz;
+ factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
+ }
+ if (use_rtt_scaling && minRTT) {
+ uint32_t scale = (hz << 3) / (10 * minRTT);
+
+ scale = min(max(scale, 1U << 2), 10U << 3); /* clamping ratio to
+ * interval [0.5,10]<<3 */
+ factor = (factor << 3) / scale;
+ if (!factor)
+ factor = 1;
+ }
+ ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
+ if (!ca->alpha)
+ ca->alpha = ALPHA_BASE;
+}
+
+/* After we have the rtt data to calculate beta, we'd still prefer to wait one
+ * rtt before we adjust our beta to ensure we are working from consistent
+ * data.
+ *
+ * This function should be called when we hit a congestion event since only at
+ * that point do we really have a real sense of maxRTT (the queues en route
+ * were getting just too full now).
+ */
+static void
+htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t minRTT = net->htcp_ca.minRTT;
+ uint32_t maxRTT = net->htcp_ca.maxRTT;
+
+ htcp_beta_update(&net->htcp_ca, minRTT, maxRTT);
+ htcp_alpha_update(&net->htcp_ca);
+
+ /*
+ * add slowly fading memory for maxRTT to accommodate routing
+ * changes etc
+ */
+ if (minRTT > 0 && maxRTT > minRTT)
+ net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
+}
+
+static uint32_t
+htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ htcp_param_update(stcb, net);
+ return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu);
+}
+
+static void
+htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /*-
+ * How to handle these functions?
+ * if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
+ * return;
+ */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >= net->cwnd) {
+ if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
+ net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ measure_rtt(stcb, net);
+
+ /*
+ * In dangerous area, increase slowly. In theory this is
+ * net->cwnd += alpha / net->cwnd
+ */
+ /* What is snd_cwnd_cnt?? */
+ if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
+ /*-
+ * Does SCTP have a cwnd clamp?
+ * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
+ */
+ net->cwnd += net->mtu;
+ net->partial_bytes_acked = 0;
+ htcp_alpha_update(&net->htcp_ca);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ net->partial_bytes_acked += net->net_ack;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+
+ net->htcp_ca.bytes_acked = net->mtu;
+ }
+}
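
Reading the fixed-point arithmetic above (alpha and beta are kept scaled by 128, i.e. shifted left by 7): at each congestion event, when the RTT data qualifies, beta becomes roughly 128 * minRTT / maxRTT clamped to [BETA_MIN, BETA_MAX] (otherwise BETA_MIN), and htcp_recalc_ssthresh() shrinks the window to about cwnd * beta / 128 but never below two MTUs; in congestion avoidance the test in htcp_cong_avoid() adds one MTU once partial_bytes_acked * alpha / 128 reaches cwnd, which amounts to a growth of roughly alpha / 128 MTUs per RTT, matching the "net->cwnd += alpha / net->cwnd" comment.
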
+
+#ifdef SCTP_NOT_USED
+/* Lower bound on congestion window. */
+static uint32_t
+htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ return net->ssthresh;
+}
+
+#endif
+
+static void
+htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ memset(&net->htcp_ca, 0, sizeof(struct htcp));
+ net->htcp_ca.alpha = ALPHA_BASE;
+ net->htcp_ca.beta = BETA_MIN;
+ net->htcp_ca.bytes_acked = net->mtu;
+ net->htcp_ca.last_cong = sctp_get_tick_count();
+}
+
+void
+sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /*
+ * We take the max of the burst limit times a MTU or the
+ * INITIAL_CWND. We then limit this to 4 MTU's of sending.
+ */
+ net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
+ net->ssthresh = stcb->asoc.peers_rwnd;
+ htcp_init(stcb, net);
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
+ /*
+ * So, first of all, do we need to have an Early FR
+ * timer running?
+ */
+ if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if it's
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to elicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if it's running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ (asoc->sctp_cmt_pf > 0) &&
+ ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * asoc->sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery &&
+ will_exit == 0 &&
+ (asoc->sctp_cmt_on_off == 0)) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved ||
+ ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
+ htcp_cong_avoid(stcb, net);
+ measure_achieved_throughput(stcb, net);
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule, do we need to restore the
+ * RTO timer? Check our net_ack2. If it is not set then we
+ * have an ambiguity, i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) ||
+ (asoc->sctp_cmt_on_off == 1)) {
+ /* out of an RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * Per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * sent to them? If so, we need to
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+ int old_cwnd = net->cwnd;
+
+ /* JRS - reset as if state were changed */
+ htcp_reset(&net->htcp_ca);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ net->cwnd = net->ssthresh;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_FR);
+ }
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd = net->cwnd;
+
+ /* JRS - reset as if the state were being changed to timeout */
+ htcp_reset(&net->htcp_ca);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ net->cwnd = net->mtu;
+ net->partial_bytes_acked = 0;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ net->htcp_ca.last_cong = sctp_get_tick_count();
+ /*
+ * make a small adjustment to cwnd and force to CA.
+ */
+ if (net->cwnd > net->mtu)
+ /* drop down one MTU after sending */
+ net->cwnd -= net->mtu;
+ if (net->cwnd < net->ssthresh)
+ /* still in SS move to CA */
+ net->ssthresh = net->cwnd - 1;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ /* JRS - reset hctp as if state changed */
+ htcp_reset(&net->htcp_ca);
+ SCTP_STAT_INCR(sctps_ecnereducedcwnd);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ if (net->ssthresh < net->mtu) {
+ net->ssthresh = net->mtu;
+ /* here back off the timer as well, to slow us down */
+ net->RTO <<= 1;
+ }
+ net->cwnd = net->ssthresh;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
+ }
+}
diff --git a/freebsd/sys/netinet/sctp_cc_functions.h b/freebsd/sys/netinet/sctp_cc_functions.h
new file mode 100644
index 00000000..3b95d7de
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_cc_functions.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_cc_functions_h__
+#define __sctp_cc_functions_h__
+
+#if defined(_KERNEL) || defined(__Userspace__)
+
+void
+sctp_set_initial_cc_param(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue);
+
+void
+sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit);
+
+void
+sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+
+/*
+ * HTCP algorithms are directly taken from
+ * R.N.Shorten and D.J.Leith and are the outcome of
+ * a Cisco-URP grant to enhance HTCP for satellite
+ * communications. We use the BSD License
+ * granted for their source and have modified their
+ * algorithms to fit within the SCTP BSD framework.
+ */
+
+void
+sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+
+#endif
+#endif
diff --git a/freebsd/sys/netinet/sctp_constants.h b/freebsd/sys/netinet/sctp_constants.h
new file mode 100644
index 00000000..c4f4be23
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_constants.h
@@ -0,0 +1,1051 @@
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_constants.h,v 1.17 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_constants_h__
+#define __sctp_constants_h__
+
+/* IANA assigned port number for SCTP over UDP encapsulation */
+/* For FreeBSD we cannot bind the port at
+ * startup; if we did, we would not really
+ * be bound. The user must set it via the
+ * sysctl... or we would need to build a
+ * special timer for this that allows us to
+ * wait 1 second or so after the system
+ * comes up.
+ */
+#define SCTP_OVER_UDP_TUNNELING_PORT 0
+/* Number of packets to get before sack sent by default */
+#define SCTP_DEFAULT_SACK_FREQ 2
+
+/* Address limit - This variable is calculated
+ * based on a 65535 byte max IP packet. We take out 100 bytes
+ * for the cookie, 40 bytes for a v6 header and 32
+ * bytes for the init structure. A second init structure
+ * for the init-ack and then finally a third one for the
+ * embedded init. This yields 100+40+(3 * 32) = 236 bytes.
+ * This leaves 65299 bytes for addresses; we throw out the 299 bytes.
+ * Now whatever we send in the INIT we need to allow to come back in the
+ * INIT-ACK, plus all the values from INIT and INIT-ACK
+ * listed in the cookie. Plus we need some overhead for
+ * possibly copied parameters in the COOKIE. If we
+ * allow 1080 addresses, and each side has 1080 V6 addresses,
+ * that will be 21600 bytes. In the INIT-ACK we will
+ * see the 21600 of the INIT-ACK + 43200 in the cookie. This leaves
+ * about 500 bytes slack for misc things in the cookie.
+ */
+#define SCTP_ADDRESS_LIMIT 1080
+
+/* We need at least 2k of space for ourselves; INITs
+ * larger than that we abort.
+ */
+#define SCTP_LARGEST_INIT_ACCEPTED (65535 - 2048)
+
+/* Number of addresses where we just skip the counting */
+#define SCTP_COUNT_LIMIT 40
+
+#define SCTP_ZERO_COPY_TICK_DELAY (((100 * hz) + 999) / 1000)
+#define SCTP_ZERO_COPY_SENDQ_TICK_DELAY (((100 * hz) + 999) / 1000)
+
+/* Number of ticks to delay before running
+ * iterator on an address change.
+ */
+#define SCTP_ADDRESS_TICK_DELAY 2
+
+#define SCTP_VERSION_STRING "KAME-BSD 1.1"
+/* #define SCTP_AUDITING_ENABLED 1 used for debug/auditing */
+#define SCTP_AUDIT_SIZE 256
+
+
+#define SCTP_KTRHEAD_NAME "sctp_iterator"
+#define SCTP_KTHREAD_PAGES 0
+
+
+/* If you support Multi-VRF, how big to
+ * make the initial array of VRFs.
+ */
+#define SCTP_DEFAULT_VRF_SIZE 4
+
+/* constants for rto calc */
+#define sctp_align_safe_nocopy 0
+#define sctp_align_unsafe_makecopy 1
+
+/* JRS - Values defined for the HTCP algorithm */
+#define ALPHA_BASE (1<<7) /* 1.0 with shift << 7 */
+#define BETA_MIN (1<<6) /* 0.5 with shift << 7 */
+#define BETA_MAX 102 /* 0.8 with shift << 7 */
+
+/* Places that CWND log can happen from */
+#define SCTP_CWND_LOG_FROM_FR 1
+#define SCTP_CWND_LOG_FROM_RTX 2
+#define SCTP_CWND_LOG_FROM_BRST 3
+#define SCTP_CWND_LOG_FROM_SS 4
+#define SCTP_CWND_LOG_FROM_CA 5
+#define SCTP_CWND_LOG_FROM_SAT 6
+#define SCTP_BLOCK_LOG_INTO_BLK 7
+#define SCTP_BLOCK_LOG_OUTOF_BLK 8
+#define SCTP_BLOCK_LOG_CHECK 9
+#define SCTP_STR_LOG_FROM_INTO_STRD 10
+#define SCTP_STR_LOG_FROM_IMMED_DEL 11
+#define SCTP_STR_LOG_FROM_INSERT_HD 12
+#define SCTP_STR_LOG_FROM_INSERT_MD 13
+#define SCTP_STR_LOG_FROM_INSERT_TL 14
+#define SCTP_STR_LOG_FROM_MARK_TSN 15
+#define SCTP_STR_LOG_FROM_EXPRS_DEL 16
+#define SCTP_FR_LOG_BIGGEST_TSNS 17
+#define SCTP_FR_LOG_STRIKE_TEST 18
+#define SCTP_FR_LOG_STRIKE_CHUNK 19
+#define SCTP_FR_T3_TIMEOUT 20
+#define SCTP_MAP_PREPARE_SLIDE 21
+#define SCTP_MAP_SLIDE_FROM 22
+#define SCTP_MAP_SLIDE_RESULT 23
+#define SCTP_MAP_SLIDE_CLEARED 24
+#define SCTP_MAP_SLIDE_NONE 25
+#define SCTP_FR_T3_MARK_TIME 26
+#define SCTP_FR_T3_MARKED 27
+#define SCTP_FR_T3_STOPPED 28
+#define SCTP_FR_MARKED 30
+#define SCTP_CWND_LOG_NOADV_SS 31
+#define SCTP_CWND_LOG_NOADV_CA 32
+#define SCTP_MAX_BURST_APPLIED 33
+#define SCTP_MAX_IFP_APPLIED 34
+#define SCTP_MAX_BURST_ERROR_STOP 35
+#define SCTP_INCREASE_PEER_RWND 36
+#define SCTP_DECREASE_PEER_RWND 37
+#define SCTP_SET_PEER_RWND_VIA_SACK 38
+#define SCTP_LOG_MBCNT_INCREASE 39
+#define SCTP_LOG_MBCNT_DECREASE 40
+#define SCTP_LOG_MBCNT_CHKSET 41
+#define SCTP_LOG_NEW_SACK 42
+#define SCTP_LOG_TSN_ACKED 43
+#define SCTP_LOG_TSN_REVOKED 44
+#define SCTP_LOG_LOCK_TCB 45
+#define SCTP_LOG_LOCK_INP 46
+#define SCTP_LOG_LOCK_SOCK 47
+#define SCTP_LOG_LOCK_SOCKBUF_R 48
+#define SCTP_LOG_LOCK_SOCKBUF_S 49
+#define SCTP_LOG_LOCK_CREATE 50
+#define SCTP_LOG_INITIAL_RTT 51
+#define SCTP_LOG_RTTVAR 52
+#define SCTP_LOG_SBALLOC 53
+#define SCTP_LOG_SBFREE 54
+#define SCTP_LOG_SBRESULT 55
+#define SCTP_FR_DUPED 56
+#define SCTP_FR_MARKED_EARLY 57
+#define SCTP_FR_CWND_REPORT 58
+#define SCTP_FR_CWND_REPORT_START 59
+#define SCTP_FR_CWND_REPORT_STOP 60
+#define SCTP_CWND_LOG_FROM_SEND 61
+#define SCTP_CWND_INITIALIZATION 62
+#define SCTP_CWND_LOG_FROM_T3 63
+#define SCTP_CWND_LOG_FROM_SACK 64
+#define SCTP_CWND_LOG_NO_CUMACK 65
+#define SCTP_CWND_LOG_FROM_RESEND 66
+#define SCTP_FR_LOG_CHECK_STRIKE 67
+#define SCTP_SEND_NOW_COMPLETES 68
+#define SCTP_CWND_LOG_FILL_OUTQ_CALLED 69
+#define SCTP_CWND_LOG_FILL_OUTQ_FILLS 70
+#define SCTP_LOG_FREE_SENT 71
+#define SCTP_NAGLE_APPLIED 72
+#define SCTP_NAGLE_SKIPPED 73
+#define SCTP_WAKESND_FROM_SACK 74
+#define SCTP_WAKESND_FROM_FWDTSN 75
+#define SCTP_NOWAKE_FROM_SACK 76
+#define SCTP_CWNDLOG_PRESEND 77
+#define SCTP_CWNDLOG_ENDSEND 78
+#define SCTP_AT_END_OF_SACK 79
+#define SCTP_REASON_FOR_SC 80
+#define SCTP_BLOCK_LOG_INTO_BLKA 81
+#define SCTP_ENTER_USER_RECV 82
+#define SCTP_USER_RECV_SACKS 83
+#define SCTP_SORECV_BLOCKSA 84
+#define SCTP_SORECV_BLOCKSB 85
+#define SCTP_SORECV_DONE 86
+#define SCTP_SACK_RWND_UPDATE 87
+#define SCTP_SORECV_ENTER 88
+#define SCTP_SORECV_ENTERPL 89
+#define SCTP_MBUF_INPUT 90
+#define SCTP_MBUF_IALLOC 91
+#define SCTP_MBUF_IFREE 92
+#define SCTP_MBUF_ICOPY 93
+#define SCTP_MBUF_SPLIT 94
+#define SCTP_SORCV_FREECTL 95
+#define SCTP_SORCV_DOESCPY 96
+#define SCTP_SORCV_DOESLCK 97
+#define SCTP_SORCV_DOESADJ 98
+#define SCTP_SORCV_BOTWHILE 99
+#define SCTP_SORCV_PASSBF 100
+#define SCTP_SORCV_ADJD 101
+#define SCTP_UNKNOWN_MAX 102
+#define SCTP_RANDY_STUFF 103
+#define SCTP_RANDY_STUFF1 104
+#define SCTP_STRMOUT_LOG_ASSIGN 105
+#define SCTP_STRMOUT_LOG_SEND 106
+#define SCTP_FLIGHT_LOG_DOWN_CA 107
+#define SCTP_FLIGHT_LOG_UP 108
+#define SCTP_FLIGHT_LOG_DOWN_GAP 109
+#define SCTP_FLIGHT_LOG_DOWN_RSND 110
+#define SCTP_FLIGHT_LOG_UP_RSND 111
+#define SCTP_FLIGHT_LOG_DOWN_RSND_TO 112
+#define SCTP_FLIGHT_LOG_DOWN_WP 113
+#define SCTP_FLIGHT_LOG_UP_REVOKE 114
+#define SCTP_FLIGHT_LOG_DOWN_PDRP 115
+#define SCTP_FLIGHT_LOG_DOWN_PMTU 116
+#define SCTP_SACK_LOG_NORMAL 117
+#define SCTP_SACK_LOG_EXPRESS 118
+#define SCTP_MAP_TSN_ENTERS 119
+#define SCTP_THRESHOLD_CLEAR 120
+#define SCTP_THRESHOLD_INCR 121
+#define SCTP_FLIGHT_LOG_DWN_WP_FWD 122
+#define SCTP_FWD_TSN_CHECK 123
+#define SCTP_LOG_MAX_TYPES 124
+/*
+ * To turn on various logging, you must first enable 'options KTR' and
+ * you might want to bump the entries with 'options KTR_ENTRIES=80000'.
+ * To get something to log you define one of the logging defines
+ * (see LINT).
+ *
+ * This gets the compile in place, but you still need to turn the
+ * logging flag on in the sysctl as well (see sctp.h).
+ */
+
+#define SCTP_LOG_EVENT_UNKNOWN 0
+#define SCTP_LOG_EVENT_CWND 1
+#define SCTP_LOG_EVENT_BLOCK 2
+#define SCTP_LOG_EVENT_STRM 3
+#define SCTP_LOG_EVENT_FR 4
+#define SCTP_LOG_EVENT_MAP 5
+#define SCTP_LOG_EVENT_MAXBURST 6
+#define SCTP_LOG_EVENT_RWND 7
+#define SCTP_LOG_EVENT_MBCNT 8
+#define SCTP_LOG_EVENT_SACK 9
+#define SCTP_LOG_LOCK_EVENT 10
+#define SCTP_LOG_EVENT_RTT 11
+#define SCTP_LOG_EVENT_SB 12
+#define SCTP_LOG_EVENT_NAGLE 13
+#define SCTP_LOG_EVENT_WAKE 14
+#define SCTP_LOG_MISC_EVENT 15
+#define SCTP_LOG_EVENT_CLOSE 16
+#define SCTP_LOG_EVENT_MBUF 17
+#define SCTP_LOG_CHUNK_PROC 18
+#define SCTP_LOG_ERROR_RET 19
+
+#define SCTP_LOG_MAX_EVENT 20
+
+#define SCTP_LOCK_UNKNOWN 2
+
+
+/* number of associations by default for zone allocation */
+#define SCTP_MAX_NUM_OF_ASOC 40000
+/* how many addresses per assoc remote and local */
+#define SCTP_SCALE_FOR_ADDR 2
+
+/* default AUTO_ASCONF mode enable(1)/disable(0) value (sysctl) */
+#define SCTP_DEFAULT_AUTO_ASCONF 1
+
+/* default MULTIPLE_ASCONF mode enable(1)/disable(0) value (sysctl) */
+#define SCTP_DEFAULT_MULTIPLE_ASCONFS 0
+
+/* default MOBILITY_BASE mode enable(1)/disable(0) value (sysctl) */
+#define SCTP_DEFAULT_MOBILITY_BASE 0
+
+/* default MOBILITY_FASTHANDOFF mode enable(1)/disable(0) value (sysctl) */
+#define SCTP_DEFAULT_MOBILITY_FASTHANDOFF 0
+
+/*
+ * Threshold for rwnd updates: we have to read (sb_hiwat >>
+ * SCTP_RWND_HIWAT_SHIFT) before we will look to see if we need to send a
+ * window update sack. When we look, we compare the last rwnd we sent vs the
+ * current rwnd; the difference too must be greater than this value. Using 3 divides the
+ * hiwat by 8, so for a 200k rwnd we need to read 24k. For a 64k rwnd we need
+ * to read 8k. This seems about right.. I hope :-D.. we do set a
+ * min of an MTU on it so if the rwnd is real small we will insist
+ * on a full MTU of 1500 bytes.
+ */
+#define SCTP_RWND_HIWAT_SHIFT 3
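+/*
+ * Worked example (illustrative only, not part of the original source):
+ * with a shift of 3 and sb_hiwat = 65536 bytes, the application must have
+ * read at least 65536 >> 3 = 8192 bytes, and the advertised rwnd must have
+ * grown by more than that same amount, before a window update SACK is
+ * considered (subject to the one-MTU minimum described above).
+ */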
+
+/* How much of the rwnd must a
+ * message be taking up to start partial delivery.
+ * We calculate this by shifting the hi_water (recv_win)
+ * by the following amount: set to 1, partial delivery starts when
+ * a message holds 1/2 the rwnd; set to 2, when a message holds
+ * 1/4 the rwnd... etc.
+ */
+
+#define SCTP_PARTIAL_DELIVERY_SHIFT 1
+
+/*
+ * default HMAC for cookies, etc... use one of the AUTH HMAC id's
+ * SCTP_HMAC is the HMAC_ID to use
+ * SCTP_SIGNATURE_SIZE is the digest length
+ */
+#define SCTP_HMAC SCTP_AUTH_HMAC_ID_SHA1
+#define SCTP_SIGNATURE_SIZE SCTP_AUTH_DIGEST_LEN_SHA1
+#define SCTP_SIGNATURE_ALOC_SIZE SCTP_SIGNATURE_SIZE
+
+/*
+ * the SCTP protocol signature this includes the version number encoded in
+ * the last 4 bits of the signature.
+ */
+#define PROTO_SIGNATURE_A 0x30000000
+#define SCTP_VERSION_NUMBER 0x3
+
+#define MAX_TSN 0xffffffff
+#define MAX_SEQ 0xffff
+
+/* how many executions every N ticks */
+#define SCTP_ITERATOR_MAX_AT_ONCE 20
+
+/* number of clock ticks between iterator executions */
+#define SCTP_ITERATOR_TICKS 1
+
+/*
+ * option: If you comment out the following you will receive the old behavior
+ * of obeying cwnd for the fast retransmit algorithm. With this defined, an FR
+ * happens right away without waiting for the flightsize to drop below the
+ * cwnd value (which is reduced by the FR to 1/2 the inflight packets).
+ */
+#define SCTP_IGNORE_CWND_ON_FR 1
+
+/*
+ * Adds implementor's guide behavior to only use the newest highest update in SACK
+ * gap acks to figure out if you need to strike a chunk for FR.
+ */
+#define SCTP_NO_FR_UNLESS_SEGMENT_SMALLER 1
+
+/* default max I can burst out after a fast retransmit */
+#define SCTP_DEF_MAX_BURST 4
+/* IP hdr (20/40) + 12+2+2 (enet) + sctp common 12 */
+#define SCTP_FIRST_MBUF_RESV 68
+/* Packet transmit states in the sent field */
+#define SCTP_DATAGRAM_UNSENT 0
+#define SCTP_DATAGRAM_SENT 1
+#define SCTP_DATAGRAM_RESEND1 2 /* not used (in code, but may
+ * hit this value) */
+#define SCTP_DATAGRAM_RESEND2 3 /* not used (in code, but may
+ * hit this value) */
+#define SCTP_DATAGRAM_RESEND 4
+#define SCTP_DATAGRAM_ACKED 10010
+#define SCTP_DATAGRAM_MARKED 20010
+#define SCTP_FORWARD_TSN_SKIP 30010
+
+/* chunk output send from locations */
+#define SCTP_OUTPUT_FROM_USR_SEND 0
+#define SCTP_OUTPUT_FROM_T3 1
+#define SCTP_OUTPUT_FROM_INPUT_ERROR 2
+#define SCTP_OUTPUT_FROM_CONTROL_PROC 3
+#define SCTP_OUTPUT_FROM_SACK_TMR 4
+#define SCTP_OUTPUT_FROM_SHUT_TMR 5
+#define SCTP_OUTPUT_FROM_HB_TMR 6
+#define SCTP_OUTPUT_FROM_SHUT_ACK_TMR 7
+#define SCTP_OUTPUT_FROM_ASCONF_TMR 8
+#define SCTP_OUTPUT_FROM_STRRST_TMR 9
+#define SCTP_OUTPUT_FROM_AUTOCLOSE_TMR 10
+#define SCTP_OUTPUT_FROM_EARLY_FR_TMR 11
+#define SCTP_OUTPUT_FROM_STRRST_REQ 12
+#define SCTP_OUTPUT_FROM_USR_RCVD 13
+#define SCTP_OUTPUT_FROM_COOKIE_ACK 14
+#define SCTP_OUTPUT_FROM_DRAIN 15
+#define SCTP_OUTPUT_FROM_CLOSING 16
+/* SCTP chunk types are moved sctp.h for application (NAT, FW) use */
+
+/* align to 32-bit sizes */
+#define SCTP_SIZE32(x) ((((x)+3) >> 2) << 2)
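+/*
+ * Illustrative example (not part of the original source): SCTP_SIZE32()
+ * rounds a length up to the next multiple of 4, e.g.
+ * SCTP_SIZE32(13) = ((13 + 3) >> 2) << 2 = 16 and SCTP_SIZE32(16) = 16,
+ * which is how TLV parameters are padded to 32-bit boundaries.
+ */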
+
+#define IS_SCTP_CONTROL(a) ((a)->chunk_type != SCTP_DATA)
+#define IS_SCTP_DATA(a) ((a)->chunk_type == SCTP_DATA)
+
+
+/* SCTP parameter types */
+/*************0x0000 series*************/
+#define SCTP_HEARTBEAT_INFO 0x0001
+#define SCTP_IPV4_ADDRESS 0x0005
+#define SCTP_IPV6_ADDRESS 0x0006
+#define SCTP_STATE_COOKIE 0x0007
+#define SCTP_UNRECOG_PARAM 0x0008
+#define SCTP_COOKIE_PRESERVE 0x0009
+#define SCTP_HOSTNAME_ADDRESS 0x000b
+#define SCTP_SUPPORTED_ADDRTYPE 0x000c
+
+/* draft-ietf-stewart-tsvwg-strreset-xxx */
+#define SCTP_STR_RESET_OUT_REQUEST 0x000d
+#define SCTP_STR_RESET_IN_REQUEST 0x000e
+#define SCTP_STR_RESET_TSN_REQUEST 0x000f
+#define SCTP_STR_RESET_RESPONSE 0x0010
+#define SCTP_STR_RESET_ADD_STREAMS 0x0011
+
+#define SCTP_MAX_RESET_PARAMS 2
+#define SCTP_STREAM_RESET_TSN_DELTA 0x1000
+
+/*************0x4000 series*************/
+
+/*************0x8000 series*************/
+#define SCTP_ECN_CAPABLE 0x8000
+/* ECN Nonce: draft-ladha-sctp-ecn-nonce */
+#define SCTP_ECN_NONCE_SUPPORTED 0x8001
+/* draft-ietf-tsvwg-auth-xxx */
+#define SCTP_RANDOM 0x8002
+#define SCTP_CHUNK_LIST 0x8003
+#define SCTP_HMAC_LIST 0x8004
+/*
+ * draft-ietf-tsvwg-addip-sctp-xx param=0x8008 len=0xNNNN Byte | Byte | Byte
+ * | Byte Byte | Byte ...
+ *
+ * Where each byte is a chunk type extension supported. For example, to support
+ * all chunks one would have (in hex):
+ *
+ * 80 01 00 09 C0 C1 80 81 82 00 00 00
+ *
+ * Has the parameter. C0 = PR-SCTP (RFC3758) C1, 80 = ASCONF (addip draft) 81
+ * = Packet Drop 82 = Stream Reset 83 = Authentication
+ */
+#define SCTP_SUPPORTED_CHUNK_EXT 0x8008
+
+/*************0xC000 series*************/
+#define SCTP_PRSCTP_SUPPORTED 0xc000
+/* draft-ietf-tsvwg-addip-sctp */
+#define SCTP_ADD_IP_ADDRESS 0xc001
+#define SCTP_DEL_IP_ADDRESS 0xc002
+#define SCTP_ERROR_CAUSE_IND 0xc003
+#define SCTP_SET_PRIM_ADDR 0xc004
+#define SCTP_SUCCESS_REPORT 0xc005
+#define SCTP_ULP_ADAPTATION 0xc006
+/* behave-nat-draft */
+#define SCTP_HAS_NAT_SUPPORT 0xc007
+#define SCTP_NAT_VTAGS 0xc008
+
+/* Notification error codes */
+#define SCTP_NOTIFY_DATAGRAM_UNSENT 0x0001
+#define SCTP_NOTIFY_DATAGRAM_SENT 0x0002
+#define SCTP_FAILED_THRESHOLD 0x0004
+#define SCTP_HEARTBEAT_SUCCESS 0x0008
+#define SCTP_RESPONSE_TO_USER_REQ 0x0010
+#define SCTP_INTERNAL_ERROR 0x0020
+#define SCTP_SHUTDOWN_GUARD_EXPIRES 0x0040
+#define SCTP_RECEIVED_SACK 0x0080
+#define SCTP_PEER_FAULTY 0x0100
+#define SCTP_ICMP_REFUSED 0x0200
+
+/* bits for TOS field */
+#define SCTP_ECT0_BIT 0x02
+#define SCTP_ECT1_BIT 0x01
+#define SCTP_CE_BITS 0x03
+
+/* below turns off above */
+#define SCTP_FLEXIBLE_ADDRESS 0x20
+#define SCTP_NO_HEARTBEAT 0x40
+
+/* mask to get sticky */
+#define SCTP_STICKY_OPTIONS_MASK 0x0c
+
+
+/*
+ * SCTP states for internal state machine XXX (should match "user" values)
+ */
+#define SCTP_STATE_EMPTY 0x0000
+#define SCTP_STATE_INUSE 0x0001
+#define SCTP_STATE_COOKIE_WAIT 0x0002
+#define SCTP_STATE_COOKIE_ECHOED 0x0004
+#define SCTP_STATE_OPEN 0x0008
+#define SCTP_STATE_SHUTDOWN_SENT 0x0010
+#define SCTP_STATE_SHUTDOWN_RECEIVED 0x0020
+#define SCTP_STATE_SHUTDOWN_ACK_SENT 0x0040
+#define SCTP_STATE_SHUTDOWN_PENDING 0x0080
+#define SCTP_STATE_CLOSED_SOCKET 0x0100
+#define SCTP_STATE_ABOUT_TO_BE_FREED 0x0200
+#define SCTP_STATE_PARTIAL_MSG_LEFT 0x0400
+#define SCTP_STATE_WAS_ABORTED 0x0800
+#define SCTP_STATE_IN_ACCEPT_QUEUE 0x1000
+#define SCTP_STATE_MASK 0x007f
+
+#define SCTP_GET_STATE(asoc) ((asoc)->state & SCTP_STATE_MASK)
+#define SCTP_SET_STATE(asoc, newstate) ((asoc)->state = ((asoc)->state & ~SCTP_STATE_MASK) | newstate)
+#define SCTP_CLEAR_SUBSTATE(asoc, substate) ((asoc)->state &= ~substate)
+#define SCTP_ADD_SUBSTATE(asoc, substate) ((asoc)->state |= substate)
+
+/* SCTP reachability state for each address */
+#define SCTP_ADDR_REACHABLE 0x001
+#define SCTP_ADDR_NOT_REACHABLE 0x002
+#define SCTP_ADDR_NOHB 0x004
+#define SCTP_ADDR_BEING_DELETED 0x008
+#define SCTP_ADDR_NOT_IN_ASSOC 0x010
+#define SCTP_ADDR_WAS_PRIMARY 0x020
+#define SCTP_ADDR_SWITCH_PRIMARY 0x040
+#define SCTP_ADDR_OUT_OF_SCOPE 0x080
+#define SCTP_ADDR_DOUBLE_SWITCH 0x100
+#define SCTP_ADDR_UNCONFIRMED 0x200
+#define SCTP_ADDR_REQ_PRIMARY 0x400
+/* JRS 5/13/07 - Added potentially failed state for CMT PF */
+#define SCTP_ADDR_PF 0x800
+#define SCTP_REACHABLE_MASK 0x203
+
+/* bound address types (e.g. valid address types to allow) */
+#define SCTP_BOUND_V6 0x01
+#define SCTP_BOUND_V4 0x02
+
+/*
+ * what is the default number of mbufs in a chain I allow before switching to
+ * a cluster
+ */
+#define SCTP_DEFAULT_MBUFS_IN_CHAIN 5
+
+/* How long a cookie lives in milliseconds */
+#define SCTP_DEFAULT_COOKIE_LIFE 60000
+
+/* resource limit of streams */
+#define MAX_SCTP_STREAMS 2048
+
+/* Maximum the mapping array will grow to (TSN mapping array) */
+#define SCTP_MAPPING_ARRAY 512
+
+/* size of the initial malloc on the mapping array */
+#define SCTP_INITIAL_MAPPING_ARRAY 16
+/* how much we grow the mapping array each call */
+#define SCTP_MAPPING_ARRAY_INCR 32
+
+/*
+ * Here we define the timer types used by the implementation as arguments in
+ * the set/get timer type calls.
+ */
+#define SCTP_TIMER_INIT 0
+#define SCTP_TIMER_RECV 1
+#define SCTP_TIMER_SEND 2
+#define SCTP_TIMER_HEARTBEAT 3
+#define SCTP_TIMER_PMTU 4
+#define SCTP_TIMER_MAXSHUTDOWN 5
+#define SCTP_TIMER_SIGNATURE 6
+/*
+ * number of timer types in the base SCTP structure used in the set/get calls,
+ * each of which has a base default.
+ */
+#define SCTP_NUM_TMRS 7
+
+/* timer types */
+#define SCTP_TIMER_TYPE_NONE 0
+#define SCTP_TIMER_TYPE_SEND 1
+#define SCTP_TIMER_TYPE_INIT 2
+#define SCTP_TIMER_TYPE_RECV 3
+#define SCTP_TIMER_TYPE_SHUTDOWN 4
+#define SCTP_TIMER_TYPE_HEARTBEAT 5
+#define SCTP_TIMER_TYPE_COOKIE 6
+#define SCTP_TIMER_TYPE_NEWCOOKIE 7
+#define SCTP_TIMER_TYPE_PATHMTURAISE 8
+#define SCTP_TIMER_TYPE_SHUTDOWNACK 9
+#define SCTP_TIMER_TYPE_ASCONF 10
+#define SCTP_TIMER_TYPE_SHUTDOWNGUARD 11
+#define SCTP_TIMER_TYPE_AUTOCLOSE 12
+#define SCTP_TIMER_TYPE_EVENTWAKE 13
+#define SCTP_TIMER_TYPE_STRRESET 14
+#define SCTP_TIMER_TYPE_INPKILL 15
+#define SCTP_TIMER_TYPE_EARLYFR 17
+#define SCTP_TIMER_TYPE_ASOCKILL 18
+#define SCTP_TIMER_TYPE_ADDR_WQ 19
+#define SCTP_TIMER_TYPE_ZERO_COPY 20
+#define SCTP_TIMER_TYPE_ZCOPY_SENDQ 21
+#define SCTP_TIMER_TYPE_PRIM_DELETED 22
+/* add new timers here - and increment LAST */
+#define SCTP_TIMER_TYPE_LAST 23
+
+#define SCTP_IS_TIMER_TYPE_VALID(t) (((t) > SCTP_TIMER_TYPE_NONE) && \
+ ((t) < SCTP_TIMER_TYPE_LAST))
+
+
+
+/* max number of TSN's dup'd that I will hold */
+#define SCTP_MAX_DUP_TSNS 20
+
+/*
+ * Here we define the types used when setting the retry amounts.
+ */
+/* How many drop re-attempts we make on INIT/COOKIE-ECHO */
+#define SCTP_RETRY_DROPPED_THRESH 4
+
+/*
+ * Maximum number of chunks a single association can have on it. Note that
+ * this is a squishy number since the count can run over this if the user
+ * sends a large message down .. the fragmented chunks don't count until
+ * AFTER the message is on queue.. it would be the next send that blocks
+ * things. This number will get tuned up at boot in sctp_init() using the
+ * number of clusters as a base. This way high bandwidth environments will
+ * not get impacted by lower bandwidth peers sending a bunch of 1 byte chunks.
+ */
+#define SCTP_ASOC_MAX_CHUNKS_ON_QUEUE 512
+
+
+/* The conversion from time to ticks and vice versa is done by rounding
+ * upwards. This way we can test in the code that the time is positive and
+ * know that this corresponds to a positive number of ticks.
+ */
+#define MSEC_TO_TICKS(x) ((hz == 1000) ? x : ((((x) * hz) + 999) / 1000))
+#define TICKS_TO_MSEC(x) ((hz == 1000) ? x : ((((x) * 1000) + (hz - 1)) / hz))
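+/*
+ * Worked example (illustrative only, not part of the original source):
+ * with hz = 100, MSEC_TO_TICKS(15) = ((15 * 100) + 999) / 1000 = 2 ticks
+ * (1.5 ticks rounded up) and TICKS_TO_MSEC(2) = ((2 * 1000) + 99) / 100 = 20 ms,
+ * so any positive time always maps to a positive number of ticks.
+ */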
+
+#define SEC_TO_TICKS(x) ((x) * hz)
+#define TICKS_TO_SEC(x) (((x) + (hz - 1)) / hz)
+
+/*
+ * Basically the minimum amount of time before I do an early FR. Making this
+ * value too low will cause duplicate retransmissions.
+ */
+#define SCTP_MINFR_MSEC_TIMER 250
+/* The floor this value is allowed to fall to when starting a timer. */
+#define SCTP_MINFR_MSEC_FLOOR 20
+
+/* init timer def = 1 sec */
+#define SCTP_INIT_SEC 1
+
+/* send timer def = 1 seconds */
+#define SCTP_SEND_SEC 1
+
+/* recv timer def = 200ms */
+#define SCTP_RECV_MSEC 200
+
+/* 30 seconds + RTO (in ms) */
+#define SCTP_HB_DEFAULT_MSEC 30000
+
+/* Max time I will wait for Shutdown to complete */
+#define SCTP_DEF_MAX_SHUTDOWN_SEC 180
+
+
+/*
+ * This is how long a secret lives (NOT how long a cookie lives): how many
+ * ticks the current secret will live.
+ */
+#define SCTP_DEFAULT_SECRET_LIFE_SEC 3600
+
+#define SCTP_RTO_UPPER_BOUND (60000) /* 60 sec in ms */
+#define SCTP_RTO_UPPER_BOUND_SEC 60 /* for the init timer */
+#define SCTP_RTO_LOWER_BOUND (1000) /* 1 sec in ms */
+#define SCTP_RTO_INITIAL (3000) /* 3 sec in ms */
+
+
+#define SCTP_INP_KILL_TIMEOUT 20/* number of ms to retry kill of inpcb */
+#define SCTP_ASOC_KILL_TIMEOUT 10 /* number of ms to retry kill of an association */
+
+#define SCTP_DEF_MAX_INIT 8
+#define SCTP_DEF_MAX_SEND 10
+#define SCTP_DEF_MAX_PATH_RTX 5
+
+#define SCTP_DEF_PMTU_RAISE_SEC 600 /* 10 min between raise attempts */
+
+
+/* How many streams I request initially by default */
+#define SCTP_OSTREAM_INITIAL 10
+
+/*
+ * How many smallest_mtu's need to increase before a window update sack is
+ * sent (should be a power of 2).
+ */
+/* Send window update (incr * this > hiwat). Should be a power of 2 */
+#define SCTP_MINIMAL_RWND (4096) /* minimal rwnd */
+
+#define SCTP_ADDRMAX 24
+
+/* SCTP DEBUG Switch parameters */
+#define SCTP_DEBUG_TIMER1 0x00000001
+#define SCTP_DEBUG_TIMER2 0x00000002 /* unused */
+#define SCTP_DEBUG_TIMER3 0x00000004 /* unused */
+#define SCTP_DEBUG_TIMER4 0x00000008
+#define SCTP_DEBUG_OUTPUT1 0x00000010
+#define SCTP_DEBUG_OUTPUT2 0x00000020
+#define SCTP_DEBUG_OUTPUT3 0x00000040
+#define SCTP_DEBUG_OUTPUT4 0x00000080
+#define SCTP_DEBUG_UTIL1 0x00000100
+#define SCTP_DEBUG_UTIL2 0x00000200 /* unused */
+#define SCTP_DEBUG_AUTH1 0x00000400
+#define SCTP_DEBUG_AUTH2 0x00000800 /* unused */
+#define SCTP_DEBUG_INPUT1 0x00001000
+#define SCTP_DEBUG_INPUT2 0x00002000
+#define SCTP_DEBUG_INPUT3 0x00004000
+#define SCTP_DEBUG_INPUT4 0x00008000 /* unused */
+#define SCTP_DEBUG_ASCONF1 0x00010000
+#define SCTP_DEBUG_ASCONF2 0x00020000
+#define SCTP_DEBUG_OUTPUT5 0x00040000 /* unused */
+#define SCTP_DEBUG_XXX 0x00080000 /* unused */
+#define SCTP_DEBUG_PCB1 0x00100000
+#define SCTP_DEBUG_PCB2 0x00200000 /* unused */
+#define SCTP_DEBUG_PCB3 0x00400000
+#define SCTP_DEBUG_PCB4 0x00800000
+#define SCTP_DEBUG_INDATA1 0x01000000
+#define SCTP_DEBUG_INDATA2 0x02000000 /* unused */
+#define SCTP_DEBUG_INDATA3 0x04000000 /* unused */
+#define SCTP_DEBUG_CRCOFFLOAD 0x08000000 /* unused */
+#define SCTP_DEBUG_USRREQ1 0x10000000 /* unused */
+#define SCTP_DEBUG_USRREQ2 0x20000000 /* unused */
+#define SCTP_DEBUG_PEEL1 0x40000000
+#define SCTP_DEBUG_XXXXX 0x80000000 /* unused */
+#define SCTP_DEBUG_ALL 0x7ff3ffff
+#define SCTP_DEBUG_NOISY 0x00040000
+
+/* What sender needs to see to avoid SWS or we consider the peer's rwnd 0 */
+#define SCTP_SWS_SENDER_DEF 1420
+
+/*
+ * SWS is scaled to the sb_hiwat of the socket. A value of 2 is hiwat/4, 1
+ * would be hiwat/2 etc.
+ */
+/* What receiver needs to see in sockbuf or we tell the peer it is 1 */
+#define SCTP_SWS_RECEIVER_DEF 3000
+
+#define SCTP_INITIAL_CWND 4380
+
+#define SCTP_DEFAULT_MTU 1500 /* emergency default MTU */
+/* amount peer is obligated to have in rwnd or I will abort */
+#define SCTP_MIN_RWND 1500
+
+#define SCTP_DEFAULT_MAXSEGMENT 65535
+
+#define SCTP_CHUNK_BUFFER_SIZE 512
+#define SCTP_PARAM_BUFFER_SIZE 512
+
+/* small chunk store for looking at chunk_list in auth */
+#define SCTP_SMALL_CHUNK_STORE 260
+
+#define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */
+#define SCTP_HOW_MANY_SECRETS 2 /* how many secrets I keep */
+
+#define SCTP_NUMBER_OF_SECRETS 8 /* or 8 * 4 = 32 octets */
+#define SCTP_SECRET_SIZE 32 /* number of octets in 256 bits */
+
+
+/*
+ * SCTP upper layer notifications
+ */
+#define SCTP_NOTIFY_ASSOC_UP 1
+#define SCTP_NOTIFY_ASSOC_DOWN 2
+#define SCTP_NOTIFY_INTERFACE_DOWN 3
+#define SCTP_NOTIFY_INTERFACE_UP 4
+#define SCTP_NOTIFY_DG_FAIL 5
+#define SCTP_NOTIFY_STRDATA_ERR 6
+#define SCTP_NOTIFY_ASSOC_ABORTED 7
+#define SCTP_NOTIFY_PEER_OPENED_STREAM 8
+#define SCTP_NOTIFY_STREAM_OPENED_OK 9
+#define SCTP_NOTIFY_ASSOC_RESTART 10
+#define SCTP_NOTIFY_HB_RESP 11
+#define SCTP_NOTIFY_ASCONF_SUCCESS 12
+#define SCTP_NOTIFY_ASCONF_FAILED 13
+#define SCTP_NOTIFY_PEER_SHUTDOWN 14
+#define SCTP_NOTIFY_ASCONF_ADD_IP 15
+#define SCTP_NOTIFY_ASCONF_DELETE_IP 16
+#define SCTP_NOTIFY_ASCONF_SET_PRIMARY 17
+#define SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION 18
+#define SCTP_NOTIFY_INTERFACE_CONFIRMED 20
+#define SCTP_NOTIFY_STR_RESET_RECV 21
+#define SCTP_NOTIFY_STR_RESET_SEND 22
+#define SCTP_NOTIFY_STR_RESET_FAILED_OUT 23
+#define SCTP_NOTIFY_STR_RESET_FAILED_IN 24
+#define SCTP_NOTIFY_AUTH_NEW_KEY 25
+#define SCTP_NOTIFY_AUTH_FREE_KEY 26
+#define SCTP_NOTIFY_SPECIAL_SP_FAIL 27
+#define SCTP_NOTIFY_NO_PEER_AUTH 28
+#define SCTP_NOTIFY_SENDER_DRY 29
+#define SCTP_NOTIFY_STR_RESET_ADD_OK 30
+#define SCTP_NOTIFY_STR_RESET_ADD_FAIL 31
+#define SCTP_NOTIFY_STR_RESET_INSTREAM_ADD_OK 32
+#define SCTP_NOTIFY_MAX 32
+
+
+/* This is the value for messages that are NOT completely
+ * copied down where we will start to split the message.
+ * So, with our default, we split only if the piece we
+ * want to take will fill up a full MTU (assuming
+ * a 1500 byte MTU).
+ */
+#define SCTP_DEFAULT_SPLIT_POINT_MIN 2904
+
+/* ABORT CODES and other tell-tale location
+ * codes are generated by adding the below
+ * to the instance id.
+ */
+
+/* File defines */
+#define SCTP_FROM_SCTP_INPUT 0x10000000
+#define SCTP_FROM_SCTP_PCB 0x20000000
+#define SCTP_FROM_SCTP_INDATA 0x30000000
+#define SCTP_FROM_SCTP_TIMER 0x40000000
+#define SCTP_FROM_SCTP_USRREQ 0x50000000
+#define SCTP_FROM_SCTPUTIL 0x60000000
+#define SCTP_FROM_SCTP6_USRREQ 0x70000000
+#define SCTP_FROM_SCTP_ASCONF 0x80000000
+#define SCTP_FROM_SCTP_OUTPUT 0x90000000
+#define SCTP_FROM_SCTP_PEELOFF 0xa0000000
+#define SCTP_FROM_SCTP_PANDA 0xb0000000
+#define SCTP_FROM_SCTP_SYSCTL 0xc0000000
+
+/* Location ID's */
+#define SCTP_LOC_1 0x00000001
+#define SCTP_LOC_2 0x00000002
+#define SCTP_LOC_3 0x00000003
+#define SCTP_LOC_4 0x00000004
+#define SCTP_LOC_5 0x00000005
+#define SCTP_LOC_6 0x00000006
+#define SCTP_LOC_7 0x00000007
+#define SCTP_LOC_8 0x00000008
+#define SCTP_LOC_9 0x00000009
+#define SCTP_LOC_10 0x0000000a
+#define SCTP_LOC_11 0x0000000b
+#define SCTP_LOC_12 0x0000000c
+#define SCTP_LOC_13 0x0000000d
+#define SCTP_LOC_14 0x0000000e
+#define SCTP_LOC_15 0x0000000f
+#define SCTP_LOC_16 0x00000010
+#define SCTP_LOC_17 0x00000011
+#define SCTP_LOC_18 0x00000012
+#define SCTP_LOC_19 0x00000013
+#define SCTP_LOC_20 0x00000014
+#define SCTP_LOC_21 0x00000015
+#define SCTP_LOC_22 0x00000016
+#define SCTP_LOC_23 0x00000017
+#define SCTP_LOC_24 0x00000018
+#define SCTP_LOC_25 0x00000019
+#define SCTP_LOC_26 0x0000001a
+#define SCTP_LOC_27 0x0000001b
+#define SCTP_LOC_28 0x0000001c
+#define SCTP_LOC_29 0x0000001d
+#define SCTP_LOC_30 0x0000001e
+#define SCTP_LOC_31 0x0000001f
+#define SCTP_LOC_32 0x00000020
+#define SCTP_LOC_33 0x00000021
+
+
+/* Free assoc codes */
+#define SCTP_NORMAL_PROC 0
+#define SCTP_PCBFREE_NOFORCE 1
+#define SCTP_PCBFREE_FORCE 2
+
+/* From codes for adding addresses */
+#define SCTP_ADDR_IS_CONFIRMED 8
+#define SCTP_ADDR_DYNAMIC_ADDED 6
+#define SCTP_IN_COOKIE_PROC 100
+#define SCTP_ALLOC_ASOC 1
+#define SCTP_LOAD_ADDR_2 2
+#define SCTP_LOAD_ADDR_3 3
+#define SCTP_LOAD_ADDR_4 4
+#define SCTP_LOAD_ADDR_5 5
+
+#define SCTP_DONOT_SETSCOPE 0
+#define SCTP_DO_SETSCOPE 1
+
+
+/* This value determines the default for when
+ * we try to add more on the send queue, if
+ * there is room. This prevents us from cycling
+ * into the copy_resume routine too often if
+ * we do not have enough space to add a decently
+ * sized message. Note that if we have enough
+ * space to complete the message copy we will always
+ * add to the message, no matter what the size. It is
+ * only when we reach the point that we have some left
+ * to add, and there is only room for part of it, that we
+ * will use this threshold. It is also a sysctl.
+ */
+#define SCTP_DEFAULT_ADD_MORE 1452
+
+#ifndef SCTP_PCBHASHSIZE
+/* default number of association hash buckets in each endpoint */
+#define SCTP_PCBHASHSIZE 256
+#endif
+#ifndef SCTP_TCBHASHSIZE
+#define SCTP_TCBHASHSIZE 1024
+#endif
+
+#ifndef SCTP_CHUNKQUEUE_SCALE
+#define SCTP_CHUNKQUEUE_SCALE 10
+#endif
+
+/* clock variance is 1 ms */
+#define SCTP_CLOCK_GRANULARITY 1
+#define IP_HDR_SIZE 40 /* we use the size of an IPv6 header here; this
+ * detracts a small amount for IPv4 but it
+ * simplifies the IPv6 addition */
+
+/* Argument magic number for sctp_inpcb_free() */
+
+/* third argument */
+#define SCTP_CALLED_DIRECTLY_NOCMPSET 0
+#define SCTP_CALLED_AFTER_CMPSET_OFCLOSE 1
+#define SCTP_CALLED_FROM_INPKILL_TIMER 2
+/* second argument */
+#define SCTP_FREE_SHOULD_USE_ABORT 1
+#define SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE 0
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132 /* the Official IANA number :-) */
+#endif /* !IPPROTO_SCTP */
+
+#define SCTP_MAX_DATA_BUNDLING 256
+
+/* modular comparison */
+/* True if a > b (mod = M) */
+#define compare_with_wrap(a, b, M) (((a > b) && ((a - b) < ((M >> 1) + 1))) || \
+ ((b > a) && ((b - a) > ((M >> 1) + 1))))
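+/*
+ * Worked example (illustrative only, not part of the original source):
+ * with M = MAX_TSN, compare_with_wrap(10, 5, MAX_TSN) is true since
+ * 10 - 5 = 5 < 0x80000000, and compare_with_wrap(5, 0xfffffffe, MAX_TSN)
+ * is also true because 0xfffffffe - 5 = 0xfffffff9 > 0x80000000, i.e.
+ * TSN 5 is treated as "newer" than 0xfffffffe once the 32-bit TSN space
+ * has wrapped.
+ */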
+
+
+/* Mapping array manipulation routines */
+#define SCTP_IS_TSN_PRESENT(arry, gap) ((arry[(gap >> 3)] >> (gap & 0x07)) & 0x01)
+#define SCTP_SET_TSN_PRESENT(arry, gap) (arry[(gap >> 3)] |= (0x01 << ((gap & 0x07))))
+#define SCTP_UNSET_TSN_PRESENT(arry, gap) (arry[(gap >> 3)] &= ((~(0x01 << ((gap & 0x07)))) & 0xff))
+#define SCTP_CALC_TSN_TO_GAP(gap, tsn, mapping_tsn) do { \
+ if (tsn >= mapping_tsn) { \
+ gap = tsn - mapping_tsn; \
+ } else { \
+ gap = (MAX_TSN - mapping_tsn) + tsn + 1; \
+ } \
+ } while(0)
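+/*
+ * Worked example (illustrative only, not part of the original source):
+ * if the mapping array base TSN is 0xfffffff0 and TSN 0x00000005 arrives
+ * after a wrap, then gap = (MAX_TSN - 0xfffffff0) + 0x5 + 1 = 21, and
+ * SCTP_SET_TSN_PRESENT(map, 21) sets bit (21 & 0x07) = 5 of map[21 >> 3],
+ * i.e. bit 5 of map[2].
+ */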
+
+
+#define SCTP_RETRAN_DONE -1
+#define SCTP_RETRAN_EXIT -2
+
+/*
+ * This value defines the number of vtag block time wait entries per list
+ * element. Each entry will take two 4 byte ints (and of course the overhead
+ * of the next pointer as well). Using 15 as an example will yield ((8 *
+ * 15) + 8) or 128 bytes of overhead for each timewait block that gets
+ * initialized. Increasing it to 31 would yield 256 bytes per block.
+ */
+#define SCTP_NUMBER_IN_VTAG_BLOCK 15
+/*
+ * If we use the STACK option, we have an array of this many head pointers.
+ * The tag is mod'd with this size to find the bucket, and then all
+ * entries must be searched to see if the tag is in timed wait. If so, we
+ * reject it.
+ */
+#define SCTP_STACK_VTAG_HASH_SIZE 32
+
+/*
+ * Number of seconds of time wait for a vtag.
+ */
+#define SCTP_TIME_WAIT 60
+
+#define SCTP_SEND_BUFFER_SPLITTING 0x00000001
+#define SCTP_RECV_BUFFER_SPLITTING 0x00000002
+
+/* The system retains a cache of free chunks so as to
+ * cut down on calls to the memory allocation system. There
+ * is a per-association limit of free items and an overall
+ * system limit. If either one gets hit then the resource
+ * stops being cached.
+ */
+
+#define SCTP_DEF_ASOC_RESC_LIMIT 10
+#define SCTP_DEF_SYSTEM_RESC_LIMIT 1000
+
+/*-
+ * defines for socket lock states.
+ * Used by __APPLE__ and SCTP_SO_LOCK_TESTING
+ */
+#define SCTP_SO_LOCKED 1
+#define SCTP_SO_NOT_LOCKED 0
+
+
+#define SCTP_HOLDS_LOCK 1
+#define SCTP_NOT_LOCKED 0
+
+/*-
+ * For address locks, do we hold the lock?
+ */
+#define SCTP_ADDR_LOCKED 1
+#define SCTP_ADDR_NOT_LOCKED 0
+
+#define IN4_ISPRIVATE_ADDRESS(a) \
+ ((((uint8_t *)&(a)->s_addr)[0] == 10) || \
+ ((((uint8_t *)&(a)->s_addr)[0] == 172) && \
+ (((uint8_t *)&(a)->s_addr)[1] >= 16) && \
+ (((uint8_t *)&(a)->s_addr)[1] <= 31)) || \
+ ((((uint8_t *)&(a)->s_addr)[0] == 192) && \
+ (((uint8_t *)&(a)->s_addr)[1] == 168)))
+
+#define IN4_ISLOOPBACK_ADDRESS(a) \
+ ((((uint8_t *)&(a)->s_addr)[0] == 127) && \
+ (((uint8_t *)&(a)->s_addr)[1] == 0) && \
+ (((uint8_t *)&(a)->s_addr)[2] == 0) && \
+ (((uint8_t *)&(a)->s_addr)[3] == 1))
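+/*
+ * Illustrative examples (not part of the original source):
+ * IN4_ISPRIVATE_ADDRESS() is true for 10.1.2.3 and 192.168.5.9 and false
+ * for 8.8.8.8, while IN4_ISLOOPBACK_ADDRESS() matches only the exact
+ * address 127.0.0.1, not the whole 127.0.0.0/8 block.
+ */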
+
+
+#if defined(_KERNEL)
+
+#define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x))
+#define SCTP_GETPTIME_TIMEVAL(x) (microuptime(x))
+#endif
+/*#if defined(__FreeBSD__) || defined(__APPLE__)*/
+/*#define SCTP_GETTIME_TIMEVAL(x) { \*/
+/* (x)->tv_sec = ticks / 1000; \*/
+/* (x)->tv_usec = (ticks % 1000) * 1000; \*/
+/*}*/
+
+/*#else*/
+/*#define SCTP_GETTIME_TIMEVAL(x) (microtime(x))*/
+/*#endif __FreeBSD__ */
+
+#if defined(_KERNEL) || defined(__Userspace__)
+#define sctp_sowwakeup(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEOUTPUT; \
+ } else { \
+ sowwakeup(so); \
+ } \
+} while (0)
+
+#define sctp_sowwakeup_locked(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ SOCKBUF_UNLOCK(&((so)->so_snd)); \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEOUTPUT; \
+ } else { \
+ sowwakeup_locked(so); \
+ } \
+} while (0)
+
+#define sctp_sorwakeup(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEINPUT; \
+ } else { \
+ sorwakeup(so); \
+ } \
+} while (0)
+
+#define sctp_sorwakeup_locked(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEINPUT; \
+ SOCKBUF_UNLOCK(&((so)->so_rcv)); \
+ } else { \
+ sorwakeup_locked(so); \
+ } \
+} while (0)
+
+#endif /* _KERNEL || __Userspace__ */
+#endif
diff --git a/freebsd/sys/netinet/sctp_crc32.c b/freebsd/sys/netinet/sctp_crc32.c
new file mode 100644
index 00000000..aa4c08cf
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_crc32.c
@@ -0,0 +1,148 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_crc32.c,v 1.12 2005/03/06 16:04:17 itojun Exp $ */
+
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_crc32.h>
+#include <freebsd/netinet/sctp_pcb.h>
+
+
+#if !defined(SCTP_WITH_NO_CSUM)
+
+static uint32_t
+sctp_finalize_crc32c(uint32_t crc32c)
+{
+ uint32_t result;
+
+#if BYTE_ORDER == BIG_ENDIAN
+ uint8_t byte0, byte1, byte2, byte3;
+
+#endif
+ /* Complement the result */
+ result = ~crc32c;
+#if BYTE_ORDER == BIG_ENDIAN
+ /*
+ * For BIG-ENDIAN.. aka Motorola byte order the result is in
+ * little-endian form. So we must manually swap the bytes. Then we
+ * can call htonl() which does nothing...
+ */
+ byte0 = result & 0x000000ff;
+ byte1 = (result >> 8) & 0x000000ff;
+ byte2 = (result >> 16) & 0x000000ff;
+ byte3 = (result >> 24) & 0x000000ff;
+ crc32c = ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3);
+#else
+ /*
+ * For INTEL platforms the result comes out in network order. No
+ * htonl is required or the swap above. So we optimize out both the
+ * htonl and the manual swap above.
+ */
+ crc32c = result;
+#endif
+ return (crc32c);
+}
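+
+/*
+ * Worked example (illustrative only, not part of the original source):
+ * if the accumulated CRC32c value is 0x12345678, the complement is
+ * 0xedcba987. On a little-endian machine that value is returned as-is
+ * (it is already in the on-wire byte order), while on a big-endian
+ * machine the bytes are swapped to give 0x87a9cbed.
+ */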
+
+uint32_t
+sctp_calculate_cksum(struct mbuf *m, uint32_t offset)
+{
+ /*
+ * given an mbuf chain with a packetheader offset by 'offset'
+ * pointing at an sctphdr (with csum set to 0), go through the chain
+ * of SCTP_BUF_NEXT()'s and calculate the SCTP checksum. This also
+ * has a side bonus as it will calculate the total length of the
+ * mbuf chain. Note: if offset is greater than the total mbuf
+ * length, checksum=1, pktlen=0 is returned (i.e. no real error code)
+ */
+ uint32_t base = 0xffffffff;
+ struct mbuf *at;
+
+ at = m;
+ /* find the correct mbuf and offset into mbuf */
+ while ((at != NULL) && (offset > (uint32_t) SCTP_BUF_LEN(at))) {
+ offset -= SCTP_BUF_LEN(at); /* update remaining offset
+ * left */
+ at = SCTP_BUF_NEXT(at);
+ }
+ while (at != NULL) {
+ if ((SCTP_BUF_LEN(at) - offset) > 0) {
+ base = calculate_crc32c(base,
+ (unsigned char *)(SCTP_BUF_AT(at, offset)),
+ (unsigned int)(SCTP_BUF_LEN(at) - offset));
+ }
+ if (offset) {
+ /* we only offset once into the first mbuf */
+ if (offset < (uint32_t) SCTP_BUF_LEN(at))
+ offset = 0;
+ else
+ offset -= SCTP_BUF_LEN(at);
+ }
+ at = SCTP_BUF_NEXT(at);
+ }
+ base = sctp_finalize_crc32c(base);
+ return (base);
+}
+
+#endif /* !defined(SCTP_WITH_NO_CSUM) */
+
+
+void
+sctp_delayed_cksum(struct mbuf *m, uint32_t offset)
+{
+#if defined(SCTP_WITH_NO_CSUM)
+ panic("sctp_delayed_cksum() called when using no SCTP CRC.");
+#else
+ uint32_t checksum;
+
+ checksum = sctp_calculate_cksum(m, offset);
+ SCTP_STAT_DECR(sctps_sendhwcrc);
+ SCTP_STAT_INCR(sctps_sendswcrc);
+ offset += offsetof(struct sctphdr, checksum);
+
+ if (offset + sizeof(uint32_t) > (uint32_t) (m->m_len)) {
+ printf("sctp_delayed_cksum(): m->len: %d, off: %d.\n",
+ (uint32_t) m->m_len, offset);
+ /*
+ * XXX this shouldn't happen, but if it does, the correct
+ * behavior may be to insert the checksum in the appropriate
+ * next mbuf in the chain.
+ */
+ return;
+ }
+ *(uint32_t *) (m->m_data + offset) = checksum;
+#endif
+}
diff --git a/freebsd/sys/netinet/sctp_crc32.h b/freebsd/sys/netinet/sctp_crc32.h
new file mode 100644
index 00000000..768b25d5
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_crc32.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_crc32.h,v 1.5 2004/08/17 04:06:16 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __crc32c_h__
+#define __crc32c_h__
+
+#if defined(_KERNEL)
+#if !defined(SCTP_WITH_NO_CSUM)
+uint32_t sctp_calculate_cksum(struct mbuf *, uint32_t);
+
+#endif
+void sctp_delayed_cksum(struct mbuf *, uint32_t offset);
+
+#endif /* _KERNEL */
+#endif /* __crc32c_h__ */
diff --git a/freebsd/sys/netinet/sctp_header.h b/freebsd/sys/netinet/sctp_header.h
new file mode 100644
index 00000000..141bfcda
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_header.h
@@ -0,0 +1,624 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_header.h,v 1.14 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_header_h__
+#define __sctp_header_h__
+
+#include <freebsd/sys/time.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_constants.h>
+
+#define SCTP_PACKED __attribute__((packed))
+
+/*
+ * Parameter structures
+ */
+struct sctp_ipv4addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_IPV4_PARAM_TYPE, len=8 */
+ uint32_t addr; /* IPV4 address */
+} SCTP_PACKED;
+
+#define SCTP_V6_ADDR_BYTES 16
+
+
+struct sctp_ipv6addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_IPV6_PARAM_TYPE, len=20 */
+ uint8_t addr[SCTP_V6_ADDR_BYTES]; /* IPV6 address */
+} SCTP_PACKED;
+
+/* Cookie Preservative */
+struct sctp_cookie_perserve_param {
+ struct sctp_paramhdr ph;/* type=SCTP_COOKIE_PRESERVE, len=8 */
+ uint32_t time; /* time in ms to extend cookie */
+} SCTP_PACKED;
+
+#define SCTP_ARRAY_MIN_LEN 1
+/* Host Name Address */
+struct sctp_host_name_param {
+ struct sctp_paramhdr ph;/* type=SCTP_HOSTNAME_ADDRESS */
+ char name[SCTP_ARRAY_MIN_LEN]; /* host name */
+} SCTP_PACKED;
+
+/*
+ * This is the maximum padded size of an s-a-p (supported address parameter),
+ * so param header + 3 address types (6 bytes) + 2 byte pad = 12
+ */
+#define SCTP_MAX_ADDR_PARAMS_SIZE 12
+/* supported address type */
+struct sctp_supported_addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_SUPPORTED_ADDRTYPE */
+ uint16_t addr_type[SCTP_ARRAY_MIN_LEN]; /* array of supported address
+ * types */
+} SCTP_PACKED;
+
+/* ECN parameter */
+struct sctp_ecn_supported_param {
+ struct sctp_paramhdr ph;/* type=SCTP_ECN_CAPABLE */
+} SCTP_PACKED;
+
+
+/* heartbeat info parameter */
+struct sctp_heartbeat_info_param {
+ struct sctp_paramhdr ph;
+ uint32_t time_value_1;
+ uint32_t time_value_2;
+ uint32_t random_value1;
+ uint32_t random_value2;
+ uint16_t user_req;
+ uint8_t addr_family;
+ uint8_t addr_len;
+ char address[SCTP_ADDRMAX];
+} SCTP_PACKED;
+
+
+/* draft-ietf-tsvwg-prsctp */
+/* PR-SCTP supported parameter */
+struct sctp_prsctp_supported_param {
+ struct sctp_paramhdr ph;
+} SCTP_PACKED;
+
+
+/* draft-ietf-tsvwg-addip-sctp */
+struct sctp_asconf_paramhdr { /* an ASCONF "parameter" */
+ struct sctp_paramhdr ph;/* a SCTP parameter header */
+ uint32_t correlation_id;/* correlation id for this param */
+} SCTP_PACKED;
+
+struct sctp_asconf_addr_param { /* an ASCONF address parameter */
+ struct sctp_asconf_paramhdr aph; /* asconf "parameter" */
+ struct sctp_ipv6addr_param addrp; /* max storage size */
+} SCTP_PACKED;
+
+
+struct sctp_asconf_tag_param { /* an ASCONF NAT-Vtag parameter */
+ struct sctp_asconf_paramhdr aph; /* asconf "parameter" */
+ uint32_t local_vtag;
+ uint32_t remote_vtag;
+} SCTP_PACKED;
+
+
+struct sctp_asconf_addrv4_param { /* an ASCONF address (v4) parameter */
+ struct sctp_asconf_paramhdr aph; /* asconf "parameter" */
+ struct sctp_ipv4addr_param addrp; /* max storage size */
+} SCTP_PACKED;
+
+#define SCTP_MAX_SUPPORTED_EXT 256
+
+struct sctp_supported_chunk_types_param {
+ struct sctp_paramhdr ph;/* type = 0x8008 len = x */
+ uint8_t chunk_types[];
+} SCTP_PACKED;
+
+
+/* ECN Nonce: draft-ladha-sctp-ecn-nonce */
+struct sctp_ecn_nonce_supported_param {
+ struct sctp_paramhdr ph;/* type = 0x8001 len = 4 */
+} SCTP_PACKED;
+
+
+/*
+ * Structures for DATA chunks
+ */
+struct sctp_data {
+ uint32_t tsn;
+ uint16_t stream_id;
+ uint16_t stream_sequence;
+ uint32_t protocol_id;
+ /* user data follows */
+} SCTP_PACKED;
+
+struct sctp_data_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_data dp;
+} SCTP_PACKED;
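+
+/*
+ * Size note derived from the two structures above (assuming no compiler
+ * padding, which SCTP_PACKED prevents): a DATA chunk header is 16 bytes --
+ * 4-byte chunk header, 4-byte TSN, 2-byte stream id, 2-byte stream sequence
+ * and 4-byte payload protocol id -- with the user data following directly.
+ */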
+
+/*
+ * Structures for the control chunks
+ */
+
+/* Initiate (INIT)/Initiate Ack (INIT ACK) */
+struct sctp_init {
+ uint32_t initiate_tag; /* initiate tag */
+ uint32_t a_rwnd; /* a_rwnd */
+ uint16_t num_outbound_streams; /* OS */
+ uint16_t num_inbound_streams; /* MIS */
+ uint32_t initial_tsn; /* I-TSN */
+ /* optional param's follow */
+} SCTP_PACKED;
+
+#define SCTP_IDENTIFICATION_SIZE 16
+#define SCTP_ADDRESS_SIZE 4
+#define SCTP_RESERVE_SPACE 6
+/* state cookie header */
+struct sctp_state_cookie { /* this is our definition... */
+ uint8_t identification[SCTP_IDENTIFICATION_SIZE]; /* id of who we are */
+ struct timeval time_entered; /* the time I built cookie */
+ uint32_t cookie_life; /* life I will award this cookie */
+ uint32_t tie_tag_my_vtag; /* my tag in old association */
+
+ uint32_t tie_tag_peer_vtag; /* peers tag in old association */
+ uint32_t peers_vtag; /* peers tag in INIT (for quick ref) */
+
+ uint32_t my_vtag; /* my tag in INIT-ACK (for quick ref) */
+ uint32_t address[SCTP_ADDRESS_SIZE]; /* 4 ints/128 bits */
+ uint32_t addr_type; /* address type */
+ uint32_t laddress[SCTP_ADDRESS_SIZE]; /* my local from address */
+ uint32_t laddr_type; /* my local from address type */
+ uint32_t scope_id; /* v6 scope id for link-locals */
+
+ uint16_t peerport; /* port address of the peer in the INIT */
+ uint16_t myport; /* my port address used in the INIT */
+ uint8_t ipv4_addr_legal;/* Are V4 addr legal? */
+ uint8_t ipv6_addr_legal;/* Are V6 addr legal? */
+ uint8_t local_scope; /* IPv6 local scope flag */
+ uint8_t site_scope; /* IPv6 site scope flag */
+
+ uint8_t ipv4_scope; /* IPv4 private addr scope */
+ uint8_t loopback_scope; /* loopback scope information */
+ uint8_t reserved[SCTP_RESERVE_SPACE]; /* Align to 64 bits */
+ /*
+ * at the end is tacked on the INIT chunk and the INIT-ACK chunk
+ * (minus the cookie).
+ */
+} SCTP_PACKED;
+
+
+/* Used for NAT state error cause */
+struct sctp_missing_nat_state {
+ uint16_t cause;
+ uint16_t length;
+ uint8_t data[];
+} SCTP_PACKED;
+
+
+struct sctp_inv_mandatory_param {
+ uint16_t cause;
+ uint16_t length;
+ uint32_t num_param;
+ uint16_t param;
+ /*
+	 * We include this field only so it can be zeroed, since only a
+	 * missing cookie will cause this error.
+ */
+ uint16_t resv;
+} SCTP_PACKED;
+
+struct sctp_unresolv_addr {
+ uint16_t cause;
+ uint16_t length;
+ uint16_t addr_type;
+ uint16_t reserved; /* Only one invalid addr type */
+} SCTP_PACKED;
+
+/* state cookie parameter */
+struct sctp_state_cookie_param {
+ struct sctp_paramhdr ph;
+ struct sctp_state_cookie cookie;
+} SCTP_PACKED;
+
+struct sctp_init_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_init init;
+} SCTP_PACKED;
+
+struct sctp_init_msg {
+ struct sctphdr sh;
+ struct sctp_init_chunk msg;
+} SCTP_PACKED;
+
+/* ... used for both INIT and INIT ACK */
+#define sctp_init_ack sctp_init
+#define sctp_init_ack_chunk sctp_init_chunk
+#define sctp_init_ack_msg sctp_init_msg
+
+
+/* Selective Ack (SACK) */
+struct sctp_gap_ack_block {
+ uint16_t start; /* Gap Ack block start */
+ uint16_t end; /* Gap Ack block end */
+} SCTP_PACKED;
+
+struct sctp_sack {
+ uint32_t cum_tsn_ack; /* cumulative TSN Ack */
+ uint32_t a_rwnd; /* updated a_rwnd of sender */
+ uint16_t num_gap_ack_blks; /* number of Gap Ack blocks */
+ uint16_t num_dup_tsns; /* number of duplicate TSNs */
+ /* struct sctp_gap_ack_block's follow */
+ /* uint32_t duplicate_tsn's follow */
+} SCTP_PACKED;
+
+struct sctp_sack_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_sack sack;
+} SCTP_PACKED;
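+
+/*
+ * Worked example (illustrative only, the values are hypothetical): gap ack
+ * block offsets are relative to cum_tsn_ack.  With cum_tsn_ack = 1000 and a
+ * single block {start = 2, end = 4}, the peer is reporting that TSNs 1002
+ * through 1004 have arrived while TSN 1001 is still missing.
+ */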
+
+struct sctp_nr_sack {
+ uint32_t cum_tsn_ack; /* cumulative TSN Ack */
+ uint32_t a_rwnd; /* updated a_rwnd of sender */
+ uint16_t num_gap_ack_blks; /* number of Gap Ack blocks */
+ uint16_t num_nr_gap_ack_blks; /* number of NR Gap Ack blocks */
+ uint16_t num_dup_tsns; /* number of duplicate TSNs */
+ uint16_t reserved; /* not currently used */
+ /* struct sctp_gap_ack_block's follow */
+ /* uint32_t duplicate_tsn's follow */
+} SCTP_PACKED;
+
+struct sctp_nr_sack_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_nr_sack nr_sack;
+} SCTP_PACKED;
+
+
+/* Heartbeat Request (HEARTBEAT) */
+struct sctp_heartbeat {
+ struct sctp_heartbeat_info_param hb_info;
+} SCTP_PACKED;
+
+struct sctp_heartbeat_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_heartbeat heartbeat;
+} SCTP_PACKED;
+
+/* ... used for Heartbeat Ack (HEARTBEAT ACK) */
+#define sctp_heartbeat_ack sctp_heartbeat
+#define sctp_heartbeat_ack_chunk sctp_heartbeat_chunk
+
+
+/* Abort Association (ABORT) */
+struct sctp_abort_chunk {
+ struct sctp_chunkhdr ch;
+ /* optional error cause may follow */
+} SCTP_PACKED;
+
+struct sctp_abort_msg {
+ struct sctphdr sh;
+ struct sctp_abort_chunk msg;
+} SCTP_PACKED;
+
+
+/* Shutdown Association (SHUTDOWN) */
+struct sctp_shutdown_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t cumulative_tsn_ack;
+} SCTP_PACKED;
+
+
+/* Shutdown Acknowledgment (SHUTDOWN ACK) */
+struct sctp_shutdown_ack_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+
+/* Operation Error (ERROR) */
+struct sctp_error_chunk {
+ struct sctp_chunkhdr ch;
+ /* optional error causes follow */
+} SCTP_PACKED;
+
+
+/* Cookie Echo (COOKIE ECHO) */
+struct sctp_cookie_echo_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_state_cookie cookie;
+} SCTP_PACKED;
+
+/* Cookie Acknowledgment (COOKIE ACK) */
+struct sctp_cookie_ack_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+/* Explicit Congestion Notification Echo (ECNE) */
+struct sctp_ecne_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t tsn;
+} SCTP_PACKED;
+
+/* Congestion Window Reduced (CWR) */
+struct sctp_cwr_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t tsn;
+} SCTP_PACKED;
+
+/* Shutdown Complete (SHUTDOWN COMPLETE) */
+struct sctp_shutdown_complete_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+/* Oper error holding a stale cookie */
+struct sctp_stale_cookie_msg {
+ struct sctp_paramhdr ph;/* really an error cause */
+ uint32_t time_usec;
+} SCTP_PACKED;
+
+struct sctp_adaptation_layer_indication {
+ struct sctp_paramhdr ph;
+ uint32_t indication;
+} SCTP_PACKED;
+
+struct sctp_cookie_while_shutting_down {
+ struct sctphdr sh;
+ struct sctp_chunkhdr ch;
+ struct sctp_paramhdr ph;/* really an error cause */
+} SCTP_PACKED;
+
+struct sctp_shutdown_complete_msg {
+ struct sctphdr sh;
+ struct sctp_shutdown_complete_chunk shut_cmp;
+} SCTP_PACKED;
+
+/*
+ * draft-ietf-tsvwg-addip-sctp
+ */
+/* Address/Stream Configuration Change (ASCONF) */
+struct sctp_asconf_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t serial_number;
+ /* lookup address parameter (mandatory) */
+ /* asconf parameters follow */
+} SCTP_PACKED;
+
+/* Address/Stream Configuration Acknowledge (ASCONF ACK) */
+struct sctp_asconf_ack_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t serial_number;
+ /* asconf parameters follow */
+} SCTP_PACKED;
+
+/* draft-ietf-tsvwg-prsctp */
+/* Forward Cumulative TSN (FORWARD TSN) */
+struct sctp_forward_tsn_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t new_cumulative_tsn;
+ /* stream/sequence pairs (sctp_strseq) follow */
+} SCTP_PACKED;
+
+struct sctp_strseq {
+ uint16_t stream;
+ uint16_t sequence;
+} SCTP_PACKED;
+
+struct sctp_forward_tsn_msg {
+ struct sctphdr sh;
+ struct sctp_forward_tsn_chunk msg;
+} SCTP_PACKED;
+
+/* should be one less than a multiple of 4, i.e. 3/7/11 etc. */
+
+#define SCTP_NUM_DB_TO_VERIFY 31
+
+struct sctp_chunk_desc {
+ uint8_t chunk_type;
+ uint8_t data_bytes[SCTP_NUM_DB_TO_VERIFY];
+ uint32_t tsn_ifany;
+} SCTP_PACKED;
+
+
+struct sctp_pktdrop_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t bottle_bw;
+ uint32_t current_onq;
+ uint16_t trunc_len;
+ uint16_t reserved;
+ uint8_t data[];
+} SCTP_PACKED;
+
+/**********STREAM RESET STUFF ******************/
+
+struct sctp_stream_reset_out_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq; /* monotonically increasing seq no */
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t send_reset_at_tsn; /* last TSN I assigned outbound */
+ uint16_t list_of_streams[]; /* if not all list of streams */
+} SCTP_PACKED;
+
+struct sctp_stream_reset_in_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+ uint16_t list_of_streams[]; /* if not all list of streams */
+} SCTP_PACKED;
+
+
+struct sctp_stream_reset_tsn_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_response {
+ struct sctp_paramhdr ph;
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t result;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_response_tsn {
+ struct sctp_paramhdr ph;
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t result;
+ uint32_t senders_next_tsn;
+ uint32_t receivers_next_tsn;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_add_strm {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+ uint16_t number_of_streams;
+ uint16_t reserved;
+} SCTP_PACKED;
+
+#define SCTP_STREAM_RESET_NOTHING 0x00000000 /* Nothing for me to do */
+#define SCTP_STREAM_RESET_PERFORMED 0x00000001 /* Did it */
+#define SCTP_STREAM_RESET_DENIED 0x00000002 /* refused to do it */
+#define SCTP_STREAM_RESET_ERROR_STR 0x00000003 /* bad Stream no */
+#define SCTP_STREAM_RESET_TRY_LATER 0x00000004 /* collision, try again */
+#define SCTP_STREAM_RESET_BAD_SEQNO 0x00000005 /* bad str-reset seq no */
+
+/*
+ * convenience structures; note that if you are making a request for specific
+ * streams then the request will need to be an overlay structure.
+ */
+
+struct sctp_stream_reset_out_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_out_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_in_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_in_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_tsn_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_tsn_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_resp {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_response sr_resp;
+} SCTP_PACKED;
+
+/* response only valid with a TSN request */
+struct sctp_stream_reset_resp_tsn {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_response_tsn sr_resp;
+} SCTP_PACKED;
+
+/****************************************************/
+
+/*
+ * Authenticated chunks support draft-ietf-tsvwg-sctp-auth
+ */
+
+/* Should we make the max be 32? */
+#define SCTP_RANDOM_MAX_SIZE 256
+struct sctp_auth_random {
+ struct sctp_paramhdr ph;/* type = 0x8002 */
+ uint8_t random_data[];
+} SCTP_PACKED;
+
+struct sctp_auth_chunk_list {
+ struct sctp_paramhdr ph;/* type = 0x8003 */
+ uint8_t chunk_types[];
+} SCTP_PACKED;
+
+struct sctp_auth_hmac_algo {
+ struct sctp_paramhdr ph;/* type = 0x8004 */
+ uint16_t hmac_ids[];
+} SCTP_PACKED;
+
+struct sctp_auth_chunk {
+ struct sctp_chunkhdr ch;
+ uint16_t shared_key_id;
+ uint16_t hmac_id;
+ uint8_t hmac[];
+} SCTP_PACKED;
+
+struct sctp_auth_invalid_hmac {
+ struct sctp_paramhdr ph;
+ uint16_t hmac_id;
+ uint16_t padding;
+} SCTP_PACKED;
+
+/*
+ * We pre-reserve enough room for an ECNE or CWR AND a SACK with no missing
+ * pieces. If the ECNE is missing we could have a couple of blocks. This way
+ * we optimize so we MOST likely can bundle a SACK/ECN with the smallest size
+ * data chunk I will split into. We could increase throughput slightly by
+ * taking these two out, but I feel the 24 bytes for the SACK and 8 for the
+ * CWR (the 32 bytes I pre-reserve) are worth it for now.
+ */
+#ifndef SCTP_MAX_OVERHEAD
+#ifdef INET6
+#define SCTP_MAX_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct sctp_ecne_chunk) + \
+ sizeof(struct sctp_sack_chunk) + \
+ sizeof(struct ip6_hdr))
+
+#define SCTP_MED_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip6_hdr))
+
+
+#define SCTP_MIN_OVERHEAD (sizeof(struct ip6_hdr) + \
+ sizeof(struct sctphdr))
+
+#else
+#define SCTP_MAX_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct sctp_ecne_chunk) + \
+ sizeof(struct sctp_sack_chunk) + \
+ sizeof(struct ip))
+
+#define SCTP_MED_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip))
+
+
+#define SCTP_MIN_OVERHEAD (sizeof(struct ip) + \
+ sizeof(struct sctphdr))
+
+#endif /* INET6 */
+#endif /* !SCTP_MAX_OVERHEAD */
+
+#define SCTP_MED_V4_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip))
+
+#define SCTP_MIN_V4_OVERHEAD (sizeof(struct ip) + \
+ sizeof(struct sctphdr))
+
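+/*
+ * Rough worked example (assuming a 20-byte IPv4 header with no options, a
+ * 12-byte SCTP common header and a 16-byte DATA chunk header):
+ * SCTP_MIN_V4_OVERHEAD = 20 + 12 = 32 bytes, and SCTP_MED_V4_OVERHEAD =
+ * 20 + 12 + 16 = 48 bytes of overhead per packet carrying user data.
+ */
+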
+#undef SCTP_PACKED
+#endif /* !__sctp_header_h__ */
diff --git a/freebsd/sys/netinet/sctp_indata.c b/freebsd/sys/netinet/sctp_indata.c
new file mode 100644
index 00000000..963b3205
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_indata.c
@@ -0,0 +1,5800 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_indata.c,v 1.36 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_input.h>
+#include <freebsd/netinet/sctp_indata.h>
+#include <freebsd/netinet/sctp_uio.h>
+#include <freebsd/netinet/sctp_timer.h>
+
+
+/*
+ * NOTES: On the outbound side of things I need to check the sack timer to
+ * see if I should generate a SACK into the chunk queue (if I have data to
+ * send, that is, and will be sending it) for bundling.
+ *
+ * The callback in sctp_usrreq.c will get called when the socket is read from.
+ * This will cause sctp_service_queues() to get called on the top entry in
+ * the list.
+ */
+
+void
+sctp_set_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ asoc->my_rwnd = sctp_calc_rwnd(stcb, asoc);
+}
+
+/* Calculate what the rwnd would be */
+uint32_t
+sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ uint32_t calc = 0;
+
+ /*
+	 * This is really set wrong with respect to a one-to-many socket,
+	 * since sb_cc is the count that everyone has put up. When we
+	 * re-write sctp_soreceive we will fix this so that ONLY this
+	 * association's data is taken into account.
+ */
+ if (stcb->sctp_socket == NULL)
+ return (calc);
+
+ if (stcb->asoc.sb_cc == 0 &&
+ asoc->size_on_reasm_queue == 0 &&
+ asoc->size_on_all_streams == 0) {
+ /* Full rwnd granted */
+ calc = max(SCTP_SB_LIMIT_RCV(stcb->sctp_socket), SCTP_MINIMAL_RWND);
+ return (calc);
+ }
+ /* get actual space */
+ calc = (uint32_t) sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv);
+
+ /*
+	 * take out what has NOT been put on the socket queue but that we
+	 * still hold for putting up.
+ */
+ calc = sctp_sbspace_sub(calc, (uint32_t) (asoc->size_on_reasm_queue +
+ asoc->cnt_on_reasm_queue * MSIZE));
+ calc = sctp_sbspace_sub(calc, (uint32_t) (asoc->size_on_all_streams +
+ asoc->cnt_on_all_streams * MSIZE));
+
+ if (calc == 0) {
+ /* out of space */
+ return (calc);
+ }
+ /* what is the overhead of all these rwnd's */
+ calc = sctp_sbspace_sub(calc, stcb->asoc.my_rwnd_control_len);
+ /*
+	 * If the window gets too small due to ctrl-stuff, reduce it to 1,
+	 * even if it is 0, so SWS avoidance stays engaged.
+ */
+ if (calc < stcb->asoc.my_rwnd_control_len) {
+ calc = 1;
+ }
+ return (calc);
+}
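+
+/*
+ * Worked example for sctp_calc_rwnd() with hypothetical numbers (the buffer
+ * limit, queue contents and MSIZE below are assumptions, not values taken
+ * from this code): with 65536 bytes of receive-buffer space, 8192 bytes held
+ * on the reassembly queue in 4 chunks, nothing on the stream queues and
+ * MSIZE = 256, calc = 65536 - (8192 + 4 * 256) = 56320 before the control
+ * overhead (my_rwnd_control_len) is subtracted.
+ */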
+
+
+
+/*
+ * Build out our readq entry based on the incoming packet.
+ */
+struct sctp_queued_to_read *
+sctp_build_readq_entry(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint32_t tsn, uint32_t ppid,
+ uint32_t context, uint16_t stream_no,
+ uint16_t stream_seq, uint8_t flags,
+ struct mbuf *dm)
+{
+ struct sctp_queued_to_read *read_queue_e = NULL;
+
+ sctp_alloc_a_readq(stcb, read_queue_e);
+ if (read_queue_e == NULL) {
+ goto failed_build;
+ }
+ read_queue_e->sinfo_stream = stream_no;
+ read_queue_e->sinfo_ssn = stream_seq;
+ read_queue_e->sinfo_flags = (flags << 8);
+ read_queue_e->sinfo_ppid = ppid;
+ read_queue_e->sinfo_context = stcb->asoc.context;
+ read_queue_e->sinfo_timetolive = 0;
+ read_queue_e->sinfo_tsn = tsn;
+ read_queue_e->sinfo_cumtsn = tsn;
+ read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb);
+ read_queue_e->whoFrom = net;
+ read_queue_e->length = 0;
+ atomic_add_int(&net->ref_count, 1);
+ read_queue_e->data = dm;
+ read_queue_e->spec_flags = 0;
+ read_queue_e->tail_mbuf = NULL;
+ read_queue_e->aux_data = NULL;
+ read_queue_e->stcb = stcb;
+ read_queue_e->port_from = stcb->rport;
+ read_queue_e->do_not_ref_stcb = 0;
+ read_queue_e->end_added = 0;
+ read_queue_e->some_taken = 0;
+ read_queue_e->pdapi_aborted = 0;
+failed_build:
+ return (read_queue_e);
+}
+
+
+/*
+ * Build out our readq entry based on the incoming packet.
+ */
+static struct sctp_queued_to_read *
+sctp_build_readq_entry_chk(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk)
+{
+ struct sctp_queued_to_read *read_queue_e = NULL;
+
+ sctp_alloc_a_readq(stcb, read_queue_e);
+ if (read_queue_e == NULL) {
+ goto failed_build;
+ }
+ read_queue_e->sinfo_stream = chk->rec.data.stream_number;
+ read_queue_e->sinfo_ssn = chk->rec.data.stream_seq;
+ read_queue_e->sinfo_flags = (chk->rec.data.rcv_flags << 8);
+ read_queue_e->sinfo_ppid = chk->rec.data.payloadtype;
+ read_queue_e->sinfo_context = stcb->asoc.context;
+ read_queue_e->sinfo_timetolive = 0;
+ read_queue_e->sinfo_tsn = chk->rec.data.TSN_seq;
+ read_queue_e->sinfo_cumtsn = chk->rec.data.TSN_seq;
+ read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb);
+ read_queue_e->whoFrom = chk->whoTo;
+ read_queue_e->aux_data = NULL;
+ read_queue_e->length = 0;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ read_queue_e->data = chk->data;
+ read_queue_e->tail_mbuf = NULL;
+ read_queue_e->stcb = stcb;
+ read_queue_e->port_from = stcb->rport;
+ read_queue_e->spec_flags = 0;
+ read_queue_e->do_not_ref_stcb = 0;
+ read_queue_e->end_added = 0;
+ read_queue_e->some_taken = 0;
+ read_queue_e->pdapi_aborted = 0;
+failed_build:
+ return (read_queue_e);
+}
+
+
+struct mbuf *
+sctp_build_ctl_nchunk(struct sctp_inpcb *inp,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sctp_sndrcvinfo *outinfo;
+ struct cmsghdr *cmh;
+ struct mbuf *ret;
+ int len;
+ int use_extended = 0;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
+ /* user does not want the sndrcv ctl */
+ return (NULL);
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ use_extended = 1;
+ len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
+ } else {
+ len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+ }
+
+
+ ret = sctp_get_mbuf_for_msg(len,
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (ret == NULL) {
+ /* No space */
+ return (ret);
+ }
+ /* We need a CMSG header followed by the struct */
+ cmh = mtod(ret, struct cmsghdr *);
+ outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
+ cmh->cmsg_level = IPPROTO_SCTP;
+ if (use_extended) {
+ cmh->cmsg_type = SCTP_EXTRCV;
+ cmh->cmsg_len = len;
+ memcpy(outinfo, sinfo, len);
+ } else {
+ cmh->cmsg_type = SCTP_SNDRCV;
+ cmh->cmsg_len = len;
+ *outinfo = *sinfo;
+ }
+ SCTP_BUF_LEN(ret) = cmh->cmsg_len;
+ return (ret);
+}
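+
+/*
+ * For illustration only -- a user-space sketch, not part of this file: the
+ * ancillary data built above is what an application walks after recvmsg()
+ * (assuming it has subscribed to the data I/O event), e.g.:
+ *
+ *	struct cmsghdr *c;
+ *	struct sctp_sndrcvinfo *info;
+ *
+ *	for (c = CMSG_FIRSTHDR(&msg); c != NULL; c = CMSG_NXTHDR(&msg, c)) {
+ *		if (c->cmsg_level == IPPROTO_SCTP &&
+ *		    c->cmsg_type == SCTP_SNDRCV) {
+ *			info = (struct sctp_sndrcvinfo *)CMSG_DATA(c);
+ *			... read info->sinfo_stream, info->sinfo_ppid, etc.
+ *		}
+ *	}
+ */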
+
+
+char *
+sctp_build_ctl_cchunk(struct sctp_inpcb *inp,
+ int *control_len,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sctp_sndrcvinfo *outinfo;
+ struct cmsghdr *cmh;
+ char *buf;
+ int len;
+ int use_extended = 0;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
+ /* user does not want the sndrcv ctl */
+ return (NULL);
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ use_extended = 1;
+ len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
+ } else {
+ len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+ }
+ SCTP_MALLOC(buf, char *, len, SCTP_M_CMSG);
+ if (buf == NULL) {
+ /* No space */
+ return (buf);
+ }
+ /* We need a CMSG header followed by the struct */
+ cmh = (struct cmsghdr *)buf;
+ outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
+ cmh->cmsg_level = IPPROTO_SCTP;
+ if (use_extended) {
+ cmh->cmsg_type = SCTP_EXTRCV;
+ cmh->cmsg_len = len;
+ memcpy(outinfo, sinfo, len);
+ } else {
+ cmh->cmsg_type = SCTP_SNDRCV;
+ cmh->cmsg_len = len;
+ *outinfo = *sinfo;
+ }
+ *control_len = len;
+ return (buf);
+}
+
+static void
+sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn)
+{
+ uint32_t gap, i, cumackp1;
+ int fnd = 0;
+
+ if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) {
+ return;
+ }
+ cumackp1 = asoc->cumulative_tsn + 1;
+ if (compare_with_wrap(cumackp1, tsn, MAX_TSN)) {
+ /*
+ * this tsn is behind the cum ack and thus we don't need to
+ * worry about it being moved from one to the other.
+ */
+ return;
+ }
+ SCTP_CALC_TSN_TO_GAP(gap, tsn, asoc->mapping_array_base_tsn);
+ if (!SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
+ printf("gap:%x tsn:%x\n", gap, tsn);
+ sctp_print_mapping_array(asoc);
+#ifdef INVARIANTS
+ panic("Things are really messed up now!!");
+#endif
+ }
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+ asoc->highest_tsn_inside_nr_map = tsn;
+ }
+ if (tsn == asoc->highest_tsn_inside_map) {
+ /* We must back down to see what the new highest is */
+ for (i = tsn - 1; (compare_with_wrap(i, asoc->mapping_array_base_tsn, MAX_TSN) ||
+ (i == asoc->mapping_array_base_tsn)); i--) {
+ SCTP_CALC_TSN_TO_GAP(gap, i, asoc->mapping_array_base_tsn);
+ if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
+ asoc->highest_tsn_inside_map = i;
+ fnd = 1;
+ break;
+ }
+ }
+ if (!fnd) {
+ asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn - 1;
+ }
+ }
+}
+
+
+/*
+ * We are delivering currently from the reassembly queue. We must continue to
+ * deliver until we either: 1) run out of space. 2) run out of sequential
+ * TSN's 3) hit the SCTP_DATA_LAST_FRAG flag.
+ */
+static void
+sctp_service_reassembly(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint16_t nxt_todel;
+ uint16_t stream_no;
+ int end = 0;
+ int cntDel;
+
+ struct sctp_queued_to_read *control, *ctl, *ctlat;
+
+ if (stcb == NULL)
+ return;
+
+ cntDel = stream_no = 0;
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ /* socket above is long gone or going.. */
+abandon:
+ asoc->fragmented_delivery_inprogress = 0;
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ /*
+			 * Lose the data pointer, since it's in the socket
+ * buffer
+ */
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ /* Now free the address and data */
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ }
+ return;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ do {
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ return;
+ }
+ if (chk->rec.data.TSN_seq != (asoc->tsn_last_delivered + 1)) {
+ /* Can't deliver more :< */
+ return;
+ }
+ stream_no = chk->rec.data.stream_number;
+ nxt_todel = asoc->strmin[stream_no].last_sequence_delivered + 1;
+ if (nxt_todel != chk->rec.data.stream_seq &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ /*
+			 * Not the next sequence to deliver in its stream and
+			 * not unordered, so we can't deliver it yet
+ */
+ return;
+ }
+ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+
+ control = sctp_build_readq_entry_chk(stcb, chk);
+ if (control == NULL) {
+ /* out of memory? */
+ return;
+ }
+ /* save it off for our future deliveries */
+ stcb->asoc.control_pdapi = control;
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG)
+ end = 1;
+ else
+ end = 0;
+ sctp_mark_non_revokable(asoc, chk->rec.data.TSN_seq);
+ sctp_add_to_readq(stcb->sctp_ep,
+ stcb, control, &stcb->sctp_socket->so_rcv, end,
+ SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+ cntDel++;
+ } else {
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG)
+ end = 1;
+ else
+ end = 0;
+ sctp_mark_non_revokable(asoc, chk->rec.data.TSN_seq);
+ if (sctp_append_to_readq(stcb->sctp_ep, stcb,
+ stcb->asoc.control_pdapi,
+ chk->data, end, chk->rec.data.TSN_seq,
+ &stcb->sctp_socket->so_rcv)) {
+ /*
+ * something is very wrong, either
+ * control_pdapi is NULL, or the tail_mbuf
+				 * is corrupt, or there is an EOM already on
+ * the mbuf chain.
+ */
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto abandon;
+ } else {
+#ifdef INVARIANTS
+ if ((stcb->asoc.control_pdapi == NULL) || (stcb->asoc.control_pdapi->tail_mbuf == NULL)) {
+ panic("This should not happen control_pdapi NULL?");
+ }
+					/* if we did not panic, it was an EOM */
+ panic("Bad chunking ??");
+#else
+ if ((stcb->asoc.control_pdapi == NULL) || (stcb->asoc.control_pdapi->tail_mbuf == NULL)) {
+ SCTP_PRINTF("This should not happen control_pdapi NULL?\n");
+ }
+ SCTP_PRINTF("Bad chunking ??\n");
+ SCTP_PRINTF("Dumping re-assembly queue this will probably hose the association\n");
+
+#endif
+ goto abandon;
+ }
+ }
+ cntDel++;
+ }
+ /* pull it we did it */
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ asoc->fragmented_delivery_inprogress = 0;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ asoc->strmin[stream_no].last_sequence_delivered++;
+ }
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
+ SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs);
+ }
+ } else if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ /*
+ * turn the flag back on since we just delivered
+ * yet another one.
+ */
+ asoc->fragmented_delivery_inprogress = 1;
+ }
+ asoc->tsn_of_pdapi_last_delivered = chk->rec.data.TSN_seq;
+ asoc->last_flags_delivered = chk->rec.data.rcv_flags;
+ asoc->last_strm_seq_delivered = chk->rec.data.stream_seq;
+ asoc->last_strm_no_delivered = chk->rec.data.stream_number;
+
+ asoc->tsn_last_delivered = chk->rec.data.TSN_seq;
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ /* free up the chk */
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk);
+
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ /*
+ * Now lets see if we can deliver the next one on
+ * the stream
+ */
+ struct sctp_stream_in *strm;
+
+ strm = &asoc->strmin[stream_no];
+ nxt_todel = strm->last_sequence_delivered + 1;
+ ctl = TAILQ_FIRST(&strm->inqueue);
+ if (ctl && (nxt_todel == ctl->sinfo_ssn)) {
+ while (ctl != NULL) {
+ /* Deliver more if we can. */
+ if (nxt_todel == ctl->sinfo_ssn) {
+ ctlat = TAILQ_NEXT(ctl, next);
+ TAILQ_REMOVE(&strm->inqueue, ctl, next);
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+ sctp_mark_non_revokable(asoc, ctl->sinfo_tsn);
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1,
+ SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+ ctl = ctlat;
+ } else {
+ break;
+ }
+ nxt_todel = strm->last_sequence_delivered + 1;
+ }
+ }
+ break;
+ }
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ } while (chk);
+}
+
+/*
+ * Queue the chunk either right into the socket buffer if it is the next one
+ * to go OR put it in the correct place in the delivery queue. If we do
+ * append to the so_buf, keep doing so until we are out of order. One big
+ * question still remains, what to do when the socket buffer is FULL??
+ */
+static void
+sctp_queue_data_to_stream(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_queued_to_read *control, int *abort_flag)
+{
+ /*
+	 * FIX-ME maybe? What happens when the ssn wraps? If we are getting
+	 * all the data in one stream this could happen quite rapidly. One
+	 * could use the TSN to keep track of things, but this scheme breaks
+	 * down in the other type of stream usage that could occur. Send a
+	 * single msg to stream 0, send 4 billion messages to stream 1, now
+	 * send a message to stream 0. You have a situation where the TSN
+	 * has wrapped but not in the stream. Is this worth worrying about,
+	 * or should we just change our queue sort at the bottom to be by
+	 * TSN?
+	 *
+	 * Could it also be legal for a peer to send ssn 1 with TSN 2 and ssn 2
+	 * with TSN 1? If the peer is doing some sort of funky TSN/SSN
+	 * assignment this could happen... and I don't see how this would be
+	 * a violation. So for now I am undecided and will leave the sort by
+	 * SSN alone. Maybe a hybrid approach is the answer.
+ *
+ */
+ struct sctp_stream_in *strm;
+ struct sctp_queued_to_read *at;
+ int queue_needed;
+ uint16_t nxt_todel;
+ struct mbuf *oper;
+
+ queue_needed = 1;
+ asoc->size_on_all_streams += control->length;
+ sctp_ucount_incr(asoc->cnt_on_all_streams);
+ strm = &asoc->strmin[control->sinfo_stream];
+ nxt_todel = strm->last_sequence_delivered + 1;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INTO_STRD);
+ }
+ SCTPDBG(SCTP_DEBUG_INDATA1,
+ "queue to stream called for ssn:%u lastdel:%u nxt:%u\n",
+ (uint32_t) control->sinfo_stream,
+ (uint32_t) strm->last_sequence_delivered,
+ (uint32_t) nxt_todel);
+ if (compare_with_wrap(strm->last_sequence_delivered,
+ control->sinfo_ssn, MAX_SEQ) ||
+ (strm->last_sequence_delivered == control->sinfo_ssn)) {
+ /* The incoming sseq is behind where we last delivered? */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort association\n",
+ control->sinfo_ssn, strm->last_sequence_delivered);
+protocol_error:
+ /*
+ * throw it in the stream so it gets cleaned up in
+ * association destruction
+ */
+ TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_1);
+ ippp++;
+ *ippp = control->sinfo_tsn;
+ ippp++;
+ *ippp = ((control->sinfo_stream << 16) | control->sinfo_ssn);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+
+ }
+ if (nxt_todel == control->sinfo_ssn) {
+ /* can be delivered right away? */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_IMMED_DEL);
+ }
+		/* EY it won't be queued if it could be delivered directly */
+ queue_needed = 0;
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+
+ sctp_mark_non_revokable(asoc, control->sinfo_tsn);
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1,
+ SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+ control = TAILQ_FIRST(&strm->inqueue);
+ while (control != NULL) {
+ /* all delivered */
+ nxt_todel = strm->last_sequence_delivered + 1;
+ if (nxt_todel == control->sinfo_ssn) {
+ at = TAILQ_NEXT(control, next);
+ TAILQ_REMOVE(&strm->inqueue, control, next);
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+ /*
+ * We ignore the return of deliver_data here
+ * since we always can hold the chunk on the
+ * d-queue. And we have a finite number that
+ * can be delivered from the strq.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL,
+ SCTP_STR_LOG_FROM_IMMED_DEL);
+ }
+ sctp_mark_non_revokable(asoc, control->sinfo_tsn);
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1,
+ SCTP_READ_LOCK_NOT_HELD,
+ SCTP_SO_NOT_LOCKED);
+ control = at;
+ continue;
+ }
+ break;
+ }
+ }
+ if (queue_needed) {
+ /*
+ * Ok, we did not deliver this guy, find the correct place
+ * to put it on the queue.
+ */
+ if ((compare_with_wrap(asoc->cumulative_tsn,
+ control->sinfo_tsn, MAX_TSN)) ||
+ (control->sinfo_tsn == asoc->cumulative_tsn)) {
+ goto protocol_error;
+ }
+ if (TAILQ_EMPTY(&strm->inqueue)) {
+ /* Empty queue */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INSERT_HD);
+ }
+ TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
+ } else {
+ TAILQ_FOREACH(at, &strm->inqueue, next) {
+ if (compare_with_wrap(at->sinfo_ssn,
+ control->sinfo_ssn, MAX_SEQ)) {
+ /*
+ * one in queue is bigger than the
+ * new one, insert before this one
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, at,
+ SCTP_STR_LOG_FROM_INSERT_MD);
+ }
+ TAILQ_INSERT_BEFORE(at, control, next);
+ break;
+ } else if (at->sinfo_ssn == control->sinfo_ssn) {
+ /*
+ * Gak, He sent me a duplicate str
+ * seq number
+ */
+ /*
+ * foo bar, I guess I will just free
+ * this new guy, should we abort
+ * too? FIX ME MAYBE? Or it COULD be
+ * that the SSN's have wrapped.
+ * Maybe I should compare to TSN
+ * somehow... sigh for now just blow
+ * away the chunk!
+ */
+
+ if (control->data)
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+ return;
+ } else {
+ if (TAILQ_NEXT(at, next) == NULL) {
+ /*
+ * We are at the end, insert
+ * it after this one
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, at,
+ SCTP_STR_LOG_FROM_INSERT_TL);
+ }
+ TAILQ_INSERT_AFTER(&strm->inqueue,
+ at, control, next);
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Returns two things: You get the total size of the deliverable parts of the
+ * first fragmented message on the reassembly queue. And you get a 1 back if
+ * all of the message is ready or a 0 back if the message is still incomplete
+ */
+static int
+sctp_is_all_msg_on_reasm(struct sctp_association *asoc, uint32_t * t_size)
+{
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsn;
+
+ *t_size = 0;
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ /* nothing on the queue */
+ return (0);
+ }
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
+ /* Not a first on the queue */
+ return (0);
+ }
+ tsn = chk->rec.data.TSN_seq;
+ while (chk) {
+ if (tsn != chk->rec.data.TSN_seq) {
+ return (0);
+ }
+ *t_size += chk->send_size;
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ return (1);
+ }
+ tsn++;
+ chk = TAILQ_NEXT(chk, sctp_next);
+ }
+ return (0);
+}
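+
+/*
+ * Example with hypothetical TSNs: if the reassembly queue holds TSN 10
+ * (FIRST), 11 (MIDDLE) and 12 (LAST), this returns 1 with *t_size set to
+ * the sum of the three send_sizes.  If TSN 11 were missing, the walk stops
+ * at the gap and returns 0 with *t_size covering only TSN 10.
+ */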
+
+static void
+sctp_deliver_reasm_check(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint16_t nxt_todel;
+ uint32_t tsize, pd_point;
+
+doit_again:
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ /* Huh? */
+ asoc->size_on_reasm_queue = 0;
+ asoc->cnt_on_reasm_queue = 0;
+ return;
+ }
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ nxt_todel =
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered + 1;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) &&
+ (nxt_todel == chk->rec.data.stream_seq ||
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED))) {
+ /*
+ * Yep the first one is here and its ok to deliver
+ * but should we?
+ */
+ if (stcb->sctp_socket) {
+ pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+ stcb->sctp_ep->partial_delivery_point);
+ } else {
+ pd_point = stcb->sctp_ep->partial_delivery_point;
+ }
+ if (sctp_is_all_msg_on_reasm(asoc, &tsize) || (tsize >= pd_point)) {
+
+ /*
+				 * Yes, we set up to start reception by
+				 * backing down the TSN just in case we
+				 * can't deliver.
+ */
+ asoc->fragmented_delivery_inprogress = 1;
+ asoc->tsn_last_delivered =
+ chk->rec.data.TSN_seq - 1;
+ asoc->str_of_pdapi =
+ chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
+ asoc->pdapi_ppid = chk->rec.data.payloadtype;
+ asoc->fragment_flags = chk->rec.data.rcv_flags;
+ sctp_service_reassembly(stcb, asoc);
+ }
+ }
+ } else {
+ /*
+		 * Service re-assembly will deliver stream data queued at
+		 * the end of fragmented delivery, but it won't know to go
+		 * back and call itself again... we do that here with the
+		 * goto doit_again.
+ */
+ sctp_service_reassembly(stcb, asoc);
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ /*
+ * finished our Fragmented delivery, could be more
+ * waiting?
+ */
+ goto doit_again;
+ }
+ }
+}
+
+/*
+ * Dump onto the re-assembly queue, in its proper place. After dumping on the
+ * queue, see if anything can be delivered. If so, pull it off (or as much as
+ * we can). If we run out of space then we must dump what we can and set the
+ * appropriate flag to say we queued what we could.
+ */
+static void
+sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_tmit_chunk *chk, int *abort_flag)
+{
+ struct mbuf *oper;
+ uint32_t cum_ackp1, last_tsn, prev_tsn, post_tsn;
+ u_char last_flags;
+ struct sctp_tmit_chunk *at, *prev, *next;
+
+ prev = next = NULL;
+ cum_ackp1 = asoc->tsn_last_delivered + 1;
+ if (TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /* This is the first one on the queue */
+ TAILQ_INSERT_HEAD(&asoc->reasmqueue, chk, sctp_next);
+ /*
+ * we do not check for delivery of anything when only one
+ * fragment is here
+ */
+ asoc->size_on_reasm_queue = chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ if (chk->rec.data.TSN_seq == cum_ackp1) {
+ if (asoc->fragmented_delivery_inprogress == 0 &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) !=
+ SCTP_DATA_FIRST_FRAG) {
+ /*
+				 * An empty queue, no delivery in progress,
+ * we hit the next one and it does NOT have
+ * a FIRST fragment mark.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not first, no fragmented delivery in progress\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_2);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if (asoc->fragmented_delivery_inprogress &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
+ /*
+ * We are doing a partial delivery and the
+ * NEXT chunk MUST be either the LAST or
+ * MIDDLE fragment NOT a FIRST
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS a first and fragmented delivery in progress\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_3);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if (asoc->fragmented_delivery_inprogress) {
+ /*
+ * Here we are ok with a MIDDLE or LAST
+ * piece
+ */
+ if (chk->rec.data.stream_number !=
+ asoc->str_of_pdapi) {
+ /* Got to be the right STR No */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream number %d vs %d\n",
+ chk->rec.data.stream_number,
+ asoc->str_of_pdapi);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_4);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_4;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if ((asoc->fragment_flags & SCTP_DATA_UNORDERED) !=
+ SCTP_DATA_UNORDERED &&
+ chk->rec.data.stream_seq != asoc->ssn_of_pdapi) {
+ /* Got to be the right STR Seq */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream seq %d vs %d\n",
+ chk->rec.data.stream_seq,
+ asoc->ssn_of_pdapi);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_5);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_5;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ }
+ }
+ }
+ return;
+ }
+ /* Find its place */
+ TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) {
+ if (compare_with_wrap(at->rec.data.TSN_seq,
+ chk->rec.data.TSN_seq, MAX_TSN)) {
+ /*
+ * one in queue is bigger than the new one, insert
+ * before this one
+ */
+ /* A check */
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ next = at;
+ TAILQ_INSERT_BEFORE(at, chk, sctp_next);
+ break;
+ } else if (at->rec.data.TSN_seq == chk->rec.data.TSN_seq) {
+ /* Gak, He sent me a duplicate str seq number */
+ /*
+ * foo bar, I guess I will just free this new guy,
+ * should we abort too? FIX ME MAYBE? Or it COULD be
+ * that the SSN's have wrapped. Maybe I should
+ * compare to TSN somehow... sigh for now just blow
+ * away the chunk!
+ */
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ } else {
+ last_flags = at->rec.data.rcv_flags;
+ last_tsn = at->rec.data.TSN_seq;
+ prev = at;
+ if (TAILQ_NEXT(at, sctp_next) == NULL) {
+ /*
+ * We are at the end, insert it after this
+ * one
+ */
+ /* check it first */
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ TAILQ_INSERT_AFTER(&asoc->reasmqueue, at, chk, sctp_next);
+ break;
+ }
+ }
+ }
+ /* Now the audits */
+ if (prev) {
+ prev_tsn = chk->rec.data.TSN_seq - 1;
+ if (prev_tsn == prev->rec.data.TSN_seq) {
+ /*
+ * Ok the one I am dropping onto the end is the
+		 * NEXT. A bit of validation here.
+ */
+ if ((prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_FIRST_FRAG ||
+ (prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_MIDDLE_FRAG) {
+ /*
+ * Insert chk MUST be a MIDDLE or LAST
+ * fragment
+ */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_FIRST_FRAG) {
+					SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - It can be a middle or last but not a first\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it's a FIRST!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_6);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_6;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return;
+ }
+ if (chk->rec.data.stream_number !=
+ prev->rec.data.stream_number) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTP_PRINTF("Prev check - Gak, Evil plot, ssn:%d not the same as at:%d\n",
+ chk->rec.data.stream_number,
+ prev->rec.data.stream_number);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_7);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if ((prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
+ chk->rec.data.stream_seq !=
+ prev->rec.data.stream_seq) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sseq:%d not the same as at:%d\n",
+ chk->rec.data.stream_seq,
+ prev->rec.data.stream_seq);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_8);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ } else if ((prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ /* Insert chk MUST be a FIRST */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_FIRST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, evil plot, its not FIRST and it must be!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_9);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ }
+ }
+ }
+ if (next) {
+ post_tsn = chk->rec.data.TSN_seq + 1;
+ if (post_tsn == next->rec.data.TSN_seq) {
+ /*
+ * Ok the one I am inserting ahead of is my NEXT
+			 * one. A bit of validation here.
+ */
+ if (next->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ /* Insert chk MUST be a last fragment */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK)
+ != SCTP_DATA_LAST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is FIRST, we must be LAST\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not a last!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_10);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ } else if ((next->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_MIDDLE_FRAG ||
+ (next->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ /*
+ * Insert chk CAN be MIDDLE or FIRST NOT
+ * LAST
+ */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is a MIDDLE/LAST\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, new prev chunk is a LAST\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_11);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if (chk->rec.data.stream_number !=
+ next->rec.data.stream_number) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, ssn:%d not the same as at:%d\n",
+ chk->rec.data.stream_number,
+ next->rec.data.stream_number);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_12);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if ((next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
+ chk->rec.data.stream_seq !=
+ next->rec.data.stream_seq) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, sseq:%d not the same as at:%d\n",
+ chk->rec.data.stream_seq,
+ next->rec.data.stream_seq);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_13);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ }
+ }
+ }
+ /* Do we need to do some delivery? check */
+ sctp_deliver_reasm_check(stcb, asoc);
+}
+
+/*
+ * This is an unfortunate routine. It checks to make sure an evil guy is not
+ * stuffing us full of bad packet fragments. A broken peer could also do this,
+ * but that is doubtful. It is too bad I must worry about evil crackers, sigh
+ * :< more cycles.
+ */
+static int
+sctp_does_tsn_belong_to_reasm(struct sctp_association *asoc,
+ uint32_t TSN_seq)
+{
+ struct sctp_tmit_chunk *at;
+ uint32_t tsn_est;
+
+ TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) {
+ if (compare_with_wrap(TSN_seq,
+ at->rec.data.TSN_seq, MAX_TSN)) {
+ /* is it one bigger? */
+ tsn_est = at->rec.data.TSN_seq + 1;
+ if (tsn_est == TSN_seq) {
+ /* yep. It better be a last then */
+ if ((at->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_LAST_FRAG) {
+ /*
+ * Ok this guy belongs next to a guy
+ * that is NOT last, it should be a
+ * middle/last, not a complete
+ * chunk.
+ */
+ return (1);
+ } else {
+ /*
+ * This guy is ok since its a LAST
+					 * This guy is ok since it's a LAST
+					 * and the new chunk is a fully
+					 * self-contained one.
+ return (0);
+ }
+ }
+ } else if (TSN_seq == at->rec.data.TSN_seq) {
+ /* Software error since I have a dup? */
+ return (1);
+ } else {
+ /*
+ * Ok, 'at' is larger than new chunk but does it
+ * need to be right before it.
+ */
+ tsn_est = TSN_seq + 1;
+ if (tsn_est == at->rec.data.TSN_seq) {
+ /* Yep, It better be a first */
+ if ((at->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_FIRST_FRAG) {
+ return (1);
+ } else {
+ return (0);
+ }
+ }
+ }
+ }
+ return (0);
+}
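+
+/*
+ * Example with hypothetical TSNs: if the reassembly queue holds a MIDDLE
+ * fragment at TSN 20, an arriving chunk with TSN 21 "belongs" to the queue
+ * (this returns 1), because whatever follows a non-LAST fragment must itself
+ * be a MIDDLE or LAST fragment, not a complete chunk.
+ */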
+
+
+static int
+sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct mbuf **m, int offset, struct sctp_data_chunk *ch, int chk_length,
+ struct sctp_nets *net, uint32_t * high_tsn, int *abort_flag,
+ int *break_flag, int last_chunk)
+{
+ /* Process a data chunk */
+ /* struct sctp_tmit_chunk *chk; */
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsn, gap;
+ struct mbuf *dmbuf;
+ int indx, the_len;
+ int need_reasm_check = 0;
+ uint16_t strmno, strmseq;
+ struct mbuf *oper;
+ struct sctp_queued_to_read *control;
+ int ordered;
+ uint32_t protocol_id;
+ uint8_t chunk_flags;
+ struct sctp_stream_reset_list *liste;
+
+ chk = NULL;
+ tsn = ntohl(ch->dp.tsn);
+ chunk_flags = ch->ch.chunk_flags;
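+ /*
+ * The SACK-IMMEDIATELY bit (I-bit) asks us to SACK this packet
+ * right away rather than waiting on the delayed SACK timer.
+ */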
+ if ((chunk_flags & SCTP_DATA_SACK_IMMEDIATELY) == SCTP_DATA_SACK_IMMEDIATELY) {
+ asoc->send_sack = 1;
+ }
+ protocol_id = ch->dp.protocol_id;
+ ordered = ((chunk_flags & SCTP_DATA_UNORDERED) == 0);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, SCTP_MAP_TSN_ENTERS);
+ }
+ if (stcb == NULL) {
+ return (0);
+ }
+ SCTP_LTRACE_CHK(stcb->sctp_ep, stcb, ch->ch.chunk_type, tsn);
+ if (compare_with_wrap(asoc->cumulative_tsn, tsn, MAX_TSN) ||
+ asoc->cumulative_tsn == tsn) {
+ /* It is a duplicate */
+ SCTP_STAT_INCR(sctps_recvdupdata);
+ if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) {
+ /* Record a dup for the next outbound sack */
+ asoc->dup_tsns[asoc->numduptsns] = tsn;
+ asoc->numduptsns++;
+ }
+ asoc->send_sack = 1;
+ return (0);
+ }
+ /* Calculate the number of TSN's between the base and this TSN */
+ SCTP_CALC_TSN_TO_GAP(gap, tsn, asoc->mapping_array_base_tsn);
+ if (gap >= (SCTP_MAPPING_ARRAY << 3)) {
+ /* Can't hold the bit in the mapping at max array, toss it */
+ return (0);
+ }
+ if (gap >= (uint32_t) (asoc->mapping_array_size << 3)) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (sctp_expand_mapping_array(asoc, gap)) {
+ /* Can't expand, drop it */
+ return (0);
+ }
+ }
+ if (compare_with_wrap(tsn, *high_tsn, MAX_TSN)) {
+ *high_tsn = tsn;
+ }
+ /* See if we have received this one already */
+ if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap) ||
+ SCTP_IS_TSN_PRESENT(asoc->nr_mapping_array, gap)) {
+ SCTP_STAT_INCR(sctps_recvdupdata);
+ if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) {
+ /* Record a dup for the next outbound sack */
+ asoc->dup_tsns[asoc->numduptsns] = tsn;
+ asoc->numduptsns++;
+ }
+ asoc->send_sack = 1;
+ return (0);
+ }
+ /*
+ * Check to see about the GONE flag, duplicates would cause a sack
+ * to be sent up above
+ */
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET))
+ ) {
+ /*
+ * wait a minute, this guy is gone, there is no longer a
+ * receiver. Send peer an ABORT!
+ */
+ struct mbuf *op_err;
+
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, 0, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return (0);
+ }
+ /*
+ * Now before going further we see if there is room. If NOT then we
+ * MAY let one through only IF this TSN is the one we are waiting
+ * for on a partial delivery API.
+ */
+
+ /* now do the tests */
+ if (((asoc->cnt_on_all_streams +
+ asoc->cnt_on_reasm_queue +
+ asoc->cnt_msg_on_sb) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue)) ||
+ (((int)asoc->my_rwnd) <= 0)) {
+ /*
+ * When we have NO room in the rwnd we check to make sure
+ * the reader is doing its job...
+ */
+ if (stcb->sctp_socket->so_rcv.sb_cc) {
+ /* some to read, wake-up */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (0);
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* now is it in the mapping array of what we have accepted? */
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_map, MAX_TSN) &&
+ compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+ /* Nope not in the valid range dump it */
+ sctp_set_rwnd(stcb, asoc);
+ if ((asoc->cnt_on_all_streams +
+ asoc->cnt_on_reasm_queue +
+ asoc->cnt_msg_on_sb) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue)) {
+ SCTP_STAT_INCR(sctps_datadropchklmt);
+ } else {
+ SCTP_STAT_INCR(sctps_datadroprwnd);
+ }
+ indx = *break_flag;
+ *break_flag = 1;
+ return (0);
+ }
+ }
+ strmno = ntohs(ch->dp.stream_id);
+ if (strmno >= asoc->streamincnt) {
+ struct sctp_paramhdr *phdr;
+ struct mbuf *mb;
+
+ mb = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) * 2),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (mb != NULL) {
+ /* add some space up front so prepend will work well */
+ SCTP_BUF_RESV_UF(mb, sizeof(struct sctp_chunkhdr));
+ phdr = mtod(mb, struct sctp_paramhdr *);
+ /*
+ * Error causes are just params and this one has
+ * two back-to-back phdrs: one with the error type
+ * and size, the other with the stream id and a rsvd
+ */
+ SCTP_BUF_LEN(mb) = (sizeof(struct sctp_paramhdr) * 2);
+ phdr->param_type = htons(SCTP_CAUSE_INVALID_STREAM);
+ phdr->param_length =
+ htons(sizeof(struct sctp_paramhdr) * 2);
+ phdr++;
+ /* We insert the stream in the type field */
+ phdr->param_type = ch->dp.stream_id;
+ /* And set the length to 0 for the rsvd field */
+ phdr->param_length = 0;
+ sctp_queue_op_err(stcb, mb);
+ }
+ SCTP_STAT_INCR(sctps_badsid);
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+ asoc->highest_tsn_inside_nr_map = tsn;
+ }
+ if (tsn == (asoc->cumulative_tsn + 1)) {
+ /* Update cum-ack */
+ asoc->cumulative_tsn = tsn;
+ }
+ return (0);
+ }
+ /*
+ * Before we continue let's validate that we are not being fooled by
+ * an evil attacker. We can only have 4k chunks based on our TSN
+ * spread allowed by the mapping array (512 * 8 bits), so there is no
+ * way our stream sequence numbers could have wrapped. We of course
+ * only validate the FIRST fragment, so the bit must be set.
+ */
+ strmseq = ntohs(ch->dp.stream_sequence);
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->tsn_in_at >= SCTP_TSN_LOG_SIZE) {
+ asoc->tsn_in_at = 0;
+ asoc->tsn_in_wrapped = 1;
+ }
+ asoc->in_tsnlog[asoc->tsn_in_at].tsn = tsn;
+ asoc->in_tsnlog[asoc->tsn_in_at].strm = strmno;
+ asoc->in_tsnlog[asoc->tsn_in_at].seq = strmseq;
+ asoc->in_tsnlog[asoc->tsn_in_at].sz = chk_length;
+ asoc->in_tsnlog[asoc->tsn_in_at].flgs = chunk_flags;
+ asoc->in_tsnlog[asoc->tsn_in_at].stcb = (void *)stcb;
+ asoc->in_tsnlog[asoc->tsn_in_at].in_pos = asoc->tsn_in_at;
+ asoc->in_tsnlog[asoc->tsn_in_at].in_out = 1;
+ asoc->tsn_in_at++;
+#endif
+ if ((chunk_flags & SCTP_DATA_FIRST_FRAG) &&
+ (TAILQ_EMPTY(&asoc->resetHead)) &&
+ (chunk_flags & SCTP_DATA_UNORDERED) == 0 &&
+ (compare_with_wrap(asoc->strmin[strmno].last_sequence_delivered,
+ strmseq, MAX_SEQ) ||
+ asoc->strmin[strmno].last_sequence_delivered == strmseq)) {
+ /* The incoming sseq is behind where we last delivered? */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ:%d delivered:%d from peer, Abort!\n",
+ strmseq, asoc->strmin[strmno].last_sequence_delivered);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_14);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return (0);
+ }
+ /************************************
+ * From here down we may find ch-> invalid
+ * so it's a good idea NOT to use it.
+ *************************************/
+
+ the_len = (chk_length - sizeof(struct sctp_data_chunk));
+ if (last_chunk == 0) {
+ dmbuf = SCTP_M_COPYM(*m,
+ (offset + sizeof(struct sctp_data_chunk)),
+ the_len, M_DONTWAIT);
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = dmbuf;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ } else {
+ /* We can steal the last chunk */
+ int l_len;
+
+ dmbuf = *m;
+ /* lop off the top part */
+ m_adj(dmbuf, (offset + sizeof(struct sctp_data_chunk)));
+ if (SCTP_BUF_NEXT(dmbuf) == NULL) {
+ l_len = SCTP_BUF_LEN(dmbuf);
+ } else {
+ /*
+ * need to count up the size; hopefully we do not hit
+ * this too often :-0
+ */
+ struct mbuf *lat;
+
+ l_len = 0;
+ lat = dmbuf;
+ while (lat) {
+ l_len += SCTP_BUF_LEN(lat);
+ lat = SCTP_BUF_NEXT(lat);
+ }
+ }
+ if (l_len > the_len) {
+ /* Trim the end round bytes off too */
+ m_adj(dmbuf, -(l_len - the_len));
+ }
+ }
+ if (dmbuf == NULL) {
+ SCTP_STAT_INCR(sctps_nomem);
+ return (0);
+ }
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG &&
+ asoc->fragmented_delivery_inprogress == 0 &&
+ TAILQ_EMPTY(&asoc->resetHead) &&
+ ((ordered == 0) ||
+ ((uint16_t) (asoc->strmin[strmno].last_sequence_delivered + 1) == strmseq &&
+ TAILQ_EMPTY(&asoc->strmin[strmno].inqueue)))) {
+ /* Candidate for express delivery */
+ /*
+ * It's not fragmented, no PD-API is up, nothing in the
+ * delivery queue, it's un-ordered OR ordered and the next to
+ * deliver AND nothing else is stuck on the stream queue,
+ * and there is room for it in the socket buffer. Let's just
+ * stuff it up the buffer....
+ */
+
+ /* It would be nice to avoid this copy if we could :< */
+ sctp_alloc_a_readq(stcb, control);
+ sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
+ protocol_id,
+ stcb->asoc.context,
+ strmno, strmseq,
+ chunk_flags,
+ dmbuf);
+ if (control == NULL) {
+ goto failed_express_del;
+ }
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+ asoc->highest_tsn_inside_nr_map = tsn;
+ }
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control, &stcb->sctp_socket->so_rcv,
+ 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+
+ if ((chunk_flags & SCTP_DATA_UNORDERED) == 0) {
+ /* for ordered, bump what we delivered */
+ asoc->strmin[strmno].last_sequence_delivered++;
+ }
+ SCTP_STAT_INCR(sctps_recvexpress);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del_alt(stcb, tsn, strmseq, strmno,
+ SCTP_STR_LOG_FROM_EXPRS_DEL);
+ }
+ control = NULL;
+
+ goto finish_express_del;
+ }
+failed_express_del:
+ /* If we reach here this is a new chunk */
+ chk = NULL;
+ control = NULL;
+ /* Express for fragmented delivery? */
+ if ((asoc->fragmented_delivery_inprogress) &&
+ (stcb->asoc.control_pdapi) &&
+ (asoc->str_of_pdapi == strmno) &&
+ (asoc->ssn_of_pdapi == strmseq)
+ ) {
+ control = stcb->asoc.control_pdapi;
+ if ((chunk_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
+ /* Can't be another first? */
+ goto failed_pdapi_express_del;
+ }
+ if (tsn == (control->sinfo_tsn + 1)) {
+ /* Yep, we can add it on */
+ int end = 0;
+ uint32_t cumack;
+
+ if (chunk_flags & SCTP_DATA_LAST_FRAG) {
+ end = 1;
+ }
+ cumack = asoc->cumulative_tsn;
+ if ((cumack + 1) == tsn)
+ cumack = tsn;
+
+ if (sctp_append_to_readq(stcb->sctp_ep, stcb, control, dmbuf, end,
+ tsn,
+ &stcb->sctp_socket->so_rcv)) {
+ SCTP_PRINTF("Append fails end:%d\n", end);
+ goto failed_pdapi_express_del;
+ }
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+ asoc->highest_tsn_inside_nr_map = tsn;
+ }
+ SCTP_STAT_INCR(sctps_recvexpressm);
+ control->sinfo_tsn = tsn;
+ asoc->tsn_last_delivered = tsn;
+ asoc->fragment_flags = chunk_flags;
+ asoc->tsn_of_pdapi_last_delivered = tsn;
+ asoc->last_flags_delivered = chunk_flags;
+ asoc->last_strm_seq_delivered = strmseq;
+ asoc->last_strm_no_delivered = strmno;
+ if (end) {
+ /* clean up the flags and such */
+ asoc->fragmented_delivery_inprogress = 0;
+ if ((chunk_flags & SCTP_DATA_UNORDERED) == 0) {
+ asoc->strmin[strmno].last_sequence_delivered++;
+ }
+ stcb->asoc.control_pdapi = NULL;
+ if (TAILQ_EMPTY(&asoc->reasmqueue) == 0) {
+ /*
+ * There could be another message
+ * ready
+ */
+ need_reasm_check = 1;
+ }
+ }
+ control = NULL;
+ goto finish_express_del;
+ }
+ }
+failed_pdapi_express_del:
+ control = NULL;
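+ /*
+ * Record the TSN in the proper map: with draining disabled this
+ * TSN will never be reneged, so it goes in the non-renegable (nr)
+ * mapping array; otherwise it stays renegable in the regular
+ * mapping array.
+ */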
+ if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) {
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+ asoc->highest_tsn_inside_nr_map = tsn;
+ }
+ } else {
+ SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap);
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_map, MAX_TSN)) {
+ asoc->highest_tsn_inside_map = tsn;
+ }
+ }
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) {
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* No memory so we drop the chunk */
+ SCTP_STAT_INCR(sctps_nomem);
+ if (last_chunk == 0) {
+ /* we copied it, free the copy */
+ sctp_m_freem(dmbuf);
+ }
+ return (0);
+ }
+ chk->rec.data.TSN_seq = tsn;
+ chk->no_fr_allowed = 0;
+ chk->rec.data.stream_seq = strmseq;
+ chk->rec.data.stream_number = strmno;
+ chk->rec.data.payloadtype = protocol_id;
+ chk->rec.data.context = stcb->asoc.context;
+ chk->rec.data.doing_fast_retransmit = 0;
+ chk->rec.data.rcv_flags = chunk_flags;
+ chk->asoc = asoc;
+ chk->send_size = the_len;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+ chk->data = dmbuf;
+ } else {
+ sctp_alloc_a_readq(stcb, control);
+ sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
+ protocol_id,
+ stcb->asoc.context,
+ strmno, strmseq,
+ chunk_flags,
+ dmbuf);
+ if (control == NULL) {
+ /* No memory so we drop the chunk */
+ SCTP_STAT_INCR(sctps_nomem);
+ if (last_chunk == 0) {
+ /* we copied it, free the copy */
+ sctp_m_freem(dmbuf);
+ }
+ return (0);
+ }
+ control->length = the_len;
+ }
+
+ /* Mark it as received */
+ /* Now queue it where it belongs */
+ if (control != NULL) {
+ /* First a sanity check */
+ if (asoc->fragmented_delivery_inprogress) {
+ /*
+ * Ok, we have a fragmented delivery in progress; if
+ * this chunk is next to deliver OR belongs, in our
+ * view, to the reassembly, the peer is evil or
+ * broken.
+ */
+ uint32_t estimate_tsn;
+
+ estimate_tsn = asoc->tsn_last_delivered + 1;
+ if (TAILQ_EMPTY(&asoc->reasmqueue) &&
+ (estimate_tsn == control->sinfo_tsn)) {
+ /* Evil/Broken peer */
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_15);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ } else {
+ if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_16);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ }
+ }
+ } else {
+ /* No PDAPI running */
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /*
+ * Reassembly queue is NOT empty; validate
+ * that this tsn does not need to be in the
+ * reassembly queue. If it does then our peer
+ * is broken or evil.
+ */
+ if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_17);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ }
+ }
+ }
+ /* ok, if we reach here we have passed the sanity checks */
+ if (chunk_flags & SCTP_DATA_UNORDERED) {
+ /* queue directly into socket buffer */
+ sctp_mark_non_revokable(asoc, control->sinfo_tsn);
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+ } else {
+ /*
+ * Special check for when streams are resetting. We
+ * could be smarter about this and check the
+ * actual stream to see if it is not being reset..
+ * that way we would not create a HOLB when amongst
+ * streams being reset and those not being reset.
+ *
+ * We take complete messages that have a stream reset
+ * intervening (aka the TSN is after where our
+ * cum-ack needs to be) off and put them on a
+ * pending_reply_queue. The reassembly ones we do
+ * not have to worry about since they are all sorted
+ * and processed by TSN order. It is only the
+ * singletons I must worry about.
+ */
+ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) &&
+ ((compare_with_wrap(tsn, liste->tsn, MAX_TSN)))
+ ) {
+ /*
+ * yep, it's past where we need to reset... go
+ * ahead and queue it.
+ */
+ if (TAILQ_EMPTY(&asoc->pending_reply_queue)) {
+ /* first one on */
+ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
+ } else {
+ struct sctp_queued_to_read *ctlOn;
+ unsigned char inserted = 0;
+
+ ctlOn = TAILQ_FIRST(&asoc->pending_reply_queue);
+ while (ctlOn) {
+ if (compare_with_wrap(control->sinfo_tsn,
+ ctlOn->sinfo_tsn, MAX_TSN)) {
+ ctlOn = TAILQ_NEXT(ctlOn, next);
+ } else {
+ /* found it */
+ TAILQ_INSERT_BEFORE(ctlOn, control, next);
+ inserted = 1;
+ break;
+ }
+ }
+ if (inserted == 0) {
+ /*
+ * must be put at the end;
+ * everything on the queue
+ * has a smaller TSN.
+ */
+ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
+ }
+ }
+ } else {
+ sctp_queue_data_to_stream(stcb, asoc, control, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ }
+ }
+ } else {
+ /* Into the re-assembly queue */
+ sctp_queue_data_for_reasm(stcb, asoc, chk, abort_flag);
+ if (*abort_flag) {
+ /*
+ * the assoc is now gone and chk was put onto the
+ * reasm queue, which has all been freed.
+ */
+ *m = NULL;
+ return (0);
+ }
+ }
+finish_express_del:
+ if (tsn == (asoc->cumulative_tsn + 1)) {
+ /* Update cum-ack */
+ asoc->cumulative_tsn = tsn;
+ }
+ if (last_chunk) {
+ *m = NULL;
+ }
+ if (ordered) {
+ SCTP_STAT_INCR_COUNTER64(sctps_inorderchunks);
+ } else {
+ SCTP_STAT_INCR_COUNTER64(sctps_inunorderchunks);
+ }
+ SCTP_STAT_INCR(sctps_recvdata);
+ /* Set it present please */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del_alt(stcb, tsn, strmseq, strmno, SCTP_STR_LOG_FROM_MARK_TSN);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(asoc->mapping_array_base_tsn, asoc->cumulative_tsn,
+ asoc->highest_tsn_inside_map, SCTP_MAP_PREPARE_SLIDE);
+ }
+ /* check the special flag for stream resets */
+ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) &&
+ ((compare_with_wrap(asoc->cumulative_tsn, liste->tsn, MAX_TSN)) ||
+ (asoc->cumulative_tsn == liste->tsn))
+ ) {
+ /*
+ * we have finished working through the backlogged TSNs; now it is
+ * time to reset streams. 1: call the reset function. 2: free the
+ * pending_reply space. 3: distribute any chunks in the
+ * pending_reply_queue.
+ */
+ struct sctp_queued_to_read *ctl;
+
+ sctp_reset_in_stream(stcb, liste->number_entries, liste->req.list_of_streams);
+ TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+ SCTP_FREE(liste, SCTP_M_STRESET);
+ /* sa_ignore FREED_MEMORY */
+ liste = TAILQ_FIRST(&asoc->resetHead);
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ if (ctl && (liste == NULL)) {
+ /* All can be removed */
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next);
+ sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+ } else if (ctl) {
+ /* more than one in queue */
+ while (!compare_with_wrap(ctl->sinfo_tsn, liste->tsn, MAX_TSN)) {
+ /*
+ * if ctl->sinfo_tsn is <= liste->tsn we can
+ * process it which is the NOT of
+ * ctl->sinfo_tsn > liste->tsn
+ */
+ TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next);
+ sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+ }
+ /*
+ * Now service reassembly to pick up anything that has been
+ * held on the reassembly queue.
+ */
+ sctp_deliver_reasm_check(stcb, asoc);
+ need_reasm_check = 0;
+ }
+ if (need_reasm_check) {
+ /* Another one waits ? */
+ sctp_deliver_reasm_check(stcb, asoc);
+ }
+ return (1);
+}
+
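+ /*
+ * For a byte taken from the OR of the mapping arrays, this table
+ * gives the number of consecutive 1 bits starting at the least
+ * significant bit, i.e. how many in-sequence TSNs that byte covers.
+ */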
+int8_t sctp_map_lookup_tab[256] = {
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 4,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 5,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 4,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 6,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 4,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 5,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 4,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 7,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 4,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 5,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 4,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 6,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 4,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 5,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 4,
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0, 8
+};
+
+
+void
+sctp_slide_mapping_arrays(struct sctp_tcb *stcb)
+{
+ /*
+ * Now we also need to check the mapping array in a couple of ways.
+ * 1) Did we move the cum-ack point?
+ *
+ * When you first glance at this you might think that all entries that
+ * make up the position of the cum-ack would be in the nr-mapping
+ * array only.. i.e. things up to the cum-ack are always
+ * deliverable. That's true with one exception, when it's a fragmented
+ * message we may not deliver the data until some threshold (or all
+ * of it) is in place. So we must OR the nr_mapping_array and
+ * mapping_array to get a true picture of the cum-ack.
+ */
+ struct sctp_association *asoc;
+ int at;
+ uint8_t val;
+ int slide_from, slide_end, lgap, distance;
+ uint32_t old_cumack, old_base, old_highest, highest_tsn;
+
+ asoc = &stcb->asoc;
+ at = 0;
+
+ old_cumack = asoc->cumulative_tsn;
+ old_base = asoc->mapping_array_base_tsn;
+ old_highest = asoc->highest_tsn_inside_map;
+ /*
+ * We could probably improve this a small bit by calculating the
+ * offset of the current cum-ack as the starting point.
+ */
+ at = 0;
+ for (slide_from = 0; slide_from < stcb->asoc.mapping_array_size; slide_from++) {
+ val = asoc->nr_mapping_array[slide_from] | asoc->mapping_array[slide_from];
+ if (val == 0xff) {
+ at += 8;
+ } else {
+ /* there is a 0 bit */
+ at += sctp_map_lookup_tab[val];
+ break;
+ }
+ }
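+ /* 'at' counts the TSNs received in sequence from the array base. */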
+ asoc->cumulative_tsn = asoc->mapping_array_base_tsn + (at - 1);
+
+ if (compare_with_wrap(asoc->cumulative_tsn, asoc->highest_tsn_inside_map, MAX_TSN) &&
+ compare_with_wrap(asoc->cumulative_tsn, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+#ifdef INVARIANTS
+ panic("huh, cumack 0x%x greater than high-tsn 0x%x in map",
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map);
+#else
+ SCTP_PRINTF("huh, cumack 0x%x greater than high-tsn 0x%x in map - should panic?\n",
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map);
+ sctp_print_mapping_array(asoc);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 6, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ asoc->highest_tsn_inside_map = asoc->cumulative_tsn;
+ asoc->highest_tsn_inside_nr_map = asoc->cumulative_tsn;
+#endif
+ }
+ if (compare_with_wrap(asoc->highest_tsn_inside_nr_map,
+ asoc->highest_tsn_inside_map,
+ MAX_TSN)) {
+ highest_tsn = asoc->highest_tsn_inside_nr_map;
+ } else {
+ highest_tsn = asoc->highest_tsn_inside_map;
+ }
+ if ((asoc->cumulative_tsn == highest_tsn) && (at >= 8)) {
+ /* The complete array was completed by a single FR */
+ /* highest becomes the cum-ack */
+ int clr;
+
+#ifdef INVARIANTS
+ unsigned int i;
+
+#endif
+
+ /* clear the array */
+ clr = ((at + 7) >> 3);
+ if (clr > asoc->mapping_array_size) {
+ clr = asoc->mapping_array_size;
+ }
+ memset(asoc->mapping_array, 0, clr);
+ memset(asoc->nr_mapping_array, 0, clr);
+#ifdef INVARIANTS
+ for (i = 0; i < asoc->mapping_array_size; i++) {
+ if ((asoc->mapping_array[i]) || (asoc->nr_mapping_array[i])) {
+ printf("Error Mapping array's not clean at clear\n");
+ sctp_print_mapping_array(asoc);
+ }
+ }
+#endif
+ asoc->mapping_array_base_tsn = asoc->cumulative_tsn + 1;
+ asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map = asoc->cumulative_tsn;
+ } else if (at >= 8) {
+ /* we can slide the mapping array down */
+ /* slide_from holds where we hit the first NON 0xff byte */
+
+ /*
+ * now calculate the ceiling of the move using our highest
+ * TSN value
+ */
+ SCTP_CALC_TSN_TO_GAP(lgap, highest_tsn, asoc->mapping_array_base_tsn);
+ slide_end = (lgap >> 3);
+ if (slide_end < slide_from) {
+ sctp_print_mapping_array(asoc);
+#ifdef INVARIANTS
+ panic("impossible slide");
+#else
+ printf("impossible slide lgap:%x slide_end:%x slide_from:%x? at:%d\n",
+ lgap, slide_end, slide_from, at);
+ return;
+#endif
+ }
+ if (slide_end > asoc->mapping_array_size) {
+#ifdef INVARIANTS
+ panic("would overrun buffer");
+#else
+ printf("Gak, would have overrun map end:%d slide_end:%d\n",
+ asoc->mapping_array_size, slide_end);
+ slide_end = asoc->mapping_array_size;
+#endif
+ }
+ distance = (slide_end - slide_from) + 1;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(old_base, old_cumack, old_highest,
+ SCTP_MAP_PREPARE_SLIDE);
+ sctp_log_map((uint32_t) slide_from, (uint32_t) slide_end,
+ (uint32_t) lgap, SCTP_MAP_SLIDE_FROM);
+ }
+ if (distance + slide_from > asoc->mapping_array_size ||
+ distance < 0) {
+ /*
+ * Here we do NOT slide forward the array so that
+ * hopefully when more data comes in to fill it up
+ * we will be able to slide it forward. Really I
+ * don't think this should happen :-0
+ */
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map((uint32_t) distance, (uint32_t) slide_from,
+ (uint32_t) asoc->mapping_array_size,
+ SCTP_MAP_SLIDE_NONE);
+ }
+ } else {
+ int ii;
+
+ for (ii = 0; ii < distance; ii++) {
+ asoc->mapping_array[ii] = asoc->mapping_array[slide_from + ii];
+ asoc->nr_mapping_array[ii] = asoc->nr_mapping_array[slide_from + ii];
+
+ }
+ for (ii = distance; ii < asoc->mapping_array_size; ii++) {
+ asoc->mapping_array[ii] = 0;
+ asoc->nr_mapping_array[ii] = 0;
+ }
+ if (asoc->highest_tsn_inside_map + 1 == asoc->mapping_array_base_tsn) {
+ asoc->highest_tsn_inside_map += (slide_from << 3);
+ }
+ if (asoc->highest_tsn_inside_nr_map + 1 == asoc->mapping_array_base_tsn) {
+ asoc->highest_tsn_inside_nr_map += (slide_from << 3);
+ }
+ asoc->mapping_array_base_tsn += (slide_from << 3);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(asoc->mapping_array_base_tsn,
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map,
+ SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ }
+}
+
+
+void
+sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap, int *abort_flag)
+{
+ struct sctp_association *asoc;
+ uint32_t highest_tsn;
+
+ asoc = &stcb->asoc;
+ if (compare_with_wrap(asoc->highest_tsn_inside_nr_map,
+ asoc->highest_tsn_inside_map,
+ MAX_TSN)) {
+ highest_tsn = asoc->highest_tsn_inside_nr_map;
+ } else {
+ highest_tsn = asoc->highest_tsn_inside_map;
+ }
+
+ /*
+ * Now we need to see if we need to queue a sack or just start the
+ * timer (if allowed).
+ */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /*
+ * Ok, special case: in the SHUTDOWN-SENT case, here we make
+ * sure the SACK timer is off and instead send a SHUTDOWN and a
+ * SACK
+ */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INDATA + SCTP_LOC_18);
+ }
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ sctp_send_sack(stcb);
+ } else {
+ int is_a_gap;
+
+ /* is there a gap now ? */
+ is_a_gap = compare_with_wrap(highest_tsn, stcb->asoc.cumulative_tsn, MAX_TSN);
+
+ /*
+ * CMT DAC algorithm: increase number of packets received
+ * since last ack
+ */
+ stcb->asoc.cmt_dac_pkts_rcvd++;
+
+ if ((stcb->asoc.send_sack == 1) || /* We need to send a
+ * SACK */
+ ((was_a_gap) && (is_a_gap == 0)) || /* was a gap, but no
+ * longer is one */
+ (stcb->asoc.numduptsns) || /* we have dup's */
+ (is_a_gap) || /* is still a gap */
+ (stcb->asoc.delayed_ack == 0) || /* Delayed sack disabled */
+ (stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) /* hit limit of pkts */
+ ) {
+
+ if ((stcb->asoc.sctp_cmt_on_off == 1) &&
+ (SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) &&
+ (stcb->asoc.send_sack == 0) &&
+ (stcb->asoc.numduptsns == 0) &&
+ (stcb->asoc.delayed_ack) &&
+ (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer))) {
+
+ /*
+ * CMT DAC algorithm: with CMT, delay acks
+ * even in the face of reordering.
+ * Therefore, acks that do not have to be
+ * sent because of the above reasons will be
+ * delayed. That is, acks that would have
+ * been sent due to gap reports will be
+ * delayed with DAC. Start the delayed ack
+ * timer.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ /*
+ * Ok we must build a SACK since the timer
+ * is pending, we got our first packet OR
+ * there are gaps or duplicates.
+ */
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ sctp_send_sack(stcb);
+ }
+ } else {
+ if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ }
+ }
+ }
+}
+
+void
+sctp_service_queues(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsize, pd_point;
+ uint16_t nxt_todel;
+
+ if (asoc->fragmented_delivery_inprogress) {
+ sctp_service_reassembly(stcb, asoc);
+ }
+ /* Can we proceed further, i.e. the PD-API is complete */
+ if (asoc->fragmented_delivery_inprogress) {
+ /* no */
+ return;
+ }
+ /*
+ * Now is there some other chunk I can deliver from the reassembly
+ * queue?
+ */
+doit_again:
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ asoc->size_on_reasm_queue = 0;
+ asoc->cnt_on_reasm_queue = 0;
+ return;
+ }
+ nxt_todel = asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered + 1;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) &&
+ ((nxt_todel == chk->rec.data.stream_seq) ||
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED))) {
+ /*
+ * Yep, the first one is here. We set up to start reception
+ * by backing down the TSN just in case we can't deliver.
+ */
+
+ /*
+ * Before we start, though, either all of the message should
+ * be here or at least enough of it (the lesser of the socket
+ * buffer max and the partial delivery point) so that
+ * something can be delivered.
+ */
+ if (stcb->sctp_socket) {
+ pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+ stcb->sctp_ep->partial_delivery_point);
+ } else {
+ pd_point = stcb->sctp_ep->partial_delivery_point;
+ }
+ if (sctp_is_all_msg_on_reasm(asoc, &tsize) || (tsize >= pd_point)) {
+ asoc->fragmented_delivery_inprogress = 1;
+ asoc->tsn_last_delivered = chk->rec.data.TSN_seq - 1;
+ asoc->str_of_pdapi = chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
+ asoc->pdapi_ppid = chk->rec.data.payloadtype;
+ asoc->fragment_flags = chk->rec.data.rcv_flags;
+ sctp_service_reassembly(stcb, asoc);
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ goto doit_again;
+ }
+ }
+ }
+}
+
+int
+sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
+ struct sctphdr *sh, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint32_t * high_tsn)
+{
+ struct sctp_data_chunk *ch, chunk_buf;
+ struct sctp_association *asoc;
+ int num_chunks = 0; /* number of data chunks processed */
+ int stop_proc = 0;
+ int chk_length, break_flag, last_chunk;
+ int abort_flag = 0, was_a_gap;
+ struct mbuf *m;
+ uint32_t highest_tsn;
+
+ /* set the rwnd */
+ sctp_set_rwnd(stcb, &stcb->asoc);
+
+ m = *mm;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+ if (compare_with_wrap(asoc->highest_tsn_inside_nr_map, asoc->highest_tsn_inside_map, MAX_TSN)) {
+ highest_tsn = asoc->highest_tsn_inside_nr_map;
+ } else {
+ highest_tsn = asoc->highest_tsn_inside_map;
+ }
+ was_a_gap = compare_with_wrap(highest_tsn, stcb->asoc.cumulative_tsn, MAX_TSN);
+ /*
+ * setup where we got the last DATA packet from for any SACK that
+ * may need to go out. Don't bump the net. This is done ONLY when a
+ * chunk is assigned.
+ */
+ asoc->last_data_chunk_from = net;
+
+ /*-
+ * Now before we proceed we must figure out if this is a wasted
+ * cluster... i.e. it is a small packet sent in and yet the driver
+ * underneath allocated a full cluster for it. If so we must copy it
+ * to a smaller mbuf and free up the cluster mbuf. This will help
+ * with cluster starvation. Note for __Panda__ we don't do this
+ * since it has clusters all the way down to 64 bytes.
+ */
+ if (SCTP_BUF_LEN(m) < (long)MLEN && SCTP_BUF_NEXT(m) == NULL) {
+ /* we only handle mbufs that are singletons.. not chains */
+ m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m) {
+ /* ok, let's see if we can copy the data up */
+ caddr_t *from, *to;
+
+ /* get the pointers and copy */
+ to = mtod(m, caddr_t *);
+ from = mtod((*mm), caddr_t *);
+ memcpy(to, from, SCTP_BUF_LEN((*mm)));
+ /* copy the length and free up the old */
+ SCTP_BUF_LEN(m) = SCTP_BUF_LEN((*mm));
+ sctp_m_freem(*mm);
+ /* success, back copy */
+ *mm = m;
+ } else {
+ /* We are in trouble in the mbuf world .. yikes */
+ m = *mm;
+ }
+ }
+ /* get pointer to the first chunk header */
+ ch = (struct sctp_data_chunk *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ if (ch == NULL) {
+ return (1);
+ }
+ /*
+ * process all DATA chunks...
+ */
+ *high_tsn = asoc->cumulative_tsn;
+ break_flag = 0;
+ asoc->data_pkts_seen++;
+ while (stop_proc == 0) {
+ /* validate chunk length */
+ chk_length = ntohs(ch->ch.chunk_length);
+ if (length - *offset < chk_length) {
+ /* all done, mutilated chunk */
+ stop_proc = 1;
+ break;
+ }
+ if (ch->ch.chunk_type == SCTP_DATA) {
+ if ((size_t)chk_length < sizeof(struct sctp_data_chunk) + 1) {
+ /*
+ * Need to send an abort since we had an
+ * invalid data chunk.
+ */
+ struct mbuf *op_err;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 2 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (op_err) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr) +
+ (2 * sizeof(uint32_t));
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_19);
+ ippp++;
+ *ippp = asoc->cumulative_tsn;
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
+ sctp_abort_association(inp, stcb, m, iphlen, sh,
+ op_err, 0, net->port);
+ return (2);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB1, 0);
+#endif
+ if (SCTP_SIZE32(chk_length) == (length - *offset)) {
+ last_chunk = 1;
+ } else {
+ last_chunk = 0;
+ }
+ if (sctp_process_a_data_chunk(stcb, asoc, mm, *offset, ch,
+ chk_length, net, high_tsn, &abort_flag, &break_flag,
+ last_chunk)) {
+ num_chunks++;
+ }
+ if (abort_flag)
+ return (2);
+
+ if (break_flag) {
+ /*
+ * Set because of out of rwnd space and no
+ * drop rep space left.
+ */
+ stop_proc = 1;
+ break;
+ }
+ } else {
+ /* not a data chunk in the data region */
+ switch (ch->ch.chunk_type) {
+ case SCTP_INITIATION:
+ case SCTP_INITIATION_ACK:
+ case SCTP_SELECTIVE_ACK:
+ case SCTP_NR_SELECTIVE_ACK: /* EY */
+ case SCTP_HEARTBEAT_REQUEST:
+ case SCTP_HEARTBEAT_ACK:
+ case SCTP_ABORT_ASSOCIATION:
+ case SCTP_SHUTDOWN:
+ case SCTP_SHUTDOWN_ACK:
+ case SCTP_OPERATION_ERROR:
+ case SCTP_COOKIE_ECHO:
+ case SCTP_COOKIE_ACK:
+ case SCTP_ECN_ECHO:
+ case SCTP_ECN_CWR:
+ case SCTP_SHUTDOWN_COMPLETE:
+ case SCTP_AUTHENTICATION:
+ case SCTP_ASCONF_ACK:
+ case SCTP_PACKET_DROPPED:
+ case SCTP_STREAM_RESET:
+ case SCTP_FORWARD_CUM_TSN:
+ case SCTP_ASCONF:
+ /*
+ * Now, what do we do with KNOWN chunks that
+ * are NOT in the right place?
+ *
+ * For now, I do nothing but ignore them. We
+ * may later want to add sysctl stuff to
+ * switch out and do either an ABORT() or
+ * possibly process them.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_strict_data_order)) {
+ struct mbuf *op_err;
+
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, 0, net->port);
+ return (2);
+ }
+ break;
+ default:
+ /* unknown chunk type, use bit rules */
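+ /*
+ * The two high-order bits of an unrecognized chunk type
+ * tell us what to do: if 0x40 is set we report it in an
+ * ERROR chunk, and if 0x80 is clear we stop processing
+ * the rest of the packet.
+ */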
+ if (ch->ch.chunk_type & 0x40) {
+ /* Add a error report to the queue */
+ struct mbuf *merr;
+ struct sctp_paramhdr *phd;
+
+ merr = sctp_get_mbuf_for_msg(sizeof(*phd), 0, M_DONTWAIT, 1, MT_DATA);
+ if (merr) {
+ phd = mtod(merr, struct sctp_paramhdr *);
+ /*
+ * We cheat and use param
+ * type since we did not
+ * bother to define an error
+ * cause struct. They are
+ * the same basic format
+ * with different names.
+ */
+ phd->param_type =
+ htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ phd->param_length =
+ htons(chk_length + sizeof(*phd));
+ SCTP_BUF_LEN(merr) = sizeof(*phd);
+ SCTP_BUF_NEXT(merr) = SCTP_M_COPYM(m, *offset,
+ SCTP_SIZE32(chk_length),
+ M_DONTWAIT);
+ if (SCTP_BUF_NEXT(merr)) {
+ sctp_queue_op_err(stcb, merr);
+ } else {
+ sctp_m_freem(merr);
+ }
+ }
+ }
+ if ((ch->ch.chunk_type & 0x80) == 0) {
+ /* discard the rest of this packet */
+ stop_proc = 1;
+ } /* else skip this bad chunk and
+ * continue... */
+ break;
+ }; /* switch of chunk type */
+ }
+ *offset += SCTP_SIZE32(chk_length);
+ if ((*offset >= length) || stop_proc) {
+ /* no more data left in the mbuf chain */
+ stop_proc = 1;
+ continue;
+ }
+ ch = (struct sctp_data_chunk *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ stop_proc = 1;
+ break;
+
+ }
+ } /* while */
+ if (break_flag) {
+ /*
+ * we need to report rwnd overrun drops.
+ */
+ sctp_send_packet_dropped(stcb, net, *mm, iphlen, 0);
+ }
+ if (num_chunks) {
+ /*
+ * Did we get data? If so, update the time for auto-close and
+ * give the peer credit for being alive.
+ */
+ SCTP_STAT_INCR(sctps_recvpktwithdata);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_last_rcvd);
+ }
+ /* now service all of the reassm queue if needed */
+ if (!(TAILQ_EMPTY(&asoc->reasmqueue)))
+ sctp_service_queues(stcb, asoc);
+
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /* Assure that we ack right away */
+ stcb->asoc.send_sack = 1;
+ }
+ /* Start a sack timer or QUEUE a SACK for sending */
+ sctp_sack_check(stcb, was_a_gap, &abort_flag);
+ if (abort_flag)
+ return (2);
+
+ return (0);
+}
+
+static int
+sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1, uint32_t last_tsn,
+ uint16_t frag_strt, uint16_t frag_end, int nr_sacking,
+ int *num_frs,
+ uint32_t * biggest_newly_acked_tsn,
+ uint32_t * this_sack_lowest_newack,
+ int *ecn_seg_sums)
+{
+ struct sctp_tmit_chunk *tp1;
+ unsigned int theTSN;
+ int j, wake_him = 0, circled = 0;
+
+ /* Recover the tp1 we last saw */
+ tp1 = *p_tp1;
+ if (tp1 == NULL) {
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ }
+ for (j = frag_strt; j <= frag_end; j++) {
+ theTSN = j + last_tsn;
+ while (tp1) {
+ if (tp1->rec.data.doing_fast_retransmit)
+ (*num_frs) += 1;
+
+ /*-
+ * CMT: CUCv2 algorithm. For each TSN being
+ * processed from the sent queue, track the
+ * next expected pseudo-cumack, or
+ * rtx_pseudo_cumack, if required. Separate
+ * cumack trackers for first transmissions,
+ * and retransmissions.
+ */
+ if ((tp1->whoTo->find_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->snd_count == 1)) {
+ tp1->whoTo->pseudo_cumack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->find_pseudo_cumack = 0;
+ }
+ if ((tp1->whoTo->find_rtx_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->snd_count > 1)) {
+ tp1->whoTo->rtx_pseudo_cumack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->find_rtx_pseudo_cumack = 0;
+ }
+ if (tp1->rec.data.TSN_seq == theTSN) {
+ if (tp1->sent != SCTP_DATAGRAM_UNSENT) {
+ /*-
+ * must be held until
+ * cum-ack passes
+ */
+ /*-
+ * ECN Nonce: Add the nonce
+ * value to the sender's
+ * nonce sum
+ */
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ /*-
+ * If it is less than RESEND, it is
+ * now no longer in flight.
+ * Higher values may already be set
+ * via previous Gap Ack Blocks...
+ * i.e. ACKED or RESEND.
+ */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ *biggest_newly_acked_tsn, MAX_TSN)) {
+ *biggest_newly_acked_tsn = tp1->rec.data.TSN_seq;
+ }
+ /*-
+ * CMT: SFR algo (and HTNA) - set
+ * saw_newack to 1 for dest being
+ * newly acked. update
+ * this_sack_highest_newack if
+ * appropriate.
+ */
+ if (tp1->rec.data.chunk_was_revoked == 0)
+ tp1->whoTo->saw_newack = 1;
+
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ tp1->whoTo->this_sack_highest_newack,
+ MAX_TSN)) {
+ tp1->whoTo->this_sack_highest_newack =
+ tp1->rec.data.TSN_seq;
+ }
+ /*-
+ * CMT DAC algo: also update
+ * this_sack_lowest_newack
+ */
+ if (*this_sack_lowest_newack == 0) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(*this_sack_lowest_newack,
+ last_tsn,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_ACKED);
+ }
+ *this_sack_lowest_newack = tp1->rec.data.TSN_seq;
+ }
+ /*-
+ * CMT: CUCv2 algorithm. If (rtx-)pseudo-cumack for corresp
+ * dest is being acked, then we have a new (rtx-)pseudo-cumack. Set
+ * new_(rtx_)pseudo_cumack to TRUE so that the cwnd for this dest can be
+ * updated. Also trigger search for the next expected (rtx-)pseudo-cumack.
+ * Separate pseudo_cumack trackers for first transmissions and
+ * retransmissions.
+ */
+ if (tp1->rec.data.TSN_seq == tp1->whoTo->pseudo_cumack) {
+ if (tp1->rec.data.chunk_was_revoked == 0) {
+ tp1->whoTo->new_pseudo_cumack = 1;
+ }
+ tp1->whoTo->find_pseudo_cumack = 1;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ if (tp1->rec.data.TSN_seq == tp1->whoTo->rtx_pseudo_cumack) {
+ if (tp1->rec.data.chunk_was_revoked == 0) {
+ tp1->whoTo->new_pseudo_cumack = 1;
+ }
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(*biggest_newly_acked_tsn,
+ last_tsn,
+ tp1->rec.data.TSN_seq,
+ frag_strt,
+ frag_end,
+ SCTP_LOG_TSN_ACKED);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_GAP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+
+ tp1->whoTo->net_ack += tp1->send_size;
+ if (tp1->snd_count < 2) {
+ /*-
+ * True non-retransmitted chunk
+ */
+ tp1->whoTo->net_ack2 += tp1->send_size;
+
+ /*-
+ * update RTO too ?
+ */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ sctp_calculate_rto(stcb,
+ &stcb->asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ }
+ if (tp1->sent <= SCTP_DATAGRAM_RESEND) {
+ (*ecn_seg_sums) += tp1->rec.data.ect_nonce;
+ (*ecn_seg_sums) &= SCTP_SACK_NONCE_SUM;
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ stcb->asoc.this_sack_highest_gap,
+ MAX_TSN)) {
+ stcb->asoc.this_sack_highest_gap =
+ tp1->rec.data.TSN_seq;
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(stcb->asoc.sent_queue_retran_cnt);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB2,
+ (stcb->asoc.sent_queue_retran_cnt & 0x000000ff));
+#endif
+ }
+ }
+ /*-
+ * All chunks NOT UNSENT fall through here and are marked
+ * (leave PR-SCTP ones that are to skip alone though)
+ */
+ if (tp1->sent != SCTP_FORWARD_TSN_SKIP)
+ tp1->sent = SCTP_DATAGRAM_MARKED;
+
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ /* NR Sack code here */
+ if (nr_sacking) {
+ if (tp1->data) {
+ /*
+ * sa_ignore
+ * NO_NULL_CHK
+ */
+ sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
+ sctp_m_freem(tp1->data);
+ tp1->data = NULL;
+ }
+ wake_him++;
+ }
+ }
+ break;
+ } /* if (tp1->TSN_seq == theTSN) */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, theTSN,
+ MAX_TSN))
+ break;
+
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ if ((tp1 == NULL) && (circled == 0)) {
+ circled++;
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ }
+ } /* end while (tp1) */
+ if (tp1 == NULL) {
+ circled = 0;
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ }
+ /* In case the fragments were not in order we must reset */
+ } /* end for (j = fragStart */
+ *p_tp1 = tp1;
+ return (wake_him); /* Return value only used for nr-sack */
+}
+
+
+static int
+sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct sctp_association *asoc,
+ uint32_t last_tsn, uint32_t * biggest_tsn_acked,
+ uint32_t * biggest_newly_acked_tsn, uint32_t * this_sack_lowest_newack,
+ int num_seg, int num_nr_seg, int *ecn_seg_sums)
+{
+ struct sctp_gap_ack_block *frag, block;
+ struct sctp_tmit_chunk *tp1;
+ int i;
+ int num_frs = 0;
+ int chunk_freed;
+ int non_revocable;
+ uint16_t frag_strt, frag_end, prev_frag_end;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ prev_frag_end = 0;
+ chunk_freed = 0;
+
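+ /*
+ * Walk the regular Gap Ack Blocks first and then the NR
+ * (non-renegable) Gap Ack Blocks appended after them.
+ */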
+ for (i = 0; i < (num_seg + num_nr_seg); i++) {
+ if (i == num_seg) {
+ prev_frag_end = 0;
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ frag = (struct sctp_gap_ack_block *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_gap_ack_block), (uint8_t *) & block);
+ *offset += sizeof(block);
+ if (frag == NULL) {
+ return (chunk_freed);
+ }
+ frag_strt = ntohs(frag->start);
+ frag_end = ntohs(frag->end);
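+ /*
+ * Gap Ack Block boundaries are offsets relative to the
+ * cumulative TSN ack (last_tsn), so this block reports TSNs
+ * last_tsn + frag_strt through last_tsn + frag_end as received.
+ */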
+
+ if (frag_strt > frag_end) {
+ /* This gap report is malformed, skip it. */
+ continue;
+ }
+ if (frag_strt <= prev_frag_end) {
+ /* This gap report is not in order, so restart. */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ if (compare_with_wrap((last_tsn + frag_end), *biggest_tsn_acked, MAX_TSN)) {
+ *biggest_tsn_acked = last_tsn + frag_end;
+ }
+ if (i < num_seg) {
+ non_revocable = 0;
+ } else {
+ non_revocable = 1;
+ }
+ if (sctp_process_segment_range(stcb, &tp1, last_tsn, frag_strt, frag_end,
+ non_revocable, &num_frs, biggest_newly_acked_tsn,
+ this_sack_lowest_newack, ecn_seg_sums)) {
+ chunk_freed = 1;
+ }
+ prev_frag_end = frag_end;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ if (num_frs)
+ sctp_log_fr(*biggest_tsn_acked,
+ *biggest_newly_acked_tsn,
+ last_tsn, SCTP_FR_LOG_BIGGEST_TSNS);
+ }
+ return (chunk_freed);
+}
+
+static void
+sctp_check_for_revoked(struct sctp_tcb *stcb,
+ struct sctp_association *asoc, uint32_t cumack,
+ uint32_t biggest_tsn_acked)
+{
+ struct sctp_tmit_chunk *tp1;
+ int tot_revoked = 0;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, cumack,
+ MAX_TSN)) {
+ /*
+ * ok, this guy is either ACKED or MARKED. If it is
+ * ACKED it has been previously acked but not this
+ * time, i.e. it was revoked. If it is MARKED it was ACK'ed
+ * again.
+ */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_acked,
+ MAX_TSN))
+ break;
+
+
+ if (tp1->sent == SCTP_DATAGRAM_ACKED) {
+ /* it has been revoked */
+ tp1->sent = SCTP_DATAGRAM_SENT;
+ tp1->rec.data.chunk_was_revoked = 1;
+ /*
+ * We must add this stuff back in to assure
+ * timers and such get started.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ /*
+ * We inflate the cwnd to compensate for our
+ * artificial inflation of the flight_size.
+ */
+ tp1->whoTo->cwnd += tp1->book_size;
+ tot_revoked++;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cumack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_REVOKED);
+ }
+ } else if (tp1->sent == SCTP_DATAGRAM_MARKED) {
+ /* it has been re-acked in this SACK */
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT)
+ break;
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ if (tot_revoked > 0) {
+ /*
+ * Setup the ecn nonce re-sync point. We do this since once
+ * data is revoked we begin to retransmit things, which do
+ * NOT have the ECN bits set. This means we are now out of
+ * sync and must wait until we get back in sync with the
+ * peer to check ECN bits.
+ */
+ tp1 = TAILQ_FIRST(&asoc->send_queue);
+ if (tp1 == NULL) {
+ asoc->nonce_resync_tsn = asoc->sending_seq;
+ } else {
+ asoc->nonce_resync_tsn = tp1->rec.data.TSN_seq;
+ }
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_sum_check = 0;
+ }
+}
+
+
+static void
+sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ uint32_t biggest_tsn_acked, uint32_t biggest_tsn_newly_acked, uint32_t this_sack_lowest_newack, int accum_moved)
+{
+ struct sctp_tmit_chunk *tp1;
+ int strike_flag = 0;
+ struct timeval now;
+ int tot_retrans = 0;
+ uint32_t sending_seq;
+ struct sctp_nets *net;
+ int num_dests_sacked = 0;
+
+ /*
+ * select the sending_seq, this is either the next thing ready to be
+ * sent but not transmitted, OR, the next seq we assign.
+ */
+ tp1 = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (tp1 == NULL) {
+ sending_seq = asoc->sending_seq;
+ } else {
+ sending_seq = tp1->rec.data.TSN_seq;
+ }
+
+ /* CMT DAC algo: finding out if SACK is a mixed SACK */
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->saw_newack)
+ num_dests_sacked++;
+ }
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ }
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ strike_flag = 0;
+ if (tp1->no_fr_allowed) {
+ /* this one had a timeout or something */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND)
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_CHECK_STRIKE);
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_acked,
+ MAX_TSN) ||
+ tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ /* done */
+ break;
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ if ((PR_SCTP_TTL_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /* Is it expired? */
+ if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) {
+ /* Yes so drop it */
+ if (tp1->data != NULL) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ SCTP_SO_NOT_LOCKED);
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ }
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ asoc->this_sack_highest_gap, MAX_TSN)) {
+ /* we are beyond the tsn in the sack */
+ break;
+ }
+ if (tp1->sent >= SCTP_DATAGRAM_RESEND) {
+ /* either a RESEND, ACKED, or MARKED */
+ /* skip */
+ if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+ /* Continue striking FWD-TSN chunks */
+ tp1->rec.data.fwd_tsn_cnt++;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ /*
+ * CMT : SFR algo (covers part of DAC and HTNA as well)
+ */
+ if (tp1->whoTo && tp1->whoTo->saw_newack == 0) {
+ /*
+ * No new acks were received for data sent to this
+ * dest. Therefore, according to the SFR algo for
+ * CMT, no data sent to this dest can be marked for
+ * FR using this SACK.
+ */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ } else if (tp1->whoTo && compare_with_wrap(tp1->rec.data.TSN_seq,
+ tp1->whoTo->this_sack_highest_newack, MAX_TSN)) {
+ /*
+ * CMT: New acks were received for data sent to
+ * this dest. But no new acks were seen for data
+ * sent after tp1. Therefore, according to the SFR
+ * algo for CMT, tp1 cannot be marked for FR using
+ * this SACK. This step covers part of the DAC algo
+ * and the HTNA algo as well.
+ */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ /*
+ * Here we check to see if we have already done a FR
+ * and if so we see if the biggest TSN we saw in the sack is
+ * smaller than the recovery point. If so we don't strike
+ * the tsn... otherwise we CAN strike the TSN.
+ */
+ /*
+ * @@@ JRI: Check for CMT if (accum_moved &&
+ * asoc->fast_retran_loss_recovery && (sctp_cmt_on_off ==
+ * 0)) {
+ */
+ if (accum_moved && asoc->fast_retran_loss_recovery) {
+ /*
+ * Strike the TSN if in fast-recovery and cum-ack
+ * moved.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) {
+ /*
+ * CMT DAC algorithm: If SACK flag is set to
+ * 0, then lowest_newack test will not pass
+ * because it would have been set to the
+ * cumack earlier. If it is not already to
+ * be rtx'd, if this is not a mixed sack
+ * and if tp1 is not between two sacked
+ * TSNs, then mark it by
+ * one more. NOTE that we are marking by one
+ * additional time since the SACK DAC flag
+ * indicates that two packets have been
+ * received after this missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack, tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(16 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ tp1->sent++;
+ }
+ }
+ } else if ((tp1->rec.data.doing_fast_retransmit) &&
+ (asoc->sctp_cmt_on_off == 0)) {
+ /*
+ * For those that have done a FR we must take
+ * special consideration if we strike. I.e. the
+ * biggest_newly_acked must be higher than the
+ * sending_seq at the time we did the FR.
+ */
+ if (
+#ifdef SCTP_FR_TO_ALTERNATE
+ /*
+ * If FR's go to new networks, then we must only do
+ * this for singly homed asoc's. However if the FR's
+ * go to the same network (Armando's work) then it's
+ * ok to FR multiple times.
+ */
+ (asoc->numnets < 2)
+#else
+ (1)
+#endif
+ ) {
+
+ if ((compare_with_wrap(biggest_tsn_newly_acked,
+ tp1->rec.data.fast_retran_tsn, MAX_TSN)) ||
+ (biggest_tsn_newly_acked ==
+ tp1->rec.data.fast_retran_tsn)) {
+ /*
+ * Strike the TSN, since this ack is
+ * beyond where things were when we
+ * did a FR.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ strike_flag = 1;
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) {
+ /*
+ * CMT DAC algorithm: If the
+ * SACK flag is set to 0,
+ * then the lowest_newack
+ * test will not pass because
+ * it would have been set to
+ * the cumack earlier. If it
+ * is not already to be
+ * rtx'd, not a mixed sack
+ * and tp1 is not between two
+ * sacked TSNs, then mark it
+ * by one more. NOTE that we
+ * are marking by one
+ * additional time since the
+ * SACK DAC flag indicates
+ * that two packets have
+ * been received after this
+ * missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack,
+ tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(32 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ }
+ }
+ }
+ }
+ /*
+ * JRI: TODO: remove code for HTNA algo. CMT's SFR
+ * algo covers HTNA.
+ */
+ } else if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ biggest_tsn_newly_acked, MAX_TSN)) {
+ /*
+ * We don't strike these: This is the HTNA
+ * algorithm, i.e. we don't strike if our TSN is
+ * larger than the Highest TSN Newly Acked.
+ */
+ ;
+ } else {
+ /* Strike the TSN */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) {
+ /*
+ * CMT DAC algorithm: If the SACK flag is set
+ * to 0, then the lowest_newack test will not
+ * pass because it would have been set to the
+ * cumack earlier. If it is not already to be
+ * rtx'd, not a mixed sack and tp1 is not
+ * between two sacked TSNs, then mark it by
+ * one more. NOTE that we are marking by one
+ * additional time since the SACK DAC flag
+ * indicates that two packets have been
+ * received after this missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack, tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(48 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ tp1->sent++;
+ }
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ struct sctp_nets *alt;
+
+ /* fix counts and things */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND,
+ (tp1->whoTo ? (tp1->whoTo->flight_size) : 0),
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ if (tp1->whoTo) {
+ tp1->whoTo->net_ack++;
+ sctp_flight_size_decrease(tp1);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_INCREASE_PEER_RWND,
+ asoc->peers_rwnd, tp1->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
+ }
+ /* add back to the rwnd */
+ asoc->peers_rwnd += (tp1->send_size + SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
+
+ /* remove from the total flight */
+ sctp_total_flight_decrease(stcb, tp1);
+
+ if ((stcb->asoc.peer_supports_prsctp) &&
+ (PR_SCTP_RTX_ENABLED(tp1->flags))) {
+ /*
+ * Has it been retransmitted tv_sec times? -
+ * we store the retran count there.
+ */
+ if (tp1->snd_count > tp1->rec.data.timetodrop.tv_sec) {
+ /* Yes, so drop it */
+ if (tp1->data != NULL) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ SCTP_SO_NOT_LOCKED);
+ }
+ /* Make sure to flag we had a FR */
+ tp1->whoTo->net_ack++;
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ }
+ /* printf("OK, we are now ready to FR this guy\n"); */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(tp1->rec.data.TSN_seq, tp1->snd_count,
+ 0, SCTP_FR_MARKED);
+ }
+ if (strike_flag) {
+ /* This is a subsequent FR */
+ SCTP_STAT_INCR(sctps_sendmultfastretrans);
+ }
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ if (asoc->sctp_cmt_on_off == 1) {
+ /*
+ * CMT: Using RTX_SSTHRESH policy for CMT.
+ * If CMT is being used, then pick dest with
+ * largest ssthresh for any retransmission.
+ */
+ tp1->no_fr_allowed = 1;
+ alt = tp1->whoTo;
+ /* sa_ignore NO_NULL_CHK */
+ if (asoc->sctp_cmt_pf > 0) {
+ /*
+ * JRS 5/18/07 - If CMT PF is on,
+ * use the PF version of
+ * find_alt_net()
+ */
+ alt = sctp_find_alternate_net(stcb, alt, 2);
+ } else {
+ /*
+ * JRS 5/18/07 - If only CMT is on,
+ * use the CMT version of
+ * find_alt_net()
+ */
+ /* sa_ignore NO_NULL_CHK */
+ alt = sctp_find_alternate_net(stcb, alt, 1);
+ }
+ if (alt == NULL) {
+ alt = tp1->whoTo;
+ }
+ /*
+ * CUCv2: If a different dest is picked for
+ * the retransmission, then new
+ * (rtx-)pseudo_cumack needs to be tracked
+ * for orig dest. Let CUCv2 track new (rtx-)
+ * pseudo-cumack always.
+ */
+ if (tp1->whoTo) {
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+ }
+ } else {/* CMT is OFF */
+
+#ifdef SCTP_FR_TO_ALTERNATE
+ /* Can we find an alternate? */
+ alt = sctp_find_alternate_net(stcb, tp1->whoTo, 0);
+#else
+ /*
+ * default behavior is to NOT retransmit
+ * FR's to an alternate. Armando Caro's
+ * paper details why.
+ */
+ alt = tp1->whoTo;
+#endif
+ }
+
+ tp1->rec.data.doing_fast_retransmit = 1;
+ tot_retrans++;
+ /* mark the sending seq for possible subsequent FR's */
+ /*
+ * printf("Marking TSN for FR new value %x\n",
+ * (uint32_t)tpi->rec.data.TSN_seq);
+ */
+ if (TAILQ_EMPTY(&asoc->send_queue)) {
+ /*
+ * If the send queue is empty then this is
+ * the next sequence number that will be
+ * assigned, so we subtract one from it to
+ * get the one we last sent.
+ */
+ tp1->rec.data.fast_retran_tsn = sending_seq;
+ } else {
+ /*
+ * If there are chunks on the send queue
+ * (unsent data that has made it from the
+ * stream queues but not out the door), we
+ * take the first one (which will have the
+ * lowest TSN) and subtract one to get the
+ * one we last sent.
+ */
+ struct sctp_tmit_chunk *ttt;
+
+ ttt = TAILQ_FIRST(&asoc->send_queue);
+ tp1->rec.data.fast_retran_tsn =
+ ttt->rec.data.TSN_seq;
+ }
+
+ if (tp1->do_rtt) {
+ /*
+ * this guy had an RTO calculation pending on
+ * it, cancel it
+ */
+ tp1->do_rtt = 0;
+ }
+ if (alt != tp1->whoTo) {
+ /* yes, there is an alternate. */
+ sctp_free_remote_addr(tp1->whoTo);
+ /* sa_ignore FREED_MEMORY */
+ tp1->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ } /* while (tp1) */
+
+ if (tot_retrans > 0) {
+ /*
+ * Set up the ECN nonce re-sync point. We do this since once
+ * we fast-retransmit something we introduce a Karn's rule
+ * scenario and won't know the totals for the ECN bits.
+ */
+ asoc->nonce_resync_tsn = sending_seq;
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_sum_check = 0;
+ }
+}
+
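+ /*
+ * Walk the sent queue and try to move the PR-SCTP advanced
+ * peer ack point forward over chunks marked to be skipped by a
+ * FORWARD-TSN, releasing expired RESEND chunks it encounters.
+ * Returns the last chunk the point was advanced to, or NULL if
+ * the peer does not support PR-SCTP.
+ */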
+struct sctp_tmit_chunk *
+sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *tp1, *tp2, *a_adv = NULL;
+ struct timeval now;
+ int now_filled = 0;
+
+ if (asoc->peer_supports_prsctp == 0) {
+ return (NULL);
+ }
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (tp1->sent != SCTP_FORWARD_TSN_SKIP &&
+ tp1->sent != SCTP_DATAGRAM_RESEND) {
+ /* no chance to advance, out of here */
+ break;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
+ if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+ sctp_misc_ints(SCTP_FWD_TSN_CHECK,
+ asoc->advanced_peer_ack_point,
+ tp1->rec.data.TSN_seq, 0, 0);
+ }
+ }
+ if (!PR_SCTP_ENABLED(tp1->flags)) {
+ /*
+ * We can't fwd-tsn past any that are reliable,
+ * i.e. retransmitted until the asoc fails.
+ */
+ break;
+ }
+ if (!now_filled) {
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ now_filled = 1;
+ }
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ /*
+ * Now we have a chunk which is marked for another
+ * retransmission to a PR-stream but has either run out of
+ * its chances already or has been marked to skip now. Can
+ * we skip it if it's a resend?
+ */
+ if (tp1->sent == SCTP_DATAGRAM_RESEND &&
+ (PR_SCTP_TTL_ENABLED(tp1->flags))) {
+ /*
+ * Now is this one marked for resend and its time is
+ * now up?
+ */
+ if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) {
+ /* Yes so drop it */
+ if (tp1->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ SCTP_SO_NOT_LOCKED);
+ }
+ } else {
+ /*
+ * No, we are done when we hit one marked for
+ * resend whose time has not expired.
+ */
+ break;
+ }
+ }
+ /*
+ * Ok now if this chunk is marked to drop it we can clean up
+ * the chunk, advance our peer ack point and we can check
+ * the next chunk.
+ */
+ if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+ /* advance PeerAckPoint goes forward */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ asoc->advanced_peer_ack_point,
+ MAX_TSN)) {
+
+ asoc->advanced_peer_ack_point = tp1->rec.data.TSN_seq;
+ a_adv = tp1;
+ } else if (tp1->rec.data.TSN_seq == asoc->advanced_peer_ack_point) {
+ /* No update but we do save the chk */
+ a_adv = tp1;
+ }
+ } else {
+ /*
+ * If it is still in RESEND we can advance no
+ * further
+ */
+ break;
+ }
+ /*
+ * If we hit here we just dumped tp1, move to next tsn on
+ * sent queue.
+ */
+ tp1 = tp2;
+ }
+ return (a_adv);
+}
+
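+ /*
+ * Audit the flight size bookkeeping against the sent queue: count
+ * the chunks in each send state and, if anything is still counted
+ * as in flight or sits between RESEND and ACKED, panic under
+ * INVARIANTS or log the discrepancy and return non-zero so the
+ * caller can rebuild the counters.
+ */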
+static int
+sctp_fs_audit(struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ int inflight = 0, resend = 0, inbetween = 0, acked = 0, above = 0;
+ int entry_flight, entry_cnt, ret;
+
+ entry_flight = asoc->total_flight;
+ entry_cnt = asoc->total_flight_count;
+ ret = 0;
+
+ if (asoc->pr_sctp_cnt >= asoc->sent_queue_cnt)
+ return (0);
+
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ printf("Chk TSN:%u size:%d inflight cnt:%d\n",
+ chk->rec.data.TSN_seq,
+ chk->send_size,
+ chk->snd_count
+ );
+ inflight++;
+ } else if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ resend++;
+ } else if (chk->sent < SCTP_DATAGRAM_ACKED) {
+ inbetween++;
+ } else if (chk->sent > SCTP_DATAGRAM_ACKED) {
+ above++;
+ } else {
+ acked++;
+ }
+ }
+
+ if ((inflight > 0) || (inbetween > 0)) {
+#ifdef INVARIANTS
+ panic("Flight size-express incorrect? \n");
+#else
+ printf("asoc->total_flight:%d cnt:%d\n",
+ entry_flight, entry_cnt);
+
+ SCTP_PRINTF("Flight size-express incorrect F:%d I:%d R:%d Ab:%d ACK:%d\n",
+ inflight, inbetween, resend, above, acked);
+ ret = 1;
+#endif
+ }
+ return (ret);
+}
+
+
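+ /*
+ * Recover a chunk that was used as a window probe: unless it has
+ * already been acked (or its data freed), take it back out of the
+ * flight size accounting and mark it for retransmission.
+ */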
+static void
+sctp_window_probe_recovery(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_nets *net,
+ struct sctp_tmit_chunk *tp1)
+{
+ tp1->window_probe = 0;
+ if ((tp1->sent >= SCTP_DATAGRAM_ACKED) || (tp1->data == NULL)) {
+ /* TSNs skipped, we do NOT move back. */
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DWN_WP_FWD,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ return;
+ }
+ /* First setup this by shrinking flight */
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ /* Now mark for resend */
+ tp1->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_WP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+}
+
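+ /*
+ * Express SACK handling: process a SACK that carries only a
+ * cumulative ack and rwnd (no gap-ack blocks), updating the sent
+ * queue, congestion control, PR-SCTP state and timers on this
+ * fast path.
+ */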
+void
+sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
+ uint32_t rwnd, int nonce_sum_flag, int *abort_now)
+{
+ struct sctp_nets *net;
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *tp1, *tp2;
+ uint32_t old_rwnd;
+ int win_probe_recovery = 0;
+ int win_probe_recovered = 0;
+ int j, done_once = 0;
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_SACK_ARRIVALS_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_LOG_EXPRESS, cumack,
+ rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd);
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cumack;
+ stcb->asoc.cumack_log_at++;
+ if (stcb->asoc.cumack_log_at > SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_at = 0;
+ }
+#endif
+ asoc = &stcb->asoc;
+ old_rwnd = asoc->peers_rwnd;
+ if (compare_with_wrap(asoc->last_acked_seq, cumack, MAX_TSN)) {
+ /* old ack */
+ return;
+ } else if (asoc->last_acked_seq == cumack) {
+ /* Window update sack */
+ asoc->peers_rwnd = sctp_sbspace_sub(rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh))));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ goto again;
+ }
+ return;
+ }
+ /* First setup for CC stuff */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->prev_cwnd = net->cwnd;
+ net->net_ack = 0;
+ net->net_ack2 = 0;
+
+ /*
+ * CMT: Reset CUC and Fast recovery algo variables before
+ * SACK processing
+ */
+ net->new_pseudo_cumack = 0;
+ net->will_exit_fast_recovery = 0;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_strict_sacks)) {
+ uint32_t send_s;
+
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ tp1 = TAILQ_LAST(&asoc->sent_queue,
+ sctpchunk_listhead);
+ send_s = tp1->rec.data.TSN_seq + 1;
+ } else {
+ send_s = asoc->sending_seq;
+ }
+ if ((cumack == send_s) ||
+ compare_with_wrap(cumack, send_s, MAX_TSN)) {
+#ifndef INVARIANTS
+ struct mbuf *oper;
+
+#endif
+#ifdef INVARIANTS
+ panic("Impossible sack 1");
+#else
+
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+#endif
+ }
+ }
+ asoc->this_sack_highest_gap = cumack;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ if (compare_with_wrap(cumack, asoc->last_acked_seq, MAX_TSN)) {
+ /* process the new consecutive TSN first */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ if (compare_with_wrap(cumack, tp1->rec.data.TSN_seq,
+ MAX_TSN) ||
+ cumack == tp1->rec.data.TSN_seq) {
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ printf("Warning, an unsent is now acked?\n");
+ }
+ /*
+ * ECN Nonce: Add the nonce to the sender's
+ * nonce sum
+ */
+ asoc->nonce_sum_expect_base += tp1->rec.data.ect_nonce;
+ if (tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /*
+ * If it is less than ACKED, it is
+ * now no longer in flight. Higher
+ * values may occur during marking
+ */
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ /* sa_ignore NO_NULL_CHK */
+ sctp_total_flight_decrease(stcb, tp1);
+ }
+ tp1->whoTo->net_ack += tp1->send_size;
+ if (tp1->snd_count < 2) {
+ /*
+ * True non-retransmitted
+ * chunk
+ */
+ tp1->whoTo->net_ack2 +=
+ tp1->send_size;
+
+ /* update RTO too? */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ /*
+ * sa_ignore
+ * NO_NULL_CHK
+ */
+ sctp_calculate_rto(stcb,
+ asoc, tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ /*
+ * CMT: CUCv2 algorithm. From the
+ * cumack'd TSNs, for each TSN being
+ * acked for the first time, set the
+ * following variables for the
+ * corresp destination.
+ * new_pseudo_cumack will trigger a
+ * cwnd update.
+ * find_(rtx_)pseudo_cumack will
+ * trigger search for the next
+ * expected (rtx-)pseudo-cumack.
+ */
+ tp1->whoTo->new_pseudo_cumack = 1;
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ }
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
+ if (tp1->data) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_free_bufspace(stcb, asoc, tp1, 1);
+ sctp_m_freem(tp1->data);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cumack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_FREE_SENT);
+ }
+ tp1->data = NULL;
+ asoc->sent_queue_cnt--;
+ sctp_free_a_chunk(stcb, tp1);
+ tp1 = tp2;
+ } else {
+ break;
+ }
+ }
+
+ }
+ /* sa_ignore NO_NULL_CHK */
+ if (stcb->sctp_socket) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ SOCKBUF_LOCK(&stcb->sctp_socket->so_snd);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_wakeup_log(stcb, cumack, 1, SCTP_WAKESND_FROM_SACK);
+ }
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cumack, 1, SCTP_NOWAKE_FROM_SACK);
+ }
+ }
+
+ /* JRS - Use the congestion control given in the CC module */
+ if (asoc->last_acked_seq != cumack)
+ asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, 1, 0, 0);
+
+ asoc->last_acked_seq = cumack;
+
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left in-flight */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ net->partial_bytes_acked = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ }
+ /* ECN Nonce updates */
+ if (asoc->ecn_nonce_allowed) {
+ if (asoc->nonce_sum_check) {
+ if (nonce_sum_flag != ((asoc->nonce_sum_expect_base) & SCTP_SACK_NONCE_SUM)) {
+ if (asoc->nonce_wait_for_ecne == 0) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+ asoc->nonce_wait_for_ecne = 1;
+ if (lchk) {
+ asoc->nonce_wait_tsn = lchk->rec.data.TSN_seq;
+ } else {
+ asoc->nonce_wait_tsn = asoc->sending_seq;
+ }
+ } else {
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_wait_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->nonce_wait_tsn)) {
+ /*
+ * Misbehaving peer. We need
+ * to react to this guy
+ */
+ asoc->ecn_allowed = 0;
+ asoc->ecn_nonce_allowed = 0;
+ }
+ }
+ }
+ } else {
+ /* See if Resynchronization Possible */
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_resync_tsn, MAX_TSN)) {
+ asoc->nonce_sum_check = 1;
+ /*
+ * Now we must calculate what the base is.
+ * We do this based on two things: we know
+ * the totals for all the segments
+ * gap-acked in the SACK (none). We also
+ * know the SACK's nonce sum, it is in
+ * nonce_sum_flag. So we can build a truth
+ * table to back-calculate the new value of
+ * asoc->nonce_sum_expect_base:
+ *
+ * SACK-flag-Value  Seg-Sums  Base
+ *        0            0       0
+ *        1            0       1
+ *        0            1       1
+ *        1            1       0
+ */
+ asoc->nonce_sum_expect_base = (0 ^ nonce_sum_flag) & SCTP_SACK_NONCE_SUM;
+ }
+ }
+ }
+ /* RWND update */
+ asoc->peers_rwnd = sctp_sbspace_sub(rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh))));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ win_probe_recovery = 1;
+ }
+ /* Now assure a timer where data is queued at */
+again:
+ j = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ int to_ticks;
+
+ if (win_probe_recovery && (net->window_probe)) {
+ win_probe_recovered = 1;
+ /*
+ * Find the first chunk that was used with the window probe
+ * and clear its window probe flag, marking it for resend
+ */
+ /* sa_ignore FREED_MEMORY */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->window_probe) {
+ /* move back to data send queue */
+ sctp_window_probe_recovery(stcb, asoc, net, tp1);
+ break;
+ }
+ }
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ if (net->flight_size) {
+ j++;
+ (void)SCTP_OS_TIMER_START(&net->rxt_timer.timer, to_ticks,
+ sctp_timeout_handler, &net->rxt_timer);
+ if (net->window_probe) {
+ net->window_probe = 0;
+ }
+ } else {
+ if (net->window_probe) {
+ /*
+ * In window probes we must assure a timer
+ * is still running there
+ */
+ net->window_probe = 0;
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ SCTP_OS_TIMER_START(&net->rxt_timer.timer, to_ticks,
+ sctp_timeout_handler, &net->rxt_timer);
+ }
+ } else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_23);
+ }
+ }
+ }
+ }
+ if ((j == 0) &&
+ (!TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0) &&
+ (win_probe_recovered == 0) &&
+ (done_once == 0)) {
+ /*
+ * huh, this should not happen unless all packets are
+ * PR-SCTP and marked to skip of course.
+ */
+ if (sctp_fs_audit(asoc)) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ asoc->sent_queue_retran_cnt = 0;
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ } else if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ }
+ }
+ done_once = 1;
+ goto again;
+ }
+ /**********************************/
+ /* Now what about shutdown issues */
+ /**********************************/
+ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left on sendqueue.. consider done */
+ /* clean up */
+ if ((asoc->stream_queue_cnt == 1) &&
+ ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (asoc->locked_on_sending)
+ ) {
+ struct sctp_stream_queue_pending *sp;
+
+ /*
+ * I may be in a state where we got it all across... but
+ * cannot write more due to a shutdown... we abort
+ * since the user did not indicate EOR in this case.
+ * The sp will be cleaned during free of the asoc.
+ */
+ sp = TAILQ_LAST(&((asoc->locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if ((sp) && (sp->length == 0)) {
+ /* Let cleanup code purge it */
+ if (sp->msg_is_complete) {
+ asoc->stream_queue_cnt--;
+ } else {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ asoc->locked_on_sending = NULL;
+ asoc->stream_queue_cnt--;
+ }
+ }
+ }
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ /* Need to abort here */
+ struct mbuf *oper;
+
+ abort_out_now:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_24);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED);
+ } else {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ goto abort_out_now;
+ }
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ }
+ /*********************************************/
+ /* Here we perform PR-SCTP procedures */
+ /* (section 4.2) */
+ /*********************************************/
+ /* C1. update advancedPeerAckPoint */
+ if (compare_with_wrap(cumack, asoc->advanced_peer_ack_point, MAX_TSN)) {
+ asoc->advanced_peer_ack_point = cumack;
+ }
+ /* PR-SCTP issues need to be addressed too */
+ if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+ struct sctp_tmit_chunk *lchk;
+ uint32_t old_adv_peer_ack_point;
+
+ old_adv_peer_ack_point = asoc->advanced_peer_ack_point;
+ lchk = sctp_try_advance_peer_ack_point(stcb, asoc);
+ /* C3. See if we need to send a Fwd-TSN */
+ if (compare_with_wrap(asoc->advanced_peer_ack_point, cumack,
+ MAX_TSN)) {
+ /*
+ * ISSUE with ECN, see FWD-TSN processing for notes
+ * on issues that will occur when the ECN NONCE
+ * stuff is put into SCTP for cross checking.
+ */
+ if (compare_with_wrap(asoc->advanced_peer_ack_point, old_adv_peer_ack_point,
+ MAX_TSN)) {
+ send_forward_tsn(stcb, asoc);
+ /*
+ * ECN Nonce: Disable Nonce Sum check when
+ * FWD TSN is sent and store resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->advanced_peer_ack_point;
+ } else if (lchk) {
+ /* try to FR fwd-tsn's that get lost too */
+ if (lchk->rec.data.fwd_tsn_cnt >= 3) {
+ send_forward_tsn(stcb, asoc);
+ }
+ }
+ }
+ if (lchk) {
+ /* Assure a timer is up */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, lchk->whoTo);
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_RWND_UPDATE,
+ rwnd,
+ stcb->asoc.peers_rwnd,
+ stcb->asoc.total_flight,
+ stcb->asoc.total_output_queue_size);
+ }
+}
+
+void
+sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
+ struct sctp_tcb *stcb, struct sctp_nets *net_from,
+ uint16_t num_seg, uint16_t num_nr_seg, uint16_t num_dup,
+ int *abort_now, uint8_t flags,
+ uint32_t cum_ack, uint32_t rwnd)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *tp1, *tp2;
+ uint32_t last_tsn, biggest_tsn_acked, biggest_tsn_newly_acked, this_sack_lowest_newack;
+ uint32_t sav_cum_ack;
+ uint16_t wake_him = 0;
+ uint32_t send_s = 0;
+ long j;
+ int accum_moved = 0;
+ int will_exit_fast_recovery = 0;
+ uint32_t a_rwnd, old_rwnd;
+ int win_probe_recovery = 0;
+ int win_probe_recovered = 0;
+ struct sctp_nets *net = NULL;
+ int nonce_sum_flag, ecn_seg_sums = 0;
+ int done_once;
+ uint8_t reneged_all = 0;
+ uint8_t cmt_dac_flag;
+
+ /*
+ * we take any chance we can to service our queues since we cannot
+ * get awoken when the socket is read from :<
+ */
+ /*
+ * Now perform the actual SACK handling:
+ * 1) Verify that it is not an old sack, if so discard.
+ * 2) If there is nothing left in the send queue (cum-ack is equal
+ *    to last acked) then you have a duplicate too; update any rwnd
+ *    change, verify no timers are running, then return.
+ * 3) Process any new consecutive data, i.e. cum-ack moved; process
+ *    these first and note that it moved.
+ * 4) Process any sack blocks.
+ * 5) Drop any acked chunks from the queue.
+ * 6) Check for any revoked blocks and mark them.
+ * 7) Update the cwnd.
+ * 8) Nothing left, sync up flightsizes and things, stop all timers
+ *    and also check for shutdown_pending state. If so then go ahead
+ *    and send off the shutdown. If in shutdown recv, send off the
+ *    shutdown-ack and start that timer, then return.
+ * 9) Strike any non-acked things and do FR procedure if needed,
+ *    being sure to set the FR flag.
+ * 10) Do PR-SCTP procedures.
+ * 11) Apply any FR penalties.
+ * 12) Assure we will SACK if in shutdown_recv state.
+ */
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* CMT DAC algo */
+ this_sack_lowest_newack = 0;
+ j = 0;
+ SCTP_STAT_INCR(sctps_slowpath_sack);
+ last_tsn = cum_ack;
+ nonce_sum_flag = flags & SCTP_SACK_NONCE_SUM;
+ cmt_dac_flag = flags & SCTP_SACK_CMT_DAC;
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cum_ack;
+ stcb->asoc.cumack_log_at++;
+ if (stcb->asoc.cumack_log_at > SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_at = 0;
+ }
+#endif
+ a_rwnd = rwnd;
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_SACK_ARRIVALS_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_LOG_NORMAL, cum_ack,
+ rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd);
+ }
+ old_rwnd = stcb->asoc.peers_rwnd;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ asoc = &stcb->asoc;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ 0,
+ num_seg,
+ num_dup,
+ SCTP_LOG_NEW_SACK);
+ }
+ if ((num_dup) && (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_FR_LOGGING_ENABLE | SCTP_EARLYFR_LOGGING_ENABLE))) {
+ uint16_t i;
+ uint32_t *dupdata, dblock;
+
+ for (i = 0; i < num_dup; i++) {
+ dupdata = (uint32_t *) sctp_m_getptr(m, offset_dup + i * sizeof(uint32_t),
+ sizeof(uint32_t), (uint8_t *) & dblock);
+ if (dupdata == NULL) {
+ break;
+ }
+ sctp_log_fr(*dupdata, 0, 0, SCTP_FR_DUPED);
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_strict_sacks)) {
+ /* reality check */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ tp1 = TAILQ_LAST(&asoc->sent_queue,
+ sctpchunk_listhead);
+ send_s = tp1->rec.data.TSN_seq + 1;
+ } else {
+ tp1 = NULL;
+ send_s = asoc->sending_seq;
+ }
+ if (cum_ack == send_s ||
+ compare_with_wrap(cum_ack, send_s, MAX_TSN)) {
+ struct mbuf *oper;
+
+ /*
+ * no way, we have not even sent this TSN out yet.
+ * Peer is hopelessly messed up with us.
+ */
+ printf("NEW cum_ack:%x send_s:%x is smaller or equal\n",
+ cum_ack, send_s);
+ if (tp1) {
+ printf("Got send_s from tsn:%x + 1 of tp1:%p\n",
+ tp1->rec.data.TSN_seq, tp1);
+ }
+ hopeless_peer:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+ }
+ }
+ /**********************/
+ /* 1) check the range */
+ /**********************/
+ if (compare_with_wrap(asoc->last_acked_seq, last_tsn, MAX_TSN)) {
+ /* acking something behind */
+ return;
+ }
+ sav_cum_ack = asoc->last_acked_seq;
+
+ /* update the Rwnd of the peer */
+ if (TAILQ_EMPTY(&asoc->sent_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ (asoc->stream_queue_cnt == 0)) {
+ /* nothing left on send/sent and strmq */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, 0, 0, a_rwnd);
+ }
+ asoc->peers_rwnd = a_rwnd;
+ if (asoc->sent_queue_retran_cnt) {
+ asoc->sent_queue_retran_cnt = 0;
+ }
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ /* stop any timers */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_26);
+ if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck1);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_26);
+ }
+ }
+ net->partial_bytes_acked = 0;
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ return;
+ }
+ /*
+ * We init netAckSz and netAckSz2 to 0. These are used to track 2
+ * things. The total byte count acked is tracked in netAckSz AND
+ * netAck2 is used to track the total bytes acked that are
+ * unambiguous and were never retransmitted. We track these on a
+ * per destination address basis.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->prev_cwnd = net->cwnd;
+ net->net_ack = 0;
+ net->net_ack2 = 0;
+
+ /*
+ * CMT: Reset CUC and Fast recovery algo variables before
+ * SACK processing
+ */
+ net->new_pseudo_cumack = 0;
+ net->will_exit_fast_recovery = 0;
+ }
+ /* process the new consecutive TSN first */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (compare_with_wrap(last_tsn, tp1->rec.data.TSN_seq,
+ MAX_TSN) ||
+ last_tsn == tp1->rec.data.TSN_seq) {
+ if (tp1->sent != SCTP_DATAGRAM_UNSENT) {
+ /*
+ * ECN Nonce: Add the nonce to the sender's
+ * nonce sum
+ */
+ asoc->nonce_sum_expect_base += tp1->rec.data.ect_nonce;
+ accum_moved = 1;
+ if (tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /*
+ * If it is less than ACKED, it is
+ * now no longer in flight. Higher
+ * values may occur during marking
+ */
+ if ((tp1->whoTo->dest_state &
+ SCTP_ADDR_UNCONFIRMED) &&
+ (tp1->snd_count < 2)) {
+ /*
+ * If there was no retran
+ * and the address is
+ * un-confirmed and we sent
+ * there and are now
+ * sacked... it's confirmed,
+ * mark it so.
+ */
+ tp1->whoTo->dest_state &=
+ ~SCTP_ADDR_UNCONFIRMED;
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ }
+ tp1->whoTo->net_ack += tp1->send_size;
+
+ /* CMT SFR and DAC algos */
+ this_sack_lowest_newack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->saw_newack = 1;
+
+ if (tp1->snd_count < 2) {
+ /*
+ * True non-retransmitted
+ * chunk
+ */
+ tp1->whoTo->net_ack2 +=
+ tp1->send_size;
+
+ /* update RTO too? */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ sctp_calculate_rto(stcb,
+ asoc, tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ /*
+ * CMT: CUCv2 algorithm. From the
+ * cumack'd TSNs, for each TSN being
+ * acked for the first time, set the
+ * following variables for the
+ * corresp destination.
+ * new_pseudo_cumack will trigger a
+ * cwnd update.
+ * find_(rtx_)pseudo_cumack will
+ * trigger search for the next
+ * expected (rtx-)pseudo-cumack.
+ */
+ tp1->whoTo->new_pseudo_cumack = 1;
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_ACKED);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB3,
+ (asoc->sent_queue_retran_cnt & 0x000000ff));
+#endif
+ }
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ }
+ } else {
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ biggest_tsn_newly_acked = biggest_tsn_acked = last_tsn;
+ /* always set this up to cum-ack */
+ asoc->this_sack_highest_gap = last_tsn;
+
+ if ((num_seg > 0) || (num_nr_seg > 0)) {
+
+ /*
+ * CMT: SFR algo (and HTNA) - this_sack_highest_newack has
+ * to be greater than the cumack. Also reset saw_newack to 0
+ * for all dests.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->saw_newack = 0;
+ net->this_sack_highest_newack = last_tsn;
+ }
+
+ /*
+ * thisSackHighestGap will increase while handling NEW
+ * segments; this_sack_highest_newack will increase while
+ * handling NEWLY ACKED chunks. this_sack_lowest_newack is
+ * used for the CMT DAC algo. saw_newack will also change.
+ */
+ if (sctp_handle_segments(m, &offset_seg, stcb, asoc, last_tsn, &biggest_tsn_acked,
+ &biggest_tsn_newly_acked, &this_sack_lowest_newack,
+ num_seg, num_nr_seg, &ecn_seg_sums)) {
+ wake_him++;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_strict_sacks)) {
+ /*
+ * validate the biggest_tsn_acked in the gap acks if
+ * strict adherence is wanted.
+ */
+ if ((biggest_tsn_acked == send_s) ||
+ (compare_with_wrap(biggest_tsn_acked, send_s, MAX_TSN))) {
+ /*
+ * peer is either confused or we are under
+ * attack. We must abort.
+ */
+ printf("Hopeless peer! biggest_tsn_acked:%x largest seq:%x\n",
+ biggest_tsn_acked,
+ send_s);
+
+ goto hopeless_peer;
+ }
+ }
+ }
+ /*******************************************/
+ /* cancel ALL T3-send timer if accum moved */
+ /*******************************************/
+ if (asoc->sctp_cmt_on_off == 1) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->new_pseudo_cumack)
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_27);
+
+ }
+ } else {
+ if (accum_moved) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_28);
+ }
+ }
+ }
+ /********************************************/
+ /* drop the acked chunks from the sentqueue */
+ /********************************************/
+ asoc->last_acked_seq = cum_ack;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ if (tp1 == NULL)
+ goto done_with_it;
+ do {
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, cum_ack,
+ MAX_TSN)) {
+ break;
+ }
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ /* no more sent on list */
+ printf("Warning, tp1->sent == %d and its now acked?\n",
+ tp1->sent);
+ }
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
+ if (tp1->pr_sctp_on) {
+ if (asoc->pr_sctp_cnt != 0)
+ asoc->pr_sctp_cnt--;
+ }
+ if (TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->total_flight > 0)) {
+#ifdef INVARIANTS
+ panic("Warning flight size is postive and should be 0");
+#else
+ SCTP_PRINTF("Warning flight size incorrect should be 0 is %d\n",
+ asoc->total_flight);
+#endif
+ asoc->total_flight = 0;
+ }
+ if (tp1->data) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_free_bufspace(stcb, asoc, tp1, 1);
+ sctp_m_freem(tp1->data);
+ if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(tp1->flags)) {
+ asoc->sent_queue_cnt_removeable--;
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_FREE_SENT);
+ }
+ tp1->data = NULL;
+ asoc->sent_queue_cnt--;
+ sctp_free_a_chunk(stcb, tp1);
+ wake_him++;
+ tp1 = tp2;
+ } while (tp1 != NULL);
+
+done_with_it:
+ /* sa_ignore NO_NULL_CHK */
+ if ((wake_him) && (stcb->sctp_socket)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ SOCKBUF_LOCK(&stcb->sctp_socket->so_snd);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cum_ack, wake_him, SCTP_WAKESND_FROM_SACK);
+ }
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cum_ack, wake_him, SCTP_NOWAKE_FROM_SACK);
+ }
+ }
+
+ if (asoc->fast_retran_loss_recovery && accum_moved) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ asoc->fast_recovery_tsn, MAX_TSN) ||
+ asoc->last_acked_seq == asoc->fast_recovery_tsn) {
+ /* Setup so we will exit RFC2582 fast recovery */
+ will_exit_fast_recovery = 1;
+ }
+ }
+ /*
+ * Check for revoked fragments:
+ *
+ * If the previous sack had no frags then we cannot have any revoked.
+ * If the previous sack had frags then:
+ * - if we now have frags (num_seg > 0), call sctp_check_for_revoked()
+ *   to tell if the peer revoked some of them;
+ * - else the peer revoked all ACKED fragments, since we had some
+ *   before and now we have NONE.
+ */
+
+ if (num_seg) {
+ sctp_check_for_revoked(stcb, asoc, cum_ack, biggest_tsn_acked);
+ asoc->saw_sack_with_frags = 1;
+ } else if (asoc->saw_sack_with_frags) {
+ int cnt_revoked = 0;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ if (tp1 != NULL) {
+ /* Peer revoked all dg's marked or acked */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->sent == SCTP_DATAGRAM_ACKED) {
+ tp1->sent = SCTP_DATAGRAM_SENT;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ tp1->rec.data.chunk_was_revoked = 1;
+ /*
+ * To ensure that this increase in
+ * flightsize, which is artificial,
+ * does not throttle the sender, we
+ * also increase the cwnd
+ * artificially.
+ */
+ tp1->whoTo->cwnd += tp1->book_size;
+ cnt_revoked++;
+ }
+ }
+ if (cnt_revoked) {
+ reneged_all = 1;
+ }
+ }
+ asoc->saw_sack_with_frags = 0;
+ }
+ if (num_nr_seg > 0)
+ asoc->saw_sack_with_nr_frags = 1;
+ else
+ asoc->saw_sack_with_nr_frags = 0;
+
+ /* JRS - Use the congestion control given in the CC module */
+ asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery);
+
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left in-flight */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ /* stop all timers */
+ if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_29);
+ }
+ }
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
+ net->flight_size = 0;
+ net->partial_bytes_acked = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ }
+ /**********************************/
+ /* Now what about shutdown issues */
+ /**********************************/
+ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left on sendqueue.. consider done */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, 0, 0, a_rwnd);
+ }
+ asoc->peers_rwnd = a_rwnd;
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ /* clean up */
+ if ((asoc->stream_queue_cnt == 1) &&
+ ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (asoc->locked_on_sending)
+ ) {
+ struct sctp_stream_queue_pending *sp;
+
+ /*
+ * I may be in a state where we got it all across... but
+ * cannot write more due to a shutdown... we abort
+ * since the user did not indicate EOR in this case.
+ */
+ sp = TAILQ_LAST(&((asoc->locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if ((sp) && (sp->length == 0)) {
+ asoc->locked_on_sending = NULL;
+ if (sp->msg_is_complete) {
+ asoc->stream_queue_cnt--;
+ } else {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ asoc->stream_queue_cnt--;
+ }
+ }
+ }
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ /* Need to abort here */
+ struct mbuf *oper;
+
+ abort_out_now:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_31);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_31;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED);
+ return;
+ } else {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ return;
+ } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ goto abort_out_now;
+ }
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ return;
+ }
+ }
+ /*
+ * Now here we are going to recycle net_ack for a different use...
+ * HEADS UP.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->net_ack = 0;
+ }
+
+ /*
+ * CMT DAC algorithm: If SACK DAC flag was 0, then no extra marking
+ * to be done. Setting this_sack_lowest_newack to the cum_ack will
+ * automatically ensure that.
+ */
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ SCTP_BASE_SYSCTL(sctp_cmt_use_dac) &&
+ (cmt_dac_flag == 0)) {
+ this_sack_lowest_newack = cum_ack;
+ }
+ if ((num_seg > 0) || (num_nr_seg > 0)) {
+ sctp_strike_gap_ack_chunks(stcb, asoc, biggest_tsn_acked,
+ biggest_tsn_newly_acked, this_sack_lowest_newack, accum_moved);
+ }
+ /* JRS - Use the congestion control given in the CC module */
+ asoc->cc_functions.sctp_cwnd_update_after_fr(stcb, asoc);
+
+ /******************************************************************
+ * Here we do the stuff with ECN Nonce checking.
+ * We basically check to see if the nonce sum flag was incorrect
+ * or if resynchronization needs to be done. Also if we catch a
+ * misbehaving receiver we give him the kick.
+ ******************************************************************/
+
+ if (asoc->ecn_nonce_allowed) {
+ if (asoc->nonce_sum_check) {
+ if (nonce_sum_flag != ((asoc->nonce_sum_expect_base + ecn_seg_sums) & SCTP_SACK_NONCE_SUM)) {
+ if (asoc->nonce_wait_for_ecne == 0) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+ asoc->nonce_wait_for_ecne = 1;
+ if (lchk) {
+ asoc->nonce_wait_tsn = lchk->rec.data.TSN_seq;
+ } else {
+ asoc->nonce_wait_tsn = asoc->sending_seq;
+ }
+ } else {
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_wait_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->nonce_wait_tsn)) {
+ /*
+ * Misbehaving peer. We need
+ * to react to this guy
+ */
+ asoc->ecn_allowed = 0;
+ asoc->ecn_nonce_allowed = 0;
+ }
+ }
+ }
+ } else {
+ /* See if Resynchronization Possible */
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_resync_tsn, MAX_TSN)) {
+ asoc->nonce_sum_check = 1;
+ /*
+ * Now we must calculate what the base is.
+ * We do this based on two things: we know
+ * the totals for all the segments
+ * gap-acked in the SACK, stored in
+ * ecn_seg_sums. We also know the SACK's
+ * nonce sum, it is in nonce_sum_flag. So we
+ * can build a truth table to back-calculate
+ * the new value of
+ * asoc->nonce_sum_expect_base:
+ *
+ * SACK-flag-Value  Seg-Sums  Base
+ *        0            0       0
+ *        1            0       1
+ *        0            1       1
+ *        1            1       0
+ */
+ asoc->nonce_sum_expect_base = (ecn_seg_sums ^ nonce_sum_flag) & SCTP_SACK_NONCE_SUM;
+ }
+ }
+ }
+ /* Now are we exiting loss recovery ? */
+ if (will_exit_fast_recovery) {
+ /* Ok, we must exit fast recovery */
+ asoc->fast_retran_loss_recovery = 0;
+ }
+ if ((asoc->sat_t3_loss_recovery) &&
+ ((compare_with_wrap(asoc->last_acked_seq, asoc->sat_t3_recovery_tsn,
+ MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->sat_t3_recovery_tsn)))) {
+ /* end satellite t3 loss recovery */
+ asoc->sat_t3_loss_recovery = 0;
+ }
+ /*
+ * CMT Fast recovery
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->will_exit_fast_recovery) {
+ /* Ok, we must exit fast recovery */
+ net->fast_retran_loss_recovery = 0;
+ }
+ }
+
+ /* Adjust and set the new rwnd value */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, asoc->total_flight, (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)), a_rwnd);
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(a_rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh))));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ win_probe_recovery = 1;
+ }
+ /*
+ * Now we must setup so we have a timer up for anyone with
+ * outstanding data.
+ */
+ done_once = 0;
+again:
+ j = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (win_probe_recovery && (net->window_probe)) {
+ win_probe_recovered = 1;
+ /*-
+ * Find first chunk that was used with
+ * window probe and clear the event. Put
+ * it back into the send queue as if it has
+ * not been sent.
+ */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->window_probe) {
+ sctp_window_probe_recovery(stcb, asoc, net, tp1);
+ break;
+ }
+ }
+ }
+ if (net->flight_size) {
+ j++;
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ if (net->window_probe) {
+ net->window_probe = 0;
+ }
+ } else {
+ if (net->window_probe) {
+ /*
+ * In window probes we must assure a timer
+ * is still running there
+ */
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+
+ }
+ } else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_23);
+ }
+ }
+ }
+ }
+ if ((j == 0) &&
+ (!TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0) &&
+ (win_probe_recovered == 0) &&
+ (done_once == 0)) {
+ /*
+ * huh, this should not happen unless all packets are
+ * PR-SCTP and marked to skip of course.
+ */
+ if (sctp_fs_audit(asoc)) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ asoc->sent_queue_retran_cnt = 0;
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ } else if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ }
+ }
+ done_once = 1;
+ goto again;
+ }
+ /*********************************************/
+ /* Here we perform PR-SCTP procedures */
+ /* (section 4.2) */
+ /*********************************************/
+ /* C1. update advancedPeerAckPoint */
+ if (compare_with_wrap(cum_ack, asoc->advanced_peer_ack_point, MAX_TSN)) {
+ asoc->advanced_peer_ack_point = cum_ack;
+ }
+ /* C2. try to further move advancedPeerAckPoint ahead */
+ if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+ struct sctp_tmit_chunk *lchk;
+ uint32_t old_adv_peer_ack_point;
+
+ old_adv_peer_ack_point = asoc->advanced_peer_ack_point;
+ lchk = sctp_try_advance_peer_ack_point(stcb, asoc);
+ /* C3. See if we need to send a Fwd-TSN */
+ if (compare_with_wrap(asoc->advanced_peer_ack_point, cum_ack,
+ MAX_TSN)) {
+ /*
+ * ISSUE with ECN, see FWD-TSN processing for notes
+ * on issues that will occur when the ECN NONCE
+ * stuff is put into SCTP for cross checking.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
+ sctp_misc_ints(SCTP_FWD_TSN_CHECK,
+ 0xee, cum_ack, asoc->advanced_peer_ack_point,
+ old_adv_peer_ack_point);
+ }
+ if (compare_with_wrap(asoc->advanced_peer_ack_point, old_adv_peer_ack_point,
+ MAX_TSN)) {
+
+ send_forward_tsn(stcb, asoc);
+ /*
+ * ECN Nonce: Disable Nonce Sum check when
+ * FWD TSN is sent and store resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->advanced_peer_ack_point;
+ } else if (lchk) {
+ /* try to FR fwd-tsn's that get lost too */
+ if (lchk->rec.data.fwd_tsn_cnt >= 3) {
+ send_forward_tsn(stcb, asoc);
+ }
+ }
+ }
+ if (lchk) {
+ /* Assure a timer is up */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, lchk->whoTo);
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_RWND_UPDATE,
+ a_rwnd,
+ stcb->asoc.peers_rwnd,
+ stcb->asoc.total_flight,
+ stcb->asoc.total_output_queue_size);
+ }
+}
+
+void
+sctp_update_acked(struct sctp_tcb *stcb, struct sctp_shutdown_chunk *cp,
+ struct sctp_nets *netp, int *abort_flag)
+{
+ /* Copy cum-ack */
+ uint32_t cum_ack, a_rwnd;
+
+ cum_ack = ntohl(cp->cumulative_tsn_ack);
+ /* Arrange so a_rwnd does NOT change */
+ a_rwnd = stcb->asoc.peers_rwnd + stcb->asoc.total_flight;
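+ /*
+ * Note: the express SACK handler recomputes peers_rwnd by
+ * subtracting the flight size again, so adding total_flight
+ * here is presumably what keeps the advertised rwnd unchanged.
+ */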
+
+ /* Now call the express sack handling */
+ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, 0, abort_flag);
+}
+
+static void
+sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb,
+ struct sctp_stream_in *strmin)
+{
+ struct sctp_queued_to_read *ctl, *nctl;
+ struct sctp_association *asoc;
+ uint16_t tt;
+
+ asoc = &stcb->asoc;
+ tt = strmin->last_sequence_delivered;
+ /*
+ * First deliver anything prior to and including the stream
+ * sequence number that came in.
+ */
+ ctl = TAILQ_FIRST(&strmin->inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (compare_with_wrap(tt, ctl->sinfo_ssn, MAX_SEQ) ||
+ (tt == ctl->sinfo_ssn)) {
+ /* this is deliverable now */
+ TAILQ_REMOVE(&strmin->inqueue, ctl, next);
+ /* subtract pending on streams */
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ /* deliver it to at least the delivery-q */
+ if (stcb->sctp_socket) {
+ sctp_mark_non_revokable(asoc, ctl->sinfo_tsn);
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED);
+ }
+ } else {
+ /* no more delivery now. */
+ break;
+ }
+ ctl = nctl;
+ }
+ /*
+ * Now we must deliver things in the queue the normal way, if
+ * any are now ready.
+ */
+ tt = strmin->last_sequence_delivered + 1;
+ ctl = TAILQ_FIRST(&strmin->inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (tt == ctl->sinfo_ssn) {
+ /* this is deliverable now */
+ TAILQ_REMOVE(&strmin->inqueue, ctl, next);
+ /* subtract pending on streams */
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ /* deliver it to at least the delivery-q */
+ strmin->last_sequence_delivered = ctl->sinfo_ssn;
+ if (stcb->sctp_socket) {
+ sctp_mark_non_revokable(asoc, ctl->sinfo_tsn);
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED);
+
+ }
+ tt = strmin->last_sequence_delivered + 1;
+ } else {
+ break;
+ }
+ ctl = nctl;
+ }
+}
+
+static void
+sctp_flush_reassm_for_str_seq(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ uint16_t stream, uint16_t seq)
+{
+ struct sctp_tmit_chunk *chk, *at;
+
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /* For each one on here see if we need to toss it */
+ /*
+ * For now, large messages held on the reasmqueue that are
+ * complete will be tossed too. We could in theory do more
+ * work, spinning through and stopping after dumping one msg
+ * (i.e. on seeing the start of a new msg at the head) and
+ * calling the delivery function to see if it can be
+ * delivered. But for now we just dump everything on the
+ * queue.
+ */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ at = TAILQ_NEXT(chk, sctp_next);
+ /*
+ * Do not toss it if on a different stream or marked
+ * for unordered delivery in which case the stream
+ * sequence number has no meaning.
+ */
+ if ((chk->rec.data.stream_number != stream) ||
+ ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == SCTP_DATA_UNORDERED)) {
+ chk = at;
+ continue;
+ }
+ if (chk->rec.data.stream_seq == seq) {
+ /* It needs to be tossed */
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (compare_with_wrap(chk->rec.data.TSN_seq,
+ asoc->tsn_last_delivered, MAX_TSN)) {
+ asoc->tsn_last_delivered =
+ chk->rec.data.TSN_seq;
+ asoc->str_of_pdapi =
+ chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi =
+ chk->rec.data.stream_seq;
+ asoc->fragment_flags =
+ chk->rec.data.rcv_flags;
+ }
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+
+ /* Clear up any stream problem */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
+ SCTP_DATA_UNORDERED &&
+ (compare_with_wrap(chk->rec.data.stream_seq,
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered,
+ MAX_SEQ))) {
+ /*
+ * We must move forward this stream's
+ * sequence number if the chunk being
+ * skipped is not unordered. There is
+ * a chance that if the peer does not
+ * include the last fragment in its
+ * FWD-TSN we WILL have a problem
+ * here, since you would have a
+ * partial chunk in the queue that may
+ * not be deliverable. Also, if a
+ * partial delivery API has started,
+ * the user may get a partial chunk
+ * with the next read returning a new
+ * chunk... really ugly, but I see no
+ * way around it! Maybe a notify??
+ */
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered =
+ chk->rec.data.stream_seq;
+ }
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ } else if (compare_with_wrap(chk->rec.data.stream_seq, seq, MAX_SEQ)) {
+ /*
+ * If the stream_seq is > than the purging
+ * one, we are done
+ */
+ break;
+ }
+ chk = at;
+ }
+ }
+}
+
+
+void
+sctp_handle_forward_tsn(struct sctp_tcb *stcb,
+ struct sctp_forward_tsn_chunk *fwd,
+ int *abort_flag, struct mbuf *m, int offset)
+{
+ /*
+ * ISSUES that MUST be fixed for ECN! When we are the sender of the
+ * forward TSN and the SACK comes back that acknowledges the
+ * FWD-TSN, we must reset the NONCE sum to match correctly. This will
+ * get quite tricky since we may have sent more data intervening
+ * and must carefully account for what the SACK says on the nonce
+ * and any gaps that are reported. This work will NOT be done here,
+ * but I note it here since it is really related to PR-SCTP and
+ * FWD-TSNs.
+ */
+
+ /* The PR-SCTP FWD-TSN */
+ /*
+ * Here we will perform all the data receiver side steps for
+ * processing FwdTSN, as required by the PR-SCTP draft.
+ *
+ * Assume we get FwdTSN(x):
+ *
+ * 1) update local cumTSN to x
+ * 2) try to further advance cumTSN to x + others we have
+ * 3) examine and update re-ordering queue on pr-in-streams
+ * 4) clean up re-assembly queue
+ * 5) send a SACK to report where we are.
+ */
+ struct sctp_association *asoc;
+ uint32_t new_cum_tsn, gap;
+ unsigned int i, fwd_sz, cumack_set_flag, m_size;
+ uint32_t str_seq;
+ struct sctp_stream_in *strm;
+ struct sctp_tmit_chunk *chk, *at;
+ struct sctp_queued_to_read *ctl, *sv;
+
+ cumack_set_flag = 0;
+ asoc = &stcb->asoc;
+ if ((fwd_sz = ntohs(fwd->ch.chunk_length)) < sizeof(struct sctp_forward_tsn_chunk)) {
+ SCTPDBG(SCTP_DEBUG_INDATA1,
+ "Bad size too small/big fwd-tsn\n");
+ return;
+ }
+ m_size = (stcb->asoc.mapping_array_size << 3);
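+ /* m_size is the mapping array size in bits, i.e. how many TSNs it can cover */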
+ /*************************************************************/
+ /* 1. Here we update local cumTSN and shift the bitmap array */
+ /*************************************************************/
+ new_cum_tsn = ntohl(fwd->new_cumulative_tsn);
+
+ if (compare_with_wrap(asoc->cumulative_tsn, new_cum_tsn, MAX_TSN) ||
+ asoc->cumulative_tsn == new_cum_tsn) {
+ /* Already got there ... */
+ return;
+ }
+ /*
+ * now we know the new TSN is more advanced, let's find the actual
+ * gap
+ */
+ SCTP_CALC_TSN_TO_GAP(gap, new_cum_tsn, asoc->mapping_array_base_tsn);
+ asoc->cumulative_tsn = new_cum_tsn;
+ if (gap >= m_size) {
+ if ((long)gap > sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv)) {
+ struct mbuf *oper;
+
+ /*
+ * out of range (of single byte chunks in the rwnd I
+ * give out). This must be an attacker.
+ */
+ *abort_flag = 1;
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_33);
+ ippp++;
+ *ippp = asoc->highest_tsn_inside_map;
+ ippp++;
+ *ippp = new_cum_tsn;
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+ }
+ SCTP_STAT_INCR(sctps_fwdtsn_map_over);
+
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+ asoc->mapping_array_base_tsn = new_cum_tsn + 1;
+ asoc->highest_tsn_inside_map = new_cum_tsn;
+
+ memset(stcb->asoc.nr_mapping_array, 0, stcb->asoc.mapping_array_size);
+ asoc->highest_tsn_inside_nr_map = new_cum_tsn;
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 3, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ asoc->last_echo_tsn = asoc->highest_tsn_inside_map;
+ } else {
+ SCTP_TCB_LOCK_ASSERT(stcb);
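+ /*
+ * Mark every TSN up to the new cumulative ack as received in
+ * the non-renegable map so the later mapping-array slide
+ * treats them as delivered.
+ */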
+ for (i = 0; i <= gap; i++) {
+ if (!SCTP_IS_TSN_PRESENT(asoc->mapping_array, i) &&
+ !SCTP_IS_TSN_PRESENT(asoc->nr_mapping_array, i)) {
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, i);
+ if (compare_with_wrap(asoc->mapping_array_base_tsn + i, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+ asoc->highest_tsn_inside_nr_map = asoc->mapping_array_base_tsn + i;
+ }
+ }
+ }
+ }
+ /*************************************************************/
+ /* 2. Clear up re-assembly queue */
+ /*************************************************************/
+ /*
+ * First service it if pd-api is up, just in case we can progress it
+ * forward
+ */
+ if (asoc->fragmented_delivery_inprogress) {
+ sctp_service_reassembly(stcb, asoc);
+ }
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /* For each one on here see if we need to toss it */
+ /*
+ * For now, large messages held on the reasmqueue that are
+ * complete will be tossed too. We could in theory do more
+ * work, spinning through and stopping after dumping one msg
+ * (i.e. on seeing the start of a new msg at the head) and
+ * calling the delivery function to see if it can be
+ * delivered. But for now we just dump everything on the
+ * queue.
+ */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ at = TAILQ_NEXT(chk, sctp_next);
+ if ((compare_with_wrap(new_cum_tsn,
+ chk->rec.data.TSN_seq, MAX_TSN)) ||
+ (new_cum_tsn == chk->rec.data.TSN_seq)) {
+ /* It needs to be tossed */
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (compare_with_wrap(chk->rec.data.TSN_seq,
+ asoc->tsn_last_delivered, MAX_TSN)) {
+ asoc->tsn_last_delivered =
+ chk->rec.data.TSN_seq;
+ asoc->str_of_pdapi =
+ chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi =
+ chk->rec.data.stream_seq;
+ asoc->fragment_flags =
+ chk->rec.data.rcv_flags;
+ }
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+
+ /* Clear up any stream problem */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
+ SCTP_DATA_UNORDERED &&
+ (compare_with_wrap(chk->rec.data.stream_seq,
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered,
+ MAX_SEQ))) {
+ /*
+ * We must move forward this stream's
+ * sequence number if the chunk being
+ * skipped is not unordered. There is
+ * a chance that if the peer does not
+ * include the last fragment in its
+ * FWD-TSN we WILL have a problem
+ * here, since you would have a
+ * partial chunk in the queue that may
+ * not be deliverable. Also, if a
+ * partial delivery API has started,
+ * the user may get a partial chunk
+ * with the next read returning a new
+ * chunk... really ugly, but I see no
+ * way around it! Maybe a notify??
+ */
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered =
+ chk->rec.data.stream_seq;
+ }
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ } else {
+ /*
+ * Ok we have gone beyond the end of the
+ * fwd-tsn's mark.
+ */
+ break;
+ }
+ chk = at;
+ }
+ }
+ /*******************************************************/
+ /* 3. Update the PR-stream re-ordering queues and fix */
+ /* delivery issues as needed. */
+ /*******************************************************/
+ fwd_sz -= sizeof(*fwd);
+ if (m && fwd_sz) {
+ /* New method. */
+ unsigned int num_str;
+ struct sctp_strseq *stseq, strseqbuf;
+
+ offset += sizeof(*fwd);
+
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ num_str = fwd_sz / sizeof(struct sctp_strseq);
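+ /* the remainder of the FWD-TSN chunk is an array of (stream, sequence) pairs */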
+ for (i = 0; i < num_str; i++) {
+ uint16_t st;
+
+ stseq = (struct sctp_strseq *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_strseq),
+ (uint8_t *) & strseqbuf);
+ offset += sizeof(struct sctp_strseq);
+ if (stseq == NULL) {
+ break;
+ }
+ /* Convert */
+ st = ntohs(stseq->stream);
+ stseq->stream = st;
+ st = ntohs(stseq->sequence);
+ stseq->sequence = st;
+
+ /* now process */
+
+ /*
+ * Ok, we now look for the stream/seq on the read
+ * queue where it's not all delivered. If we find it
+ * we transmute the read entry into a PDI_ABORTED.
+ */
+ if (stseq->stream >= asoc->streamincnt) {
+ /* screwed up streams, stop! */
+ break;
+ }
+ if ((asoc->str_of_pdapi == stseq->stream) &&
+ (asoc->ssn_of_pdapi == stseq->sequence)) {
+ /*
+ * If this is the one we were partially
+ * delivering, then we no longer are.
+ * Note this will change with the reassembly
+ * re-write.
+ */
+ asoc->fragmented_delivery_inprogress = 0;
+ }
+ sctp_flush_reassm_for_str_seq(stcb, asoc, stseq->stream, stseq->sequence);
+ TAILQ_FOREACH(ctl, &stcb->sctp_ep->read_queue, next) {
+ if ((ctl->sinfo_stream == stseq->stream) &&
+ (ctl->sinfo_ssn == stseq->sequence)) {
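+ /* pack stream number and SSN into one word for the PD-aborted notification */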
+ str_seq = (stseq->stream << 16) | stseq->sequence;
+ ctl->end_added = 1;
+ ctl->pdapi_aborted = 1;
+ sv = stcb->asoc.control_pdapi;
+ stcb->asoc.control_pdapi = ctl;
+ sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION,
+ stcb,
+ SCTP_PARTIAL_DELIVERY_ABORTED,
+ (void *)&str_seq,
+ SCTP_SO_NOT_LOCKED);
+ stcb->asoc.control_pdapi = sv;
+ break;
+ } else if ((ctl->sinfo_stream == stseq->stream) &&
+ (compare_with_wrap(ctl->sinfo_ssn, stseq->sequence, MAX_SEQ))) {
+ /* We are past our victim SSN */
+ break;
+ }
+ }
+ strm = &asoc->strmin[stseq->stream];
+ if (compare_with_wrap(stseq->sequence,
+ strm->last_sequence_delivered, MAX_SEQ)) {
+ /* Update the sequence number */
+ strm->last_sequence_delivered =
+ stseq->sequence;
+ }
+ /* now kick the stream the new way */
+ /* sa_ignore NO_NULL_CHK */
+ sctp_kick_prsctp_reorder_queue(stcb, strm);
+ }
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+ }
+ /*
+ * Now slide thing forward.
+ */
+ sctp_slide_mapping_arrays(stcb);
+
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /* now lets kick out and check for more fragmented delivery */
+ /* sa_ignore NO_NULL_CHK */
+ sctp_deliver_reasm_check(stcb, &stcb->asoc);
+ }
+}
diff --git a/freebsd/sys/netinet/sctp_indata.h b/freebsd/sys/netinet/sctp_indata.h
new file mode 100644
index 00000000..a231ecaf
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_indata.h
@@ -0,0 +1,129 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_indata.h,v 1.9 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_indata_h__
+#define __sctp_indata_h__
+
+#if defined(_KERNEL) || defined(__Userspace__)
+
+struct sctp_queued_to_read *
+sctp_build_readq_entry(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint32_t tsn, uint32_t ppid,
+ uint32_t context, uint16_t stream_no,
+ uint16_t stream_seq, uint8_t flags,
+ struct mbuf *dm);
+
+
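+/*
+ * A note on the macro below: it appears to fill in a caller-supplied
+ * read-queue entry in place (taking a reference on the destination net),
+ * as an inline alternative to allocating one via sctp_build_readq_entry()
+ * above.
+ */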
+#define sctp_build_readq_entry_mac(_ctl, in_it, a, net, tsn, ppid, context, stream_no, stream_seq, flags, dm) do { \
+ if (_ctl) { \
+ atomic_add_int(&((net)->ref_count), 1); \
+ (_ctl)->sinfo_stream = stream_no; \
+ (_ctl)->sinfo_ssn = stream_seq; \
+ (_ctl)->sinfo_flags = (flags << 8); \
+ (_ctl)->sinfo_ppid = ppid; \
+ (_ctl)->sinfo_context = a; \
+ (_ctl)->sinfo_timetolive = 0; \
+ (_ctl)->sinfo_tsn = tsn; \
+ (_ctl)->sinfo_cumtsn = tsn; \
+ (_ctl)->sinfo_assoc_id = sctp_get_associd((in_it)); \
+ (_ctl)->length = 0; \
+ (_ctl)->held_length = 0; \
+ (_ctl)->whoFrom = net; \
+ (_ctl)->data = dm; \
+ (_ctl)->tail_mbuf = NULL; \
+ (_ctl)->aux_data = NULL; \
+ (_ctl)->stcb = (in_it); \
+ (_ctl)->port_from = (in_it)->rport; \
+ (_ctl)->spec_flags = 0; \
+ (_ctl)->do_not_ref_stcb = 0; \
+ (_ctl)->end_added = 0; \
+ (_ctl)->pdapi_aborted = 0; \
+ (_ctl)->some_taken = 0; \
+ } \
+} while (0)
+
+
+
+struct mbuf *
+sctp_build_ctl_nchunk(struct sctp_inpcb *inp,
+ struct sctp_sndrcvinfo *sinfo);
+
+char *
+sctp_build_ctl_cchunk(struct sctp_inpcb *inp,
+ int *control_len,
+ struct sctp_sndrcvinfo *sinfo);
+
+void sctp_set_rwnd(struct sctp_tcb *, struct sctp_association *);
+
+uint32_t
+sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc);
+
+void
+sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
+ uint32_t rwnd, int nonce_sum_flag, int *abort_now);
+
+void
+sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
+ struct sctp_tcb *stcb, struct sctp_nets *net_from,
+ uint16_t num_seg, uint16_t num_nr_seg, uint16_t num_dup,
+ int *abort_now, uint8_t flags,
+ uint32_t cum_ack, uint32_t rwnd);
+
+/* draft-ietf-tsvwg-usctp */
+void
+sctp_handle_forward_tsn(struct sctp_tcb *,
+ struct sctp_forward_tsn_chunk *, int *, struct mbuf *, int);
+
+struct sctp_tmit_chunk *
+ sctp_try_advance_peer_ack_point(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_service_queues(struct sctp_tcb *, struct sctp_association *);
+
+void
+sctp_update_acked(struct sctp_tcb *, struct sctp_shutdown_chunk *,
+ struct sctp_nets *, int *);
+
+int
+sctp_process_data(struct mbuf **, int, int *, int, struct sctphdr *,
+ struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint32_t *);
+
+void sctp_slide_mapping_arrays(struct sctp_tcb *stcb);
+
+void sctp_sack_check(struct sctp_tcb *, int, int *);
+
+#endif
+#endif
diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c
new file mode 100644
index 00000000..080813b4
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_input.c
@@ -0,0 +1,5965 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_input.c,v 1.27 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_input.h>
+#include <freebsd/netinet/sctp_auth.h>
+#include <freebsd/netinet/sctp_indata.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctp_bsd_addr.h>
+#include <freebsd/netinet/sctp_timer.h>
+#include <freebsd/netinet/sctp_crc32.h>
+#include <freebsd/netinet/udp.h>
+
+
+
+static void
+sctp_stop_all_cookie_timers(struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+
+ /*
+ * This now not only stops all cookie timers, it also stops any
+ * INIT timers. This makes sure that the timers are stopped in
+ * all collision cases.
+ */
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net->rxt_timer.type == SCTP_TIMER_TYPE_COOKIE) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_COOKIE,
+ stcb->sctp_ep,
+ stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_1);
+ } else if (net->rxt_timer.type == SCTP_TIMER_TYPE_INIT) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT,
+ stcb->sctp_ep,
+ stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_2);
+ }
+ }
+}
+
+/* INIT handler */
+static void
+sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_init_chunk *cp, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id, uint16_t port)
+{
+ struct sctp_init *init;
+ struct mbuf *op_err;
+ uint32_t init_limit;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init: handling INIT tcb:%p\n",
+ stcb);
+ if (stcb == NULL) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ goto outnow;
+ }
+ }
+ op_err = NULL;
+ init = &cp->init;
+ /* First are we accepting? */
+ if ((inp->sctp_socket->so_qlimit == 0) && (stcb == NULL)) {
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init: Abort, so_qlimit:%d\n",
+ inp->sctp_socket->so_qlimit);
+ /*
+ * FIX ME ?? What about the TCP model when we have a
+ * match/restart case? Actually no fix is needed: the lookup
+ * will always find the existing assoc, so stcb would not be
+ * NULL. It may be questionable to do this since we COULD
+ * just send back the INIT-ACK and hope that the app did
+ * accept()'s by the time the COOKIE was sent. But there is
+ * a price to pay for COOKIE generation and I don't want to
+ * pay it on the chance that the app will actually do some
+ * accepts(). The app just loses and should NOT be in this
+ * state :-)
+ */
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id, port);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) {
+ /* Invalid length */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id, port);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ /* validate parameters */
+ if (init->initiate_tag == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id, port);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) {
+ /* invalid parameter... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id, port);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (init->num_inbound_streams == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id, port);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (init->num_outbound_streams == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id, port);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ init_limit = offset + ntohs(cp->ch.chunk_length);
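+ /*
+ * init_limit marks the end of this INIT chunk within the packet;
+ * it bounds the AUTH parameter validation below.
+ */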
+ if (sctp_validate_init_auth_params(m, offset + sizeof(*cp),
+ init_limit)) {
+ /* auth parameter(s) error... send abort */
+ sctp_abort_association(inp, stcb, m, iphlen, sh, NULL, vrf_id, port);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ /* send an INIT-ACK w/cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
+ sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, sh, cp, vrf_id, port,
+ ((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
+outnow:
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+}
+
+/*
+ * process peer "INIT/INIT-ACK" chunk returns value < 0 on error
+ */
+
+int
+sctp_is_there_unsent_data(struct sctp_tcb *stcb)
+{
+ int unsent_data = 0;
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_stream_out *strq;
+ struct sctp_association *asoc;
+
+ /*
+ * This function returns the number of streams that have true unsent
+ * data on them. Note that as it looks through, it will clean up any
+ * places that have old data that has been sent but left at the top
+ * of the stream queue.
+ */
+ asoc = &stcb->asoc;
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (!TAILQ_EMPTY(&asoc->out_wheel)) {
+ /* Check to see if some data queued */
+ TAILQ_FOREACH(strq, &asoc->out_wheel, next_spoke) {
+ is_there_another:
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp == NULL) {
+ continue;
+ }
+ if ((sp->msg_is_complete) &&
+ (sp->length == 0) &&
+ (sp->sender_all_done)) {
+ /*
+ * We are doing deferred cleanup. Last time
+ * through, when we took all the data,
+ * sender_all_done was not set.
+ */
+ if (sp->put_last_out == 0) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out);
+ }
+ atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1);
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ if (sp->net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ }
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+ goto is_there_another;
+ } else {
+ unsent_data++;
+ continue;
+ }
+ }
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ return (unsent_data);
+}
+
+static int
+sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_init *init;
+ struct sctp_association *asoc;
+ struct sctp_nets *lnet;
+ unsigned int i;
+
+ init = &cp->init;
+ asoc = &stcb->asoc;
+ /* save off parameters */
+ asoc->peer_vtag = ntohl(init->initiate_tag);
+ asoc->peers_rwnd = ntohl(init->a_rwnd);
+ if (!TAILQ_EMPTY(&asoc->nets)) {
+ /* update any ssthresh's that may have a default */
+ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
+ lnet->ssthresh = asoc->peers_rwnd;
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_INITIALIZATION);
+ }
+ }
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (asoc->pre_open_streams > ntohs(init->num_inbound_streams)) {
+ unsigned int newcnt;
+ struct sctp_stream_out *outs;
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_tmit_chunk *chk, *chk_next;
+
+ /* abandon the upper streams */
+ newcnt = ntohs(init->num_inbound_streams);
+ if (!TAILQ_EMPTY(&asoc->send_queue)) {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ chk_next = TAILQ_NEXT(chk, sctp_next);
+ if (chk->rec.data.stream_number >= newcnt) {
+ TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
+ asoc->send_queue_cnt--;
+ if (chk->data != NULL) {
+ sctp_free_bufspace(stcb, asoc, chk, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_UNSENT, chk, SCTP_SO_NOT_LOCKED);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ }
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ }
+ chk = chk_next;
+ }
+ }
+ if (asoc->strmout) {
+ for (i = newcnt; i < asoc->pre_open_streams; i++) {
+ outs = &asoc->strmout[i];
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ TAILQ_REMOVE(&outs->outqueue, sp, next);
+ asoc->stream_queue_cnt--;
+ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL,
+ stcb, SCTP_NOTIFY_DATAGRAM_UNSENT,
+ sp, SCTP_SO_NOT_LOCKED);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ if (sp->net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ }
+ /* Free the chunk */
+ sctp_free_a_strmoq(stcb, sp);
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+ }
+ /* cut back the count */
+ asoc->pre_open_streams = newcnt;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams;
+ /* init tsn's */
+ asoc->highest_tsn_inside_map = asoc->asconf_seq_in = ntohl(init->initial_tsn) - 1;
+ /* EY - nr_sack: initialize highest tsn in nr_mapping_array */
+ asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 5, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ /* This is the next one we expect */
+ asoc->str_reset_seq_in = asoc->asconf_seq_in + 1;
+
+ asoc->mapping_array_base_tsn = ntohl(init->initial_tsn);
+ asoc->tsn_last_delivered = asoc->cumulative_tsn = asoc->asconf_seq_in;
+ asoc->last_echo_tsn = asoc->asconf_seq_in;
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ /* open the requested streams */
+
+ if (asoc->strmin != NULL) {
+ /* Free the old ones */
+ struct sctp_queued_to_read *ctl;
+
+ for (i = 0; i < asoc->streamincnt; i++) {
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->strmin[i].inqueue, ctl, next);
+ sctp_free_remote_addr(ctl->whoFrom);
+ ctl->whoFrom = NULL;
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ sctp_free_a_readq(stcb, ctl);
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ }
+ }
+ SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
+ }
+ asoc->streamincnt = ntohs(init->num_outbound_streams);
+ if (asoc->streamincnt > MAX_SCTP_STREAMS) {
+ asoc->streamincnt = MAX_SCTP_STREAMS;
+ }
+ SCTP_MALLOC(asoc->strmin, struct sctp_stream_in *, asoc->streamincnt *
+ sizeof(struct sctp_stream_in), SCTP_M_STRMI);
+ if (asoc->strmin == NULL) {
+ /* we didn't get memory for the streams! */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "process_init: couldn't get memory for the streams!\n");
+ return (-1);
+ }
+ for (i = 0; i < asoc->streamincnt; i++) {
+ asoc->strmin[i].stream_no = i;
+ asoc->strmin[i].last_sequence_delivered = 0xffff;
+ /*
+ * U-stream ranges will be set when the cookie is unpacked.
+ * Or, for the INIT sender, they are unset (if PR-SCTP is not
+ * supported) when the INIT-ACK arrives.
+ */
+ TAILQ_INIT(&asoc->strmin[i].inqueue);
+ asoc->strmin[i].delivery_started = 0;
+ }
+ /*
+ * load_address_from_init will put the addresses into the
+ * association when the COOKIE is processed or the INIT-ACK is
+ * processed. Both types of COOKIE's existing and new call this
+ * routine. It will remove addresses that are no longer in the
+ * association (for the restarting case where addresses are
+ * removed). Up front when the INIT arrives we will discard it if it
+ * is a restart and new addresses have been added.
+ */
+ /* sa_ignore MEMLEAK */
+ return (0);
+}
+
+/*
+ * INIT-ACK message processing/consumption returns value < 0 on error
+ */
+static int
+sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+ struct mbuf *op_err;
+ int retval, abort_flag;
+ uint32_t initack_limit;
+ int nat_friendly = 0;
+
+ /* First verify that we have no illegal param's */
+ abort_flag = 0;
+ op_err = NULL;
+
+ op_err = sctp_arethere_unrecognized_parameters(m,
+ (offset + sizeof(struct sctp_init_chunk)),
+ &abort_flag, (struct sctp_chunkhdr *)cp, &nat_friendly);
+ if (abort_flag) {
+ /* Send an abort and notify peer */
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_CAUSE_PROTOCOL_VIOLATION, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ asoc = &stcb->asoc;
+ asoc->peer_supports_nat = (uint8_t) nat_friendly;
+ /* process the peer's parameters in the INIT-ACK */
+ retval = sctp_process_init((struct sctp_init_chunk *)cp, stcb, net);
+ if (retval < 0) {
+ return (retval);
+ }
+ initack_limit = offset + ntohs(cp->ch.chunk_length);
+ /* load all addresses */
+ if ((retval = sctp_load_addresses_from_init(stcb, m, iphlen,
+ (offset + sizeof(struct sctp_init_chunk)), initack_limit, sh,
+ NULL))) {
+ /* Huh, we should abort */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "Load addresses from INIT causes an abort %d\n",
+ retval);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ NULL, 0, net->port);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ /* if the peer doesn't support asconf, flush the asconf queue */
+ if (asoc->peer_supports_asconf == 0) {
+ struct sctp_asconf_addr *aparam;
+
+ while (!TAILQ_EMPTY(&asoc->asconf_queue)) {
+ /* sa_ignore FREED_MEMORY */
+ aparam = TAILQ_FIRST(&asoc->asconf_queue);
+ TAILQ_REMOVE(&asoc->asconf_queue, aparam, next);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+ }
+ }
+ stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
+ stcb->asoc.local_hmacs);
+ if (op_err) {
+ sctp_queue_op_err(stcb, op_err);
+ /* queuing will steal away the mbuf chain to the out queue */
+ op_err = NULL;
+ }
+ /* extract the cookie and queue it to "echo" it back... */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ net->error_count = 0;
+
+ /*
+ * Cancel the INIT timer. We do this first, before queueing the
+ * cookie. We always cancel at the primary to ensure that we are
+ * canceling the timer started by the INIT, which always goes to
+ * the primary.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb,
+ asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
+
+ /* calculate the RTO */
+ net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy);
+
+ retval = sctp_send_cookie_echo(m, offset, stcb, net);
+ if (retval < 0) {
+ /*
+ * No cookie, we probably should send an op error. But in any
+ * case, if there is no cookie in the INIT-ACK, we can
+ * abandon the peer; it's broken.
+ */
+ if (retval == -3) {
+ /* We abort with an error of missing mandatory param */
+ op_err =
+ sctp_generate_invmanparam(SCTP_CAUSE_MISSING_PARAM);
+ if (op_err) {
+ /*
+ * Expand beyond to include the mandatory
+ * param cookie
+ */
+ struct sctp_inv_mandatory_param *mp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_inv_mandatory_param);
+ mp = mtod(op_err,
+ struct sctp_inv_mandatory_param *);
+ /* Subtract the reserved param */
+ mp->length =
+ htons(sizeof(struct sctp_inv_mandatory_param) - 2);
+ mp->num_param = htonl(1);
+ mp->param = htons(SCTP_STATE_COOKIE);
+ mp->resv = 0;
+ }
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ sh, op_err, 0, net->port);
+ *abort_no_unlock = 1;
+ }
+ return (retval);
+ }
+ return (0);
+}
+
+static void
+sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sockaddr_storage store;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sctp_nets *r_net, *f_net;
+ struct timeval tv;
+ int req_prim = 0;
+
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) {
+ /* Invalid length */
+ return;
+ }
+ sin = (struct sockaddr_in *)&store;
+ sin6 = (struct sockaddr_in6 *)&store;
+
+ memset(&store, 0, sizeof(store));
+ if (cp->heartbeat.hb_info.addr_family == AF_INET &&
+ cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) {
+ sin->sin_family = cp->heartbeat.hb_info.addr_family;
+ sin->sin_len = cp->heartbeat.hb_info.addr_len;
+ sin->sin_port = stcb->rport;
+ memcpy(&sin->sin_addr, cp->heartbeat.hb_info.address,
+ sizeof(sin->sin_addr));
+ } else if (cp->heartbeat.hb_info.addr_family == AF_INET6 &&
+ cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) {
+ sin6->sin6_family = cp->heartbeat.hb_info.addr_family;
+ sin6->sin6_len = cp->heartbeat.hb_info.addr_len;
+ sin6->sin6_port = stcb->rport;
+ memcpy(&sin6->sin6_addr, cp->heartbeat.hb_info.address,
+ sizeof(sin6->sin6_addr));
+ } else {
+ return;
+ }
+ r_net = sctp_findnet(stcb, (struct sockaddr *)sin);
+ if (r_net == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n");
+ return;
+ }
+ if ((r_net && (r_net->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
+ (r_net->heartbeat_random1 == cp->heartbeat.hb_info.random_value1) &&
+ (r_net->heartbeat_random2 == cp->heartbeat.hb_info.random_value2)) {
+ /*
+ * If it's an HB and its random value is correct, we can
+ * confirm the destination.
+ */
+ r_net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ if (r_net->dest_state & SCTP_ADDR_REQ_PRIMARY) {
+ stcb->asoc.primary_destination = r_net;
+ r_net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY;
+ r_net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
+ f_net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (f_net != r_net) {
+ /*
+ * The first one on the list is NOT the primary.
+ * sctp_cmpaddr() is much more efficient if
+ * the primary is the first on the list, so
+ * make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets, r_net, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, r_net, sctp_next);
+ }
+ req_prim = 1;
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED);
+ }
+ r_net->error_count = 0;
+ r_net->hb_responded = 1;
+ tv.tv_sec = cp->heartbeat.hb_info.time_value_1;
+ tv.tv_usec = cp->heartbeat.hb_info.time_value_2;
+ if (r_net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ r_net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ r_net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_HEARTBEAT_SUCCESS, (void *)r_net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (r_net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, r_net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination is in PF state,
+ * set the destination to active state and set the cwnd to one or
+ * two MTUs based on whether PF1 or PF2 is being used. If a T3
+ * timer is running for the destination, stop the timer because a
+ * PF-heartbeat was received.
+ */
+ if ((stcb->asoc.sctp_cmt_on_off == 1) &&
+ (stcb->asoc.sctp_cmt_pf > 0) &&
+ ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
+ }
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * stcb->asoc.sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ }
+ /* Now lets do a RTO with this */
+ r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv, sctp_align_safe_nocopy);
+ /* Mobility adaptation */
+ if (req_prim) {
+ if ((sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED)) {
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_assoc_immediate_retrans(stcb,
+ stcb->asoc.primary_destination);
+ }
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE)) {
+ sctp_move_chunks_from_net(stcb,
+ stcb->asoc.deleted_primary);
+ }
+ sctp_delete_prim_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ }
+ }
+}
+
+static int
+sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
+{
+ /*
+ * Return 0 means we want you to proceed with the abort; non-zero
+ * means no abort processing.
+ */
+ struct sctpasochead *head;
+
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ /* generate a new vtag and send init */
+ LIST_REMOVE(stcb, sctp_asocs);
+ stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
+ /*
+ * put it in the bucket in the vtag hash of assoc's for the
+ * system
+ */
+ LIST_INSERT_HEAD(head, stcb, sctp_asocs);
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ return (1);
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /*
+ * treat like a case where the cookie expired i.e.: - dump
+ * current cookie. - generate a new vtag. - resend init.
+ */
+ /* generate a new vtag and send init */
+ LIST_REMOVE(stcb, sctp_asocs);
+ stcb->asoc.state &= ~SCTP_STATE_COOKIE_ECHOED;
+ stcb->asoc.state |= SCTP_STATE_COOKIE_WAIT;
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_toss_old_cookies(stcb, &stcb->asoc);
+ stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
+ /*
+ * put it in the bucket in the vtag hash of assoc's for the
+ * system
+ */
+ LIST_INSERT_HEAD(head, stcb, sctp_asocs);
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ return (1);
+ }
+ return (0);
+}
+
+static int
+sctp_handle_nat_missing_state(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /*
+ * Return 0 means we want you to proceed with the abort; non-zero
+ * means no abort processing.
+ */
+ if (stcb->asoc.peer_supports_auth == 0) {
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_nat_missing_state: Peer does not support AUTH, cannot send an asconf\n");
+ return (0);
+ }
+ sctp_asconf_send_nat_state_update(stcb, net);
+ return (1);
+}
+
+
+static void
+sctp_handle_abort(struct sctp_abort_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ uint16_t len;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: handling ABORT\n");
+ if (stcb == NULL)
+ return;
+
+ len = ntohs(cp->ch.chunk_length);
+ if (len > sizeof(struct sctp_chunkhdr)) {
+ /*
+ * Need to check the cause codes for our two magic nat
+ * aborts which don't kill the assoc necessarily.
+ */
+ struct sctp_abort_chunk *cpnext;
+ struct sctp_missing_nat_state *natc;
+ uint16_t cause;
+
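+ /*
+ * Incrementing the chunk pointer steps over the ABORT chunk
+ * header, so natc points at the first error cause carried in
+ * the ABORT.
+ */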
+ cpnext = cp;
+ cpnext++;
+ natc = (struct sctp_missing_nat_state *)cpnext;
+ cause = ntohs(natc->cause);
+ if (cause == SCTP_CAUSE_NAT_COLLIDING_STATE) {
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n",
+ cp->ch.chunk_flags);
+ if (sctp_handle_nat_colliding_state(stcb)) {
+ return;
+ }
+ } else if (cause == SCTP_CAUSE_NAT_MISSING_STATE) {
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Received missing state abort flags:%x\n",
+ cp->ch.chunk_flags);
+ if (sctp_handle_nat_missing_state(stcb, net)) {
+ return;
+ }
+ }
+ }
+ /* stop any receive timers */
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+ /* notify user of the abort and clean up... */
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* free the tcb */
+#if defined(SCTP_PANIC_ON_ABORT)
+ printf("stcb:%p state:%d rport:%d net:%p\n",
+ stcb, stcb->asoc.state, stcb->rport, net);
+ if (!(stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ panic("Received an ABORT");
+ } else {
+ printf("No panic its in state %x closed\n", stcb->asoc.state);
+ }
+#endif
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ sctp_print_out_track_log(stcb);
+#endif
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n");
+}
+
+static void
+sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_flag)
+{
+ struct sctp_association *asoc;
+ int some_on_streamwheel;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown: handling SHUTDOWN\n");
+ if (stcb == NULL)
+ return;
+ asoc = &stcb->asoc;
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ return;
+ }
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
+ /* Shutdown NOT the expected size */
+ return;
+ } else {
+ sctp_update_acked(stcb, cp, net, abort_flag);
+ if (*abort_flag) {
+ return;
+ }
+ }
+ if (asoc->control_pdapi) {
+ /*
+ * With a normal shutdown we assume the end of last record.
+ */
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ asoc->control_pdapi->end_added = 1;
+ asoc->control_pdapi->pdapi_aborted = 1;
+ asoc->control_pdapi = NULL;
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* goto SHUTDOWN_RECEIVED state to block new requests */
+ if (stcb->sctp_socket) {
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_RECEIVED);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ /*
+ * notify upper layer that peer has initiated a
+ * shutdown
+ */
+ sctp_ulp_notify(SCTP_NOTIFY_PEER_SHUTDOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+
+ /* reset time */
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+ }
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /*
+ * stop the shutdown timer, since we WILL move to
+ * SHUTDOWN-ACK-SENT.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+ }
+ /* Now is there unsent data on a stream somewhere? */
+ some_on_streamwheel = sctp_is_there_unsent_data(stcb);
+
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ some_on_streamwheel) {
+ /* By returning we will push more data out */
+ return;
+ } else {
+ /* no outstanding data to send, so move on... */
+ /* send SHUTDOWN-ACK */
+ sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
+ /* move to SHUTDOWN-ACK-SENT state */
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep,
+ stcb, net);
+ }
+}
+
+static void
+sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_ack: handling SHUTDOWN ACK\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+ /* process according to association state */
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /* unexpected SHUTDOWN-ACK... do OOTB handling... */
+ sctp_send_shutdown_complete(stcb, net, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* unexpected SHUTDOWN-ACK... so ignore... */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if (asoc->control_pdapi) {
+ /*
+ * With a normal shutdown we assume the end of last record.
+ */
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ asoc->control_pdapi->end_added = 1;
+ asoc->control_pdapi->pdapi_aborted = 1;
+ asoc->control_pdapi = NULL;
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* are the queues empty? */
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ !TAILQ_EMPTY(&asoc->out_wheel)) {
+ sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED);
+ }
+ /* stop the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
+ /* send SHUTDOWN-COMPLETE */
+ sctp_send_shutdown_complete(stcb, net, 0);
+ /* notify upper layer protocol */
+ if (stcb->sctp_socket) {
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* Set the connected flag to disconnected */
+ stcb->sctp_ep->sctp_socket->so_snd.sb_cc = 0;
+ }
+ }
+ SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
+ /* free the TCB but first save off the ep */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+}
+
+/*
+ * Skip past the param header and then we will find the chunk that caused the
+ * problem. There are two possibilities, ASCONF or FWD-TSN; other than that,
+ * our peer must be broken.
+ */
+static void
+sctp_process_unrecog_chunk(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr,
+ struct sctp_nets *net)
+{
+ struct sctp_chunkhdr *chk;
+
+ chk = (struct sctp_chunkhdr *)((caddr_t)phdr + sizeof(*phdr));
+ switch (chk->chunk_type) {
+ case SCTP_ASCONF_ACK:
+ case SCTP_ASCONF:
+ sctp_asconf_cleanup(stcb, net);
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ stcb->asoc.peer_supports_prsctp = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Peer does not support chunk type %d(%x)??\n",
+ chk->chunk_type, (uint32_t) chk->chunk_type);
+ break;
+ }
+}
+
+/*
+ * Skip past the param header and then we will find the param that caused the
+ * problem. There are a number of params in an ASCONF, OR the PR-SCTP param;
+ * these will turn off specific features.
+ */
+static void
+sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
+{
+ struct sctp_paramhdr *pbad;
+
+ pbad = phdr + 1;
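+ /* the offending parameter immediately follows the error cause header */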
+ switch (ntohs(pbad->param_type)) {
+ /* pr-sctp draft */
+ case SCTP_PRSCTP_SUPPORTED:
+ stcb->asoc.peer_supports_prsctp = 0;
+ break;
+ case SCTP_SUPPORTED_CHUNK_EXT:
+ break;
+ /* draft-ietf-tsvwg-addip-sctp */
+ case SCTP_HAS_NAT_SUPPORT:
+ stcb->asoc.peer_supports_nat = 0;
+ break;
+ case SCTP_ECN_NONCE_SUPPORTED:
+ stcb->asoc.peer_supports_ecn_nonce = 0;
+ stcb->asoc.ecn_nonce_allowed = 0;
+ stcb->asoc.ecn_allowed = 0;
+ break;
+ case SCTP_ADD_IP_ADDRESS:
+ case SCTP_DEL_IP_ADDRESS:
+ case SCTP_SET_PRIM_ADDR:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ case SCTP_SUCCESS_REPORT:
+ case SCTP_ERROR_CAUSE_IND:
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Huh, the peer does not support success? or error cause?\n");
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Turning off ASCONF to this strange peer\n");
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Peer does not support param type %d(%x)??\n",
+ pbad->param_type, (uint32_t) pbad->param_type);
+ break;
+ }
+}
+
+static int
+sctp_handle_error(struct sctp_chunkhdr *ch,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int chklen;
+ struct sctp_paramhdr *phdr;
+ uint16_t error_type;
+ uint16_t error_len;
+ struct sctp_association *asoc;
+ int adjust;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ /* parse through all of the errors and process */
+ asoc = &stcb->asoc;
+ phdr = (struct sctp_paramhdr *)((caddr_t)ch +
+ sizeof(struct sctp_chunkhdr));
+ chklen = ntohs(ch->chunk_length) - sizeof(struct sctp_chunkhdr);
+ while ((size_t)chklen >= sizeof(struct sctp_paramhdr)) {
+ /* Process an Error Cause */
+ error_type = ntohs(phdr->param_type);
+ error_len = ntohs(phdr->param_length);
+ if ((error_len > chklen) || (error_len == 0)) {
+ /* invalid param length for this param */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Bogus length in error param- chunk left:%d errorlen:%d\n",
+ chklen, error_len);
+ return (0);
+ }
+ switch (error_type) {
+ case SCTP_CAUSE_INVALID_STREAM:
+ case SCTP_CAUSE_MISSING_PARAM:
+ case SCTP_CAUSE_INVALID_PARAM:
+ case SCTP_CAUSE_NO_USER_DATA:
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Software error we got a %d back? We have a bug :/ (or do they?)\n",
+ error_type);
+ break;
+ case SCTP_CAUSE_NAT_COLLIDING_STATE:
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n",
+ ch->chunk_flags);
+ if (sctp_handle_nat_colliding_state(stcb)) {
+ return (0);
+ }
+ break;
+ case SCTP_CAUSE_NAT_MISSING_STATE:
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Received missing state abort flags:%x\n",
+ ch->chunk_flags);
+ if (sctp_handle_nat_missing_state(stcb, net)) {
+ return (0);
+ }
+ break;
+ case SCTP_CAUSE_STALE_COOKIE:
+ /*
+ * We only act if we have echoed a cookie and are
+ * waiting.
+ */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ int *p;
+
+ p = (int *)((caddr_t)phdr + sizeof(*phdr));
+ /* Save the time doubled */
+ asoc->cookie_preserve_req = ntohl(*p) << 1;
+ asoc->stale_cookie_count++;
+ if (asoc->stale_cookie_count >
+ asoc->max_init_times) {
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* now free the asoc */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return (-1);
+ }
+ /* blast back to INIT state */
+ sctp_toss_old_cookies(stcb, &stcb->asoc);
+ asoc->state &= ~SCTP_STATE_COOKIE_ECHOED;
+ asoc->state |= SCTP_STATE_COOKIE_WAIT;
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_CAUSE_UNRESOLVABLE_ADDR:
+ /*
+ * Nothing we can do here, we don't do hostname
+ * addresses so if the peer does not like my IPv6
+ * (or IPv4 for that matter) it does not matter. If
+ * they don't support that type of address, they can
+ * NOT possibly get that packet type... i.e. with no
+ * IPv6 you can't receive an IPv6 packet. So we can
+ * safely ignore this one. If we ever added support
+ * for HOSTNAME Addresses, then we would need to do
+ * something here.
+ */
+ break;
+ case SCTP_CAUSE_UNRECOG_CHUNK:
+ sctp_process_unrecog_chunk(stcb, phdr, net);
+ break;
+ case SCTP_CAUSE_UNRECOG_PARAM:
+ sctp_process_unrecog_param(stcb, phdr);
+ break;
+ case SCTP_CAUSE_COOKIE_IN_SHUTDOWN:
+ /*
+ * We ignore this since the timer will drive out a
+ * new cookie anyway and their timer will drive us
+ * to send a SHUTDOWN_COMPLETE. We can't send one
+ * here since we don't have their tag.
+ */
+ break;
+ case SCTP_CAUSE_DELETING_LAST_ADDR:
+ case SCTP_CAUSE_RESOURCE_SHORTAGE:
+ case SCTP_CAUSE_DELETING_SRC_ADDR:
+ /*
+ * We should NOT get these here, but in a
+ * ASCONF-ACK.
+ */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Peer sends ASCONF errors in an Operational Error?<%d>?\n",
+ error_type);
+ break;
+ case SCTP_CAUSE_OUT_OF_RESC:
+ /*
+ * And what, pray tell, do we do with the fact that
+ * the peer is out of resources? Not really sure we
+ * could do anything but abort. I suspect this
+ * should have come WITH an abort instead of in an
+ * OP-ERROR.
+ */
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_handle_error: unknown error type = 0x%xh\n",
+ error_type);
+ break;
+ }
+ adjust = SCTP_SIZE32(error_len);
+ chklen -= adjust;
+ phdr = (struct sctp_paramhdr *)((caddr_t)phdr + adjust);
+ }
+ return (0);
+}
+
+static int
+sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+{
+ struct sctp_init_ack *init_ack;
+ struct mbuf *op_err;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init_ack: handling INIT-ACK\n");
+
+ if (stcb == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init_ack: TCB is null\n");
+ return (-1);
+ }
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) {
+ /* Invalid length */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0, net->port);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ init_ack = &cp->init;
+ /* validate parameters */
+ if (init_ack->initiate_tag == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0, net->port);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (ntohl(init_ack->a_rwnd) < SCTP_MIN_RWND) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0, net->port);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (init_ack->num_inbound_streams == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0, net->port);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (init_ack->num_outbound_streams == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0, net->port);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ /* process according to association state... */
+ switch (stcb->asoc.state & SCTP_STATE_MASK) {
+ case SCTP_STATE_COOKIE_WAIT:
+ /* this is the expected state for this chunk */
+ /* process the INIT-ACK parameters */
+ if (stcb->asoc.primary_destination->dest_state &
+ SCTP_ADDR_UNCONFIRMED) {
+ /*
+ * The primary is where we sent the INIT, we can
+ * always consider it confirmed when the INIT-ACK is
+ * returned. Do this before we load addresses
+ * though.
+ */
+ stcb->asoc.primary_destination->dest_state &=
+ ~SCTP_ADDR_UNCONFIRMED;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ stcb, 0, (void *)stcb->asoc.primary_destination, SCTP_SO_NOT_LOCKED);
+ }
+ if (sctp_process_init_ack(m, iphlen, offset, sh, cp, stcb,
+ net, abort_no_unlock, vrf_id) < 0) {
+ /* error in parsing parameters */
+ return (-1);
+ }
+ /* update our state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to COOKIE-ECHOED state\n");
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_ECHOED);
+
+ /* reset the RTO calc */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ /*
+ * collapse the init timer back in case of an exponential
+ * backoff
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, stcb->sctp_ep,
+ stcb, net);
+ /*
+ * the send at the end of the inbound data processing will
+ * cause the cookie to be sent
+ */
+ break;
+ case SCTP_STATE_SHUTDOWN_SENT:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_COOKIE_ECHOED:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_OPEN:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_EMPTY:
+ case SCTP_STATE_INUSE:
+ default:
+ /* incorrect state... discard */
+ return (-1);
+ break;
+ }
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Leaving handle-init-ack end\n");
+ return (0);
+}
+
+static struct sctp_tcb *
+sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
+ struct sctp_inpcb *inp, struct sctp_nets **netp,
+ struct sockaddr *init_src, int *notification,
+ int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ uint32_t vrf_id, uint16_t port);
+
+
+/*
+ * handle a state cookie for an existing association
+ * m: input packet mbuf chain -- assumes a pullup on IP/SCTP/COOKIE-ECHO
+ *    chunk; note: this is a "split" mbuf and the cookie signature does
+ *    not exist
+ * offset: offset into mbuf to the cookie-echo chunk
+ */
+static struct sctp_tcb *
+sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
+ struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp,
+ struct sockaddr *init_src, int *notification, sctp_assoc_t * sac_assoc_id,
+ uint32_t vrf_id, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, uint16_t port)
+{
+ struct sctp_association *asoc;
+ struct sctp_init_chunk *init_cp, init_buf;
+ struct sctp_init_ack_chunk *initack_cp, initack_buf;
+ struct sctp_nets *net;
+ struct mbuf *op_err;
+ struct sctp_paramhdr *ph;
+ int chk_length;
+ int init_offset, initack_offset, i;
+ int retval;
+ int spec_flag = 0;
+ uint32_t how_indx;
+
+ net = *netp;
+ /* I know that the TCB is non-NULL from the caller */
+ asoc = &stcb->asoc;
+ for (how_indx = 0; how_indx < sizeof(asoc->cookie_how); how_indx++) {
+ if (asoc->cookie_how[how_indx] == 0)
+ break;
+ }
+ if (how_indx < sizeof(asoc->cookie_how)) {
+ asoc->cookie_how[how_indx] = 1;
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /* SHUTDOWN came in after sending INIT-ACK */
+ sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err == NULL) {
+ /* FOOBAR */
+ return (NULL);
+ }
+ /* Set the len */
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_COOKIE_IN_SHUTDOWN);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag,
+ vrf_id, net->port);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 2;
+ return (NULL);
+ }
+ /*
+ * find and validate the INIT chunk in the cookie (peer's info); the
+ * INIT should start after the cookie-echo header struct (chunk
+ * header, state cookie header struct)
+ */
+ init_offset = offset += sizeof(struct sctp_cookie_echo_chunk);
+
+ init_cp = (struct sctp_init_chunk *)
+ sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk),
+ (uint8_t *) & init_buf);
+ if (init_cp == NULL) {
+ /* could not pull a INIT chunk in cookie */
+ return (NULL);
+ }
+ chk_length = ntohs(init_cp->ch.chunk_length);
+ if (init_cp->ch.chunk_type != SCTP_INITIATION) {
+ return (NULL);
+ }
+ /*
+ * find and validate the INIT-ACK chunk in the cookie (my info); the
+ * INIT-ACK follows the INIT chunk
+ */
+ initack_offset = init_offset + SCTP_SIZE32(chk_length);
+ initack_cp = (struct sctp_init_ack_chunk *)
+ sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk),
+ (uint8_t *) & initack_buf);
+ if (initack_cp == NULL) {
+ /* could not pull INIT-ACK chunk in cookie */
+ return (NULL);
+ }
+ chk_length = ntohs(initack_cp->ch.chunk_length);
+ if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) {
+ return (NULL);
+ }
+ if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
+ (ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag)) {
+ /*
+ * case D in Section 5.2.4 Table 2: MMAA; process accordingly
+ * to get into the OPEN state
+ */
+ if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) {
+ /*-
+ * Oops, this means that we somehow generated two vtags
+ * the same. I.e. we did:
+ * Us Peer
+ * <---INIT(tag=a)------
+ * ----INIT-ACK(tag=t)-->
+ * ----INIT(tag=t)------> *1
+ * <---INIT-ACK(tag=a)---
+ * <----CE(tag=t)------------- *2
+ *
+ * At point *1 we should be generating a different
+ * tag t'. Which means we would throw away the CE and send
+ * ours instead. Basically this is case C (throw away side).
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 17;
+ return (NULL);
+
+ }
+ switch (SCTP_GET_STATE(asoc)) {
+ case SCTP_STATE_COOKIE_WAIT:
+ case SCTP_STATE_COOKIE_ECHOED:
+ /*
+ * INIT was sent but got a COOKIE_ECHO with the
+ * correct tags... just accept it... but we must
+ * process the init so that we can make sure we have
+ * the right seq no's.
+ */
+ /* First we must process the INIT !! */
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 3;
+ return (NULL);
+ }
+ /* we have already processed the INIT so no problem */
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+ /* update current state */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ else
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ sctp_stop_all_cookie_timers(stcb);
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)
+ ) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ /*
+ * Here is where collision would go if we
+ * did a connect() and instead got an
+ * init/init-ack/cookie done before the
+ * init-ack came back..
+ */
+ stcb->sctp_ep->sctp_flags |=
+ SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+ /*
+ * since we did not send a HB make sure we don't
+ * double things
+ */
+ net->hb_responded = 1;
+ net->RTO = sctp_calculate_rto(stcb, asoc, net,
+ &cookie->time_entered, sctp_align_unsafe_makecopy);
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE,
+ inp, stcb, NULL);
+ }
+ break;
+ default:
+ /*
+ * we're in the OPEN state (or beyond), so peer must
+ * have simply lost the COOKIE-ACK
+ */
+ break;
+ } /* end switch */
+ sctp_stop_all_cookie_timers(stcb);
+ /*
+ * We ignore the return code here.. not sure if we should
+ * somehow abort.. but we do have an existing asoc. This
+ * really should not fail.
+ */
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 4;
+ return (NULL);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 5;
+ return (stcb);
+ }
+ if (ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
+ ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag &&
+ cookie->tie_tag_my_vtag == 0 &&
+ cookie->tie_tag_peer_vtag == 0) {
+ /*
+ * case C in Section 5.2.4 Table 2: XMOO silently discard
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 6;
+ return (NULL);
+ }
+ /*
+ * If the peer supports NAT, the checks below hold, and the stcb is
+ * established, send back an ABORT (colliding state).
+ */
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
+ (asoc->peer_supports_nat) &&
+ ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
+ ((ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) ||
+ (asoc->peer_vtag == 0)))) {
+ /*
+ * Special case - Peers support NAT. We may have two INITs
+ * that we gave out the same tag on since one was not
+ * established.. i.e. we get INIT from host-1 behind the NAT
+ * and we respond tag-a, we get an INIT from host-2 behind
+ * the NAT and we get tag-a again. Then we bring up host-1
+ * (or 2's) assoc, then comes the cookie from host-2 (or 1).
+ * Now we have colliding state. We must send an abort here
+ * with colliding state indication.
+ */
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err == NULL) {
+ /* FOOBAR */
+ return (NULL);
+ }
+ /* pre-reserve some space */
+#ifdef INET6
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+#else
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
+#endif
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ /* Set the len */
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_NAT_COLLIDING_STATE);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id, port);
+ return (NULL);
+ }
+ if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
+ ((ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) ||
+ (asoc->peer_vtag == 0))) {
+ /*
+ * case B in Section 5.2.4 Table 2: MXAA or MOAA; my info
+ * should be ok, re-accept peer info
+ */
+ if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) {
+ /*
+ * Extension of case C. If we hit this, then the
+ * random number generator returned the same vtag
+ * when we first sent our INIT-ACK and when we later
+ * sent our INIT. The side with the seq numbers that
+ * are different will be the one that normally
+ * would have hit case C. This in effect "extends"
+ * our vtags in this collision case to be 64 bits.
+ * The same collision could occur aka you get both
+ * vtag and seq number the same twice in a row.. but
+ * is much less likely. If it did happen then we
+ * would proceed through and bring up the assoc.. we
+ * may end up with the wrong stream setup however..
+ * which would be bad.. but there is no way to
+ * tell.. until we send on a stream that does not
+ * exist :-)
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 7;
+
+ return (NULL);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 8;
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
+ sctp_stop_all_cookie_timers(stcb);
+ /*
+ * since we did not send a HB make sure we don't double
+ * things
+ */
+ net->hb_responded = 1;
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
+ NULL);
+ }
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+ asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams);
+
+ /* Note last_cwr_tsn? where is this used? */
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ if (ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) {
+ /*
+ * Ok the peer probably discarded our data (if we
+ * echoed a cookie+data). So anything on the
+ * sent_queue should be marked for retransmit, we
+ * may not get something to kick us so it COULD
+ * still take a timeout to move these.. but it can't
+ * hurt to mark them.
+ */
+ struct sctp_tmit_chunk *chk;
+
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ spec_flag++;
+ }
+ }
+
+ }
+ /* process the INIT info (peer's info) */
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 9;
+ return (NULL);
+ }
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 10;
+ return (NULL);
+ }
+ if ((asoc->state & SCTP_STATE_COOKIE_WAIT) ||
+ (asoc->state & SCTP_STATE_COOKIE_ECHOED)) {
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ stcb->sctp_ep->sctp_flags |=
+ SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ else
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ } else if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_INCR_COUNTER32(sctps_restartestab);
+ } else {
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (spec_flag) {
+ /*
+ * only if we have retrans set do we do this. What
+ * this call does is get only the COOKIE-ACK out and
+ * then when we return the normal call to
+ * sctp_chunk_output will get the retrans out behind
+ * this.
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_COOKIE_ACK, SCTP_SO_NOT_LOCKED);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 11;
+
+ return (stcb);
+ }
+ if ((ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
+ ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) &&
+ cookie->tie_tag_my_vtag == asoc->my_vtag_nonce &&
+ cookie->tie_tag_peer_vtag == asoc->peer_vtag_nonce &&
+ cookie->tie_tag_peer_vtag != 0) {
+ struct sctpasochead *head;
+
+ if (asoc->peer_supports_nat) {
+ /*
+ * This is a gross gross hack. just call the
+ * cookie_new code since we are allowing a duplicate
+ * association. I hope this works...
+ */
+ return (sctp_process_cookie_new(m, iphlen, offset, sh, cookie, cookie_len,
+ inp, netp, init_src, notification,
+ auth_skipped, auth_offset, auth_len,
+ vrf_id, port));
+ }
+ /*
+ * case A in Section 5.2.4 Table 2: XXMM (peer restarted)
+ */
+ /* temp code */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 12;
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+
+ *sac_assoc_id = sctp_get_associd(stcb);
+ /* notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_RESTART;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_INCR_GAUGE32(sctps_restartestab);
+ } else if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ SCTP_STAT_INCR_GAUGE32(sctps_collisionestab);
+ }
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+
+ } else if (!(asoc->state & SCTP_STATE_SHUTDOWN_SENT)) {
+ /* move to OPEN state, if not in SHUTDOWN_SENT */
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ }
+ asoc->pre_open_streams =
+ ntohs(initack_cp->init.num_outbound_streams);
+ asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn);
+ asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number;
+ asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1;
+
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1;
+
+ asoc->str_reset_seq_in = asoc->init_seq_number;
+
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ if (asoc->mapping_array) {
+ memset(asoc->mapping_array, 0,
+ asoc->mapping_array_size);
+ }
+ if (asoc->nr_mapping_array) {
+ memset(asoc->nr_mapping_array, 0,
+ asoc->mapping_array_size);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(stcb->sctp_ep);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ /* send up all the data */
+ SCTP_TCB_SEND_LOCK(stcb);
+
+ sctp_report_all_outbound(stcb, 1, SCTP_SO_NOT_LOCKED);
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].stream_no = i;
+ stcb->asoc.strmout[i].next_sequence_sent = 0;
+ stcb->asoc.strmout[i].last_msg_incomplete = 0;
+ }
+ /* process the INIT-ACK info (my info) */
+ asoc->my_vtag = ntohl(initack_cp->init.initiate_tag);
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+
+ /* pull from vtag hash */
+ LIST_REMOVE(stcb, sctp_asocs);
+ /* re-insert to new vtag position */
+ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag,
+ SCTP_BASE_INFO(hashasocmark))];
+ /*
+ * put it in the bucket in the vtag hash of assoc's for the
+ * system
+ */
+ LIST_INSERT_HEAD(head, stcb, sctp_asocs);
+
+ /* process the INIT info (peer's info) */
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ SCTP_INP_WUNLOCK(stcb->sctp_ep);
+ SCTP_INP_INFO_WUNLOCK();
+
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 13;
+
+ return (NULL);
+ }
+ /*
+ * since we did not send a HB make sure we don't double
+ * things
+ */
+ net->hb_responded = 1;
+
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 14;
+
+ return (NULL);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 15;
+
+ return (stcb);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 16;
+ /* all other cases... */
+ return (NULL);
+}
+
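+/*
+ * For reference, a compact restatement of how the Section 5.2.4 Table 2
+ * cases map onto the checks in sctp_process_cookie_existing() above; this
+ * only summarizes the code above (the numbers are the cookie_how codes
+ * recorded in each branch), it adds no new rule:
+ *
+ *   case D (MMAA): both embedded tags match my_vtag/peer_vtag
+ *       -> process the INIT, move to OPEN if needed, send COOKIE-ACK (5)
+ *   case C (XMOO): my tag differs, peer tag matches, tie-tags are zero
+ *       -> silently discard (6)
+ *   case B (MXAA/MOAA): my tag matches, peer tag differs or is zero
+ *       -> re-accept peer info, send COOKIE-ACK (11)
+ *   case A (XXMM): both tags differ, tie-tags match our nonces
+ *       -> peer restart; reset the association, notify ASSOC_RESTART (15)
+ */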
+
+/*
+ * handle a state cookie for a new association
+ * m: input packet mbuf chain -- assumes a pullup on IP/SCTP/COOKIE-ECHO
+ *    chunk; note: this is a "split" mbuf and the cookie signature does
+ *    not exist
+ * offset: offset into mbuf to the cookie-echo chunk
+ * length: length of the cookie chunk
+ * to: where the init was from
+ * returns a new TCB
+ */
+struct sctp_tcb *
+sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
+ struct sctp_inpcb *inp, struct sctp_nets **netp,
+ struct sockaddr *init_src, int *notification,
+ int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ uint32_t vrf_id, uint16_t port)
+{
+ struct sctp_tcb *stcb;
+ struct sctp_init_chunk *init_cp, init_buf;
+ struct sctp_init_ack_chunk *initack_cp, initack_buf;
+ struct sockaddr_storage sa_store;
+ struct sockaddr *initack_src = (struct sockaddr *)&sa_store;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sctp_association *asoc;
+ int chk_length;
+ int init_offset, initack_offset, initack_limit;
+ int retval;
+ int error = 0;
+ uint32_t old_tag;
+ uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE];
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+#endif
+
+ /*
+ * find and validate the INIT chunk in the cookie (peer's info); the
+ * INIT should start after the cookie-echo header struct (chunk
+ * header, state cookie header struct)
+ */
+ init_offset = offset + sizeof(struct sctp_cookie_echo_chunk);
+ init_cp = (struct sctp_init_chunk *)
+ sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk),
+ (uint8_t *) & init_buf);
+ if (init_cp == NULL) {
+ /* could not pull a INIT chunk in cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "process_cookie_new: could not pull INIT chunk hdr\n");
+ return (NULL);
+ }
+ chk_length = ntohs(init_cp->ch.chunk_length);
+ if (init_cp->ch.chunk_type != SCTP_INITIATION) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "HUH? process_cookie_new: could not find INIT chunk!\n");
+ return (NULL);
+ }
+ initack_offset = init_offset + SCTP_SIZE32(chk_length);
+ /*
+ * find and validate the INIT-ACK chunk in the cookie (my info); the
+ * INIT-ACK follows the INIT chunk
+ */
+ initack_cp = (struct sctp_init_ack_chunk *)
+ sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk),
+ (uint8_t *) & initack_buf);
+ if (initack_cp == NULL) {
+ /* could not pull INIT-ACK chunk in cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: could not pull INIT-ACK chunk hdr\n");
+ return (NULL);
+ }
+ chk_length = ntohs(initack_cp->ch.chunk_length);
+ if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) {
+ return (NULL);
+ }
+ /*
+ * NOTE: We can't use the INIT_ACK's chk_length to determine the
+ * "initack_limit" value. This is because the chk_length field
+ * includes the length of the cookie, but the cookie is omitted when
+ * the INIT and INIT_ACK are tacked onto the cookie...
+ */
+ initack_limit = offset + cookie_len;
+
+ /*
+ * now that we know the INIT/INIT-ACK are in place, create a new TCB
+ * and populate
+ */
+
+ /*
+ * Here we do a trick: we pass in NULL for the proc/thread argument.
+ * We do this since in effect we only use the p argument when the
+ * socket is unbound and we must do an implicit bind. Since we are
+ * getting a cookie, we cannot be unbound.
+ */
+ stcb = sctp_aloc_assoc(inp, init_src, &error,
+ ntohl(initack_cp->init.initiate_tag), vrf_id,
+ (struct thread *)NULL
+ );
+ if (stcb == NULL) {
+ struct mbuf *op_err;
+
+ /* memory problem? */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "process_cookie_new: no room for another TCB!\n");
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+
+ sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
+ sh, op_err, vrf_id, port);
+ return (NULL);
+ }
+ /* get the correct sctp_nets */
+ if (netp)
+ *netp = sctp_findnet(stcb, init_src);
+
+ asoc = &stcb->asoc;
+ /* get scope variables out of cookie */
+ asoc->ipv4_local_scope = cookie->ipv4_scope;
+ asoc->site_scope = cookie->site_scope;
+ asoc->local_scope = cookie->local_scope;
+ asoc->loopback_scope = cookie->loopback_scope;
+
+ if ((asoc->ipv4_addr_legal != cookie->ipv4_addr_legal) ||
+ (asoc->ipv6_addr_legal != cookie->ipv6_addr_legal)) {
+ struct mbuf *op_err;
+
+ /*
+ * Houston we have a problem. The EP changed while the
+ * cookie was in flight. Only recourse is to abort the
+ * association.
+ */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
+ sh, op_err, vrf_id, port);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /* process the INIT-ACK info (my info) */
+ old_tag = asoc->my_vtag;
+ asoc->my_vtag = ntohl(initack_cp->init.initiate_tag);
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+ asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams);
+ asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn);
+ asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number;
+ asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1;
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1;
+ asoc->str_reset_seq_in = asoc->init_seq_number;
+
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+
+ /* process the INIT info (peer's info) */
+ if (netp)
+ retval = sctp_process_init(init_cp, stcb, *netp);
+ else
+ retval = 0;
+ if (retval < 0) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /* load all addresses */
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk), initack_offset, sh,
+ init_src)) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /*
+ * verify any preceding AUTH chunk that was skipped
+ */
+ /* pull the local authentication parameters from the cookie/init-ack */
+ sctp_auth_get_cookie_params(stcb, m,
+ initack_offset + sizeof(struct sctp_init_ack_chunk),
+ initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)));
+ if (auth_skipped) {
+ struct sctp_auth_chunk *auth;
+
+ auth = (struct sctp_auth_chunk *)
+ sctp_m_getptr(m, auth_offset, auth_len, auth_chunk_buf);
+ if ((auth == NULL) || sctp_handle_auth(stcb, auth, m, auth_offset)) {
+ /* auth HMAC failed, dump the assoc and packet */
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "COOKIE-ECHO: AUTH failed\n");
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ } else {
+ /* remaining chunks checked... good to go */
+ stcb->asoc.authenticated = 1;
+ }
+ }
+ /* update current state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ sctp_stop_all_cookie_timers(stcb);
+ SCTP_STAT_INCR_COUNTER32(sctps_passiveestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+
+ /*
+ * if we're doing ASCONFs, check to see if we have any new local
+ * addresses that need to get added to the peer (e.g. addresses
+ * changed while cookie echo in flight). This needs to be done
+ * after we go to the OPEN state to do the correct asconf
+ * processing. Else, make sure we have the correct addresses in our
+ * lists
+ */
+
+ /* warning, we re-use sin, sin6, sa_store here! */
+ /* pull in local_address (our "from" address) */
+ if (cookie->laddr_type == SCTP_IPV4_ADDRESS) {
+ /* source addr is IPv4 */
+ sin = (struct sockaddr_in *)initack_src;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_addr.s_addr = cookie->laddress[0];
+ } else if (cookie->laddr_type == SCTP_IPV6_ADDRESS) {
+ /* source addr is IPv6 */
+ sin6 = (struct sockaddr_in6 *)initack_src;
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_scope_id = cookie->scope_id;
+ memcpy(&sin6->sin6_addr, cookie->laddress,
+ sizeof(sin6->sin6_addr));
+ } else {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+
+ /* set up to notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)) {
+ /*
+ * This is an endpoint that called connect(); how it got a
+ * cookie that is NEW is a bit of a mystery. It must be that
+ * the INIT was sent, but before it got there.. a complete
+ * INIT/INIT-ACK/COOKIE arrived. But of course then it
+ * should have gone to the other code.. not here.. oh well..
+ * a bit of protection is worth having..
+ */
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_socket->so_qlimit)) {
+ /*
+ * We don't want to do anything with this one, since it is
+ * the listening guy. The timer will get started for
+ * accepted connections in the caller.
+ */
+ ;
+ }
+ /* since we did not send a HB make sure we don't double things */
+ if ((netp) && (*netp))
+ (*netp)->hb_responded = 1;
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL);
+ }
+ /* calculate the RTT */
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ if ((netp) && (*netp)) {
+ (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
+ &cookie->time_entered, sctp_align_unsafe_makecopy);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_send_cookie_ack(stcb);
+
+ /*
+ * check the address lists for any ASCONFs that need to be sent
+ * AFTER the cookie-ack is sent
+ */
+ sctp_check_address_list(stcb, m,
+ initack_offset + sizeof(struct sctp_init_ack_chunk),
+ initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)),
+ initack_src, cookie->local_scope, cookie->site_scope,
+ cookie->ipv4_scope, cookie->loopback_scope);
+
+
+ return (stcb);
+}
+
+/*
+ * CODE LIKE THIS NEEDS TO RUN IF the peer supports the NAT extension, i.e.
+ * we NEED to make sure we are not already using the vtag. If so we
+ * need to send back an ABORT-TRY-AGAIN-WITH-NEW-TAG. No middle box bit!
+ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag,
+ SCTP_BASE_INFO(hashasocmark))];
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+ if ((stcb->asoc.my_vtag == tag) && (stcb->rport == rport) && (inp == stcb->sctp_ep)) {
+ -- SEND ABORT - TRY AGAIN --
+ }
+ }
+*/
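+
+/*
+ * A minimal sketch of what the "-- SEND ABORT - TRY AGAIN --" step above
+ * could look like, kept commented out like the note itself.  It assumes
+ * the SCTP_CAUSE_NAT_COLLIDING_STATE cause used in
+ * sctp_process_cookie_existing() is the intended one, and that m, iphlen,
+ * sh, vrf_id and port are available from the caller:
+ *
+ *	struct sctp_paramhdr *ph;
+ *
+ *	op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ *	    0, M_DONTWAIT, 1, MT_DATA);
+ *	if (op_err != NULL) {
+ *		SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
+ *		ph = mtod(op_err, struct sctp_paramhdr *);
+ *		ph->param_type = htons(SCTP_CAUSE_NAT_COLLIDING_STATE);
+ *		ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ *		sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id, port);
+ *	}
+ */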
+
+/*
+ * handles a COOKIE-ECHO message
+ * stcb: modified to either a new or left as existing (non-NULL) TCB
+ */
+static struct mbuf *
+sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp,
+ struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp,
+ int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ struct sctp_tcb **locked_tcb, uint32_t vrf_id, uint16_t port)
+{
+ struct sctp_state_cookie *cookie;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sctp_tcb *l_stcb = *stcb;
+ struct sctp_inpcb *l_inp;
+ struct sockaddr *to;
+ sctp_assoc_t sac_restart_id;
+ struct sctp_pcb *ep;
+ struct mbuf *m_sig;
+ uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE];
+ uint8_t *sig;
+ uint8_t cookie_ok = 0;
+ unsigned int size_of_pkt, sig_offset, cookie_offset;
+ unsigned int cookie_len;
+ struct timeval now;
+ struct timeval time_expires;
+ struct sockaddr_storage dest_store;
+ struct sockaddr *localep_sa = (struct sockaddr *)&dest_store;
+ struct ip *iph;
+ int notification = 0;
+ struct sctp_nets *netl;
+ int had_a_existing_tcb = 0;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_cookie: handling COOKIE-ECHO\n");
+
+ if (inp_p == NULL) {
+ return (NULL);
+ }
+ /* First get the destination address setup too. */
+ iph = mtod(m, struct ip *);
+ switch (iph->ip_v) {
+ case IPVERSION:
+ {
+ /* its IPv4 */
+ struct sockaddr_in *lsin;
+
+ lsin = (struct sockaddr_in *)(localep_sa);
+ memset(lsin, 0, sizeof(*lsin));
+ lsin->sin_family = AF_INET;
+ lsin->sin_len = sizeof(*lsin);
+ lsin->sin_port = sh->dest_port;
+ lsin->sin_addr.s_addr = iph->ip_dst.s_addr;
+ size_of_pkt = SCTP_GET_IPV4_LENGTH(iph);
+ break;
+ }
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *lsin6;
+
+ lsin6 = (struct sockaddr_in6 *)(localep_sa);
+ memset(lsin6, 0, sizeof(*lsin6));
+ lsin6->sin6_family = AF_INET6;
+ lsin6->sin6_len = sizeof(struct sockaddr_in6);
+ ip6 = mtod(m, struct ip6_hdr *);
+ lsin6->sin6_port = sh->dest_port;
+ lsin6->sin6_addr = ip6->ip6_dst;
+ size_of_pkt = SCTP_GET_IPV6_LENGTH(ip6) + iphlen;
+ break;
+ }
+#endif
+ default:
+ return (NULL);
+ }
+
+ cookie = &cp->cookie;
+ cookie_offset = offset + sizeof(struct sctp_chunkhdr);
+ cookie_len = ntohs(cp->ch.chunk_length);
+
+ if ((cookie->peerport != sh->src_port) &&
+ (cookie->myport != sh->dest_port) &&
+ (cookie->my_vtag != sh->v_tag)) {
+ /*
+ * invalid ports or bad tag. Note that we always leave the
+ * v_tag in the header in network order and when we stored
+ * it in the my_vtag slot we also left it in network order.
+ * This maintains the match even though it may be in the
+ * opposite byte order of the machine :->
+ */
+ return (NULL);
+ }
+ if (cookie_len > size_of_pkt ||
+ cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
+ sizeof(struct sctp_init_chunk) +
+ sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) {
+ /* cookie too long! or too small */
+ return (NULL);
+ }
+ /*
+ * split off the signature into its own mbuf (since it should not be
+ * calculated in the sctp_hmac_m() call).
+ */
+ sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE;
+ if (sig_offset > size_of_pkt) {
+ /* packet not correct size! */
+ /* XXX this may already be accounted for earlier... */
+ return (NULL);
+ }
+ m_sig = m_split(m, sig_offset, M_DONTWAIT);
+ if (m_sig == NULL) {
+ /* out of memory or ?? */
+ return (NULL);
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = m_sig;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_SPLIT);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+
+ /*
+ * compute the signature/digest for the cookie
+ */
+ ep = &(*inp_p)->sctp_ep;
+ l_inp = *inp_p;
+ if (l_stcb) {
+ SCTP_TCB_UNLOCK(l_stcb);
+ }
+ SCTP_INP_RLOCK(l_inp);
+ if (l_stcb) {
+ SCTP_TCB_LOCK(l_stcb);
+ }
+ /* which cookie is it? */
+ if ((cookie->time_entered.tv_sec < (long)ep->time_of_secret_change) &&
+ (ep->current_secret_number != ep->last_secret_number)) {
+ /* it's the old cookie */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->last_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ } else {
+ /* it's the current cookie */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->current_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ }
+ /* get the signature */
+ SCTP_INP_RUNLOCK(l_inp);
+ sig = (uint8_t *) sctp_m_getptr(m_sig, 0, SCTP_SIGNATURE_SIZE, (uint8_t *) & tmp_sig);
+ if (sig == NULL) {
+ /* couldn't find signature */
+ sctp_m_freem(m_sig);
+ return (NULL);
+ }
+ /* compare the received digest with the computed digest */
+ if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) {
+ /* try the old cookie? */
+ if ((cookie->time_entered.tv_sec == (long)ep->time_of_secret_change) &&
+ (ep->current_secret_number != ep->last_secret_number)) {
+ /* compute digest with old */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->last_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ /* compare */
+ if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0)
+ cookie_ok = 1;
+ }
+ } else {
+ cookie_ok = 1;
+ }
+
+ /*
+ * Now before we continue we must reconstruct our mbuf so that
+ * normal processing of any other chunks will work.
+ */
+ {
+ struct mbuf *m_at;
+
+ m_at = m;
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ SCTP_BUF_NEXT(m_at) = m_sig;
+ }
+
+ if (cookie_ok == 0) {
+ SCTPDBG(SCTP_DEBUG_INPUT2, "handle_cookie_echo: cookie signature validation failed!\n");
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "offset = %u, cookie_offset = %u, sig_offset = %u\n",
+ (uint32_t) offset, cookie_offset, sig_offset);
+ return (NULL);
+ }
+ /*
+ * check the cookie timestamps to be sure it's not stale
+ */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* Expire time is in Ticks, so we convert to seconds */
+ time_expires.tv_sec = cookie->time_entered.tv_sec + TICKS_TO_SEC(cookie->cookie_life);
+ time_expires.tv_usec = cookie->time_entered.tv_usec;
+ /*
+ * TODO sctp_constants.h needs alternative time macros when _KERNEL
+ * is undefined.
+ */
+ if (timevalcmp(&now, &time_expires, >)) {
+ /* cookie is stale! */
+ struct mbuf *op_err;
+ struct sctp_stale_cookie_msg *scm;
+ uint32_t tim;
+
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_stale_cookie_msg),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err == NULL) {
+ /* FOOBAR */
+ return (NULL);
+ }
+ /* Set the len */
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_stale_cookie_msg);
+ scm = mtod(op_err, struct sctp_stale_cookie_msg *);
+ scm->ph.param_type = htons(SCTP_CAUSE_STALE_COOKIE);
+ scm->ph.param_length = htons((sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t))));
+ /* seconds to usec */
+ tim = (now.tv_sec - time_expires.tv_sec) * 1000000;
+ /* add in usec */
+ if (tim == 0)
+ tim = now.tv_usec - cookie->time_entered.tv_usec;
+ scm->time_usec = htonl(tim);
+ sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag,
+ vrf_id, port);
+ return (NULL);
+ }
+ /*
+ * Now we must see with the lookup address if we have an existing
+ * asoc. This will only happen if we were in the COOKIE-WAIT state
+ * and an INIT collided with us and somewhere the peer sent the
+ * cookie on another address besides the single address our assoc
+ * had for him. In this case we will have one of the tie-tags set at
+ * least AND the address field in the cookie can be used to look it
+ * up.
+ */
+ to = NULL;
+ if (cookie->addr_type == SCTP_IPV6_ADDRESS) {
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_port = sh->src_port;
+ sin6.sin6_scope_id = cookie->scope_id;
+ memcpy(&sin6.sin6_addr.s6_addr, cookie->address,
+ sizeof(sin6.sin6_addr.s6_addr));
+ to = (struct sockaddr *)&sin6;
+ } else if (cookie->addr_type == SCTP_IPV4_ADDRESS) {
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ sin.sin_port = sh->src_port;
+ sin.sin_addr.s_addr = cookie->address[0];
+ to = (struct sockaddr *)&sin;
+ } else {
+ /* This should not happen */
+ return (NULL);
+ }
+ if ((*stcb == NULL) && to) {
+ /* Yep, lets check */
+ *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, localep_sa, NULL);
+ if (*stcb == NULL) {
+ /*
+ * We should have only got back the same inp. If we
+ * got back a different ep we have a problem. The
+ * original findep got back l_inp and now
+ */
+ if (l_inp != *inp_p) {
+ SCTP_PRINTF("Bad problem find_ep got a diff inp then special_locate?\n");
+ }
+ } else {
+ if (*locked_tcb == NULL) {
+ /*
+ * In this case we found the assoc only
+ * after we locked the create lock. This
+ * means we are in a colliding case and we
+ * must make sure that we unlock the tcb if
+ * its one of the cases where we throw away
+ * the incoming packets.
+ */
+ *locked_tcb = *stcb;
+
+ /*
+ * We must also increment the inp ref count
+ * since the ref_count flag was set when we
+ * did not find the TCB, now we found it
+ * which reduces the refcount.. we must
+ * raise it back out to balance it all :-)
+ */
+ SCTP_INP_INCR_REF((*stcb)->sctp_ep);
+ if ((*stcb)->sctp_ep != l_inp) {
+ SCTP_PRINTF("Huh? ep:%p diff then l_inp:%p?\n",
+ (*stcb)->sctp_ep, l_inp);
+ }
+ }
+ }
+ }
+ if (to == NULL) {
+ return (NULL);
+ }
+ cookie_len -= SCTP_SIGNATURE_SIZE;
+ if (*stcb == NULL) {
+ /* this is the "normal" case... get a new TCB */
+ *stcb = sctp_process_cookie_new(m, iphlen, offset, sh, cookie,
+ cookie_len, *inp_p, netp, to, &notification,
+ auth_skipped, auth_offset, auth_len, vrf_id, port);
+ } else {
+ /* this is abnormal... cookie-echo on existing TCB */
+ had_a_existing_tcb = 1;
+ *stcb = sctp_process_cookie_existing(m, iphlen, offset, sh,
+ cookie, cookie_len, *inp_p, *stcb, netp, to,
+ &notification, &sac_restart_id, vrf_id, auth_skipped, auth_offset, auth_len, port);
+ }
+
+ if (*stcb == NULL) {
+ /* still no TCB... must be bad cookie-echo */
+ return (NULL);
+ }
+ /*
+ * Ok, we built an association so confirm the address we sent the
+ * INIT-ACK to.
+ */
+ netl = sctp_findnet(*stcb, to);
+ /*
+ * This code should in theory NOT run but
+ */
+ if (netl == NULL) {
+ /* TSNH! Huh, why do I need to add this address here? */
+ int ret;
+
+ ret = sctp_add_remote_addr(*stcb, to, SCTP_DONOT_SETSCOPE,
+ SCTP_IN_COOKIE_PROC);
+ netl = sctp_findnet(*stcb, to);
+ }
+ if (netl) {
+ if (netl->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ netl->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ (void)sctp_set_primary_addr((*stcb), (struct sockaddr *)NULL,
+ netl);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ if (*stcb) {
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, *inp_p,
+ *stcb, NULL);
+ }
+ if ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ if (!had_a_existing_tcb ||
+ (((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) {
+ /*
+ * If we have a NEW cookie or the connect never
+ * reached the connected state during collision we
+ * must do the TCP accept thing.
+ */
+ struct socket *so, *oso;
+ struct sctp_inpcb *inp;
+
+ if (notification == SCTP_NOTIFY_ASSOC_RESTART) {
+ /*
+ * For a restart we will keep the same
+ * socket, no need to do anything. I THINK!!
+ */
+ sctp_ulp_notify(notification, *stcb, 0, (void *)&sac_restart_id, SCTP_SO_NOT_LOCKED);
+ return (m);
+ }
+ oso = (*inp_p)->sctp_socket;
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ so = sonewconn(oso, 0
+ );
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+
+ if (so == NULL) {
+ struct mbuf *op_err;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *pcb_so;
+
+#endif
+ /* Too many sockets */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n");
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_association(*inp_p, NULL, m, iphlen,
+ sh, op_err, vrf_id, port);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ pcb_so = SCTP_INP_SO(*inp_p);
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ SCTP_SOCKET_LOCK(pcb_so, 1);
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(pcb_so, 1);
+#endif
+ return (NULL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ SCTP_INP_INCR_REF(inp);
+ /*
+ * We add the unbound flag here so that if we get an
+ * soabort() before we get the move_pcb done, we
+ * will properly clean up.
+ */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL |
+ SCTP_PCB_FLAGS_UNBOUND |
+ (SCTP_PCB_COPY_FLAGS & (*inp_p)->sctp_flags) |
+ SCTP_PCB_FLAGS_DONT_WAKE);
+ inp->sctp_features = (*inp_p)->sctp_features;
+ inp->sctp_mobility_features = (*inp_p)->sctp_mobility_features;
+ inp->sctp_socket = so;
+ inp->sctp_frag_point = (*inp_p)->sctp_frag_point;
+ inp->sctp_cmt_on_off = (*inp_p)->sctp_cmt_on_off;
+ inp->partial_delivery_point = (*inp_p)->partial_delivery_point;
+ inp->sctp_context = (*inp_p)->sctp_context;
+ inp->inp_starting_point_for_iterator = NULL;
+ /*
+ * copy in the authentication parameters from the
+ * original endpoint
+ */
+ if (inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+ inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist((*inp_p)->sctp_ep.local_hmacs);
+ if (inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(inp->sctp_ep.local_auth_chunks);
+ inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist((*inp_p)->sctp_ep.local_auth_chunks);
+
+ /*
+ * Now we must move it from one hash table to
+ * another and get the tcb in the right place.
+ */
+
+ /*
+ * This is where the one-2-one socket is put into
+ * the accept state waiting for the accept!
+ */
+ if (*stcb) {
+ (*stcb)->asoc.state |= SCTP_STATE_IN_ACCEPT_QUEUE;
+ }
+ sctp_move_pcb_and_assoc(*inp_p, inp, *stcb);
+
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+
+ sctp_pull_off_control_to_new_inp((*inp_p), inp, *stcb,
+ 0);
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+
+
+ /*
+ * now we must check to see if we were aborted while
+ * the move was going on and the lock/unlock
+ * happened.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /*
+ * yep it was, we leave the assoc attached
+ * to the socket since the sctp_inpcb_free()
+ * call will send an abort for us.
+ */
+ SCTP_INP_DECR_REF(inp);
+ return (NULL);
+ }
+ SCTP_INP_DECR_REF(inp);
+ /* Switch over to the new guy */
+ *inp_p = inp;
+ sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+
+ /*
+ * Pull it from the incomplete queue and wake the
+ * guy
+ */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ SCTP_SOCKET_LOCK(so, 1);
+#endif
+ soisconnected(so);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return (m);
+ }
+ }
+ if ((notification) && ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
+ sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ }
+ return (m);
+}
+
+static void
+sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* cp must not be used, others call this without a c-ack :-) */
+ struct sctp_association *asoc;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_cookie_ack: handling COOKIE-ACK\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+
+ sctp_stop_all_cookie_timers(stcb);
+ /* process according to association state */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /* state change only needed when I am in right state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+
+ }
+ /* update RTO */
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ if (asoc->overall_error_count == 0) {
+ net->RTO = sctp_calculate_rto(stcb, asoc, net,
+ &asoc->time_entered, sctp_align_safe_nocopy);
+ }
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, net);
+ /*
+ * since we did not send a HB make sure we don't double
+ * things
+ */
+ net->hb_responded = 1;
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE,
+ stcb->sctp_ep, stcb, NULL);
+ }
+ /*
+ * send ASCONF if parameters are pending and ASCONFs are
+ * allowed (eg. addresses changed when init/cookie echo were
+ * in flight)
+ */
+ if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_DO_ASCONF)) &&
+ (stcb->asoc.peer_supports_asconf) &&
+ (!TAILQ_EMPTY(&stcb->asoc.asconf_queue))) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ /* Toss the cookie if I can */
+ sctp_toss_old_cookies(stcb, asoc);
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* Restart the timer if we have pending data */
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ if (chk) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, chk->whoTo);
+ }
+ }
+}
+
+static void
+sctp_handle_ecn_echo(struct sctp_ecne_chunk *cp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+ struct sctp_tmit_chunk *lchk;
+ uint32_t tsn;
+
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_ecne_chunk)) {
+ return;
+ }
+ SCTP_STAT_INCR(sctps_recvecne);
+ tsn = ntohl(cp->tsn);
+ /* ECN Nonce stuff: need a resync and disable the nonce sum check */
+ /* Also we make sure we disable the nonce_wait */
+ lchk = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (lchk == NULL) {
+ stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq;
+ } else {
+ stcb->asoc.nonce_resync_tsn = lchk->rec.data.TSN_seq;
+ }
+ stcb->asoc.nonce_wait_for_ecne = 0;
+ stcb->asoc.nonce_sum_check = 0;
+
+ /* Find where it was sent, if possible */
+ net = NULL;
+ lchk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (lchk) {
+ if (lchk->rec.data.TSN_seq == tsn) {
+ net = lchk->whoTo;
+ break;
+ }
+ if (compare_with_wrap(lchk->rec.data.TSN_seq, tsn, MAX_SEQ))
+ break;
+ lchk = TAILQ_NEXT(lchk, sctp_next);
+ }
+ if (net == NULL)
+ /* default is we use the primary */
+ net = stcb->asoc.primary_destination;
+
+ if (compare_with_wrap(tsn, stcb->asoc.last_cwr_tsn, MAX_TSN)) {
+ /*
+ * JRS - Use the congestion control given in the pluggable
+ * CC module
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net);
+ /*
+ * we reduce once every RTT. So we will only lower cwnd at
+ * the next sending seq i.e. the resync_tsn.
+ */
+ stcb->asoc.last_cwr_tsn = stcb->asoc.nonce_resync_tsn;
+ }
+ /*
+ * We always send a CWR this way: if our previous one was lost, our
+ * peer still gets an update, and if it is not yet time to reduce
+ * again, the peer still gets the CWR.
+ */
+ sctp_send_cwr(stcb, net, tsn);
+}
+
+static void
+sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb)
+{
+ /*
+ * Here we get a CWR from the peer. We must look in the outqueue and
+ * make sure that we have a covered ECNE in the control chunk part.
+ * If so, remove it.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct sctp_ecne_chunk *ecne;
+
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) {
+ continue;
+ }
+ /*
+ * Look for and remove it if it is the right TSN. Since there
+ * is only ONE ECNE on the control queue at any one time, we
+ * don't need to worry about more than one!
+ */
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ if (compare_with_wrap(ntohl(cp->tsn), ntohl(ecne->tsn),
+ MAX_TSN) || (cp->tsn == ecne->tsn)) {
+ /* this covers this ECNE, we can remove it */
+ stcb->asoc.ecn_echo_cnt_onq--;
+ TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk,
+ sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ stcb->asoc.ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ break;
+ }
+ }
+}
+
+static void
+sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: handling SHUTDOWN-COMPLETE\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+ /* process according to association state */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /* unexpected SHUTDOWN-COMPLETE... so ignore... */
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: not in SCTP_STATE_SHUTDOWN_ACK_SENT --- ignore\n");
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ /* notify upper layer protocol */
+ if (stcb->sctp_socket) {
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ /* are the queues empty? they should be */
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ !TAILQ_EMPTY(&asoc->out_wheel)) {
+ sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ /* stop the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+ SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
+ /* free the TCB */
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: calls free-asoc\n");
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return;
+}
+
+static int
+process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
+ struct sctp_nets *net, uint8_t flg)
+{
+ switch (desc->chunk_type) {
+ case SCTP_DATA:
+ /* find the tsn to resend (possibly) */
+ {
+ uint32_t tsn;
+ struct sctp_tmit_chunk *tp1;
+
+ tsn = ntohl(desc->tsn_ifany);
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (tp1) {
+ if (tp1->rec.data.TSN_seq == tsn) {
+ /* found it */
+ break;
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, tsn,
+ MAX_TSN)) {
+ /* not found */
+ tp1 = NULL;
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ if (tp1 == NULL) {
+ /*
+ * Do it the other way, aka without paying
+ * attention to queue seq order.
+ */
+ SCTP_STAT_INCR(sctps_pdrpdnfnd);
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (tp1) {
+ if (tp1->rec.data.TSN_seq == tsn) {
+ /* found it */
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ }
+ if (tp1 == NULL) {
+ SCTP_STAT_INCR(sctps_pdrptsnnf);
+ }
+ if ((tp1) && (tp1->sent < SCTP_DATAGRAM_ACKED)) {
+ uint8_t *ddp;
+
+ if (((flg & SCTP_BADCRC) == 0) &&
+ ((flg & SCTP_FROM_MIDDLE_BOX) == 0)) {
+ return (0);
+ }
+ if ((stcb->asoc.peers_rwnd == 0) &&
+ ((flg & SCTP_FROM_MIDDLE_BOX) == 0)) {
+ SCTP_STAT_INCR(sctps_pdrpdiwnp);
+ return (0);
+ }
+ if (stcb->asoc.peers_rwnd == 0 &&
+ (flg & SCTP_FROM_MIDDLE_BOX)) {
+ SCTP_STAT_INCR(sctps_pdrpdizrw);
+ return (0);
+ }
+ ddp = (uint8_t *) (mtod(tp1->data, caddr_t)+
+ sizeof(struct sctp_data_chunk));
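+ /*
+ * Verify the payload bytes echoed in the drop report against
+ * our own copy of the chunk before acting on the report.
+ */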
+ {
+ unsigned int iii;
+
+ for (iii = 0; iii < sizeof(desc->data_bytes);
+ iii++) {
+ if (ddp[iii] != desc->data_bytes[iii]) {
+ SCTP_STAT_INCR(sctps_pdrpbadd);
+ return (-1);
+ }
+ }
+ }
+ /*
+ * We zero out the nonce so a resync is not needed
+ */
+ tp1->rec.data.ect_nonce = 0;
+
+ if (tp1->do_rtt) {
+ /*
+ * this guy had an RTO calculation
+ * pending on it; cancel it
+ */
+ tp1->do_rtt = 0;
+ }
+ SCTP_STAT_INCR(sctps_pdrpmark);
+ if (tp1->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ /*
+ * mark it as if we were doing a FR, since
+ * we will be getting gap ack reports behind
+ * the info from the router.
+ */
+ tp1->rec.data.doing_fast_retransmit = 1;
+ /*
+ * mark the tsn with what sequences can
+ * cause a new FR.
+ */
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
+ tp1->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
+ } else {
+ tp1->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.TSN_seq;
+ }
+
+ /* restart the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, tp1->whoTo);
+
+ /* fix counts and things */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PDRP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) stcb,
+ tp1->rec.data.TSN_seq);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ }
+ tp1->sent = SCTP_DATAGRAM_RESEND;
+ } {
+ /* audit code */
+ unsigned int audit;
+
+ audit = 0;
+ TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) {
+ if (tp1->sent == SCTP_DATAGRAM_RESEND)
+ audit++;
+ }
+ TAILQ_FOREACH(tp1, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (tp1->sent == SCTP_DATAGRAM_RESEND)
+ audit++;
+ }
+ if (audit != stcb->asoc.sent_queue_retran_cnt) {
+ SCTP_PRINTF("**Local Audit finds cnt:%d asoc cnt:%d\n",
+ audit, stcb->asoc.sent_queue_retran_cnt);
+#ifndef SCTP_AUDITING_ENABLED
+ stcb->asoc.sent_queue_retran_cnt = audit;
+#endif
+ }
+ }
+ }
+ break;
+ case SCTP_ASCONF:
+ {
+ struct sctp_tmit_chunk *asconf;
+
+ TAILQ_FOREACH(asconf, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (asconf->rec.chunk_id.id == SCTP_ASCONF) {
+ break;
+ }
+ }
+ if (asconf) {
+ if (asconf->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ asconf->sent = SCTP_DATAGRAM_RESEND;
+ asconf->snd_count--;
+ }
+ }
+ break;
+ case SCTP_INITIATION:
+ /* resend the INIT */
+ stcb->asoc.dropped_special_cnt++;
+ if (stcb->asoc.dropped_special_cnt < SCTP_RETRY_DROPPED_THRESH) {
+ /*
+ * If we can get it in within a few attempts, we do
+ * this; otherwise we let the timer fire.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_SELECTIVE_ACK:
+ case SCTP_NR_SELECTIVE_ACK:
+ /* resend the sack */
+ sctp_send_sack(stcb);
+ break;
+ case SCTP_HEARTBEAT_REQUEST:
+ /* resend a demand HB */
+ if ((stcb->asoc.overall_error_count + 3) < stcb->asoc.max_send_times) {
+ /*
+ * Only retransmit if we KNOW we won't destroy the
+ * tcb
+ */
+ (void)sctp_send_hb(stcb, 1, net);
+ }
+ break;
+ case SCTP_SHUTDOWN:
+ sctp_send_shutdown(stcb, net);
+ break;
+ case SCTP_SHUTDOWN_ACK:
+ sctp_send_shutdown_ack(stcb, net);
+ break;
+ case SCTP_COOKIE_ECHO:
+ {
+ struct sctp_tmit_chunk *cookie;
+
+ cookie = NULL;
+ TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ break;
+ }
+ }
+ if (cookie) {
+ if (cookie->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ cookie->sent = SCTP_DATAGRAM_RESEND;
+ sctp_stop_all_cookie_timers(stcb);
+ }
+ }
+ break;
+ case SCTP_COOKIE_ACK:
+ sctp_send_cookie_ack(stcb);
+ break;
+ case SCTP_ASCONF_ACK:
+ /* resend last asconf ack */
+ sctp_send_asconf_ack(stcb);
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ send_forward_tsn(stcb, &stcb->asoc);
+ break;
+ /* can't do anything with these */
+ case SCTP_PACKET_DROPPED:
+ case SCTP_INITIATION_ACK: /* this should not happen */
+ case SCTP_HEARTBEAT_ACK:
+ case SCTP_ABORT_ASSOCIATION:
+ case SCTP_OPERATION_ERROR:
+ case SCTP_SHUTDOWN_COMPLETE:
+ case SCTP_ECN_ECHO:
+ case SCTP_ECN_CWR:
+ default:
+ break;
+ }
+ return (0);
+}
+
+void
+sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+{
+ int i;
+ uint16_t temp;
+
+ /*
+ * We set things to 0xffff since this is the last delivered sequence
+ * and we will be sending in 0 after the reset.
+ */
+
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(list[i]);
+ if (temp >= stcb->asoc.streamincnt) {
+ continue;
+ }
+ stcb->asoc.strmin[temp].last_sequence_delivered = 0xffff;
+ }
+ } else {
+ list = NULL;
+ for (i = 0; i < stcb->asoc.streamincnt; i++) {
+ stcb->asoc.strmin[i].last_sequence_delivered = 0xffff;
+ }
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_RECV, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
+}
+
+static void
+sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+{
+ int i;
+
+ if (number_entries == 0) {
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].next_sequence_sent = 0;
+ }
+ } else if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ uint16_t temp;
+
+ temp = ntohs(list[i]);
+ if (temp >= stcb->asoc.streamoutcnt) {
+ /* no such stream */
+ continue;
+ }
+ stcb->asoc.strmout[temp].next_sequence_sent = 0;
+ }
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
+}
+
+
+struct sctp_stream_reset_out_request *
+sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_reset_out_req *req;
+ struct sctp_stream_reset_out_request *r;
+ struct sctp_tmit_chunk *chk;
+ int len, clen;
+
+ asoc = &stcb->asoc;
+ if (TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
+ asoc->stream_reset_outstanding = 0;
+ return (NULL);
+ }
+ if (stcb->asoc.str_reset == NULL) {
+ asoc->stream_reset_outstanding = 0;
+ return (NULL);
+ }
+ chk = stcb->asoc.str_reset;
+ if (chk->data == NULL) {
+ return (NULL);
+ }
+ if (bchk) {
+ /* he wants a copy of the chk pointer */
+ *bchk = chk;
+ }
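+ /*
+ * The chunk holds at most two out-request params: check the
+ * first one and, if the chunk is long enough, the one packed
+ * behind it.
+ */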
+ clen = chk->send_size;
+ req = mtod(chk->data, struct sctp_stream_reset_out_req *);
+ r = &req->sr_req;
+ if (ntohl(r->request_seq) == seq) {
+ /* found it */
+ return (r);
+ }
+ len = SCTP_SIZE32(ntohs(r->ph.param_length));
+ if (clen > (len + (int)sizeof(struct sctp_chunkhdr))) {
+ /* move to the next one, there can only be a max of two */
+ r = (struct sctp_stream_reset_out_request *)((caddr_t)r + len);
+ if (ntohl(r->request_seq) == seq) {
+ return (r);
+ }
+ }
+ /* that seq is not here */
+ return (NULL);
+}
+
+static void
+sctp_clean_up_stream_reset(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk = stcb->asoc.str_reset;
+
+ if (stcb->asoc.str_reset == NULL) {
+ return;
+ }
+ asoc = &stcb->asoc;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
+ TAILQ_REMOVE(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore NO_NULL_CHK */
+ stcb->asoc.str_reset = NULL;
+}
+
+
+static int
+sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
+ uint32_t seq, uint32_t action,
+ struct sctp_stream_reset_response *respin)
+{
+ uint16_t type;
+ int lparm_len;
+ struct sctp_association *asoc = &stcb->asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_reset_out_request *srparam;
+ int number_entries;
+
+ if (asoc->stream_reset_outstanding == 0) {
+ /* duplicate */
+ return (0);
+ }
+ if (seq == stcb->asoc.str_reset_seq_out) {
+ srparam = sctp_find_stream_reset(stcb, seq, &chk);
+ if (srparam) {
+ stcb->asoc.str_reset_seq_out++;
+ type = ntohs(srparam->ph.param_type);
+ lparm_len = ntohs(srparam->ph.param_length);
+ if (type == SCTP_STR_RESET_OUT_REQUEST) {
+ number_entries = (lparm_len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t);
+ asoc->stream_reset_out_is_outstanding = 0;
+ if (asoc->stream_reset_outstanding)
+ asoc->stream_reset_outstanding--;
+ if (action == SCTP_STREAM_RESET_PERFORMED) {
+ /* do it */
+ sctp_reset_out_streams(stcb, number_entries, srparam->list_of_streams);
+ } else {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ }
+ } else if (type == SCTP_STR_RESET_IN_REQUEST) {
+ /* Answered my request */
+ number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t);
+ if (asoc->stream_reset_outstanding)
+ asoc->stream_reset_outstanding--;
+ if (action != SCTP_STREAM_RESET_PERFORMED) {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ }
+ } else if (type == SCTP_STR_RESET_ADD_STREAMS) {
+ /* Ok we now may have more streams */
+ if (asoc->stream_reset_outstanding)
+ asoc->stream_reset_outstanding--;
+ if (action == SCTP_STREAM_RESET_PERFORMED) {
+ /* Put the new streams into effect */
+ stcb->asoc.streamoutcnt = stcb->asoc.strm_realoutsize;
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_ADD_OK, stcb,
+ (uint32_t) stcb->asoc.streamoutcnt, NULL, SCTP_SO_NOT_LOCKED);
+ } else {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_ADD_FAIL, stcb,
+ (uint32_t) stcb->asoc.streamoutcnt, NULL, SCTP_SO_NOT_LOCKED);
+ }
+ } else if (type == SCTP_STR_RESET_TSN_REQUEST) {
+ /**
+ * a) Adopt the new in tsn.
+ * b) reset the map
+ * c) Adopt the new out-tsn
+ */
+ struct sctp_stream_reset_response_tsn *resp;
+ struct sctp_forward_tsn_chunk fwdtsn;
+ int abort_flag = 0;
+
+ if (respin == NULL) {
+ /* huh ? */
+ return (0);
+ }
+ if (action == SCTP_STREAM_RESET_PERFORMED) {
+ resp = (struct sctp_stream_reset_response_tsn *)respin;
+ asoc->stream_reset_outstanding--;
+ fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
+ fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn.new_cumulative_tsn = htonl(ntohl(resp->senders_next_tsn) - 1);
+ sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
+ if (abort_flag) {
+ return (1);
+ }
+ stcb->asoc.highest_tsn_inside_map = (ntohl(resp->senders_next_tsn) - 1);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 7, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ stcb->asoc.tsn_last_delivered = stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
+ stcb->asoc.mapping_array_base_tsn = ntohl(resp->senders_next_tsn);
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+
+ stcb->asoc.highest_tsn_inside_nr_map = stcb->asoc.highest_tsn_inside_map;
+ memset(stcb->asoc.nr_mapping_array, 0, stcb->asoc.mapping_array_size);
+
+ stcb->asoc.sending_seq = ntohl(resp->receivers_next_tsn);
+ stcb->asoc.last_acked_seq = stcb->asoc.cumulative_tsn;
+
+ sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
+ sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
+
+ }
+ }
+ /* get rid of the request and get the request flags */
+ if (asoc->stream_reset_outstanding == 0) {
+ sctp_clean_up_stream_reset(stcb);
+ }
+ }
+ }
+ return (0);
+}
+
+static void
+sctp_handle_str_reset_request_in(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_in_request *req, int trunc)
+{
+ uint32_t seq;
+ int len, i;
+ int number_entries;
+ uint16_t temp;
+
+ /*
+ * peer wants me to send a str-reset to him for my outgoing seq's if
+ * seq_in is right.
+ */
+ struct sctp_association *asoc = &stcb->asoc;
+
+ seq = ntohl(req->request_seq);
+ if (asoc->str_reset_seq_in == seq) {
+ if (trunc) {
+ /* Can't do it, since they exceeded our buffer size */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if (stcb->asoc.stream_reset_out_is_outstanding == 0) {
+ len = ntohs(req->ph.param_length);
+ number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t));
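+ /* convert the requested stream numbers to host order in place */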
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(req->list_of_streams[i]);
+ req->list_of_streams[i] = temp;
+ }
+ /* move the reset action back one */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ sctp_add_stream_reset_out(chk, number_entries, req->list_of_streams,
+ asoc->str_reset_seq_out,
+ seq, (asoc->sending_seq - 1));
+ asoc->stream_reset_out_is_outstanding = 1;
+ asoc->str_reset = chk;
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+ stcb->asoc.stream_reset_outstanding++;
+ } else {
+ /* Can't do it, since we have sent one out */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_TRY_LATER;
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ }
+ asoc->str_reset_seq_in++;
+ } else if (asoc->str_reset_seq_in - 1 == seq) {
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if (asoc->str_reset_seq_in - 2 == seq) {
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+}
+
+static int
+sctp_handle_str_reset_request_tsn(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_tsn_request *req)
+{
+ /* reset all in and out and update the tsn */
+ /*
+ * A) reset my str-seq's on in and out. B) Select a receive next,
+ * and set cum-ack to it. Also process this selected number as a
+ * fwd-tsn as well. C) set in the response my next sending seq.
+ */
+ struct sctp_forward_tsn_chunk fwdtsn;
+ struct sctp_association *asoc = &stcb->asoc;
+ int abort_flag = 0;
+ uint32_t seq;
+
+ seq = ntohl(req->request_seq);
+ if (asoc->str_reset_seq_in == seq) {
+ fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
+ fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn.ch.chunk_flags = 0;
+ fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1);
+ sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
+ if (abort_flag) {
+ return (1);
+ }
+ stcb->asoc.highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 10, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ stcb->asoc.tsn_last_delivered = stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
+ stcb->asoc.mapping_array_base_tsn = stcb->asoc.highest_tsn_inside_map + 1;
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+ stcb->asoc.highest_tsn_inside_nr_map = stcb->asoc.highest_tsn_inside_map;
+ memset(stcb->asoc.nr_mapping_array, 0, stcb->asoc.mapping_array_size);
+ atomic_add_int(&stcb->asoc.sending_seq, 1);
+ /* save off historical data for retrans */
+ stcb->asoc.last_sending_seq[1] = stcb->asoc.last_sending_seq[0];
+ stcb->asoc.last_sending_seq[0] = stcb->asoc.sending_seq;
+ stcb->asoc.last_base_tsnsent[1] = stcb->asoc.last_base_tsnsent[0];
+ stcb->asoc.last_base_tsnsent[0] = stcb->asoc.mapping_array_base_tsn;
+
+ sctp_add_stream_reset_result_tsn(chk,
+ ntohl(req->request_seq),
+ SCTP_STREAM_RESET_PERFORMED,
+ stcb->asoc.sending_seq,
+ stcb->asoc.mapping_array_base_tsn);
+ sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
+ sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
+ stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+
+ asoc->str_reset_seq_in++;
+ } else if (asoc->str_reset_seq_in - 1 == seq) {
+ sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0],
+ stcb->asoc.last_sending_seq[0],
+ stcb->asoc.last_base_tsnsent[0]
+ );
+ } else if (asoc->str_reset_seq_in - 2 == seq) {
+ sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[1],
+ stcb->asoc.last_sending_seq[1],
+ stcb->asoc.last_base_tsnsent[1]
+ );
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+ return (0);
+}
+
+static void
+sctp_handle_str_reset_request_out(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_out_request *req, int trunc)
+{
+ uint32_t seq, tsn;
+ int number_entries, len;
+ struct sctp_association *asoc = &stcb->asoc;
+
+ seq = ntohl(req->request_seq);
+
+ /* now if it's not a duplicate we process it */
+ if (asoc->str_reset_seq_in == seq) {
+ len = ntohs(req->ph.param_length);
+ number_entries = ((len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t));
+ /*
+ * the sender is resetting; handle the list issue: we must
+ * a) verify whether we can do the reset; if so, no problem. b) If
+ * we can't do the reset now, we must copy the request. c) Queue
+ * it, and set up the data-in processor to trigger it off
+ * when needed and dequeue all the queued data.
+ */
+ tsn = ntohl(req->send_reset_at_tsn);
+
+ /* move the reset action back one */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ if (trunc) {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ } else if ((tsn == asoc->cumulative_tsn) ||
+ (compare_with_wrap(asoc->cumulative_tsn, tsn, MAX_TSN))) {
+ /* we can do it now */
+ sctp_reset_in_stream(stcb, number_entries, req->list_of_streams);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ } else {
+ /*
+ * we must queue it up and thus wait for the TSN's
+ * to arrive that are at or before tsn
+ */
+ struct sctp_stream_reset_list *liste;
+ int siz;
+
+ siz = sizeof(struct sctp_stream_reset_list) + (number_entries * sizeof(uint16_t));
+ SCTP_MALLOC(liste, struct sctp_stream_reset_list *,
+ siz, SCTP_M_STRESET);
+ if (liste == NULL) {
+ /* gak out of memory */
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ return;
+ }
+ liste->tsn = tsn;
+ liste->number_entries = number_entries;
+ memcpy(&liste->req, req,
+ (sizeof(struct sctp_stream_reset_out_request) + (number_entries * sizeof(uint16_t))));
+ TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ }
+ asoc->str_reset_seq_in++;
+ } else if ((asoc->str_reset_seq_in - 1) == seq) {
+ /*
+ * one seq back, just echo back last action since my
+ * response was lost.
+ */
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if ((asoc->str_reset_seq_in - 2) == seq) {
+ /*
+ * two seq back, just echo back last action since my
+ * response was lost.
+ */
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+}
+
+static void
+sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_add_strm *str_add)
+{
+ /*
+ * Peer is requesting to add more streams. If it's within our
+ * max-streams, we will allow it.
+ */
+ uint16_t num_stream, i;
+ uint32_t seq;
+ struct sctp_association *asoc = &stcb->asoc;
+ struct sctp_queued_to_read *ctl;
+
+ /* Get the number. */
+ seq = ntohl(str_add->request_seq);
+ num_stream = ntohs(str_add->number_of_streams);
+ /* Now what would be the new total? */
+ if (asoc->str_reset_seq_in == seq) {
+ num_stream += stcb->asoc.streamincnt;
+ if (num_stream > stcb->asoc.max_inbound_streams) {
+ /* We must reject it, they asked for too many */
+ denied:
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED);
+ stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ } else {
+ /* Ok, we can do that :-) */
+ struct sctp_stream_in *oldstrm;
+
+ /* save off the old */
+ oldstrm = stcb->asoc.strmin;
+ SCTP_MALLOC(stcb->asoc.strmin, struct sctp_stream_in *,
+ (num_stream * sizeof(struct sctp_stream_in)),
+ SCTP_M_STRMI);
+ if (stcb->asoc.strmin == NULL) {
+ stcb->asoc.strmin = oldstrm;
+ goto denied;
+ }
+ /* copy off the old data */
+ for (i = 0; i < stcb->asoc.streamincnt; i++) {
+ TAILQ_INIT(&stcb->asoc.strmin[i].inqueue);
+ stcb->asoc.strmin[i].stream_no = i;
+ stcb->asoc.strmin[i].last_sequence_delivered = oldstrm[i].last_sequence_delivered;
+ stcb->asoc.strmin[i].delivery_started = oldstrm[i].delivery_started;
+ /* now anything on those queues? */
+ while (TAILQ_EMPTY(&oldstrm[i].inqueue) == 0) {
+ ctl = TAILQ_FIRST(&oldstrm[i].inqueue);
+ TAILQ_REMOVE(&oldstrm[i].inqueue, ctl, next);
+ TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].inqueue, ctl, next);
+ }
+ }
+ /* Init the new streams */
+ for (i = stcb->asoc.streamincnt; i < num_stream; i++) {
+ TAILQ_INIT(&stcb->asoc.strmin[i].inqueue);
+ stcb->asoc.strmin[i].stream_no = i;
+ stcb->asoc.strmin[i].last_sequence_delivered = 0xffff;
+ stcb->asoc.strmin[i].delivery_started = 0;
+ }
+ SCTP_FREE(oldstrm, SCTP_M_STRMI);
+ /* update the size */
+ stcb->asoc.streamincnt = num_stream;
+ /* Send the ack */
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED);
+ stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_INSTREAM_ADD_OK, stcb,
+ (uint32_t) stcb->asoc.streamincnt, NULL, SCTP_SO_NOT_LOCKED);
+ }
+ } else if ((asoc->str_reset_seq_in - 1) == seq) {
+ /*
+ * one seq back, just echo back last action since my
+ * response was lost.
+ */
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if ((asoc->str_reset_seq_in - 2) == seq) {
+ /*
+ * two seq back, just echo back last action since my
+ * response was lost.
+ */
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+
+ }
+}
+
+#ifdef __GNUC__
+__attribute__((noinline))
+#endif
+ static int
+ sctp_handle_stream_reset(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ struct sctp_stream_reset_out_req *sr_req)
+{
+ int chk_length, param_len, ptype;
+ struct sctp_paramhdr pstore;
+ uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE];
+
+ uint32_t seq;
+ int num_req = 0;
+ int trunc = 0;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+ struct sctp_paramhdr *ph;
+ int ret_code = 0;
+ int num_param = 0;
+
+ /* now it may be a reset or a reset-response */
+ chk_length = ntohs(sr_req->ch.chunk_length);
+
+ /* setup for adding the response */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return (ret_code);
+ }
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->no_fr_allowed = 0;
+ chk->book_size = chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->book_size_scale = 0;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+strres_nochunk:
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ return (ret_code);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = stcb->asoc.primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
+ offset += sizeof(struct sctp_chunkhdr);
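+ /*
+ * Walk each stream-reset parameter embedded in the chunk,
+ * copying its front into a local buffer for inspection.
+ */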
+ while ((size_t)chk_length >= sizeof(struct sctp_stream_reset_tsn_request)) {
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(pstore), (uint8_t *) & pstore);
+ if (ph == NULL)
+ break;
+ param_len = ntohs(ph->param_length);
+ if (param_len < (int)sizeof(struct sctp_stream_reset_tsn_request)) {
+ /* bad param */
+ break;
+ }
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, (int)sizeof(cstore)),
+ (uint8_t *) & cstore);
+ ptype = ntohs(ph->param_type);
+ num_param++;
+ if (param_len > (int)sizeof(cstore)) {
+ trunc = 1;
+ } else {
+ trunc = 0;
+ }
+
+ if (num_param > SCTP_MAX_RESET_PARAMS) {
+ /* hit the max number of parameters already, sorry */
+ break;
+ }
+ if (ptype == SCTP_STR_RESET_OUT_REQUEST) {
+ struct sctp_stream_reset_out_request *req_out;
+
+ req_out = (struct sctp_stream_reset_out_request *)ph;
+ num_req++;
+ if (stcb->asoc.stream_reset_outstanding) {
+ seq = ntohl(req_out->response_seq);
+ if (seq == stcb->asoc.str_reset_seq_out) {
+ /* implicit ack */
+ (void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_PERFORMED, NULL);
+ }
+ }
+ sctp_handle_str_reset_request_out(stcb, chk, req_out, trunc);
+ } else if (ptype == SCTP_STR_RESET_ADD_STREAMS) {
+ struct sctp_stream_reset_add_strm *str_add;
+
+ str_add = (struct sctp_stream_reset_add_strm *)ph;
+ num_req++;
+ sctp_handle_str_reset_add_strm(stcb, chk, str_add);
+ } else if (ptype == SCTP_STR_RESET_IN_REQUEST) {
+ struct sctp_stream_reset_in_request *req_in;
+
+ num_req++;
+
+ req_in = (struct sctp_stream_reset_in_request *)ph;
+
+ sctp_handle_str_reset_request_in(stcb, chk, req_in, trunc);
+ } else if (ptype == SCTP_STR_RESET_TSN_REQUEST) {
+ struct sctp_stream_reset_tsn_request *req_tsn;
+
+ num_req++;
+ req_tsn = (struct sctp_stream_reset_tsn_request *)ph;
+
+ if (sctp_handle_str_reset_request_tsn(stcb, chk, req_tsn)) {
+ ret_code = 1;
+ goto strres_nochunk;
+ }
+ /* no more */
+ break;
+ } else if (ptype == SCTP_STR_RESET_RESPONSE) {
+ struct sctp_stream_reset_response *resp;
+ uint32_t result;
+
+ resp = (struct sctp_stream_reset_response *)ph;
+ seq = ntohl(resp->response_seq);
+ result = ntohl(resp->result);
+ if (sctp_handle_stream_reset_response(stcb, seq, result, resp)) {
+ ret_code = 1;
+ goto strres_nochunk;
+ }
+ } else {
+ break;
+ }
+ offset += SCTP_SIZE32(param_len);
+ chk_length -= SCTP_SIZE32(param_len);
+ }
+ if (num_req == 0) {
+ /* we have no response, free the stuff */
+ goto strres_nochunk;
+ }
+ /* ok we have a chunk to link in */
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue,
+ chk,
+ sctp_next);
+ stcb->asoc.ctrl_queue_cnt++;
+ return (ret_code);
+}
+
+/*
+ * Handle a router's or endpoint's report of a packet loss. There are two
+ * ways to handle this: either we get the whole packet and must dissect it
+ * ourselves (possibly with truncation and/or corruption), or it is a summary
+ * from a middle box that did the dissecting for us.
+ */
+static void
+sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t limit)
+{
+ uint32_t bottle_bw, on_queue;
+ uint16_t trunc_len;
+ unsigned int chlen;
+ unsigned int at;
+ struct sctp_chunk_desc desc;
+ struct sctp_chunkhdr *ch;
+
+ chlen = ntohs(cp->ch.chunk_length);
+ chlen -= sizeof(struct sctp_pktdrop_chunk);
+ /* XXX possible chlen underflow */
+ if (chlen == 0) {
+ ch = NULL;
+ if (cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX)
+ SCTP_STAT_INCR(sctps_pdrpbwrpt);
+ } else {
+ ch = (struct sctp_chunkhdr *)(cp->data + sizeof(struct sctphdr));
+ chlen -= sizeof(struct sctphdr);
+ /* XXX possible chlen underflow */
+ memset(&desc, 0, sizeof(desc));
+ }
+ trunc_len = (uint16_t) ntohs(cp->trunc_len);
+ if (trunc_len > limit) {
+ trunc_len = limit;
+ }
+ /* now the chunks themselves */
+ while ((ch != NULL) && (chlen >= sizeof(struct sctp_chunkhdr))) {
+ desc.chunk_type = ch->chunk_type;
+ /* get amount we need to move */
+ at = ntohs(ch->chunk_length);
+ if (at < sizeof(struct sctp_chunkhdr)) {
+ /* corrupt chunk, maybe at the end? */
+ SCTP_STAT_INCR(sctps_pdrpcrupt);
+ break;
+ }
+ if (trunc_len == 0) {
+ /* we are supposed to have all of it */
+ if (at > chlen) {
+ /* corrupt, skip it */
+ SCTP_STAT_INCR(sctps_pdrpcrupt);
+ break;
+ }
+ } else {
+ /* is there enough of it left? */
+ if (desc.chunk_type == SCTP_DATA) {
+ if (chlen < (sizeof(struct sctp_data_chunk) +
+ sizeof(desc.data_bytes))) {
+ break;
+ }
+ } else {
+ if (chlen < sizeof(struct sctp_chunkhdr)) {
+ break;
+ }
+ }
+ }
+ if (desc.chunk_type == SCTP_DATA) {
+ /* can we get out the tsn? */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX))
+ SCTP_STAT_INCR(sctps_pdrpmbda);
+
+ if (chlen >= (sizeof(struct sctp_data_chunk) + sizeof(uint32_t))) {
+ /* yep */
+ struct sctp_data_chunk *dcp;
+ uint8_t *ddp;
+ unsigned int iii;
+
+ dcp = (struct sctp_data_chunk *)ch;
+ ddp = (uint8_t *) (dcp + 1);
+ for (iii = 0; iii < sizeof(desc.data_bytes); iii++) {
+ desc.data_bytes[iii] = ddp[iii];
+ }
+ desc.tsn_ifany = dcp->dp.tsn;
+ } else {
+ /* nope we are done. */
+ SCTP_STAT_INCR(sctps_pdrpnedat);
+ break;
+ }
+ } else {
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX))
+ SCTP_STAT_INCR(sctps_pdrpmbct);
+ }
+
+ if (process_chunk_drop(stcb, &desc, net, cp->ch.chunk_flags)) {
+ SCTP_STAT_INCR(sctps_pdrppdbrk);
+ break;
+ }
+ if (SCTP_SIZE32(at) > chlen) {
+ break;
+ }
+ chlen -= SCTP_SIZE32(at);
+ if (chlen < sizeof(struct sctp_chunkhdr)) {
+ /* done, none left */
+ break;
+ }
+ ch = (struct sctp_chunkhdr *)((caddr_t)ch + SCTP_SIZE32(at));
+ }
+ /* Now update any rwnd --- possibly */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) == 0) {
+ /* From a peer, we get a rwnd report */
+ uint32_t a_rwnd;
+
+ SCTP_STAT_INCR(sctps_pdrpfehos);
+
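+ /*
+ * Derive an advertised rwnd from the peer's reported
+ * bottleneck bandwidth and bytes currently on queue.
+ */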
+ bottle_bw = ntohl(cp->bottle_bw);
+ on_queue = ntohl(cp->current_onq);
+ if (bottle_bw && on_queue) {
+ /* a rwnd report is in here */
+ if (bottle_bw > on_queue)
+ a_rwnd = bottle_bw - on_queue;
+ else
+ a_rwnd = 0;
+
+ if (a_rwnd == 0)
+ stcb->asoc.peers_rwnd = 0;
+ else {
+ if (a_rwnd > stcb->asoc.total_flight) {
+ stcb->asoc.peers_rwnd =
+ a_rwnd - stcb->asoc.total_flight;
+ } else {
+ stcb->asoc.peers_rwnd = 0;
+ }
+ if (stcb->asoc.peers_rwnd <
+ stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ stcb->asoc.peers_rwnd = 0;
+ }
+ }
+ }
+ } else {
+ SCTP_STAT_INCR(sctps_pdrpfmbox);
+ }
+
+ /* now middle boxes in sat networks get a cwnd bump */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) &&
+ (stcb->asoc.sat_t3_loss_recovery == 0) &&
+ (stcb->asoc.sat_network)) {
+ /*
+ * This is debatable, but for sat networks it makes sense.
+ * Note that if a T3 timer has gone off, we will prohibit any
+ * changes to cwnd until we exit the T3 loss recovery.
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped(stcb,
+ net, cp, &bottle_bw, &on_queue);
+ }
+}
+
+/*
+ * Handles all control chunks in a packet.
+ * inputs:
+ * - m: mbuf chain, assumed to still contain the IP/SCTP header
+ * - stcb: the tcb found for this packet
+ * - offset: offset into the mbuf chain to the first chunkhdr
+ * - length: the length of the complete packet
+ * outputs:
+ * - length: modified to the remaining length after control processing
+ * - netp: modified to the new sctp_nets after cookie-echo processing
+ * - return NULL to discard the packet (i.e. no asoc, bad packet, ...),
+ *   otherwise return the tcb for this packet
+ */
+#ifdef __GNUC__
+__attribute__((noinline))
+#endif
+ static struct sctp_tcb *
+ sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
+ struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen,
+ uint32_t vrf_id, uint16_t port)
+{
+ struct sctp_association *asoc;
+ uint32_t vtag_in;
+ int num_chunks = 0; /* number of control chunks processed */
+ uint32_t chk_length;
+ int ret;
+ int abort_no_unlock = 0;
+
+ /*
+ * How big should this be, and should it be alloc'd? Let's try the
+ * d-mtu-ceiling for now (2k) and that should hopefully work ...
+ * until we get into jumbo grams and such..
+ */
+ uint8_t chunk_buf[SCTP_CHUNK_BUFFER_SIZE];
+ struct sctp_tcb *locked_tcb = stcb;
+ int got_auth = 0;
+ uint32_t auth_offset = 0, auth_len = 0;
+ int auth_skipped = 0;
+ int asconf_cnt = 0;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_process_control: iphlen=%u, offset=%u, length=%u stcb:%p\n",
+ iphlen, *offset, length, stcb);
+
+ /* validate chunk header length... */
+ if (ntohs(ch->chunk_length) < sizeof(*ch)) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Invalid header length %d\n",
+ ntohs(ch->chunk_length));
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ /*
+ * validate the verification tag
+ */
+ vtag_in = ntohl(sh->v_tag);
+
+ if (locked_tcb) {
+ SCTP_TCB_LOCK_ASSERT(locked_tcb);
+ }
+ if (ch->chunk_type == SCTP_INITIATION) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Its an INIT of len:%d vtag:%x\n",
+ ntohs(ch->chunk_length), vtag_in);
+ if (vtag_in != 0) {
+ /* protocol error- silently discard... */
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else if (ch->chunk_type != SCTP_COOKIE_ECHO) {
+ /*
+ * If there is no stcb, skip the AUTH chunk and process it
+ * later, after a stcb is found (to validate that the lookup
+ * was valid).
+ */
+ if ((ch->chunk_type == SCTP_AUTHENTICATION) &&
+ (stcb == NULL) &&
+ !SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ /* save this chunk for later processing */
+ auth_skipped = 1;
+ auth_offset = *offset;
+ auth_len = ntohs(ch->chunk_length);
+
+ /* (temporarily) move past this chunk */
+ *offset += SCTP_SIZE32(auth_len);
+ if (*offset >= length) {
+ /* no more data left in the mbuf chain */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ }
+ if (ch == NULL) {
+ /* Help */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (ch->chunk_type == SCTP_COOKIE_ECHO) {
+ goto process_control_chunks;
+ }
+ /*
+ * first check if it's an ASCONF with an unknown src addr; we
+ * need to look inside to find the association
+ */
+ if (ch->chunk_type == SCTP_ASCONF && stcb == NULL) {
+ struct sctp_chunkhdr *asconf_ch = ch;
+ uint32_t asconf_offset = 0, asconf_len = 0;
+
+ /* inp's refcount may be reduced */
+ SCTP_INP_INCR_REF(inp);
+
+ asconf_offset = *offset;
+ do {
+ asconf_len = ntohs(asconf_ch->chunk_length);
+ if (asconf_len < sizeof(struct sctp_asconf_paramhdr))
+ break;
+ stcb = sctp_findassociation_ep_asconf(m, iphlen,
+ *offset, sh, &inp, netp, vrf_id);
+ if (stcb != NULL)
+ break;
+ asconf_offset += SCTP_SIZE32(asconf_len);
+ asconf_ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, asconf_offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ } while (asconf_ch != NULL && asconf_ch->chunk_type == SCTP_ASCONF);
+ if (stcb == NULL) {
+ /*
+ * reduce inp's refcount if not reduced in
+ * sctp_findassociation_ep_asconf().
+ */
+ SCTP_INP_DECR_REF(inp);
+ } else {
+ locked_tcb = stcb;
+ }
+
+ /* now go back and verify any auth chunk to be sure */
+ if (auth_skipped && (stcb != NULL)) {
+ struct sctp_auth_chunk *auth;
+
+ auth = (struct sctp_auth_chunk *)
+ sctp_m_getptr(m, auth_offset,
+ auth_len, chunk_buf);
+ got_auth = 1;
+ auth_skipped = 0;
+ if ((auth == NULL) || sctp_handle_auth(stcb, auth, m,
+ auth_offset)) {
+ /* auth HMAC failed so dump it */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ } else {
+ /* remaining chunks are HMAC checked */
+ stcb->asoc.authenticated = 1;
+ }
+ }
+ }
+ if (stcb == NULL) {
+ /* no association, so it's out of the blue... */
+ sctp_handle_ootb(m, iphlen, *offset, sh, inp, NULL,
+ vrf_id, port);
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ asoc = &stcb->asoc;
+ /* ABORT and SHUTDOWN can use either v_tag... */
+ if ((ch->chunk_type == SCTP_ABORT_ASSOCIATION) ||
+ (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) ||
+ (ch->chunk_type == SCTP_PACKET_DROPPED)) {
+ if ((vtag_in == asoc->my_vtag) ||
+ ((ch->chunk_flags & SCTP_HAD_NO_TCB) &&
+ (vtag_in == asoc->peer_vtag))) {
+ /* this is valid */
+ } else {
+ /* drop this packet... */
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+ if (vtag_in != asoc->my_vtag) {
+ /*
+ * this could be a stale SHUTDOWN-ACK or the
+ * peer never got the SHUTDOWN-COMPLETE and
+ * is still hung; we have started a new asoc
+ * but it won't complete until the shutdown
+ * is completed
+ */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ sctp_handle_ootb(m, iphlen, *offset, sh, inp,
+ NULL, vrf_id, port);
+ return (NULL);
+ }
+ } else {
+ /* for all other chunks, vtag must match */
+ if (vtag_in != asoc->my_vtag) {
+ /* invalid vtag... */
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "invalid vtag: %xh, expect %xh\n",
+ vtag_in, asoc->my_vtag);
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ }
+ } /* end if !SCTP_COOKIE_ECHO */
+ /*
+ * process all control chunks...
+ */
+ if (((ch->chunk_type == SCTP_SELECTIVE_ACK) ||
+ /* EY */
+ (ch->chunk_type == SCTP_NR_SELECTIVE_ACK) ||
+ (ch->chunk_type == SCTP_HEARTBEAT_REQUEST)) &&
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /* implied cookie-ack.. we must have lost the ack */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb,
+ *netp);
+ }
+process_control_chunks:
+ while (IS_SCTP_CONTROL(ch)) {
+ /* validate chunk length */
+ chk_length = ntohs(ch->chunk_length);
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_process_control: processing a chunk type=%u, len=%u\n",
+ ch->chunk_type, chk_length);
+ SCTP_LTRACE_CHK(inp, stcb, ch->chunk_type, chk_length);
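+ /*
+ * A chunk shorter than a chunk header, or one that runs past
+ * the end of the packet, ends processing of this packet.
+ */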
+ if (chk_length < sizeof(*ch) ||
+ (*offset + (int)chk_length) > length) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ SCTP_STAT_INCR_COUNTER64(sctps_incontrolchunks);
+ /*
+ * INIT-ACK only gets the init-ack "header" portion
+ * because we don't have to process the peer's COOKIE. All
+ * others get a complete chunk.
+ */
+ if ((ch->chunk_type == SCTP_INITIATION_ACK) ||
+ (ch->chunk_type == SCTP_INITIATION)) {
+ /* get an init-ack chunk */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_init_ack_chunk), chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else {
+ /* For cookies and all other chunks. */
+ if (chk_length > sizeof(chunk_buf)) {
+ /*
+ * use just the size of the chunk buffer so
+ * the front part of our chunks fit in
+ * contiguous space up to the chunk buffer
+ * size (508 bytes). For chunks that need
+ * more than that, they must use the
+ * sctp_m_getptr() function or other means
+ * (e.g. know how to parse mbuf chains).
+ * Cookies do this already.
+ */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ (sizeof(chunk_buf) - 4),
+ chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else {
+ /* We can fit it all */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ chk_length, chunk_buf);
+ if (ch == NULL) {
+ SCTP_PRINTF("sctp_process_control: Can't get the all data....\n");
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ }
+ }
+ num_chunks++;
+ /* Save off the last place we got a control from */
+ if (stcb != NULL) {
+ if (((netp != NULL) && (*netp != NULL)) || (ch->chunk_type == SCTP_ASCONF)) {
+ /*
+ * allow last_control to be NULL if
+ * ASCONF... ASCONF processing will find the
+ * right net later
+ */
+ if ((netp != NULL) && (*netp != NULL))
+ stcb->asoc.last_control_chunk_from = *netp;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB0, ch->chunk_type);
+#endif
+
+ /* check to see if this chunk required auth, but isn't */
+ if ((stcb != NULL) &&
+ !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ sctp_auth_is_required_chunk(ch->chunk_type, stcb->asoc.local_auth_chunks) &&
+ !stcb->asoc.authenticated) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ goto next_chunk;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_INITIATION:
+ /* must be first and only chunk */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT\n");
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore? */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ /*
+ * collision case where we are
+ * sending to them too
+ */
+ ;
+ } else {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ }
+ if ((chk_length > SCTP_LARGEST_INIT_ACCEPTED) ||
+ (num_chunks > 1) ||
+ (SCTP_BASE_SYSCTL(sctp_strict_init) && (length - *offset > (int)SCTP_SIZE32(chk_length)))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((stcb != NULL) &&
+ (SCTP_GET_STATE(&stcb->asoc) ==
+ SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+ *offset = length;
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (netp) {
+ sctp_handle_init(m, iphlen, *offset, sh,
+ (struct sctp_init_chunk *)ch, inp,
+ stcb, *netp, &abort_no_unlock, vrf_id, port);
+ }
+ if (abort_no_unlock)
+ return (NULL);
+
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ break;
+ case SCTP_PAD_CHUNK:
+ break;
+ case SCTP_INITIATION_ACK:
+ /* must be first and only chunk */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT-ACK\n");
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else {
+ if (locked_tcb != stcb) {
+ /* Very unlikely */
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ if (stcb) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ return (NULL);
+ }
+ }
+ if ((num_chunks > 1) ||
+ (SCTP_BASE_SYSCTL(sctp_strict_init) && (length - *offset > (int)SCTP_SIZE32(chk_length)))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((netp) && (*netp)) {
+ ret = sctp_handle_init_ack(m, iphlen, *offset, sh,
+ (struct sctp_init_ack_chunk *)ch, stcb, *netp, &abort_no_unlock, vrf_id);
+ } else {
+ ret = -1;
+ }
+ /*
+ * Special case, I must call the output routine to
+ * get the cookie echoed
+ */
+ if (abort_no_unlock)
+ return (NULL);
+
+ if ((stcb) && ret == 0)
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ break;
+ case SCTP_SELECTIVE_ACK:
+ {
+ struct sctp_sack_chunk *sack;
+ int abort_now = 0;
+ uint32_t a_rwnd, cum_ack;
+ uint16_t num_seg, num_dup;
+ uint8_t flags;
+ int offset_seg, offset_dup;
+ int nonce_sum_flag;
+
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK\n");
+ SCTP_STAT_INCR(sctps_recvsacks);
+ if (stcb == NULL) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing SACK chunk\n");
+ break;
+ }
+ if (chk_length < sizeof(struct sctp_sack_chunk)) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size on SACK chunk, too small\n");
+ break;
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /*-
+ * If we have sent a shutdown-ack, we will pay no
+ * attention to a sack sent in to us since
+ * we don't care anymore.
+ */
+ break;
+ }
+ sack = (struct sctp_sack_chunk *)ch;
+ flags = ch->chunk_flags;
+ nonce_sum_flag = flags & SCTP_SACK_NONCE_SUM;
+ cum_ack = ntohl(sack->sack.cum_tsn_ack);
+ num_seg = ntohs(sack->sack.num_gap_ack_blks);
+ num_dup = ntohs(sack->sack.num_dup_tsns);
+ a_rwnd = (uint32_t) ntohl(sack->sack.a_rwnd);
+ if (sizeof(struct sctp_sack_chunk) +
+ num_seg * sizeof(struct sctp_gap_ack_block) +
+ num_dup * sizeof(uint32_t) != chk_length) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size of SACK chunk\n");
+ break;
+ }
+ offset_seg = *offset + sizeof(struct sctp_sack_chunk);
+ offset_dup = offset_seg + num_seg * sizeof(struct sctp_gap_ack_block);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK process cum_ack:%x num_seg:%d a_rwnd:%d\n",
+ cum_ack, num_seg, a_rwnd);
+ stcb->asoc.seen_a_sack_this_pkt = 1;
+ if ((stcb->asoc.pr_sctp_cnt == 0) &&
+ (num_seg == 0) &&
+ ((compare_with_wrap(cum_ack, stcb->asoc.last_acked_seq, MAX_TSN)) ||
+ (cum_ack == stcb->asoc.last_acked_seq)) &&
+ (stcb->asoc.saw_sack_with_frags == 0) &&
+ (stcb->asoc.saw_sack_with_nr_frags == 0) &&
+ (!TAILQ_EMPTY(&stcb->asoc.sent_queue))
+ ) {
+ /*
+ * We have a SIMPLE sack having no
+ * prior segments and data on sent
+ * queue to be acked. Use the
+ * faster path sack processing. We
+ * also allow window update sacks
+ * with no missing segments to go
+ * this way too.
+ */
+ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, nonce_sum_flag,
+ &abort_now);
+ } else {
+ if (netp && *netp)
+ sctp_handle_sack(m, offset_seg, offset_dup,
+ stcb, *netp,
+ num_seg, 0, num_dup, &abort_now, flags,
+ cum_ack, a_rwnd);
+ }
+ if (abort_now) {
+ /* ABORT signal from sack processing */
+ *offset = length;
+ return (NULL);
+ }
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue) &&
+ TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
+ (stcb->asoc.stream_queue_cnt == 0)) {
+ sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ break;
+ /*
+ * EY - nr_sack: If the received chunk is an
+ * nr_sack chunk
+ */
+ case SCTP_NR_SELECTIVE_ACK:
+ {
+ struct sctp_nr_sack_chunk *nr_sack;
+ int abort_now = 0;
+ uint32_t a_rwnd, cum_ack;
+ uint16_t num_seg, num_nr_seg, num_dup;
+ uint8_t flags;
+ int offset_seg, offset_dup;
+ int nonce_sum_flag;
+
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_NR_SACK\n");
+ SCTP_STAT_INCR(sctps_recvsacks);
+ if (stcb == NULL) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing NR-SACK chunk\n");
+ break;
+ }
+ if ((stcb->asoc.sctp_nr_sack_on_off == 0) ||
+ (stcb->asoc.peer_supports_nr_sack == 0)) {
+ goto unknown_chunk;
+ }
+ if (chk_length < sizeof(struct sctp_nr_sack_chunk)) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size on NR-SACK chunk, too small\n");
+ break;
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /*-
+ * If we have sent a shutdown-ack, we will pay no
+ * attention to a sack sent in to us since
+ * we don't care anymore.
+ */
+ break;
+ }
+ nr_sack = (struct sctp_nr_sack_chunk *)ch;
+ flags = ch->chunk_flags;
+ nonce_sum_flag = flags & SCTP_SACK_NONCE_SUM;
+
+ cum_ack = ntohl(nr_sack->nr_sack.cum_tsn_ack);
+ num_seg = ntohs(nr_sack->nr_sack.num_gap_ack_blks);
+ num_nr_seg = ntohs(nr_sack->nr_sack.num_nr_gap_ack_blks);
+ num_dup = ntohs(nr_sack->nr_sack.num_dup_tsns);
+ a_rwnd = (uint32_t) ntohl(nr_sack->nr_sack.a_rwnd);
+ if (sizeof(struct sctp_nr_sack_chunk) +
+ (num_seg + num_nr_seg) * sizeof(struct sctp_gap_ack_block) +
+ num_dup * sizeof(uint32_t) != chk_length) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size of NR_SACK chunk\n");
+ break;
+ }
+ offset_seg = *offset + sizeof(struct sctp_nr_sack_chunk);
+ offset_dup = offset_seg + num_seg * sizeof(struct sctp_gap_ack_block);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_NR_SACK process cum_ack:%x num_seg:%d a_rwnd:%d\n",
+ cum_ack, num_seg, a_rwnd);
+ stcb->asoc.seen_a_sack_this_pkt = 1;
+ if ((stcb->asoc.pr_sctp_cnt == 0) &&
+ (num_seg == 0) && (num_nr_seg == 0) &&
+ ((compare_with_wrap(cum_ack, stcb->asoc.last_acked_seq, MAX_TSN)) ||
+ (cum_ack == stcb->asoc.last_acked_seq)) &&
+ (stcb->asoc.saw_sack_with_frags == 0) &&
+ (stcb->asoc.saw_sack_with_nr_frags == 0) &&
+ (!TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
+ /*
+ * We have a SIMPLE sack having no
+ * prior segments and data on sent
+ * queue to be acked. Use the faster
+ * path sack processing. We also
+ * allow window update sacks with no
+ * missing segments to go this way
+ * too.
+ */
+ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, nonce_sum_flag,
+ &abort_now);
+ } else {
+ if (netp && *netp)
+ sctp_handle_sack(m, offset_seg, offset_dup,
+ stcb, *netp,
+ num_seg, num_nr_seg, num_dup, &abort_now, flags,
+ cum_ack, a_rwnd);
+ }
+ if (abort_now) {
+ /* ABORT signal from sack processing */
+ *offset = length;
+ return (NULL);
+ }
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue) &&
+ TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
+ (stcb->asoc.stream_queue_cnt == 0)) {
+ sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ break;
+
+ case SCTP_HEARTBEAT_REQUEST:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT\n");
+ if ((stcb) && netp && *netp) {
+ SCTP_STAT_INCR(sctps_recvheartbeat);
+ sctp_send_heartbeat_ack(stcb, m, *offset,
+ chk_length, *netp);
+
+ /* He's alive so give him credit */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ }
+ break;
+ case SCTP_HEARTBEAT_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT-ACK\n");
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_heartbeat_chunk))) {
+ /* It's not ours */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ /* He's alive so give him credit */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ SCTP_STAT_INCR(sctps_recvheartbeatack);
+ if (netp && *netp)
+ sctp_handle_heartbeat_ack((struct sctp_heartbeat_chunk *)ch,
+ stcb, *netp);
+ break;
+ case SCTP_ABORT_ASSOCIATION:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ABORT, stcb %p\n",
+ stcb);
+ if ((stcb) && netp && *netp)
+ sctp_handle_abort((struct sctp_abort_chunk *)ch,
+ stcb, *netp);
+ *offset = length;
+ return (NULL);
+ break;
+ case SCTP_SHUTDOWN:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN, stcb %p\n",
+ stcb);
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_shutdown_chunk))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (netp && *netp) {
+ int abort_flag = 0;
+
+ sctp_handle_shutdown((struct sctp_shutdown_chunk *)ch,
+ stcb, *netp, &abort_flag);
+ if (abort_flag) {
+ *offset = length;
+ return (NULL);
+ }
+ }
+ break;
+ case SCTP_SHUTDOWN_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", stcb);
+ if ((stcb) && (netp) && (*netp))
+ sctp_handle_shutdown_ack((struct sctp_shutdown_ack_chunk *)ch, stcb, *netp);
+ *offset = length;
+ return (NULL);
+ break;
+
+ case SCTP_OPERATION_ERROR:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_OP-ERR\n");
+ if ((stcb) && netp && *netp && sctp_handle_error(ch, stcb, *netp) < 0) {
+
+ *offset = length;
+ return (NULL);
+ }
+ break;
+ case SCTP_COOKIE_ECHO:
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "SCTP_COOKIE-ECHO, stcb %p\n", stcb);
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ abend:
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ }
+ /*
+ * First, are we accepting? We do this again here
+ * since it is possible that a previous endpoint WAS
+ * listening, responded to an INIT-ACK and then
+ * closed. We opened and bound... and are now no
+ * longer listening.
+ */
+
+ if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
+ struct mbuf *oper;
+ struct sctp_paramhdr *phdr;
+
+ oper = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr);
+ phdr = mtod(oper,
+ struct sctp_paramhdr *);
+ phdr->param_type =
+ htons(SCTP_CAUSE_OUT_OF_RESC);
+ phdr->param_length =
+ htons(sizeof(struct sctp_paramhdr));
+ }
+ sctp_abort_association(inp, stcb, m,
+ iphlen, sh, oper, vrf_id, port);
+ }
+ *offset = length;
+ return (NULL);
+ } else {
+ struct mbuf *ret_buf;
+ struct sctp_inpcb *linp;
+
+ if (stcb) {
+ linp = NULL;
+ } else {
+ linp = inp;
+ }
+
+ if (linp) {
+ SCTP_ASOC_CREATE_LOCK(linp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ SCTP_ASOC_CREATE_UNLOCK(linp);
+ goto abend;
+ }
+ }
+ if (netp) {
+ ret_buf =
+ sctp_handle_cookie_echo(m, iphlen,
+ *offset, sh,
+ (struct sctp_cookie_echo_chunk *)ch,
+ &inp, &stcb, netp,
+ auth_skipped,
+ auth_offset,
+ auth_len,
+ &locked_tcb,
+ vrf_id,
+ port);
+ } else {
+ ret_buf = NULL;
+ }
+ if (linp) {
+ SCTP_ASOC_CREATE_UNLOCK(linp);
+ }
+ if (ret_buf == NULL) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "GAK, null buffer\n");
+ auth_skipped = 0;
+ *offset = length;
+ return (NULL);
+ }
+ /* if AUTH skipped, see if it verified... */
+ if (auth_skipped) {
+ got_auth = 1;
+ auth_skipped = 0;
+ }
+ if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+ /*
+ * Restart the timer if we have
+ * pending data
+ */
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ if (chk) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb,
+ chk->whoTo);
+ }
+ }
+ }
+ break;
+ case SCTP_COOKIE_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", stcb);
+ if ((stcb == NULL) || chk_length != sizeof(struct sctp_cookie_ack_chunk)) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else if (stcb) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ }
+ /* He's alive so give him credit */
+ if ((stcb) && netp && *netp) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, *netp);
+ }
+ break;
+ case SCTP_ECN_ECHO:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-ECHO\n");
+ /* He's alive so give him credit */
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_ecne_chunk))) {
+ /* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_ecn_echo((struct sctp_ecne_chunk *)ch,
+ stcb);
+ }
+ break;
+ case SCTP_ECN_CWR:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-CWR\n");
+ /* He's alive so give him credit */
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_cwr_chunk))) {
+ /* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_ecn_cwr((struct sctp_cwr_chunk *)ch, stcb);
+ }
+ break;
+ case SCTP_SHUTDOWN_COMPLETE:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", stcb);
+ /* must be first and only chunk */
+ if ((num_chunks > 1) ||
+ (length - *offset > (int)SCTP_SIZE32(chk_length))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((stcb) && netp && *netp) {
+ sctp_handle_shutdown_complete((struct sctp_shutdown_complete_chunk *)ch,
+ stcb, *netp);
+ }
+ *offset = length;
+ return (NULL);
+ break;
+ case SCTP_ASCONF:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF\n");
+ /* He's alive so give him credit */
+ if (stcb) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_asconf(m, *offset,
+ (struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0);
+ asconf_cnt++;
+ }
+ break;
+ case SCTP_ASCONF_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF-ACK\n");
+ if (chk_length < sizeof(struct sctp_asconf_ack_chunk)) {
+ /* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if ((stcb) && netp && *netp) {
+ /* He's alive so give him credit */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_asconf_ack(m, *offset,
+ (struct sctp_asconf_ack_chunk *)ch, stcb, *netp, &abort_no_unlock);
+ if (abort_no_unlock)
+ return (NULL);
+ }
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_FWD-TSN\n");
+ if (chk_length < sizeof(struct sctp_forward_tsn_chunk)) {
+ /* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ /* He's alive so give him credit */
+ if (stcb) {
+ int abort_flag = 0;
+
+ stcb->asoc.overall_error_count = 0;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ *fwd_tsn_seen = 1;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ sctp_handle_forward_tsn(stcb,
+ (struct sctp_forward_tsn_chunk *)ch, &abort_flag, m, *offset);
+ if (abort_flag) {
+ *offset = length;
+ return (NULL);
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ }
+
+ }
+ break;
+ case SCTP_STREAM_RESET:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_STREAM_RESET\n");
+ if (((stcb == NULL) || (ch == NULL) || (chk_length < sizeof(struct sctp_stream_reset_tsn_req)))) {
+ /* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb->asoc.peer_supports_strreset == 0) {
+ /*
+ * hmm, peer should have announced this, but
+ * we will turn it on since he is sending us
+ * a stream reset.
+ */
+ stcb->asoc.peer_supports_strreset = 1;
+ }
+ if (sctp_handle_stream_reset(stcb, m, *offset, (struct sctp_stream_reset_out_req *)ch)) {
+ /* stop processing */
+ *offset = length;
+ return (NULL);
+ }
+ break;
+ case SCTP_PACKET_DROPPED:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_PACKET_DROPPED\n");
+ /* re-get it all please */
+ if (chk_length < sizeof(struct sctp_pktdrop_chunk)) {
+ /* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (ch && (stcb) && netp && (*netp)) {
+ sctp_handle_packet_dropped((struct sctp_pktdrop_chunk *)ch,
+ stcb, *netp,
+ min(chk_length, (sizeof(chunk_buf) - 4)));
+
+ }
+ break;
+
+ case SCTP_AUTHENTICATION:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_AUTHENTICATION\n");
+ if (SCTP_BASE_SYSCTL(sctp_auth_disable))
+ goto unknown_chunk;
+
+ if (stcb == NULL) {
+ /* save the first AUTH for later processing */
+ if (auth_skipped == 0) {
+ auth_offset = *offset;
+ auth_len = chk_length;
+ auth_skipped = 1;
+ }
+ /* skip this chunk (temporarily) */
+ goto next_chunk;
+ }
+ if ((chk_length < (sizeof(struct sctp_auth_chunk))) ||
+ (chk_length > (sizeof(struct sctp_auth_chunk) +
+ SCTP_AUTH_DIGEST_LEN_MAX))) {
+ /* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (got_auth == 1) {
+ /* skip this chunk... it's already auth'd */
+ goto next_chunk;
+ }
+ got_auth = 1;
+ if ((ch == NULL) || sctp_handle_auth(stcb, (struct sctp_auth_chunk *)ch,
+ m, *offset)) {
+ /* auth HMAC failed so dump the packet */
+ *offset = length;
+ return (stcb);
+ } else {
+ /* remaining chunks are HMAC checked */
+ stcb->asoc.authenticated = 1;
+ }
+ break;
+
+ default:
+ unknown_chunk:
+ /* it's an unknown chunk! */
+ if ((ch->chunk_type & 0x40) && (stcb != NULL)) {
+ struct mbuf *mm;
+ struct sctp_paramhdr *phd;
+
+ mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (mm) {
+ phd = mtod(mm, struct sctp_paramhdr *);
+ /*
+ * We cheat and use the param type since
+ * we did not bother to define an
+ * error cause struct. They are the
+ * same basic format with different
+ * names.
+ */
+ phd->param_type = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ phd->param_length = htons(chk_length + sizeof(*phd));
+ SCTP_BUF_LEN(mm) = sizeof(*phd);
+ SCTP_BUF_NEXT(mm) = SCTP_M_COPYM(m, *offset, SCTP_SIZE32(chk_length),
+ M_DONTWAIT);
+ if (SCTP_BUF_NEXT(mm)) {
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = SCTP_BUF_NEXT(mm);
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ sctp_queue_op_err(stcb, mm);
+ } else {
+ sctp_m_freem(mm);
+ }
+ }
+ }
+ if ((ch->chunk_type & 0x80) == 0) {
+ /* discard this packet */
+ *offset = length;
+ return (stcb);
+ } /* else skip this bad chunk and continue... */
+ break;
+ } /* switch (ch->chunk_type) */
+
+
+next_chunk:
+ /* get the next chunk */
+ *offset += SCTP_SIZE32(chk_length);
+ if (*offset >= length) {
+ /* no more data left in the mbuf chain */
+ break;
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ if (ch == NULL) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ } /* while */
+
+ if (asconf_cnt > 0 && stcb != NULL) {
+ sctp_send_asconf_ack(stcb);
+ }
+ return (stcb);
+}
+
+
+/*
+ * Process the ECN bits. We have something set, so we must look to see if it
+ * is ECN(0), ECN(1), or CE.
+ */
+static void
+sctp_process_ecn_marked_a(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint8_t ecn_bits)
+{
+ if ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS) {
+ ;
+ } else if ((ecn_bits & SCTP_ECT1_BIT) == SCTP_ECT1_BIT) {
+ /*
+ * we only add to the nonce sum for ECT1; ECT0 does not
+ * change the NS bit (which we have yet to find a way to
+ * send).
+ */
+
+ /* ECN Nonce stuff */
+ stcb->asoc.receiver_nonce_sum++;
+ stcb->asoc.receiver_nonce_sum &= SCTP_SACK_NONCE_SUM;
+
+ /*
+ * Drag up the last_echo point if cumack is larger since we
+ * don't want the point falling way behind by more than
+ * 2^31 and then having it be incorrect.
+ */
+ if (compare_with_wrap(stcb->asoc.cumulative_tsn,
+ stcb->asoc.last_echo_tsn, MAX_TSN)) {
+ stcb->asoc.last_echo_tsn = stcb->asoc.cumulative_tsn;
+ }
+ } else if ((ecn_bits & SCTP_ECT0_BIT) == SCTP_ECT0_BIT) {
+ /*
+ * Drag up the last_echo point if cumack is larger since we
+ * don't want the point falling way behind by more than
+ * 2^31 and then having it be incorrect.
+ */
+ if (compare_with_wrap(stcb->asoc.cumulative_tsn,
+ stcb->asoc.last_echo_tsn, MAX_TSN)) {
+ stcb->asoc.last_echo_tsn = stcb->asoc.cumulative_tsn;
+ }
+ }
+}
+
+static void
+sctp_process_ecn_marked_b(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint32_t high_tsn, uint8_t ecn_bits)
+{
+ if ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS) {
+ /*
+ * we possibly must notify the sender that a congestion
+ * window reduction is in order. We do this by adding an ECNE
+ * chunk to the output chunk queue. The incoming CWR will
+ * remove this chunk.
+ */
+ if (compare_with_wrap(high_tsn, stcb->asoc.last_echo_tsn,
+ MAX_TSN)) {
+ /* Yep, we need to add an ECNE */
+ sctp_send_ecn_echo(stcb, net, high_tsn);
+ stcb->asoc.last_echo_tsn = high_tsn;
+ }
+ }
+}
+
+#ifdef INVARIANTS
+#ifdef __GNUC__
+__attribute__((noinline))
+#endif
+ void
+ sctp_validate_no_locks(struct sctp_inpcb *inp)
+{
+ struct sctp_tcb *lstcb;
+
+ LIST_FOREACH(lstcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ if (mtx_owned(&lstcb->tcb_mtx)) {
+ panic("Own lock on stcb at return from input");
+ }
+ }
+ if (mtx_owned(&inp->inp_create_mtx)) {
+ panic("Own create lock on inp");
+ }
+ if (mtx_owned(&inp->inp_mtx)) {
+ panic("Own inp lock on inp");
+ }
+}
+
+#endif
+
+/*
+ * common input chunk processing (v4 and v6)
+ */
+void
+sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
+ int length, struct sctphdr *sh, struct sctp_chunkhdr *ch,
+ struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint8_t ecn_bits, uint32_t vrf_id, uint16_t port)
+{
+ /*
+ * Control chunk processing
+ */
+ uint32_t high_tsn;
+ int fwd_tsn_seen = 0, data_processed = 0;
+ struct mbuf *m = *mm;
+ int abort_flag = 0;
+ int un_sent;
+
+ SCTP_STAT_INCR(sctps_recvdatagrams);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 1);
+ sctp_auditing(0, inp, stcb, net);
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n",
+ m, iphlen, offset, length, stcb);
+ if (stcb) {
+ /* always clear this before beginning a packet */
+ stcb->asoc.authenticated = 0;
+ stcb->asoc.seen_a_sack_this_pkt = 0;
+ SCTPDBG(SCTP_DEBUG_INPUT1, "stcb:%p state:%x\n",
+ stcb, stcb->asoc.state);
+
+ if ((stcb->asoc.state & SCTP_STATE_WAS_ABORTED) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
+ /*-
+ * If we hit here, we had a ref count
+ * up when the assoc was aborted and the
+ * timer is clearing out the assoc; we should
+ * NOT respond to any packet... it's OOTB.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id, port);
+ goto out_now;
+ }
+ }
+ if (IS_SCTP_CONTROL(ch)) {
+ /* process the control portion of the SCTP packet */
+ /* sa_ignore NO_NULL_CHK */
+ stcb = sctp_process_control(m, iphlen, &offset, length, sh, ch,
+ inp, stcb, &net, &fwd_tsn_seen, vrf_id, port);
+ if (stcb) {
+ /*
+ * This covers us if the cookie-echo was there and
+ * it changes our INP.
+ */
+ inp = stcb->sctp_ep;
+ if ((net) && (port)) {
+ if (net->port == 0) {
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu - sizeof(struct udphdr));
+ }
+ net->port = port;
+ }
+ }
+ } else {
+ /*
+ * no control chunks, so pre-process DATA chunks (these
+ * checks are taken care of by control processing)
+ */
+
+ /*
+ * if DATA only packet, and auth is required, then punt...
+ * can't have authenticated without any AUTH (control)
+ * chunks
+ */
+ if ((stcb != NULL) &&
+ !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ }
+ if (stcb == NULL) {
+ /* out of the blue DATA chunk */
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id, port);
+ goto out_now;
+ }
+ if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
+ /* v_tag mismatch! */
+ SCTP_STAT_INCR(sctps_badvtag);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ }
+ }
+
+ if (stcb == NULL) {
+ /*
+ * no valid TCB for this packet, or we found it's a bad
+ * packet while processing control, or we're done with this
+ * packet (done or skip rest of data), so we drop it...
+ */
+ goto out_now;
+ }
+ /*
+ * DATA chunk processing
+ */
+ /* plow through the data chunks while length > offset */
+
+ /*
+ * Rest should be DATA only. Check authentication state if AUTH for
+ * DATA is required.
+ */
+ if ((length > offset) &&
+ (stcb != NULL) &&
+ !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks) &&
+ !stcb->asoc.authenticated) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "Data chunk requires AUTH, skipped\n");
+ goto trigger_send;
+ }
+ if (length > offset) {
+ int retval;
+
+ /*
+ * First check to make sure our state is correct. We would
+ * not get here unless we really did have a tag, so we don't
+ * abort if this happens, just dump the chunk silently.
+ */
+ switch (SCTP_GET_STATE(&stcb->asoc)) {
+ case SCTP_STATE_COOKIE_ECHOED:
+ /*
+ * we consider that data with valid tags in this state
+ * shows us the cookie-ack was lost. Imply that it was
+ * there.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, net);
+ break;
+ case SCTP_STATE_COOKIE_WAIT:
+ /*
+ * We consider OOTB any data sent during asoc setup.
+ */
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id, port);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ /* sa_ignore NOTREACHED */
+ break;
+ case SCTP_STATE_EMPTY: /* should not happen */
+ case SCTP_STATE_INUSE: /* should not happen */
+ case SCTP_STATE_SHUTDOWN_RECEIVED: /* This is a peer error */
+ case SCTP_STATE_SHUTDOWN_ACK_SENT:
+ default:
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ /* sa_ignore NOTREACHED */
+ break;
+ case SCTP_STATE_OPEN:
+ case SCTP_STATE_SHUTDOWN_SENT:
+ break;
+ }
+ /* take care of ECN, part 1. */
+ if (stcb->asoc.ecn_allowed &&
+ (ecn_bits & (SCTP_ECT0_BIT | SCTP_ECT1_BIT))) {
+ sctp_process_ecn_marked_a(stcb, net, ecn_bits);
+ }
+ /* plow through the data chunks while length > offset */
+ retval = sctp_process_data(mm, iphlen, &offset, length, sh,
+ inp, stcb, net, &high_tsn);
+ if (retval == 2) {
+ /*
+ * The association aborted, NO UNLOCK needed since
+ * the association is destroyed.
+ */
+ goto out_now;
+ }
+ data_processed = 1;
+ if (retval == 0) {
+ /* take care of ecn part 2. */
+ if (stcb->asoc.ecn_allowed &&
+ (ecn_bits & (SCTP_ECT0_BIT | SCTP_ECT1_BIT))) {
+ sctp_process_ecn_marked_b(stcb, net, high_tsn,
+ ecn_bits);
+ }
+ }
+ /*
+ * Anything important needs to have been m_copy'ed in
+ * process_data
+ */
+ }
+ if ((data_processed == 0) && (fwd_tsn_seen)) {
+ int was_a_gap;
+ uint32_t highest_tsn;
+
+ if (compare_with_wrap(stcb->asoc.highest_tsn_inside_nr_map, stcb->asoc.highest_tsn_inside_map, MAX_TSN)) {
+ highest_tsn = stcb->asoc.highest_tsn_inside_nr_map;
+ } else {
+ highest_tsn = stcb->asoc.highest_tsn_inside_map;
+ }
+ was_a_gap = compare_with_wrap(highest_tsn, stcb->asoc.cumulative_tsn, MAX_TSN);
+ stcb->asoc.send_sack = 1;
+ sctp_sack_check(stcb, was_a_gap, &abort_flag);
+ if (abort_flag) {
+ /* Again, we aborted so NO UNLOCK needed */
+ goto out_now;
+ }
+ } else if (fwd_tsn_seen) {
+ stcb->asoc.send_sack = 1;
+ }
+ /* trigger send of any chunks in queue... */
+trigger_send:
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 2);
+ sctp_auditing(1, inp, stcb, net);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "Check for chunk output prw:%d tqe:%d tf=%d\n",
+ stcb->asoc.peers_rwnd,
+ TAILQ_EMPTY(&stcb->asoc.control_send_queue),
+ stcb->asoc.total_flight);
+ un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
+
+ if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue) ||
+ ((un_sent) &&
+ (stcb->asoc.peers_rwnd > 0 ||
+ (stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "Calling chunk OUTPUT\n");
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "chunk OUTPUT returns\n");
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 3);
+ sctp_auditing(2, inp, stcb, net);
+#endif
+ SCTP_TCB_UNLOCK(stcb);
+out_now:
+#ifdef INVARIANTS
+ sctp_validate_no_locks(inp);
+#endif
+ return;
+}
+
+#if 0
+static void
+sctp_print_mbuf_chain(struct mbuf *m)
+{
+ for (; m; m = SCTP_BUF_NEXT(m)) {
+ printf("%p: m_len = %ld\n", m, SCTP_BUF_LEN(m));
+ if (SCTP_BUF_IS_EXTENDED(m))
+ printf("%p: extend_size = %d\n", m, SCTP_BUF_EXTEND_SIZE(m));
+ }
+}
+
+#endif
+
+void
+sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
+{
+#ifdef SCTP_MBUF_LOGGING
+ struct mbuf *mat;
+
+#endif
+ struct mbuf *m;
+ int iphlen;
+ uint32_t vrf_id = 0;
+ uint8_t ecn_bits;
+ struct ip *ip;
+ struct sctphdr *sh;
+ struct sctp_inpcb *inp = NULL;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_chunkhdr *ch;
+ int refcount_up = 0;
+ int length, mlen, offset;
+
+#if !defined(SCTP_WITH_NO_CSUM)
+ uint32_t check, calc_check;
+
+#endif
+
+ if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) {
+ SCTP_RELEASE_PKT(i_pak);
+ return;
+ }
+ mlen = SCTP_HEADER_LEN(i_pak);
+ iphlen = off;
+ m = SCTP_HEADER_TO_CHAIN(i_pak);
+
+ net = NULL;
+ SCTP_STAT_INCR(sctps_recvpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
+
+
+#ifdef SCTP_MBUF_LOGGING
+ /* Log in any input mbufs */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ mat = m;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_INPUT);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, mlen);
+#endif
+ /*
+ * Must take out the iphlen, since mlen expects this (only affects the
+ * lb case)
+ */
+ mlen -= iphlen;
+
+ /*
+ * Get IP, SCTP, and first chunk header together in first mbuf.
+ */
+ ip = mtod(m, struct ip *);
+ offset = iphlen + sizeof(*sh) + sizeof(*ch);
+ if (SCTP_BUF_LEN(m) < offset) {
+ if ((m = m_pullup(m, offset)) == 0) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ /* validate mbuf chain length with IP payload length */
+ if (mlen < (SCTP_GET_IPV4_LENGTH(ip) - iphlen)) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+ sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+ ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(*sh));
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "sctp_input() length:%d iphlen:%d\n", mlen, iphlen);
+
+ /* SCTP does not allow broadcasts or multicasts */
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ goto bad;
+ }
+ if (SCTP_IS_IT_BROADCAST(ip->ip_dst, m)) {
+ /*
+ * We only look at broadcast if it's a front state. All
+ * others we will not have a tcb for anyway.
+ */
+ goto bad;
+ }
+ /* validate SCTP checksum */
+ SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
+ "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
+ m->m_pkthdr.len,
+ if_name(m->m_pkthdr.rcvif),
+ m->m_pkthdr.csum_flags);
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_recvnocrc);
+#else
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
+ SCTP_STAT_INCR(sctps_recvhwcrc);
+ goto sctp_skip_csum_4;
+ }
+ check = sh->checksum; /* save incoming checksum */
+ sh->checksum = 0; /* prepare for calc */
+ calc_check = sctp_calculate_cksum(m, iphlen);
+ sh->checksum = check;
+ SCTP_STAT_INCR(sctps_recvswcrc);
+ if (calc_check != check) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n",
+ calc_check, check, m, mlen, iphlen);
+
+ stcb = sctp_findassociation_addr(m, iphlen,
+ offset - sizeof(*ch),
+ sh, ch, &inp, &net,
+ vrf_id);
+ if ((net) && (port)) {
+ if (net->port == 0) {
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu - sizeof(struct udphdr));
+ }
+ net->port = port;
+ }
+ if ((inp) && (stcb)) {
+ sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
+ } else if ((inp != NULL) && (stcb == NULL)) {
+ refcount_up = 1;
+ }
+ SCTP_STAT_INCR(sctps_badsum);
+ SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
+ goto bad;
+ }
+sctp_skip_csum_4:
+#endif
+ /* destination port of 0 is illegal, based on RFC2960. */
+ if (sh->dest_port == 0) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+ /*
+ * Locate the pcb and tcb for the datagram; sctp_findassociation_addr() wants
+ * the IP/SCTP/first chunk header...
+ */
+ stcb = sctp_findassociation_addr(m, iphlen, offset - sizeof(*ch),
+ sh, ch, &inp, &net, vrf_id);
+ if ((net) && (port)) {
+ if (net->port == 0) {
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu - sizeof(struct udphdr));
+ }
+ net->port = port;
+ }
+ /* inp's ref-count increased && stcb locked */
+ if (inp == NULL) {
+ struct sctp_init_chunk *init_chk, chunk_buf;
+
+ SCTP_STAT_INCR(sctps_noport);
+#ifdef ICMP_BANDLIM
+ /*
+ * we use the bandwidth limiting to protect against sending
+ * too many ABORTS all at once. In this case these count the
+ * same as an ICMP message.
+ */
+ if (badport_bandlim(0) < 0)
+ goto bad;
+#endif /* ICMP_BANDLIM */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "Sending a ABORT from packet entry!\n");
+ if (ch->chunk_type == SCTP_INITIATION) {
+ /*
+ * we do a trick here to get the INIT tag, dig in
+ * and get the tag from the INIT and put it in the
+ * common header.
+ */
+ init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
+ iphlen + sizeof(*sh), sizeof(*init_chk),
+ (uint8_t *) & chunk_buf);
+ if (init_chk != NULL)
+ sh->v_tag = init_chk->init.initiate_tag;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+ sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id, port);
+ goto bad;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
+ goto bad;
+ }
+ if (ch->chunk_type != SCTP_ABORT_ASSOCIATION)
+ sctp_send_abort(m, iphlen, sh, 0, NULL, vrf_id, port);
+ goto bad;
+ } else if (stcb == NULL) {
+ refcount_up = 1;
+ }
+#ifdef IPSEC
+ /*
+ * I very much doubt any of the IPSEC stuff will work but I have no
+ * idea, so I will leave it in place.
+ */
+ if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) {
+ MODULE_GLOBAL(ipsec4stat).in_polvio++;
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+#endif /* IPSEC */
+
+ /*
+ * common chunk processing
+ */
+ length = ip->ip_len + iphlen;
+ offset -= sizeof(struct sctp_chunkhdr);
+
+ ecn_bits = ip->ip_tos;
+
+ /* sa_ignore NO_NULL_CHK */
+ sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
+ inp, stcb, net, ecn_bits, vrf_id, port);
+ /* inp's ref-count reduced && stcb unlocked */
+ if (m) {
+ sctp_m_freem(m);
+ }
+ if ((inp) && (refcount_up)) {
+ /* reduce ref-count */
+ SCTP_INP_DECR_REF(inp);
+ }
+ return;
+bad:
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ if ((inp) && (refcount_up)) {
+ /* reduce ref-count */
+ SCTP_INP_DECR_REF(inp);
+ }
+ if (m) {
+ sctp_m_freem(m);
+ }
+ return;
+}
+void
+sctp_input(i_pak, off)
+ struct mbuf *i_pak;
+ int off;
+{
+ sctp_input_with_port(i_pak, off, 0);
+}
diff --git a/freebsd/sys/netinet/sctp_input.h b/freebsd/sys/netinet/sctp_input.h
new file mode 100644
index 00000000..90cd098a
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_input.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_input.h,v 1.6 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_input_h__
+#define __sctp_input_h__
+
+#if defined(_KERNEL) || defined(__Userspace__)
+void
+sctp_common_input_processing(struct mbuf **, int, int, int,
+ struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb *,
+ struct sctp_tcb *, struct sctp_nets *, uint8_t, uint32_t, uint16_t);
+
+struct sctp_stream_reset_out_request *
+sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
+ struct sctp_tmit_chunk **bchk);
+
+void
+sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries,
+ uint16_t * list);
+
+
+int sctp_is_there_unsent_data(struct sctp_tcb *stcb);
+
+#endif
+#endif
diff --git a/freebsd/sys/netinet/sctp_lock_bsd.h b/freebsd/sys/netinet/sctp_lock_bsd.h
new file mode 100644
index 00000000..81e4a35f
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_lock_bsd.h
@@ -0,0 +1,430 @@
+#ifndef __sctp_lock_bsd_h__
+#define __sctp_lock_bsd_h__
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * General locking concepts: The goal of our locking is to of course provide
+ * consistency and yet minimize overhead. We will attempt to use
+ * non-recursive locks which are supposed to be quite inexpensive. Now in
+ * order to do this the goal is that most functions are not aware of locking.
+ * Once we have a TCB we lock it and unlock when we are through. This means
+ * that the TCB lock is kind-of a "global" lock when working on an
+ * association. Caution must be used when asserting a TCB_LOCK since if we
+ * recurse we deadlock.
+ *
+ * Most other locks (INP and INFO) attempt to localize the locking i.e. we try
+ * to contain the lock and unlock within the function that needs to lock it.
+ * This sometimes means we do extra locks and unlocks and lose a bit of
+ * efficiency, but if the performance statements about non-recursive locks
+ * are true this should not be a problem. One issue that arises with this
+ * lock-only-when-needed approach is that if an implicit association setup
+ * is done we have a problem. If at the time I look up an association I get
+ * NULL back for the tcb, by the time I call to create the association some
+ * other processor could have created it. This is what the CREATE lock on
+ * the endpoint is for. Places where we will be implicitly creating the
+ * association OR just creating an association (the connect call) will
+ * assert the CREATE_INP lock. This assures us that during all the lookups
+ * of INP and INFO, if another creator is also locking/looking up, we can
+ * gate the two to synchronize. So the CREATE_INP lock is another one we
+ * must lock with extreme caution to make sure we don't hit a re-entrancy
+ * issue.
+ *
+ * For non FreeBSD 5.x we provide a bunch of EMPTY lock macros so we can
+ * blatantly put locks everywhere and they reduce to nothing on
+ * NetBSD/OpenBSD and FreeBSD 4.x
+ *
+ */
+
+/*
+ * When working with the global SCTP lists we lock and unlock the INP_INFO
+ * lock. So when we go to lookup an association we will want to do a
+ * SCTP_INP_INFO_RLOCK() and then when we want to add a new association to
+ * the SCTP_BASE_INFO() list's we will do a SCTP_INP_INFO_WLOCK().
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+
+extern struct sctp_foo_stuff sctp_logoff[];
+extern int sctp_logoff_stuff;
+
+#define SCTP_IPI_COUNT_INIT()
+
+#define SCTP_STATLOG_INIT_LOCK()
+#define SCTP_STATLOG_LOCK()
+#define SCTP_STATLOG_UNLOCK()
+#define SCTP_STATLOG_DESTROY()
+
+#define SCTP_INP_INFO_LOCK_DESTROY() do { \
+ if(rw_wowned(&SCTP_BASE_INFO(ipi_ep_mtx))) { \
+ rw_wunlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \
+ } \
+ rw_destroy(&SCTP_BASE_INFO(ipi_ep_mtx)); \
+ } while (0)
+
+#define SCTP_INP_INFO_LOCK_INIT() \
+ rw_init(&SCTP_BASE_INFO(ipi_ep_mtx), "sctp-info");
+
+
+#define SCTP_INP_INFO_RLOCK() do { \
+ rw_rlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \
+} while (0)
+
+
+#define SCTP_INP_INFO_WLOCK() do { \
+ rw_wlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \
+} while (0)
+
+
+#define SCTP_INP_INFO_RUNLOCK() rw_runlock(&SCTP_BASE_INFO(ipi_ep_mtx))
+#define SCTP_INP_INFO_WUNLOCK() rw_wunlock(&SCTP_BASE_INFO(ipi_ep_mtx))
+
+
+#define SCTP_IPI_ADDR_INIT() \
+ rw_init(&SCTP_BASE_INFO(ipi_addr_mtx), "sctp-addr")
+#define SCTP_IPI_ADDR_DESTROY() do { \
+ if(rw_wowned(&SCTP_BASE_INFO(ipi_addr_mtx))) { \
+ rw_wunlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \
+ } \
+ rw_destroy(&SCTP_BASE_INFO(ipi_addr_mtx)); \
+ } while (0)
+#define SCTP_IPI_ADDR_RLOCK() do { \
+ rw_rlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \
+} while (0)
+#define SCTP_IPI_ADDR_WLOCK() do { \
+ rw_wlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \
+} while (0)
+
+#define SCTP_IPI_ADDR_RUNLOCK() rw_runlock(&SCTP_BASE_INFO(ipi_addr_mtx))
+#define SCTP_IPI_ADDR_WUNLOCK() rw_wunlock(&SCTP_BASE_INFO(ipi_addr_mtx))
+
+
+#define SCTP_IPI_ITERATOR_WQ_INIT() \
+ mtx_init(&sctp_it_ctl.ipi_iterator_wq_mtx, "sctp-it-wq", "sctp_it_wq", MTX_DEF)
+
+#define SCTP_IPI_ITERATOR_WQ_DESTROY() \
+ mtx_destroy(&sctp_it_ctl.ipi_iterator_wq_mtx)
+
+#define SCTP_IPI_ITERATOR_WQ_LOCK() do { \
+ mtx_lock(&sctp_it_ctl.ipi_iterator_wq_mtx); \
+} while (0)
+
+#define SCTP_IPI_ITERATOR_WQ_UNLOCK() mtx_unlock(&sctp_it_ctl.ipi_iterator_wq_mtx)
+
+
+#define SCTP_IP_PKTLOG_INIT() \
+ mtx_init(&SCTP_BASE_INFO(ipi_pktlog_mtx), "sctp-pktlog", "packetlog", MTX_DEF)
+
+
+#define SCTP_IP_PKTLOG_LOCK() do { \
+ mtx_lock(&SCTP_BASE_INFO(ipi_pktlog_mtx)); \
+} while (0)
+
+#define SCTP_IP_PKTLOG_UNLOCK() mtx_unlock(&SCTP_BASE_INFO(ipi_pktlog_mtx))
+
+#define SCTP_IP_PKTLOG_DESTROY() \
+ mtx_destroy(&SCTP_BASE_INFO(ipi_pktlog_mtx))
+
+
+
+
+
+/*
+ * The INP locks are what we use for locking an SCTP endpoint; for example,
+ * if we want to change something at the endpoint level, such as random_store
+ * or the cookie secrets, we lock at the INP level.
+ */
+
+#define SCTP_INP_READ_INIT(_inp) \
+ mtx_init(&(_inp)->inp_rdata_mtx, "sctp-read", "inpr", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_INP_READ_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_rdata_mtx)
+
+#define SCTP_INP_READ_LOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_rdata_mtx); \
+} while (0)
+
+
+#define SCTP_INP_READ_UNLOCK(_inp) mtx_unlock(&(_inp)->inp_rdata_mtx)
+
+
+#define SCTP_INP_LOCK_INIT(_inp) \
+ mtx_init(&(_inp)->inp_mtx, "sctp-inp", "inp", MTX_DEF | MTX_DUPOK)
+#define SCTP_ASOC_CREATE_LOCK_INIT(_inp) \
+ mtx_init(&(_inp)->inp_create_mtx, "sctp-create", "inp_create", \
+ MTX_DEF | MTX_DUPOK)
+
+#define SCTP_INP_LOCK_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_mtx)
+
+#define SCTP_INP_LOCK_CONTENDED(_inp) ((_inp)->inp_mtx.mtx_lock & MTX_CONTESTED)
+
+#define SCTP_INP_READ_CONTENDED(_inp) ((_inp)->inp_rdata_mtx.mtx_lock & MTX_CONTESTED)
+
+#define SCTP_ASOC_CREATE_LOCK_CONTENDED(_inp) ((_inp)->inp_create_mtx.mtx_lock & MTX_CONTESTED)
+
+
+#define SCTP_ASOC_CREATE_LOCK_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_create_mtx)
+
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_INP_RLOCK(_inp) do { \
+ if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_INP);\
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#define SCTP_INP_WLOCK(_inp) do { \
+ if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_INP);\
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#else
+
+#define SCTP_INP_RLOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#define SCTP_INP_WLOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#endif
+
+
+#define SCTP_TCB_SEND_LOCK_INIT(_tcb) \
+ mtx_init(&(_tcb)->tcb_send_mtx, "sctp-send-tcb", "tcbs", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_TCB_SEND_LOCK_DESTROY(_tcb) mtx_destroy(&(_tcb)->tcb_send_mtx)
+
+#define SCTP_TCB_SEND_LOCK(_tcb) do { \
+ mtx_lock(&(_tcb)->tcb_send_mtx); \
+} while (0)
+
+#define SCTP_TCB_SEND_UNLOCK(_tcb) mtx_unlock(&(_tcb)->tcb_send_mtx)
+
+#define SCTP_INP_INCR_REF(_inp) atomic_add_int(&((_inp)->refcount), 1)
+#define SCTP_INP_DECR_REF(_inp) atomic_add_int(&((_inp)->refcount), -1)
+
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_ASOC_CREATE_LOCK(_inp) \
+ do { \
+ if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_CREATE); \
+ mtx_lock(&(_inp)->inp_create_mtx); \
+ } while (0)
+#else
+
+#define SCTP_ASOC_CREATE_LOCK(_inp) \
+ do { \
+ mtx_lock(&(_inp)->inp_create_mtx); \
+ } while (0)
+#endif
+
+#define SCTP_INP_RUNLOCK(_inp) mtx_unlock(&(_inp)->inp_mtx)
+#define SCTP_INP_WUNLOCK(_inp) mtx_unlock(&(_inp)->inp_mtx)
+#define SCTP_ASOC_CREATE_UNLOCK(_inp) mtx_unlock(&(_inp)->inp_create_mtx)
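+
+/*
+ * A minimal sketch, kept out of the build with #if 0, of how the lookup
+ * and create locks described above are meant to pair up: readers take the
+ * INP_INFO read lock to search, and implicit association creation is gated
+ * by the endpoint CREATE lock. The example_find_assoc()/example_create_assoc()
+ * helpers are hypothetical placeholders, not part of this header.
+ */
+#if 0
+static struct sctp_tcb *
+example_lookup_or_create(struct sctp_inpcb *inp)
+{
+        struct sctp_tcb *stcb;
+
+        SCTP_INP_INFO_RLOCK();
+        stcb = example_find_assoc(inp);        /* hypothetical lookup helper */
+        SCTP_INP_INFO_RUNLOCK();
+        if (stcb == NULL) {
+                /*
+                 * Gate creation so two processors cannot both build the
+                 * same association behind each other's back.
+                 */
+                SCTP_ASOC_CREATE_LOCK(inp);
+                stcb = example_find_assoc(inp);        /* re-check under the lock */
+                if (stcb == NULL)
+                        stcb = example_create_assoc(inp);        /* hypothetical */
+                SCTP_ASOC_CREATE_UNLOCK(inp);
+        }
+        return (stcb);
+}
+#endif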
+
+/*
+ * For the majority of things (once we have found the association) we will
+ * lock the actual association mutex. This will protect all the association
+ * level queues and streams and such. We will need to lock the socket layer
+ * when we stuff data up into the receiving sb_mb. I.e. we will need to do an
+ * extra SOCKBUF_LOCK(&so->so_rcv) even though the association is locked.
+ */
+
+#define SCTP_TCB_LOCK_INIT(_tcb) \
+ mtx_init(&(_tcb)->tcb_mtx, "sctp-tcb", "tcb", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_TCB_LOCK_DESTROY(_tcb) mtx_destroy(&(_tcb)->tcb_mtx)
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_TCB_LOCK(_tcb) do { \
+ if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_tcb->sctp_ep, _tcb, SCTP_LOG_LOCK_TCB); \
+ mtx_lock(&(_tcb)->tcb_mtx); \
+} while (0)
+
+#else
+#define SCTP_TCB_LOCK(_tcb) do { \
+ mtx_lock(&(_tcb)->tcb_mtx); \
+} while (0)
+
+#endif
+
+
+#define SCTP_TCB_TRYLOCK(_tcb) mtx_trylock(&(_tcb)->tcb_mtx)
+
+#define SCTP_TCB_UNLOCK(_tcb) mtx_unlock(&(_tcb)->tcb_mtx)
+
+#define SCTP_TCB_UNLOCK_IFOWNED(_tcb) do { \
+ if (mtx_owned(&(_tcb)->tcb_mtx)) \
+ mtx_unlock(&(_tcb)->tcb_mtx); \
+ } while (0)
+
+
+
+#ifdef INVARIANTS
+#define SCTP_TCB_LOCK_ASSERT(_tcb) do { \
+ if (mtx_owned(&(_tcb)->tcb_mtx) == 0) \
+ panic("Don't own TCB lock"); \
+ } while (0)
+#else
+#define SCTP_TCB_LOCK_ASSERT(_tcb)
+#endif
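+
+/*
+ * Illustrative sketch (disabled with #if 0) of the rule stated above: the
+ * TCB lock protects the association, and an extra SOCKBUF_LOCK on the
+ * receive buffer is still required when data is handed to the socket.
+ * example_append_to_readq() is a hypothetical helper, not a real function.
+ */
+#if 0
+static void
+example_deliver(struct sctp_tcb *stcb, struct socket *so, struct mbuf *m)
+{
+        SCTP_TCB_LOCK(stcb);
+        SCTP_TCB_LOCK_ASSERT(stcb);
+        SOCKBUF_LOCK(&so->so_rcv);        /* extra lock for the receive sockbuf */
+        example_append_to_readq(stcb, m);
+        SOCKBUF_UNLOCK(&so->so_rcv);
+        SCTP_TCB_UNLOCK(stcb);
+}
+#endif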
+
+#define SCTP_ITERATOR_LOCK_INIT() \
+ mtx_init(&sctp_it_ctl.it_mtx, "sctp-it", "iterator", MTX_DEF)
+
+#ifdef INVARIANTS
+#define SCTP_ITERATOR_LOCK() \
+ do { \
+ if (mtx_owned(&sctp_it_ctl.it_mtx)) \
+ panic("Iterator Lock"); \
+ mtx_lock(&sctp_it_ctl.it_mtx); \
+ } while (0)
+#else
+#define SCTP_ITERATOR_LOCK() \
+ do { \
+ mtx_lock(&sctp_it_ctl.it_mtx); \
+ } while (0)
+
+#endif
+
+#define SCTP_ITERATOR_UNLOCK() mtx_unlock(&sctp_it_ctl.it_mtx)
+#define SCTP_ITERATOR_LOCK_DESTROY() mtx_destroy(&sctp_it_ctl.it_mtx)
+
+
+#define SCTP_WQ_ADDR_INIT() do { \
+ mtx_init(&SCTP_BASE_INFO(wq_addr_mtx), "sctp-addr-wq","sctp_addr_wq",MTX_DEF); \
+ } while (0)
+
+#define SCTP_WQ_ADDR_DESTROY() do { \
+ if(mtx_owned(&SCTP_BASE_INFO(wq_addr_mtx))) { \
+ mtx_unlock(&SCTP_BASE_INFO(wq_addr_mtx)); \
+ } \
+ mtx_destroy(&SCTP_BASE_INFO(wq_addr_mtx)); \
+ } while (0)
+
+#define SCTP_WQ_ADDR_LOCK() do { \
+ mtx_lock(&SCTP_BASE_INFO(wq_addr_mtx)); \
+} while (0)
+#define SCTP_WQ_ADDR_UNLOCK() do { \
+ mtx_unlock(&SCTP_BASE_INFO(wq_addr_mtx)); \
+} while (0)
+
+
+
+#define SCTP_INCR_EP_COUNT() \
+ do { \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_ep), 1); \
+ } while (0)
+
+#define SCTP_DECR_EP_COUNT() \
+ do { \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ep), 1); \
+ } while (0)
+
+#define SCTP_INCR_ASOC_COUNT() \
+ do { \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_asoc), 1); \
+ } while (0)
+
+#define SCTP_DECR_ASOC_COUNT() \
+ do { \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_asoc), 1); \
+ } while (0)
+
+#define SCTP_INCR_LADDR_COUNT() \
+ do { \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_laddr), 1); \
+ } while (0)
+
+#define SCTP_DECR_LADDR_COUNT() \
+ do { \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_laddr), 1); \
+ } while (0)
+
+#define SCTP_INCR_RADDR_COUNT() \
+ do { \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_raddr), 1); \
+ } while (0)
+
+#define SCTP_DECR_RADDR_COUNT() \
+ do { \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_raddr),1); \
+ } while (0)
+
+#define SCTP_INCR_CHK_COUNT() \
+ do { \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_chunk), 1); \
+ } while (0)
+#ifdef INVARIANTS
+#define SCTP_DECR_CHK_COUNT() \
+ do { \
+ if(SCTP_BASE_INFO(ipi_count_chunk) == 0) \
+ panic("chunk count to 0?"); \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_chunk), 1); \
+ } while (0)
+#else
+#define SCTP_DECR_CHK_COUNT() \
+ do { \
+ if(SCTP_BASE_INFO(ipi_count_chunk) != 0) \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_chunk), 1); \
+ } while (0)
+#endif
+#define SCTP_INCR_READQ_COUNT() \
+ do { \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_readq),1); \
+ } while (0)
+
+#define SCTP_DECR_READQ_COUNT() \
+ do { \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_readq), 1); \
+ } while (0)
+
+#define SCTP_INCR_STRMOQ_COUNT() \
+ do { \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_strmoq), 1); \
+ } while (0)
+
+#define SCTP_DECR_STRMOQ_COUNT() \
+ do { \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_strmoq), 1); \
+ } while (0)
+
+
+#if defined(SCTP_SO_LOCK_TESTING)
+#define SCTP_INP_SO(sctpinp) (sctpinp)->ip_inp.inp.inp_socket
+#define SCTP_SOCKET_LOCK(so, refcnt)
+#define SCTP_SOCKET_UNLOCK(so, refcnt)
+#endif
+
+#endif
diff --git a/freebsd/sys/netinet/sctp_os.h b/freebsd/sys/netinet/sctp_os.h
new file mode 100644
index 00000000..c1a392f0
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_os.h
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#ifndef __sctp_os_h__
+#define __sctp_os_h__
+
+/*
+ * General kernel memory allocation:
+ * SCTP_MALLOC(element, type, size, name)
+ * SCTP_FREE(element)
+ * Kernel memory allocation for "soname"- memory must be zeroed.
+ * SCTP_MALLOC_SONAME(name, type, size)
+ * SCTP_FREE_SONAME(name)
+ */
+
+/*
+ * Zone(pool) allocation routines: MUST be defined for each OS.
+ * zone = zone/pool pointer.
+ * name = string name of the zone/pool.
+ * size = size of each zone/pool element.
+ * number = number of elements in zone/pool.
+ * type = structure type to allocate
+ *
+ * sctp_zone_t
+ * SCTP_ZONE_INIT(zone, name, size, number)
+ * SCTP_ZONE_GET(zone, type)
+ * SCTP_ZONE_FREE(zone, element)
+ * SCTP_ZONE_DESTROY(zone)
+ */
+
+#include <freebsd/netinet/sctp_os_bsd.h>
+
+
+
+
+
+/* All OSes must implement this address gatherer. If
+ * no VRFs exist, then vrf 0 is the only one and all
+ * addresses and ifn's live here.
+ */
+#define SCTP_DEFAULT_VRF 0
+void sctp_init_vrf_list(int vrfid);
+
+#endif
diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h
new file mode 100644
index 00000000..cf29776f
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_os_bsd.h
@@ -0,0 +1,503 @@
+/*-
+ * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#ifndef __sctp_os_bsd_h__
+#define __sctp_os_bsd_h__
+/*
+ * includes
+ */
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_sctp.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/rwlock.h>
+#include <freebsd/sys/kthread.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/random.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/machine/cpu.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/icmp_var.h>
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#include <freebsd/netipsec/key.h>
+#endif /* IPSEC */
+
+#ifdef INET6
+#include <freebsd/sys/domain.h>
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec6.h>
+#endif
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/in6_pcb.h>
+#include <freebsd/netinet/icmp6.h>
+#include <freebsd/netinet6/ip6protosw.h>
+#include <freebsd/netinet6/nd6.h>
+#include <freebsd/netinet6/scope6_var.h>
+#endif /* INET6 */
+
+
+#include <freebsd/netinet/ip_options.h>
+
+#ifndef in6pcb
+#define in6pcb inpcb
+#endif
+/* Declare all the malloc names for all the various mallocs */
+MALLOC_DECLARE(SCTP_M_MAP);
+MALLOC_DECLARE(SCTP_M_STRMI);
+MALLOC_DECLARE(SCTP_M_STRMO);
+MALLOC_DECLARE(SCTP_M_ASC_ADDR);
+MALLOC_DECLARE(SCTP_M_ASC_IT);
+MALLOC_DECLARE(SCTP_M_AUTH_CL);
+MALLOC_DECLARE(SCTP_M_AUTH_KY);
+MALLOC_DECLARE(SCTP_M_AUTH_HL);
+MALLOC_DECLARE(SCTP_M_AUTH_IF);
+MALLOC_DECLARE(SCTP_M_STRESET);
+MALLOC_DECLARE(SCTP_M_CMSG);
+MALLOC_DECLARE(SCTP_M_COPYAL);
+MALLOC_DECLARE(SCTP_M_VRF);
+MALLOC_DECLARE(SCTP_M_IFA);
+MALLOC_DECLARE(SCTP_M_IFN);
+MALLOC_DECLARE(SCTP_M_TIMW);
+MALLOC_DECLARE(SCTP_M_MVRF);
+MALLOC_DECLARE(SCTP_M_ITER);
+MALLOC_DECLARE(SCTP_M_SOCKOPT);
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+
+#define SCTP_GET_CYCLECOUNT get_cyclecount()
+#define SCTP_CTR6 sctp_log_trace
+
+#else
+#define SCTP_CTR6 CTR6
+#endif
+
+/*
+ * Macros to expand out globals defined by various modules
+ * to either a real global or a virtualized instance of one,
+ * depending on whether VIMAGE is defined.
+ */
+/* then define the macro(s) that hook into the vimage macros */
+#define MODULE_GLOBAL(__SYMBOL) V_##__SYMBOL
+
+#define V_system_base_info VNET(system_base_info)
+#define SCTP_BASE_INFO(__m) V_system_base_info.sctppcbinfo.__m
+#define SCTP_BASE_STATS V_system_base_info.sctpstat
+#define SCTP_BASE_STATS_SYSCTL VNET_NAME(system_base_info.sctpstat)
+#define SCTP_BASE_STAT(__m) V_system_base_info.sctpstat.__m
+#define SCTP_BASE_SYSCTL(__m) VNET_NAME(system_base_info.sctpsysctl.__m)
+#define SCTP_BASE_VAR(__m) V_system_base_info.__m
+
+/*
+ *
+ */
+#define USER_ADDR_NULL (NULL) /* FIX ME: temp */
+
+#if defined(SCTP_DEBUG)
+#define SCTPDBG(level, params...) \
+{ \
+ do { \
+ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
+ printf(params); \
+ } \
+ } while (0); \
+}
+#define SCTPDBG_ADDR(level, addr) \
+{ \
+ do { \
+ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
+ sctp_print_address(addr); \
+ } \
+ } while (0); \
+}
+#define SCTPDBG_PKT(level, iph, sh) \
+{ \
+ do { \
+ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level) { \
+ sctp_print_address_pkt(iph, sh); \
+ } \
+ } while (0); \
+}
+#else
+#define SCTPDBG(level, params...)
+#define SCTPDBG_ADDR(level, addr)
+#define SCTPDBG_PKT(level, iph, sh)
+#endif
+#define SCTP_PRINTF(params...) printf(params)
+
+#ifdef SCTP_LTRACE_CHUNKS
+#define SCTP_LTRACE_CHK(a, b, c, d) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_CHUNK_ENABLE) SCTP_CTR6(KTR_SUBSYS, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_CHUNK_PROC, 0, a, b, c, d)
+#else
+#define SCTP_LTRACE_CHK(a, b, c, d)
+#endif
+
+#ifdef SCTP_LTRACE_ERRORS
+#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
+ printf("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ m, inp, stcb, net, file, __LINE__, err);
+#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
+ printf("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ inp, stcb, net, file, __LINE__, err);
+#else
+#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err)
+#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err)
+#endif
+
+
+/*
+ * Local address and interface list handling
+ */
+#define SCTP_MAX_VRF_ID 0
+#define SCTP_SIZE_OF_VRF_HASH 3
+#define SCTP_IFNAMSIZ IFNAMSIZ
+#define SCTP_DEFAULT_VRFID 0
+#define SCTP_VRF_ADDR_HASH_SIZE 16
+#define SCTP_VRF_IFN_HASH_SIZE 3
+#define SCTP_INIT_VRF_TABLEID(vrf)
+
+#define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP)
+#define SCTP_ROUTE_IS_REAL_LOOP(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifa && (ro)->ro_rt->rt_ifa->ifa_ifp && (ro)->ro_rt->rt_ifa->ifa_ifp->if_type == IFT_LOOP)
+
+/*
+ * Access to IFN's to help with src-addr-selection
+ */
+/* This could return VOID if the index works but for BSD we provide both. */
+#define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp
+#define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_rt->rt_ifp->if_index
+#define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifp)
+
+/*
+ * general memory allocation
+ */
+#define SCTP_MALLOC(var, type, size, name) \
+ do { \
+ var = (type)malloc(size, name, M_NOWAIT); \
+ } while (0)
+
+#define SCTP_FREE(var, type) free(var, type)
+
+#define SCTP_MALLOC_SONAME(var, type, size) \
+ do { \
+ var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \
+ } while (0)
+
+#define SCTP_FREE_SONAME(var) free(var, M_SONAME)
+
+#define SCTP_PROCESS_STRUCT struct proc *
+
+/*
+ * zone allocation functions
+ */
+#include <freebsd/vm/uma.h>
+
+/* SCTP_ZONE_INIT: initialize the zone */
+typedef struct uma_zone *sctp_zone_t;
+
+#define SCTP_ZONE_INIT(zone, name, size, number) { \
+ zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,\
+ 0); \
+ uma_zone_set_max(zone, number); \
+}
+
+#define SCTP_ZONE_DESTROY(zone) uma_zdestroy(zone)
+
+/* SCTP_ZONE_GET: allocate element from the zone */
+#define SCTP_ZONE_GET(zone, type) \
+ (type *)uma_zalloc(zone, M_NOWAIT);
+
+/* SCTP_ZONE_FREE: free element from the zone */
+#define SCTP_ZONE_FREE(zone, element) \
+ uma_zfree(zone, element);
+
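+/*
+ * Usage sketch (illustrative only; "sctp_example_zone" and
+ * "struct sctp_example_item" are assumed names, not part of the stack):
+ * the wrappers above map directly onto uma(9).
+ *
+ *	static sctp_zone_t sctp_example_zone;
+ *	struct sctp_example_item *item;
+ *
+ *	SCTP_ZONE_INIT(sctp_example_zone, "sctp_example",
+ *	    sizeof(struct sctp_example_item), 1024);
+ *	item = SCTP_ZONE_GET(sctp_example_zone, struct sctp_example_item);
+ *	if (item != NULL)
+ *		SCTP_ZONE_FREE(sctp_example_zone, item);
+ *	SCTP_ZONE_DESTROY(sctp_example_zone);
+ */
+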
+#define SCTP_HASH_INIT(size, hashmark) hashinit_flags(size, M_PCB, hashmark, HASH_NOWAIT)
+#define SCTP_HASH_FREE(table, hashmark) hashdestroy(table, M_PCB, hashmark)
+
+#define SCTP_M_COPYM m_copym
+
+/*
+ * timers
+ */
+#include <freebsd/sys/callout.h>
+typedef struct callout sctp_os_timer_t;
+
+
+#define SCTP_OS_TIMER_INIT(tmr) callout_init(tmr, 1)
+#define SCTP_OS_TIMER_START callout_reset
+#define SCTP_OS_TIMER_STOP callout_stop
+#define SCTP_OS_TIMER_STOP_DRAIN callout_drain
+#define SCTP_OS_TIMER_PENDING callout_pending
+#define SCTP_OS_TIMER_ACTIVE callout_active
+#define SCTP_OS_TIMER_DEACTIVATE callout_deactivate
+
+#define sctp_get_tick_count() (ticks)
+
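+/*
+ * Usage sketch (illustrative only; "example_timeout" and "ctx" are assumed
+ * names): the wrappers above map directly onto callout(9), so arming a
+ * one-second timer, normally embedded in a per-association structure,
+ * looks like
+ *
+ *	sctp_os_timer_t tmr;
+ *
+ *	SCTP_OS_TIMER_INIT(&tmr);
+ *	SCTP_OS_TIMER_START(&tmr, hz, example_timeout, ctx);
+ *	...
+ *	(void)SCTP_OS_TIMER_STOP(&tmr);
+ */
+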
+#define SCTP_UNUSED __attribute__((unused))
+
+/*
+ * Functions
+ */
+/* Mbuf manipulation and access macros */
+#define SCTP_BUF_LEN(m) (m->m_len)
+#define SCTP_BUF_NEXT(m) (m->m_next)
+#define SCTP_BUF_NEXT_PKT(m) (m->m_nextpkt)
+#define SCTP_BUF_RESV_UF(m, size) m->m_data += size
+#define SCTP_BUF_AT(m, size) m->m_data + size
+#define SCTP_BUF_IS_EXTENDED(m) (m->m_flags & M_EXT)
+#define SCTP_BUF_EXTEND_SIZE(m) (m->m_ext.ext_size)
+#define SCTP_BUF_TYPE(m) (m->m_type)
+#define SCTP_BUF_RECVIF(m) (m->m_pkthdr.rcvif)
+#define SCTP_BUF_PREPEND M_PREPEND
+
+#define SCTP_ALIGN_TO_END(m, len) if(m->m_flags & M_PKTHDR) { \
+ MH_ALIGN(m, len); \
+ } else if ((m->m_flags & M_EXT) == 0) { \
+ M_ALIGN(m, len); \
+ }
+
+/* We size this so that, with the default 65k
+ * packet log, up to 4 threads can be writing
+ * at once; that is four 16k packets before
+ * we would hit a problem.
+ */
+#define SCTP_PKTLOG_WRITERS_NEED_LOCK 3
+
+/*************************/
+/* MTU */
+/*************************/
+#define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu
+#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((rt != NULL) ? rt->rt_rmx.rmx_mtu : 0)
+#define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0)
+#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \
+ if (rt != NULL) \
+ rt->rt_rmx.rmx_mtu = mtu; \
+ } while(0)
+
+/* (de-)register interface event notifications */
+#define SCTP_REGISTER_INTERFACE(ifhandle, af)
+#define SCTP_DEREGISTER_INTERFACE(ifhandle, af)
+
+
+/*************************/
+/* These are for logging */
+/*************************/
+/* return the base ext data pointer */
+#define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf)
+ /* return the refcnt of the data pointer */
+#define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ref_cnt)
+/* return any buffer related flags, this is
+ * used beyond logging for apple only.
+ */
+#define SCTP_BUF_GET_FLAGS(m) (m->m_flags)
+
+/* For BSD this just accesses the M_PKTHDR length
+ * so it operates on an mbuf with hdr flag. Other
+ * O/S's may have separate packet header and mbuf
+ * chain pointers.. thus the macro.
+ */
+#define SCTP_HEADER_TO_CHAIN(m) (m)
+#define SCTP_DETACH_HEADER_FROM_CHAIN(m)
+#define SCTP_HEADER_LEN(m) (m->m_pkthdr.len)
+#define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0
+#define SCTP_RELEASE_HEADER(m)
+#define SCTP_RELEASE_PKT(m) sctp_m_freem(m)
+#define SCTP_ENABLE_UDP_CSUM(m) do { \
+ m->m_pkthdr.csum_flags = CSUM_UDP; \
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); \
+ } while (0)
+
+#define SCTP_GET_PKT_VRFID(m, vrf_id) ((vrf_id = SCTP_DEFAULT_VRFID) != SCTP_DEFAULT_VRFID)
+
+
+
+/* Attach the chain of data into the sendable packet. */
+#define SCTP_ATTACH_CHAIN(pak, m, packet_length) do { \
+ pak = m; \
+ pak->m_pkthdr.len = packet_length; \
+ } while(0)
+
+/* Other m_pkthdr type things */
+#define SCTP_IS_IT_BROADCAST(dst, m) ((m->m_flags & M_PKTHDR) ? in_broadcast(dst, m->m_pkthdr.rcvif) : 0)
+#define SCTP_IS_IT_LOOPBACK(m) ((m->m_flags & M_PKTHDR) && ((m->m_pkthdr.rcvif == NULL) || (m->m_pkthdr.rcvif->if_type == IFT_LOOP)))
+
+
+/* This converts any input packet header
+ * into the chain of data holders, for BSD
+ * it's a NOP.
+ */
+
+/* Macros for getting length from V6/V4 header */
+#define SCTP_GET_IPV4_LENGTH(iph) (iph->ip_len)
+#define SCTP_GET_IPV6_LENGTH(ip6) (ntohs(ip6->ip6_plen))
+
+/* get the v6 hop limit */
+#define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
+
+/* is the endpoint v6only? */
+#define SCTP_IPV6_V6ONLY(inp) (((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY)
+/* is the socket non-blocking? */
+#define SCTP_SO_IS_NBIO(so) ((so)->so_state & SS_NBIO)
+#define SCTP_SET_SO_NBIO(so) ((so)->so_state |= SS_NBIO)
+#define SCTP_CLEAR_SO_NBIO(so) ((so)->so_state &= ~SS_NBIO)
+/* get the socket type */
+#define SCTP_SO_TYPE(so) ((so)->so_type)
+/* reserve sb space for a socket */
+#define SCTP_SORESERVE(so, send, recv) soreserve(so, send, recv)
+/* wakeup a socket */
+#define SCTP_SOWAKEUP(so) wakeup(&(so)->so_timeo)
+/* clear the socket buffer state */
+#define SCTP_SB_CLEAR(sb) \
+ (sb).sb_cc = 0; \
+ (sb).sb_mb = NULL; \
+ (sb).sb_mbcnt = 0;
+
+#define SCTP_SB_LIMIT_RCV(so) so->so_rcv.sb_hiwat
+#define SCTP_SB_LIMIT_SND(so) so->so_snd.sb_hiwat
+
+/*
+ * routes, output, etc.
+ */
+typedef struct route sctp_route_t;
+typedef struct rtentry sctp_rtentry_t;
+
+#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL)
+
+/* Future zero copy wakeup/send function */
+#define SCTP_ZERO_COPY_EVENT(inp, so)
+/* This is re-pulse ourselves for sendbuf */
+#define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so)
+
+/*
+ * IP output routines
+ */
+#define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \
+{ \
+ int o_flgs = IP_RAWOUTPUT; \
+ struct sctp_tcb *local_stcb = stcb; \
+ if (local_stcb && \
+ local_stcb->sctp_ep && \
+ local_stcb->sctp_ep->sctp_socket) \
+ o_flgs |= local_stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE; \
+ result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \
+}
+
+#define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \
+{ \
+ struct sctp_tcb *local_stcb = stcb; \
+ if (local_stcb && local_stcb->sctp_ep) \
+ result = ip6_output(o_pak, \
+ ((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \
+ (ro), 0, 0, ifp, NULL); \
+ else \
+ result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \
+}
+
+struct mbuf *
+sctp_get_mbuf_for_msg(unsigned int space_needed,
+ int want_header, int how, int allonebuf, int type);
+
+
+/*
+ * SCTP AUTH
+ */
+#define HAVE_SHA2
+
+#define SCTP_READ_RANDOM(buf, len) read_random(buf, len)
+
+#ifdef USE_SCTP_SHA1
+#include <freebsd/netinet/sctp_sha1.h>
+#else
+#include <freebsd/crypto/sha1.h>
+/* map standard crypto API names */
+#define SHA1_Init SHA1Init
+#define SHA1_Update SHA1Update
+#define SHA1_Final(x,y) SHA1Final((caddr_t)x, y)
+#endif
+
+#if defined(HAVE_SHA2)
+#include <freebsd/crypto/sha2/sha2.h>
+#endif
+
+#endif
+
+#define SCTP_DECREMENT_AND_CHECK_REFCOUNT(addr) (atomic_fetchadd_int(addr, -1) == 1)
+#if defined(INVARIANTS)
+#define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \
+{ \
+ int32_t oldval; \
+ oldval = atomic_fetchadd_int(addr, -val); \
+ if (oldval < val) { \
+ panic("Counter goes negative"); \
+ } \
+}
+#else
+#define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \
+{ \
+ int32_t oldval; \
+ oldval = atomic_fetchadd_int(addr, -val); \
+ if (oldval < val) { \
+ *addr = 0; \
+ } \
+}
+#endif
diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c
new file mode 100644
index 00000000..9acd3288
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_output.c
@@ -0,0 +1,13539 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_uio.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_auth.h>
+#include <freebsd/netinet/sctp_timer.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctp_indata.h>
+#include <freebsd/netinet/sctp_bsd_addr.h>
+#include <freebsd/netinet/sctp_input.h>
+#include <freebsd/netinet/sctp_crc32.h>
+#include <freebsd/netinet/udp.h>
+#include <freebsd/machine/in_cksum.h>
+
+
+
+#define SCTP_MAX_GAPS_INARRAY 4
+struct sack_track {
+ uint8_t right_edge; /* mergable on the right edge */
+ uint8_t left_edge; /* mergable on the left edge */
+ uint8_t num_entries;
+ uint8_t spare;
+ struct sctp_gap_ack_block gaps[SCTP_MAX_GAPS_INARRAY];
+};
+
+struct sack_track sack_array[256] = {
+ {0, 0, 0, 0, /* 0x00 */
+ {{0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x01 */
+ {{0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x02 */
+ {{1, 1},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x03 */
+ {{0, 1},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x04 */
+ {{2, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x05 */
+ {{0, 0},
+ {2, 2},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x06 */
+ {{1, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x07 */
+ {{0, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x08 */
+ {{3, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x09 */
+ {{0, 0},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x0a */
+ {{1, 1},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x0b */
+ {{0, 1},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x0c */
+ {{2, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x0d */
+ {{0, 0},
+ {2, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x0e */
+ {{1, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x0f */
+ {{0, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x10 */
+ {{4, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x11 */
+ {{0, 0},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x12 */
+ {{1, 1},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x13 */
+ {{0, 1},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x14 */
+ {{2, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x15 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x16 */
+ {{1, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x17 */
+ {{0, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x18 */
+ {{3, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x19 */
+ {{0, 0},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x1a */
+ {{1, 1},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x1b */
+ {{0, 1},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x1c */
+ {{2, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x1d */
+ {{0, 0},
+ {2, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x1e */
+ {{1, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x1f */
+ {{0, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x20 */
+ {{5, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x21 */
+ {{0, 0},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x22 */
+ {{1, 1},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x23 */
+ {{0, 1},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x24 */
+ {{2, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x25 */
+ {{0, 0},
+ {2, 2},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x26 */
+ {{1, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x27 */
+ {{0, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x28 */
+ {{3, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x29 */
+ {{0, 0},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x2a */
+ {{1, 1},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x2b */
+ {{0, 1},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x2c */
+ {{2, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x2d */
+ {{0, 0},
+ {2, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x2e */
+ {{1, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x2f */
+ {{0, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x30 */
+ {{4, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x31 */
+ {{0, 0},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x32 */
+ {{1, 1},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x33 */
+ {{0, 1},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x34 */
+ {{2, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x35 */
+ {{0, 0},
+ {2, 2},
+ {4, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x36 */
+ {{1, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x37 */
+ {{0, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x38 */
+ {{3, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x39 */
+ {{0, 0},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x3a */
+ {{1, 1},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x3b */
+ {{0, 1},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x3c */
+ {{2, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x3d */
+ {{0, 0},
+ {2, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x3e */
+ {{1, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x3f */
+ {{0, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x40 */
+ {{6, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x41 */
+ {{0, 0},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x42 */
+ {{1, 1},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x43 */
+ {{0, 1},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x44 */
+ {{2, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x45 */
+ {{0, 0},
+ {2, 2},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x46 */
+ {{1, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x47 */
+ {{0, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x48 */
+ {{3, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x49 */
+ {{0, 0},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x4a */
+ {{1, 1},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x4b */
+ {{0, 1},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x4c */
+ {{2, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x4d */
+ {{0, 0},
+ {2, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x4e */
+ {{1, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x4f */
+ {{0, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x50 */
+ {{4, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x51 */
+ {{0, 0},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x52 */
+ {{1, 1},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x53 */
+ {{0, 1},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x54 */
+ {{2, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 4, 0, /* 0x55 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {6, 6}
+ }
+ },
+ {0, 0, 3, 0, /* 0x56 */
+ {{1, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x57 */
+ {{0, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x58 */
+ {{3, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x59 */
+ {{0, 0},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x5a */
+ {{1, 1},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x5b */
+ {{0, 1},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x5c */
+ {{2, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x5d */
+ {{0, 0},
+ {2, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x5e */
+ {{1, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x5f */
+ {{0, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x60 */
+ {{5, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x61 */
+ {{0, 0},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x62 */
+ {{1, 1},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x63 */
+ {{0, 1},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x64 */
+ {{2, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x65 */
+ {{0, 0},
+ {2, 2},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x66 */
+ {{1, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x67 */
+ {{0, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x68 */
+ {{3, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x69 */
+ {{0, 0},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x6a */
+ {{1, 1},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x6b */
+ {{0, 1},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x6c */
+ {{2, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x6d */
+ {{0, 0},
+ {2, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x6e */
+ {{1, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x6f */
+ {{0, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x70 */
+ {{4, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x71 */
+ {{0, 0},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x72 */
+ {{1, 1},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x73 */
+ {{0, 1},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x74 */
+ {{2, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x75 */
+ {{0, 0},
+ {2, 2},
+ {4, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x76 */
+ {{1, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x77 */
+ {{0, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x78 */
+ {{3, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x79 */
+ {{0, 0},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x7a */
+ {{1, 1},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x7b */
+ {{0, 1},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x7c */
+ {{2, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x7d */
+ {{0, 0},
+ {2, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x7e */
+ {{1, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x7f */
+ {{0, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0x80 */
+ {{7, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x81 */
+ {{0, 0},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x82 */
+ {{1, 1},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x83 */
+ {{0, 1},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x84 */
+ {{2, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x85 */
+ {{0, 0},
+ {2, 2},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x86 */
+ {{1, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x87 */
+ {{0, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x88 */
+ {{3, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x89 */
+ {{0, 0},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x8a */
+ {{1, 1},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x8b */
+ {{0, 1},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x8c */
+ {{2, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x8d */
+ {{0, 0},
+ {2, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x8e */
+ {{1, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x8f */
+ {{0, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x90 */
+ {{4, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x91 */
+ {{0, 0},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x92 */
+ {{1, 1},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x93 */
+ {{0, 1},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x94 */
+ {{2, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0x95 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0x96 */
+ {{1, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x97 */
+ {{0, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x98 */
+ {{3, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x99 */
+ {{0, 0},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x9a */
+ {{1, 1},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x9b */
+ {{0, 1},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x9c */
+ {{2, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x9d */
+ {{0, 0},
+ {2, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x9e */
+ {{1, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x9f */
+ {{0, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xa0 */
+ {{5, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa1 */
+ {{0, 0},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa2 */
+ {{1, 1},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa3 */
+ {{0, 1},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa4 */
+ {{2, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xa5 */
+ {{0, 0},
+ {2, 2},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa6 */
+ {{1, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa7 */
+ {{0, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa8 */
+ {{3, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xa9 */
+ {{0, 0},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 4, 0, /* 0xaa */
+ {{1, 1},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {1, 1, 4, 0, /* 0xab */
+ {{0, 1},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xac */
+ {{2, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xad */
+ {{0, 0},
+ {2, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xae */
+ {{1, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xaf */
+ {{0, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xb0 */
+ {{4, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb1 */
+ {{0, 0},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb2 */
+ {{1, 1},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb3 */
+ {{0, 1},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb4 */
+ {{2, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xb5 */
+ {{0, 0},
+ {2, 2},
+ {4, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb6 */
+ {{1, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb7 */
+ {{0, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xb8 */
+ {{3, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb9 */
+ {{0, 0},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xba */
+ {{1, 1},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xbb */
+ {{0, 1},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xbc */
+ {{2, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xbd */
+ {{0, 0},
+ {2, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xbe */
+ {{1, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xbf */
+ {{0, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xc0 */
+ {{6, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc1 */
+ {{0, 0},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc2 */
+ {{1, 1},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc3 */
+ {{0, 1},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc4 */
+ {{2, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xc5 */
+ {{0, 0},
+ {2, 2},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc6 */
+ {{1, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc7 */
+ {{0, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc8 */
+ {{3, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xc9 */
+ {{0, 0},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xca */
+ {{1, 1},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xcb */
+ {{0, 1},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xcc */
+ {{2, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xcd */
+ {{0, 0},
+ {2, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xce */
+ {{1, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xcf */
+ {{0, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xd0 */
+ {{4, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd1 */
+ {{0, 0},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd2 */
+ {{1, 1},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd3 */
+ {{0, 1},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd4 */
+ {{2, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xd5 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {6, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd6 */
+ {{1, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd7 */
+ {{0, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xd8 */
+ {{3, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd9 */
+ {{0, 0},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xda */
+ {{1, 1},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xdb */
+ {{0, 1},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xdc */
+ {{2, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xdd */
+ {{0, 0},
+ {2, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xde */
+ {{1, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xdf */
+ {{0, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xe0 */
+ {{5, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe1 */
+ {{0, 0},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe2 */
+ {{1, 1},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe3 */
+ {{0, 1},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe4 */
+ {{2, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xe5 */
+ {{0, 0},
+ {2, 2},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe6 */
+ {{1, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe7 */
+ {{0, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe8 */
+ {{3, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xe9 */
+ {{0, 0},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xea */
+ {{1, 1},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xeb */
+ {{0, 1},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xec */
+ {{2, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xed */
+ {{0, 0},
+ {2, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xee */
+ {{1, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xef */
+ {{0, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xf0 */
+ {{4, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf1 */
+ {{0, 0},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf2 */
+ {{1, 1},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf3 */
+ {{0, 1},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf4 */
+ {{2, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xf5 */
+ {{0, 0},
+ {2, 2},
+ {4, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf6 */
+ {{1, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf7 */
+ {{0, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xf8 */
+ {{3, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf9 */
+ {{0, 0},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xfa */
+ {{1, 1},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xfb */
+ {{0, 1},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xfc */
+ {{2, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xfd */
+ {{0, 0},
+ {2, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xfe */
+ {{1, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 1, 0, /* 0xff */
+ {{0, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ }
+};
+
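+/*
+ * Reading the table above: each entry is indexed by one byte of the
+ * received-TSN bitmap and pre-computes the gap-ack blocks for that bit
+ * pattern.  For example, entry 0x05 (bits 0 and 2 set) has num_entries = 2
+ * with gaps {0, 0} and {2, 2}; right_edge is set exactly when bit 0 is set
+ * and left_edge when bit 7 is set, marking runs that can merge with the
+ * neighboring byte.
+ */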
+
+int
+sctp_is_address_in_scope(struct sctp_ifa *ifa,
+ int ipv4_addr_legal,
+ int ipv6_addr_legal,
+ int loopback_scope,
+ int ipv4_local_scope,
+ int local_scope,
+ int site_scope,
+ int do_update)
+{
+ if ((loopback_scope == 0) &&
+ (ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) {
+ /*
+		 * skip loopback if not in scope
+ */
+ return (0);
+ }
+ switch (ifa->address.sa.sa_family) {
+ case AF_INET:
+ if (ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sin;
+ if (sin->sin_addr.s_addr == 0) {
+ /* not in scope , unspecified */
+ return (0);
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ /* private address not in scope */
+ return (0);
+ }
+ } else {
+ return (0);
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ /*
+ * Must update the flags, bummer, which means any
+ * IFA locks must now be applied HERE <->
+ */
+ if (do_update) {
+ sctp_gather_internal_ifa_flags(ifa);
+ }
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return (0);
+ }
+ /* ok to use deprecated addresses? */
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+			/* skip unspecified addresses */
+ return (0);
+ }
+ if ( /* (local_scope == 0) && */
+ (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) {
+ return (0);
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ return (0);
+ }
+ } else {
+ return (0);
+ }
+ break;
+#endif
+ default:
+ return (0);
+ }
+ return (1);
+}
+
+static struct mbuf *
+sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa)
+{
+ struct sctp_paramhdr *parmh;
+ struct mbuf *mret;
+ int len;
+
+ if (ifa->address.sa.sa_family == AF_INET) {
+ len = sizeof(struct sctp_ipv4addr_param);
+ } else if (ifa->address.sa.sa_family == AF_INET6) {
+ len = sizeof(struct sctp_ipv6addr_param);
+ } else {
+ /* unknown type */
+ return (m);
+ }
+ if (M_TRAILINGSPACE(m) >= len) {
+ /* easy side we just drop it on the end */
+ parmh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m)));
+ mret = m;
+ } else {
+ /* Need more space */
+ mret = m;
+ while (SCTP_BUF_NEXT(mret) != NULL) {
+ mret = SCTP_BUF_NEXT(mret);
+ }
+ SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (SCTP_BUF_NEXT(mret) == NULL) {
+ /* We are hosed, can't add more addresses */
+ return (m);
+ }
+ mret = SCTP_BUF_NEXT(mret);
+ parmh = mtod(mret, struct sctp_paramhdr *);
+ }
+ /* now add the parameter */
+ switch (ifa->address.sa.sa_family) {
+ case AF_INET:
+ {
+ struct sctp_ipv4addr_param *ipv4p;
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sin;
+ ipv4p = (struct sctp_ipv4addr_param *)parmh;
+ parmh->param_type = htons(SCTP_IPV4_ADDRESS);
+ parmh->param_length = htons(len);
+ ipv4p->addr = sin->sin_addr.s_addr;
+ SCTP_BUF_LEN(mret) += len;
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sctp_ipv6addr_param *ipv6p;
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ ipv6p = (struct sctp_ipv6addr_param *)parmh;
+ parmh->param_type = htons(SCTP_IPV6_ADDRESS);
+ parmh->param_length = htons(len);
+ memcpy(ipv6p->addr, &sin6->sin6_addr,
+ sizeof(ipv6p->addr));
+ /* clear embedded scope in the address */
+ in6_clearscope((struct in6_addr *)ipv6p->addr);
+ SCTP_BUF_LEN(mret) += len;
+ break;
+ }
+#endif
+ default:
+ return (m);
+ }
+ return (mret);
+}
+
+
+struct mbuf *
+sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_scoping *scope,
+ struct mbuf *m_at, int cnt_inits_to)
+{
+ struct sctp_vrf *vrf = NULL;
+ int cnt, limit_out = 0, total_count;
+ uint32_t vrf_id;
+
+ vrf_id = inp->def_vrf_id;
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (m_at);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ struct sctp_ifa *sctp_ifap;
+ struct sctp_ifn *sctp_ifnp;
+
+ cnt = cnt_inits_to;
+ if (vrf->total_ifa_count > SCTP_COUNT_LIMIT) {
+ limit_out = 1;
+ cnt = SCTP_ADDRESS_LIMIT;
+ goto skip_count;
+ }
+ LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+ if ((scope->loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
+ /*
+ * Skip loopback devices if loopback_scope
+ * not set
+ */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+ if (sctp_is_address_in_scope(sctp_ifap,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 1) == 0) {
+ continue;
+ }
+ cnt++;
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+skip_count:
+ if (cnt > 1) {
+ total_count = 0;
+ LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+ cnt = 0;
+ if ((scope->loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
+ /*
+ * Skip loopback devices if
+ * loopback_scope not set
+ */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+ if (sctp_is_address_in_scope(sctp_ifap,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 0) == 0) {
+ continue;
+ }
+ m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap);
+ if (limit_out) {
+ cnt++;
+ total_count++;
+ if (cnt >= 2) {
+ /*
+						 * two addresses from
+						 * each interface
+ */
+ break;
+ }
+ if (total_count > SCTP_ADDRESS_LIMIT) {
+ /* No more addresses */
+ break;
+ }
+ }
+ }
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ cnt = cnt_inits_to;
+ /* First, how many ? */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED)
+ /*
+				 * Address being deleted by the system, don't
+ * list.
+ */
+ continue;
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /*
+ * Address being deleted on this ep don't
+ * list.
+ */
+ continue;
+ }
+ if (sctp_is_address_in_scope(laddr->ifa,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 1) == 0) {
+ continue;
+ }
+ cnt++;
+ }
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ limit_out = 1;
+ }
+ /*
+ * To get through a NAT we only list addresses if we have
+ * more than one. That way if you just bind a single address
+ * we let the source of the init dictate our address.
+ */
+ if (cnt > 1) {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ cnt = 0;
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED)
+ continue;
+
+ if (sctp_is_address_in_scope(laddr->ifa,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 0) == 0) {
+ continue;
+ }
+ m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa);
+ cnt++;
+ if (cnt >= SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (m_at);
+}
+
+static struct sctp_ifa *
+sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ uint8_t dest_is_global = 0;
+
+ /* dest_is_priv is true if destination is a private address */
+ /* dest_is_loop is true if destination is a loopback addresses */
+
+ /**
+	 * Here we determine if it is a preferred address. A preferred address
+	 * means it is the same scope or higher scope than the destination.
+ * L = loopback, P = private, G = global
+ * -----------------------------------------
+ * src | dest | result
+ * ----------------------------------------
+ * L | L | yes
+ * -----------------------------------------
+ * P | L | yes-v4 no-v6
+ * -----------------------------------------
+ * G | L | yes-v4 no-v6
+ * -----------------------------------------
+ * L | P | no
+ * -----------------------------------------
+ * P | P | yes
+ * -----------------------------------------
+ * G | P | no
+ * -----------------------------------------
+ * L | G | no
+ * -----------------------------------------
+ * P | G | no
+ * -----------------------------------------
+ * G | G | yes
+ * -----------------------------------------
+ */
+
+ if (ifa->address.sa.sa_family != fam) {
+ /* forget mis-matched family */
+ return (NULL);
+ }
+ if ((dest_is_priv == 0) && (dest_is_loop == 0)) {
+ dest_is_global = 1;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Is destination preferred:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ifa->address.sa);
+ /* Ok the address may be ok */
+ if (fam == AF_INET6) {
+ /* ok to use deprecated addresses? no lets not! */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:1\n");
+ return (NULL);
+ }
+ if (ifa->src_is_priv && !ifa->src_is_loop) {
+ if (dest_is_loop) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:2\n");
+ return (NULL);
+ }
+ }
+ if (ifa->src_is_glob) {
+ if (dest_is_loop) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:3\n");
+ return (NULL);
+ }
+ }
+ }
+ /*
+	 * Now that we know what is what, implement our table. This could in
+	 * theory be done slicker (it used to be), but this is
+ * straightforward and easier to validate :-)
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "src_loop:%d src_priv:%d src_glob:%d\n",
+ ifa->src_is_loop, ifa->src_is_priv, ifa->src_is_glob);
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "dest_loop:%d dest_priv:%d dest_glob:%d\n",
+ dest_is_loop, dest_is_priv, dest_is_global);
+
+ if ((ifa->src_is_loop) && (dest_is_priv)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:4\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_glob) && (dest_is_priv)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:5\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_loop) && (dest_is_global)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:6\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_priv) && (dest_is_global)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:7\n");
+ return (NULL);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "YES\n");
+ /* its a preferred address */
+ return (ifa);
+}
+
+static struct sctp_ifa *
+sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ uint8_t dest_is_global = 0;
+
+ /*
+	 * Here we determine if it is an acceptable address. An acceptable
+	 * address means it is the same scope or higher scope, but we can
+	 * allow for NAT, which means it is ok to have a global dest and a
+	 * private src.
+	 *
+	 * L = loopback, P = private, G = global
+	 * -----------------------------------------
+	 *  src  |  dest | result
+	 * -----------------------------------------
+	 *   L   |   L   | yes
+	 * -----------------------------------------
+	 *   P   |   L   | yes-v4 no-v6
+	 * -----------------------------------------
+	 *   G   |   L   | yes
+	 * -----------------------------------------
+	 *   L   |   P   | no
+	 * -----------------------------------------
+	 *   P   |   P   | yes
+	 * -----------------------------------------
+	 *   G   |   P   | yes - May not work
+	 * -----------------------------------------
+	 *   L   |   G   | no
+	 * -----------------------------------------
+	 *   P   |   G   | yes - May not work
+	 * -----------------------------------------
+	 *   G   |   G   | yes
+	 * -----------------------------------------
+ */
+
+ if (ifa->address.sa.sa_family != fam) {
+ /* forget non matching family */
+ return (NULL);
+ }
+ /* Ok the address may be ok */
+ if ((dest_is_loop == 0) && (dest_is_priv == 0)) {
+ dest_is_global = 1;
+ }
+ if (fam == AF_INET6) {
+ /* ok to use deprecated addresses? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return (NULL);
+ }
+ if (ifa->src_is_priv) {
+ /* Special case, linklocal to loop */
+ if (dest_is_loop)
+ return (NULL);
+ }
+ }
+ /*
+ * Now that we know what is what, implement our table. This could in
+ * theory be done slicker (it used to be), but this is
+ * straightforward and easier to validate :-)
+ */
+ if ((ifa->src_is_loop == 1) && (dest_is_priv)) {
+ return (NULL);
+ }
+ if ((ifa->src_is_loop == 1) && (dest_is_global)) {
+ return (NULL);
+ }
+ /* its an acceptable address */
+ return (ifa);
+}
+
+int
+sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+
+ if (stcb == NULL) {
+ /* There are no restrictions, no TCB :-) */
+ return (0);
+ }
+ LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa == ifa) {
+ /* Yes it is on the list */
+ return (1);
+ }
+ }
+ return (0);
+}
+
+
+int
+sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+
+ if (ifa == NULL)
+ return (0);
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if ((laddr->ifa == ifa) && laddr->action == 0)
+ /* same pointer */
+ return (1);
+ }
+ return (0);
+}
+
+
+
+static struct sctp_ifa *
+sctp_choose_boundspecific_inp(struct sctp_inpcb *inp,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ sa_family_t fam)
+{
+ struct sctp_laddr *laddr, *starting_point;
+ void *ifn;
+ int resettotop = 0;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ struct sctp_vrf *vrf;
+ uint32_t ifn_index;
+
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+ /*
+	 * first question: is the ifn we will emit on in our list? If so, we
+	 * want such an address. Note that we first look for a preferred
+ * address.
+ */
+ if (sctp_ifn) {
+ /* is a preferred one on the interface we route out? */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_preferred(sctp_ifa,
+ dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (sctp_is_addr_in_ep(inp, sifa)) {
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ }
+ /*
+	 * ok, now we need to find one on the list of addresses. We
+	 * can't get one on the emitting interface, so let's first find a
+	 * preferred one. If not that, an acceptable one; otherwise we
+	 * return NULL.
+ */
+ starting_point = inp->next_addr_touse;
+once_again:
+ if (inp->next_addr_touse == NULL) {
+ inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
+ resettotop = 1;
+ }
+ for (laddr = inp->next_addr_touse; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (resettotop == 0) {
+ inp->next_addr_touse = NULL;
+ goto once_again;
+ }
+ inp->next_addr_touse = starting_point;
+ resettotop = 0;
+once_again_too:
+ if (inp->next_addr_touse == NULL) {
+ inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
+ resettotop = 1;
+ }
+ /* ok, what about an acceptable address in the inp */
+ for (laddr = inp->next_addr_touse; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (resettotop == 0) {
+ inp->next_addr_touse = NULL;
+ goto once_again_too;
+ }
+ /*
+	 * no bound address can be a source for the destination; we are in
+	 * trouble
+ */
+ return (NULL);
+}
+
+
+
+static struct sctp_ifa *
+sctp_choose_boundspecific_stcb(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ int non_asoc_addr_ok,
+ sa_family_t fam)
+{
+ struct sctp_laddr *laddr, *starting_point;
+ void *ifn;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ uint8_t start_at_beginning = 0;
+ struct sctp_vrf *vrf;
+ uint32_t ifn_index;
+
+ /*
+	 * first question: is the ifn we will emit on in our list? If so, we
+ * want that one.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+
+ /*
+ * first question, is the ifn we will emit on in our list? If so,
+ * we want that one. First we look for a preferred. Second, we go
+ * for an acceptable.
+ */
+ if (sctp_ifn) {
+ /* first try for a preferred address on the ep */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
+ continue;
+ if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
+ sifa = sctp_is_ifa_addr_preferred(sctp_ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) ||
+ (non_asoc_addr_ok &&
+ (sctp_is_addr_restricted(stcb, sifa)) &&
+ (!sctp_is_addr_pending(stcb, sifa)))) {
+ /* on the no-no list */
+ continue;
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ /* next try for an acceptable address on the ep */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
+ continue;
+ if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) ||
+ (non_asoc_addr_ok &&
+ (sctp_is_addr_restricted(stcb, sifa)) &&
+ (!sctp_is_addr_pending(stcb, sifa)))) {
+ /* on the no-no list */
+ continue;
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+
+ }
+ /*
+	 * if we can't find one like that then we must look at all bound
+	 * addresses and pick one: first a preferred one, then an acceptable one.
+ */
+ starting_point = stcb->asoc.last_used_address;
+sctp_from_the_top:
+ if (stcb->asoc.last_used_address == NULL) {
+ start_at_beginning = 1;
+ stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
+ }
+ /* search beginning with the last used address */
+ for (laddr = stcb->asoc.last_used_address; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) ||
+ (non_asoc_addr_ok &&
+ (sctp_is_addr_restricted(stcb, sifa)) &&
+ (!sctp_is_addr_pending(stcb, sifa)))) {
+ /* on the no-no list */
+ continue;
+ }
+ stcb->asoc.last_used_address = laddr;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (start_at_beginning == 0) {
+ stcb->asoc.last_used_address = NULL;
+ goto sctp_from_the_top;
+ }
+ /* now try for any higher scope than the destination */
+ stcb->asoc.last_used_address = starting_point;
+ start_at_beginning = 0;
+sctp_from_the_top2:
+ if (stcb->asoc.last_used_address == NULL) {
+ start_at_beginning = 1;
+ stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
+ }
+ /* search beginning with the last used address */
+ for (laddr = stcb->asoc.last_used_address; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) ||
+ (non_asoc_addr_ok &&
+ (sctp_is_addr_restricted(stcb, sifa)) &&
+ (!sctp_is_addr_pending(stcb, sifa)))) {
+ /* on the no-no list */
+ continue;
+ }
+ stcb->asoc.last_used_address = laddr;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (start_at_beginning == 0) {
+ stcb->asoc.last_used_address = NULL;
+ goto sctp_from_the_top2;
+ }
+ return (NULL);
+}
+
+static struct sctp_ifa *
+sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn,
+ struct sctp_tcb *stcb,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ int addr_wanted,
+ sa_family_t fam,
+ sctp_route_t * ro
+)
+{
+ struct sctp_ifa *ifa, *sifa;
+ int num_eligible_addr = 0;
+
+#ifdef INET6
+ struct sockaddr_in6 sin6, lsa6;
+
+ if (fam == AF_INET6) {
+ memcpy(&sin6, &ro->ro_dst, sizeof(struct sockaddr_in6));
+ (void)sa6_recoverscope(&sin6);
+ }
+#endif /* INET6 */
+ LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+ if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+#ifdef INET6
+ if (fam == AF_INET6 &&
+ dest_is_loop &&
+ sifa->src_is_loop && sifa->src_is_priv) {
+ /*
+ * don't allow fe80::1 to be a src on loop ::1, we
+ * don't list it to the peer so we will get an
+ * abort.
+ */
+ continue;
+ }
+ if (fam == AF_INET6 &&
+ IN6_IS_ADDR_LINKLOCAL(&sifa->address.sin6.sin6_addr) &&
+ IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
+ /*
+ * link-local <-> link-local must belong to the same
+ * scope.
+ */
+ memcpy(&lsa6, &sifa->address.sin6, sizeof(struct sockaddr_in6));
+ (void)sa6_recoverscope(&lsa6);
+ if (sin6.sin6_scope_id != lsa6.sin6_scope_id) {
+ continue;
+ }
+ }
+#endif /* INET6 */
+
+ /*
+		 * Check if the IPv6 address matches the next-hop. In the
+		 * mobile case, an old IPv6 address may not have been deleted
+		 * from the interface, so the interface has both previous and
+		 * new addresses. We should use the one corresponding to the
+ * next-hop. (by micchie)
+ */
+#ifdef INET6
+ if (stcb && fam == AF_INET6 &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) {
+ if (sctp_v6src_match_nexthop(&sifa->address.sin6, ro)
+ == 0) {
+ continue;
+ }
+ }
+#endif
+ /* Avoid topologically incorrect IPv4 address */
+ if (stcb && fam == AF_INET &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) {
+ if (sctp_v4src_match_nexthop(sifa, ro) == 0) {
+ continue;
+ }
+ }
+ if (stcb) {
+ if (sctp_is_address_in_scope(ifa,
+ stcb->asoc.ipv4_addr_legal,
+ stcb->asoc.ipv6_addr_legal,
+ stcb->asoc.loopback_scope,
+ stcb->asoc.ipv4_local_scope,
+ stcb->asoc.local_scope,
+ stcb->asoc.site_scope, 0) == 0) {
+ continue;
+ }
+ if (((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) ||
+ (non_asoc_addr_ok &&
+ (sctp_is_addr_restricted(stcb, sifa)) &&
+ (!sctp_is_addr_pending(stcb, sifa)))) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ if (num_eligible_addr >= addr_wanted) {
+ return (sifa);
+ }
+ num_eligible_addr++;
+ }
+ return (NULL);
+}
+
+
+static int
+sctp_count_num_preferred_boundall(struct sctp_ifn *ifn,
+ struct sctp_tcb *stcb,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ struct sctp_ifa *ifa, *sifa;
+ int num_eligible_addr = 0;
+
+ LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+ if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0)) {
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL) {
+ continue;
+ }
+ if (stcb) {
+ if (sctp_is_address_in_scope(ifa,
+ stcb->asoc.ipv4_addr_legal,
+ stcb->asoc.ipv6_addr_legal,
+ stcb->asoc.loopback_scope,
+ stcb->asoc.ipv4_local_scope,
+ stcb->asoc.local_scope,
+ stcb->asoc.site_scope, 0) == 0) {
+ continue;
+ }
+ if (((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) ||
+ (non_asoc_addr_ok &&
+ (sctp_is_addr_restricted(stcb, sifa)) &&
+ (!sctp_is_addr_pending(stcb, sifa)))) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ num_eligible_addr++;
+ }
+ return (num_eligible_addr);
+}
+
+static struct sctp_ifa *
+sctp_choose_boundall(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ int non_asoc_addr_ok,
+ sa_family_t fam)
+{
+ int cur_addr_num = 0, num_preferred = 0;
+ void *ifn;
+ struct sctp_ifn *sctp_ifn, *looked_at = NULL, *emit_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ uint32_t ifn_index;
+ struct sctp_vrf *vrf;
+
+ /*-
+ * For boundall we can use any address in the association.
+ * If non_asoc_addr_ok is set we can use any address (at least in
+ * theory). So we look for preferred addresses first. If we find one,
+ * we use it. Otherwise we next try to get an address on the
+ * interface, which we should be able to do (unless non_asoc_addr_ok
+ * is false and we are routed out that way). In these cases where we
+ * can't use the address of the interface we go through all the
+ * ifn's looking for an address we can use and fill that in. Punting
+ * means we send back address 0, which will probably cause problems
+ * actually since then IP will fill in the address of the route ifn,
+ * which means we probably already rejected it.. i.e. here comes an
+ * abort :-<.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ emit_ifn = looked_at = sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+ if (sctp_ifn == NULL) {
+ /* ?? We don't have this guy ?? */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No ifn emit interface?\n");
+ goto bound_all_plan_b;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn_index:%d name:%s is emit interface\n",
+ ifn_index, sctp_ifn->ifn_name);
+
+ if (net) {
+ cur_addr_num = net->indx_of_eligible_next_to_use;
+ }
+ num_preferred = sctp_count_num_preferred_boundall(sctp_ifn,
+ stcb,
+ non_asoc_addr_ok,
+ dest_is_loop,
+ dest_is_priv, fam);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Found %d preferred source addresses for intf:%s\n",
+ num_preferred, sctp_ifn->ifn_name);
+ if (num_preferred == 0) {
+ /*
+ * no eligible addresses, we must use some other interface
+ * address if we can find one.
+ */
+ goto bound_all_plan_b;
+ }
+ /*
+ * Ok we have num_eligible_addr set with how many we can use, this
+ * may vary from call to call due to addresses being deprecated
+ * etc..
+ */
+ if (cur_addr_num >= num_preferred) {
+ cur_addr_num = 0;
+ }
+ /*
+ * select the nth address from the list (where cur_addr_num is the
+ * nth) and 0 is the first one, 1 is the second one etc...
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "cur_addr_num:%d\n", cur_addr_num);
+
+ sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+ dest_is_priv, cur_addr_num, fam, ro);
+
+	/* if sctp_ifa is NULL something changed?? Fall to plan b. */
+ if (sctp_ifa) {
+ atomic_add_int(&sctp_ifa->refcount, 1);
+ if (net) {
+ /* save off where the next one we will want */
+ net->indx_of_eligible_next_to_use = cur_addr_num + 1;
+ }
+ return (sctp_ifa);
+ }
+ /*
+ * plan_b: Look at all interfaces and find a preferred address. If
+ * no preferred fall through to plan_c.
+ */
+bound_all_plan_b:
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan B\n");
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Examine interface %s\n",
+ sctp_ifn->ifn_name);
+ if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* wrong base scope */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "skip\n");
+ continue;
+ }
+ if ((sctp_ifn == looked_at) && looked_at) {
+ /* already looked at this guy */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "already seen\n");
+ continue;
+ }
+ num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, stcb, non_asoc_addr_ok,
+ dest_is_loop, dest_is_priv, fam);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2,
+ "Found ifn:%p %d preferred source addresses\n",
+ ifn, num_preferred);
+ if (num_preferred == 0) {
+ /* None on this interface. */
+			SCTPDBG(SCTP_DEBUG_OUTPUT2, "No preferred -- skipping to next\n");
+ continue;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2,
+ "num preferred:%d on interface:%p cur_addr_num:%d\n",
+ num_preferred, sctp_ifn, cur_addr_num);
+
+ /*
+ * Ok we have num_eligible_addr set with how many we can
+ * use, this may vary from call to call due to addresses
+ * being deprecated etc..
+ */
+ if (cur_addr_num >= num_preferred) {
+ cur_addr_num = 0;
+ }
+ sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+ dest_is_priv, cur_addr_num, fam, ro);
+ if (sifa == NULL)
+ continue;
+ if (net) {
+ net->indx_of_eligible_next_to_use = cur_addr_num + 1;
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "we selected %d\n",
+ cur_addr_num);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Source:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Dest:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &net->ro._l_addr.sa);
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+
+ }
+
+ /* plan_c: do we have an acceptable address on the emit interface */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan C: find acceptable on interface\n");
+ if (emit_ifn == NULL) {
+ goto plan_d;
+ }
+ LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (stcb) {
+ if (sctp_is_address_in_scope(sifa,
+ stcb->asoc.ipv4_addr_legal,
+ stcb->asoc.ipv6_addr_legal,
+ stcb->asoc.loopback_scope,
+ stcb->asoc.ipv4_local_scope,
+ stcb->asoc.local_scope,
+ stcb->asoc.site_scope, 0) == 0) {
+ continue;
+ }
+ if (((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) ||
+ (non_asoc_addr_ok &&
+ (sctp_is_addr_restricted(stcb, sifa)) &&
+ (!sctp_is_addr_pending(stcb, sifa)))) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+plan_d:
+	/*
+	 * plan_d: We are in trouble. No preferred address on the emit
+	 * interface, no preferred address on any interface, and not even an
+	 * acceptable address on the emit interface. Go out and see if we
+	 * can find an acceptable address somewhere amongst all interfaces.
+	 */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D\n");
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* wrong base scope */
+ continue;
+ }
+ if ((sctp_ifn == looked_at) && looked_at)
+ /* already looked at this guy */
+ continue;
+
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa,
+ dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (stcb) {
+ if (sctp_is_address_in_scope(sifa,
+ stcb->asoc.ipv4_addr_legal,
+ stcb->asoc.ipv6_addr_legal,
+ stcb->asoc.loopback_scope,
+ stcb->asoc.ipv4_local_scope,
+ stcb->asoc.local_scope,
+ stcb->asoc.site_scope, 0) == 0) {
+ continue;
+ }
+ if (((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) ||
+ (non_asoc_addr_ok &&
+ (sctp_is_addr_restricted(stcb, sifa)) &&
+ (!sctp_is_addr_pending(stcb, sifa)))) {
+ /*
+ * It is restricted for some
+ * reason.. probably not yet added.
+ */
+ continue;
+ }
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ /*
+ * Ok we can find NO address to source from that is not on our
+ * restricted list and non_asoc_address is NOT ok, or it is on our
+ * restricted list. We can't source to it :-(
+ */
+ return (NULL);
+}
+
+
+
+/* tcb may be NULL */
+struct sctp_ifa *
+sctp_source_address_selection(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ sctp_route_t * ro,
+ struct sctp_nets *net,
+ int non_asoc_addr_ok, uint32_t vrf_id)
+{
+ struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst;
+
+#ifdef INET6
+ struct sockaddr_in6 *to6 = (struct sockaddr_in6 *)&ro->ro_dst;
+
+#endif
+ struct sctp_ifa *answer;
+ uint8_t dest_is_priv, dest_is_loop;
+ sa_family_t fam;
+
+ /*-
+ * Rules: - Find the route if needed, cache if I can. - Look at
+ * interface address in route, Is it in the bound list. If so we
+ * have the best source. - If not we must rotate amongst the
+ * addresses.
+ *
+	 * Caveats and issues
+ *
+ * Do we need to pay attention to scope. We can have a private address
+ * or a global address we are sourcing or sending to. So if we draw
+ * it out
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ * For V4
+ * ------------------------------------------
+ * source * dest * result
+ * -----------------------------------------
+ * <a> Private * Global * NAT
+ * -----------------------------------------
+ * <b> Private * Private * No problem
+ * -----------------------------------------
+ * <c> Global * Private * Huh, How will this work?
+ * -----------------------------------------
+ * <d> Global * Global * No Problem
+ *------------------------------------------
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ * For V6
+ *------------------------------------------
+ * source * dest * result
+ * -----------------------------------------
+ * <a> Linklocal * Global *
+ * -----------------------------------------
+ * <b> Linklocal * Linklocal * No problem
+ * -----------------------------------------
+ * <c> Global * Linklocal * Huh, How will this work?
+ * -----------------------------------------
+ * <d> Global * Global * No Problem
+ *------------------------------------------
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ *
+	 * And then we add to that what happens if there are multiple addresses
+	 * assigned to an interface. Remember the ifa on an ifn is a linked
+	 * list of addresses, so one interface can have more than one IP
+	 * address. What happens if we have both a private and a global
+	 * address? Do we then use the context of the destination to sort out
+	 * which one is best? And what about NATs: sending P->G may get you a
+	 * NAT translation, or should you prefer the G that is on the
+	 * interface?
+ *
+ * Decisions:
+ *
+ * - count the number of addresses on the interface.
+ * - if it is one, no problem except case <c>.
+ * For <a> we will assume a NAT out there.
+ * - if there are more than one, then we need to worry about scope P
+ * or G. We should prefer G -> G and P -> P if possible.
+ * Then as a secondary fall back to mixed types G->P being a last
+ * ditch one.
+ * - The above all works for bound all, but bound specific we need to
+ * use the same concept but instead only consider the bound
+ * addresses. If the bound set is NOT assigned to the interface then
+ * we must use rotation amongst the bound addresses..
+ */
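+	/*-
+	 * Dispatch summary: once the route and destination scope are set up
+	 * below, bound-all endpoints go through sctp_choose_boundall();
+	 * bound-specific endpoints use sctp_choose_boundspecific_stcb() when
+	 * an association exists and sctp_choose_boundspecific_inp() otherwise.
+	 */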
+ if (ro->ro_rt == NULL) {
+ /*
+ * Need a route to cache.
+ */
+ SCTP_RTALLOC(ro, vrf_id);
+ }
+ if (ro->ro_rt == NULL) {
+ return (NULL);
+ }
+ fam = to->sin_family;
+ dest_is_priv = dest_is_loop = 0;
+ /* Setup our scopes for the destination */
+ switch (fam) {
+ case AF_INET:
+ /* Scope based on outbound address */
+ if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) {
+ dest_is_loop = 1;
+ if (net != NULL) {
+ /* mark it as local */
+ net->addr_is_local = 1;
+ }
+ } else if ((IN4_ISPRIVATE_ADDRESS(&to->sin_addr))) {
+ dest_is_priv = 1;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ /* Scope based on outbound address */
+ if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr) ||
+ SCTP_ROUTE_IS_REAL_LOOP(ro)) {
+ /*
+ * If the address is a loopback address, which
+ * consists of "::1" OR "fe80::1%lo0", we are
+ * loopback scope. But we don't use dest_is_priv
+ * (link local addresses).
+ */
+ dest_is_loop = 1;
+ if (net != NULL) {
+ /* mark it as local */
+ net->addr_is_local = 1;
+ }
+ } else if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) {
+ dest_is_priv = 1;
+ }
+ break;
+#endif
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Select source addr for:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)to);
+ SCTP_IPI_ADDR_RLOCK();
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /*
+ * Bound all case
+ */
+ answer = sctp_choose_boundall(inp, stcb, net, ro, vrf_id,
+ dest_is_priv, dest_is_loop,
+ non_asoc_addr_ok, fam);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (answer);
+ }
+ /*
+ * Subset bound case
+ */
+ if (stcb) {
+ answer = sctp_choose_boundspecific_stcb(inp, stcb, net, ro,
+ vrf_id, dest_is_priv,
+ dest_is_loop,
+ non_asoc_addr_ok, fam);
+ } else {
+ answer = sctp_choose_boundspecific_inp(inp, ro, vrf_id,
+ non_asoc_addr_ok,
+ dest_is_priv,
+ dest_is_loop, fam);
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (answer);
+}
+
+static int
+sctp_find_cmsg(int c_type, void *data, struct mbuf *control, int cpsize)
+{
+ struct cmsghdr cmh;
+ int tlen, at;
+
+ tlen = SCTP_BUF_LEN(control);
+ at = 0;
+ /*
+ * Independent of how many mbufs, find the c_type inside the control
+ * structure and copy out the data.
+ */
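+	/*
+	 * Each entry in the control data is a struct cmsghdr followed by
+	 * CMSG_ALIGN()ed data; cmsg_len covers the header plus the data, so
+	 * the walk below advances by CMSG_ALIGN(cmsg_len) per entry.
+	 */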
+ while (at < tlen) {
+ if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
+			/* not enough room for one more; we are done. */
+ return (0);
+ }
+ m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
+ if (((int)cmh.cmsg_len + at) > tlen) {
+ /*
+			 * this is really messed up since there is not enough
+ * data here to cover the cmsg header. We are done.
+ */
+ return (0);
+ }
+ if ((cmh.cmsg_level == IPPROTO_SCTP) &&
+ (c_type == cmh.cmsg_type)) {
+ /* found the one we want, copy it out */
+ at += CMSG_ALIGN(sizeof(struct cmsghdr));
+ if ((int)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) {
+ /*
+ * space of cmsg_len after header not big
+ * enough
+ */
+ return (0);
+ }
+ m_copydata(control, at, cpsize, data);
+ return (1);
+ } else {
+ at += CMSG_ALIGN(cmh.cmsg_len);
+ if (cmh.cmsg_len == 0) {
+ break;
+ }
+ }
+ }
+ /* not found */
+ return (0);
+}
+
+static struct mbuf *
+sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset,
+ struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t ** signature)
+{
+ struct mbuf *copy_init, *copy_initack, *m_at, *sig, *mret;
+ struct sctp_state_cookie *stc;
+ struct sctp_paramhdr *ph;
+ uint8_t *foo;
+ int sig_offset;
+ uint16_t cookie_sz;
+
+ mret = NULL;
+ mret = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) +
+ sizeof(struct sctp_paramhdr)), 0,
+ M_DONTWAIT, 1, MT_DATA);
+ if (mret == NULL) {
+ return (NULL);
+ }
+ copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_DONTWAIT);
+ if (copy_init == NULL) {
+ sctp_m_freem(mret);
+ return (NULL);
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = copy_init;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ copy_initack = SCTP_M_COPYM(initack, initack_offset, M_COPYALL,
+ M_DONTWAIT);
+ if (copy_initack == NULL) {
+ sctp_m_freem(mret);
+ sctp_m_freem(copy_init);
+ return (NULL);
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = copy_initack;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ /* easy side we just drop it on the end */
+ ph = mtod(mret, struct sctp_paramhdr *);
+ SCTP_BUF_LEN(mret) = sizeof(struct sctp_state_cookie) +
+ sizeof(struct sctp_paramhdr);
+ stc = (struct sctp_state_cookie *)((caddr_t)ph +
+ sizeof(struct sctp_paramhdr));
+ ph->param_type = htons(SCTP_STATE_COOKIE);
+ ph->param_length = 0; /* fill in at the end */
+ /* Fill in the stc cookie data */
+ memcpy(stc, stc_in, sizeof(struct sctp_state_cookie));
+
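+	/*-
+	 * Resulting layout of the COOKIE parameter assembled here:
+	 *
+	 *   paramhdr | state cookie | copy of INIT | copy of INIT-ACK | signature
+	 *
+	 * The signature space is zeroed below and filled in by the caller;
+	 * ph->param_length is set to the total length once the pieces are
+	 * chained together.
+	 */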
+ /* tack the INIT and then the INIT-ACK onto the chain */
+ cookie_sz = 0;
+ m_at = mret;
+ for (m_at = mret; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ SCTP_BUF_NEXT(m_at) = copy_init;
+ break;
+ }
+ }
+
+ for (m_at = copy_init; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ SCTP_BUF_NEXT(m_at) = copy_initack;
+ break;
+ }
+ }
+
+ for (m_at = copy_initack; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ break;
+ }
+ }
+ sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_DONTWAIT, 1, MT_DATA);
+ if (sig == NULL) {
+ /* no space, so free the entire chain */
+ sctp_m_freem(mret);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(sig) = 0;
+ SCTP_BUF_NEXT(m_at) = sig;
+ sig_offset = 0;
+ foo = (uint8_t *) (mtod(sig, caddr_t)+sig_offset);
+ memset(foo, 0, SCTP_SIGNATURE_SIZE);
+ *signature = foo;
+ SCTP_BUF_LEN(sig) += SCTP_SIGNATURE_SIZE;
+ cookie_sz += SCTP_SIGNATURE_SIZE;
+ ph->param_length = htons(cookie_sz);
+ return (mret);
+}
+
+
+static uint8_t
+sctp_get_ect(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk)
+{
+ uint8_t this_random;
+
+ /* Huh? */
+ if (SCTP_BASE_SYSCTL(sctp_ecn_enable) == 0)
+ return (0);
+
+ if (SCTP_BASE_SYSCTL(sctp_ecn_nonce) == 0)
+ /* no nonce, always return ECT0 */
+ return (SCTP_ECT0_BIT);
+
+ if (stcb->asoc.peer_supports_ecn_nonce == 0) {
+ /* Peer does NOT support it, so we send a ECT0 only */
+ return (SCTP_ECT0_BIT);
+ }
+ if (chk == NULL)
+ return (SCTP_ECT0_BIT);
+
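+	/*-
+	 * One bit of the 32-bit random pool (asoc.hb_random_values) is
+	 * consumed per call: hb_random_idx selects the byte and
+	 * hb_ect_randombit the bit within it; the pool is refilled below
+	 * once it is exhausted.
+	 */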
+ if ((stcb->asoc.hb_random_idx > 3) ||
+ ((stcb->asoc.hb_random_idx == 3) &&
+ (stcb->asoc.hb_ect_randombit > 7))) {
+ uint32_t rndval;
+
+warp_drive_sa:
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx = 0;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ if (stcb->asoc.hb_ect_randombit > 7) {
+ stcb->asoc.hb_ect_randombit = 0;
+ stcb->asoc.hb_random_idx++;
+ if (stcb->asoc.hb_random_idx > 3) {
+ goto warp_drive_sa;
+ }
+ }
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ }
+ if ((this_random >> stcb->asoc.hb_ect_randombit) & 0x01) {
+ if (chk != NULL)
+ /* ECN Nonce stuff */
+ chk->rec.data.ect_nonce = SCTP_ECT1_BIT;
+ stcb->asoc.hb_ect_randombit++;
+ return (SCTP_ECT1_BIT);
+ } else {
+ stcb->asoc.hb_ect_randombit++;
+ return (SCTP_ECT0_BIT);
+ }
+}
+
+static int
+sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, /* may be NULL */
+ struct sctp_nets *net,
+ struct sockaddr *to,
+ struct mbuf *m,
+ uint32_t auth_offset,
+ struct sctp_auth_chunk *auth,
+ uint16_t auth_keyid,
+ int nofragment_flag,
+ int ecn_ok,
+ struct sctp_tmit_chunk *chk,
+ int out_of_asoc_ok,
+ uint16_t src_port,
+ uint16_t dest_port,
+ uint32_t v_tag,
+ uint16_t port,
+ int so_locked,
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+ union sctp_sockstore *over_addr
+)
+/* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
+{
+	/*-
+	 * Given an mbuf chain (via SCTP_BUF_NEXT()) that holds a packet
+	 * header WITH an SCTPHDR but no IP header, endpoint inp and sa
+	 * structure:
+	 * - fill in the HMAC digest of any AUTH chunk in the packet.
+	 * - calculate and fill in the SCTP checksum.
+	 * - prepend an IP header.
+	 * - if boundall use INADDR_ANY.
+	 * - if boundspecific do source address selection.
+	 * - set the fragmentation option for IPv4.
+	 * - on return from IP output, check/adjust the MTU size of the
+	 *   output interface and the smallest_mtu size as well.
+	 */
+ /* Will need ifdefs around this */
+ struct mbuf *o_pak;
+ struct mbuf *newm;
+ struct sctphdr *sctphdr;
+ int packet_length;
+ int ret;
+ uint32_t vrf_id;
+ sctp_route_t *ro = NULL;
+ struct udphdr *udp = NULL;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so = NULL;
+
+#endif
+
+ if ((net) && (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ sctp_m_freem(m);
+ return (EFAULT);
+ }
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = inp->def_vrf_id;
+ }
+
+ /* fill in the HMAC digest for any AUTH chunk in the packet */
+ if ((auth != NULL) && (stcb != NULL)) {
+ sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid);
+ }
+ if (to->sa_family == AF_INET) {
+ struct ip *ip = NULL;
+ sctp_route_t iproute;
+ uint8_t tos_value;
+ int len;
+
+ len = sizeof(struct ip) + sizeof(struct sctphdr);
+ if (port) {
+ len += sizeof(struct udphdr);
+ }
+ newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ if (newm == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_ALIGN_TO_END(newm, len);
+ SCTP_BUF_LEN(newm) = len;
+ SCTP_BUF_NEXT(newm) = m;
+ m = newm;
+ packet_length = sctp_calculate_len(m);
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = (sizeof(struct ip) >> 2);
+ if (net) {
+ tos_value = net->tos_flowlabel & 0x000000ff;
+ } else {
+ tos_value = inp->ip_inp.inp.inp_ip_tos;
+ }
+ if ((nofragment_flag) && (port == 0)) {
+ ip->ip_off = IP_DF;
+ } else
+ ip->ip_off = 0;
+
+ /* FreeBSD has a function for ip_id's */
+ ip->ip_id = ip_newid();
+
+ ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
+ ip->ip_len = packet_length;
+ if (stcb) {
+ if ((stcb->asoc.ecn_allowed) && ecn_ok) {
+ /* Enable ECN */
+ ip->ip_tos = ((u_char)(tos_value & 0xfc) | sctp_get_ect(stcb, chk));
+ } else {
+ /* No ECN */
+ ip->ip_tos = (u_char)(tos_value & 0xfc);
+ }
+ } else {
+ /* no association at all */
+ ip->ip_tos = (tos_value & 0xfc);
+ }
+ if (port) {
+ ip->ip_p = IPPROTO_UDP;
+ } else {
+ ip->ip_p = IPPROTO_SCTP;
+ }
+ ip->ip_sum = 0;
+ if (net == NULL) {
+ ro = &iproute;
+ memset(&iproute, 0, sizeof(iproute));
+ memcpy(&ro->ro_dst, to, to->sa_len);
+ } else {
+ ro = (sctp_route_t *) & net->ro;
+ }
+ /* Now the address selection part */
+ ip->ip_dst.s_addr = ((struct sockaddr_in *)to)->sin_addr.s_addr;
+
+ /* call the routine to select the src address */
+ if (net && out_of_asoc_ok == 0) {
+ if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ }
+ if (net->src_addr_selected == 0) {
+ /* Cache the source address */
+ net->ro._s_addr = sctp_source_address_selection(inp, stcb,
+ ro, net, 0,
+ vrf_id);
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr == NULL) {
+ /* No route to host */
+ net->src_addr_selected = 0;
+ goto no_route;
+ }
+ ip->ip_src = net->ro._s_addr->address.sin.sin_addr;
+ } else {
+ if (over_addr == NULL) {
+ struct sctp_ifa *_lsrc;
+
+ _lsrc = sctp_source_address_selection(inp, stcb, ro,
+ net,
+ out_of_asoc_ok,
+ vrf_id);
+ if (_lsrc == NULL) {
+ goto no_route;
+ }
+ ip->ip_src = _lsrc->address.sin.sin_addr;
+ sctp_free_ifa(_lsrc);
+ } else {
+ ip->ip_src = over_addr->sin.sin_addr;
+ SCTP_RTALLOC(ro, vrf_id);
+ }
+ }
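+		/*
+		 * For SCTP-over-UDP encapsulation (port != 0) a UDP header is
+		 * inserted between the IP header and the SCTP common header;
+		 * the source port is the configured tunneling port and the
+		 * destination port is the peer's encapsulation port.
+		 */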
+ if (port) {
+ udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip));
+ udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
+ udp->uh_dport = port;
+ udp->uh_ulen = htons(packet_length - sizeof(struct ip));
+ udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
+ sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
+ } else {
+ sctphdr = (struct sctphdr *)((caddr_t)ip + sizeof(struct ip));
+ }
+
+ sctphdr->src_port = src_port;
+ sctphdr->dest_port = dest_port;
+ sctphdr->v_tag = v_tag;
+ sctphdr->checksum = 0;
+
+ /*
+ * If source address selection fails and we find no route
+ * then the ip_output should fail as well with a
+ * NO_ROUTE_TO_HOST type error. We probably should catch
+ * that somewhere and abort the association right away
+ * (assuming this is an INIT being sent).
+ */
+ if ((ro->ro_rt == NULL)) {
+ /*
+ * src addr selection failed to find a route (or
+ * valid source addr), so we can't get there from
+ * here (yet)!
+ */
+ no_route:
+ SCTPDBG(SCTP_DEBUG_OUTPUT1,
+ "%s: dropped packet - no valid source addr\n",
+ __FUNCTION__);
+ if (net) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1,
+ "Destination was ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1,
+ &net->ro._l_addr.sa);
+ if (net->dest_state & SCTP_ADDR_CONFIRMED) {
+ if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", net);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb,
+ SCTP_FAILED_THRESHOLD,
+ (void *)net,
+ so_locked);
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ /*
+ * JRS 5/14/07 - If a
+ * destination is
+ * unreachable, the PF bit
+ * is turned off. This
+ * allows an unambiguous use
+ * of the PF bit for
+ * destinations that are
+ * reachable but potentially
+ * failed. If the
+ * destination is set to the
+ * unreachable state, also
+ * set the destination to
+ * the PF state.
+ */
+ /*
+ * Add debug message here if
+ * destination is not in PF
+ * state.
+ */
+ /*
+ * Stop any running T3
+ * timers here?
+ */
+ if ((stcb->asoc.sctp_cmt_on_off == 1) &&
+ (stcb->asoc.sctp_cmt_pf > 0)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ }
+ }
+ if (stcb) {
+ if (net == stcb->asoc.primary_destination) {
+ /* need a new primary */
+ struct sctp_nets *alt;
+
+ alt = sctp_find_alternate_net(stcb, net, 0);
+ if (alt != net) {
+ if (sctp_set_primary_addr(stcb,
+ (struct sockaddr *)NULL,
+ alt) == 0) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ }
+ }
+ }
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
+ sctp_m_freem(m);
+ return (EHOSTUNREACH);
+ }
+ if (ro != &iproute) {
+ memcpy(&iproute, ro, sizeof(*ro));
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv4 output routine from low level src addr:%x\n",
+ (uint32_t) (ntohl(ip->ip_src.s_addr)));
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n",
+ (uint32_t) (ntohl(ip->ip_dst.s_addr)));
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n",
+ ro->ro_rt);
+
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* failed to prepend data, give up */
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ sctp_m_freem(m);
+ return (ENOMEM);
+ }
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, packet_length);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
+ if (port) {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ if (!(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) &&
+ (stcb) &&
+ (stcb->asoc.loopback_scope))) {
+ sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip) + sizeof(struct udphdr));
+ SCTP_STAT_INCR(sctps_sendswcrc);
+ } else {
+ SCTP_STAT_INCR(sctps_sendnocrc);
+ }
+#endif
+ SCTP_ENABLE_UDP_CSUM(o_pak);
+ } else {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ m->m_pkthdr.csum_flags = CSUM_SCTP;
+ m->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
+#endif
+ }
+ /* send it out. table id is taken from stcb */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
+ so = SCTP_INP_SO(inp);
+ SCTP_SOCKET_UNLOCK(so, 0);
+ }
+#endif
+ SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 0);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ }
+#endif
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ if (ret)
+ SCTP_STAT_INCR(sctps_senderrors);
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
+ if (net == NULL) {
+		/* free temporary routes */
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ } else {
+ /* PMTU check versus smallest asoc MTU goes here */
+ if ((ro->ro_rt != NULL) &&
+ (net->ro._s_addr)) {
+ uint32_t mtu;
+
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
+ if (net->port) {
+ mtu -= sizeof(struct udphdr);
+ }
+ if (mtu && (stcb->asoc.smallest_mtu > mtu)) {
+ sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ net->mtu = mtu;
+ }
+ } else if (ro->ro_rt == NULL) {
+ /* route was freed */
+ if (net->ro._s_addr &&
+ net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ }
+ return (ret);
+ }
+#ifdef INET6
+ else if (to->sa_family == AF_INET6) {
+ uint32_t flowlabel;
+ struct ip6_hdr *ip6h;
+ struct route_in6 ip6route;
+ struct ifnet *ifp;
+ u_char flowTop;
+ uint16_t flowBottom;
+ u_char tosBottom, tosTop;
+ struct sockaddr_in6 *sin6, tmp, *lsa6, lsa6_tmp;
+ int prev_scope = 0;
+ struct sockaddr_in6 lsa6_storage;
+ int error;
+ u_short prev_port = 0;
+ int len;
+
+ if (net != NULL) {
+ flowlabel = net->tos_flowlabel;
+ } else {
+ flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo;
+ }
+
+ len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr);
+ if (port) {
+ len += sizeof(struct udphdr);
+ }
+ newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ if (newm == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_ALIGN_TO_END(newm, len);
+ SCTP_BUF_LEN(newm) = len;
+ SCTP_BUF_NEXT(newm) = m;
+ m = newm;
+ packet_length = sctp_calculate_len(m);
+
+ ip6h = mtod(m, struct ip6_hdr *);
+ /*
+ * We assume here that inp_flow is in host byte order within
+ * the TCB!
+ */
+ flowBottom = flowlabel & 0x0000ffff;
+ flowTop = ((flowlabel & 0x000f0000) >> 16);
+ tosTop = (((flowlabel & 0xf0) >> 4) | IPV6_VERSION);
+ /* protect *sin6 from overwrite */
+ sin6 = (struct sockaddr_in6 *)to;
+ tmp = *sin6;
+ sin6 = &tmp;
+
+ /* KAME hack: embed scopeid */
+ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (net == NULL) {
+ memset(&ip6route, 0, sizeof(ip6route));
+ ro = (sctp_route_t *) & ip6route;
+ memcpy(&ro->ro_dst, sin6, sin6->sin6_len);
+ } else {
+ ro = (sctp_route_t *) & net->ro;
+ }
+ if (stcb != NULL) {
+ if ((stcb->asoc.ecn_allowed) && ecn_ok) {
+ /* Enable ECN */
+ tosBottom = (((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) | sctp_get_ect(stcb, chk)) << 4);
+ } else {
+ /* No ECN */
+ tosBottom = ((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) << 4);
+ }
+ } else {
+			/* we could get no asoc if it is an O-O-T-B packet */
+ tosBottom = ((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) << 4);
+ }
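+		/*-
+		 * ip6_flow is assembled below (before htonl) roughly as:
+		 *   bits 31-24: tosTop (IPv6 version nibble and upper TC bits)
+		 *   bits 23-16: tosBottom | flowTop (lower TC/ECN bits and the
+		 *               top 4 bits of the flow label)
+		 *   bits 15-0 : flowBottom (low 16 bits of the flow label)
+		 */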
+ ip6h->ip6_flow = htonl(((tosTop << 24) | ((tosBottom | flowTop) << 16) | flowBottom));
+ if (port) {
+ ip6h->ip6_nxt = IPPROTO_UDP;
+ } else {
+ ip6h->ip6_nxt = IPPROTO_SCTP;
+ }
+ ip6h->ip6_plen = (packet_length - sizeof(struct ip6_hdr));
+ ip6h->ip6_dst = sin6->sin6_addr;
+
+ /*
+		 * Add SRC address selection here: we can only reuse the KAME
+		 * src-addr-sel to a limited degree, since we can try their
+		 * selection but it may not be bound.
+ */
+ bzero(&lsa6_tmp, sizeof(lsa6_tmp));
+ lsa6_tmp.sin6_family = AF_INET6;
+ lsa6_tmp.sin6_len = sizeof(lsa6_tmp);
+ lsa6 = &lsa6_tmp;
+ if (net && out_of_asoc_ok == 0) {
+ if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ }
+ if (net->src_addr_selected == 0) {
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ /* KAME hack: embed scopeid */
+ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ /* Cache the source address */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb,
+ ro,
+ net,
+ 0,
+ vrf_id);
+ (void)sa6_recoverscope(sin6);
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "V6:No route to host\n");
+ net->src_addr_selected = 0;
+ goto no_route;
+ }
+ lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr;
+ } else {
+ sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
+ /* KAME hack: embed scopeid */
+ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (over_addr == NULL) {
+ struct sctp_ifa *_lsrc;
+
+ _lsrc = sctp_source_address_selection(inp, stcb, ro,
+ net,
+ out_of_asoc_ok,
+ vrf_id);
+ if (_lsrc == NULL) {
+ goto no_route;
+ }
+ lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr;
+ sctp_free_ifa(_lsrc);
+ } else {
+ lsa6->sin6_addr = over_addr->sin6.sin6_addr;
+ SCTP_RTALLOC(ro, vrf_id);
+ }
+ (void)sa6_recoverscope(sin6);
+ }
+ lsa6->sin6_port = inp->sctp_lport;
+
+ if (ro->ro_rt == NULL) {
+ /*
+ * src addr selection failed to find a route (or
+ * valid source addr), so we can't get there from
+ * here!
+ */
+ goto no_route;
+ }
+ /*
+ * XXX: sa6 may not have a valid sin6_scope_id in the
+ * non-SCOPEDROUTING case.
+ */
+ bzero(&lsa6_storage, sizeof(lsa6_storage));
+ lsa6_storage.sin6_family = AF_INET6;
+ lsa6_storage.sin6_len = sizeof(lsa6_storage);
+ lsa6_storage.sin6_addr = lsa6->sin6_addr;
+ if ((error = sa6_recoverscope(&lsa6_storage)) != 0) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "recover scope fails error %d\n", error);
+ sctp_m_freem(m);
+ return (error);
+ }
+ /* XXX */
+ lsa6_storage.sin6_addr = lsa6->sin6_addr;
+ lsa6_storage.sin6_port = inp->sctp_lport;
+ lsa6 = &lsa6_storage;
+ ip6h->ip6_src = lsa6->sin6_addr;
+
+ if (port) {
+ udp = (struct udphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr));
+ udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
+ udp->uh_dport = port;
+ udp->uh_ulen = htons(packet_length - sizeof(struct ip6_hdr));
+ udp->uh_sum = 0;
+ sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
+ } else {
+ sctphdr = (struct sctphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr));
+ }
+
+ sctphdr->src_port = src_port;
+ sctphdr->dest_port = dest_port;
+ sctphdr->v_tag = v_tag;
+ sctphdr->checksum = 0;
+
+ /*
+ * We set the hop limit now since there is a good chance
+ * that our ro pointer is now filled
+ */
+ ip6h->ip6_hlim = SCTP_GET_HLIM(inp, ro);
+ ifp = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+
+#ifdef SCTP_DEBUG
+ /* Copy to be sure something bad is not happening */
+ sin6->sin6_addr = ip6h->ip6_dst;
+ lsa6->sin6_addr = ip6h->ip6_src;
+#endif
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv6 output routine from low level\n");
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "src: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)lsa6);
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)sin6);
+ if (net) {
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ /* preserve the port and scope for link local send */
+ prev_scope = sin6->sin6_scope_id;
+ prev_port = sin6->sin6_port;
+ }
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* failed to prepend data, give up */
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, packet_length);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
+ if (port) {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ if (!(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) &&
+ (stcb) &&
+ (stcb->asoc.loopback_scope))) {
+ sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
+ SCTP_STAT_INCR(sctps_sendswcrc);
+ } else {
+ SCTP_STAT_INCR(sctps_sendnocrc);
+ }
+#endif
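+			/*
+			 * A UDP checksum of 0 is not allowed over IPv6, so a
+			 * computed value of 0 is transmitted as 0xffff.
+			 */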
+ if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), packet_length - sizeof(struct ip6_hdr))) == 0) {
+ udp->uh_sum = 0xffff;
+ }
+ } else {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ m->m_pkthdr.csum_flags = CSUM_SCTP;
+ m->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
+#endif
+ }
+ /* send it out. table id is taken from stcb */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
+ so = SCTP_INP_SO(inp);
+ SCTP_SOCKET_UNLOCK(so, 0);
+ }
+#endif
+ SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 0);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ }
+#endif
+ if (net) {
+ /* for link local this must be done */
+ sin6->sin6_scope_id = prev_scope;
+ sin6->sin6_port = prev_port;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret);
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ if (ret) {
+ SCTP_STAT_INCR(sctps_senderrors);
+ }
+ if (net == NULL) {
+ /* Now if we had a temp route free it */
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ }
+ } else {
+ /* PMTU check versus smallest asoc MTU goes here */
+ if (ro->ro_rt == NULL) {
+ /* Route was freed */
+ if (net->ro._s_addr &&
+ net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ if ((ro->ro_rt != NULL) &&
+ (net->ro._s_addr)) {
+ uint32_t mtu;
+
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
+ if (mtu &&
+ (stcb->asoc.smallest_mtu > mtu)) {
+ sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ net->mtu = mtu;
+ if (net->port) {
+ net->mtu -= sizeof(struct udphdr);
+ }
+ }
+ } else if (ifp) {
+ if (ND_IFINFO(ifp)->linkmtu &&
+ (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) {
+ sctp_mtu_size_reset(inp,
+ &stcb->asoc,
+ ND_IFINFO(ifp)->linkmtu);
+ }
+ }
+ }
+ return (ret);
+ }
+#endif
+ else {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
+ ((struct sockaddr *)to)->sa_family);
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ return (EFAULT);
+ }
+}
+
+
+void
+sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m, *m_at, *mp_last;
+ struct sctp_nets *net;
+ struct sctp_init_chunk *init;
+ struct sctp_supported_addr_param *sup_addr;
+ struct sctp_adaptation_layer_indication *ali;
+ struct sctp_ecn_supported_param *ecn;
+ struct sctp_prsctp_supported_param *prsctp;
+ struct sctp_ecn_nonce_supported_param *ecn_nonce;
+ struct sctp_supported_chunk_types_param *pr_supported;
+ int cnt_inits_to = 0;
+ int padval, ret;
+ int num_ext;
+ int p_len;
+
+	/* INITs always go to the primary (and usually the ONLY) address */
+ mp_last = NULL;
+ net = stcb->asoc.primary_destination;
+ if (net == NULL) {
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net == NULL) {
+ /* TSNH */
+ return;
+ }
+ /* we confirm any address we send an INIT to */
+ net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ (void)sctp_set_primary_addr(stcb, NULL, net);
+ } else {
+ /* we confirm any address we send an INIT to */
+ net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n");
+#ifdef INET6
+ if (((struct sockaddr *)&(net->ro._l_addr))->sa_family == AF_INET6) {
+ /*
+		 * special hook: if we are sending to a link-local address it
+		 * will not show up in our private address count.
+ */
+ struct sockaddr_in6 *sin6l;
+
+ sin6l = &net->ro._l_addr.sin6;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6l->sin6_addr))
+ cnt_inits_to = 1;
+ }
+#endif
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /* This case should not happen */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - failed timer?\n");
+ return;
+ }
+ /* start the INIT timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
+
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_DONTWAIT, 1, MT_DATA);
+ if (m == NULL) {
+ /* No memory, INIT timer will re-attempt. */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n");
+ return;
+ }
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk);
+ /*
+ * assume peer supports asconf in order to be able to queue local
+ * address changes while an INIT is in flight and before the assoc
+ * is established.
+ */
+ stcb->asoc.peer_supports_asconf = 1;
+ /* Now lets put the SCTP header in place */
+ init = mtod(m, struct sctp_init_chunk *);
+ /* now the chunk header */
+ init->ch.chunk_type = SCTP_INITIATION;
+ init->ch.chunk_flags = 0;
+ /* fill in later from mbuf we build */
+ init->ch.chunk_length = 0;
+ /* place in my tag */
+ init->init.initiate_tag = htonl(stcb->asoc.my_vtag);
+ /* set up some of the credits. */
+ init->init.a_rwnd = htonl(max(inp->sctp_socket ? SCTP_SB_LIMIT_RCV(inp->sctp_socket) : 0,
+ SCTP_MINIMAL_RWND));
+
+ init->init.num_outbound_streams = htons(stcb->asoc.pre_open_streams);
+ init->init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams);
+ init->init.initial_tsn = htonl(stcb->asoc.init_seq_number);
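+	/*-
+	 * The optional parameters appended below, in order: supported
+	 * address types, adaptation layer indication, NAT-friendly
+	 * indication (sysctl controlled), cookie preservative (if
+	 * requested), ECN capable, PR-SCTP supported, supported chunk
+	 * extensions, ECN nonce supported (sysctl controlled), the AUTH
+	 * random/HMAC/chunk-list parameters, and finally the local
+	 * addresses.
+	 */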
+ /* now the address restriction */
+ sup_addr = (struct sctp_supported_addr_param *)((caddr_t)init +
+ sizeof(*init));
+ sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
+#ifdef INET6
+ /* we support 2 types: IPv6/IPv4 */
+ sup_addr->ph.param_length = htons(sizeof(*sup_addr) + sizeof(uint16_t));
+ sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS);
+ sup_addr->addr_type[1] = htons(SCTP_IPV6_ADDRESS);
+#else
+ /* we support 1 type: IPv4 */
+ sup_addr->ph.param_length = htons(sizeof(*sup_addr) + sizeof(uint8_t));
+ sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS);
+ sup_addr->addr_type[1] = htons(0); /* this is the padding */
+#endif
+ SCTP_BUF_LEN(m) += sizeof(*sup_addr) + sizeof(uint16_t);
+ /* adaptation layer indication parameter */
+ ali = (struct sctp_adaptation_layer_indication *)((caddr_t)sup_addr + sizeof(*sup_addr) + sizeof(uint16_t));
+ ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+ ali->ph.param_length = htons(sizeof(*ali));
+ ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
+ SCTP_BUF_LEN(m) += sizeof(*ali);
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali));
+
+ if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
+ /* Add NAT friendly parameter */
+ struct sctp_paramhdr *ph;
+
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ SCTP_BUF_LEN(m) += sizeof(struct sctp_paramhdr);
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)ph + sizeof(*ph));
+ }
+ /* now any cookie time extensions */
+ if (stcb->asoc.cookie_preserve_req) {
+ struct sctp_cookie_perserve_param *cookie_preserve;
+
+ cookie_preserve = (struct sctp_cookie_perserve_param *)(ecn);
+ cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
+ cookie_preserve->ph.param_length = htons(
+ sizeof(*cookie_preserve));
+ cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
+ SCTP_BUF_LEN(m) += sizeof(*cookie_preserve);
+ ecn = (struct sctp_ecn_supported_param *)(
+ (caddr_t)cookie_preserve + sizeof(*cookie_preserve));
+ stcb->asoc.cookie_preserve_req = 0;
+ }
+ /* ECN parameter */
+ if (SCTP_BASE_SYSCTL(sctp_ecn_enable) == 1) {
+ ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
+ ecn->ph.param_length = htons(sizeof(*ecn));
+ SCTP_BUF_LEN(m) += sizeof(*ecn);
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
+ sizeof(*ecn));
+ } else {
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+ }
+ /* And now tell the peer we do pr-sctp */
+ prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ prsctp->ph.param_length = htons(sizeof(*prsctp));
+ SCTP_BUF_LEN(m) += sizeof(*prsctp);
+
+ /* And now tell the peer we do all the extensions */
+ pr_supported = (struct sctp_supported_chunk_types_param *)
+ ((caddr_t)prsctp + sizeof(*prsctp));
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ num_ext = 0;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
+ }
+ if (stcb->asoc.sctp_nr_sack_on_off == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
+ }
+ p_len = sizeof(*pr_supported) + num_ext;
+ pr_supported->ph.param_length = htons(p_len);
+ bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+
+ /* ECN nonce: And now tell the peer we support ECN nonce */
+ if (SCTP_BASE_SYSCTL(sctp_ecn_nonce)) {
+ ecn_nonce = (struct sctp_ecn_nonce_supported_param *)
+ ((caddr_t)pr_supported + SCTP_SIZE32(p_len));
+ ecn_nonce->ph.param_type = htons(SCTP_ECN_NONCE_SUPPORTED);
+ ecn_nonce->ph.param_length = htons(sizeof(*ecn_nonce));
+ SCTP_BUF_LEN(m) += sizeof(*ecn_nonce);
+ }
+ /* add authentication parameters */
+ if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ struct sctp_auth_random *randp;
+ struct sctp_auth_hmac_algo *hmacs;
+ struct sctp_auth_chunk_list *chunks;
+
+ /* attach RANDOM parameter, if available */
+ if (stcb->asoc.authinfo.random != NULL) {
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sizeof(*randp) + stcb->asoc.authinfo.random_len;
+ /* random key already contains the header */
+ bcopy(stcb->asoc.authinfo.random->key, randp, p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add HMAC_ALGO parameter */
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_hmaclist(stcb->asoc.local_hmacs,
+ (uint8_t *) hmacs->hmac_ids);
+ if (p_len > 0) {
+ p_len += sizeof(*hmacs);
+ hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+ hmacs->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add CHUNKS parameter */
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks,
+ chunks->chunk_types);
+ if (p_len > 0) {
+ p_len += sizeof(*chunks);
+ chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+ chunks->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ }
+ m_at = m;
+ /* now the addresses */
+ {
+ struct sctp_scoping scp;
+
+ /*
+		 * To optimize this we could put the scoping stuff into a
+		 * structure and remove the individual uint8_t's from the
+		 * assoc structure. Then we could just pass in the address
+		 * within the stcb, but for now this is a quick hack to get
+		 * the address stuff teased apart.
+ */
+ scp.ipv4_addr_legal = stcb->asoc.ipv4_addr_legal;
+ scp.ipv6_addr_legal = stcb->asoc.ipv6_addr_legal;
+ scp.loopback_scope = stcb->asoc.loopback_scope;
+ scp.ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ scp.local_scope = stcb->asoc.local_scope;
+ scp.site_scope = stcb->asoc.site_scope;
+
+ m_at = sctp_add_addresses_to_i_ia(inp, &scp, m_at, cnt_inits_to);
+ }
+
+	/* calculate the size and update pkt header and chunk header */
+ p_len = 0;
+ for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ if (SCTP_BUF_NEXT(m_at) == NULL)
+ mp_last = m_at;
+ p_len += SCTP_BUF_LEN(m_at);
+ }
+ init->ch.chunk_length = htons(p_len);
+ /*
+	 * We pass 0 here to NOT set IP_DF if it is IPv4; we ignore the return
+	 * here since the timer will drive a retransmission.
+ */
+
+ /* I don't expect this to execute but we will be safe here */
+ padval = p_len % 4;
+ if ((padval) && (mp_last)) {
+ /*
+ * The compiler worries that mp_last may not be set even
+ * though I think it is impossible :-> however we add
+ * mp_last here just in case.
+ */
+ ret = sctp_add_pad_tombuf(mp_last, (4 - padval));
+ if (ret) {
+ /* Houston we have a problem, no space */
+ sctp_m_freem(m);
+ return;
+ }
+ p_len += padval;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - calls lowlevel_output\n");
+ ret = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ m, 0, NULL, 0, 0, 0, NULL, 0,
+ inp->sctp_lport, stcb->rport, htonl(0),
+ net->port, so_locked, NULL);
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "lowlevel_output - %d\n", ret);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+}
+
+struct mbuf *
+sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
+ int param_offset, int *abort_processing, struct sctp_chunkhdr *cp, int *nat_friendly)
+{
+ /*
+	 * Given an mbuf containing an INIT or INIT-ACK, with param_offset
+	 * equal to the beginning of the params (i.e. iphlen +
+	 * sizeof(struct sctp_init_msg)), parse through the parameters to the
+	 * end of the mbuf, verifying that all parameters are known.
+	 *
+	 * For unknown parameters build and return an mbuf with
+	 * UNRECOGNIZED_PARAMETER errors. If the flags indicate to stop
+	 * processing this chunk, stop, and set *abort_processing to 1.
+ *
+ * By having param_offset be pre-set to where parameters begin it is
+ * hoped that this routine may be reused in the future by new
+ * features.
+ */
+ struct sctp_paramhdr *phdr, params;
+
+ struct mbuf *mat, *op_err;
+ char tempbuf[SCTP_PARAM_BUFFER_SIZE];
+ int at, limit, pad_needed;
+ uint16_t ptype, plen, padded_size;
+ int err_at;
+
+ *abort_processing = 0;
+ mat = in_initpkt;
+ err_at = 0;
+ limit = ntohs(cp->chunk_length) - sizeof(struct sctp_init_chunk);
+ at = param_offset;
+ op_err = NULL;
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Check for unrecognized param's\n");
+ phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
+ while ((phdr != NULL) && ((size_t)limit >= sizeof(struct sctp_paramhdr))) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if ((plen > limit) || (plen < sizeof(struct sctp_paramhdr))) {
+			/* whacked parameter */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error %d\n", plen);
+ goto invalid_size;
+ }
+ limit -= SCTP_SIZE32(plen);
+ /*-
+		 * All parameters for all chunks that we know/understand are
+		 * listed here. We process them in other places and take
+		 * appropriate stop actions per the upper bits. However this
+		 * is the generic routine that processors can call to get back
+		 * an operr, to either incorporate (init-ack) or send.
+ */
+ padded_size = SCTP_SIZE32(plen);
+ switch (ptype) {
+ /* Param's with variable size */
+ case SCTP_HEARTBEAT_INFO:
+ case SCTP_STATE_COOKIE:
+ case SCTP_UNRECOG_PARAM:
+ case SCTP_ERROR_CAUSE_IND:
+ /* ok skip fwd */
+ at += padded_size;
+ break;
+ /* Param's with variable size within a range */
+ case SCTP_CHUNK_LIST:
+ case SCTP_SUPPORTED_CHUNK_EXT:
+ if (padded_size > (sizeof(struct sctp_supported_chunk_types_param) + (sizeof(uint8_t) * SCTP_MAX_SUPPORTED_EXT))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error chklist %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SUPPORTED_ADDRTYPE:
+ if (padded_size > SCTP_MAX_ADDR_PARAMS_SIZE) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error supaddrtype %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_RANDOM:
+ if (padded_size > (sizeof(struct sctp_auth_random) + SCTP_RANDOM_MAX_SIZE)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error random %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ case SCTP_DEL_IP_ADDRESS:
+ case SCTP_ADD_IP_ADDRESS:
+ if ((padded_size != sizeof(struct sctp_asconf_addrv4_param)) &&
+ (padded_size != sizeof(struct sctp_asconf_addr_param))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error setprim %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ /* Param's with a fixed size */
+ case SCTP_IPV4_ADDRESS:
+ if (padded_size != sizeof(struct sctp_ipv4addr_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv4 addr %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (padded_size != sizeof(struct sctp_ipv6addr_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv6 addr %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_COOKIE_PRESERVE:
+ if (padded_size != sizeof(struct sctp_cookie_perserve_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error cookie-preserve %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_HAS_NAT_SUPPORT:
+ *nat_friendly = 1;
+ /* fall through */
+ case SCTP_ECN_NONCE_SUPPORTED:
+ case SCTP_PRSCTP_SUPPORTED:
+
+ if (padded_size != sizeof(struct sctp_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecnnonce/prsctp/nat support %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_ECN_CAPABLE:
+ if (padded_size != sizeof(struct sctp_ecn_supported_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_ULP_ADAPTATION:
+ if (padded_size != sizeof(struct sctp_adaptation_layer_indication)) {
+				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error adaptation %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SUCCESS_REPORT:
+ if (padded_size != sizeof(struct sctp_asconf_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error success %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_HOSTNAME_ADDRESS:
+ {
+ /* We can NOT handle HOST NAME addresses!! */
+ int l_len;
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Can't handle hostname addresses.. abort processing\n");
+ *abort_processing = 1;
+ if (op_err == NULL) {
+				/* Ok need to try to get an mbuf */
+#ifdef INET6
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+#else
+ l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+#endif
+ l_len += plen;
+ l_len += sizeof(struct sctp_paramhdr);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+ /*
+ * pre-reserve space for ip
+ * and sctp header and
+ * chunk hdr
+ */
+#ifdef INET6
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+#else
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
+#endif
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if (op_err) {
+ /* If we have space */
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR);
+ s.param_length = htons(sizeof(s) + plen);
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, min(sizeof(tempbuf), plen));
+ if (phdr == NULL) {
+ sctp_m_freem(op_err);
+ /*
+ * we are out of memory but
+ * we still need to have a
+ * look at what to do (the
+ * system is in trouble
+ * though).
+ */
+ return (NULL);
+ }
+ m_copyback(op_err, err_at, plen, (caddr_t)phdr);
+ err_at += plen;
+ }
+ return (op_err);
+ break;
+ }
+ default:
+ /*
+			 * we do not recognize the parameter; figure out
+			 * what we do.
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Hit default param %x\n", ptype);
+ if ((ptype & 0x4000) == 0x4000) {
+ /* Report bit is set?? */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "report op err\n");
+ if (op_err == NULL) {
+ int l_len;
+
+ /* Ok need to try to get an mbuf */
+#ifdef INET6
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+#else
+ l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+#endif
+ l_len += plen;
+ l_len += sizeof(struct sctp_paramhdr);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+#ifdef INET6
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+#else
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
+#endif
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if (op_err) {
+ /* If we have space */
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_UNRECOG_PARAM);
+ s.param_length = htons(sizeof(s) + plen);
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ if (plen > sizeof(tempbuf)) {
+ plen = sizeof(tempbuf);
+ }
+ phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, min(sizeof(tempbuf), plen));
+ if (phdr == NULL) {
+ sctp_m_freem(op_err);
+ /*
+ * we are out of memory but
+ * we still need to have a
+ * look at what to do (the
+ * system is in trouble
+ * though).
+ */
+ op_err = NULL;
+ goto more_processing;
+ }
+ m_copyback(op_err, err_at, plen, (caddr_t)phdr);
+ err_at += plen;
+ }
+ }
+ more_processing:
+ if ((ptype & 0x8000) == 0x0000) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "stop proc\n");
+ return (op_err);
+ } else {
+ /* skip this chunk and continue processing */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "move on\n");
+ at += SCTP_SIZE32(plen);
+ }
+ break;
+
+ }
+ phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
+ }
+ return (op_err);
+invalid_size:
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "abort flag set\n");
+ *abort_processing = 1;
+ if ((op_err == NULL) && phdr) {
+ int l_len;
+
+#ifdef INET6
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+#else
+ l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+#endif
+ l_len += (2 * sizeof(struct sctp_paramhdr));
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+#ifdef INET6
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+#else
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
+#endif
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if ((op_err) && phdr) {
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ s.param_length = htons(sizeof(s) + sizeof(struct sctp_paramhdr));
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ /* Only copy back the p-hdr that caused the issue */
+ m_copyback(op_err, err_at, sizeof(struct sctp_paramhdr), (caddr_t)phdr);
+ }
+ return (op_err);
+}
+
+static int
+sctp_are_there_new_addresses(struct sctp_association *asoc,
+ struct mbuf *in_initpkt, int iphlen, int offset)
+{
+ /*
+ * Given an INIT packet, look through the packet to verify that there
+ * are NO new addresses. As we go through the parameters add reports
+ * of any un-understood parameters that require an error. Also we
+ * must return (1) to drop the packet if we see an un-understood
+ * parameter that tells us to drop the chunk.
+ */
+ struct sockaddr_in sin4, *sa4;
+
+#ifdef INET6
+ struct sockaddr_in6 sin6, *sa6;
+
+#endif
+ struct sockaddr *sa_touse;
+ struct sockaddr *sa;
+ struct sctp_paramhdr *phdr, params;
+ struct ip *iph;
+
+#ifdef INET6
+ struct ip6_hdr *ip6h;
+
+#endif
+ struct mbuf *mat;
+ uint16_t ptype, plen;
+ int err_at;
+ uint8_t fnd;
+ struct sctp_nets *net;
+
+ memset(&sin4, 0, sizeof(sin4));
+#ifdef INET6
+ memset(&sin6, 0, sizeof(sin6));
+#endif
+ sin4.sin_family = AF_INET;
+ sin4.sin_len = sizeof(sin4);
+#ifdef INET6
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+#endif
+ sa_touse = NULL;
+ /* First what about the src address of the pkt ? */
+ iph = mtod(in_initpkt, struct ip *);
+ switch (iph->ip_v) {
+ case IPVERSION:
+ /* source addr is IPv4 */
+ sin4.sin_addr = iph->ip_src;
+ sa_touse = (struct sockaddr *)&sin4;
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ /* source addr is IPv6 */
+ ip6h = mtod(in_initpkt, struct ip6_hdr *);
+ sin6.sin6_addr = ip6h->ip6_src;
+ sa_touse = (struct sockaddr *)&sin6;
+ break;
+#endif
+ default:
+ return (1);
+ }
+
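+ /*
+ * Check whether the packet's source address is already one of
+ * the association's known peer addresses.
+ */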
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family == sa_touse->sa_family) {
+ if (sa->sa_family == AF_INET) {
+ sa4 = (struct sockaddr_in *)sa;
+ if (sa4->sin_addr.s_addr ==
+ sin4.sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
+ }
+#ifdef INET6
+ if (sa->sa_family == AF_INET6) {
+ sa6 = (struct sockaddr_in6 *)sa;
+ if (SCTP6_ARE_ADDR_EQUAL(sa6,
+ &sin6)) {
+ fnd = 1;
+ break;
+ }
+ }
+#endif
+ }
+ }
+ if (fnd == 0) {
+ /* New address added! No need to look further. */
+ return (1);
+ }
+ /* Ok so far; let's munge through the rest of the packet */
+ mat = in_initpkt;
+ err_at = 0;
+ sa_touse = NULL;
+ offset += sizeof(struct sctp_init_chunk);
+ phdr = sctp_get_next_param(mat, offset, &params, sizeof(params));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (ptype == SCTP_IPV4_ADDRESS) {
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ phdr = sctp_get_next_param(mat, offset,
+ (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf));
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ phdr == NULL) {
+ return (1);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin4.sin_addr.s_addr = p4->addr;
+ sa_touse = (struct sockaddr *)&sin4;
+ } else if (ptype == SCTP_IPV6_ADDRESS) {
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ phdr = sctp_get_next_param(mat, offset,
+ (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf));
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ phdr == NULL) {
+ return (1);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+#ifdef INET6
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ sa_touse = (struct sockaddr *)&sin6;
+#else
+ /* Without INET6 we cannot check an IPv6 parameter. */
+ sa_touse = NULL;
+#endif
+ }
+ if (sa_touse) {
+ /* ok, sa_touse points to one to check */
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family != sa_touse->sa_family) {
+ continue;
+ }
+ if (sa->sa_family == AF_INET) {
+ sa4 = (struct sockaddr_in *)sa;
+ if (sa4->sin_addr.s_addr ==
+ sin4.sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
+ }
+#ifdef INET6
+ if (sa->sa_family == AF_INET6) {
+ sa6 = (struct sockaddr_in6 *)sa;
+ if (SCTP6_ARE_ADDR_EQUAL(
+ sa6, &sin6)) {
+ fnd = 1;
+ break;
+ }
+ }
+#endif
+ }
+ if (!fnd) {
+ /* New addr added! no need to look further */
+ return (1);
+ }
+ }
+ offset += SCTP_SIZE32(plen);
+ phdr = sctp_get_next_param(mat, offset, &params, sizeof(params));
+ }
+ return (0);
+}
+
+/*
+ * Given an mbuf chain that was sent into us containing an INIT, build an
+ * INIT-ACK with COOKIE and send it back. We assume that the in_initpkt has
+ * been pulled up to include the IPv6/IPv4 header, the SCTP header and the
+ * initial part of the INIT message (i.e. the struct sctp_init_msg).
+ */
+void
+sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct mbuf *init_pkt, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_init_chunk *init_chk, uint32_t vrf_id, uint16_t port, int hold_inp_lock)
+{
+ struct sctp_association *asoc;
+ struct mbuf *m, *m_at, *m_tmp, *m_cookie, *op_err, *mp_last;
+ struct sctp_init_ack_chunk *initack;
+ struct sctp_adaptation_layer_indication *ali;
+ struct sctp_ecn_supported_param *ecn;
+ struct sctp_prsctp_supported_param *prsctp;
+ struct sctp_ecn_nonce_supported_param *ecn_nonce;
+ struct sctp_supported_chunk_types_param *pr_supported;
+ union sctp_sockstore store, store1, *over_addr;
+ struct sockaddr_in *sin, *to_sin;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6, *to_sin6;
+
+#endif
+ struct ip *iph;
+
+#ifdef INET6
+ struct ip6_hdr *ip6;
+
+#endif
+ struct sockaddr *to;
+ struct sctp_state_cookie stc;
+ struct sctp_nets *net = NULL;
+ uint8_t *signature = NULL;
+ int cnt_inits_to = 0;
+ uint16_t his_limit, i_want;
+ int abort_flag, padval;
+ int num_ext;
+ int p_len;
+ int nat_friendly = 0;
+ struct socket *so;
+
+ if (stcb)
+ asoc = &stcb->asoc;
+ else
+ asoc = NULL;
+ mp_last = NULL;
+ if ((asoc != NULL) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ (sctp_are_there_new_addresses(asoc, init_pkt, iphlen, offset))) {
+ /* new addresses, out of here in non-cookie-wait states */
+ /*
+ * Send an ABORT. We don't add the new-address error cause,
+ * though we even set the T bit and copy in the 0 tag; this
+ * looks no different than if no listener was present.
+ */
+ sctp_send_abort(init_pkt, iphlen, sh, 0, NULL, vrf_id, port);
+ return;
+ }
+ abort_flag = 0;
+ op_err = sctp_arethere_unrecognized_parameters(init_pkt,
+ (offset + sizeof(struct sctp_init_chunk)),
+ &abort_flag, (struct sctp_chunkhdr *)init_chk, &nat_friendly);
+ if (abort_flag) {
+do_a_abort:
+ sctp_send_abort(init_pkt, iphlen, sh,
+ init_chk->init.initiate_tag, op_err, vrf_id, port);
+ return;
+ }
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m == NULL) {
+ /* No memory, INIT timer will re-attempt. */
+ if (op_err)
+ sctp_m_freem(op_err);
+ return;
+ }
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk);
+
+ /* the time I built cookie */
+ (void)SCTP_GETTIME_TIMEVAL(&stc.time_entered);
+
+ /* populate any tie tags */
+ if (asoc != NULL) {
+ /* unlock before tag selections */
+ stc.tie_tag_my_vtag = asoc->my_vtag_nonce;
+ stc.tie_tag_peer_vtag = asoc->peer_vtag_nonce;
+ stc.cookie_life = asoc->cookie_life;
+ net = asoc->primary_destination;
+ } else {
+ stc.tie_tag_my_vtag = 0;
+ stc.tie_tag_peer_vtag = 0;
+ /* life I will award this cookie */
+ stc.cookie_life = inp->sctp_ep.def_cookie_life;
+ }
+
+ /* copy in the ports for later check */
+ stc.myport = sh->dest_port;
+ stc.peerport = sh->src_port;
+
+ /*
+ * If we wanted to honor cookie life extensions, we would add to
+ * stc.cookie_life. For now we should NOT honor any extension.
+ */
+ stc.site_scope = stc.local_scope = stc.loopback_scope = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ struct inpcb *in_inp;
+
+ /* It's a V6 socket */
+ in_inp = (struct inpcb *)inp;
+ stc.ipv6_addr_legal = 1;
+ /* Now look at the binding flag to see if V4 will be legal */
+ if (SCTP_IPV6_V6ONLY(in_inp) == 0) {
+ stc.ipv4_addr_legal = 1;
+ } else {
+ /* V4 addresses are NOT legal on the association */
+ stc.ipv4_addr_legal = 0;
+ }
+ } else {
+ /* It's a V4 socket, no V6 */
+ stc.ipv4_addr_legal = 1;
+ stc.ipv6_addr_legal = 0;
+ }
+
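+ /*
+ * With SCTP_DONT_DO_PRIVADDR_SCOPE defined, private (RFC 1918)
+ * addresses get no special scoping, so ipv4_scope starts out set;
+ * otherwise it is only set later when a private address is seen.
+ */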
+#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
+ stc.ipv4_scope = 1;
+#else
+ stc.ipv4_scope = 0;
+#endif
+ /* now for scope setup */
+ memset((caddr_t)&store, 0, sizeof(store));
+ memset((caddr_t)&store1, 0, sizeof(store1));
+ sin = &store.sin;
+ to_sin = &store1.sin;
+#ifdef INET6
+ sin6 = &store.sin6;
+ to_sin6 = &store1.sin6;
+#endif
+ iph = mtod(init_pkt, struct ip *);
+ /* establish the to_addr's */
+ switch (iph->ip_v) {
+ case IPVERSION:
+ to_sin->sin_port = sh->dest_port;
+ to_sin->sin_family = AF_INET;
+ to_sin->sin_len = sizeof(struct sockaddr_in);
+ to_sin->sin_addr = iph->ip_dst;
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ ip6 = mtod(init_pkt, struct ip6_hdr *);
+ to_sin6->sin6_addr = ip6->ip6_dst;
+ to_sin6->sin6_scope_id = 0;
+ to_sin6->sin6_port = sh->dest_port;
+ to_sin6->sin6_family = AF_INET6;
+ to_sin6->sin6_len = sizeof(struct sockaddr_in6);
+ break;
+#endif
+ default:
+ goto do_a_abort;
+ break;
+ };
+
+ if (net == NULL) {
+ to = (struct sockaddr *)&store;
+ switch (iph->ip_v) {
+ case IPVERSION:
+ {
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = sh->src_port;
+ sin->sin_addr = iph->ip_src;
+ /* lookup address */
+ stc.address[0] = sin->sin_addr.s_addr;
+ stc.address[1] = 0;
+ stc.address[2] = 0;
+ stc.address[3] = 0;
+ stc.addr_type = SCTP_IPV4_ADDRESS;
+ /* local from address */
+ stc.laddress[0] = to_sin->sin_addr.s_addr;
+ stc.laddress[1] = 0;
+ stc.laddress[2] = 0;
+ stc.laddress[3] = 0;
+ stc.laddr_type = SCTP_IPV4_ADDRESS;
+ /* scope_id is only for v6 */
+ stc.scope_id = 0;
+#ifndef SCTP_DONT_DO_PRIVADDR_SCOPE
+ if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ stc.ipv4_scope = 1;
+ }
+#else
+ stc.ipv4_scope = 1;
+#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
+ /* Must use the address in this case */
+ if (sctp_is_address_on_local_host((struct sockaddr *)sin, vrf_id)) {
+ stc.loopback_scope = 1;
+ stc.ipv4_scope = 1;
+ stc.site_scope = 1;
+ stc.local_scope = 0;
+ }
+ break;
+ }
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+ ip6 = mtod(init_pkt, struct ip6_hdr *);
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = sh->src_port;
+ sin6->sin6_addr = ip6->ip6_src;
+ /* lookup address */
+ memcpy(&stc.address, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ sin6->sin6_scope_id = 0;
+ stc.addr_type = SCTP_IPV6_ADDRESS;
+ stc.scope_id = 0;
+ if (sctp_is_address_on_local_host((struct sockaddr *)sin6, vrf_id)) {
+ /*
+ * FIX ME: does this have scope from
+ * rcvif?
+ */
+ (void)sa6_recoverscope(sin6);
+ stc.scope_id = sin6->sin6_scope_id;
+ sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
+ stc.loopback_scope = 1;
+ stc.local_scope = 0;
+ stc.site_scope = 1;
+ stc.ipv4_scope = 1;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is
+ * LINK_LOCAL we must have both
+ * site and local scope in common.
+ * Don't set local scope though,
+ * since we must depend on the
+ * source to be added implicitly.
+ * We cannot assume that, just
+ * because we share one link, all
+ * links are common.
+ */
+ stc.local_scope = 0;
+ stc.site_scope = 1;
+ stc.ipv4_scope = 1;
+ /*
+ * We start counting the private
+ * address stuff at 1, since the
+ * link-local address we source
+ * from won't show up in our
+ * scoped count.
+ */
+ cnt_inits_to = 1;
+ /*
+ * pull out the scope_id from
+ * incoming pkt
+ */
+ /*
+ * FIX ME: does this have scope from
+ * rcvif?
+ */
+ (void)sa6_recoverscope(sin6);
+ stc.scope_id = sin6->sin6_scope_id;
+ sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is
+ * SITE_LOCAL then we must have site
+ * scope in common.
+ */
+ stc.site_scope = 1;
+ }
+ memcpy(&stc.laddress, &to_sin6->sin6_addr, sizeof(struct in6_addr));
+ stc.laddr_type = SCTP_IPV6_ADDRESS;
+ break;
+ }
+#endif
+ default:
+ /* TSNH */
+ goto do_a_abort;
+ break;
+ }
+ } else {
+ /* set the scope per the existing tcb */
+
+#ifdef INET6
+ struct sctp_nets *lnet;
+
+#endif
+
+ stc.loopback_scope = asoc->loopback_scope;
+ stc.ipv4_scope = asoc->ipv4_local_scope;
+ stc.site_scope = asoc->site_scope;
+ stc.local_scope = asoc->local_scope;
+#ifdef INET6
+ /* Why do we not consider IPv4 LL addresses? */
+ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
+ if (lnet->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ if (IN6_IS_ADDR_LINKLOCAL(&lnet->ro._l_addr.sin6.sin6_addr)) {
+ /*
+ * if we have a LL address, start
+ * counting at 1.
+ */
+ cnt_inits_to = 1;
+ }
+ }
+ }
+#endif
+ /* use the net pointer */
+ to = (struct sockaddr *)&net->ro._l_addr;
+ switch (to->sa_family) {
+ case AF_INET:
+ sin = (struct sockaddr_in *)to;
+ stc.address[0] = sin->sin_addr.s_addr;
+ stc.address[1] = 0;
+ stc.address[2] = 0;
+ stc.address[3] = 0;
+ stc.addr_type = SCTP_IPV4_ADDRESS;
+ if (net->src_addr_selected == 0) {
+ /*
+ * strange case here, the INIT should have
+ * done the selection.
+ */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb, (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (net->ro._s_addr == NULL)
+ return;
+
+ net->src_addr_selected = 1;
+
+ }
+ stc.laddress[0] = net->ro._s_addr->address.sin.sin_addr.s_addr;
+ stc.laddress[1] = 0;
+ stc.laddress[2] = 0;
+ stc.laddress[3] = 0;
+ stc.laddr_type = SCTP_IPV4_ADDRESS;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)to;
+ memcpy(&stc.address, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ stc.addr_type = SCTP_IPV6_ADDRESS;
+ if (net->src_addr_selected == 0) {
+ /*
+ * strange case here, the INIT should have
+ * done the selection.
+ */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb, (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (net->ro._s_addr == NULL)
+ return;
+
+ net->src_addr_selected = 1;
+ }
+ memcpy(&stc.laddress, &net->ro._s_addr->address.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+ stc.laddr_type = SCTP_IPV6_ADDRESS;
+ break;
+#endif
+ }
+ }
+ /* Now lets put the SCTP header in place */
+ initack = mtod(m, struct sctp_init_ack_chunk *);
+ /* Save it off for quick ref */
+ stc.peers_vtag = init_chk->init.initiate_tag;
+ /* who are we */
+ memcpy(stc.identification, SCTP_VERSION_STRING,
+ min(strlen(SCTP_VERSION_STRING), sizeof(stc.identification)));
+ /* now the chunk header */
+ initack->ch.chunk_type = SCTP_INITIATION_ACK;
+ initack->ch.chunk_flags = 0;
+ /* fill in later from mbuf we build */
+ initack->ch.chunk_length = 0;
+ /* place in my tag */
+ if ((asoc != NULL) &&
+ ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_INUSE) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED))) {
+ /* re-use the v-tags and init-seq here */
+ initack->init.initiate_tag = htonl(asoc->my_vtag);
+ initack->init.initial_tsn = htonl(asoc->init_seq_number);
+ } else {
+ uint32_t vtag, itsn;
+
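+ /*
+ * Presumably we drop the INP read lock (keeping a refcount)
+ * around tag selection so sctp_select_a_tag can take its own
+ * locks without a lock-order problem; re-acquired below.
+ */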
+ if (hold_inp_lock) {
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if (asoc) {
+ atomic_add_int(&asoc->refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ new_tag:
+ vtag = sctp_select_a_tag(inp, inp->sctp_lport, sh->src_port, 1);
+ if ((asoc->peer_supports_nat) && (vtag == asoc->my_vtag)) {
+ /*
+ * Got a duplicate vtag on some guy behind a
+ * NAT; make sure we don't use it.
+ */
+ goto new_tag;
+ }
+ initack->init.initiate_tag = htonl(vtag);
+ /* get a TSN to use too */
+ itsn = sctp_select_initial_TSN(&inp->sctp_ep);
+ initack->init.initial_tsn = htonl(itsn);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&asoc->refcnt, -1);
+ } else {
+ vtag = sctp_select_a_tag(inp, inp->sctp_lport, sh->src_port, 1);
+ initack->init.initiate_tag = htonl(vtag);
+ /* get a TSN to use too */
+ initack->init.initial_tsn = htonl(sctp_select_initial_TSN(&inp->sctp_ep));
+ }
+ if (hold_inp_lock) {
+ SCTP_INP_RLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ /* save away my tag too */
+ stc.my_vtag = initack->init.initiate_tag;
+
+ /* set up some of the credits. */
+ so = inp->sctp_socket;
+ if (so == NULL) {
+ /* memory problem */
+ sctp_m_freem(m);
+ return;
+ } else {
+ initack->init.a_rwnd = htonl(max(SCTP_SB_LIMIT_RCV(so), SCTP_MINIMAL_RWND));
+ }
+ /* set what I want */
+ his_limit = ntohs(init_chk->init.num_inbound_streams);
+ /* choose what I want */
+ if (asoc != NULL) {
+ if (asoc->streamoutcnt > inp->sctp_ep.pre_open_stream_count) {
+ i_want = asoc->streamoutcnt;
+ } else {
+ i_want = inp->sctp_ep.pre_open_stream_count;
+ }
+ } else {
+ i_want = inp->sctp_ep.pre_open_stream_count;
+ }
+ if (his_limit < i_want) {
+ /* I Want more :< */
+ initack->init.num_outbound_streams = init_chk->init.num_inbound_streams;
+ } else {
+ /* I can have what I want :> */
+ initack->init.num_outbound_streams = htons(i_want);
+ }
+ /* tell him his limit. */
+ initack->init.num_inbound_streams =
+ htons(inp->sctp_ep.max_open_streams_intome);
+
+ /* adaptation layer indication parameter */
+ ali = (struct sctp_adaptation_layer_indication *)((caddr_t)initack + sizeof(*initack));
+ ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+ ali->ph.param_length = htons(sizeof(*ali));
+ ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
+ SCTP_BUF_LEN(m) += sizeof(*ali);
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali));
+
+ /* ECN parameter */
+ if (SCTP_BASE_SYSCTL(sctp_ecn_enable) == 1) {
+ ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
+ ecn->ph.param_length = htons(sizeof(*ecn));
+ SCTP_BUF_LEN(m) += sizeof(*ecn);
+
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
+ sizeof(*ecn));
+ } else {
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+ }
+ /* And now tell the peer we do pr-sctp */
+ prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ prsctp->ph.param_length = htons(sizeof(*prsctp));
+ SCTP_BUF_LEN(m) += sizeof(*prsctp);
+ if (nat_friendly) {
+ /* Add NAT friendly parameter */
+ struct sctp_paramhdr *ph;
+
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ SCTP_BUF_LEN(m) += sizeof(struct sctp_paramhdr);
+ }
+ /* And now tell the peer we do all the extensions */
+ pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ num_ext = 0;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ if (!SCTP_BASE_SYSCTL(sctp_auth_disable))
+ pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
+ if (SCTP_BASE_SYSCTL(sctp_nr_sack_on_off))
+ pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
+ p_len = sizeof(*pr_supported) + num_ext;
+ pr_supported->ph.param_length = htons(p_len);
+ bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+ /* ECN nonce: And now tell the peer we support ECN nonce */
+ if (SCTP_BASE_SYSCTL(sctp_ecn_nonce)) {
+ ecn_nonce = (struct sctp_ecn_nonce_supported_param *)
+ ((caddr_t)pr_supported + SCTP_SIZE32(p_len));
+ ecn_nonce->ph.param_type = htons(SCTP_ECN_NONCE_SUPPORTED);
+ ecn_nonce->ph.param_length = htons(sizeof(*ecn_nonce));
+ SCTP_BUF_LEN(m) += sizeof(*ecn_nonce);
+ }
+ /* add authentication parameters */
+ if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ struct sctp_auth_random *randp;
+ struct sctp_auth_hmac_algo *hmacs;
+ struct sctp_auth_chunk_list *chunks;
+ uint16_t random_len;
+
+ /* generate and add RANDOM parameter */
+ random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ randp->ph.param_type = htons(SCTP_RANDOM);
+ p_len = sizeof(*randp) + random_len;
+ randp->ph.param_length = htons(p_len);
+ SCTP_READ_RANDOM(randp->random_data, random_len);
+ /* zero out any padding required */
+ bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+ /* add HMAC_ALGO parameter */
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
+ (uint8_t *) hmacs->hmac_ids);
+ if (p_len > 0) {
+ p_len += sizeof(*hmacs);
+ hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+ hmacs->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add CHUNKS parameter */
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
+ chunks->chunk_types);
+ if (p_len > 0) {
+ p_len += sizeof(*chunks);
+ chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+ chunks->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ }
+ m_at = m;
+ /* now the addresses */
+ {
+ struct sctp_scoping scp;
+
+ /*
+ * To optimize this we could put the scoping stuff into a
+ * structure and remove the individual uint8's from the stc
+ * structure. Then we could just copy the address in within
+ * the stc, but for now this is a quick hack to get the
+ * address stuff teased apart.
+ */
+ scp.ipv4_addr_legal = stc.ipv4_addr_legal;
+ scp.ipv6_addr_legal = stc.ipv6_addr_legal;
+ scp.loopback_scope = stc.loopback_scope;
+ scp.ipv4_local_scope = stc.ipv4_scope;
+ scp.local_scope = stc.local_scope;
+ scp.site_scope = stc.site_scope;
+ m_at = sctp_add_addresses_to_i_ia(inp, &scp, m_at, cnt_inits_to);
+ }
+
+ /* tack on the operational error if present */
+ if (op_err) {
+ struct mbuf *ol;
+ int llen;
+
+ llen = 0;
+ ol = op_err;
+ while (ol) {
+ llen += SCTP_BUF_LEN(ol);
+ ol = SCTP_BUF_NEXT(ol);
+ }
+ if (llen % 4) {
+ /* must add a pad to the param */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (llen % 4);
+ m_copyback(op_err, llen, padlen, (caddr_t)&cpthis);
+ }
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ SCTP_BUF_NEXT(m_at) = op_err;
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ }
+ /* pre-calculate the size and update pkt header and chunk header */
+ p_len = 0;
+ for (m_tmp = m; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ p_len += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ /* m_tmp should now point to last one */
+ break;
+ }
+ }
+
+ /* Now we must build a cookie */
+ m_cookie = sctp_add_cookie(inp, init_pkt, offset, m, 0, &stc, &signature);
+ if (m_cookie == NULL) {
+ /* memory problem */
+ sctp_m_freem(m);
+ return;
+ }
+ /* Now append the cookie to the end and update the space/size */
+ SCTP_BUF_NEXT(m_tmp) = m_cookie;
+
+ for (m_tmp = m_cookie; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ p_len += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ /* m_tmp should now point to last one */
+ mp_last = m_tmp;
+ break;
+ }
+ }
+ /*
+ * Place in the size, but we don't include the last pad (if any) in
+ * the INIT-ACK.
+ */
+ initack->ch.chunk_length = htons(p_len);
+
+ /*
+ * Time to sign the cookie. We don't sign over the cookie signature
+ * itself, thus we set the trailer length.
+ */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) inp->sctp_ep.secret_key[(int)(inp->sctp_ep.current_secret_number)],
+ SCTP_SECRET_SIZE, m_cookie, sizeof(struct sctp_paramhdr),
+ (uint8_t *) signature, SCTP_SIGNATURE_SIZE);
+ /*
+ * We pass 0 here to NOT set IP_DF if it's IPv4; we ignore the return
+ * here since the timer will drive a retransmission.
+ */
+ padval = p_len % 4;
+ if ((padval) && (mp_last)) {
+ /* see my previous comments on mp_last */
+ int ret;
+
+ ret = sctp_add_pad_tombuf(mp_last, (4 - padval));
+ if (ret) {
+ /* Houston we have a problem, no space */
+ sctp_m_freem(m);
+ return;
+ }
+ p_len += padval;
+ }
+ if (stc.loopback_scope) {
+ over_addr = &store1;
+ } else {
+ over_addr = NULL;
+ }
+
+ (void)sctp_lowlevel_chunk_output(inp, NULL, NULL, to, m, 0, NULL, 0, 0,
+ 0, NULL, 0,
+ inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag,
+ port, SCTP_SO_NOT_LOCKED, over_addr);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+}
+
+
+void
+sctp_insert_on_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq, int holds_lock)
+{
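+ /*
+ * The "wheel" is the list of streams with queued output; a
+ * stream whose tailq entry pointers are both NULL is not yet
+ * linked on it, so only then do we insert it.
+ */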
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ }
+ if ((strq->next_spoke.tqe_next == NULL) &&
+ (strq->next_spoke.tqe_prev == NULL)) {
+ TAILQ_INSERT_TAIL(&asoc->out_wheel, strq, next_spoke);
+ }
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+}
+
+void
+sctp_remove_from_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq,
+ int holds_lock)
+{
+ /* take off and then setup so we know it is not on the wheel */
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ }
+ if (TAILQ_EMPTY(&strq->outqueue)) {
+ if (asoc->last_out_stream == strq) {
+ asoc->last_out_stream = TAILQ_PREV(asoc->last_out_stream, sctpwheel_listhead, next_spoke);
+ if (asoc->last_out_stream == NULL) {
+ asoc->last_out_stream = TAILQ_LAST(&asoc->out_wheel, sctpwheel_listhead);
+ }
+ if (asoc->last_out_stream == strq) {
+ asoc->last_out_stream = NULL;
+ }
+ }
+ TAILQ_REMOVE(&asoc->out_wheel, strq, next_spoke);
+ strq->next_spoke.tqe_next = NULL;
+ strq->next_spoke.tqe_prev = NULL;
+ }
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+}
+
+static void
+sctp_prune_prsctp(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *srcv,
+ int dataout)
+{
+ int freed_spc = 0;
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if ((asoc->peer_supports_prsctp) &&
+ (asoc->sent_queue_cnt_removeable > 0)) {
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ /*
+ * Look for chunks marked with the PR_SCTP flag AND
+ * the buffer space flag. If the one being sent is of
+ * equal or greater priority then purge the old one
+ * and free some space.
+ */
+ if (PR_SCTP_BUF_ENABLED(chk->flags)) {
+ /*
+ * This one is PR-SCTP AND buffer space
+ * limited type
+ */
+ if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) {
+ /*
+ * Lower numbers equate to higher
+ * priority, so if the one we are
+ * looking at has a larger or equal
+ * priority we want to drop the data
+ * and NOT retransmit it.
+ */
+ if (chk->data) {
+ /*
+ * We release the book_size
+ * if the mbuf is here
+ */
+ int ret_spc;
+ int cause;
+
+ if (chk->sent > SCTP_DATAGRAM_UNSENT)
+ cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT;
+ else
+ cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT;
+ ret_spc = sctp_release_pr_sctp_chunk(stcb, chk,
+ cause,
+ SCTP_SO_LOCKED);
+ freed_spc += ret_spc;
+ if (freed_spc >= dataout) {
+ return;
+ }
+ } /* if chunk was present */
+ } /* if of sufficient priority */
+ } /* if chunk has enabled */
+ } /* tailqforeach */
+
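+ /*
+ * Second pass: apply the same buffer-space pruning to chunks
+ * still waiting on the send queue (not yet transmitted).
+ */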
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ /* Here we must move to the sent queue and mark */
+ if (PR_SCTP_BUF_ENABLED(chk->flags)) {
+ if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) {
+ if (chk->data) {
+ /*
+ * We release the book_size
+ * if the mbuf is here
+ */
+ int ret_spc;
+
+ ret_spc = sctp_release_pr_sctp_chunk(stcb, chk,
+ SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT,
+ SCTP_SO_LOCKED);
+
+ freed_spc += ret_spc;
+ if (freed_spc >= dataout) {
+ return;
+ }
+ } /* end if chk->data */
+ } /* end if right class */
+ } /* end if chk pr-sctp */
+ chk = nchk;
+ } /* end while (chk) */
+ } /* if enabled in asoc */
+}
+
+int
+sctp_get_frag_point(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ int siz, ovh;
+
+ /*
+ * For endpoints that have both v6 and v4 addresses we must reserve
+ * room for the ipv6 header, for those that are only dealing with V4
+ * we use a larger frag point.
+ */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+ if (stcb->asoc.sctp_frag_point > asoc->smallest_mtu)
+ siz = asoc->smallest_mtu - ovh;
+ else
+ siz = (stcb->asoc.sctp_frag_point - ovh);
+ /*
+ * if (siz > (MCLBYTES-sizeof(struct sctp_data_chunk))) {
+ */
+ /* A data chunk MUST fit in a cluster */
+ /* siz = (MCLBYTES - sizeof(struct sctp_data_chunk)); */
+ /* } */
+
+ /* adjust for an AUTH chunk if DATA requires auth */
+ if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks))
+ siz -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+
+ if (siz % 4) {
+ /* make it an even word boundary please */
+ siz -= (siz % 4);
+ }
+ return (siz);
+}
+
+static void
+sctp_set_prsctp_policy(struct sctp_stream_queue_pending *sp)
+{
+ sp->pr_sctp_on = 0;
+ /*
+ * We assume that the user wants PR_SCTP_TTL if the user provides a
+ * positive lifetime but does not specify any PR_SCTP policy. This
+ * is a BAD assumption and causes problems at least with the
+ * U-Vancouver MPI folks. I will change this to be no policy means
+ * NO PR-SCTP.
+ */
+ if (PR_SCTP_ENABLED(sp->sinfo_flags)) {
+ sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
+ sp->pr_sctp_on = 1;
+ } else {
+ return;
+ }
+ switch (PR_SCTP_POLICY(sp->sinfo_flags)) {
+ case CHUNK_FLAGS_PR_SCTP_BUF:
+ /*
+ * Time to live is a priority stored in tv_sec when doing
+ * the buffer drop thing.
+ */
+ sp->ts.tv_sec = sp->timetolive;
+ sp->ts.tv_usec = 0;
+ break;
+ case CHUNK_FLAGS_PR_SCTP_TTL:
+ {
+ struct timeval tv;
+
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+ tv.tv_sec = sp->timetolive / 1000;
+ tv.tv_usec = (sp->timetolive * 1000) % 1000000;
+ /*
+ * TODO sctp_constants.h needs alternative time
+ * macros when _KERNEL is undefined.
+ */
+ timevaladd(&sp->ts, &tv);
+ }
+ break;
+ case CHUNK_FLAGS_PR_SCTP_RTX:
+ /*
+ * Time to live is the number of retransmissions stored in
+ * tv_sec.
+ */
+ sp->ts.tv_sec = sp->timetolive;
+ sp->ts.tv_usec = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_USRREQ1,
+ "Unknown PR_SCTP policy %u.\n",
+ PR_SCTP_POLICY(sp->sinfo_flags));
+ break;
+ }
+}
+
+static int
+sctp_msg_append(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct mbuf *m,
+ struct sctp_sndrcvinfo *srcv, int hold_stcb_lock)
+{
+ int error = 0, holds_lock;
+ struct mbuf *at;
+ struct sctp_stream_queue_pending *sp = NULL;
+ struct sctp_stream_out *strm;
+
+ /*
+ * Given an mbuf chain, put it into the association send queue and
+ * place it on the wheel
+ */
+ holds_lock = hold_stcb_lock;
+ if (srcv->sinfo_stream >= stcb->asoc.streamoutcnt) {
+ /* Invalid stream number */
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((stcb->asoc.stream_locked) &&
+ (stcb->asoc.stream_locked_on != srcv->sinfo_stream)) {
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ strm = &stcb->asoc.strmout[srcv->sinfo_stream];
+ /* Now can we send this? */
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (stcb->asoc.state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ /* got data while shutting down */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_now;
+ }
+ sctp_alloc_a_strmoq(stcb, sp);
+ if (sp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ error = ENOMEM;
+ goto out_now;
+ }
+ sp->sinfo_flags = srcv->sinfo_flags;
+ sp->timetolive = srcv->sinfo_timetolive;
+ sp->ppid = srcv->sinfo_ppid;
+ sp->context = srcv->sinfo_context;
+ sp->strseq = 0;
+ if (sp->sinfo_flags & SCTP_ADDR_OVER) {
+ sp->net = net;
+ atomic_add_int(&sp->net->ref_count, 1);
+ } else {
+ sp->net = NULL;
+ }
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+ sp->stream = srcv->sinfo_stream;
+ sp->msg_is_complete = 1;
+ sp->sender_all_done = 1;
+ sp->some_taken = 0;
+ sp->data = m;
+ sp->tail_mbuf = NULL;
+ sp->length = 0;
+ at = m;
+ sctp_set_prsctp_policy(sp);
+ /*
+ * We could in theory (for sendall) pass the length in, but we would
+ * still have to hunt through the chain since we need to set up the
+ * tail_mbuf.
+ */
+ while (at) {
+ if (SCTP_BUF_NEXT(at) == NULL)
+ sp->tail_mbuf = at;
+ sp->length += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
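+ /*
+ * Under the send lock: charge the data against the association,
+ * queue it on the stream, assign a stream sequence number for
+ * ordered sends, and put the stream on the wheel if needed.
+ */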
+ SCTP_TCB_SEND_LOCK(stcb);
+ sctp_snd_sb_alloc(stcb, sp->length);
+ atomic_add_int(&stcb->asoc.stream_queue_cnt, 1);
+ TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
+ if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
+ sp->strseq = strm->next_sequence_sent;
+ strm->next_sequence_sent++;
+ }
+ if ((strm->next_spoke.tqe_next == NULL) &&
+ (strm->next_spoke.tqe_prev == NULL)) {
+ /* Not on wheel, insert */
+ sctp_insert_on_wheel(stcb, &stcb->asoc, strm, 1);
+ }
+ m = NULL;
+ SCTP_TCB_SEND_UNLOCK(stcb);
+out_now:
+ if (m) {
+ sctp_m_freem(m);
+ }
+ return (error);
+}
+
+
+static struct mbuf *
+sctp_copy_mbufchain(struct mbuf *clonechain,
+ struct mbuf *outchain,
+ struct mbuf **endofchain,
+ int can_take_mbuf,
+ int sizeofcpy,
+ uint8_t copy_by_ref)
+{
+ struct mbuf *m;
+ struct mbuf *appendchain;
+ caddr_t cp;
+ int len;
+
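+ /*
+ * Append clonechain onto outchain: either take the chain as-is,
+ * copy the data into the tail mbuf(s) for small sends, or m_copym
+ * it by reference; *endofchain tracks the last mbuf of the result.
+ */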
+ if (endofchain == NULL) {
+ /* error */
+error_out:
+ if (outchain)
+ sctp_m_freem(outchain);
+ return (NULL);
+ }
+ if (can_take_mbuf) {
+ appendchain = clonechain;
+ } else {
+ if (!copy_by_ref &&
+ (sizeofcpy <= (int)((((SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) - 1) * MLEN) + MHLEN)))
+ ) {
+ /* It's not in a cluster */
+ if (*endofchain == NULL) {
+ /* let's get an mbuf cluster */
+ if (outchain == NULL) {
+ /* This is the general case */
+ new_mbuf:
+ outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (outchain == NULL) {
+ goto error_out;
+ }
+ SCTP_BUF_LEN(outchain) = 0;
+ *endofchain = outchain;
+ /* get the prepend space */
+ SCTP_BUF_RESV_UF(outchain, (SCTP_FIRST_MBUF_RESV + 4));
+ } else {
+ /*
+ * We really should not get a NULL
+ * in endofchain
+ */
+ /* find end */
+ m = outchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ /* sanity */
+ if (*endofchain == NULL) {
+ /*
+ * huh, TSNH XXX maybe we
+ * should panic
+ */
+ sctp_m_freem(outchain);
+ goto new_mbuf;
+ }
+ }
+ /* get the new end of length */
+ len = M_TRAILINGSPACE(*endofchain);
+ } else {
+ /* how much is left at the end? */
+ len = M_TRAILINGSPACE(*endofchain);
+ }
+ /* Find the end of the data, for appending */
+ cp = (mtod((*endofchain), caddr_t)+SCTP_BUF_LEN((*endofchain)));
+
+ /* Now lets copy it out */
+ if (len >= sizeofcpy) {
+ /* It all fits, copy it in */
+ m_copydata(clonechain, 0, sizeofcpy, cp);
+ SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
+ } else {
+ /* fill up the end of the chain */
+ if (len > 0) {
+ m_copydata(clonechain, 0, len, cp);
+ SCTP_BUF_LEN((*endofchain)) += len;
+ /* now we need another one */
+ sizeofcpy -= len;
+ }
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m == NULL) {
+ /* We failed */
+ goto error_out;
+ }
+ SCTP_BUF_NEXT((*endofchain)) = m;
+ *endofchain = m;
+ cp = mtod((*endofchain), caddr_t);
+ m_copydata(clonechain, len, sizeofcpy, cp);
+ SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
+ }
+ return (outchain);
+ } else {
+ /* copy the old-fashioned way */
+ appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_DONTWAIT);
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = appendchain;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ }
+ }
+ if (appendchain == NULL) {
+ /* error */
+ if (outchain)
+ sctp_m_freem(outchain);
+ return (NULL);
+ }
+ if (outchain) {
+ /* tack on to the end */
+ if (*endofchain != NULL) {
+ SCTP_BUF_NEXT(((*endofchain))) = appendchain;
+ } else {
+ m = outchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ SCTP_BUF_NEXT(m) = appendchain;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ }
+ /*
+ * save off the end and update the end-chain position
+ */
+ m = appendchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ return (outchain);
+ } else {
+ /* save off the end and update the end-chain position */
+ m = appendchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ return (appendchain);
+ }
+}
+
+int
+sctp_med_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *num_out,
+ int *reason_code,
+ int control_only, int from_where,
+ struct timeval *now, int *now_filled, int frag_point, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+static void
+sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
+ uint32_t val)
+{
+ struct sctp_copy_all *ca;
+ struct mbuf *m;
+ int ret = 0;
+ int added_control = 0;
+ int un_sent, do_chunk_output = 1;
+ struct sctp_association *asoc;
+
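+ /*
+ * Called by the PCB iterator once per association for an
+ * SCTP_SENDALL request: copy the user data and append it (or
+ * abort/shut down the association, per the sinfo flags).
+ */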
+ ca = (struct sctp_copy_all *)ptr;
+ if (ca->m == NULL) {
+ return;
+ }
+ if (ca->inp != inp) {
+ /* TSNH */
+ return;
+ }
+ if ((ca->m) && ca->sndlen) {
+ m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_DONTWAIT);
+ if (m == NULL) {
+ /* can't copy so we are done */
+ ca->cnt_failed++;
+ return;
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = m;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ } else {
+ m = NULL;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (ca->sndrcv.sinfo_flags & SCTP_ABORT) {
+ /* Abort this assoc with m as the user defined reason */
+ if (m) {
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_DONTWAIT);
+ if (m) {
+ ph = mtod(m, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(ca->sndlen);
+ }
+ /*
+ * We add one here to keep the assoc from
+ * disappearing on us.
+ */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(inp, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ m, SCTP_SO_NOT_LOCKED);
+ /*
+ * sctp_abort_an_association calls sctp_free_asoc();
+ * free_asoc will NOT free the association since we
+ * incremented the refcnt. We do this to prevent it
+ * being freed and things getting tricky, since we
+ * could end up (from free_asoc) calling inpcb_free,
+ * which would take a recursive lock call on the
+ * iterator lock. But as a consequence of that the
+ * stcb will return to us un-locked: since free_asoc
+ * returns with either no TCB or the TCB unlocked, we
+ * must relock to unlock in the iterator timer :-0
+ */
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ goto no_chunk_output;
+ }
+ } else {
+ if (m) {
+ ret = sctp_msg_append(stcb, stcb->asoc.primary_destination, m,
+ &ca->sndrcv, 1);
+ }
+ asoc = &stcb->asoc;
+ if (ca->sndrcv.sinfo_flags & SCTP_EOF) {
+ /* shutdown this assoc */
+ int cnt;
+
+ cnt = sctp_is_there_unsent_data(stcb);
+
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /*
+ * there is nothing queued to send, so I'm
+ * done...
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /*
+ * only send SHUTDOWN the first time
+ * through
+ */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ added_control = 1;
+ do_chunk_output = 0;
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send,
+ * so set SHUTDOWN_PENDING
+ */
+ /*
+ * XXX sockets draft says that SCTP_EOF
+ * should be sent with no data. currently,
+ * we will allow user data to be sent first
+ * and move to SHUTDOWN-PENDING
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if (asoc->locked_on_sending) {
+ /*
+ * Locked to send out the
+ * data
+ */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp) {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ abort_anyway:
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ NULL, SCTP_SO_NOT_LOCKED);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ goto no_chunk_output;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ }
+
+ }
+ }
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
+
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
+ ) {
+ do_chunk_output = 0;
+ }
+ if (do_chunk_output)
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED);
+ else if (added_control) {
+ int num_out = 0, reason = 0, now_filled = 0;
+ struct timeval now;
+ int frag_point;
+
+ frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ (void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
+ &reason, 1, 1, &now, &now_filled, frag_point, SCTP_SO_NOT_LOCKED);
+ }
+no_chunk_output:
+ if (ret) {
+ ca->cnt_failed++;
+ } else {
+ ca->cnt_sent++;
+ }
+}
+
+static void
+sctp_sendall_completes(void *ptr, uint32_t val)
+{
+ struct sctp_copy_all *ca;
+
+ ca = (struct sctp_copy_all *)ptr;
+ /*
+ * Do a notify here? Kacheong suggests that the notify be done at
+ * the send time, so you would push up a notification if any send
+ * failed. Don't know if this is feasible since the only failures we
+ * have are "memory" related, and if you cannot get an mbuf to send
+ * the data you surely can't get an mbuf to send up a notification
+ * that you can't send the data :->
+ */
+
+ /* now free everything */
+ sctp_m_freem(ca->m);
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+}
+
+
+#define MC_ALIGN(m, len) do { \
+ SCTP_BUF_RESV_UF(m, ((MCLBYTES - (len)) & ~(sizeof(long) - 1))); \
+} while (0)
+
+
+
+static struct mbuf *
+sctp_copy_out_all(struct uio *uio, int len)
+{
+ struct mbuf *ret, *at;
+ int left, willcpy, cancpy, error;
+
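+ /*
+ * Copy len bytes from the user's uio into a freshly allocated
+ * mbuf chain; used by the sendall path below.
+ */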
+ ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAIT, 1, MT_DATA);
+ if (ret == NULL) {
+ /* TSNH */
+ return (NULL);
+ }
+ left = len;
+ SCTP_BUF_LEN(ret) = 0;
+ /* save space for the data chunk header */
+ cancpy = M_TRAILINGSPACE(ret);
+ willcpy = min(cancpy, left);
+ at = ret;
+ while (left > 0) {
+ /* Align data to the end */
+ error = uiomove(mtod(at, caddr_t), willcpy, uio);
+ if (error) {
+ err_out_now:
+ sctp_m_freem(at);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(at) = willcpy;
+ SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
+ left -= willcpy;
+ if (left > 0) {
+ SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAIT, 1, MT_DATA);
+ if (SCTP_BUF_NEXT(at) == NULL) {
+ goto err_out_now;
+ }
+ at = SCTP_BUF_NEXT(at);
+ SCTP_BUF_LEN(at) = 0;
+ cancpy = M_TRAILINGSPACE(at);
+ willcpy = min(cancpy, left);
+ }
+ }
+ return (ret);
+}
+
+static int
+sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
+ struct sctp_sndrcvinfo *srcv)
+{
+ int ret;
+ struct sctp_copy_all *ca;
+
+ SCTP_MALLOC(ca, struct sctp_copy_all *, sizeof(struct sctp_copy_all),
+ SCTP_M_COPYAL);
+ if (ca == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(ca, 0, sizeof(struct sctp_copy_all));
+
+ ca->inp = inp;
+ memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo));
+ /*
+ * take off the sendall flag, it would be bad if we failed to do
+ * this :-0
+ */
+ ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL;
+ /* get length and mbuf chain */
+ if (uio) {
+ ca->sndlen = uio->uio_resid;
+ ca->m = sctp_copy_out_all(uio, ca->sndlen);
+ if (ca->m == NULL) {
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ } else {
+ /* Gather the length of the send */
+ struct mbuf *mat;
+
+ mat = m;
+ ca->sndlen = 0;
+ while (m) {
+ ca->sndlen += SCTP_BUF_LEN(m);
+ m = SCTP_BUF_NEXT(m);
+ }
+ ca->m = mat;
+ }
+ ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL,
+ SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)ca, 0,
+ sctp_sendall_completes, inp, 1);
+ if (ret) {
+ SCTP_PRINTF("Failed to initiate iterator for sendall\n");
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ return (EFAULT);
+ }
+ return (0);
+}
+
+
+void
+sctp_toss_old_cookies(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ while (chk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ }
+ chk = nchk;
+ }
+}
+
+void
+sctp_toss_old_asconf(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *chk_tmp;
+ struct sctp_asconf_chunk *acp;
+
+ asoc = &stcb->asoc;
+ for (chk = TAILQ_FIRST(&asoc->asconf_send_queue); chk != NULL;
+ chk = chk_tmp) {
+ /* get next chk */
+ chk_tmp = TAILQ_NEXT(chk, sctp_next);
+ /* find SCTP_ASCONF chunk in queue */
+ if (chk->rec.chunk_id.id == SCTP_ASCONF) {
+ if (chk->data) {
+ acp = mtod(chk->data, struct sctp_asconf_chunk *);
+ if (compare_with_wrap(ntohl(acp->serial_number), stcb->asoc.asconf_seq_out_acked, MAX_SEQ)) {
+ /* Not Acked yet */
+ break;
+ }
+ }
+ TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ }
+ }
+}
+
+
+static void
+sctp_clean_up_datalist(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_tmit_chunk **data_list,
+ int bundle_at,
+ struct sctp_nets *net)
+{
+ int i;
+ struct sctp_tmit_chunk *tp1;
+
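+ /*
+ * Move the chunks just bundled into a packet from the send queue
+ * to the sent queue (kept in TSN order), mark them sent, and
+ * update flight size and the peer's advertised rwnd.
+ */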
+ for (i = 0; i < bundle_at; i++) {
+ /* off of the send queue */
+ TAILQ_REMOVE(&asoc->send_queue, data_list[i], sctp_next);
+ asoc->send_queue_cnt--;
+ if (i > 0) {
+ /*
+ * Any chunk NOT 0 you zap the time; chunk 0 gets
+ * zapped or set based on whether an RTO measurement
+ * is needed.
+ */
+ data_list[i]->do_rtt = 0;
+ }
+ /* record time */
+ data_list[i]->sent_rcv_time = net->last_sent_time;
+ data_list[i]->rec.data.fast_retran_tsn = data_list[i]->rec.data.TSN_seq;
+ if (data_list[i]->whoTo == NULL) {
+ data_list[i]->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+ }
+ /* on to the sent queue */
+ tp1 = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead);
+ if ((tp1) && (compare_with_wrap(tp1->rec.data.TSN_seq,
+ data_list[i]->rec.data.TSN_seq, MAX_TSN))) {
+ struct sctp_tmit_chunk *tpp;
+
+ /* need to move back */
+ back_up_more:
+ tpp = TAILQ_PREV(tp1, sctpchunk_listhead, sctp_next);
+ if (tpp == NULL) {
+ TAILQ_INSERT_BEFORE(tp1, data_list[i], sctp_next);
+ goto all_done;
+ }
+ tp1 = tpp;
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ data_list[i]->rec.data.TSN_seq, MAX_TSN)) {
+ goto back_up_more;
+ }
+ TAILQ_INSERT_AFTER(&asoc->sent_queue, tp1, data_list[i], sctp_next);
+ } else {
+ TAILQ_INSERT_TAIL(&asoc->sent_queue,
+ data_list[i],
+ sctp_next);
+ }
+all_done:
+ /* This does not lower until the cum-ack passes it */
+ asoc->sent_queue_cnt++;
+ if ((asoc->peers_rwnd <= 0) &&
+ (asoc->total_flight == 0) &&
+ (bundle_at == 1)) {
+ /* Mark the chunk as being a window probe */
+ SCTP_STAT_INCR(sctps_windowprobed);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC2, 3);
+#endif
+ data_list[i]->sent = SCTP_DATAGRAM_SENT;
+ data_list[i]->snd_count = 1;
+ data_list[i]->rec.data.chunk_was_revoked = 0;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
+ data_list[i]->whoTo->flight_size,
+ data_list[i]->book_size,
+ (uintptr_t) data_list[i]->whoTo,
+ data_list[i]->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(data_list[i]);
+ sctp_total_flight_increase(stcb, data_list[i]);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
+ asoc->peers_rwnd, data_list[i]->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
+ (uint32_t) (data_list[i]->send_size + SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ }
+}
+
+static void
+sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ for (chk = TAILQ_FIRST(&asoc->control_send_queue);
+ chk; chk = nchk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK) || /* EY */
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
+ (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
+ (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_ECN_CWR) ||
+ (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) {
+ /* Stray chunks must be cleaned up */
+ clean_up_anyway:
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN)
+ asoc->fwd_tsn_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ } else if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) {
+ /* special handling, we must look into the param */
+ if (chk != asoc->str_reset) {
+ goto clean_up_anyway;
+ }
+ }
+ }
+}
+
+
+static int
+sctp_can_we_split_this(struct sctp_tcb *stcb,
+ uint32_t length,
+ uint32_t goal_mtu, uint32_t frag_point, int eeor_on)
+{
+ /*
+ * Make a decision on if I should split a msg into multiple parts.
+ * This is only asked of incomplete messages.
+ */
+ if (eeor_on) {
+ /*
+ * If we are doing EEOR we need to always send it if it's the
+ * entire thing, since it might be all the sender is putting in
+ * the hopper.
+ */
+ if (goal_mtu >= length) {
+ /*-
+ * If we have data outstanding,
+ * we get another chance when the sack
+ * arrives to transmit - wait for more data
+ */
+ if (stcb->asoc.total_flight == 0) {
+ /*
+ * If nothing is in flight, we zero the
+ * packet counter.
+ */
+ return (length);
+ }
+ return (0);
+
+ } else {
+ /* You can fill the rest */
+ return (goal_mtu);
+ }
+ }
+ /*-
+ * For those strange folk that make the send buffer
+ * smaller than our fragmentation point, we can't
+ * get a full msg in so we have to allow splitting.
+ */
+ if (SCTP_SB_LIMIT_SND(stcb->sctp_socket) < frag_point) {
+ return (length);
+ }
+ if ((length <= goal_mtu) ||
+ ((length - goal_mtu) < SCTP_BASE_SYSCTL(sctp_min_residual))) {
+ /* Sub-optimal residual; don't split in non-eeor mode. */
+ return (0);
+ }
+ /*
+ * If we reach here, length is larger than the goal_mtu. Do we wish
+ * to split it for the sake of packing chunks together?
+ */
+ if (goal_mtu >= min(SCTP_BASE_SYSCTL(sctp_min_split_point), frag_point)) {
+ /* It's ok to split it */
+ return (min(goal_mtu, frag_point));
+ }
+ /* Nope, can't split */
+ return (0);
+
+}
+
+static uint32_t
+sctp_move_to_outqueue(struct sctp_tcb *stcb,
+ struct sctp_stream_out *strq,
+ uint32_t goal_mtu,
+ uint32_t frag_point,
+ int *locked,
+ int *giveup,
+ int eeor_mode,
+ int *bail)
+{
+ /* Move from the stream to the send_queue keeping track of the total */
+ struct sctp_association *asoc;
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_data_chunk *dchkh;
+ uint32_t to_move, length;
+ uint8_t rcv_flags = 0;
+ uint8_t some_taken;
+ uint8_t send_lock_up = 0;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+one_more_time:
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp == NULL) {
+ *locked = 0;
+ if (send_lock_up == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp) {
+ goto one_more_time;
+ }
+ if (strq->last_msg_incomplete) {
+ SCTP_PRINTF("Huh? Stream:%d lm_in_c=%d but queue is NULL\n",
+ strq->stream_no,
+ strq->last_msg_incomplete);
+ strq->last_msg_incomplete = 0;
+ }
+ to_move = 0;
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ goto out_of;
+ }
+ if ((sp->msg_is_complete) && (sp->length == 0)) {
+ if (sp->sender_all_done) {
+ /*
+ * We are doing deferred cleanup. Last time through,
+ * when we took all the data, the sender_all_done was
+ * not set.
+ */
+ if ((sp->put_last_out == 0) && (sp->discard_rest == 0)) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out,
+ send_lock_up);
+ }
+ if ((TAILQ_NEXT(sp, next) == NULL) && (send_lock_up == 0)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ if (sp->net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ }
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+ /* we can't be locked to it */
+ *locked = 0;
+ stcb->asoc.locked_on_sending = NULL;
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ /* back to get the next msg */
+ goto one_more_time;
+ } else {
+ /*
+ * sender just finished this but still holds a
+ * reference
+ */
+ *locked = 1;
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ } else {
+ /* is there some to get */
+ if (sp->length == 0) {
+ /* no */
+ *locked = 1;
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ } else if (sp->discard_rest) {
+ if (send_lock_up == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ /* Whack down the size */
+ atomic_subtract_int(&stcb->asoc.total_output_queue_size, sp->length);
+ if ((stcb->sctp_socket != NULL) && \
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
+ atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc, sp->length);
+ }
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ sp->tail_mbuf = NULL;
+ }
+ sp->length = 0;
+ sp->some_taken = 1;
+ *locked = 1;
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ }
+ some_taken = sp->some_taken;
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ sp->msg_is_complete = 1;
+ }
+re_look:
+ length = sp->length;
+ if (sp->msg_is_complete) {
+ /* The message is complete */
+ to_move = min(length, frag_point);
+ if (to_move == length) {
+ /* All of it fits in the MTU */
+ if (sp->some_taken) {
+ rcv_flags |= SCTP_DATA_LAST_FRAG;
+ sp->put_last_out = 1;
+ } else {
+ rcv_flags |= SCTP_DATA_NOT_FRAG;
+ sp->put_last_out = 1;
+ }
+ } else {
+ /* Not all of it fits, we fragment */
+ if (sp->some_taken == 0) {
+ rcv_flags |= SCTP_DATA_FIRST_FRAG;
+ }
+ sp->some_taken = 1;
+ }
+ } else {
+ to_move = sctp_can_we_split_this(stcb, length, goal_mtu, frag_point, eeor_mode);
+ if (to_move) {
+ /*-
+ * We use a snapshot of length in case it
+ * is expanding during the compare.
+ */
+ uint32_t llen;
+
+ llen = length;
+ if (to_move >= llen) {
+ to_move = llen;
+ if (send_lock_up == 0) {
+ /*-
+ * We are taking all of an incomplete msg
+ * thus we need a send lock.
+ */
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ if (sp->msg_is_complete) {
+ /*
+ * the sender finished the
+ * msg
+ */
+ goto re_look;
+ }
+ }
+ }
+ if (sp->some_taken == 0) {
+ rcv_flags |= SCTP_DATA_FIRST_FRAG;
+ sp->some_taken = 1;
+ }
+ } else {
+ /* Nothing to take. */
+ if (sp->some_taken) {
+ *locked = 1;
+ }
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ }
+
+ /* If we reach here, we can copy out a chunk */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* No chunk memory */
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ /*
+ * Setup for unordered if needed by looking at the user sent info
+ * flags.
+ */
+ if (sp->sinfo_flags & SCTP_UNORDERED) {
+ rcv_flags |= SCTP_DATA_UNORDERED;
+ }
+ if ((SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) && ((sp->sinfo_flags & SCTP_EOF) == SCTP_EOF)) ||
+ ((sp->sinfo_flags & SCTP_SACK_IMMEDIATELY) == SCTP_SACK_IMMEDIATELY)) {
+ rcv_flags |= SCTP_DATA_SACK_IMMEDIATELY;
+ }
+ /* clear out the chunk before setting up */
+ memset(chk, 0, sizeof(*chk));
+ chk->rec.data.rcv_flags = rcv_flags;
+
+ if (to_move >= length) {
+ /* we think we can steal the whole thing */
+ if ((sp->sender_all_done == 0) && (send_lock_up == 0)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ if (to_move < sp->length) {
+ /* bail, it changed */
+ goto dont_do_it;
+ }
+ chk->data = sp->data;
+ chk->last_mbuf = sp->tail_mbuf;
+ /* register the stealing */
+ sp->data = sp->tail_mbuf = NULL;
+ } else {
+ struct mbuf *m;
+
+dont_do_it:
+ chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_DONTWAIT);
+ chk->last_mbuf = NULL;
+ if (chk->data == NULL) {
+ sp->some_taken = some_taken;
+ sctp_free_a_chunk(stcb, chk);
+ *bail = 1;
+ to_move = 0;
+ goto out_of;
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = chk->data;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ /* Pull off the data */
+ m_adj(sp->data, to_move);
+		/* Now let's work our way down and compact it */
+ m = sp->data;
+ while (m && (SCTP_BUF_LEN(m) == 0)) {
+ sp->data = SCTP_BUF_NEXT(m);
+ SCTP_BUF_NEXT(m) = NULL;
+ if (sp->tail_mbuf == m) {
+ /*-
+ * Freeing tail? TSNH since
+ * we supposedly were taking less
+ * than the sp->length.
+ */
+#ifdef INVARIANTS
+ panic("Huh, freing tail? - TSNH");
+#else
+ SCTP_PRINTF("Huh, freeing tail? - TSNH\n");
+ sp->tail_mbuf = sp->data = NULL;
+ sp->length = 0;
+#endif
+
+ }
+ sctp_m_free(m);
+ m = sp->data;
+ }
+ }
+ if (SCTP_BUF_IS_EXTENDED(chk->data)) {
+ chk->copy_by_ref = 1;
+ } else {
+ chk->copy_by_ref = 0;
+ }
+ /*
+	 * get last_mbuf and counts of mb usage. This is ugly but hopefully
+	 * it's only one mbuf.
+ */
+ if (chk->last_mbuf == NULL) {
+ chk->last_mbuf = chk->data;
+ while (SCTP_BUF_NEXT(chk->last_mbuf) != NULL) {
+ chk->last_mbuf = SCTP_BUF_NEXT(chk->last_mbuf);
+ }
+ }
+ if (to_move > length) {
+ /*- This should not happen either
+ * since we always lower to_move to the size
+ * of sp->length if its larger.
+ */
+#ifdef INVARIANTS
+ panic("Huh, how can to_move be larger?");
+#else
+ SCTP_PRINTF("Huh, how can to_move be larger?\n");
+ sp->length = 0;
+#endif
+ } else {
+ atomic_subtract_int(&sp->length, to_move);
+ }
+ if (M_LEADINGSPACE(chk->data) < (int)sizeof(struct sctp_data_chunk)) {
+ /* Not enough room for a chunk header, get some */
+ struct mbuf *m;
+
+ m = sctp_get_mbuf_for_msg(1, 0, M_DONTWAIT, 0, MT_DATA);
+ if (m == NULL) {
+ /*
+ * we're in trouble here. _PREPEND below will free
+ * all the data if there is no leading space, so we
+ * must put the data back and restore.
+ */
+ if (send_lock_up == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ if (chk->data == NULL) {
+ /* unsteal the data */
+ sp->data = chk->data;
+ sp->tail_mbuf = chk->last_mbuf;
+ } else {
+ struct mbuf *m_tmp;
+
+ /* reassemble the data */
+ m_tmp = sp->data;
+ sp->data = chk->data;
+ SCTP_BUF_NEXT(chk->last_mbuf) = m_tmp;
+ }
+ sp->some_taken = some_taken;
+ atomic_add_int(&sp->length, to_move);
+ chk->data = NULL;
+ *bail = 1;
+ sctp_free_a_chunk(stcb, chk);
+ to_move = 0;
+ goto out_of;
+ } else {
+ SCTP_BUF_LEN(m) = 0;
+ SCTP_BUF_NEXT(m) = chk->data;
+ chk->data = m;
+ M_ALIGN(chk->data, 4);
+ }
+ }
+ SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_DONTWAIT);
+ if (chk->data == NULL) {
+		/* HELP, TSNH since we assured above that it would not fail? */
+#ifdef INVARIANTS
+ panic("prepend failes HELP?");
+#else
+ SCTP_PRINTF("prepend fails HELP?\n");
+ sctp_free_a_chunk(stcb, chk);
+#endif
+ *bail = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ sctp_snd_sb_alloc(stcb, sizeof(struct sctp_data_chunk));
+ chk->book_size = chk->send_size = (to_move + sizeof(struct sctp_data_chunk));
+ chk->book_size_scale = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->pad_inplace = 0;
+ chk->no_fr_allowed = 0;
+ chk->rec.data.stream_seq = sp->strseq;
+ chk->rec.data.stream_number = sp->stream;
+ chk->rec.data.payloadtype = sp->ppid;
+ chk->rec.data.context = sp->context;
+ chk->rec.data.doing_fast_retransmit = 0;
+ chk->rec.data.ect_nonce = 0; /* ECN Nonce */
+
+ chk->rec.data.timetodrop = sp->ts;
+ chk->flags = sp->act_flags;
+
+ if (sp->net) {
+ chk->whoTo = sp->net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ } else
+ chk->whoTo = NULL;
+
+ if (sp->holds_key_ref) {
+ chk->auth_keyid = sp->auth_keyid;
+ sctp_auth_key_acquire(stcb, chk->auth_keyid);
+ chk->holds_key_ref = 1;
+ }
+ chk->rec.data.TSN_seq = atomic_fetchadd_int(&asoc->sending_seq, 1);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_OUTQ) {
+ sctp_misc_ints(SCTP_STRMOUT_LOG_SEND,
+ (uintptr_t) stcb, sp->length,
+ (uint32_t) ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq),
+ chk->rec.data.TSN_seq);
+ }
+ dchkh = mtod(chk->data, struct sctp_data_chunk *);
+ /*
+ * Put the rest of the things in place now. Size was done earlier in
+ * previous loop prior to padding.
+ */
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->tsn_out_at >= SCTP_TSN_LOG_SIZE) {
+ asoc->tsn_out_at = 0;
+ asoc->tsn_out_wrapped = 1;
+ }
+ asoc->out_tsnlog[asoc->tsn_out_at].tsn = chk->rec.data.TSN_seq;
+ asoc->out_tsnlog[asoc->tsn_out_at].strm = chk->rec.data.stream_number;
+ asoc->out_tsnlog[asoc->tsn_out_at].seq = chk->rec.data.stream_seq;
+ asoc->out_tsnlog[asoc->tsn_out_at].sz = chk->send_size;
+ asoc->out_tsnlog[asoc->tsn_out_at].flgs = chk->rec.data.rcv_flags;
+ asoc->out_tsnlog[asoc->tsn_out_at].stcb = (void *)stcb;
+ asoc->out_tsnlog[asoc->tsn_out_at].in_pos = asoc->tsn_out_at;
+ asoc->out_tsnlog[asoc->tsn_out_at].in_out = 2;
+ asoc->tsn_out_at++;
+#endif
+
+ dchkh->ch.chunk_type = SCTP_DATA;
+ dchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
+ dchkh->dp.tsn = htonl(chk->rec.data.TSN_seq);
+ dchkh->dp.stream_id = htons(strq->stream_no);
+ dchkh->dp.stream_sequence = htons(chk->rec.data.stream_seq);
+ dchkh->dp.protocol_id = chk->rec.data.payloadtype;
+ dchkh->ch.chunk_length = htons(chk->send_size);
+ /* Now advance the chk->send_size by the actual pad needed. */
+ if (chk->send_size < SCTP_SIZE32(chk->book_size)) {
+ /* need a pad */
+ struct mbuf *lm;
+ int pads;
+
+ pads = SCTP_SIZE32(chk->book_size) - chk->send_size;
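+		/*
+		 * For example (illustrative, assuming the usual 16-byte DATA
+		 * chunk header): a 13-byte payload gives book_size = 13 + 16 =
+		 * 29, SCTP_SIZE32(29) = 32, so pads = 3 and send_size grows
+		 * to 32 once the pad is added.
+		 */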
+ if (sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf) == 0) {
+ chk->pad_inplace = 1;
+ }
+ if ((lm = SCTP_BUF_NEXT(chk->last_mbuf)) != NULL) {
+ /* pad added an mbuf */
+ chk->last_mbuf = lm;
+ }
+ chk->send_size += pads;
+ }
+ /* We only re-set the policy if it is on */
+ if (sp->pr_sctp_on) {
+ sctp_set_prsctp_policy(sp);
+ asoc->pr_sctp_cnt++;
+ chk->pr_sctp_on = 1;
+ } else {
+ chk->pr_sctp_on = 0;
+ }
+ if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) {
+ /* All done pull and kill the message */
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
+ if (sp->put_last_out == 0) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-2\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out,
+ send_lock_up);
+ }
+ if ((send_lock_up == 0) && (TAILQ_NEXT(sp, next) == NULL)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ if (sp->net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ }
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+
+ /* we can't be locked to it */
+ *locked = 0;
+ stcb->asoc.locked_on_sending = NULL;
+ } else {
+ /* more to go, we are locked */
+ *locked = 1;
+ }
+ asoc->chunks_on_out_queue++;
+ TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next);
+ asoc->send_queue_cnt++;
+out_of:
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ return (to_move);
+}
+
+
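+/*
+ * Round-robin stream selection: continue on the out_wheel after the stream
+ * we used last, wrapping back to the front of the wheel when we fall off
+ * the end (or when nothing has been sent yet).
+ */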
+static struct sctp_stream_out *
+sctp_select_a_stream(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_stream_out *strq;
+
+ /* Find the next stream to use */
+ if (asoc->last_out_stream == NULL) {
+ strq = TAILQ_FIRST(&asoc->out_wheel);
+ } else {
+ strq = TAILQ_NEXT(asoc->last_out_stream, next_spoke);
+ if (strq == NULL) {
+ strq = TAILQ_FIRST(&asoc->out_wheel);
+ }
+ }
+ return (strq);
+}
+
+
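+/*
+ * Pull data from the per-stream send queues onto the association's
+ * send_queue for one destination, up to roughly one MTU (goal_mtu) worth,
+ * honoring any per-message destination choice and any stream the
+ * association is currently locked on.
+ */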
+static void
+sctp_fill_outqueue(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int frag_point, int eeor_mode, int *quit_now)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *strq, *strqn;
+ int goal_mtu, moved_how_much, total_moved = 0, bail = 0;
+ int locked, giveup;
+ struct sctp_stream_queue_pending *sp;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+#ifdef INET6
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ goal_mtu = net->mtu - SCTP_MIN_OVERHEAD;
+ } else {
+ /* ?? not sure what else to do */
+ goal_mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
+ }
+#else
+ goal_mtu = net->mtu - SCTP_MIN_OVERHEAD;
+#endif
+ /* Need an allowance for the data chunk header too */
+ goal_mtu -= sizeof(struct sctp_data_chunk);
+
+ /* must make even word boundary */
+ goal_mtu &= 0xfffffffc;
+ if (asoc->locked_on_sending) {
+ /* We are stuck on one stream until the message completes. */
+ strq = asoc->locked_on_sending;
+ locked = 1;
+ } else {
+ strq = sctp_select_a_stream(stcb, asoc);
+ locked = 0;
+ }
+ strqn = strq;
+ while ((goal_mtu > 0) && strq) {
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp == NULL) {
+ break;
+ }
+ /**
+ * Honor the users' choice if given. If not given,
+		 * pull it only to the primary path when CMT is
+		 * not in use.
+ */
+ if (((sp->net != NULL) &&
+ (sp->net != net)) ||
+ ((sp->net == NULL) &&
+ (asoc->sctp_cmt_on_off == 0) &&
+ (asoc->primary_destination != net))) {
+ /* Do not pull to this network */
+ if (locked) {
+ break;
+ } else {
+ strq = sctp_select_a_stream(stcb, asoc);
+ if (strq == NULL)
+ /* none left */
+ break;
+ if (strqn == strq) {
+ /* I have circled */
+ break;
+ }
+ continue;
+ }
+ }
+ giveup = 0;
+ bail = 0;
+ moved_how_much = sctp_move_to_outqueue(stcb, strq, goal_mtu, frag_point, &locked,
+ &giveup, eeor_mode, &bail);
+ if (moved_how_much)
+ asoc->last_out_stream = strq;
+
+ if (locked) {
+ asoc->locked_on_sending = strq;
+ if ((moved_how_much == 0) || (giveup) || bail)
+ /* no more to move for now */
+ break;
+ } else {
+ asoc->locked_on_sending = NULL;
+ if (TAILQ_EMPTY(&strq->outqueue)) {
+ if (strq == strqn) {
+ /* Must move start to next one */
+ strqn = TAILQ_NEXT(strq, next_spoke);
+ if (strqn == NULL) {
+ strqn = TAILQ_FIRST(&asoc->out_wheel);
+ if (strqn == NULL) {
+ break;
+ }
+ }
+ }
+ sctp_remove_from_wheel(stcb, asoc, strq, 0);
+ }
+ if ((giveup) || bail) {
+ break;
+ }
+ strq = sctp_select_a_stream(stcb, asoc);
+ if (strq == NULL) {
+ break;
+ }
+ }
+ total_moved += moved_how_much;
+ goal_mtu -= (moved_how_much + sizeof(struct sctp_data_chunk));
+ goal_mtu &= 0xfffffffc;
+ }
+ if (bail)
+ *quit_now = 1;
+
+ if (total_moved == 0) {
+ if ((stcb->asoc.sctp_cmt_on_off == 0) &&
+ (net == stcb->asoc.primary_destination)) {
+ /* ran dry for primary network net */
+ SCTP_STAT_INCR(sctps_primary_randry);
+ } else if (stcb->asoc.sctp_cmt_on_off == 1) {
+ /* ran dry with CMT on */
+ SCTP_STAT_INCR(sctps_cmt_randry);
+ }
+ }
+}
+
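+/*
+ * Walk the control queue and mark any queued ECN-ECHO chunks as unsent
+ * again so that they will be (re)transmitted on the next output pass.
+ */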
+void
+sctp_fix_ecn_echo(struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ }
+ }
+}
+
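+/*
+ * Detach pending stream data and queued-but-unsent chunks from the given
+ * net so they can be reassigned to another destination (used e.g. when a
+ * destination becomes unreachable during a send).
+ */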
+void
+sctp_move_chunks_from_net(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *outs;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_queue_pending *sp;
+
+ if (net == NULL) {
+ return;
+ }
+ asoc = &stcb->asoc;
+ TAILQ_FOREACH(outs, &asoc->out_wheel, next_spoke) {
+ TAILQ_FOREACH(sp, &outs->outqueue, next) {
+ if (sp->net == net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ }
+ }
+ }
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+ if (chk->whoTo == net) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = NULL;
+ }
+ }
+}
+
+int
+sctp_med_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *num_out,
+ int *reason_code,
+ int control_only, int from_where,
+ struct timeval *now, int *now_filled, int frag_point, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*
+	 * OK, this is the generic chunk service queue. We must do the
+	 * following:
+	 * - Service the stream queue that is next, moving any message (note
+	 *   we must get a complete message, i.e. FIRST/MIDDLE and LAST, to
+	 *   the out queue in one pass) and assigning TSNs.
+	 * - Check to see if the cwnd/rwnd allows any output; if so, go ahead
+	 *   and formulate and send the low-level chunks, making sure to
+	 *   combine any control in the control chunk queue also.
+ */
+ struct sctp_nets *net, *start_at, *old_start_at = NULL;
+ struct mbuf *outchain, *endoutchain;
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ /* temp arrays for unlinking */
+ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
+ int no_fragmentflg, error;
+ unsigned int max_rwnd_per_dest, max_send_per_dest;
+ int one_chunk, hbflag, skip_data_for_this_net;
+ int asconf, cookie, no_out_cnt;
+ int bundle_at, ctl_cnt, no_data_chunks, eeor_mode;
+ unsigned int mtu, r_mtu, omtu, mx_mtu, to_out;
+ int tsns_sent = 0;
+ uint32_t auth_offset = 0;
+ struct sctp_auth_chunk *auth = NULL;
+ uint16_t auth_keyid;
+ int override_ok = 1;
+ int data_auth_reqd = 0;
+
+ /*
+ * JRS 5/14/07 - Add flag for whether a heartbeat is sent to the
+ * destination.
+ */
+ int pf_hbflag = 0;
+ int quit_now = 0;
+
+ *num_out = 0;
+ auth_keyid = stcb->asoc.authinfo.active_keyid;
+
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
+ eeor_mode = 1;
+ } else {
+ eeor_mode = 0;
+ }
+ ctl_cnt = no_out_cnt = asconf = cookie = 0;
+ /*
+	 * First let's prime the pump. For each destination, if there is room
+ * in the flight size, attempt to pull an MTU's worth out of the
+ * stream queues into the general send_queue
+ */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC2, 2);
+#endif
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ hbflag = 0;
+ if ((control_only) || (asoc->stream_reset_outstanding))
+ no_data_chunks = 1;
+ else
+ no_data_chunks = 0;
+
+	/* Nothing possible to send? */
+ if (TAILQ_EMPTY(&asoc->control_send_queue) &&
+ TAILQ_EMPTY(&asoc->asconf_send_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->out_wheel)) {
+ *reason_code = 9;
+ return (0);
+ }
+ if (asoc->peers_rwnd == 0) {
+ /* No room in peers rwnd */
+ *reason_code = 1;
+ if (asoc->total_flight > 0) {
+ /* we are allowed one chunk in flight */
+ no_data_chunks = 1;
+ }
+ }
+ max_rwnd_per_dest = ((asoc->peers_rwnd + asoc->total_flight) / asoc->numnets);
+ if (stcb->sctp_socket)
+ max_send_per_dest = SCTP_SB_LIMIT_SND(stcb->sctp_socket) / asoc->numnets;
+ else
+ max_send_per_dest = 0;
+ if ((no_data_chunks == 0) && (!TAILQ_EMPTY(&asoc->out_wheel))) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ /*
+ * This for loop we are in takes in each net, if
+			 * it's got space in cwnd and has data sent to it
+ * (when CMT is off) then it calls
+ * sctp_fill_outqueue for the net. This gets data on
+ * the send queue for that network.
+ *
+ * In sctp_fill_outqueue TSN's are assigned and data is
+ * copied out of the stream buffers. Note mostly
+ * copy by reference (we hope).
+ */
+ net->window_probe = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ||
+ (net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 1,
+ SCTP_CWND_LOG_FILL_OUTQ_CALLED);
+ }
+ continue;
+ }
+ if ((asoc->sctp_cmt_on_off == 0) &&
+ (asoc->primary_destination != net) &&
+ (net->ref_count < 2)) {
+ /* nothing can be in queue for this guy */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 2,
+ SCTP_CWND_LOG_FILL_OUTQ_CALLED);
+ }
+ continue;
+ }
+ if (net->flight_size >= net->cwnd) {
+ /* skip this network, no room - can't fill */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 3,
+ SCTP_CWND_LOG_FILL_OUTQ_CALLED);
+ }
+ continue;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 4, SCTP_CWND_LOG_FILL_OUTQ_CALLED);
+ }
+ sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now);
+ if (quit_now) {
+ /* memory alloc failure */
+ no_data_chunks = 1;
+ break;
+ }
+ }
+ }
+ /* now service each destination and send out what we can for it */
+ /* Nothing to send? */
+ if (TAILQ_EMPTY(&asoc->control_send_queue) &&
+ TAILQ_EMPTY(&asoc->asconf_send_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue)) {
+ *reason_code = 8;
+ return (0);
+ }
+ if (asoc->sctp_cmt_on_off == 1) {
+ /* get the last start point */
+ start_at = asoc->last_net_cmt_send_started;
+ if (start_at == NULL) {
+ /* null so to beginning */
+ start_at = TAILQ_FIRST(&asoc->nets);
+ } else {
+ start_at = TAILQ_NEXT(asoc->last_net_cmt_send_started, sctp_next);
+ if (start_at == NULL) {
+ start_at = TAILQ_FIRST(&asoc->nets);
+ }
+ }
+ asoc->last_net_cmt_send_started = start_at;
+ } else {
+ start_at = TAILQ_FIRST(&asoc->nets);
+ }
+ old_start_at = NULL;
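+	/*
+	 * The loop below may start in the middle of the nets list (the CMT
+	 * round-robin start point), so it runs at most twice: once from
+	 * start_at to the end of the list, then again from the head back up
+	 * to the original start_at (see the old_start_at handling below).
+	 */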
+again_one_more_time:
+ for (net = start_at; net != NULL; net = TAILQ_NEXT(net, sctp_next)) {
+ /* how much can we send? */
+ /* SCTPDBG("Examine for sending net:%x\n", (uint32_t)net); */
+ if (old_start_at && (old_start_at == net)) {
+			/* went through the list completely. */
+ break;
+ }
+ tsns_sent = 0xa;
+ if ((asoc->sctp_cmt_on_off == 0) &&
+ (asoc->primary_destination != net) &&
+ (net->ref_count < 2)) {
+ /*
+ * Ref-count of 1 so we cannot have data or control
+ * queued to this address. Skip it (non-CMT).
+ */
+ continue;
+ }
+ if (TAILQ_EMPTY(&asoc->control_send_queue) &&
+ TAILQ_EMPTY(&asoc->asconf_send_queue) &&
+ (net->flight_size >= net->cwnd)) {
+ /*
+ * Nothing on control or asconf and flight is full,
+ * we can skip even in the CMT case.
+ */
+ continue;
+ }
+ ctl_cnt = bundle_at = 0;
+ endoutchain = outchain = NULL;
+ no_fragmentflg = 1;
+ one_chunk = 0;
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ skip_data_for_this_net = 1;
+ } else {
+ skip_data_for_this_net = 0;
+ }
+ if ((net->ro.ro_rt) && (net->ro.ro_rt->rt_ifp)) {
+ /*
+ * if we have a route and an ifp check to see if we
+ * have room to send to this guy
+ */
+ struct ifnet *ifp;
+
+ ifp = net->ro.ro_rt->rt_ifp;
+ if ((ifp->if_snd.ifq_len + 2) >= ifp->if_snd.ifq_maxlen) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, net, ifp->if_snd.ifq_len, ifp->if_snd.ifq_maxlen, SCTP_MAX_IFP_APPLIED);
+ }
+ continue;
+ }
+ }
+ switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+ case AF_INET:
+ mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ break;
+#ifdef INET6
+ case AF_INET6:
+ mtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ break;
+#endif
+ default:
+ /* TSNH */
+ mtu = net->mtu;
+ break;
+ }
+ mx_mtu = mtu;
+ to_out = 0;
+ if (mtu > asoc->peers_rwnd) {
+ if (asoc->total_flight > 0) {
+ /* We have a packet in flight somewhere */
+ r_mtu = asoc->peers_rwnd;
+ } else {
+ /* We are always allowed to send one MTU out */
+ one_chunk = 1;
+ r_mtu = mtu;
+ }
+ } else {
+ r_mtu = mtu;
+ }
+ /************************/
+ /* ASCONF transmission */
+ /************************/
+		/* First, let's go through the asconf queue */
+ for (chk = TAILQ_FIRST(&asoc->asconf_send_queue);
+ chk; chk = nchk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->rec.chunk_id.id != SCTP_ASCONF) {
+ continue;
+ }
+ if (chk->whoTo != net) {
+ /*
+ * No, not sent to the network we are
+ * looking at
+ */
+ break;
+ }
+ if (chk->data == NULL) {
+ break;
+ }
+ if (chk->sent != SCTP_DATAGRAM_UNSENT &&
+ chk->sent != SCTP_DATAGRAM_RESEND) {
+ break;
+ }
+ /*
+ * if no AUTH is yet included and this chunk
+ * requires it, make sure to account for it. We
+ * don't apply the size until the AUTH chunk is
+ * actually added below in case there is no room for
+ * this chunk. NOTE: we overload the use of "omtu"
+ * here
+ */
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks)) {
+ omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ omtu = 0;
+ /* Here we do NOT factor the r_mtu */
+ if ((chk->send_size < (int)(mtu - omtu)) ||
+ (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
+ /*
+ * We probably should glom the mbuf chain
+ * from the chk->data for control but the
+ * problem is it becomes yet one more level
+ * of tracking to do if for some reason
+ * output fails. Then I have got to
+ * reconstruct the merged control chain.. el
+ * yucko.. for now we take the easy way and
+ * do the copy
+ */
+ /*
+ * Add an AUTH chunk, if chunk requires it
+ * save the offset into the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks))) {
+ outchain = sctp_add_auth_chunk(outchain,
+ &endoutchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ chk->rec.chunk_id.id);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ }
+ outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain,
+ (int)chk->rec.chunk_id.can_take_data,
+ chk->send_size, chk->copy_by_ref);
+ if (outchain == NULL) {
+ *reason_code = 8;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ /* update our MTU size */
+ if (mtu > (chk->send_size + omtu))
+ mtu -= (chk->send_size + omtu);
+ else
+ mtu = 0;
+ to_out += (chk->send_size + omtu);
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ if (chk->rec.chunk_id.can_take_data)
+ chk->data = NULL;
+ /*
+ * set hb flag since we can use these for
+ * RTO
+ */
+ hbflag = 1;
+ asconf = 1;
+ /*
+ * should sysctl this: don't bundle data
+ * with ASCONF since it requires AUTH
+ */
+ no_data_chunks = 1;
+ chk->sent = SCTP_DATAGRAM_SENT;
+ chk->snd_count++;
+ if (mtu == 0) {
+ /*
+ * Ok we are out of room but we can
+					 * output without affecting the
+ * flight size since this little guy
+ * is a control only packet.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net);
+ /*
+ * do NOT clear the asconf flag as
+ * it is used to do appropriate
+ * source address selection.
+ */
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ outchain, auth_offset, auth,
+ stcb->asoc.authinfo.active_keyid,
+ no_fragmentflg, 0, NULL, asconf,
+ inp->sctp_lport, stcb->rport,
+ htonl(stcb->asoc.peer_vtag),
+ net->port, so_locked, NULL))) {
+ if (error == ENOBUFS) {
+ asoc->ifp_had_enobuf = 1;
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ }
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ /* error, could not output */
+ if (error == EHOSTUNREACH) {
+ /*
+ * Destination went
+ * unreachable
+ * during this send
+ */
+ sctp_move_chunks_from_net(stcb, net);
+ }
+ *reason_code = 7;
+ continue;
+ } else
+ asoc->ifp_had_enobuf = 0;
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ /*
+					 * increase the number we sent; if a
+ * cookie is sent we don't tell them
+ * any was sent out.
+ */
+ outchain = endoutchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ if (!no_out_cnt)
+ *num_out += ctl_cnt;
+ /* recalc a clean slate and setup */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ mtu = (net->mtu - SCTP_MIN_OVERHEAD);
+ } else {
+ mtu = (net->mtu - SCTP_MIN_V4_OVERHEAD);
+ }
+ to_out = 0;
+ no_fragmentflg = 1;
+ }
+ }
+ }
+ /************************/
+ /* Control transmission */
+ /************************/
+		/* Now let's go through the control queue */
+ for (chk = TAILQ_FIRST(&asoc->control_send_queue);
+ chk; chk = nchk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->whoTo != net) {
+ /*
+ * No, not sent to the network we are
+ * looking at
+ */
+ continue;
+ }
+ if (chk->data == NULL) {
+ continue;
+ }
+ if (chk->sent != SCTP_DATAGRAM_UNSENT) {
+ /*
+ * It must be unsent. Cookies and ASCONF's
+				 * hang around but their timers will force a send
+ * when marked for resend.
+ */
+ continue;
+ }
+ /*
+ * if no AUTH is yet included and this chunk
+ * requires it, make sure to account for it. We
+ * don't apply the size until the AUTH chunk is
+ * actually added below in case there is no room for
+ * this chunk. NOTE: we overload the use of "omtu"
+ * here
+ */
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks)) {
+ omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ omtu = 0;
+ /* Here we do NOT factor the r_mtu */
+ if ((chk->send_size <= (int)(mtu - omtu)) ||
+ (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
+ /*
+ * We probably should glom the mbuf chain
+ * from the chk->data for control but the
+ * problem is it becomes yet one more level
+ * of tracking to do if for some reason
+ * output fails. Then I have got to
+ * reconstruct the merged control chain.. el
+ * yucko.. for now we take the easy way and
+ * do the copy
+ */
+ /*
+ * Add an AUTH chunk, if chunk requires it
+ * save the offset into the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks))) {
+ outchain = sctp_add_auth_chunk(outchain,
+ &endoutchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ chk->rec.chunk_id.id);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ }
+ outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain,
+ (int)chk->rec.chunk_id.can_take_data,
+ chk->send_size, chk->copy_by_ref);
+ if (outchain == NULL) {
+ *reason_code = 8;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ /* update our MTU size */
+ if (mtu > (chk->send_size + omtu))
+ mtu -= (chk->send_size + omtu);
+ else
+ mtu = 0;
+ to_out += (chk->send_size + omtu);
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ if (chk->rec.chunk_id.can_take_data)
+ chk->data = NULL;
+ /* Mark things to be removed, if needed */
+ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK) || /* EY */
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
+ (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_ECN_CWR) ||
+ (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
+ (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) {
+
+ if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) {
+ hbflag = 1;
+ /*
+ * JRS 5/14/07 - Set the
+ * flag to say a heartbeat
+ * is being sent.
+ */
+ pf_hbflag = 1;
+ }
+ /* remove these chunks at the end */
+ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK)) {
+ /* turn off the timer */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
+ }
+ }
+ ctl_cnt++;
+ } else {
+ /*
+ * Other chunks, since they have
+ * timers running (i.e. COOKIE) we
+ * just "trust" that it gets sent or
+ * retransmitted.
+ */
+ ctl_cnt++;
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ cookie = 1;
+ no_out_cnt = 1;
+ }
+ chk->sent = SCTP_DATAGRAM_SENT;
+ chk->snd_count++;
+ }
+ if (mtu == 0) {
+ /*
+ * Ok we are out of room but we can
+					 * output without affecting the
+ * flight size since this little guy
+ * is a control only packet.
+ */
+ if (asconf) {
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net);
+ /*
+ * do NOT clear the asconf
+ * flag as it is used to do
+ * appropriate source
+ * address selection.
+ */
+ }
+ if (cookie) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
+ cookie = 0;
+ }
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ outchain,
+ auth_offset, auth,
+ stcb->asoc.authinfo.active_keyid,
+ no_fragmentflg, 0, NULL, asconf,
+ inp->sctp_lport, stcb->rport,
+ htonl(stcb->asoc.peer_vtag),
+ net->port, so_locked, NULL))) {
+ if (error == ENOBUFS) {
+ asoc->ifp_had_enobuf = 1;
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ }
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
+ /* error, could not output */
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ if (error == EHOSTUNREACH) {
+ /*
+ * Destination went
+ * unreachable
+ * during this send
+ */
+ sctp_move_chunks_from_net(stcb, net);
+ }
+ *reason_code = 7;
+ continue;
+ } else
+ asoc->ifp_had_enobuf = 0;
+ /* Only HB or ASCONF advances time */
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ /*
+				 * increase the number we sent; if a
+ * cookie is sent we don't tell them
+ * any was sent out.
+ */
+ outchain = endoutchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ if (!no_out_cnt)
+ *num_out += ctl_cnt;
+ /* recalc a clean slate and setup */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ mtu = (net->mtu - SCTP_MIN_OVERHEAD);
+ } else {
+ mtu = (net->mtu - SCTP_MIN_V4_OVERHEAD);
+ }
+ to_out = 0;
+ no_fragmentflg = 1;
+ }
+ }
+ }
+ /* JRI: if dest is in PF state, do not send data to it */
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ (asoc->sctp_cmt_pf > 0) &&
+ (net->dest_state & SCTP_ADDR_PF)) {
+ goto no_data_fill;
+ }
+ if (net->flight_size >= net->cwnd) {
+ goto no_data_fill;
+ }
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_RECV_BUFFER_SPLITTING) &&
+ (net->flight_size > max_rwnd_per_dest)) {
+ goto no_data_fill;
+ }
+ /*
+ * We need a specific accounting for the usage of the send
+ * buffer. We also need to check the number of messages per
+		 * net. For now, this is better than nothing and it is
+		 * disabled by default...
+ */
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_SEND_BUFFER_SPLITTING) &&
+ (max_send_per_dest > 0) &&
+ (net->flight_size > max_send_per_dest)) {
+ goto no_data_fill;
+ }
+ /*********************/
+ /* Data transmission */
+ /*********************/
+ /*
+ * if AUTH for DATA is required and no AUTH has been added
+ * yet, account for this in the mtu now... if no data can be
+ * bundled, this adjustment won't matter anyways since the
+ * packet will be going out...
+ */
+ data_auth_reqd = sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks);
+ if (data_auth_reqd && (auth == NULL)) {
+ mtu -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ }
+		/* now let's add any data within the MTU constraints */
+ switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+ case AF_INET:
+ if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr)))
+ omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ else
+ omtu = 0;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)))
+ omtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ else
+ omtu = 0;
+ break;
+#endif
+ default:
+ /* TSNH */
+ omtu = 0;
+ break;
+ }
+ if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) &&
+ (skip_data_for_this_net == 0)) ||
+ (cookie)) {
+ for (chk = TAILQ_FIRST(&asoc->send_queue); chk; chk = nchk) {
+ if (no_data_chunks) {
+ /* let only control go out */
+ *reason_code = 1;
+ break;
+ }
+ if (net->flight_size >= net->cwnd) {
+ /* skip this net, no room for data */
+ *reason_code = 2;
+ break;
+ }
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if ((chk->whoTo != NULL) &&
+ (chk->whoTo != net)) {
+ /* Don't send the chunk on this net */
+ continue;
+ }
+ if ((chk->send_size > omtu) && ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) == 0)) {
+ /*-
+ * strange, we have a chunk that is
+					 * too big for its destination and
+ * yet no fragment ok flag.
+ * Something went wrong when the
+ * PMTU changed...we did not mark
+ * this chunk for some reason?? I
+ * will fix it here by letting IP
+ * fragment it for now and printing
+ * a warning. This really should not
+ * happen ...
+ */
+ SCTP_PRINTF("Warning chunk of %d bytes > mtu:%d and yet PMTU disc missed\n",
+ chk->send_size, mtu);
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) &&
+ ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) == SCTP_STATE_SHUTDOWN_PENDING)) {
+ struct sctp_data_chunk *dchkh;
+
+ dchkh = mtod(chk->data, struct sctp_data_chunk *);
+ dchkh->ch.chunk_flags |= SCTP_DATA_SACK_IMMEDIATELY;
+ }
+ if (((chk->send_size <= mtu) && (chk->send_size <= r_mtu)) ||
+ ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) && (chk->send_size <= asoc->peers_rwnd))) {
+ /* ok we will add this one */
+
+ /*
+ * Add an AUTH chunk, if chunk
+ * requires it, save the offset into
+ * the chain for AUTH
+ */
+ if (data_auth_reqd) {
+ if (auth == NULL) {
+ outchain = sctp_add_auth_chunk(outchain,
+ &endoutchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ SCTP_DATA);
+ auth_keyid = chk->auth_keyid;
+ override_ok = 0;
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ } else if (override_ok) {
+ /*
+ * use this data's
+ * keyid
+ */
+ auth_keyid = chk->auth_keyid;
+ override_ok = 0;
+ } else if (auth_keyid != chk->auth_keyid) {
+ /*
+ * different keyid,
+ * so done bundling
+ */
+ break;
+ }
+ }
+ outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain, 0,
+ chk->send_size, chk->copy_by_ref);
+ if (outchain == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "No memory?\n");
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ *reason_code = 3;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+					/* update our MTU size */
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* unsigned subtraction of mtu */
+ if (mtu > chk->send_size)
+ mtu -= chk->send_size;
+ else
+ mtu = 0;
+ /* unsigned subtraction of r_mtu */
+ if (r_mtu > chk->send_size)
+ r_mtu -= chk->send_size;
+ else
+ r_mtu = 0;
+
+ to_out += chk->send_size;
+ if ((to_out > mx_mtu) && no_fragmentflg) {
+#ifdef INVARIANTS
+ panic("Exceeding mtu of %d out size is %d", mx_mtu, to_out);
+#else
+ SCTP_PRINTF("Exceeding mtu of %d out size is %d\n",
+ mx_mtu, to_out);
+#endif
+ }
+ chk->window_probe = 0;
+ data_list[bundle_at++] = chk;
+ if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
+ mtu = 0;
+ break;
+ }
+ if (chk->sent == SCTP_DATAGRAM_UNSENT) {
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ SCTP_STAT_INCR_COUNTER64(sctps_outorderchunks);
+ } else {
+ SCTP_STAT_INCR_COUNTER64(sctps_outunorderchunks);
+ }
+ if (((chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) == SCTP_DATA_LAST_FRAG) &&
+ ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0))
+ /*
+ * Count number of
+ * user msg's that
+ * were fragmented
+ * we do this by
+ * counting when we
+ * see a LAST
+ * fragment only.
+ */
+ SCTP_STAT_INCR_COUNTER64(sctps_fragusrmsgs);
+ }
+ if ((mtu == 0) || (r_mtu == 0) || (one_chunk)) {
+ if ((one_chunk) && (stcb->asoc.total_flight == 0)) {
+ data_list[0]->window_probe = 1;
+ net->window_probe = 1;
+ }
+ break;
+ }
+ } else {
+ /*
+ * Must be sent in order of the
+ * TSN's (on a network)
+ */
+ break;
+ }
+ } /* for (chunk gather loop for this net) */
+ } /* if asoc.state OPEN */
+no_data_fill:
+ /* Is there something to send for this destination? */
+ if (outchain) {
+ /* We may need to start a control timer or two */
+ if (asconf) {
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
+ stcb, net);
+ /*
+ * do NOT clear the asconf flag as it is
+ * used to do appropriate source address
+ * selection.
+ */
+ }
+ if (cookie) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
+ cookie = 0;
+ }
+ /* must start a send timer if data is being sent */
+ if (bundle_at && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) {
+ /*
+ * no timer running on this destination
+ * restart it.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ } else if ((asoc->sctp_cmt_on_off == 1) &&
+ (asoc->sctp_cmt_pf > 0) &&
+ pf_hbflag &&
+ ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF) &&
+ (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) {
+ /*
+ * JRS 5/14/07 - If a HB has been sent to a
+ * PF destination and no T3 timer is
+ * currently running, start the T3 timer to
+ * track the HBs that were sent.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ /* Now send it, if there is anything to send :> */
+ if ((error = sctp_lowlevel_chunk_output(inp,
+ stcb,
+ net,
+ (struct sockaddr *)&net->ro._l_addr,
+ outchain,
+ auth_offset,
+ auth,
+ auth_keyid,
+ no_fragmentflg,
+ bundle_at,
+ data_list[0],
+ asconf,
+ inp->sctp_lport, stcb->rport,
+ htonl(stcb->asoc.peer_vtag),
+ net->port, so_locked, NULL))) {
+ /* error, we could not output */
+ if (error == ENOBUFS) {
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ asoc->ifp_had_enobuf = 1;
+ }
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ if (error == EHOSTUNREACH) {
+ /*
+ * Destination went unreachable
+ * during this send
+ */
+ sctp_move_chunks_from_net(stcb, net);
+ }
+ *reason_code = 6;
+ /*-
+ * I add this line to be paranoid. As far as
+			 * I can tell the continue takes us back to
+ * the top of the for, but just to make sure
+ * I will reset these again here.
+ */
+ ctl_cnt = bundle_at = 0;
+ continue; /* This takes us back to the
+ * for() for the nets. */
+ } else {
+ asoc->ifp_had_enobuf = 0;
+ }
+ outchain = endoutchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ if (bundle_at || hbflag) {
+ /* For data/asconf and hb set time */
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ }
+ if (!no_out_cnt) {
+ *num_out += (ctl_cnt + bundle_at);
+ }
+ if (bundle_at) {
+ /* setup for a RTO measurement */
+ tsns_sent = data_list[0]->rec.data.TSN_seq;
+ /* fill time if not already filled */
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
+ *now_filled = 1;
+ *now = asoc->time_last_sent;
+ } else {
+ asoc->time_last_sent = *now;
+ }
+ data_list[0]->do_rtt = 1;
+ SCTP_STAT_INCR_BY(sctps_senddata, bundle_at);
+ sctp_clean_up_datalist(stcb, asoc, data_list, bundle_at, net);
+ if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
+ if (net->flight_size < net->cwnd) {
+ /* start or restart it */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrout);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net);
+ } else {
+ /* stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpout);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3);
+ }
+ }
+ }
+ }
+ if (one_chunk) {
+ break;
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_SEND);
+ }
+ }
+ if (old_start_at == NULL) {
+ old_start_at = start_at;
+ start_at = TAILQ_FIRST(&asoc->nets);
+ if (old_start_at)
+ goto again_one_more_time;
+ }
+ /*
+ * At the end there should be no NON timed chunks hanging on this
+ * queue.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, *num_out, SCTP_CWND_LOG_FROM_SEND);
+ }
+ if ((*num_out == 0) && (*reason_code == 0)) {
+ *reason_code = 4;
+ } else {
+ *reason_code = 5;
+ }
+ sctp_clean_up_ctl(stcb, asoc);
+ return (0);
+}
+
+void
+sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
+{
+ /*-
+	 * Prepend an OPERATION_ERROR chunk header and put it on the end of
+ * the control chunk queue.
+ */
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+ struct mbuf *mat;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(op_err);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_DONTWAIT);
+ if (op_err == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ chk->send_size = 0;
+ mat = op_err;
+ while (mat != NULL) {
+ chk->send_size += SCTP_BUF_LEN(mat);
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = op_err;
+ chk->whoTo = chk->asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ hdr = mtod(op_err, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_OPERATION_ERROR;
+ hdr->chunk_flags = 0;
+ hdr->chunk_length = htons(chk->send_size);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue,
+ chk,
+ sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+}
+
+int
+sctp_send_cookie_echo(struct mbuf *m,
+ int offset,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /*-
+ * pull out the cookie and put it at the front of the control chunk
+ * queue.
+ */
+ int at;
+ struct mbuf *cookie;
+ struct sctp_paramhdr parm, *phdr;
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+ uint16_t ptype, plen;
+
+ /* First find the cookie in the param area */
+ cookie = NULL;
+ at = offset + sizeof(struct sctp_init_chunk);
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ do {
+ phdr = sctp_get_next_param(m, at, &parm, sizeof(parm));
+ if (phdr == NULL) {
+ return (-3);
+ }
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (ptype == SCTP_STATE_COOKIE) {
+ int pad;
+
+ /* found the cookie */
+ if ((pad = (plen % 4))) {
+ plen += 4 - pad;
+ }
+ cookie = SCTP_M_COPYM(m, at, plen, M_DONTWAIT);
+ if (cookie == NULL) {
+ /* No memory */
+ return (-2);
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = cookie;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ break;
+ }
+ at += SCTP_SIZE32(plen);
+ } while (phdr);
+ if (cookie == NULL) {
+ /* Did not find the cookie */
+ return (-3);
+ }
+	/* OK, we got the cookie; let's change it into a cookie echo chunk */
+
+ /* first the change from param to cookie */
+ hdr = mtod(cookie, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_COOKIE_ECHO;
+ hdr->chunk_flags = 0;
+ /* get the chunk stuff now and place it in the FRONT of the queue */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(cookie);
+ return (-5);
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = plen;
+ chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
+ chk->asoc = &stcb->asoc;
+ chk->data = cookie;
+ chk->whoTo = chk->asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_HEAD(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return (0);
+}
+
+void
+sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
+ struct mbuf *m,
+ int offset,
+ int chk_length,
+ struct sctp_nets *net)
+{
+ /*
+	 * take an HB request, turn it into an HB ack, and queue it to be sent.
+ */
+ struct mbuf *outchain;
+ struct sctp_chunkhdr *chdr;
+ struct sctp_tmit_chunk *chk;
+
+
+ if (net == NULL)
+ /* must have a net pointer */
+ return;
+
+ outchain = SCTP_M_COPYM(m, offset, chk_length, M_DONTWAIT);
+ if (outchain == NULL) {
+ /* gak out of memory */
+ return;
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = outchain;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ chdr = mtod(outchain, struct sctp_chunkhdr *);
+ chdr->chunk_type = SCTP_HEARTBEAT_ACK;
+ chdr->chunk_flags = 0;
+ if (chk_length % 4) {
+ /* need pad */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (chk_length % 4);
+ m_copyback(outchain, chk_length, padlen, (caddr_t)&cpthis);
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(outchain);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = chk_length;
+ chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = outchain;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_send_cookie_ack(struct sctp_tcb *stcb)
+{
+ /* formulate and queue a cookie-ack back to sender */
+ struct mbuf *cookie_ack;
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+
+ cookie_ack = NULL;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+
+ cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (cookie_ack == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(cookie_ack, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(cookie_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->rec.chunk_id.id = SCTP_COOKIE_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = cookie_ack;
+ if (chk->asoc->last_control_chunk_from != NULL) {
+ chk->whoTo = chk->asoc->last_control_chunk_from;
+ } else {
+ chk->whoTo = chk->asoc->primary_destination;
+ }
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ hdr = mtod(cookie_ack, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_COOKIE_ACK;
+ hdr->chunk_flags = 0;
+ hdr->chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(cookie_ack) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+
+void
+sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* formulate and queue a SHUTDOWN-ACK back to the sender */
+ struct mbuf *m_shutdown_ack;
+ struct sctp_shutdown_ack_chunk *ack_cp;
+ struct sctp_tmit_chunk *chk;
+
+ m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown_ack == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(m_shutdown_ack, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_shutdown_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = m_shutdown_ack;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+
+ ack_cp = mtod(m_shutdown_ack, struct sctp_shutdown_ack_chunk *);
+ ack_cp->ch.chunk_type = SCTP_SHUTDOWN_ACK;
+ ack_cp->ch.chunk_flags = 0;
+ ack_cp->ch.chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(m_shutdown_ack) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* formulate and queue a SHUTDOWN to the sender */
+ struct mbuf *m_shutdown;
+ struct sctp_shutdown_chunk *shutdown_cp;
+ struct sctp_tmit_chunk *chk;
+
+ m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(m_shutdown, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_shutdown);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_shutdown_chunk);
+ chk->rec.chunk_id.id = SCTP_SHUTDOWN;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = m_shutdown;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+
+ shutdown_cp = mtod(m_shutdown, struct sctp_shutdown_chunk *);
+ shutdown_cp->ch.chunk_type = SCTP_SHUTDOWN;
+ shutdown_cp->ch.chunk_flags = 0;
+ shutdown_cp->ch.chunk_length = htons(chk->send_size);
+ shutdown_cp->cumulative_tsn_ack = htonl(stcb->asoc.cumulative_tsn);
+ SCTP_BUF_LEN(m_shutdown) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked)
+{
+ /*
+ * formulate and queue an ASCONF to the peer. ASCONF parameters
+ * should be queued on the assoc queue.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct mbuf *m_asconf;
+ int len;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+
+ if ((!TAILQ_EMPTY(&stcb->asoc.asconf_send_queue)) &&
+ (!sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS))) {
+ /* can't send a new one if there is one in flight already */
+ return;
+ }
+ /* compose an ASCONF chunk, maximum length is PMTU */
+ m_asconf = sctp_compose_asconf(stcb, &len, addr_locked);
+ if (m_asconf == NULL) {
+ return;
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_asconf);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->data = m_asconf;
+ chk->send_size = len;
+ chk->rec.chunk_id.id = SCTP_ASCONF;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
+ chk->asoc = &stcb->asoc;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&chk->asoc->asconf_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_asconf_ack(struct sctp_tcb *stcb)
+{
+ /*
+	 * formulate and queue an asconf-ack back to the sender. The asconf-ack
+ * must be stored in the tcb.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct sctp_asconf_ack *ack, *latest_ack;
+ struct mbuf *m_ack, *m;
+ struct sctp_nets *net = NULL;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* Get the latest ASCONF-ACK */
+ latest_ack = TAILQ_LAST(&stcb->asoc.asconf_ack_sent, sctp_asconf_ackhead);
+ if (latest_ack == NULL) {
+ return;
+ }
+ if (latest_ack->last_sent_to != NULL &&
+ latest_ack->last_sent_to == stcb->asoc.last_control_chunk_from) {
+ /* we're doing a retransmission */
+ net = sctp_find_alternate_net(stcb, stcb->asoc.last_control_chunk_from, 0);
+ if (net == NULL) {
+ /* no alternate */
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ net = stcb->asoc.primary_destination;
+ else
+ net = stcb->asoc.last_control_chunk_from;
+ }
+ } else {
+ /* normal case */
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ net = stcb->asoc.primary_destination;
+ else
+ net = stcb->asoc.last_control_chunk_from;
+ }
+ latest_ack->last_sent_to = net;
+
+ TAILQ_FOREACH(ack, &stcb->asoc.asconf_ack_sent, next) {
+ if (ack->data == NULL) {
+ continue;
+ }
+ /* copy the asconf_ack */
+ m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_DONTWAIT);
+ if (m_ack == NULL) {
+ /* couldn't copy it */
+ return;
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = m_ack;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ if (m_ack)
+ sctp_m_freem(m_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+
+ chk->whoTo = net;
+ chk->data = m_ack;
+ chk->send_size = 0;
+ /* Get size */
+ m = m_ack;
+ chk->send_size = ack->len;
+ chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* XXX */
+ chk->asoc = &stcb->asoc;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ }
+ return;
+}
+
+
+static int
+sctp_chunk_retransmission(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *cnt_out, struct timeval *now, int *now_filled, int *fr_done, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*-
+ * send out one MTU of retransmission. If fast_retransmit is
+ * happening we ignore the cwnd. Otherwise we obey the cwnd and
+ * rwnd. For a Cookie or Asconf in the control chunk queue we
+ * retransmit them by themselves.
+ *
+ * For data chunks we will pick out the lowest TSN's in the sent_queue
+ * marked for resend and bundle them all together (up to a MTU of
+ * destination). The address to send to should have been
+ * selected/changed where the retransmission was marked (i.e. in FR
+ * or t3-timeout routines).
+ */
+ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
+ struct sctp_tmit_chunk *chk, *fwd;
+ struct mbuf *m, *endofchain;
+ struct sctp_nets *net = NULL;
+ uint32_t tsns_sent = 0;
+ int no_fragmentflg, bundle_at, cnt_thru;
+ unsigned int mtu;
+ int error, i, one_chunk, fwd_tsn, ctl_cnt, tmr_started;
+ struct sctp_auth_chunk *auth = NULL;
+ uint32_t auth_offset = 0;
+ uint16_t auth_keyid;
+ int override_ok = 1;
+ int data_auth_reqd = 0;
+ uint32_t dmtu = 0;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ tmr_started = ctl_cnt = bundle_at = error = 0;
+ no_fragmentflg = 1;
+ fwd_tsn = 0;
+ *cnt_out = 0;
+ fwd = NULL;
+ endofchain = m = NULL;
+ auth_keyid = stcb->asoc.authinfo.active_keyid;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC3, 1);
+#endif
+ if ((TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (TAILQ_EMPTY(&asoc->control_send_queue))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "SCTP hits empty queue with cnt set to %d?\n",
+ asoc->sent_queue_retran_cnt);
+ asoc->sent_queue_cnt = 0;
+ asoc->sent_queue_cnt_removeable = 0;
+ /* send back 0/0 so we enter normal transmission */
+ *cnt_out = 0;
+ return (0);
+ }
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if ((chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) ||
+ (chk->rec.chunk_id.id == SCTP_STREAM_RESET) ||
+ (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN)) {
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ continue;
+ }
+ if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) {
+ if (chk != asoc->str_reset) {
+ /*
+ * not eligible for retran if it's
+ * not ours
+ */
+ continue;
+ }
+ }
+ ctl_cnt++;
+ if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
+ fwd_tsn = 1;
+ fwd = chk;
+ }
+ /*
+ * Add an AUTH chunk, if the chunk requires it, and save the
+ * offset into the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks))) {
+ m = sctp_add_auth_chunk(m, &endofchain,
+ &auth, &auth_offset,
+ stcb,
+ chk->rec.chunk_id.id);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ }
+ m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
+ break;
+ }
+ }
+ one_chunk = 0;
+ cnt_thru = 0;
+ /* do we have control chunks to retransmit? */
+ if (m != NULL) {
+ /* Start a timer no matter if we succeed or fail */
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, chk->whoTo);
+ } else if (chk->rec.chunk_id.id == SCTP_ASCONF)
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, chk->whoTo);
+ chk->snd_count++; /* update our count */
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, chk->whoTo,
+ (struct sockaddr *)&chk->whoTo->ro._l_addr, m,
+ auth_offset, auth, stcb->asoc.authinfo.active_keyid,
+ no_fragmentflg, 0, NULL, 0,
+ inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
+ chk->whoTo->port, so_locked, NULL))) {
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ return (error);
+ }
+ m = endofchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ /*
+ * We don't want to mark the net->sent time here since we
+ * use this for HB and retransmissions cannot measure RTT
+ */
+ /* (void)SCTP_GETTIME_TIMEVAL(&chk->whoTo->last_sent_time); */
+ *cnt_out += 1;
+ chk->sent = SCTP_DATAGRAM_SENT;
+ sctp_ucount_decr(stcb->asoc.sent_queue_retran_cnt);
+ if (fwd_tsn == 0) {
+ return (0);
+ } else {
+ /* Clean up the fwd-tsn list */
+ sctp_clean_up_ctl(stcb, asoc);
+ return (0);
+ }
+ }
+ /*
+ * Ok, it is just data retransmission we need to do or that and a
+ * fwd-tsn with it all.
+ */
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ return (SCTP_RETRAN_DONE);
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT)) {
+ /* not yet open, resend the cookie and that is it */
+ return (1);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(20, inp, stcb, NULL);
+#endif
+ data_auth_reqd = sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks);
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ /* No, not sent to this net or not ready for rtx */
+ continue;
+ }
+ if (chk->data == NULL) {
+ printf("TSN:%x chk->snd_count:%d chk->sent:%d can't retran - no data\n",
+ chk->rec.data.TSN_seq, chk->snd_count, chk->sent);
+ continue;
+ }
+ if ((SCTP_BASE_SYSCTL(sctp_max_retran_chunk)) &&
+ (chk->snd_count >= SCTP_BASE_SYSCTL(sctp_max_retran_chunk))) {
+ /* Gak, we have exceeded max unlucky retran, abort! */
+ SCTP_PRINTF("Gak, chk->snd_count:%d >= max:%d - send abort\n",
+ chk->snd_count,
+ SCTP_BASE_SYSCTL(sctp_max_retran_chunk));
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, 0, NULL, so_locked);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (SCTP_RETRAN_EXIT);
+ }
+ /* pick up the net */
+ net = chk->whoTo;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ mtu = (net->mtu - SCTP_MIN_OVERHEAD);
+ } else {
+ mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
+ }
+
+ if ((asoc->peers_rwnd < mtu) && (asoc->total_flight > 0)) {
+ /* No room in peers rwnd */
+ uint32_t tsn;
+
+ tsn = asoc->last_acked_seq + 1;
+ if (tsn == chk->rec.data.TSN_seq) {
+ /*
+ * we make a special exception for this
+ * case. The peer has no rwnd but is missing
+ * the lowest chunk.. which is probably what
+ * is holding up the rwnd.
+ */
+ goto one_chunk_around;
+ }
+ return (1);
+ }
+one_chunk_around:
+ if (asoc->peers_rwnd < mtu) {
+ one_chunk = 1;
+ if ((asoc->peers_rwnd == 0) &&
+ (asoc->total_flight == 0)) {
+ chk->window_probe = 1;
+ chk->whoTo->window_probe = 1;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC3, 2);
+#endif
+ bundle_at = 0;
+ m = NULL;
+ net->fast_retran_ip = 0;
+ if (chk->rec.data.doing_fast_retransmit == 0) {
+ /*
+ * if no FR is in progress, skip destinations that have
+ * flight_size > cwnd.
+ */
+ if (net->flight_size >= net->cwnd) {
+ continue;
+ }
+ } else {
+ /*
+ * Mark the destination net to have FR recovery
+ * limits put on it.
+ */
+ *fr_done = 1;
+ net->fast_retran_ip = 1;
+ }
+
+ /*
+ * if no AUTH is yet included and this chunk requires it,
+ * make sure to account for it. We don't apply the size
+ * until the AUTH chunk is actually added below in case
+ * there is no room for this chunk.
+ */
+ if (data_auth_reqd && (auth == NULL)) {
+ dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ dmtu = 0;
+
+ if ((chk->send_size <= (mtu - dmtu)) ||
+ (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
+ /* ok we will add this one */
+ if (data_auth_reqd) {
+ if (auth == NULL) {
+ m = sctp_add_auth_chunk(m,
+ &endofchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ SCTP_DATA);
+ auth_keyid = chk->auth_keyid;
+ override_ok = 0;
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ } else if (override_ok) {
+ auth_keyid = chk->auth_keyid;
+ override_ok = 0;
+ } else if (chk->auth_keyid != auth_keyid) {
+ /* different keyid, so done bundling */
+ break;
+ }
+ }
+ m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* update our MTU size */
+ if (mtu > (chk->send_size + dmtu))
+ mtu -= (chk->send_size + dmtu);
+ else
+ mtu = 0;
+ data_list[bundle_at++] = chk;
+ if (one_chunk && (asoc->total_flight <= 0)) {
+ SCTP_STAT_INCR(sctps_windowprobed);
+ }
+ }
+ if (one_chunk == 0) {
+ /*
+ * now, are there any more chunks forward of chk to
+ * pick up?
+ */
+ fwd = TAILQ_NEXT(chk, sctp_next);
+ while (fwd) {
+ if (fwd->sent != SCTP_DATAGRAM_RESEND) {
+ /* Nope, not for retran */
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ continue;
+ }
+ if (fwd->whoTo != net) {
+ /* Nope, not the net in question */
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ continue;
+ }
+ if (data_auth_reqd && (auth == NULL)) {
+ dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ dmtu = 0;
+ if (fwd->send_size <= (mtu - dmtu)) {
+ if (data_auth_reqd) {
+ if (auth == NULL) {
+ m = sctp_add_auth_chunk(m,
+ &endofchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ SCTP_DATA);
+ auth_keyid = fwd->auth_keyid;
+ override_ok = 0;
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ } else if (override_ok) {
+ auth_keyid = fwd->auth_keyid;
+ override_ok = 0;
+ } else if (fwd->auth_keyid != auth_keyid) {
+ /*
+ * different keyid,
+ * so done bundling
+ */
+ break;
+ }
+ }
+ m = sctp_copy_mbufchain(fwd->data, m, &endofchain, 0, fwd->send_size, fwd->copy_by_ref);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ /* Do clear IP_DF ? */
+ if (fwd->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* update our MTU size */
+ if (mtu > (fwd->send_size + dmtu))
+ mtu -= (fwd->send_size + dmtu);
+ else
+ mtu = 0;
+ data_list[bundle_at++] = fwd;
+ if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
+ break;
+ }
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ } else {
+ /* can't fit so we are done */
+ break;
+ }
+ }
+ }
+ /* Is there something to send for this destination? */
+ if (m) {
+ /*
+ * No matter if we fail or succeed we should start a
+ * timer. A failure is like a lost IP packet :-)
+ */
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /*
+ * no timer running on this destination
+ * restart it.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ tmr_started = 1;
+ }
+ /* Now lets send it, if there is anything to send :> */
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr, m,
+ auth_offset, auth, auth_keyid,
+ no_fragmentflg, 0, NULL, 0,
+ inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
+ net->port, so_locked, NULL))) {
+ /* error, we could not output */
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ return (error);
+ }
+ m = endofchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ /* For HB's */
+ /*
+ * We don't want to mark the net->sent time here
+ * since we use this for HB and retransmissions cannot
+ * measure RTT
+ */
+ /* (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time); */
+
+ /* For auto-close */
+ cnt_thru++;
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
+ *now = asoc->time_last_sent;
+ *now_filled = 1;
+ } else {
+ asoc->time_last_sent = *now;
+ }
+ *cnt_out += bundle_at;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC4, bundle_at);
+#endif
+ if (bundle_at) {
+ tsns_sent = data_list[0]->rec.data.TSN_seq;
+ }
+ for (i = 0; i < bundle_at; i++) {
+ SCTP_STAT_INCR(sctps_sendretransdata);
+ data_list[i]->sent = SCTP_DATAGRAM_SENT;
+ /*
+ * When we have revoked data and we
+ * retransmit it, we clear the revoked
+ * flag since this flag dictates whether we
+ * subtracted it from the flight size
+ */
+ if (data_list[i]->rec.data.chunk_was_revoked) {
+ /* Deflate the cwnd */
+ data_list[i]->whoTo->cwnd -= data_list[i]->book_size;
+ data_list[i]->rec.data.chunk_was_revoked = 0;
+ }
+ data_list[i]->snd_count++;
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ /* record the time */
+ data_list[i]->sent_rcv_time = asoc->time_last_sent;
+ if (data_list[i]->book_size_scale) {
+ /*
+ * need to double the book size on
+ * this one
+ */
+ data_list[i]->book_size_scale = 0;
+ /*
+ * Since we double the booksize, we
+ * must also double the output queue
+ * size, since it gets shrunk by
+ * this amount when we free.
+ */
+ atomic_add_int(&((asoc)->total_output_queue_size), data_list[i]->book_size);
+ data_list[i]->book_size *= 2;
+
+
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
+ asoc->peers_rwnd, data_list[i]->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
+ (uint32_t) (data_list[i]->send_size +
+ SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)));
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_RSND,
+ data_list[i]->whoTo->flight_size,
+ data_list[i]->book_size,
+ (uintptr_t) data_list[i]->whoTo,
+ data_list[i]->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(data_list[i]);
+ sctp_total_flight_increase(stcb, data_list[i]);
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if ((i == 0) &&
+ (data_list[i]->rec.data.doing_fast_retransmit)) {
+ SCTP_STAT_INCR(sctps_sendfastretrans);
+ if ((data_list[i] == TAILQ_FIRST(&asoc->sent_queue)) &&
+ (tmr_started == 0)) {
+ /*-
+ * ok we just fast-retrans'd
+ * the lowest TSN, i.e the
+ * first on the list. In
+ * this case we want to give
+ * some more time to get a
+ * SACK back without a
+ * t3-expiring.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_RESEND);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(21, inp, stcb, NULL);
+#endif
+ } else {
+ /* None will fit */
+ return (1);
+ }
+ if (asoc->sent_queue_retran_cnt <= 0) {
+ /* all done we have no more to retran */
+ asoc->sent_queue_retran_cnt = 0;
+ break;
+ }
+ if (one_chunk) {
+ /* No more room in rwnd */
+ return (1);
+ }
+ /* stop the for loop here. we sent out a packet */
+ break;
+ }
+ return (0);
+}
+
+
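+/*
+ * Make sure a retransmission timer is pending on at least one destination;
+ * if none is, start one on the primary path before returning ret.
+ */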
+static int
+sctp_timer_validation(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int ret)
+{
+ struct sctp_nets *net;
+
+ /* Validate that a timer is running somewhere */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /* Here is a timer */
+ return (ret);
+ }
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* Gak, we did not have a timer somewhere */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Deadlock avoided starting timer on a dest at retran\n");
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, asoc->primary_destination);
+ return (ret);
+}
+
+void
+sctp_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ int from_where,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*-
+ * Ok, this is the generic chunk service queue. We must do the
+ * following:
+ * - See if there are retransmits pending; if so we must
+ * do these first.
+ * - Service the stream queue that is next, moving any
+ * message (note I must get a complete message, i.e.
+ * FIRST/MIDDLE and LAST, to the out queue in one pass) and assigning
+ * TSNs
+ * - Check to see if the cwnd/rwnd allows any output; if so we
+ * go ahead and formulate and send the low level chunks, making sure
+ * to also combine any control in the control chunk queue.
+ */
+ struct sctp_association *asoc;
+ struct sctp_nets *net;
+ int error = 0, num_out = 0, tot_out = 0, ret = 0, reason_code = 0,
+ burst_cnt = 0, burst_limit = 0;
+ struct timeval now;
+ int now_filled = 0;
+ int nagle_on = 0;
+ int frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ int un_sent = 0;
+ int fr_done, tot_frs = 0;
+
+ asoc = &stcb->asoc;
+ if (from_where == SCTP_OUTPUT_FROM_USR_SEND) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) {
+ nagle_on = 0;
+ } else {
+ nagle_on = 1;
+ }
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+
+ un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
+
+ if ((un_sent <= 0) &&
+ (TAILQ_EMPTY(&asoc->control_send_queue)) &&
+ (TAILQ_EMPTY(&asoc->asconf_send_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0)) {
+ /* Nothing to do unless there is something left to be sent */
+ return;
+ }
+ /*
+ * If we have something to send, data or control, AND a SACK timer is
+ * running, piggy-back the SACK.
+ */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_send_sack(stcb);
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ }
+ while (asoc->sent_queue_retran_cnt) {
+ /*-
+ * Ok, it is retransmission time only, we send out only ONE
+ * packet with a single call off to the retran code.
+ */
+ if (from_where == SCTP_OUTPUT_FROM_COOKIE_ACK) {
+ /*-
+ * Special hook for handling cookies discarded
+ * by the peer that carried data. Send the cookie-ack only
+ * and then the next call will get the retransmissions.
+ */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
+ from_where,
+ &now, &now_filled, frag_point, so_locked);
+ return;
+ } else if (from_where != SCTP_OUTPUT_FROM_HB_TMR) {
+ /* if it's not from an HB then do it */
+ fr_done = 0;
+ ret = sctp_chunk_retransmission(inp, stcb, asoc, &num_out, &now, &now_filled, &fr_done, so_locked);
+ if (fr_done) {
+ tot_frs++;
+ }
+ } else {
+ /*
+ * it's from any other place, we don't allow retransmission
+ * output (only control)
+ */
+ ret = 1;
+ }
+ if (ret > 0) {
+ /* Can't send anymore */
+ /*-
+ * now let's push out control by calling med-level
+ * output once. This assures that we WILL send HBs
+ * if queued too.
+ */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
+ from_where,
+ &now, &now_filled, frag_point, so_locked);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(8, inp, stcb, NULL);
+#endif
+ (void)sctp_timer_validation(inp, stcb, asoc, ret);
+ return;
+ }
+ if (ret < 0) {
+ /*-
+ * The count was off; retransmission is not happening, so
+ * fall through to the normal transmission path.
+ */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(9, inp, stcb, NULL);
+#endif
+ if (ret == SCTP_RETRAN_EXIT) {
+ return;
+ }
+ break;
+ }
+ if (from_where == SCTP_OUTPUT_FROM_T3) {
+ /* Only one transmission allowed out of a timeout */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(10, inp, stcb, NULL);
+#endif
+ /* Push out any control */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1, from_where,
+ &now, &now_filled, frag_point, so_locked);
+ return;
+ }
+ if (tot_frs > asoc->max_burst) {
+ /* Hit FR burst limit */
+ return;
+ }
+ if ((num_out == 0) && (ret == 0)) {
+
+ /* No more retrans to send */
+ break;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(12, inp, stcb, NULL);
+#endif
+ /* Check for bad destinations, if they exist move chunks around. */
+ burst_limit = asoc->max_burst;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /*-
+ * if possible move things off of this address. We
+ * still may send below due to the dormant state, but
+ * we try to find an alternate address to send to,
+ * and if we have one we move all queued data on the
+ * out wheel to this alternate address.
+ */
+ if (net->ref_count > 1)
+ sctp_move_chunks_from_net(stcb, net);
+ } else if ((asoc->sctp_cmt_on_off == 1) &&
+ (asoc->sctp_cmt_pf > 0) &&
+ ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the current
+ * destination is in PF state, move all queued data
+ * to an alternate destination.
+ */
+ if (net->ref_count > 1)
+ sctp_move_chunks_from_net(stcb, net);
+ } else {
+ /*-
+ * if ((asoc->sat_network) || (net->addr_is_local))
+ * { burst_limit = asoc->max_burst *
+ * SCTP_SAT_NETWORK_BURST_INCR; }
+ */
+ if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst)) {
+ if ((net->flight_size + (burst_limit * net->mtu)) < net->cwnd) {
+ /*
+ * JRS - Use the congestion control
+ * given in the congestion control
+ * module
+ */
+ asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, burst_limit);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, net, 0, burst_limit, SCTP_MAX_BURST_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_maxburstqueued);
+ }
+ net->fast_retran_ip = 0;
+ } else {
+ if (net->flight_size == 0) {
+ /* Should be decaying the cwnd here */
+ ;
+ }
+ }
+ }
+
+ }
+ burst_cnt = 0;
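+ /*
+ * Keep calling the medium-level output routine until nothing more
+ * goes out or the max-burst limit stops us.
+ */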
+ do {
+ error = sctp_med_chunk_output(inp, stcb, asoc, &num_out,
+ &reason_code, 0, from_where,
+ &now, &now_filled, frag_point, so_locked);
+ if (error) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Error %d was returned from med-c-op\n", error);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, asoc->primary_destination, error, burst_cnt, SCTP_MAX_BURST_ERROR_STOP);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, error, SCTP_SEND_NOW_COMPLETES);
+ sctp_log_cwnd(stcb, NULL, 0xdeadbeef, SCTP_SEND_NOW_COMPLETES);
+ }
+ break;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "m-c-o put out %d\n", num_out);
+
+ tot_out += num_out;
+ burst_cnt++;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, num_out, SCTP_SEND_NOW_COMPLETES);
+ if (num_out == 0) {
+ sctp_log_cwnd(stcb, NULL, reason_code, SCTP_SEND_NOW_COMPLETES);
+ }
+ }
+ if (nagle_on) {
+ /*-
+ * When nagle is on, we look at how much is un_sent, then
+ * if it is smaller than an MTU and we have data in
+ * flight we stop.
+ */
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
+ if ((un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD)) &&
+ (stcb->asoc.total_flight > 0)) {
+ break;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->control_send_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->out_wheel)) {
+ /* Nothing left to send */
+ break;
+ }
+ if ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) <= 0) {
+ /* Nothing left to send */
+ break;
+ }
+ } while (num_out && (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) ||
+ (burst_cnt < burst_limit)));
+
+ if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) == 0) {
+ if (burst_cnt >= burst_limit) {
+ SCTP_STAT_INCR(sctps_maxburstqueued);
+ asoc->burst_limit_applied = 1;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, asoc->primary_destination, 0, burst_cnt, SCTP_MAX_BURST_APPLIED);
+ }
+ } else {
+ asoc->burst_limit_applied = 0;
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, tot_out, SCTP_SEND_NOW_COMPLETES);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, we have put out %d chunks\n",
+ tot_out);
+
+ /*-
+ * Now we need to clean up the control chunk chain if an ECNE is on
+ * it. It must be marked as UNSENT again so the next call will continue
+ * to send it until such time as we get a CWR to remove it.
+ */
+ if (stcb->asoc.ecn_echo_cnt_onq)
+ sctp_fix_ecn_echo(asoc);
+ return;
+}
+
+
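+/*
+ * Socket-layer entry point: validate the endpoint and hand the
+ * message off to sctp_sosend().
+ */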
+int
+sctp_output(inp, m, addr, control, p, flags)
+ struct sctp_inpcb *inp;
+ struct mbuf *m;
+ struct sockaddr *addr;
+ struct mbuf *control;
+ struct thread *p;
+ int flags;
+{
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (inp->sctp_socket == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ return (sctp_sosend(inp->sctp_socket,
+ addr,
+ (struct uio *)NULL,
+ m,
+ control,
+ flags, p
+ ));
+}
+
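+/*
+ * Build (or reuse) a FORWARD-TSN chunk on the control queue that tells the
+ * peer how far it may advance its cumulative TSN past skipped chunks.
+ */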
+void
+send_forward_tsn(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ struct sctp_forward_tsn_chunk *fwdtsn;
+ uint32_t advance_peer_ack_point;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
+ /* mark it as unsent */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ /* Do we correct its output location? */
+ if (chk->whoTo != asoc->primary_destination) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ }
+ goto sctp_fill_in_rest;
+ }
+ }
+ /* Ok if we reach here we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ asoc->fwd_tsn_cnt++;
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = asoc;
+ chk->whoTo = NULL;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+sctp_fill_in_rest:
+ /*-
+ * Here we go through and fill out the part that deals with
+ * stream/seq of the ones we skip.
+ */
+ SCTP_BUF_LEN(chk->data) = 0;
+ {
+ struct sctp_tmit_chunk *at, *tp1, *last;
+ struct sctp_strseq *strseq;
+ unsigned int cnt_of_space, i, ovh;
+ unsigned int space_needed;
+ unsigned int cnt_of_skipped = 0;
+
+ TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
+ if (at->sent != SCTP_FORWARD_TSN_SKIP) {
+ /* no more to look at */
+ break;
+ }
+ if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
+ /* We don't report these */
+ continue;
+ }
+ cnt_of_skipped++;
+ }
+ space_needed = (sizeof(struct sctp_forward_tsn_chunk) +
+ (cnt_of_skipped * sizeof(struct sctp_strseq)));
+
+ cnt_of_space = M_TRAILINGSPACE(chk->data);
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MIN_OVERHEAD;
+ } else {
+ ovh = SCTP_MIN_V4_OVERHEAD;
+ }
+ if (cnt_of_space > (asoc->smallest_mtu - ovh)) {
+ /* trim to a mtu size */
+ cnt_of_space = asoc->smallest_mtu - ovh;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
+ sctp_misc_ints(SCTP_FWD_TSN_CHECK,
+ 0xff, 0, cnt_of_skipped,
+ asoc->advanced_peer_ack_point);
+
+ }
+ advance_peer_ack_point = asoc->advanced_peer_ack_point;
+ if (cnt_of_space < space_needed) {
+ /*-
+ * ok we must trim down the chunk by lowering the
+ * advance peer ack point.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
+ sctp_misc_ints(SCTP_FWD_TSN_CHECK,
+ 0xff, 0xff, cnt_of_space,
+ space_needed);
+ }
+ cnt_of_skipped = cnt_of_space - sizeof(struct sctp_forward_tsn_chunk);
+ cnt_of_skipped /= sizeof(struct sctp_strseq);
+ /*-
+ * Go through and find the TSN that will be the one
+ * we report.
+ */
+ at = TAILQ_FIRST(&asoc->sent_queue);
+ for (i = 0; i < cnt_of_skipped; i++) {
+ tp1 = TAILQ_NEXT(at, sctp_next);
+ if (tp1 == NULL) {
+ break;
+ }
+ at = tp1;
+ }
+ if (at && SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
+ sctp_misc_ints(SCTP_FWD_TSN_CHECK,
+ 0xff, cnt_of_skipped, at->rec.data.TSN_seq,
+ asoc->advanced_peer_ack_point);
+ }
+ last = at;
+ /*-
+ * last now points to last one I can report, update
+ * peer ack point
+ */
+ if (last)
+ advance_peer_ack_point = last->rec.data.TSN_seq;
+ space_needed = sizeof(struct sctp_forward_tsn_chunk) +
+ cnt_of_skipped * sizeof(struct sctp_strseq);
+ }
+ chk->send_size = space_needed;
+ /* Setup the chunk */
+ fwdtsn = mtod(chk->data, struct sctp_forward_tsn_chunk *);
+ fwdtsn->ch.chunk_length = htons(chk->send_size);
+ fwdtsn->ch.chunk_flags = 0;
+ fwdtsn->ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn->new_cumulative_tsn = htonl(advance_peer_ack_point);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ fwdtsn++;
+ /*-
+ * Move pointer to after the fwdtsn and transfer to the
+ * strseq pointer.
+ */
+ strseq = (struct sctp_strseq *)fwdtsn;
+ /*-
+ * Now populate the strseq list. This is done blindly
+ * without pulling out duplicate stream info. This is
+ * inefficient but won't harm the process since the peer will
+ * look at these in sequence and will thus release anything.
+ * It could mean we exceed the PMTU and chop off some that
+ * we could have included.. but this is unlikely (aka 1432/4
+ * would mean 300+ stream seq's would have to be reported in
+ * one FWD-TSN). With a bit of work we can later FIX this to
+ * optimize and pull out duplicates.. but it does add more
+ * overhead. So for now... not!
+ */
+ at = TAILQ_FIRST(&asoc->sent_queue);
+ for (i = 0; i < cnt_of_skipped; i++) {
+ tp1 = TAILQ_NEXT(at, sctp_next);
+ if (tp1 == NULL)
+ break;
+ if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
+ /* We don't report these */
+ i--;
+ at = tp1;
+ continue;
+ }
+ if (at->rec.data.TSN_seq == advance_peer_ack_point) {
+ at->rec.data.fwd_tsn_cnt = 0;
+ }
+ strseq->stream = ntohs(at->rec.data.stream_number);
+ strseq->sequence = ntohs(at->rec.data.stream_seq);
+ strseq++;
+ at = tp1;
+ }
+ }
+ return;
+
+}
+
+void
+sctp_send_sack(struct sctp_tcb *stcb)
+{
+ /*-
+ * Queue up a SACK or NR-SACK in the control queue.
+ * We must first check to see if a SACK or NR-SACK is
+ * somehow on the control queue.
+ * If so, we will take and remove the old one.
+ */
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *a_chk;
+ struct sctp_sack_chunk *sack;
+ struct sctp_nr_sack_chunk *nr_sack;
+ struct sctp_gap_ack_block *gap_descriptor;
+ struct sack_track *selector;
+ int mergeable = 0;
+ int offset;
+ caddr_t limit;
+ uint32_t *dup;
+ int limit_reached = 0;
+ unsigned int i, siz, j;
+ unsigned int num_gap_blocks = 0, num_nr_gap_blocks = 0, space;
+ int num_dups = 0;
+ int space_req;
+ uint32_t highest_tsn;
+ uint8_t flags;
+ uint8_t type;
+ uint8_t tsn_map;
+
+ if ((stcb->asoc.sctp_nr_sack_on_off == 1) &&
+ (stcb->asoc.peer_supports_nr_sack == 1)) {
+ type = SCTP_NR_SELECTIVE_ACK;
+ } else {
+ type = SCTP_SELECTIVE_ACK;
+ }
+ a_chk = NULL;
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->last_data_chunk_from == NULL) {
+ /* Hmm we never received anything */
+ return;
+ }
+ sctp_slide_mapping_arrays(stcb);
+ sctp_set_rwnd(stcb, asoc);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == type) {
+ /* Hmm, found a sack already on queue, remove it */
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt--;
+ a_chk = chk;
+ if (a_chk->data) {
+ sctp_m_freem(a_chk->data);
+ a_chk->data = NULL;
+ }
+ sctp_free_remote_addr(a_chk->whoTo);
+ a_chk->whoTo = NULL;
+ break;
+ }
+ }
+ if (a_chk == NULL) {
+ sctp_alloc_a_chunk(stcb, a_chk);
+ if (a_chk == NULL) {
+ /* No memory so we drop the idea, and set a timer */
+ if (stcb->asoc.delayed_ack) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ stcb->asoc.send_sack = 1;
+ }
+ return;
+ }
+ a_chk->copy_by_ref = 0;
+ a_chk->rec.chunk_id.id = type;
+ a_chk->rec.chunk_id.can_take_data = 1;
+ }
+ /* Clear our pkt counts */
+ asoc->data_pkts_seen = 0;
+
+ a_chk->asoc = asoc;
+ a_chk->snd_count = 0;
+ a_chk->send_size = 0; /* fill in later */
+ a_chk->sent = SCTP_DATAGRAM_UNSENT;
+ a_chk->whoTo = NULL;
+
+ if ((asoc->numduptsns) ||
+ (asoc->last_data_chunk_from->dest_state & SCTP_ADDR_NOT_REACHABLE)) {
+ /*-
+ * Ok, we have some duplicates or the destination for the
+ * sack is unreachable, let's see if we can select an
+ * alternate to asoc->last_data_chunk_from
+ */
+ if ((!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_NOT_REACHABLE)) &&
+ (asoc->used_alt_onsack > asoc->numnets)) {
+ /* We used an alt last time, don't use one this time */
+ a_chk->whoTo = NULL;
+ } else {
+ asoc->used_alt_onsack++;
+ a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
+ }
+ if (a_chk->whoTo == NULL) {
+ /* Nope, no alternate */
+ a_chk->whoTo = asoc->last_data_chunk_from;
+ asoc->used_alt_onsack = 0;
+ }
+ } else {
+ /*
+ * No duplicates so we use the last place we received data
+ * from.
+ */
+ asoc->used_alt_onsack = 0;
+ a_chk->whoTo = asoc->last_data_chunk_from;
+ }
+ if (a_chk->whoTo) {
+ atomic_add_int(&a_chk->whoTo->ref_count, 1);
+ }
+ if (compare_with_wrap(asoc->highest_tsn_inside_map, asoc->highest_tsn_inside_nr_map, MAX_TSN)) {
+ highest_tsn = asoc->highest_tsn_inside_map;
+ } else {
+ highest_tsn = asoc->highest_tsn_inside_nr_map;
+ }
+ if (highest_tsn == asoc->cumulative_tsn) {
+ /* no gaps */
+ if (type == SCTP_SELECTIVE_ACK) {
+ space_req = sizeof(struct sctp_sack_chunk);
+ } else {
+ space_req = sizeof(struct sctp_nr_sack_chunk);
+ }
+ } else {
+ /* gaps get a cluster */
+ space_req = MCLBYTES;
+ }
+ /* Ok now lets formulate a MBUF with our sack */
+ a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_DONTWAIT, 1, MT_DATA);
+ if ((a_chk->data == NULL) ||
+ (a_chk->whoTo == NULL)) {
+ /* rats, no mbuf memory */
+ if (a_chk->data) {
+ /* was a problem with the destination */
+ sctp_m_freem(a_chk->data);
+ a_chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, a_chk);
+ /* sa_ignore NO_NULL_CHK */
+ if (stcb->asoc.delayed_ack) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6);
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ stcb->asoc.send_sack = 1;
+ }
+ return;
+ }
+ /* ok, lets go through and fill it in */
+ SCTP_BUF_RESV_UF(a_chk->data, SCTP_MIN_OVERHEAD);
+ space = M_TRAILINGSPACE(a_chk->data);
+ if (space > (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD)) {
+ space = (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD);
+ }
+ limit = mtod(a_chk->data, caddr_t);
+ limit += space;
+
+ /* 0x01 is used by nonce for ecn */
+ if ((SCTP_BASE_SYSCTL(sctp_ecn_enable)) &&
+ (SCTP_BASE_SYSCTL(sctp_ecn_nonce)) &&
+ (asoc->peer_supports_ecn_nonce))
+ flags = (asoc->receiver_nonce_sum & SCTP_SACK_NONCE_SUM);
+ else
+ flags = 0;
+
+ if ((asoc->sctp_cmt_on_off == 1) &&
+ SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) {
+ /*-
+ * CMT DAC algorithm: If 2 packets (i.e., binary 10) have been
+ * received, then set high bit to 1, else 0. Reset
+ * pkts_rcvd.
+ */
+ flags |= (asoc->cmt_dac_pkts_rcvd << 6);
+ asoc->cmt_dac_pkts_rcvd = 0;
+ }
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_logsnt[stcb->asoc.cumack_log_atsnt] = asoc->cumulative_tsn;
+ stcb->asoc.cumack_log_atsnt++;
+ if (stcb->asoc.cumack_log_atsnt >= SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_atsnt = 0;
+ }
+#endif
+ /* reset the readers interpretation */
+ stcb->freed_by_sorcv_sincelast = 0;
+
+ if (type == SCTP_SELECTIVE_ACK) {
+ sack = mtod(a_chk->data, struct sctp_sack_chunk *);
+ nr_sack = NULL;
+ gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)sack + sizeof(struct sctp_sack_chunk));
+ if (highest_tsn > asoc->mapping_array_base_tsn) {
+ siz = (((highest_tsn - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
+ } else {
+ siz = (((MAX_TSN - highest_tsn) + 1) + highest_tsn + 7) / 8;
+ }
+ } else {
+ sack = NULL;
+ nr_sack = mtod(a_chk->data, struct sctp_nr_sack_chunk *);
+ gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)nr_sack + sizeof(struct sctp_nr_sack_chunk));
+ if (asoc->highest_tsn_inside_map > asoc->mapping_array_base_tsn) {
+ siz = (((asoc->highest_tsn_inside_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
+ } else {
+ siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_map + 7) / 8;
+ }
+ }
+
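+ /*
+ * offset translates bit positions in the mapping array into gap-block
+ * offsets relative to the cumulative TSN (the mapping array base TSN
+ * is the TSN represented by bit 0).
+ */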
+ if (compare_with_wrap(asoc->mapping_array_base_tsn, asoc->cumulative_tsn, MAX_TSN)) {
+ offset = 1;
+ } else {
+ offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn;
+ }
+ if (((type == SCTP_SELECTIVE_ACK) &&
+ compare_with_wrap(highest_tsn, asoc->cumulative_tsn, MAX_TSN)) ||
+ ((type == SCTP_NR_SELECTIVE_ACK) &&
+ compare_with_wrap(asoc->highest_tsn_inside_map, asoc->cumulative_tsn, MAX_TSN))) {
+ /* we have a gap .. maybe */
+ for (i = 0; i < siz; i++) {
+ tsn_map = asoc->mapping_array[i];
+ if (type == SCTP_SELECTIVE_ACK) {
+ tsn_map |= asoc->nr_mapping_array[i];
+ }
+ if (i == 0) {
+ /*
+ * Clear all bits corresponding to TSNs
+ * smaller or equal to the cumulative TSN.
+ */
+ tsn_map &= (~0 << (1 - offset));
+ }
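+ /*
+ * sack_array is a 256-entry lookup table indexed by one byte of
+ * the mapping array; it yields the precomputed gap start/end
+ * pairs within those 8 TSNs plus edge flags used to merge gaps
+ * that span byte boundaries.
+ */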
+ selector = &sack_array[tsn_map];
+ if (mergeable && selector->right_edge) {
+ /*
+ * Backup, left and right edges were ok to
+ * merge.
+ */
+ num_gap_blocks--;
+ gap_descriptor--;
+ }
+ if (selector->num_entries == 0)
+ mergeable = 0;
+ else {
+ for (j = 0; j < selector->num_entries; j++) {
+ if (mergeable && selector->right_edge) {
+ /*
+ * do a merge by NOT setting
+ * the left side
+ */
+ mergeable = 0;
+ } else {
+ /*
+ * no merge, set the left
+ * side
+ */
+ mergeable = 0;
+ gap_descriptor->start = htons((selector->gaps[j].start + offset));
+ }
+ gap_descriptor->end = htons((selector->gaps[j].end + offset));
+ num_gap_blocks++;
+ gap_descriptor++;
+ if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) {
+ /* no more room */
+ limit_reached = 1;
+ break;
+ }
+ }
+ if (selector->left_edge) {
+ mergeable = 1;
+ }
+ }
+ if (limit_reached) {
+ /* Reached the limit stop */
+ break;
+ }
+ offset += 8;
+ }
+ }
+ if ((type == SCTP_NR_SELECTIVE_ACK) &&
+ (limit_reached == 0)) {
+
+ mergeable = 0;
+
+ if (asoc->highest_tsn_inside_nr_map > asoc->mapping_array_base_tsn) {
+ siz = (((asoc->highest_tsn_inside_nr_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
+ } else {
+ siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_nr_map + 7) / 8;
+ }
+
+ if (compare_with_wrap(asoc->mapping_array_base_tsn, asoc->cumulative_tsn, MAX_TSN)) {
+ offset = 1;
+ } else {
+ offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn;
+ }
+ if (compare_with_wrap(asoc->highest_tsn_inside_nr_map, asoc->cumulative_tsn, MAX_TSN)) {
+ /* we have a gap .. maybe */
+ for (i = 0; i < siz; i++) {
+ tsn_map = asoc->nr_mapping_array[i];
+ if (i == 0) {
+ /*
+ * Clear all bits corresponding to
+ * TSNs smaller or equal to the
+ * cumulative TSN.
+ */
+ tsn_map &= (~0 << (1 - offset));
+ }
+ selector = &sack_array[tsn_map];
+ if (mergeable && selector->right_edge) {
+ /*
+ * Backup, left and right edges were
+ * ok to merge.
+ */
+ num_nr_gap_blocks--;
+ gap_descriptor--;
+ }
+ if (selector->num_entries == 0)
+ mergeable = 0;
+ else {
+ for (j = 0; j < selector->num_entries; j++) {
+ if (mergeable && selector->right_edge) {
+ /*
+ * do a merge by NOT
+ * setting the left
+ * side
+ */
+ mergeable = 0;
+ } else {
+ /*
+ * no merge, set the
+ * left side
+ */
+ mergeable = 0;
+ gap_descriptor->start = htons((selector->gaps[j].start + offset));
+ }
+ gap_descriptor->end = htons((selector->gaps[j].end + offset));
+ num_nr_gap_blocks++;
+ gap_descriptor++;
+ if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) {
+ /* no more room */
+ limit_reached = 1;
+ break;
+ }
+ }
+ if (selector->left_edge) {
+ mergeable = 1;
+ }
+ }
+ if (limit_reached) {
+ /* Reached the limit stop */
+ break;
+ }
+ offset += 8;
+ }
+ }
+ }
+ /* now we must add any dups we are going to report. */
+ if ((limit_reached == 0) && (asoc->numduptsns)) {
+ dup = (uint32_t *) gap_descriptor;
+ for (i = 0; i < asoc->numduptsns; i++) {
+ *dup = htonl(asoc->dup_tsns[i]);
+ dup++;
+ num_dups++;
+ if (((caddr_t)dup + sizeof(uint32_t)) > limit) {
+ /* no more room */
+ break;
+ }
+ }
+ asoc->numduptsns = 0;
+ }
+ /*
+ * now that the chunk is prepared queue it to the control chunk
+ * queue.
+ */
+ if (type == SCTP_SELECTIVE_ACK) {
+ a_chk->send_size = sizeof(struct sctp_sack_chunk) +
+ (num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) +
+ num_dups * sizeof(int32_t);
+ SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
+ sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
+ sack->sack.a_rwnd = htonl(asoc->my_rwnd);
+ sack->sack.num_gap_ack_blks = htons(num_gap_blocks);
+ sack->sack.num_dup_tsns = htons(num_dups);
+ sack->ch.chunk_type = type;
+ sack->ch.chunk_flags = flags;
+ sack->ch.chunk_length = htons(a_chk->send_size);
+ } else {
+ a_chk->send_size = sizeof(struct sctp_nr_sack_chunk) +
+ (num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) +
+ num_dups * sizeof(int32_t);
+ SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
+ nr_sack->nr_sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
+ nr_sack->nr_sack.a_rwnd = htonl(asoc->my_rwnd);
+ nr_sack->nr_sack.num_gap_ack_blks = htons(num_gap_blocks);
+ nr_sack->nr_sack.num_nr_gap_ack_blks = htons(num_nr_gap_blocks);
+ nr_sack->nr_sack.num_dup_tsns = htons(num_dups);
+ nr_sack->nr_sack.reserved = 0;
+ nr_sack->ch.chunk_type = type;
+ nr_sack->ch.chunk_flags = flags;
+ nr_sack->ch.chunk_length = htons(a_chk->send_size);
+ }
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue, a_chk, sctp_next);
+ asoc->my_last_reported_rwnd = asoc->my_rwnd;
+ asoc->ctrl_queue_cnt++;
+ asoc->send_sack = 0;
+ SCTP_STAT_INCR(sctps_sendsacks);
+ return;
+}
+
+void
+sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_abort;
+ struct mbuf *m_out = NULL, *m_end = NULL;
+ struct sctp_abort_chunk *abort = NULL;
+ int sz;
+ uint32_t auth_offset = 0;
+ struct sctp_auth_chunk *auth = NULL;
+
+ /*-
+ * Add an AUTH chunk, if the chunk requires it, and save the offset into
+ * the chain for AUTH
+ */
+ if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION,
+ stcb->asoc.peer_auth_chunks)) {
+ m_out = sctp_add_auth_chunk(m_out, &m_end, &auth, &auth_offset,
+ stcb, SCTP_ABORT_ASSOCIATION);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_abort == NULL) {
+ /* no mbuf's */
+ if (m_out)
+ sctp_m_freem(m_out);
+ return;
+ }
+ /* link in any error */
+ SCTP_BUF_NEXT(m_abort) = operr;
+ sz = 0;
+ if (operr) {
+ struct mbuf *n;
+
+ n = operr;
+ while (n) {
+ sz += SCTP_BUF_LEN(n);
+ n = SCTP_BUF_NEXT(n);
+ }
+ }
+ SCTP_BUF_LEN(m_abort) = sizeof(*abort);
+ if (m_out == NULL) {
+ /* NO Auth chunk prepended, so reserve space in front */
+ SCTP_BUF_RESV_UF(m_abort, SCTP_MIN_OVERHEAD);
+ m_out = m_abort;
+ } else {
+ /* Put AUTH chunk at the front of the chain */
+ SCTP_BUF_NEXT(m_end) = m_abort;
+ }
+
+ /* fill in the ABORT chunk */
+ abort = mtod(m_abort, struct sctp_abort_chunk *);
+ abort->ch.chunk_type = SCTP_ABORT_ASSOCIATION;
+ abort->ch.chunk_flags = 0;
+ abort->ch.chunk_length = htons(sizeof(*abort) + sz);
+
+ (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination,
+ (struct sockaddr *)&stcb->asoc.primary_destination->ro._l_addr,
+ m_out, auth_offset, auth, stcb->asoc.authinfo.active_keyid, 1, 0, NULL, 0,
+ stcb->sctp_ep->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
+ stcb->asoc.primary_destination->port, so_locked, NULL);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+}
+
+void
+sctp_send_shutdown_complete(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ int reflect_vtag)
+{
+ /* formulate and SEND a SHUTDOWN-COMPLETE */
+ struct mbuf *m_shutdown_comp;
+ struct sctp_shutdown_complete_chunk *shutdown_complete;
+ uint32_t vtag;
+ uint8_t flags;
+
+ m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown_comp == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ if (reflect_vtag) {
+ flags = SCTP_HAD_NO_TCB;
+ vtag = stcb->asoc.my_vtag;
+ } else {
+ flags = 0;
+ vtag = stcb->asoc.peer_vtag;
+ }
+ shutdown_complete = mtod(m_shutdown_comp, struct sctp_shutdown_complete_chunk *);
+ shutdown_complete->ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
+ shutdown_complete->ch.chunk_flags = flags;
+ shutdown_complete->ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
+ SCTP_BUF_LEN(m_shutdown_comp) = sizeof(struct sctp_shutdown_complete_chunk);
+ (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ m_shutdown_comp, 0, NULL, 0, 1, 0, NULL, 0,
+ stcb->sctp_ep->sctp_lport, stcb->rport,
+ htonl(vtag),
+ net->port, SCTP_SO_NOT_LOCKED, NULL);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ return;
+}
+
+void
+sctp_send_shutdown_complete2(struct mbuf *m, int iphlen, struct sctphdr *sh,
+ uint32_t vrf_id, uint16_t port)
+{
+ /* formulate and SEND a SHUTDOWN-COMPLETE */
+ struct mbuf *o_pak;
+ struct mbuf *mout;
+ struct ip *iph, *iph_out;
+ struct udphdr *udp = NULL;
+
+#ifdef INET6
+ struct ip6_hdr *ip6, *ip6_out;
+
+#endif
+ int offset_out, len, mlen;
+ struct sctp_shutdown_complete_msg *comp_cp;
+
+ iph = mtod(m, struct ip *);
+ switch (iph->ip_v) {
+ case IPVERSION:
+ len = (sizeof(struct ip) + sizeof(struct sctp_shutdown_complete_msg));
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_shutdown_complete_msg));
+ break;
+#endif
+ default:
+ return;
+ }
+ if (port) {
+ len += sizeof(struct udphdr);
+ }
+ mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ return;
+ }
+ SCTP_BUF_RESV_UF(mout, max_linkhdr);
+ SCTP_BUF_LEN(mout) = len;
+ SCTP_BUF_NEXT(mout) = NULL;
+ iph_out = NULL;
+#ifdef INET6
+ ip6_out = NULL;
+#endif
+ offset_out = 0;
+
+ switch (iph->ip_v) {
+ case IPVERSION:
+ iph_out = mtod(mout, struct ip *);
+
+ /* Fill in the IP header for the ABORT */
+ iph_out->ip_v = IPVERSION;
+ iph_out->ip_hl = (sizeof(struct ip) / 4);
+ iph_out->ip_tos = (u_char)0;
+ iph_out->ip_id = 0;
+ iph_out->ip_off = 0;
+ iph_out->ip_ttl = MAXTTL;
+ if (port) {
+ iph_out->ip_p = IPPROTO_UDP;
+ } else {
+ iph_out->ip_p = IPPROTO_SCTP;
+ }
+ iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
+ iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
+
+ /* let IP layer calculate this */
+ iph_out->ip_sum = 0;
+ offset_out += sizeof(*iph_out);
+ comp_cp = (struct sctp_shutdown_complete_msg *)(
+ (caddr_t)iph_out + offset_out);
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ ip6 = (struct ip6_hdr *)iph;
+ ip6_out = mtod(mout, struct ip6_hdr *);
+
+ /* Fill in the IPv6 header for the ABORT */
+ ip6_out->ip6_flow = ip6->ip6_flow;
+ ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
+ if (port) {
+ ip6_out->ip6_nxt = IPPROTO_UDP;
+ } else {
+ ip6_out->ip6_nxt = IPPROTO_SCTP;
+ }
+ ip6_out->ip6_src = ip6->ip6_dst;
+ ip6_out->ip6_dst = ip6->ip6_src;
+ /*
+ * ?? The old code had both the iph len + payload, I think
+ * this is wrong and would never have worked
+ */
+ ip6_out->ip6_plen = sizeof(struct sctp_shutdown_complete_msg);
+ offset_out += sizeof(*ip6_out);
+ comp_cp = (struct sctp_shutdown_complete_msg *)(
+ (caddr_t)ip6_out + offset_out);
+ break;
+#endif /* INET6 */
+ default:
+ /* Currently not supported. */
+ sctp_m_freem(mout);
+ return;
+ }
+ if (port) {
+ udp = (struct udphdr *)comp_cp;
+ udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
+ udp->uh_dport = port;
+ udp->uh_ulen = htons(sizeof(struct sctp_shutdown_complete_msg) + sizeof(struct udphdr));
+ if (iph_out)
+ udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
+ offset_out += sizeof(struct udphdr);
+ comp_cp = (struct sctp_shutdown_complete_msg *)((caddr_t)comp_cp + sizeof(struct udphdr));
+ }
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* no mbuf's */
+ sctp_m_freem(mout);
+ return;
+ }
+ /* Now copy in and fill in the ABORT tags etc. */
+ comp_cp->sh.src_port = sh->dest_port;
+ comp_cp->sh.dest_port = sh->src_port;
+ comp_cp->sh.checksum = 0;
+ comp_cp->sh.v_tag = sh->v_tag;
+ comp_cp->shut_cmp.ch.chunk_flags = SCTP_HAD_NO_TCB;
+ comp_cp->shut_cmp.ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
+ comp_cp->shut_cmp.ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
+
+ if (iph_out != NULL) {
+ sctp_route_t ro;
+ int ret;
+
+ mlen = SCTP_BUF_LEN(mout);
+ bzero(&ro, sizeof ro);
+ /* set IPv4 length */
+ iph_out->ip_len = mlen;
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, mlen);
+#endif
+ if (port) {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ comp_cp->sh.checksum = sctp_calculate_cksum(mout, offset_out);
+ SCTP_STAT_INCR(sctps_sendswcrc);
+#endif
+ SCTP_ENABLE_UDP_CSUM(mout);
+ } else {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ mout->m_pkthdr.csum_flags = CSUM_SCTP;
+ mout->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
+#endif
+ }
+ SCTP_ATTACH_CHAIN(o_pak, mout, mlen);
+ /* out it goes */
+ SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+#ifdef INET6
+ if (ip6_out != NULL) {
+ struct route_in6 ro;
+ int ret;
+ struct ifnet *ifp = NULL;
+
+ bzero(&ro, sizeof(ro));
+ mlen = SCTP_BUF_LEN(mout);
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, mlen);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, mlen);
+ if (port) {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ comp_cp->sh.checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
+ SCTP_STAT_INCR(sctps_sendswcrc);
+#endif
+ if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), mlen - sizeof(struct ip6_hdr))) == 0) {
+ udp->uh_sum = 0xffff;
+ }
+ } else {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ mout->m_pkthdr.csum_flags = CSUM_SCTP;
+ mout->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
+#endif
+ }
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+#endif
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ return;
+
+}
+
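+/*
+ * Pick the reachable destination that has gone longest without a send as the
+ * next heartbeat target; unconfirmed addresses are heartbeated more
+ * aggressively. Returns NULL if nothing needs a heartbeat right now.
+ */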
+static struct sctp_nets *
+sctp_select_hb_destination(struct sctp_tcb *stcb, struct timeval *now)
+{
+ struct sctp_nets *net, *hnet;
+ int ms_goneby, highest_ms, state_overide = 0;
+
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ highest_ms = 0;
+ hnet = NULL;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (
+ ((net->dest_state & SCTP_ADDR_NOHB) && ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) ||
+ (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)
+ ) {
+ /*
+ * Skip this guy from consideration if HB is off AND
+ * it's confirmed
+ */
+ continue;
+ }
+ if (sctp_destination_is_reachable(stcb, (struct sockaddr *)&net->ro._l_addr) == 0) {
+ /* skip this dest net from consideration */
+ continue;
+ }
+ if (net->last_sent_time.tv_sec) {
+ /* Sent to so we subtract */
+ ms_goneby = (now->tv_sec - net->last_sent_time.tv_sec) * 1000;
+ } else
+ /* Never been sent to */
+ ms_goneby = 0x7fffffff;
+ /*-
+ * When the address state is unconfirmed but still
+ * considered reachable, we HB at a higher rate. Once it
+ * goes confirmed OR reaches the "unreachable" state, then
+ * we cut it back to HB at a more normal pace.
+ */
+ if ((net->dest_state & (SCTP_ADDR_UNCONFIRMED | SCTP_ADDR_NOT_REACHABLE)) == SCTP_ADDR_UNCONFIRMED) {
+ state_overide = 1;
+ } else {
+ state_overide = 0;
+ }
+
+ if ((((unsigned int)ms_goneby >= net->RTO) || (state_overide)) &&
+ (ms_goneby > highest_ms)) {
+ highest_ms = ms_goneby;
+ hnet = net;
+ }
+ }
+ if (hnet &&
+ ((hnet->dest_state & (SCTP_ADDR_UNCONFIRMED | SCTP_ADDR_NOT_REACHABLE)) == SCTP_ADDR_UNCONFIRMED)) {
+ state_overide = 1;
+ } else {
+ state_overide = 0;
+ }
+
+ if (hnet && highest_ms && (((unsigned int)highest_ms >= hnet->RTO) || state_overide)) {
+ /*-
+ * Found the one with longest delay bounds OR it is
+ * unconfirmed and still not marked unreachable.
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "net:%p is the hb winner -", hnet);
+#ifdef SCTP_DEBUG
+ if (hnet) {
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT4,
+ (struct sockaddr *)&hnet->ro._l_addr);
+ } else {
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, " none\n");
+ }
+#endif
+ /* update the timer now */
+ hnet->last_sent_time = *now;
+ return (hnet);
+ }
+ /* Nothing to HB */
+ return (NULL);
+}
+
+int
+sctp_send_hb(struct sctp_tcb *stcb, int user_req, struct sctp_nets *u_net)
+{
+ struct sctp_tmit_chunk *chk;
+ struct sctp_nets *net;
+ struct sctp_heartbeat_chunk *hb;
+ struct timeval now;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (user_req == 0) {
+ net = sctp_select_hb_destination(stcb, &now);
+ if (net == NULL) {
+ /*-
+ * All are busy, none to send to; just start the
+ * timer again.
+ */
+ if (stcb->asoc.state == 0) {
+ return (0);
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep,
+ stcb,
+ net);
+ return (0);
+ }
+ } else {
+ net = u_net;
+ if (net == NULL) {
+ return (0);
+ }
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ }
+ sin = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin->sin_family != AF_INET) {
+ if (sin->sin_family != AF_INET6) {
+ /* huh */
+ return (0);
+ }
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n");
+ return (0);
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_heartbeat_chunk);
+
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return (0);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ /* Now we have a mbuf that we can fill in with the details */
+ hb = mtod(chk->data, struct sctp_heartbeat_chunk *);
+ memset(hb, 0, sizeof(struct sctp_heartbeat_chunk));
+ /* fill out chunk header */
+ hb->ch.chunk_type = SCTP_HEARTBEAT_REQUEST;
+ hb->ch.chunk_flags = 0;
+ hb->ch.chunk_length = htons(chk->send_size);
+ /* Fill out hb parameter */
+ hb->heartbeat.hb_info.ph.param_type = htons(SCTP_HEARTBEAT_INFO);
+ hb->heartbeat.hb_info.ph.param_length = htons(sizeof(struct sctp_heartbeat_info_param));
+ hb->heartbeat.hb_info.time_value_1 = now.tv_sec;
+ hb->heartbeat.hb_info.time_value_2 = now.tv_usec;
+ /* Did our user request this one, put it in */
+ hb->heartbeat.hb_info.user_req = user_req;
+ hb->heartbeat.hb_info.addr_family = sin->sin_family;
+ hb->heartbeat.hb_info.addr_len = sin->sin_len;
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ /*
+ * we only take from the entropy pool if the address is not
+ * confirmed.
+ */
+ net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ } else {
+ net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = 0;
+ net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = 0;
+ }
+ if (sin->sin_family == AF_INET) {
+ memcpy(hb->heartbeat.hb_info.address, &sin->sin_addr, sizeof(sin->sin_addr));
+ } else if (sin->sin_family == AF_INET6) {
+ /* We leave the scope the way it is in our lookup table. */
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ memcpy(hb->heartbeat.hb_info.address, &sin6->sin6_addr, sizeof(sin6->sin6_addr));
+ } else {
+ /* huh compiler bug */
+ return (0);
+ }
+
+ /*
+ * JRS 5/14/07 - In CMT PF, the T3 timer is used to track
+ * PF-heartbeats. Because of this, threshold management is done by
+ * the t3 timer handler, and does not need to be done upon the send
+ * of a PF-heartbeat. If CMT PF is on and the destination to which a
+ * heartbeat is being sent is in PF state, do NOT do threshold
+ * management.
+ */
+ if ((stcb->asoc.sctp_cmt_pf == 0) ||
+ ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF)) {
+ /* ok we have a destination that needs a beat */
+ /* let's do the threshold management Qiaobing style */
+ if (sctp_threshold_management(stcb->sctp_ep, stcb, net,
+ stcb->asoc.max_send_times)) {
+ /*-
+ * we have lost the association, in a way this is
+ * quite bad since we really are one less time since
+ * we really did not send yet. This is the down side
+ * to the Q's style as defined in the RFC and not my
+ * alternate style defined in the RFC.
+ */
+ if (chk->data != NULL) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ /*
+ * Here we do NOT use the macro since the
+ * association is now gone.
+ */
+ if (chk->whoTo) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = NULL;
+ }
+ sctp_free_a_chunk((struct sctp_tcb *)NULL, chk);
+ return (-1);
+ }
+ }
+ net->hb_responded = 0;
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ stcb->asoc.ctrl_queue_cnt++;
+ SCTP_STAT_INCR(sctps_sendheartbeat);
+ /*-
+ * Call directly med level routine to put out the chunk. It will
+ * always tumble out control chunks aka HB but it may even tumble
+ * out data too.
+ */
+ return (1);
+}
+
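+/*
+ * Queue an ECN-ECHO chunk carrying high_tsn; if one is already on the
+ * control queue, just update its TSN instead of adding a new chunk.
+ */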
+void
+sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint32_t high_tsn)
+{
+ struct sctp_association *asoc;
+ struct sctp_ecne_chunk *ecne;
+ struct sctp_tmit_chunk *chk;
+
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
+ /* found a previous ECN_ECHO update it if needed */
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ ecne->tsn = htonl(high_tsn);
+ return;
+ }
+ }
+ /* nope could not find one to update so we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ SCTP_STAT_INCR(sctps_sendecne);
+ chk->rec.chunk_id.id = SCTP_ECN_ECHO;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_ecne_chunk);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ stcb->asoc.ecn_echo_cnt_onq++;
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ ecne->ch.chunk_type = SCTP_ECN_ECHO;
+ ecne->ch.chunk_flags = 0;
+ ecne->ch.chunk_length = htons(sizeof(struct sctp_ecne_chunk));
+ ecne->tsn = htonl(high_tsn);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
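+/*
+ * Build a PACKET-DROPPED chunk that echoes back (possibly truncated) the
+ * received packet, so the peer can retransmit without waiting for a timeout.
+ */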
+void
+sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
+ struct mbuf *m, int iphlen, int bad_crc)
+{
+ struct sctp_association *asoc;
+ struct sctp_pktdrop_chunk *drp;
+ struct sctp_tmit_chunk *chk;
+ uint8_t *datap;
+ int len;
+ int was_trunc = 0;
+ struct ip *iph;
+
+#ifdef INET6
+ struct ip6_hdr *ip6h;
+
+#endif
+ int fullsz = 0, extra = 0;
+ long spc;
+ int offset;
+ struct sctp_chunkhdr *ch, chunk_buf;
+ unsigned int chk_length;
+
+ if (!stcb) {
+ return;
+ }
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->peer_supports_pktdrop == 0) {
+ /*-
+ * peer must declare support before I send one.
+ */
+ return;
+ }
+ if (stcb->sctp_socket == NULL) {
+ return;
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ iph = mtod(m, struct ip *);
+ if (iph == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ switch (iph->ip_v) {
+ case IPVERSION:
+ /* IPv4 */
+ len = chk->send_size = iph->ip_len;
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ /* IPv6 */
+ ip6h = mtod(m, struct ip6_hdr *);
+		len = chk->send_size = ntohs(ip6h->ip6_plen);
+ break;
+#endif
+ default:
+ return;
+ }
+ /* Validate that we do not have an ABORT in here. */
+ offset = iphlen + sizeof(struct sctphdr);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* break to abort land */
+ break;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_PACKET_DROPPED:
+ case SCTP_ABORT_ASSOCIATION:
+ case SCTP_INITIATION_ACK:
+ /**
+			 * We don't respond with a PKT-DROP to an ABORT
+ * or PKT-DROP. We also do not respond to an
+ * INIT-ACK, because we can't know if the initiation
+ * tag is correct or not.
+ */
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ default:
+ break;
+ }
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+
+ if ((len + SCTP_MAX_OVERHEAD + sizeof(struct sctp_pktdrop_chunk)) >
+ min(stcb->asoc.smallest_mtu, MCLBYTES)) {
+ /*
+ * only send 1 mtu worth, trim off the excess on the end.
+ */
+ fullsz = len - extra;
+ len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD;
+ was_trunc = 1;
+ }
+ chk->asoc = &stcb->asoc;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+jump_out:
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ drp = mtod(chk->data, struct sctp_pktdrop_chunk *);
+ if (drp == NULL) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ goto jump_out;
+ }
+ chk->book_size = SCTP_SIZE32((chk->send_size + sizeof(struct sctp_pktdrop_chunk) +
+ sizeof(struct sctphdr) + SCTP_MED_OVERHEAD));
+ chk->book_size_scale = 0;
+ if (was_trunc) {
+ drp->ch.chunk_flags = SCTP_PACKET_TRUNCATED;
+ drp->trunc_len = htons(fullsz);
+ /*
+		 * Len is already adjusted to size minus overhead above;
+		 * take the pkt_drop chunk itself out of it.
+ */
+ chk->send_size = len - sizeof(struct sctp_pktdrop_chunk);
+ len = chk->send_size;
+ } else {
+ /* no truncation needed */
+ drp->ch.chunk_flags = 0;
+ drp->trunc_len = htons(0);
+ }
+ if (bad_crc) {
+ drp->ch.chunk_flags |= SCTP_BADCRC;
+ }
+ chk->send_size += sizeof(struct sctp_pktdrop_chunk);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ if (net) {
+ /* we should hit here */
+ chk->whoTo = net;
+ } else {
+ chk->whoTo = asoc->primary_destination;
+ }
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
+ chk->rec.chunk_id.can_take_data = 1;
+ drp->ch.chunk_type = SCTP_PACKET_DROPPED;
+ drp->ch.chunk_length = htons(chk->send_size);
+ spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket);
+ if (spc < 0) {
+ spc = 0;
+ }
+ drp->bottle_bw = htonl(spc);
+ if (asoc->my_rwnd) {
+ drp->current_onq = htonl(asoc->size_on_reasm_queue +
+ asoc->size_on_all_streams +
+ asoc->my_rwnd_control_len +
+ stcb->sctp_socket->so_rcv.sb_cc);
+ } else {
+ /*-
+ * If my rwnd is 0, possibly from mbuf depletion as well as
+ * space used, tell the peer there is NO space aka onq == bw
+ */
+ drp->current_onq = htonl(spc);
+ }
+ drp->reserved = 0;
+ datap = drp->data;
+ m_copydata(m, iphlen, len, (caddr_t)datap);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
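+/*
+ * Queue a CWR chunk acknowledging that the congestion window has been
+ * reduced up to the given TSN.  An already queued CWR is reused and its
+ * TSN advanced only if the new value is larger.
+ */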
+void
+sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn)
+{
+ struct sctp_association *asoc;
+ struct sctp_cwr_chunk *cwr;
+ struct sctp_tmit_chunk *chk;
+
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_CWR) {
+			/* found a previous ECN_CWR, update it if needed */
+ cwr = mtod(chk->data, struct sctp_cwr_chunk *);
+ if (compare_with_wrap(high_tsn, ntohl(cwr->tsn),
+ MAX_TSN)) {
+ cwr->tsn = htonl(high_tsn);
+ }
+ return;
+ }
+ }
+ /* nope could not find one to update so we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_ECN_CWR;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_cwr_chunk);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ cwr = mtod(chk->data, struct sctp_cwr_chunk *);
+ cwr->ch.chunk_type = SCTP_ECN_CWR;
+ cwr->ch.chunk_flags = 0;
+ cwr->ch.chunk_length = htons(sizeof(struct sctp_cwr_chunk));
+ cwr->tsn = htonl(high_tsn);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
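+/*
+ * Append an outgoing stream reset request parameter to the stream-reset
+ * chunk in chk, listing the streams to reset, and update the chunk and
+ * bookkeeping lengths.
+ */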
+void
+sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq, uint32_t resp_seq, uint32_t last_sent)
+{
+ int len, old_len, i;
+ struct sctp_stream_reset_out_request *req_out;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_out = (struct sctp_stream_reset_out_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = (sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
+ req_out->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST);
+ req_out->ph.param_length = htons(len);
+ req_out->request_seq = htonl(seq);
+ req_out->response_seq = htonl(resp_seq);
+ req_out->send_reset_at_tsn = htonl(last_sent);
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ req_out->list_of_streams[i] = htons(list[i]);
+ }
+ }
+ if (SCTP_SIZE32(len) > len) {
+ /*-
+ * Need to worry about the pad we may end up adding to the
+ * end. This is easy since the struct is either aligned to 4
+ * bytes or 2 bytes off.
+ */
+ req_out->list_of_streams[number_entries] = 0;
+ }
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
+
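+/*
+ * Append an incoming stream reset request parameter (asking the peer to
+ * reset its outgoing streams towards us) to the stream-reset chunk in chk.
+ */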
+void
+sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq)
+{
+ int len, old_len, i;
+ struct sctp_stream_reset_in_request *req_in;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_in = (struct sctp_stream_reset_in_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = (sizeof(struct sctp_stream_reset_in_request) + (sizeof(uint16_t) * number_entries));
+ req_in->ph.param_type = htons(SCTP_STR_RESET_IN_REQUEST);
+ req_in->ph.param_length = htons(len);
+ req_in->request_seq = htonl(seq);
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ req_in->list_of_streams[i] = htons(list[i]);
+ }
+ }
+ if (SCTP_SIZE32(len) > len) {
+ /*-
+ * Need to worry about the pad we may end up adding to the
+ * end. This is easy since the struct is either aligned to 4
+ * bytes or 2 bytes off.
+ */
+ req_in->list_of_streams[number_entries] = 0;
+ }
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
+
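+/*
+ * Append an SSN/TSN reset request parameter to the stream-reset chunk in
+ * chk and adjust the chunk length accordingly.
+ */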
+void
+sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t seq)
+{
+ int len, old_len;
+ struct sctp_stream_reset_tsn_request *req_tsn;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_tsn = (struct sctp_stream_reset_tsn_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_tsn_request);
+ req_tsn->ph.param_type = htons(SCTP_STR_RESET_TSN_REQUEST);
+ req_tsn->ph.param_length = htons(len);
+ req_tsn->request_seq = htonl(seq);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->send_size = len + old_len;
+ chk->book_size = SCTP_SIZE32(chk->send_size);
+ chk->book_size_scale = 0;
+ SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
+ return;
+}
+
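+/*
+ * Append a stream reset response parameter carrying the result code for
+ * the request identified by resp_seq.
+ */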
+void
+sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result)
+{
+ int len, old_len;
+ struct sctp_stream_reset_response *resp;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ resp = (struct sctp_stream_reset_response *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_response);
+ resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
+ resp->ph.param_length = htons(len);
+ resp->response_seq = htonl(resp_seq);
+	resp->result = htonl(result);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+
+}
+
+
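+/*
+ * Append a stream reset response parameter that, besides the result code,
+ * reports the sender's and receiver's next TSNs after an SSN/TSN reset.
+ */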
+void
+sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result,
+ uint32_t send_una, uint32_t recv_next)
+{
+ int len, old_len;
+ struct sctp_stream_reset_response_tsn *resp;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ resp = (struct sctp_stream_reset_response_tsn *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_response_tsn);
+ resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
+ resp->ph.param_length = htons(len);
+ resp->response_seq = htonl(resp_seq);
+ resp->result = htonl(result);
+ resp->senders_next_tsn = htonl(send_una);
+ resp->receivers_next_tsn = htonl(recv_next);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
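+/*
+ * Append an "add outgoing streams" parameter requesting that 'adding'
+ * additional streams be opened, and adjust the chunk length.
+ */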
+static void
+sctp_add_a_stream(struct sctp_tmit_chunk *chk,
+ uint32_t seq,
+ uint16_t adding)
+{
+ int len, old_len;
+ struct sctp_chunkhdr *ch;
+ struct sctp_stream_reset_add_strm *addstr;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ addstr = (struct sctp_stream_reset_add_strm *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_add_strm);
+
+ /* Fill it out. */
+ addstr->ph.param_type = htons(SCTP_STR_RESET_ADD_STREAMS);
+ addstr->ph.param_length = htons(len);
+ addstr->request_seq = htonl(seq);
+ addstr->number_of_streams = htons(adding);
+ addstr->reserved = 0;
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->send_size = len + old_len;
+ chk->book_size = SCTP_SIZE32(chk->send_size);
+ chk->book_size_scale = 0;
+ SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
+ return;
+}
+
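+/*
+ * Build and queue a STREAM-RESET chunk containing any combination of
+ * outgoing-reset, incoming-reset, SSN/TSN-reset and add-stream requests.
+ * Only one stream-reset operation may be outstanding at a time; EBUSY is
+ * returned while a previous request is still unacknowledged.
+ */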
+int
+sctp_send_str_reset_req(struct sctp_tcb *stcb,
+ int number_entries, uint16_t * list,
+ uint8_t send_out_req,
+ uint32_t resp_seq,
+ uint8_t send_in_req,
+ uint8_t send_tsn_req,
+ uint8_t add_stream,
+ uint16_t adding
+)
+{
+
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+ uint32_t seq;
+
+ asoc = &stcb->asoc;
+ if (asoc->stream_reset_outstanding) {
+ /*-
+ * Already one pending, must get ACK back to clear the flag.
+ */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY);
+ return (EBUSY);
+ }
+ if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0) &&
+ (add_stream == 0)) {
+ /* nothing to do */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (send_tsn_req && (send_out_req || send_in_req)) {
+ /* error, can't do that */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->book_size = sizeof(struct sctp_chunkhdr);
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+
+ seq = stcb->asoc.str_reset_seq_out;
+ if (send_out_req) {
+ sctp_add_stream_reset_out(chk, number_entries, list,
+ seq, resp_seq, (stcb->asoc.sending_seq - 1));
+ asoc->stream_reset_out_is_outstanding = 1;
+ seq++;
+ asoc->stream_reset_outstanding++;
+ }
+ if (add_stream) {
+ sctp_add_a_stream(chk, seq, adding);
+ seq++;
+ asoc->stream_reset_outstanding++;
+ }
+ if (send_in_req) {
+ sctp_add_stream_reset_in(chk, number_entries, list, seq);
+ asoc->stream_reset_outstanding++;
+ }
+ if (send_tsn_req) {
+ sctp_add_stream_reset_tsn(chk, seq);
+ asoc->stream_reset_outstanding++;
+ }
+ asoc->str_reset = chk;
+
+ /* insert the chunk for sending */
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ asoc->ctrl_queue_cnt++;
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+ return (0);
+}
+
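+/*
+ * Build and send an ABORT in reply to a received packet, swapping the
+ * addresses and ports of the incoming IP/IPv6 and SCTP headers.  If vtag
+ * is zero the peer's verification tag is reflected and the chunk is
+ * flagged SCTP_HAD_NO_TCB.  err_cause, if given, is chained on as the
+ * error cause(s); a non-zero port selects UDP encapsulation.
+ */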
+void
+sctp_send_abort(struct mbuf *m, int iphlen, struct sctphdr *sh, uint32_t vtag,
+ struct mbuf *err_cause, uint32_t vrf_id, uint16_t port)
+{
+ /*-
+ * Formulate the abort message, and send it back down.
+ */
+ struct mbuf *o_pak;
+ struct mbuf *mout;
+ struct sctp_abort_msg *abm;
+ struct ip *iph, *iph_out;
+ struct udphdr *udp;
+
+#ifdef INET6
+ struct ip6_hdr *ip6, *ip6_out;
+
+#endif
+ int iphlen_out, len;
+
+ /* don't respond to ABORT with ABORT */
+ if (sctp_is_there_an_abort_here(m, iphlen, &vtag)) {
+ if (err_cause)
+ sctp_m_freem(err_cause);
+ return;
+ }
+ iph = mtod(m, struct ip *);
+ switch (iph->ip_v) {
+ case IPVERSION:
+ len = (sizeof(struct ip) + sizeof(struct sctp_abort_msg));
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_abort_msg));
+ break;
+#endif
+ default:
+ if (err_cause) {
+ sctp_m_freem(err_cause);
+ }
+ return;
+ }
+ if (port) {
+ len += sizeof(struct udphdr);
+ }
+ mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ if (err_cause) {
+ sctp_m_freem(err_cause);
+ }
+ return;
+ }
+ SCTP_BUF_RESV_UF(mout, max_linkhdr);
+ SCTP_BUF_LEN(mout) = len;
+ SCTP_BUF_NEXT(mout) = err_cause;
+ iph_out = NULL;
+#ifdef INET6
+ ip6_out = NULL;
+#endif
+ switch (iph->ip_v) {
+ case IPVERSION:
+ iph_out = mtod(mout, struct ip *);
+
+ /* Fill in the IP header for the ABORT */
+ iph_out->ip_v = IPVERSION;
+ iph_out->ip_hl = (sizeof(struct ip) / 4);
+ iph_out->ip_tos = (u_char)0;
+ iph_out->ip_id = 0;
+ iph_out->ip_off = 0;
+ iph_out->ip_ttl = MAXTTL;
+ if (port) {
+ iph_out->ip_p = IPPROTO_UDP;
+ } else {
+ iph_out->ip_p = IPPROTO_SCTP;
+ }
+ iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
+ iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
+ /* let IP layer calculate this */
+ iph_out->ip_sum = 0;
+
+ iphlen_out = sizeof(*iph_out);
+ abm = (struct sctp_abort_msg *)((caddr_t)iph_out + iphlen_out);
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ ip6 = (struct ip6_hdr *)iph;
+ ip6_out = mtod(mout, struct ip6_hdr *);
+
+ /* Fill in the IP6 header for the ABORT */
+ ip6_out->ip6_flow = ip6->ip6_flow;
+ ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
+ if (port) {
+ ip6_out->ip6_nxt = IPPROTO_UDP;
+ } else {
+ ip6_out->ip6_nxt = IPPROTO_SCTP;
+ }
+ ip6_out->ip6_src = ip6->ip6_dst;
+ ip6_out->ip6_dst = ip6->ip6_src;
+
+ iphlen_out = sizeof(*ip6_out);
+ abm = (struct sctp_abort_msg *)((caddr_t)ip6_out + iphlen_out);
+ break;
+#endif /* INET6 */
+ default:
+ /* Currently not supported */
+ sctp_m_freem(mout);
+ return;
+ }
+
+ udp = (struct udphdr *)abm;
+ if (port) {
+ udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
+ udp->uh_dport = port;
+ /* set udp->uh_ulen later */
+ udp->uh_sum = 0;
+ iphlen_out += sizeof(struct udphdr);
+ abm = (struct sctp_abort_msg *)((caddr_t)abm + sizeof(struct udphdr));
+ }
+ abm->sh.src_port = sh->dest_port;
+ abm->sh.dest_port = sh->src_port;
+ abm->sh.checksum = 0;
+ if (vtag == 0) {
+ abm->sh.v_tag = sh->v_tag;
+ abm->msg.ch.chunk_flags = SCTP_HAD_NO_TCB;
+ } else {
+ abm->sh.v_tag = htonl(vtag);
+ abm->msg.ch.chunk_flags = 0;
+ }
+ abm->msg.ch.chunk_type = SCTP_ABORT_ASSOCIATION;
+
+ if (err_cause) {
+ struct mbuf *m_tmp = err_cause;
+ int err_len = 0;
+
+ /* get length of the err_cause chain */
+ while (m_tmp != NULL) {
+ err_len += SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ len = SCTP_BUF_LEN(mout) + err_len;
+ if (err_len % 4) {
+ /* need pad at end of chunk */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (len % 4);
+ m_copyback(mout, len, padlen, (caddr_t)&cpthis);
+ len += padlen;
+ }
+ abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch) + err_len);
+ } else {
+ len = SCTP_BUF_LEN(mout);
+ abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch));
+ }
+
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* no mbuf's */
+ sctp_m_freem(mout);
+ return;
+ }
+ if (iph_out != NULL) {
+ sctp_route_t ro;
+ int ret;
+
+ /* zap the stack pointer to the route */
+ bzero(&ro, sizeof ro);
+ if (port) {
+ udp->uh_ulen = htons(len - sizeof(struct ip));
+ udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip_output:\n");
+ SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, iph_out, &abm->sh);
+ /* set IPv4 length */
+ iph_out->ip_len = len;
+ /* out it goes */
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ if (port) {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ abm->sh.checksum = sctp_calculate_cksum(mout, iphlen_out);
+ SCTP_STAT_INCR(sctps_sendswcrc);
+#endif
+ SCTP_ENABLE_UDP_CSUM(o_pak);
+ } else {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ mout->m_pkthdr.csum_flags = CSUM_SCTP;
+ mout->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
+#endif
+ }
+ SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+#ifdef INET6
+ if (ip6_out != NULL) {
+ struct route_in6 ro;
+ int ret;
+ struct ifnet *ifp = NULL;
+
+ /* zap the stack pointer to the route */
+ bzero(&ro, sizeof(ro));
+ if (port) {
+ udp->uh_ulen = htons(len - sizeof(struct ip6_hdr));
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip6_output:\n");
+ SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, (struct ip *)ip6_out, &abm->sh);
+ ip6_out->ip6_plen = len - sizeof(*ip6_out);
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ if (port) {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ abm->sh.checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
+ SCTP_STAT_INCR(sctps_sendswcrc);
+#endif
+ if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) {
+ udp->uh_sum = 0xffff;
+ }
+ } else {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ mout->m_pkthdr.csum_flags = CSUM_SCTP;
+ mout->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
+#endif
+ }
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+#endif
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+}
+
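+/*
+ * Send an OPERATION-ERROR chunk back to the originator of a received
+ * packet, mirroring its addresses and ports.  The mbuf chain scm, if any,
+ * supplies the error cause(s); a non-zero port selects UDP encapsulation.
+ */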
+void
+sctp_send_operr_to(struct mbuf *m, int iphlen, struct mbuf *scm, uint32_t vtag,
+ uint32_t vrf_id, uint16_t port)
+{
+ struct mbuf *o_pak;
+ struct sctphdr *sh, *sh_out;
+ struct sctp_chunkhdr *ch;
+ struct ip *iph, *iph_out;
+ struct udphdr *udp = NULL;
+ struct mbuf *mout;
+
+#ifdef INET6
+ struct ip6_hdr *ip6, *ip6_out;
+
+#endif
+ int iphlen_out, len;
+
+ iph = mtod(m, struct ip *);
+ sh = (struct sctphdr *)((caddr_t)iph + iphlen);
+ switch (iph->ip_v) {
+ case IPVERSION:
+ len = (sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr));
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ len = (sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr));
+ break;
+#endif
+ default:
+ if (scm) {
+ sctp_m_freem(scm);
+ }
+ return;
+ }
+ if (port) {
+ len += sizeof(struct udphdr);
+ }
+ mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ if (scm) {
+ sctp_m_freem(scm);
+ }
+ return;
+ }
+ SCTP_BUF_RESV_UF(mout, max_linkhdr);
+ SCTP_BUF_LEN(mout) = len;
+ SCTP_BUF_NEXT(mout) = scm;
+ iph_out = NULL;
+#ifdef INET6
+ ip6_out = NULL;
+#endif
+ switch (iph->ip_v) {
+ case IPVERSION:
+ iph_out = mtod(mout, struct ip *);
+
+		/* Fill in the IP header for the OPERATION-ERROR */
+ iph_out->ip_v = IPVERSION;
+ iph_out->ip_hl = (sizeof(struct ip) / 4);
+ iph_out->ip_tos = (u_char)0;
+ iph_out->ip_id = 0;
+ iph_out->ip_off = 0;
+ iph_out->ip_ttl = MAXTTL;
+ if (port) {
+ iph_out->ip_p = IPPROTO_UDP;
+ } else {
+ iph_out->ip_p = IPPROTO_SCTP;
+ }
+ iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
+ iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
+ /* let IP layer calculate this */
+ iph_out->ip_sum = 0;
+
+ iphlen_out = sizeof(struct ip);
+ sh_out = (struct sctphdr *)((caddr_t)iph_out + iphlen_out);
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ ip6 = (struct ip6_hdr *)iph;
+ ip6_out = mtod(mout, struct ip6_hdr *);
+
+		/* Fill in the IP6 header for the OPERATION-ERROR */
+ ip6_out->ip6_flow = ip6->ip6_flow;
+ ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
+ if (port) {
+ ip6_out->ip6_nxt = IPPROTO_UDP;
+ } else {
+ ip6_out->ip6_nxt = IPPROTO_SCTP;
+ }
+ ip6_out->ip6_src = ip6->ip6_dst;
+ ip6_out->ip6_dst = ip6->ip6_src;
+
+ iphlen_out = sizeof(struct ip6_hdr);
+ sh_out = (struct sctphdr *)((caddr_t)ip6_out + iphlen_out);
+ break;
+#endif /* INET6 */
+ default:
+ /* Currently not supported */
+ sctp_m_freem(mout);
+ return;
+ }
+
+ udp = (struct udphdr *)sh_out;
+ if (port) {
+ udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
+ udp->uh_dport = port;
+ /* set udp->uh_ulen later */
+ udp->uh_sum = 0;
+ iphlen_out += sizeof(struct udphdr);
+ sh_out = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
+ }
+ sh_out->src_port = sh->dest_port;
+ sh_out->dest_port = sh->src_port;
+ sh_out->v_tag = vtag;
+ sh_out->checksum = 0;
+
+ ch = (struct sctp_chunkhdr *)((caddr_t)sh_out + sizeof(struct sctphdr));
+ ch->chunk_type = SCTP_OPERATION_ERROR;
+ ch->chunk_flags = 0;
+
+ if (scm) {
+ struct mbuf *m_tmp = scm;
+ int cause_len = 0;
+
+ /* get length of the err_cause chain */
+ while (m_tmp != NULL) {
+ cause_len += SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ len = SCTP_BUF_LEN(mout) + cause_len;
+ if (cause_len % 4) {
+ /* need pad at end of chunk */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (len % 4);
+ m_copyback(mout, len, padlen, (caddr_t)&cpthis);
+ len += padlen;
+ }
+ ch->chunk_length = htons(sizeof(struct sctp_chunkhdr) + cause_len);
+ } else {
+ len = SCTP_BUF_LEN(mout);
+ ch->chunk_length = htons(sizeof(struct sctp_chunkhdr));
+ }
+
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* no mbuf's */
+ sctp_m_freem(mout);
+ return;
+ }
+ if (iph_out != NULL) {
+ sctp_route_t ro;
+ int ret;
+
+ /* zap the stack pointer to the route */
+ bzero(&ro, sizeof ro);
+ if (port) {
+ udp->uh_ulen = htons(len - sizeof(struct ip));
+ udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
+ }
+ /* set IPv4 length */
+ iph_out->ip_len = len;
+ /* out it goes */
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ if (port) {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ sh_out->checksum = sctp_calculate_cksum(mout, iphlen_out);
+ SCTP_STAT_INCR(sctps_sendswcrc);
+#endif
+ SCTP_ENABLE_UDP_CSUM(o_pak);
+ } else {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ mout->m_pkthdr.csum_flags = CSUM_SCTP;
+ mout->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
+#endif
+ }
+ SCTP_IP_OUTPUT(ret, o_pak, &ro, NULL, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+#ifdef INET6
+ if (ip6_out != NULL) {
+ struct route_in6 ro;
+ int ret;
+ struct ifnet *ifp = NULL;
+
+ /* zap the stack pointer to the route */
+ bzero(&ro, sizeof(ro));
+ if (port) {
+ udp->uh_ulen = htons(len - sizeof(struct ip6_hdr));
+ }
+ ip6_out->ip6_plen = len - sizeof(*ip6_out);
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ if (port) {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ sh_out->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
+ SCTP_STAT_INCR(sctps_sendswcrc);
+#endif
+ if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) {
+ udp->uh_sum = 0xffff;
+ }
+ } else {
+#if defined(SCTP_WITH_NO_CSUM)
+ SCTP_STAT_INCR(sctps_sendnocrc);
+#else
+ mout->m_pkthdr.csum_flags = CSUM_SCTP;
+ mout->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
+#endif
+ }
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, NULL, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+#endif
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+}
+
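+/*
+ * Copy up to max_send_len bytes of user data from the uio into a fresh
+ * mbuf chain.  The number of bytes copied and the last mbuf of the chain
+ * are reported through sndout and new_tail.
+ */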
+static struct mbuf *
+sctp_copy_resume(struct sctp_stream_queue_pending *sp,
+ struct uio *uio,
+ struct sctp_sndrcvinfo *srcv,
+ int max_send_len,
+ int user_marks_eor,
+ int *error,
+ uint32_t * sndout,
+ struct mbuf **new_tail)
+{
+ struct mbuf *m;
+
+ m = m_uiotombuf(uio, M_WAITOK, max_send_len, 0,
+ (M_PKTHDR | (user_marks_eor ? M_EOR : 0)));
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ *error = ENOMEM;
+ } else {
+ *sndout = m_length(m, NULL);
+ *new_tail = m_last(m);
+ }
+ return (m);
+}
+
+static int
+sctp_copy_one(struct sctp_stream_queue_pending *sp,
+ struct uio *uio,
+ int resv_upfront)
+{
+ int left;
+
+ left = sp->length;
+ sp->data = m_uiotombuf(uio, M_WAITOK, sp->length,
+ resv_upfront, 0);
+ if (sp->data == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ sp->tail_mbuf = m_last(sp->data);
+ return (0);
+}
+
+
+
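+/*
+ * Allocate a stream queue pending entry and copy the next portion of the
+ * user's message (up to max_send_len) into it from the uio.  The entry
+ * records the send parameters from srcv and is marked complete only when
+ * the whole message, or an explicit EOR, has been copied.
+ */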
+static struct sctp_stream_queue_pending *
+sctp_copy_it_in(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *srcv,
+ struct uio *uio,
+ struct sctp_nets *net,
+ int max_send_len,
+ int user_marks_eor,
+ int *error,
+ int non_blocking)
+{
+ /*-
+ * This routine must be very careful in its work. Protocol
+ * processing is up and running so care must be taken to spl...()
+	 * when you need to do something that may affect the stcb/asoc. The
+ * sb is locked however. When data is copied the protocol processing
+ * should be enabled since this is a slower operation...
+ */
+ struct sctp_stream_queue_pending *sp = NULL;
+ int resv_in_first;
+
+ *error = 0;
+ /* Now can we send this? */
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ /* got data while shutting down */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ *error = ECONNRESET;
+ goto out_now;
+ }
+ sctp_alloc_a_strmoq(stcb, sp);
+ if (sp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ *error = ENOMEM;
+ goto out_now;
+ }
+ sp->act_flags = 0;
+ sp->sender_all_done = 0;
+ sp->sinfo_flags = srcv->sinfo_flags;
+ sp->timetolive = srcv->sinfo_timetolive;
+ sp->ppid = srcv->sinfo_ppid;
+ sp->context = srcv->sinfo_context;
+ sp->strseq = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+
+ sp->stream = srcv->sinfo_stream;
+ sp->length = min(uio->uio_resid, max_send_len);
+ if ((sp->length == (uint32_t) uio->uio_resid) &&
+ ((user_marks_eor == 0) ||
+ (srcv->sinfo_flags & SCTP_EOF) ||
+ (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) {
+ sp->msg_is_complete = 1;
+ } else {
+ sp->msg_is_complete = 0;
+ }
+ sp->sender_all_done = 0;
+ sp->some_taken = 0;
+ sp->put_last_out = 0;
+ resv_in_first = sizeof(struct sctp_data_chunk);
+ sp->data = sp->tail_mbuf = NULL;
+ if (sp->length == 0) {
+ *error = 0;
+ goto skip_copy;
+ }
+ sp->auth_keyid = stcb->asoc.authinfo.active_keyid;
+ if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
+ sctp_auth_key_acquire(stcb, stcb->asoc.authinfo.active_keyid);
+ sp->holds_key_ref = 1;
+ }
+ *error = sctp_copy_one(sp, uio, resv_in_first);
+skip_copy:
+ if (*error) {
+ sctp_free_a_strmoq(stcb, sp);
+ sp = NULL;
+ } else {
+ if (sp->sinfo_flags & SCTP_ADDR_OVER) {
+ sp->net = net;
+ atomic_add_int(&sp->net->ref_count, 1);
+ } else {
+ sp->net = NULL;
+ }
+ sctp_set_prsctp_policy(sp);
+ }
+out_now:
+ return (sp);
+}
+
+
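+/*
+ * Socket layer send entry point.  Any SCTP_SNDRCV cmsg found in the
+ * control mbuf is used as the send parameters, IPv4-mapped IPv6
+ * destination addresses are converted to plain IPv4 (when both INET and
+ * INET6 are configured), and the request is handed to sctp_lower_sosend().
+ */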
+int
+sctp_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *top,
+ struct mbuf *control,
+ int flags,
+ struct thread *p
+)
+{
+ int error, use_rcvinfo = 0;
+ struct sctp_sndrcvinfo srcv;
+ struct sockaddr *addr_to_use;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin;
+
+#endif
+
+ if (control) {
+		/* process cmsg snd/rcv info (maybe an assoc-id) */
+ if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&srcv, control,
+ sizeof(srcv))) {
+ /* got one */
+ use_rcvinfo = 1;
+ }
+ }
+ addr_to_use = addr;
+#if defined(INET) && defined(INET6)
+ if ((addr) && (addr->sa_family == AF_INET6)) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin, sin6);
+ addr_to_use = (struct sockaddr *)&sin;
+ }
+ }
+#endif
+ error = sctp_lower_sosend(so, addr_to_use, uio, top,
+ control,
+ flags,
+ use_rcvinfo ? &srcv : NULL
+ ,p
+ );
+ return (error);
+}
+
+
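+/*
+ * Main send path.  Locates the association (implicitly setting one up for
+ * UDP-style sockets when needed), validates the send parameters, blocks
+ * until socket-buffer space is available unless non-blocking I/O was
+ * requested, copies the user data onto the stream queues and, when
+ * allowed, kicks off chunk output.
+ */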
+int
+sctp_lower_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *i_pak,
+ struct mbuf *control,
+ int flags,
+ struct sctp_sndrcvinfo *srcv
+ ,
+ struct thread *p
+)
+{
+ unsigned int sndlen = 0, max_len;
+ int error, len;
+ struct mbuf *top = NULL;
+ int queue_only = 0, queue_only_for_init = 0;
+ int free_cnt_applied = 0;
+ int un_sent;
+ int now_filled = 0;
+ unsigned int inqueue_bytes = 0;
+ struct sctp_block_entry be;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+ struct timeval now;
+ struct sctp_nets *net;
+ struct sctp_association *asoc;
+ struct sctp_inpcb *t_inp;
+ int user_marks_eor;
+ int create_lock_applied = 0;
+ int nagle_applies = 0;
+ int some_on_control = 0;
+ int got_all_of_the_send = 0;
+ int hold_tcblock = 0;
+ int non_blocking = 0;
+ uint32_t local_add_more, local_soresv = 0;
+ uint16_t port;
+ uint16_t sinfo_flags;
+ sctp_assoc_t sinfo_assoc_id;
+
+ error = 0;
+ net = NULL;
+ stcb = NULL;
+ asoc = NULL;
+
+ t_inp = inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ if (i_pak) {
+ SCTP_RELEASE_PKT(i_pak);
+ }
+ return (error);
+ }
+ if ((uio == NULL) && (i_pak == NULL)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ user_marks_eor = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ atomic_add_int(&inp->total_sends, 1);
+ if (uio) {
+ if (uio->uio_resid < 0) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ sndlen = uio->uio_resid;
+ } else {
+ top = SCTP_HEADER_TO_CHAIN(i_pak);
+ sndlen = SCTP_HEADER_LEN(i_pak);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n",
+ addr,
+ sndlen);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_socket->so_qlimit)) {
+ /* The listener can NOT send */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
+ error = ENOTCONN;
+ goto out_unlocked;
+ }
+ /**
+	 * Pre-screen the address: if one is given, its length field
+	 * (sin_len/sin6_len) must be set correctly!
+ */
+ if (addr) {
+ union sctp_sockstore *raddr = (union sctp_sockstore *)addr;
+
+ switch (raddr->sa.sa_family) {
+#if defined(INET)
+ case AF_INET:
+ if (raddr->sin.sin_len != sizeof(struct sockaddr_in)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ port = raddr->sin.sin_port;
+ break;
+#endif
+#if defined(INET6)
+ case AF_INET6:
+ if (raddr->sin6.sin6_len != sizeof(struct sockaddr_in6)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ port = raddr->sin6.sin6_port;
+ break;
+#endif
+ default:
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAFNOSUPPORT);
+ error = EAFNOSUPPORT;
+ goto out_unlocked;
+ }
+ } else
+ port = 0;
+
+ if (srcv) {
+ sinfo_flags = srcv->sinfo_flags;
+ sinfo_assoc_id = srcv->sinfo_assoc_id;
+ if (INVALID_SINFO_FLAG(sinfo_flags) ||
+ PR_SCTP_INVALID_POLICY(sinfo_flags)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (srcv->sinfo_flags)
+ SCTP_STAT_INCR(sctps_sends_with_flags);
+ } else {
+ sinfo_flags = inp->def_send.sinfo_flags;
+ sinfo_assoc_id = inp->def_send.sinfo_assoc_id;
+ }
+ if (sinfo_flags & SCTP_SENDALL) {
+		/* it's a sendall */
+ error = sctp_sendall(inp, uio, top, srcv);
+ top = NULL;
+ goto out_unlocked;
+ }
+ if ((sinfo_flags & SCTP_ADDR_OVER) && (addr == NULL)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ /* now we must find the assoc */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
+ error = ENOTCONN;
+ goto out_unlocked;
+ }
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ SCTP_INP_RUNLOCK(inp);
+ } else if (sinfo_assoc_id) {
+ stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 0);
+ } else if (addr) {
+ /*-
+ * Since we did not use findep we must
+ * increment it, and if we don't find a tcb
+ * decrement it.
+ */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ hold_tcblock = 1;
+ }
+ }
+ if ((stcb == NULL) && (addr)) {
+ /* Possible implicit send? */
+ SCTP_ASOC_CREATE_LOCK(inp);
+ create_lock_applied = 1;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /* Should I really unlock ? */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+
+ }
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (addr->sa_family == AF_INET6)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ /* With the lock applied look again */
+ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ hold_tcblock = 1;
+ }
+ if (t_inp != inp) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
+ error = ENOTCONN;
+ goto out_unlocked;
+ }
+ }
+ if (stcb == NULL) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
+ error = ENOTCONN;
+ goto out_unlocked;
+ }
+ if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT);
+ error = ENOENT;
+ goto out_unlocked;
+ } else {
+ /*
+ * UDP style, we must go ahead and start the INIT
+ * process
+ */
+ uint32_t vrf_id;
+
+ if ((sinfo_flags & SCTP_ABORT) ||
+ ((sinfo_flags & SCTP_EOF) && (sndlen == 0))) {
+ /*-
+				 * User asks to abort a non-existent assoc,
+				 * or EOF a non-existent assoc with no data
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT);
+ error = ENOENT;
+ goto out_unlocked;
+ }
+ /* get an asoc/stcb struct */
+ vrf_id = inp->def_vrf_id;
+#ifdef INVARIANTS
+ if (create_lock_applied == 0) {
+ panic("Error, should hold create lock and I don't?");
+ }
+#endif
+ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
+ p
+ );
+ if (stcb == NULL) {
+ /* Error is setup for us in the call */
+ goto out_unlocked;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ } else {
+ SCTP_PRINTF("Huh-3? create lock should have been on??\n");
+ }
+ /*
+ * Turn on queue only flag to prevent data from
+ * being sent
+ */
+ queue_only = 1;
+ asoc = &stcb->asoc;
+ SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+
+ /* initialize authentication params for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ if (control) {
+ /*
+				 * see if an init structure exists in cmsg
+ * headers
+ */
+ struct sctp_initmsg initm;
+ int i;
+
+ if (sctp_find_cmsg(SCTP_INIT, (void *)&initm, control,
+ sizeof(initm))) {
+ /*
+ * we have an INIT override of the
+ * default
+ */
+ if (initm.sinit_max_attempts)
+ asoc->max_init_times = initm.sinit_max_attempts;
+ if (initm.sinit_num_ostreams)
+ asoc->pre_open_streams = initm.sinit_num_ostreams;
+ if (initm.sinit_max_instreams)
+ asoc->max_inbound_streams = initm.sinit_max_instreams;
+ if (initm.sinit_max_init_timeo)
+ asoc->initial_init_rto_max = initm.sinit_max_init_timeo;
+ if (asoc->streamoutcnt < asoc->pre_open_streams) {
+ struct sctp_stream_out *tmp_str;
+ int had_lock = 0;
+
+ /* Default is NOT correct */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, defout:%d pre_open:%d\n",
+ asoc->streamoutcnt, asoc->pre_open_streams);
+ /*
+					 * What happens if this
+					 * fails? We fall back to
+					 * the default stream count.
+ */
+
+ if (hold_tcblock) {
+ had_lock = 1;
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_MALLOC(tmp_str,
+ struct sctp_stream_out *,
+ (asoc->pre_open_streams *
+ sizeof(struct sctp_stream_out)),
+ SCTP_M_STRMO);
+ if (had_lock) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ if (tmp_str != NULL) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = tmp_str;
+ asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams;
+ } else {
+ asoc->pre_open_streams = asoc->streamoutcnt;
+ }
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ /*-
+						 * The inbound side must be
+						 * set to 0xffff. Also NOTE:
+						 * when we get the INIT-ACK
+						 * back (for the INIT sender)
+						 * we MUST reduce the count
+						 * (streamoutcnt), but first
+						 * check whether we sent on
+						 * any of the upper streams
+						 * that were dropped (if any
+						 * were). Those that were
+						 * dropped must be reported
+						 * to the upper layer as
+						 * failed to send.
+ */
+ asoc->strmout[i].next_sequence_sent = 0x0;
+ TAILQ_INIT(&asoc->strmout[i].outqueue);
+ asoc->strmout[i].stream_no = i;
+ asoc->strmout[i].last_msg_incomplete = 0;
+ asoc->strmout[i].next_spoke.tqe_next = 0;
+ asoc->strmout[i].next_spoke.tqe_prev = 0;
+ }
+ }
+ }
+ }
+ hold_tcblock = 1;
+ /* out with the INIT */
+ queue_only_for_init = 1;
+ /*-
+ * we may want to dig in after this call and adjust the MTU
+ * value. It defaulted to 1500 (constant) but the ro
+ * structure may now have an update and thus we may need to
+ * change it BEFORE we append the message.
+ */
+ }
+ } else
+ asoc = &stcb->asoc;
+ if (srcv == NULL)
+ srcv = (struct sctp_sndrcvinfo *)&asoc->def_send;
+ if (srcv->sinfo_flags & SCTP_ADDR_OVER) {
+ if (addr)
+ net = sctp_findnet(stcb, addr);
+ else
+ net = NULL;
+ if ((net == NULL) ||
+ ((port != 0) && (port != stcb->rport))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ } else {
+ net = stcb->asoc.primary_destination;
+ }
+ atomic_add_int(&stcb->total_sends, 1);
+ /* Keep the stcb from being freed under our feet */
+ atomic_add_int(&asoc->refcnt, 1);
+ free_cnt_applied = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT)) {
+ if (sndlen > asoc->smallest_mtu) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out_unlocked;
+ }
+ }
+ if ((SCTP_SO_IS_NBIO(so)
+ || (flags & MSG_NBIO)
+ )) {
+ non_blocking = 1;
+ }
+ /* would we block? */
+ if (non_blocking) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if ((SCTP_SB_LIMIT_SND(so) < (sndlen + inqueue_bytes + stcb->asoc.sb_send_resv)) ||
+ (stcb->asoc.chunks_on_out_queue >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EWOULDBLOCK);
+ if (sndlen > SCTP_SB_LIMIT_SND(so))
+ error = EMSGSIZE;
+ else
+ error = EWOULDBLOCK;
+ goto out_unlocked;
+ }
+ stcb->asoc.sb_send_resv += sndlen;
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ } else {
+ atomic_add_int(&stcb->asoc.sb_send_resv, sndlen);
+ }
+ local_soresv = sndlen;
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_unlocked;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ }
+ if (asoc->stream_reset_outstanding) {
+ /*
+ * Can't queue any data while stream reset is underway.
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAGAIN);
+ error = EAGAIN;
+ goto out_unlocked;
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ queue_only = 1;
+ }
+ /* we are now done with all control */
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ if (srcv->sinfo_flags & SCTP_ABORT) {
+ ;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_unlocked;
+ }
+ }
+#ifndef __rtems__
+ /* Ok, we will attempt a msgsnd :> */
+ if (p) {
+ p->td_ru.ru_msgsnd++;
+ }
+#endif /* __rtems__ */
+ /* Are we aborting? */
+ if (srcv->sinfo_flags & SCTP_ABORT) {
+ struct mbuf *mm;
+ int tot_demand, tot_out = 0, max_out;
+
+ SCTP_STAT_INCR(sctps_sends_with_abort);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /* It has to be up before we abort */
+ /* how big is the user initiated abort? */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ if (top) {
+ struct mbuf *cntm = NULL;
+
+ mm = sctp_get_mbuf_for_msg(1, 0, M_WAIT, 1, MT_DATA);
+ if (sndlen != 0) {
+ cntm = top;
+ while (cntm) {
+ tot_out += SCTP_BUF_LEN(cntm);
+ cntm = SCTP_BUF_NEXT(cntm);
+ }
+ }
+ tot_demand = (tot_out + sizeof(struct sctp_paramhdr));
+ } else {
+ /* Must fit in a MTU */
+ tot_out = sndlen;
+ tot_demand = (tot_out + sizeof(struct sctp_paramhdr));
+ if (tot_demand > SCTP_DEFAULT_ADD_MORE) {
+				/* Too big */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out;
+ }
+ mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAIT, 1, MT_DATA);
+ }
+ if (mm == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ error = ENOMEM;
+ goto out;
+ }
+ max_out = asoc->smallest_mtu - sizeof(struct sctp_paramhdr);
+ max_out -= sizeof(struct sctp_abort_msg);
+ if (tot_out > max_out) {
+ tot_out = max_out;
+ }
+ if (mm) {
+ struct sctp_paramhdr *ph;
+
+ /* now move forward the data pointer */
+ ph = mtod(mm, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons((sizeof(struct sctp_paramhdr) + tot_out));
+ ph++;
+ SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr);
+ if (top == NULL) {
+ error = uiomove((caddr_t)ph, (int)tot_out, uio);
+ if (error) {
+ /*-
+					 * Here, if we can't get his data we
+					 * still abort; we just don't get to
+					 * send the user's note :-0
+ */
+ sctp_m_freem(mm);
+ mm = NULL;
+ }
+ } else {
+ if (sndlen != 0) {
+ SCTP_BUF_NEXT(mm) = top;
+ }
+ }
+ }
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ free_cnt_applied = 0;
+ /* release this lock, otherwise we hang on ourselves */
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ mm, SCTP_SO_LOCKED);
+ /* now relock the stcb so everything is sane */
+ hold_tcblock = 0;
+ stcb = NULL;
+ /*
+		 * In this case top is already chained to mm; avoid a double
+		 * free, since we free it below if top != NULL and the driver
+		 * would free it after sending the packet out.
+ */
+ if (sndlen != 0) {
+ top = NULL;
+ }
+ goto out_unlocked;
+ }
+ /* Calculate the maximum we can send */
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
+ if (non_blocking) {
+ /* we already checked for non-blocking above. */
+ max_len = sndlen;
+ } else {
+ max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
+ }
+ } else {
+ max_len = 0;
+ }
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ /* Is the stream no. valid? */
+ if (srcv->sinfo_stream >= asoc->streamoutcnt) {
+ /* Invalid stream number */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (asoc->strmout == NULL) {
+ /* huh? software error */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ goto out_unlocked;
+ }
+ /* Unless E_EOR mode is on, we must make a send FIT in one call. */
+ if ((user_marks_eor == 0) &&
+ (sndlen > SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
+ /* It will NEVER fit */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out_unlocked;
+ }
+ if ((uio == NULL) && user_marks_eor) {
+ /*-
+ * We do not support eeor mode for
+ * sending with mbuf chains (like sendfile).
+ */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (user_marks_eor) {
+ local_add_more = min(SCTP_SB_LIMIT_SND(so), SCTP_BASE_SYSCTL(sctp_add_more_threshold));
+ } else {
+ /*-
+ * For non-eeor the whole message must fit in
+ * the socket send buffer.
+ */
+ local_add_more = sndlen;
+ }
+ len = 0;
+ if (non_blocking) {
+ goto skip_preblock;
+ }
+ if (((max_len <= local_add_more) &&
+ (SCTP_SB_LIMIT_SND(so) >= local_add_more)) ||
+ (max_len == 0) ||
+ ((stcb->asoc.chunks_on_out_queue + stcb->asoc.stream_queue_cnt) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
+ /* No room right now ! */
+ SOCKBUF_LOCK(&so->so_snd);
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ while ((SCTP_SB_LIMIT_SND(so) < (inqueue_bytes + local_add_more)) ||
+ ((stcb->asoc.stream_queue_cnt + stcb->asoc.chunks_on_out_queue) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "pre_block limit:%u <(inq:%d + %d) || (%d+%d > %d)\n",
+ (unsigned int)SCTP_SB_LIMIT_SND(so),
+ inqueue_bytes,
+ local_add_more,
+ stcb->asoc.stream_queue_cnt,
+ stcb->asoc.chunks_on_out_queue,
+ SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue));
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_INTO_BLKA, so, asoc, sndlen);
+ }
+ be.error = 0;
+ stcb->block_entry = &be;
+ error = sbwait(&so->so_snd);
+ stcb->block_entry = NULL;
+ if (error || so->so_error || be.error) {
+ if (error == 0) {
+ if (so->so_error)
+ error = so->so_error;
+ if (be.error) {
+ error = be.error;
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ goto out_unlocked;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
+ so, asoc, stcb->asoc.total_output_queue_size);
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ }
+ if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
+ max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
+ } else {
+ max_len = 0;
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
+skip_preblock:
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ /*
+	 * sndlen covers the mbuf case; uio_resid covers the non-mbuf
+	 * case. NOTE: uio will be NULL when top/mbuf is passed.
+ */
+ if (sndlen == 0) {
+ if (srcv->sinfo_flags & SCTP_EOF) {
+ got_all_of_the_send = 1;
+ goto dataless_eof;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ }
+ if (top == NULL) {
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_stream_out *strm;
+ uint32_t sndout;
+
+ SCTP_TCB_SEND_LOCK(stcb);
+ if ((asoc->stream_locked) &&
+ (asoc->stream_locked_on != srcv->sinfo_stream)) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+
+ strm = &stcb->asoc.strmout[srcv->sinfo_stream];
+ if (strm->last_msg_incomplete == 0) {
+ do_a_copy_in:
+ sp = sctp_copy_it_in(stcb, asoc, srcv, uio, net, max_len, user_marks_eor, &error, non_blocking);
+ if ((sp == NULL) || (error)) {
+ goto out;
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (sp->msg_is_complete) {
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ } else {
+ /*
+ * Just got locked to this guy in case of an
+ * interrupt.
+ */
+ strm->last_msg_incomplete = 1;
+ asoc->stream_locked = 1;
+ asoc->stream_locked_on = srcv->sinfo_stream;
+ sp->sender_all_done = 0;
+ }
+ sctp_snd_sb_alloc(stcb, sp->length);
+ atomic_add_int(&asoc->stream_queue_cnt, 1);
+ if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
+ sp->strseq = strm->next_sequence_sent;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_SCTP) {
+ sctp_misc_ints(SCTP_STRMOUT_LOG_ASSIGN,
+ (uintptr_t) stcb, sp->length,
+ (uint32_t) ((srcv->sinfo_stream << 16) | sp->strseq), 0);
+ }
+ strm->next_sequence_sent++;
+ } else {
+ SCTP_STAT_INCR(sctps_sends_with_unord);
+ }
+ TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
+ if ((strm->next_spoke.tqe_next == NULL) &&
+ (strm->next_spoke.tqe_prev == NULL)) {
+ /* Not on wheel, insert */
+ sctp_insert_on_wheel(stcb, asoc, strm, 1);
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ } else {
+ SCTP_TCB_SEND_LOCK(stcb);
+ sp = TAILQ_LAST(&strm->outqueue, sctp_streamhead);
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ if (sp == NULL) {
+ /* ???? Huh ??? last msg is gone */
+#ifdef INVARIANTS
+ panic("Warning: Last msg marked incomplete, yet nothing left?");
+#else
+ SCTP_PRINTF("Warning: Last msg marked incomplete, yet nothing left?\n");
+ strm->last_msg_incomplete = 0;
+#endif
+ goto do_a_copy_in;
+
+ }
+ }
+ while (uio->uio_resid > 0) {
+ /* How much room do we have? */
+ struct mbuf *new_tail, *mm;
+
+ if (SCTP_SB_LIMIT_SND(so) > stcb->asoc.total_output_queue_size)
+ max_len = SCTP_SB_LIMIT_SND(so) - stcb->asoc.total_output_queue_size;
+ else
+ max_len = 0;
+
+ if ((max_len > SCTP_BASE_SYSCTL(sctp_add_more_threshold)) ||
+ (max_len && (SCTP_SB_LIMIT_SND(so) < SCTP_BASE_SYSCTL(sctp_add_more_threshold))) ||
+ (uio->uio_resid && (uio->uio_resid <= (int)max_len))) {
+ sndout = 0;
+ new_tail = NULL;
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ mm = sctp_copy_resume(sp, uio, srcv, max_len, user_marks_eor, &error, &sndout, &new_tail);
+ if ((mm == NULL) || error) {
+ if (mm) {
+ sctp_m_freem(mm);
+ }
+ goto out;
+ }
+ /* Update the mbuf and count */
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /*
+ * we need to get out. Peer probably
+ * aborted.
+ */
+ sctp_m_freem(mm);
+ if (stcb->asoc.state & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ goto out;
+ }
+ if (sp->tail_mbuf) {
+ /* tack it to the end */
+ SCTP_BUF_NEXT(sp->tail_mbuf) = mm;
+ sp->tail_mbuf = new_tail;
+ } else {
+ /* A stolen mbuf */
+ sp->data = mm;
+ sp->tail_mbuf = new_tail;
+ }
+ sctp_snd_sb_alloc(stcb, sndout);
+ atomic_add_int(&sp->length, sndout);
+ len += sndout;
+
+ /* Did we reach EOR? */
+ if ((uio->uio_resid == 0) &&
+ ((user_marks_eor == 0) ||
+ (srcv->sinfo_flags & SCTP_EOF) ||
+ (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) {
+ sp->msg_is_complete = 1;
+ } else {
+ sp->msg_is_complete = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+ if (uio->uio_resid == 0) {
+ /* got it all? */
+ continue;
+ }
+ /* PR-SCTP? */
+ if ((asoc->peer_supports_prsctp) && (asoc->sent_queue_cnt_removeable > 0)) {
+ /*
+ * This is ugly but we must assure locking
+ * order
+ */
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ sctp_prune_prsctp(stcb, asoc, srcv, sndlen);
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if (SCTP_SB_LIMIT_SND(so) > stcb->asoc.total_output_queue_size)
+ max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
+ else
+ max_len = 0;
+ if (max_len > 0) {
+ continue;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ /* wait for space now */
+ if (non_blocking) {
+			/* Non-blocking I/O in place, get out */
+ goto skip_out_eof;
+ }
+ /* What about the INIT, send it maybe */
+ if (queue_only_for_init) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ /* a collision took us forward? */
+ queue_only = 0;
+ } else {
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ queue_only = 1;
+ }
+ }
+ if ((net->flight_size > net->cwnd) &&
+ (asoc->sctp_cmt_on_off == 0)) {
+ SCTP_STAT_INCR(sctps_send_cwnd_avoid);
+ queue_only = 1;
+ } else if (asoc->ifp_had_enobuf) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (net->flight_size > (2 * net->mtu)) {
+ queue_only = 1;
+ }
+ asoc->ifp_had_enobuf = 0;
+ }
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) {
+
+ /*-
+ * Ok, Nagle is set on and we have data outstanding.
+ * Don't send anything and let SACKs drive out the
+ * data unless we have a "full" segment to send.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_naglequeued);
+ nagle_applies = 1;
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
+ }
+ SCTP_STAT_INCR(sctps_naglesent);
+ nagle_applies = 0;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
+
+ sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only,
+ nagle_applies, un_sent);
+ sctp_misc_ints(SCTP_CWNDLOG_PRESEND, stcb->asoc.total_output_queue_size,
+ stcb->asoc.total_flight,
+ stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count);
+ }
+ if (queue_only_for_init)
+ queue_only_for_init = 0;
+ if ((queue_only == 0) && (nagle_applies == 0)) {
+ /*-
+ * need to start chunk output
+ * before blocking.. note that if
+ * a lock is already applied, then
+ * the input via the net is happening
+ * and I don't need to start output :-D
+ */
+ if (hold_tcblock == 0) {
+ if (SCTP_TCB_TRYLOCK(stcb)) {
+ hold_tcblock = 1;
+ sctp_chunk_output(inp,
+ stcb,
+ SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ } else {
+ sctp_chunk_output(inp,
+ stcb,
+ SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ if (hold_tcblock == 1) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ }
+ SOCKBUF_LOCK(&so->so_snd);
+ /*-
+ * This is a bit strange, but I think it will
+ * work. The total_output_queue_size is locked and
+ * protected by the TCB_LOCK, which we just released.
+ * There is a race that can occur between releasing it
+ * above, and me getting the socket lock, where sacks
+ * come in but we have not put the SB_WAIT on the
+ * so_snd buffer to get the wakeup. After the LOCK
+ * is applied the sack_processing will also need to
+ * LOCK the so->so_snd to do the actual sowwakeup(). So
+ * once we have the socket buffer lock if we recheck the
+ * size we KNOW we will get to sleep safely with the
+ * wakeup flag in place.
+ */
+ if (SCTP_SB_LIMIT_SND(so) <= (stcb->asoc.total_output_queue_size +
+ min(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTP_SB_LIMIT_SND(so)))) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK,
+ so, asoc, uio->uio_resid);
+ }
+ be.error = 0;
+ stcb->block_entry = &be;
+ error = sbwait(&so->so_snd);
+ stcb->block_entry = NULL;
+
+ if (error || so->so_error || be.error) {
+ if (error == 0) {
+ if (so->so_error)
+ error = so->so_error;
+ if (be.error) {
+ error = be.error;
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ goto out_unlocked;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
+ so, asoc, stcb->asoc.total_output_queue_size);
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (sp) {
+ if (sp->msg_is_complete == 0) {
+ strm->last_msg_incomplete = 1;
+ asoc->stream_locked = 1;
+ asoc->stream_locked_on = srcv->sinfo_stream;
+ } else {
+ sp->sender_all_done = 1;
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ }
+ } else {
+ SCTP_PRINTF("Huh no sp TSNH?\n");
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ if (uio->uio_resid == 0) {
+ got_all_of_the_send = 1;
+ }
+ } else {
+ /* We send in a 0, since we do NOT have any locks */
+ error = sctp_msg_append(stcb, net, top, srcv, 0);
+ top = NULL;
+ if (srcv->sinfo_flags & SCTP_EOF) {
+ /*
+ * This should only happen for Panda in the mbuf
+ * send case, which does NOT yet support EEOR mode.
+ * Thus, we can just set this flag to do the proper
+ * EOF handling.
+ */
+ got_all_of_the_send = 1;
+ }
+ }
+ if (error) {
+ goto out;
+ }
+dataless_eof:
+ /* EOF thing ? */
+ if ((srcv->sinfo_flags & SCTP_EOF) &&
+ (got_all_of_the_send == 1) &&
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
+ int cnt;
+
+ SCTP_STAT_INCR(sctps_sends_with_eof);
+ error = 0;
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ cnt = sctp_is_there_unsent_data(stcb);
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /* there is nothing queued to send, so I'm done... */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* only send SHUTDOWN the first time through */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*-
+ * we still got (or just got) data to send, so set
+ * SHUTDOWN_PENDING
+ */
+ /*-
+ * XXX sockets draft says that SCTP_EOF should be
+ * sent with no data. currently, we will allow user
+ * data to be sent first and move to
+ * SHUTDOWN-PENDING
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp) {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ abort_anyway:
+ if (free_cnt_applied) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ free_cnt_applied = 0;
+ }
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ NULL, SCTP_SO_LOCKED);
+ /*
+ * now relock the stcb so everything
+ * is sane
+ */
+ hold_tcblock = 0;
+ stcb = NULL;
+ goto out;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_NODELAY);
+ }
+ }
+ }
+skip_out_eof:
+ if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
+ some_on_control = 1;
+ }
+ if (queue_only_for_init) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ /* a collision took us forward? */
+ queue_only = 0;
+ } else {
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ queue_only = 1;
+ }
+ }
+ if ((net->flight_size > net->cwnd) &&
+ (stcb->asoc.sctp_cmt_on_off == 0)) {
+ SCTP_STAT_INCR(sctps_send_cwnd_avoid);
+ queue_only = 1;
+ } else if (asoc->ifp_had_enobuf) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (net->flight_size > (2 * net->mtu)) {
+ queue_only = 1;
+ }
+ asoc->ifp_had_enobuf = 0;
+ }
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) {
+ /*-
+ * Ok, Nagle is set on and we have data outstanding.
+ * Don't send anything and let SACKs drive out the
+ * data unless we have a "full" segment to send.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_naglequeued);
+ nagle_applies = 1;
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
+ }
+ SCTP_STAT_INCR(sctps_naglesent);
+ nagle_applies = 0;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only,
+ nagle_applies, un_sent);
+ sctp_misc_ints(SCTP_CWNDLOG_PRESEND, stcb->asoc.total_output_queue_size,
+ stcb->asoc.total_flight,
+ stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count);
+ }
+ if (queue_only_for_init)
+ queue_only_for_init = 0;
+ if ((queue_only == 0) && (nagle_applies == 0) && (stcb->asoc.peers_rwnd && un_sent)) {
+ /* we can attempt to send too. */
+ if (hold_tcblock == 0) {
+ /*
+ * If there is activity recv'ing sacks no need to
+ * send
+ */
+ if (SCTP_TCB_TRYLOCK(stcb)) {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ hold_tcblock = 1;
+ }
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ } else if ((queue_only == 0) &&
+ (stcb->asoc.peers_rwnd == 0) &&
+ (stcb->asoc.total_flight == 0)) {
+ /* We get to have a probe outstanding */
+ if (hold_tcblock == 0) {
+ hold_tcblock = 1;
+ SCTP_TCB_LOCK(stcb);
+ }
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ } else if (some_on_control) {
+ int num_out, reason, frag_point;
+
+ /* Here we do control only */
+ if (hold_tcblock == 0) {
+ hold_tcblock = 1;
+ SCTP_TCB_LOCK(stcb);
+ }
+ frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ (void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
+ &reason, 1, 1, &now, &now_filled, frag_point, SCTP_SO_LOCKED);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "USR Send complete qo:%d prw:%d unsent:%d tf:%d cooq:%d toqs:%d err:%d\n",
+ queue_only, stcb->asoc.peers_rwnd, un_sent,
+ stcb->asoc.total_flight, stcb->asoc.chunks_on_out_queue,
+ stcb->asoc.total_output_queue_size, error);
+
+out:
+out_unlocked:
+
+ if (local_soresv && stcb) {
+ atomic_subtract_int(&stcb->asoc.sb_send_resv, sndlen);
+ local_soresv = 0;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ }
+ if ((stcb) && hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ if (stcb && free_cnt_applied) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+#ifdef INVARIANTS
+ if (stcb) {
+ if (mtx_owned(&stcb->tcb_mtx)) {
+ panic("Leaving with tcb mtx owned?");
+ }
+ if (mtx_owned(&stcb->tcb_send_mtx)) {
+ panic("Leaving with tcb send mtx owned?");
+ }
+ }
+#endif
+#ifdef INVARIANTS
+ if (inp) {
+ sctp_validate_no_locks(inp);
+ } else {
+ printf("Warning - inp is NULL so cant validate locks\n");
+ }
+#endif
+ if (top) {
+ sctp_m_freem(top);
+ }
+ if (control) {
+ sctp_m_freem(control);
+ }
+ return (error);
+}
+
+
+/*
+ * generate an AUTHentication chunk, if required
+ */
+struct mbuf *
+sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
+ struct sctp_auth_chunk **auth_ret, uint32_t * offset,
+ struct sctp_tcb *stcb, uint8_t chunk)
+{
+ struct mbuf *m_auth;
+ struct sctp_auth_chunk *auth;
+ int chunk_len;
+
+ if ((m_end == NULL) || (auth_ret == NULL) || (offset == NULL) ||
+ (stcb == NULL))
+ return (m);
+
+ /* sysctl disabled auth? */
+ if (SCTP_BASE_SYSCTL(sctp_auth_disable))
+ return (m);
+
+ /* peer doesn't do auth... */
+ if (!stcb->asoc.peer_supports_auth) {
+ return (m);
+ }
+ /* does the requested chunk require auth? */
+ if (!sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) {
+ return (m);
+ }
+ m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_auth == NULL) {
+ /* no mbuf's */
+ return (m);
+ }
+ /* reserve some space if this will be the first mbuf */
+ if (m == NULL)
+ SCTP_BUF_RESV_UF(m_auth, SCTP_MIN_OVERHEAD);
+ /* fill in the AUTH chunk details */
+ auth = mtod(m_auth, struct sctp_auth_chunk *);
+ bzero(auth, sizeof(*auth));
+ auth->ch.chunk_type = SCTP_AUTHENTICATION;
+ auth->ch.chunk_flags = 0;
+ chunk_len = sizeof(*auth) +
+ sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id);
+ auth->ch.chunk_length = htons(chunk_len);
+ auth->hmac_id = htons(stcb->asoc.peer_hmac_id);
+ /* key id and hmac digest will be computed and filled in upon send */
+
+ /* save the offset where the auth was inserted into the chain */
+ if (m != NULL) {
+ struct mbuf *cn;
+
+ *offset = 0;
+ cn = m;
+ while (cn) {
+ *offset += SCTP_BUF_LEN(cn);
+ cn = SCTP_BUF_NEXT(cn);
+ }
+ } else
+ *offset = 0;
+
+ /* update length and return pointer to the auth chunk */
+ SCTP_BUF_LEN(m_auth) = chunk_len;
+ m = sctp_copy_mbufchain(m_auth, m, m_end, 1, chunk_len, 0);
+ if (auth_ret != NULL)
+ *auth_ret = auth;
+
+ return (m);
+}
+
+#ifdef INET6
+int
+sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro)
+{
+ struct nd_prefix *pfx = NULL;
+ struct nd_pfxrouter *pfxrtr = NULL;
+ struct sockaddr_in6 gw6;
+
+ if (ro == NULL || ro->ro_rt == NULL || src6->sin6_family != AF_INET6)
+ return (0);
+
+ /* get prefix entry of address */
+ LIST_FOREACH(pfx, &MODULE_GLOBAL(nd_prefix), ndpr_entry) {
+ if (pfx->ndpr_stateflags & NDPRF_DETACHED)
+ continue;
+ if (IN6_ARE_MASKED_ADDR_EQUAL(&pfx->ndpr_prefix.sin6_addr,
+ &src6->sin6_addr, &pfx->ndpr_mask))
+ break;
+ }
+ /* no prefix entry in the prefix list */
+ if (pfx == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
+ return (0);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "v6src_match_nexthop(), Prefix entry is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
+
+ /* search installed gateway from prefix entry */
+ for (pfxrtr = pfx->ndpr_advrtrs.lh_first; pfxrtr; pfxrtr =
+ pfxrtr->pfr_next) {
+ memset(&gw6, 0, sizeof(struct sockaddr_in6));
+ gw6.sin6_family = AF_INET6;
+ gw6.sin6_len = sizeof(struct sockaddr_in6);
+ memcpy(&gw6.sin6_addr, &pfxrtr->router->rtaddr,
+ sizeof(struct in6_addr));
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "prefix router is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&gw6);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "installed router is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
+ if (sctp_cmpaddr((struct sockaddr *)&gw6,
+ ro->ro_rt->rt_gateway)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n");
+ return (1);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n");
+ return (0);
+}
+
+#endif
+
+int
+sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro)
+{
+ struct sockaddr_in *sin, *mask;
+ struct ifaddr *ifa;
+ struct in_addr srcnetaddr, gwnetaddr;
+
+ if (ro == NULL || ro->ro_rt == NULL ||
+ sifa->address.sa.sa_family != AF_INET) {
+ return (0);
+ }
+ ifa = (struct ifaddr *)sifa->ifa;
+ mask = (struct sockaddr_in *)(ifa->ifa_netmask);
+ sin = (struct sockaddr_in *)&sifa->address.sin;
+ srcnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", srcnetaddr.s_addr);
+
+ sin = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+ gwnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gwnetaddr.s_addr);
+ if (srcnetaddr.s_addr == gwnetaddr.s_addr) {
+ return (1);
+ }
+ return (0);
+}
diff --git a/freebsd/sys/netinet/sctp_output.h b/freebsd/sys/netinet/sctp_output.h
new file mode 100644
index 00000000..d9051ee7
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_output.h
@@ -0,0 +1,229 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.h,v 1.14 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_output_h__
+#define __sctp_output_h__
+
+#include <freebsd/netinet/sctp_header.h>
+
+#if defined(_KERNEL) || defined(__Userspace__)
+
+
+struct mbuf *
+sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp,
+ struct sctp_scoping *scope,
+ struct mbuf *m_at,
+ int cnt_inits_to);
+
+
+int sctp_is_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+
+
+int
+sctp_is_address_in_scope(struct sctp_ifa *ifa,
+ int ipv4_addr_legal,
+ int ipv6_addr_legal,
+ int loopback_scope,
+ int ipv4_local_scope,
+ int local_scope,
+ int site_scope,
+ int do_update);
+int
+ sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa);
+
+struct sctp_ifa *
+sctp_source_address_selection(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ sctp_route_t * ro, struct sctp_nets *net,
+ int non_asoc_addr_ok, uint32_t vrf_id);
+
+int
+ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro);
+int
+ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro);
+
+void
+sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
+ struct mbuf *, int, int, struct sctphdr *, struct sctp_init_chunk *,
+ uint32_t, uint16_t, int);
+
+struct mbuf *
+sctp_arethere_unrecognized_parameters(struct mbuf *, int, int *,
+ struct sctp_chunkhdr *, int *);
+void sctp_queue_op_err(struct sctp_tcb *, struct mbuf *);
+
+int
+sctp_send_cookie_echo(struct mbuf *, int, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void sctp_send_cookie_ack(struct sctp_tcb *);
+
+void
+sctp_send_heartbeat_ack(struct sctp_tcb *, struct mbuf *, int, int,
+ struct sctp_nets *);
+
+void
+sctp_remove_from_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq, int holds_lock);
+
+
+void sctp_send_shutdown(struct sctp_tcb *, struct sctp_nets *);
+
+void sctp_send_shutdown_ack(struct sctp_tcb *, struct sctp_nets *);
+
+void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *, int);
+
+void
+sctp_send_shutdown_complete2(struct mbuf *, int, struct sctphdr *,
+ uint32_t, uint16_t);
+
+void sctp_send_asconf(struct sctp_tcb *, struct sctp_nets *, int addr_locked);
+
+void sctp_send_asconf_ack(struct sctp_tcb *);
+
+int sctp_get_frag_point(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_toss_old_cookies(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_toss_old_asconf(struct sctp_tcb *);
+
+void sctp_fix_ecn_echo(struct sctp_association *);
+
+void sctp_move_chunks_from_net(struct sctp_tcb *stcb, struct sctp_nets *net);
+
+int
+sctp_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
+ struct mbuf *, struct thread *, int);
+
+void
+sctp_insert_on_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq, int holdslock);
+
+void
+sctp_chunk_output(struct sctp_inpcb *, struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+void
+sctp_send_abort_tcb(struct sctp_tcb *, struct mbuf *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void send_forward_tsn(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_send_sack(struct sctp_tcb *);
+
+int sctp_send_hb(struct sctp_tcb *, int, struct sctp_nets *);
+
+void sctp_send_ecn_echo(struct sctp_tcb *, struct sctp_nets *, uint32_t);
+
+
+void
+sctp_send_packet_dropped(struct sctp_tcb *, struct sctp_nets *, struct mbuf *,
+ int, int);
+
+
+
+void sctp_send_cwr(struct sctp_tcb *, struct sctp_nets *, uint32_t);
+
+
+void
+sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq, uint32_t resp_seq, uint32_t last_sent);
+
+void
+sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq);
+
+void
+sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t seq);
+
+void
+sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result);
+
+void
+sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result,
+ uint32_t send_una, uint32_t recv_next);
+
+int
+sctp_send_str_reset_req(struct sctp_tcb *stcb,
+ int number_entries,
+ uint16_t * list,
+ uint8_t send_out_req,
+ uint32_t resp_seq,
+ uint8_t send_in_req,
+ uint8_t send_tsn_req,
+ uint8_t add_str,
+ uint16_t adding);
+
+
+void
+sctp_send_abort(struct mbuf *, int, struct sctphdr *, uint32_t,
+ struct mbuf *, uint32_t, uint16_t);
+
+void sctp_send_operr_to(struct mbuf *, int, struct mbuf *, uint32_t, uint32_t, uint16_t);
+
+#endif /* _KERNEL || __Userspace__ */
+
+#if defined(_KERNEL) || defined (__Userspace__)
+int
+sctp_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *top,
+ struct mbuf *control,
+ int flags,
+ struct thread *p
+);
+
+#endif
+#endif
diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c
new file mode 100644
index 00000000..fccbda00
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_pcb.c
@@ -0,0 +1,6810 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_pcb.c,v 1.38 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_timer.h>
+#include <freebsd/netinet/sctp_bsd_addr.h>
+#include <freebsd/netinet/udp.h>
+
+
+VNET_DEFINE(struct sctp_base_info, system_base_info);
+
+/* FIX: we don't handle multiple link local scopes */
+/* "scopeless" replacement IN6_ARE_ADDR_EQUAL */
+#ifdef INET6
+int
+SCTP6_ARE_ADDR_EQUAL(struct sockaddr_in6 *a, struct sockaddr_in6 *b)
+{
+ struct sockaddr_in6 tmp_a, tmp_b;
+
+ memcpy(&tmp_a, a, sizeof(struct sockaddr_in6));
+ if (sa6_embedscope(&tmp_a, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
+ return 0;
+ }
+ memcpy(&tmp_b, b, sizeof(struct sockaddr_in6));
+ if (sa6_embedscope(&tmp_b, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
+ return 0;
+ }
+ return (IN6_ARE_ADDR_EQUAL(&tmp_a.sin6_addr, &tmp_b.sin6_addr));
+}
+
+#endif
+
+void
+sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb)
+{
+ /*
+ * We really don't need to lock this, but I will just because it
+ * does not hurt.
+ */
+ SCTP_INP_INFO_RLOCK();
+ spcb->ep_count = SCTP_BASE_INFO(ipi_count_ep);
+ spcb->asoc_count = SCTP_BASE_INFO(ipi_count_asoc);
+ spcb->laddr_count = SCTP_BASE_INFO(ipi_count_laddr);
+ spcb->raddr_count = SCTP_BASE_INFO(ipi_count_raddr);
+ spcb->chk_count = SCTP_BASE_INFO(ipi_count_chunk);
+ spcb->readq_count = SCTP_BASE_INFO(ipi_count_readq);
+ spcb->stream_oque = SCTP_BASE_INFO(ipi_count_strmoq);
+ spcb->free_chunks = SCTP_BASE_INFO(ipi_free_chunks);
+
+ SCTP_INP_INFO_RUNLOCK();
+}
+
+/*
+ * Addresses are added to VRF's (Virtual Routers). For BSD we
+ * have only the default VRF 0. We maintain a hash list of
+ * VRF's. Each VRF has its own list of sctp_ifn's. Each of
+ * these has a list of addresses. When we add a new address
+ * to a VRF we lookup the ifn/ifn_index, if the ifn does
+ * not exist we create it and add it to the list of IFN's
+ * within the VRF. Once we have the sctp_ifn, we add the
+ * address to the list. So we look something like:
+ *
+ * hash-vrf-table
+ * vrf-> ifn-> ifn -> ifn
+ * vrf |
+ * ... +--ifa-> ifa -> ifa
+ * vrf
+ *
+ * We keep these separate lists since the SCTP subsystem will
+ * point to these from its source address selection nets structure.
+ * When an address is deleted it does not happen right away on
+ * the SCTP side, it gets scheduled. What we do when a
+ * delete happens is immediately remove the address from
+ * the master list and decrement the refcount. As our
+ * addip iterator works through and frees the src address
+ * selection pointing to the sctp_ifa, eventually the refcount
+ * will reach 0 and we will delete it. Note that it is assumed
+ * that any locking on system level ifn/ifa is done at the
+ * caller of these functions and these routines will only
+ * lock the SCTP structures as they add or delete things.
+ *
+ * Other notes on VRF concepts.
+ * - An endpoint can be in multiple VRF's
+ * - An association lives within a VRF and only one VRF.
+ * - For any incoming packet we can deduce the VRF by
+ * looking at the inbound mbuf/pak (for BSD it's VRF=0 :D)
+ * - Any downward send call or connect call must supply the
+ * VRF via ancillary data or via some sort of set default
+ * VRF socket option call (again for BSD no brainer since
+ * the VRF is always 0).
+ * - An endpoint may add multiple VRF's to it.
+ * - Listening sockets can accept associations in any
+ * of the VRF's they are in but the assoc will end up
+ * in only one VRF (gotten from the packet or connect/send).
+ *
+ */
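+
+/*
+ * Illustrative sketch only - NOT part of the original source, and the
+ * helper name below is hypothetical.  It shows a linear walk of the
+ * layout described above (vrf hash -> ifn list -> ifa list); the real
+ * lookup, sctp_find_ifa_by_addr(), uses the per-VRF address hash
+ * instead of scanning every ifn.
+ */
+#if 0
+static struct sctp_ifa *
+sctp_walk_vrf_for_addr(uint32_t vrf_id, struct sockaddr *addr)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifnp;
+ struct sctp_ifa *sctp_ifap;
+
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+ LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+ LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+ if (sctp_cmpaddr(addr, &sctp_ifap->address.sa))
+ return (sctp_ifap);
+ }
+ }
+ return (NULL);
+}
+#endif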
+
+struct sctp_vrf *
+sctp_allocate_vrf(int vrf_id)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_vrflist *bucket;
+
+ /* First allocate the VRF structure */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf) {
+ /* Already allocated */
+ return (vrf);
+ }
+ SCTP_MALLOC(vrf, struct sctp_vrf *, sizeof(struct sctp_vrf),
+ SCTP_M_VRF);
+ if (vrf == NULL) {
+ /* No memory */
+#ifdef INVARIANTS
+ panic("No memory for VRF:%d", vrf_id);
+#endif
+ return (NULL);
+ }
+ /* setup the VRF */
+ memset(vrf, 0, sizeof(struct sctp_vrf));
+ vrf->vrf_id = vrf_id;
+ LIST_INIT(&vrf->ifnlist);
+ vrf->total_ifa_count = 0;
+ vrf->refcount = 0;
+ /* now also setup table ids */
+ SCTP_INIT_VRF_TABLEID(vrf);
+ /* Init the HASH of addresses */
+ vrf->vrf_addr_hash = SCTP_HASH_INIT(SCTP_VRF_ADDR_HASH_SIZE,
+ &vrf->vrf_addr_hashmark);
+ if (vrf->vrf_addr_hash == NULL) {
+ /* No memory */
+#ifdef INVARIANTS
+ panic("No memory for VRF:%d", vrf_id);
+#endif
+ SCTP_FREE(vrf, SCTP_M_VRF);
+ return (NULL);
+ }
+ /* Add it to the hash table */
+ bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))];
+ LIST_INSERT_HEAD(bucket, vrf, next_vrf);
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_vrfs), 1);
+ return (vrf);
+}
+
+
+struct sctp_ifn *
+sctp_find_ifn(void *ifn, uint32_t ifn_index)
+{
+ struct sctp_ifn *sctp_ifnp;
+ struct sctp_ifnlist *hash_ifn_head;
+
+ /*
+ * We assume the lock is held for the addresses; if that's wrong,
+ * problems could occur :-)
+ */
+ hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))];
+ LIST_FOREACH(sctp_ifnp, hash_ifn_head, next_bucket) {
+ if (sctp_ifnp->ifn_index == ifn_index) {
+ return (sctp_ifnp);
+ }
+ if (sctp_ifnp->ifn_p && ifn && (sctp_ifnp->ifn_p == ifn)) {
+ return (sctp_ifnp);
+ }
+ }
+ return (NULL);
+}
+
+
+
+struct sctp_vrf *
+sctp_find_vrf(uint32_t vrf_id)
+{
+ struct sctp_vrflist *bucket;
+ struct sctp_vrf *liste;
+
+ bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))];
+ LIST_FOREACH(liste, bucket, next_vrf) {
+ if (vrf_id == liste->vrf_id) {
+ return (liste);
+ }
+ }
+ return (NULL);
+}
+
+void
+sctp_free_vrf(struct sctp_vrf *vrf)
+{
+ if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&vrf->refcount)) {
+ if (vrf->vrf_addr_hash) {
+ SCTP_HASH_FREE(vrf->vrf_addr_hash, vrf->vrf_addr_hashmark);
+ vrf->vrf_addr_hash = NULL;
+ }
+ /* We zero'd the count */
+ LIST_REMOVE(vrf, next_vrf);
+ SCTP_FREE(vrf, SCTP_M_VRF);
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_vrfs), 1);
+ }
+}
+
+void
+sctp_free_ifn(struct sctp_ifn *sctp_ifnp)
+{
+ if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifnp->refcount)) {
+ /* We zero'd the count */
+ if (sctp_ifnp->vrf) {
+ sctp_free_vrf(sctp_ifnp->vrf);
+ }
+ SCTP_FREE(sctp_ifnp, SCTP_M_IFN);
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ifns), 1);
+ }
+}
+
+void
+sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu)
+{
+ struct sctp_ifn *sctp_ifnp;
+
+ sctp_ifnp = sctp_find_ifn((void *)NULL, ifn_index);
+ if (sctp_ifnp != NULL) {
+ sctp_ifnp->ifn_mtu = mtu;
+ }
+}
+
+
+void
+sctp_free_ifa(struct sctp_ifa *sctp_ifap)
+{
+ if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifap->refcount)) {
+ /* We zero'd the count */
+ if (sctp_ifap->ifn_p) {
+ sctp_free_ifn(sctp_ifap->ifn_p);
+ }
+ SCTP_FREE(sctp_ifap, SCTP_M_IFA);
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ifas), 1);
+ }
+}
+
+static void
+sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock)
+{
+ struct sctp_ifn *found;
+
+ found = sctp_find_ifn(sctp_ifnp->ifn_p, sctp_ifnp->ifn_index);
+ if (found == NULL) {
+ /* Not in the list.. sorry */
+ return;
+ }
+ if (hold_addr_lock == 0)
+ SCTP_IPI_ADDR_WLOCK();
+ LIST_REMOVE(sctp_ifnp, next_bucket);
+ LIST_REMOVE(sctp_ifnp, next_ifn);
+ SCTP_DEREGISTER_INTERFACE(sctp_ifnp->ifn_index,
+ sctp_ifnp->registered_af);
+ if (hold_addr_lock == 0)
+ SCTP_IPI_ADDR_WUNLOCK();
+ /* Take away the reference, and possibly free it */
+ sctp_free_ifn(sctp_ifnp);
+}
+
+void
+sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr,
+ const char *if_name, uint32_t ifn_index)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out;
+
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n");
+ goto out;
+ }
+ if (sctp_ifap->ifn_p == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
+ goto out;
+ }
+ if (if_name) {
+ int len1, len2;
+
+ len1 = strlen(if_name);
+ len2 = strlen(sctp_ifap->ifn_p->ifn_name);
+ if (len1 != len2) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different length %d vs %d - ignored\n",
+ len1, len2);
+ goto out;
+ }
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n",
+ sctp_ifap->ifn_p->ifn_name,
+ if_name);
+ goto out;
+ }
+ } else {
+ if (sctp_ifap->ifn_p->ifn_index != ifn_index) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n",
+ sctp_ifap->ifn_p->ifn_index, ifn_index);
+ goto out;
+ }
+ }
+
+ sctp_ifap->localifa_flags &= (~SCTP_ADDR_VALID);
+ sctp_ifap->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+out:
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+void
+sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr,
+ const char *if_name, uint32_t ifn_index)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out;
+
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n");
+ goto out;
+ }
+ if (sctp_ifap->ifn_p == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
+ goto out;
+ }
+ if (if_name) {
+ int len1, len2;
+
+ len1 = strlen(if_name);
+ len2 = strlen(sctp_ifap->ifn_p->ifn_name);
+ if (len1 != len2) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different length %d vs %d - ignored\n",
+ len1, len2);
+ goto out;
+ }
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n",
+ sctp_ifap->ifn_p->ifn_name,
+ if_name);
+ goto out;
+ }
+ } else {
+ if (sctp_ifap->ifn_p->ifn_index != ifn_index) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n",
+ sctp_ifap->ifn_p->ifn_index, ifn_index);
+ goto out;
+ }
+ }
+
+ sctp_ifap->localifa_flags &= (~SCTP_ADDR_IFA_UNUSEABLE);
+ sctp_ifap->localifa_flags |= SCTP_ADDR_VALID;
+out:
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+/*-
+ * Add an ifa to an ifn.
+ * Register the interface as necessary.
+ * NOTE: ADDR write lock MUST be held.
+ */
+static void
+sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap)
+{
+ int ifa_af;
+
+ LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifap->ifn_p->refcount, 1);
+ /* update address counts */
+ sctp_ifnp->ifa_count++;
+ ifa_af = sctp_ifap->address.sa.sa_family;
+ if (ifa_af == AF_INET)
+ sctp_ifnp->num_v4++;
+ else
+ sctp_ifnp->num_v6++;
+ if (sctp_ifnp->ifa_count == 1) {
+ /* register the new interface */
+ SCTP_REGISTER_INTERFACE(sctp_ifnp->ifn_index, ifa_af);
+ sctp_ifnp->registered_af = ifa_af;
+ }
+}
+
+/*-
+ * Remove an ifa from its ifn.
+ * If no more addresses exist, remove the ifn too. Otherwise, re-register
+ * the interface based on the remaining address families left.
+ * NOTE: ADDR write lock MUST be held.
+ */
+static void
+sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap)
+{
+ uint32_t ifn_index;
+
+ LIST_REMOVE(sctp_ifap, next_ifa);
+ if (sctp_ifap->ifn_p) {
+ /* update address counts */
+ sctp_ifap->ifn_p->ifa_count--;
+ if (sctp_ifap->address.sa.sa_family == AF_INET6)
+ sctp_ifap->ifn_p->num_v6--;
+ else if (sctp_ifap->address.sa.sa_family == AF_INET)
+ sctp_ifap->ifn_p->num_v4--;
+
+ ifn_index = sctp_ifap->ifn_p->ifn_index;
+ if (LIST_EMPTY(&sctp_ifap->ifn_p->ifalist)) {
+ /* remove the ifn, possibly freeing it */
+ sctp_delete_ifn(sctp_ifap->ifn_p, SCTP_ADDR_LOCKED);
+ } else {
+ /* re-register address family type, if needed */
+ if ((sctp_ifap->ifn_p->num_v6 == 0) &&
+ (sctp_ifap->ifn_p->registered_af == AF_INET6)) {
+ SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET6);
+ SCTP_REGISTER_INTERFACE(ifn_index, AF_INET);
+ sctp_ifap->ifn_p->registered_af = AF_INET;
+ } else if ((sctp_ifap->ifn_p->num_v4 == 0) &&
+ (sctp_ifap->ifn_p->registered_af == AF_INET)) {
+ SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET);
+ SCTP_REGISTER_INTERFACE(ifn_index, AF_INET6);
+ sctp_ifap->ifn_p->registered_af = AF_INET6;
+ }
+ /* free the ifn refcount */
+ sctp_free_ifn(sctp_ifap->ifn_p);
+ }
+ sctp_ifap->ifn_p = NULL;
+ }
+}
+
+struct sctp_ifa *
+sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index,
+ uint32_t ifn_type, const char *if_name, void *ifa,
+ struct sockaddr *addr, uint32_t ifa_flags,
+ int dynamic_add)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifnp = NULL;
+ struct sctp_ifa *sctp_ifap = NULL;
+ struct sctp_ifalist *hash_addr_head;
+ struct sctp_ifnlist *hash_ifn_head;
+ uint32_t hash_of_addr;
+ int new_ifn_af = 0;
+
+#ifdef SCTP_DEBUG
+ SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: adding address: ", vrf_id);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
+#endif
+ SCTP_IPI_ADDR_WLOCK();
+ sctp_ifnp = sctp_find_ifn(ifn, ifn_index);
+ if (sctp_ifnp) {
+ vrf = sctp_ifnp->vrf;
+ } else {
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ vrf = sctp_allocate_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_WUNLOCK();
+ return (NULL);
+ }
+ }
+ }
+ if (sctp_ifnp == NULL) {
+ /*
+ * build one and add it; can't hold the lock until after the
+ * malloc is done, though.
+ */
+ SCTP_IPI_ADDR_WUNLOCK();
+ SCTP_MALLOC(sctp_ifnp, struct sctp_ifn *,
+ sizeof(struct sctp_ifn), SCTP_M_IFN);
+ if (sctp_ifnp == NULL) {
+#ifdef INVARIANTS
+ panic("No memory for IFN");
+#endif
+ return (NULL);
+ }
+ memset(sctp_ifnp, 0, sizeof(struct sctp_ifn));
+ sctp_ifnp->ifn_index = ifn_index;
+ sctp_ifnp->ifn_p = ifn;
+ sctp_ifnp->ifn_type = ifn_type;
+ sctp_ifnp->refcount = 0;
+ sctp_ifnp->vrf = vrf;
+ atomic_add_int(&vrf->refcount, 1);
+ sctp_ifnp->ifn_mtu = SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, addr->sa_family);
+ if (if_name != NULL) {
+ memcpy(sctp_ifnp->ifn_name, if_name, SCTP_IFNAMSIZ);
+ } else {
+ memcpy(sctp_ifnp->ifn_name, "unknown", min(7, SCTP_IFNAMSIZ));
+ }
+ hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))];
+ LIST_INIT(&sctp_ifnp->ifalist);
+ SCTP_IPI_ADDR_WLOCK();
+ LIST_INSERT_HEAD(hash_ifn_head, sctp_ifnp, next_bucket);
+ LIST_INSERT_HEAD(&vrf->ifnlist, sctp_ifnp, next_ifn);
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifns), 1);
+ new_ifn_af = 1;
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap) {
+ /* Hmm, it already exists? */
+ if ((sctp_ifap->ifn_p) &&
+ (sctp_ifap->ifn_p->ifn_index == ifn_index)) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n",
+ sctp_ifap->ifn_p->ifn_name, ifn_index,
+ sctp_ifap);
+ if (new_ifn_af) {
+ /* Remove the created one that we don't want */
+ sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED);
+ }
+ if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) {
+ /* easy to solve, just switch back to active */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Clearing deleted ifa flag\n");
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifap->ifn_p->refcount, 1);
+ }
+ exit_stage_left:
+ SCTP_IPI_ADDR_WUNLOCK();
+ return (sctp_ifap);
+ } else {
+ if (sctp_ifap->ifn_p) {
+ /*
+ * The last IFN gets the address, remove the
+ * old one
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n",
+ sctp_ifap, sctp_ifap->ifn_p->ifn_name,
+ sctp_ifap->ifn_p->ifn_index, if_name,
+ ifn_index);
+ /* remove the address from the old ifn */
+ sctp_remove_ifa_from_ifn(sctp_ifap);
+ /* move the address over to the new ifn */
+ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
+ goto exit_stage_left;
+ } else {
+ /* repair ifnp which was NULL ? */
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
+ SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n",
+ sctp_ifnp, sctp_ifap);
+ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
+ }
+ goto exit_stage_left;
+ }
+ }
+ SCTP_IPI_ADDR_WUNLOCK();
+ SCTP_MALLOC(sctp_ifap, struct sctp_ifa *, sizeof(struct sctp_ifa), SCTP_M_IFA);
+ if (sctp_ifap == NULL) {
+#ifdef INVARIANTS
+ panic("No memory for IFA");
+#endif
+ return (NULL);
+ }
+ memset(sctp_ifap, 0, sizeof(struct sctp_ifa));
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifnp->refcount, 1);
+ sctp_ifap->vrf_id = vrf_id;
+ sctp_ifap->ifa = ifa;
+ memcpy(&sctp_ifap->address, addr, addr->sa_len);
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID | SCTP_ADDR_DEFER_USE;
+ sctp_ifap->flags = ifa_flags;
+ /* Set scope */
+ switch (sctp_ifap->address.sa.sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifap->address.sin;
+ if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
+ (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) {
+ sctp_ifap->src_is_loop = 1;
+ }
+ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ sctp_ifap->src_is_priv = 1;
+ }
+ sctp_ifnp->num_v4++;
+ if (new_ifn_af)
+ new_ifn_af = AF_INET;
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ /* ok to use deprecated addresses? */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6;
+ if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
+ (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
+ sctp_ifap->src_is_loop = 1;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ sctp_ifap->src_is_priv = 1;
+ }
+ sctp_ifnp->num_v6++;
+ if (new_ifn_af)
+ new_ifn_af = AF_INET6;
+ break;
+ }
+#endif
+ default:
+ new_ifn_af = 0;
+ break;
+ }
+ hash_of_addr = sctp_get_ifa_hash_val(&sctp_ifap->address.sa);
+
+ if ((sctp_ifap->src_is_priv == 0) &&
+ (sctp_ifap->src_is_loop == 0)) {
+ sctp_ifap->src_is_glob = 1;
+ }
+ SCTP_IPI_ADDR_WLOCK();
+ hash_addr_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
+ LIST_INSERT_HEAD(hash_addr_head, sctp_ifap, next_bucket);
+ sctp_ifap->refcount = 1;
+ LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
+ sctp_ifnp->ifa_count++;
+ vrf->total_ifa_count++;
+ atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifas), 1);
+ if (new_ifn_af) {
+ SCTP_REGISTER_INTERFACE(ifn_index, new_ifn_af);
+ sctp_ifnp->registered_af = new_ifn_af;
+ }
+ SCTP_IPI_ADDR_WUNLOCK();
+ if (dynamic_add) {
+ /*
+ * Bump up the refcount so that when the timer completes it
+ * will drop back down.
+ */
+ struct sctp_laddr *wi;
+
+ atomic_add_int(&sctp_ifap->refcount, 1);
+ wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+ if (wi == NULL) {
+ /*
+ * Gak, what can we do? We have lost an address
+ * change. Can you say HOSED?
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n");
+ /* Oops, must decrement the count */
+ sctp_del_addr_from_vrf(vrf_id, addr, ifn_index,
+ if_name);
+ return (NULL);
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = sctp_ifap;
+ wi->action = SCTP_ADD_IP_ADDRESS;
+
+ SCTP_WQ_ADDR_LOCK();
+ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
+ SCTP_WQ_ADDR_UNLOCK();
+
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ } else {
+ /* it's ready for use */
+ sctp_ifap->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ return (sctp_ifap);
+}
+
+void
+sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr,
+ uint32_t ifn_index, const char *if_name)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_WLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out_now;
+ }
+#ifdef SCTP_DEBUG
+ SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: deleting address:", vrf_id);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
+#endif
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap) {
+ /* Validate the delete */
+ if (sctp_ifap->ifn_p) {
+ int valid = 0;
+
+ /*-
+ * The name has priority over the ifn_index
+ * if it's given. We do this especially for
+ * panda who might recycle indexes fast.
+ */
+ if (if_name) {
+ int len1, len2;
+
+ len1 = min(SCTP_IFNAMSIZ, strlen(if_name));
+ len2 = min(SCTP_IFNAMSIZ, strlen(sctp_ifap->ifn_p->ifn_name));
+ if (len1 && len2 && (len1 == len2)) {
+ /* we can compare them */
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) == 0) {
+ /*
+ * They match; it's a correct
+ * delete
+ */
+ valid = 1;
+ }
+ }
+ }
+ if (!valid) {
+ /* last ditch check ifn_index */
+ if (ifn_index == sctp_ifap->ifn_p->ifn_index) {
+ valid = 1;
+ }
+ }
+ if (!valid) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s does not match addresses\n",
+ ifn_index, ((if_name == NULL) ? "NULL" : if_name));
+ SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s - ignoring delete\n",
+ sctp_ifap->ifn_p->ifn_index, sctp_ifap->ifn_p->ifn_name);
+ SCTP_IPI_ADDR_WUNLOCK();
+ return;
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", sctp_ifap);
+ sctp_ifap->localifa_flags &= SCTP_ADDR_VALID;
+ sctp_ifap->localifa_flags |= SCTP_BEING_DELETED;
+ vrf->total_ifa_count--;
+ LIST_REMOVE(sctp_ifap, next_bucket);
+ sctp_remove_ifa_from_ifn(sctp_ifap);
+ }
+#ifdef SCTP_DEBUG
+ else {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Del Addr-ifn:%d Could not find address:",
+ ifn_index);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
+ }
+#endif
+
+out_now:
+ SCTP_IPI_ADDR_WUNLOCK();
+ if (sctp_ifap) {
+ struct sctp_laddr *wi;
+
+ wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+ if (wi == NULL) {
+ /*
+ * Gak, what can we do? We have lost an address
+ * change. Can you say HOSED?
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n");
+
+ /* Oops, must decrement the count */
+ sctp_free_ifa(sctp_ifap);
+ return;
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = sctp_ifap;
+ wi->action = SCTP_DEL_IP_ADDRESS;
+ SCTP_WQ_ADDR_LOCK();
+ /*
+ * Should this really be a tailq? As it is we will process
+ * the newest first :-0
+ */
+ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
+ SCTP_WQ_ADDR_UNLOCK();
+
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ }
+ return;
+}
+
+
+static struct sctp_tcb *
+sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
+ struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id)
+{
+ /**** ASSUMES THE CALLER holds the INP_INFO_RLOCK */
+ /*
+ * If we support the TCP model, then we must now dig through to see
+ * if we can find our endpoint in the list of tcp ep's.
+ */
+ uint16_t lport, rport;
+ struct sctppcbhead *ephead;
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+
+ if ((to == NULL) || (from == NULL)) {
+ return (NULL);
+ }
+ if (to->sa_family == AF_INET && from->sa_family == AF_INET) {
+ lport = ((struct sockaddr_in *)to)->sin_port;
+ rport = ((struct sockaddr_in *)from)->sin_port;
+ } else if (to->sa_family == AF_INET6 && from->sa_family == AF_INET6) {
+ lport = ((struct sockaddr_in6 *)to)->sin6_port;
+ rport = ((struct sockaddr_in6 *)from)->sin6_port;
+ } else {
+ return NULL;
+ }
+ ephead = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))];
+ /*
+ * Ok now for each of the guys in this bucket we must look and see:
+ * - Does the remote port match. - Does their single association's
+ * addresses match this address (to). If so we update p_ep to point
+ * to this ep and return the tcb from it.
+ */
+ LIST_FOREACH(inp, ephead, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (lport != inp->sctp_lport) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (inp->def_vrf_id != vrf_id) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* check to see if the ep has one of the addresses */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* We are NOT bound all, so look further */
+ int match = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n");
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family ==
+ to->sa_family) {
+ /* see if it matches */
+ struct sockaddr_in *intf_addr, *sin;
+
+ intf_addr = &laddr->ifa->address.sin;
+ sin = (struct sockaddr_in *)to;
+ if (from->sa_family == AF_INET) {
+ if (sin->sin_addr.s_addr ==
+ intf_addr->sin_addr.s_addr) {
+ match = 1;
+ break;
+ }
+ }
+#ifdef INET6
+ if (from->sa_family == AF_INET6) {
+ struct sockaddr_in6 *intf_addr6;
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)
+ to;
+ intf_addr6 = &laddr->ifa->address.sin6;
+
+ if (SCTP6_ARE_ADDR_EQUAL(sin6,
+ intf_addr6)) {
+ match = 1;
+ break;
+ }
+ }
+#endif
+ }
+ }
+ if (match == 0) {
+ /* This endpoint does not have this address */
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ }
+ /*
+ * Ok if we hit here the ep has the address, does it hold
+ * the tcb?
+ */
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->rport != rport) {
+ /* remote port does not match. */
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* Does this TCB have a matching address? */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+
+ if (net->ro._l_addr.sa.sa_family != from->sa_family) {
+ /* not the same family, can't be a match */
+ continue;
+ }
+ switch (from->sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = (struct sockaddr_in *)&net->ro._l_addr;
+ rsin = (struct sockaddr_in *)from;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ /*
+ * Update the endpoint
+ * pointer
+ */
+ *inp_p = inp;
+ SCTP_INP_RUNLOCK(inp);
+ return (stcb);
+ }
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)from;
+ if (SCTP6_ARE_ADDR_EQUAL(sin6,
+ rsin6)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ /*
+ * Update the endpoint
+ * pointer
+ */
+ *inp_p = inp;
+ SCTP_INP_RUNLOCK(inp);
+ return (stcb);
+ }
+ break;
+ }
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
+static int
+sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
+{
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ ipv4_addr_legal = ipv6_addr_legal = 0;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ }
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+ if (vrf == NULL) {
+ /* no vrf, no addresses */
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (0);
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+ switch (sctp_ifa->address.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ if (ipv4_addr_legal) {
+ struct sockaddr_in *sin,
+ *rsin;
+
+ sin = &sctp_ifa->address.sin;
+ rsin = (struct sockaddr_in *)to;
+ if ((ipv4_local_scope == 0) &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ continue;
+ }
+ if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (1);
+ }
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6,
+ *rsin6;
+
+ sin6 = &sctp_ifa->address.sin6;
+ rsin6 = (struct sockaddr_in6 *)to;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ if (sa6_recoverscope(sin6) != 0)
+ continue;
+ }
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ continue;
+ }
+ if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (1);
+ }
+ }
+ break;
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+ if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family != to->sa_family) {
+ continue;
+ }
+ switch (to->sa_family) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = (struct sockaddr_in *)&laddr->ifa->address.sin;
+ rsin = (struct sockaddr_in *)to;
+ if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (1);
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
+ rsin6 = (struct sockaddr_in6 *)to;
+ if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (1);
+ }
+ break;
+ }
+
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (0);
+}
+
+/*
+ * rules for use
+ *
+ * 1) If I return a NULL you must decrement any INP ref cnt. 2) If I find an
+ * stcb, both will be locked (locked_tcb and stcb) but decrement will be done
+ * (if locked == NULL). 3) Decrement happens on return ONLY if locked ==
+ * NULL.
+ */
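+
+/*
+ * Illustrative caller sketch only - NOT part of the original source; the
+ * wrapper name below is hypothetical.  It shows rules 1 and 2 above for
+ * a call made with locked_tcb == NULL.
+ */
+#if 0
+static struct sctp_tcb *
+sctp_lookup_example(struct sctp_inpcb *inp, struct sockaddr *remote)
+{
+ struct sctp_nets *net = NULL;
+ struct sctp_tcb *stcb;
+
+ stcb = sctp_findassociation_ep_addr(&inp, remote, &net, NULL, NULL);
+ if (stcb == NULL) {
+ /* rule 1: lookup failed, so drop the INP reference we hold */
+ SCTP_INP_DECR_REF(inp);
+ return (NULL);
+ }
+ /* rule 2: stcb comes back locked; the caller must SCTP_TCB_UNLOCK() it */
+ return (stcb);
+}
+#endif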
+
+struct sctp_tcb *
+sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote,
+ struct sctp_nets **netp, struct sockaddr *local, struct sctp_tcb *locked_tcb)
+{
+ struct sctpasochead *head;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_nets *net;
+ uint16_t rport;
+
+ inp = *inp_p;
+ if (remote->sa_family == AF_INET) {
+ rport = (((struct sockaddr_in *)remote)->sin_port);
+ } else if (remote->sa_family == AF_INET6) {
+ rport = (((struct sockaddr_in6 *)remote)->sin6_port);
+ } else {
+ return (NULL);
+ }
+ if (locked_tcb) {
+ /*
+ * UN-lock so we can do proper locking here; this occurs when
+ * called from load_addresses_from_init.
+ */
+ atomic_add_int(&locked_tcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ SCTP_INP_INFO_RLOCK();
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ /*-
+ * Now either this guy is our listener or it's the
+ * connector. If it is the one that issued the connect, then
+ * its only chance is to be the first TCB in the list. If
+ * it is the acceptor, then do the special_lookup to hash
+ * and find the real inp.
+ */
+ if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) {
+ /* to is peer addr, from is my addr */
+ stcb = sctp_tcb_special_locate(inp_p, remote, local,
+ netp, inp->def_vrf_id);
+ if ((stcb != NULL) && (locked_tcb == NULL)) {
+ /* we have a locked tcb, lower refcount */
+ SCTP_INP_DECR_REF(inp);
+ }
+ if ((locked_tcb != NULL) && (locked_tcb != stcb)) {
+ SCTP_INP_RLOCK(locked_tcb->sctp_ep);
+ SCTP_TCB_LOCK(locked_tcb);
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ SCTP_INP_RUNLOCK(locked_tcb->sctp_ep);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ goto null_return;
+ }
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ goto null_return;
+ }
+ SCTP_TCB_LOCK(stcb);
+
+ if (stcb->rport != rport) {
+ /* remote port does not match. */
+ SCTP_TCB_UNLOCK(stcb);
+ goto null_return;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ goto null_return;
+ }
+ if (local && !sctp_does_stcb_own_this_addr(stcb, local)) {
+ SCTP_TCB_UNLOCK(stcb);
+ goto null_return;
+ }
+ /* now look at the list of remote addresses */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+#ifdef INVARIANTS
+ if (net == (TAILQ_NEXT(net, sctp_next))) {
+ panic("Corrupt net list");
+ }
+#endif
+ if (net->ro._l_addr.sa.sa_family !=
+ remote->sa_family) {
+ /* not the same family */
+ continue;
+ }
+ switch (remote->sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *sin,
+ *rsin;
+
+ sin = (struct sockaddr_in *)
+ &net->ro._l_addr;
+ rsin = (struct sockaddr_in *)remote;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6,
+ *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)remote;
+ if (SCTP6_ARE_ADDR_EQUAL(sin6,
+ rsin6)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ break;
+ }
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ goto null_return;
+ }
+ head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport,
+ inp->sctp_hashmark)];
+ if (head == NULL) {
+ goto null_return;
+ }
+ LIST_FOREACH(stcb, head, sctp_tcbhash) {
+ if (stcb->rport != rport) {
+ /* remote port does not match */
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (local && !sctp_does_stcb_own_this_addr(stcb, local)) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ /* now look at the list of remote addresses */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+#ifdef INVARIANTS
+ if (net == (TAILQ_NEXT(net, sctp_next))) {
+ panic("Corrupt net list");
+ }
+#endif
+ if (net->ro._l_addr.sa.sa_family !=
+ remote->sa_family) {
+ /* not the same family */
+ continue;
+ }
+ switch (remote->sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *sin,
+ *rsin;
+
+ sin = (struct sockaddr_in *)
+ &net->ro._l_addr;
+ rsin = (struct sockaddr_in *)remote;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6,
+ *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)
+ &net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)remote;
+ if (SCTP6_ARE_ADDR_EQUAL(sin6,
+ rsin6)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ break;
+ }
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+null_return:
+ /* clean up for returning null */
+ if (locked_tcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ /* not found */
+ return (NULL);
+}
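+
+/*
+ * Illustrative caller sketch for the rules above (not any specific call
+ * site; 'to', 'from', 'net' and 'vrf_id' stand for hypothetical caller
+ * variables).  On success the returned stcb is TCB-locked (rule 2); on a
+ * NULL return the caller drops the INP reference itself (rule 1):
+ *
+ *	inp = sctp_pcb_findep(to, 0, 0, vrf_id);
+ *	if (inp != NULL) {
+ *		stcb = sctp_findassociation_ep_addr(&inp, from, &net,
+ *		    to, NULL);
+ *		if (stcb == NULL)
+ *			SCTP_INP_DECR_REF(inp);
+ *	}
+ */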
+
+/*
+ * Find an association for a specific endpoint using the association id given
+ * out in the COMM_UP notification
+ */
+
+struct sctp_tcb *
+sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock)
+{
+ /*
+	 * Use the assoc_id to find the association.
+ */
+ struct sctpasochead *head;
+ struct sctp_tcb *stcb;
+ uint32_t id;
+
+ if (inp == NULL) {
+ SCTP_PRINTF("TSNH ep_associd\n");
+ return (NULL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_PRINTF("TSNH ep_associd0\n");
+ return (NULL);
+ }
+ id = (uint32_t) asoc_id;
+ head = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(id, inp->hashasocidmark)];
+ if (head == NULL) {
+ /* invalid id TSNH */
+ SCTP_PRINTF("TSNH ep_associd1\n");
+ return (NULL);
+ }
+ LIST_FOREACH(stcb, head, sctp_tcbasocidhash) {
+ if (stcb->asoc.assoc_id == id) {
+ if (inp != stcb->sctp_ep) {
+ /*
+ * some other guy has the same id active (id
+ * collision ??).
+ */
+ SCTP_PRINTF("TSNH ep_associd2\n");
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ continue;
+ }
+ if (want_lock) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ return (stcb);
+ }
+ }
+ return (NULL);
+}
+
+
+struct sctp_tcb *
+sctp_findassociation_ep_asocid(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock)
+{
+ struct sctp_tcb *stcb;
+
+ SCTP_INP_RLOCK(inp);
+ stcb = sctp_findasoc_ep_asocid_locked(inp, asoc_id, want_lock);
+ SCTP_INP_RUNLOCK(inp);
+ return (stcb);
+}
+
+
+static struct sctp_inpcb *
+sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head,
+ uint16_t lport, uint32_t vrf_id)
+{
+ struct sctp_inpcb *inp;
+ struct sockaddr_in *sin;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+
+#endif
+ struct sctp_laddr *laddr;
+
+#ifdef INET6
+ struct sockaddr_in6 *intf_addr6;
+
+#endif
+
+ int fnd;
+
+ /*
+ * Endpoint probe expects that the INP_INFO is locked.
+ */
+ sin = NULL;
+#ifdef INET6
+ sin6 = NULL;
+#endif
+ switch (nam->sa_family) {
+ case AF_INET:
+ sin = (struct sockaddr_in *)nam;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)nam;
+ break;
+#endif
+ default:
+ /* unsupported family */
+ return (NULL);
+ }
+
+ if (head == NULL)
+ return (NULL);
+
+ LIST_FOREACH(inp, head, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) &&
+ (inp->sctp_lport == lport)) {
+ /* got it */
+ if ((nam->sa_family == AF_INET) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* IPv4 on a IPv6 socket with ONLY IPv6 set */
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* A V6 address and the endpoint is NOT bound V6 */
+ if (nam->sa_family == AF_INET6 &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* does a VRF id match? */
+ fnd = 0;
+ if (inp->def_vrf_id == vrf_id)
+ fnd = 1;
+
+ SCTP_INP_RUNLOCK(inp);
+ if (!fnd)
+ continue;
+ return (inp);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if ((nam->sa_family == AF_INET) &&
+ (sin->sin_addr.s_addr == INADDR_ANY)) {
+ /* Can't hunt for one that has no address specified */
+ return (NULL);
+ }
+#ifdef INET6
+ if ((nam->sa_family == AF_INET6) &&
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) {
+ /* Can't hunt for one that has no address specified */
+ return (NULL);
+ }
+#endif
+ /*
+	 * ok, not bound to all so see if we can find an EP bound to this
+ * address.
+ */
+ LIST_FOREACH(inp, head, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL)) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /*
+ * Ok this could be a likely candidate, look at all of its
+ * addresses
+ */
+ if (inp->sctp_lport != lport) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* does a VRF id match? */
+ fnd = 0;
+ if (inp->def_vrf_id == vrf_id)
+ fnd = 1;
+
+ if (!fnd) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ",
+ laddr->ifa);
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n");
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family == nam->sa_family) {
+ /* possible, see if it matches */
+ struct sockaddr_in *intf_addr;
+
+ intf_addr = &laddr->ifa->address.sin;
+ switch (nam->sa_family) {
+ case AF_INET:
+ if (sin->sin_addr.s_addr ==
+ intf_addr->sin_addr.s_addr) {
+ SCTP_INP_RUNLOCK(inp);
+ return (inp);
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ intf_addr6 = &laddr->ifa->address.sin6;
+ if (SCTP6_ARE_ADDR_EQUAL(sin6,
+ intf_addr6)) {
+ SCTP_INP_RUNLOCK(inp);
+ return (inp);
+ }
+ break;
+#endif
+ }
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
+
+static struct sctp_inpcb *
+sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport, uint32_t vrf_id)
+{
+ struct sctppcbhead *head;
+ struct sctp_inpcb *t_inp;
+ int fnd;
+
+ head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport,
+ SCTP_BASE_INFO(hashmark))];
+ LIST_FOREACH(t_inp, head, sctp_hash) {
+ if (t_inp->sctp_lport != lport) {
+ continue;
+ }
+ /* is it in the VRF in question */
+ fnd = 0;
+ if (t_inp->def_vrf_id == vrf_id)
+ fnd = 1;
+ if (!fnd)
+ continue;
+
+ /* This one is in use. */
+ /* check the v6/v4 binding issue */
+ if ((t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(t_inp)) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ /* collision in V6 space */
+ return (t_inp);
+ } else {
+ /* inp is BOUND_V4 no conflict */
+ continue;
+ }
+ } else if (t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ /* t_inp is bound v4 and v6, conflict always */
+ return (t_inp);
+ } else {
+ /* t_inp is bound only V4 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* no conflict */
+ continue;
+ }
+ /* else fall through to conflict */
+ }
+ return (t_inp);
+ }
+ return (NULL);
+}
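+
+/*
+ * Informal summary of the v4/v6 conflict checks above, where t_inp is the
+ * endpoint already holding the port and inp is the one asking:
+ *
+ *	t_inp bound as		inp bound as		result
+ *	V6 only			V6 (any)		conflict
+ *	V6 only			V4 only			no conflict
+ *	V4 and V6		anything		conflict
+ *	V4 only			V6 only			no conflict
+ *	V4 only			V4 (any)		conflict
+ */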
+
+
+int
+sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp)
+{
+ /* For 1-2-1 with port reuse */
+ struct sctppcbhead *head;
+ struct sctp_inpcb *tinp;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
+ /* only works with port reuse on */
+ return (-1);
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) == 0) {
+ return (0);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport,
+ SCTP_BASE_INFO(hashmark))];
+ /* Kick out all non-listeners to the TCP hash */
+ LIST_FOREACH(tinp, head, sctp_hash) {
+ if (tinp->sctp_lport != inp->sctp_lport) {
+ continue;
+ }
+ if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ continue;
+ }
+ if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ continue;
+ }
+ if (tinp->sctp_socket->so_qlimit) {
+ continue;
+ }
+ SCTP_INP_WLOCK(tinp);
+ LIST_REMOVE(tinp, sctp_hash);
+ head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR(tinp->sctp_lport, SCTP_BASE_INFO(hashtcpmark))];
+ tinp->sctp_flags |= SCTP_PCB_FLAGS_IN_TCPPOOL;
+ LIST_INSERT_HEAD(head, tinp, sctp_hash);
+ SCTP_INP_WUNLOCK(tinp);
+ }
+ SCTP_INP_WLOCK(inp);
+ /* Pull from where he was */
+ LIST_REMOVE(inp, sctp_hash);
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_IN_TCPPOOL;
+ head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport, SCTP_BASE_INFO(hashmark))];
+ LIST_INSERT_HEAD(head, inp, sctp_hash);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_RLOCK(inp);
+ return (0);
+}
+
+
+struct sctp_inpcb *
+sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock,
+ uint32_t vrf_id)
+{
+ /*
+ * First we check the hash table to see if someone has this port
+ * bound with just the port.
+ */
+ struct sctp_inpcb *inp;
+ struct sctppcbhead *head;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ int lport;
+ unsigned int i;
+
+ if (nam->sa_family == AF_INET) {
+ sin = (struct sockaddr_in *)nam;
+ lport = ((struct sockaddr_in *)nam)->sin_port;
+ } else if (nam->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)nam;
+ lport = ((struct sockaddr_in6 *)nam)->sin6_port;
+ } else {
+ /* unsupported family */
+ return (NULL);
+ }
+ /*
+ * I could cheat here and just cast to one of the types but we will
+	 * do it right. It also provides the check against an unsupported
+	 * type.
+ */
+ /* Find the head of the ALLADDR chain */
+ if (have_lock == 0) {
+ SCTP_INP_INFO_RLOCK();
+ }
+ head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport,
+ SCTP_BASE_INFO(hashmark))];
+ inp = sctp_endpoint_probe(nam, head, lport, vrf_id);
+
+ /*
+ * If the TCP model exists it could be that the main listening
+ * endpoint is gone but there still exists a connected socket for
+ * this guy. If so we can return the first one that we find. This
+	 * may NOT be the correct one so the caller should be wary of the
+ * returned INP. Currently the only caller that sets find_tcp_pool
+ * is in bindx where we are verifying that a user CAN bind the
+ * address. He either has bound it already, or someone else has, or
+	 * it's open to bind, so this is good enough.
+ */
+ if (inp == NULL && find_tcp_pool) {
+ for (i = 0; i < SCTP_BASE_INFO(hashtcpmark) + 1; i++) {
+ head = &SCTP_BASE_INFO(sctp_tcpephash)[i];
+ inp = sctp_endpoint_probe(nam, head, lport, vrf_id);
+ if (inp) {
+ break;
+ }
+ }
+ }
+ if (inp) {
+ SCTP_INP_INCR_REF(inp);
+ }
+ if (have_lock == 0) {
+ SCTP_INP_INFO_RUNLOCK();
+ }
+ return (inp);
+}
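+
+/*
+ * Minimal usage sketch (hypothetical caller): a successful lookup above has
+ * already done SCTP_INP_INCR_REF() on the endpoint, so a caller that only
+ * needs a transient reference must drop it again when done:
+ *
+ *	inp = sctp_pcb_findep(addr, 0, 0, vrf_id);
+ *	if (inp != NULL) {
+ *		... examine inp ...
+ *		SCTP_INP_DECR_REF(inp);
+ *	}
+ */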
+
+/*
+ * Find an association for an endpoint given a pointer to the address you
+ * want to send to and the endpoint pointer. The address can be IPv4 or
+ * IPv6. We may need to change the *to to some other struct like an mbuf...
+ */
+struct sctp_tcb *
+sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool,
+ uint32_t vrf_id)
+{
+ struct sctp_inpcb *inp = NULL;
+ struct sctp_tcb *retval;
+
+ SCTP_INP_INFO_RLOCK();
+ if (find_tcp_pool) {
+ if (inp_p != NULL) {
+ retval = sctp_tcb_special_locate(inp_p, from, to, netp,
+ vrf_id);
+ } else {
+ retval = sctp_tcb_special_locate(&inp, from, to, netp,
+ vrf_id);
+ }
+ if (retval != NULL) {
+ SCTP_INP_INFO_RUNLOCK();
+ return (retval);
+ }
+ }
+ inp = sctp_pcb_findep(to, 0, 1, vrf_id);
+ if (inp_p != NULL) {
+ *inp_p = inp;
+ }
+ SCTP_INP_INFO_RUNLOCK();
+
+ if (inp == NULL) {
+ return (NULL);
+ }
+ /*
+	 * ok, we have an endpoint, now let's find the assoc for it (if
+	 * any). We now place the source address (from) in the 'to' of the
+	 * find endpoint call, since in reality this chain is used from the
+	 * inbound packet side.
+ */
+ if (inp_p != NULL) {
+ retval = sctp_findassociation_ep_addr(inp_p, from, netp, to,
+ NULL);
+ } else {
+ retval = sctp_findassociation_ep_addr(&inp, from, netp, to,
+ NULL);
+ }
+ return retval;
+}
+
+
+/*
+ * This routine will grub through the mbuf that is an INIT or INIT-ACK and
+ * find all addresses that the sender has specified in any address list. Each
+ * address will be used to look up the TCB and see if one exists.
+ */
+static struct sctp_tcb *
+sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp,
+ struct sockaddr *dest)
+{
+ struct sockaddr_in sin4;
+ struct sockaddr_in6 sin6;
+ struct sctp_paramhdr *phdr, parm_buf;
+ struct sctp_tcb *retval;
+ uint32_t ptype, plen;
+
+ memset(&sin4, 0, sizeof(sin4));
+ memset(&sin6, 0, sizeof(sin6));
+ sin4.sin_len = sizeof(sin4);
+ sin4.sin_family = AF_INET;
+ sin4.sin_port = sh->src_port;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = sh->src_port;
+
+ retval = NULL;
+ offset += sizeof(struct sctp_init_chunk);
+
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr != NULL) {
+ /* now we must see if we want the parameter */
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (plen == 0) {
+ break;
+ }
+ if (ptype == SCTP_IPV4_ADDRESS &&
+ plen == sizeof(struct sctp_ipv4addr_param)) {
+ /* Get the rest of the address */
+ struct sctp_ipv4addr_param ip4_parm, *p4;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ip4_parm, min(plen, sizeof(ip4_parm)));
+ if (phdr == NULL) {
+ return (NULL);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr));
+ /* look it up */
+ retval = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&sin4, netp, dest, NULL);
+ if (retval != NULL) {
+ return (retval);
+ }
+ } else if (ptype == SCTP_IPV6_ADDRESS &&
+ plen == sizeof(struct sctp_ipv6addr_param)) {
+ /* Get the rest of the address */
+ struct sctp_ipv6addr_param ip6_parm, *p6;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ip6_parm, min(plen, sizeof(ip6_parm)));
+ if (phdr == NULL) {
+ return (NULL);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr));
+ /* look it up */
+ retval = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&sin6, netp, dest, NULL);
+ if (retval != NULL) {
+ return (retval);
+ }
+ }
+ offset += SCTP_SIZE32(plen);
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ return (NULL);
+}
+
+static struct sctp_tcb *
+sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint16_t rport,
+ uint16_t lport, int skip_src_check, uint32_t vrf_id, uint32_t remote_tag)
+{
+ /*
+ * Use my vtag to hash. If we find it we then verify the source addr
+	 * is in the assoc. If all goes well we save a bit of work on
+	 * receipt of a packet.
+ */
+ struct sctpasochead *head;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb;
+
+ *netp = NULL;
+ *inp_p = NULL;
+ SCTP_INP_INFO_RLOCK();
+ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(vtag,
+ SCTP_BASE_INFO(hashasocmark))];
+ if (head == NULL) {
+ /* invalid vtag */
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ if (stcb->asoc.my_vtag == vtag) {
+ /* candidate */
+ if (stcb->rport != rport) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (stcb->sctp_ep->sctp_lport != lport) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ /* RRS:Need toaddr check here */
+ if (sctp_does_stcb_own_this_addr(stcb, to) == 0) {
+ /* Endpoint does not own this address */
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (remote_tag) {
+ /*
+ * If we have both vtags that's all we match
+ * on
+ */
+ if (stcb->asoc.peer_vtag == remote_tag) {
+ /*
+ * If both tags match we consider it
+ * conclusive and check NO
+ * source/destination addresses
+ */
+ goto conclusive;
+ }
+ }
+ if (skip_src_check) {
+ conclusive:
+ if (from) {
+ net = sctp_findnet(stcb, from);
+ } else {
+ *netp = NULL; /* unknown */
+ }
+ if (inp_p)
+ *inp_p = stcb->sctp_ep;
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ net = sctp_findnet(stcb, from);
+ if (net) {
+ /* yep its him. */
+ *netp = net;
+ SCTP_STAT_INCR(sctps_vtagexpress);
+ *inp_p = stcb->sctp_ep;
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ } else {
+ /*
+ * not him, this should only happen in rare
+ * cases so I peg it.
+ */
+ SCTP_STAT_INCR(sctps_vtagbogus);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+}
+
+/*
+ * Find an association with the pointer to the inbound IP packet. This can be
+ * an IPv4 or IPv6 packet.
+ */
+struct sctp_tcb *
+sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_chunkhdr *ch,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
+{
+ int find_tcp_pool;
+ struct ip *iph;
+ struct sctp_tcb *retval;
+ struct sockaddr_storage to_store, from_store;
+ struct sockaddr *to = (struct sockaddr *)&to_store;
+ struct sockaddr *from = (struct sockaddr *)&from_store;
+ struct sctp_inpcb *inp;
+
+ iph = mtod(m, struct ip *);
+ switch (iph->ip_v) {
+ case IPVERSION:
+ {
+			/* it's IPv4 */
+ struct sockaddr_in *from4;
+
+ from4 = (struct sockaddr_in *)&from_store;
+ bzero(from4, sizeof(*from4));
+ from4->sin_family = AF_INET;
+ from4->sin_len = sizeof(struct sockaddr_in);
+ from4->sin_addr.s_addr = iph->ip_src.s_addr;
+ from4->sin_port = sh->src_port;
+ break;
+ }
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+			/* it's IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *from6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ from6 = (struct sockaddr_in6 *)&from_store;
+ bzero(from6, sizeof(*from6));
+ from6->sin6_family = AF_INET6;
+ from6->sin6_len = sizeof(struct sockaddr_in6);
+ from6->sin6_addr = ip6->ip6_src;
+ from6->sin6_port = sh->src_port;
+ /* Get the scopes in properly to the sin6 addr's */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(from6);
+ sa6_embedscope(from6, MODULE_GLOBAL(ip6_use_defzone));
+ break;
+ }
+#endif
+ default:
+ /* Currently not supported. */
+ return (NULL);
+ }
+
+
+ switch (iph->ip_v) {
+ case IPVERSION:
+ {
+			/* it's IPv4 */
+ struct sockaddr_in *to4;
+
+ to4 = (struct sockaddr_in *)&to_store;
+ bzero(to4, sizeof(*to4));
+ to4->sin_family = AF_INET;
+ to4->sin_len = sizeof(struct sockaddr_in);
+ to4->sin_addr.s_addr = iph->ip_dst.s_addr;
+ to4->sin_port = sh->dest_port;
+ break;
+ }
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+			/* it's IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *to6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ to6 = (struct sockaddr_in6 *)&to_store;
+ bzero(to6, sizeof(*to6));
+ to6->sin6_family = AF_INET6;
+ to6->sin6_len = sizeof(struct sockaddr_in6);
+ to6->sin6_addr = ip6->ip6_dst;
+ to6->sin6_port = sh->dest_port;
+ /* Get the scopes in properly to the sin6 addr's */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(to6);
+ sa6_embedscope(to6, MODULE_GLOBAL(ip6_use_defzone));
+ break;
+ }
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ if (sh->v_tag) {
+ /* we only go down this path if vtag is non-zero */
+ retval = sctp_findassoc_by_vtag(from, to, ntohl(sh->v_tag),
+ inp_p, netp, sh->src_port, sh->dest_port, 0, vrf_id, 0);
+ if (retval) {
+ return (retval);
+ }
+ }
+ find_tcp_pool = 0;
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (ch->chunk_type != SCTP_INITIATION_ACK) &&
+ (ch->chunk_type != SCTP_COOKIE_ACK) &&
+ (ch->chunk_type != SCTP_COOKIE_ECHO)) {
+ /* Other chunk types go to the tcp pool. */
+ find_tcp_pool = 1;
+ }
+ if (inp_p) {
+ retval = sctp_findassociation_addr_sa(to, from, inp_p, netp,
+ find_tcp_pool, vrf_id);
+ inp = *inp_p;
+ } else {
+ retval = sctp_findassociation_addr_sa(to, from, &inp, netp,
+ find_tcp_pool, vrf_id);
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp);
+ if (retval == NULL && inp) {
+		/* Found an EP but not this address */
+ if ((ch->chunk_type == SCTP_INITIATION) ||
+ (ch->chunk_type == SCTP_INITIATION_ACK)) {
+ /*-
+ * special hook, we do NOT return linp or an
+ * association that is linked to an existing
+ * association that is under the TCP pool (i.e. no
+ * listener exists). The endpoint finding routine
+ * will always find a listener before examining the
+ * TCP pool.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ if (inp_p) {
+ *inp_p = NULL;
+ }
+ return (NULL);
+ }
+ retval = sctp_findassociation_special_addr(m, iphlen,
+ offset, sh, &inp, netp, to);
+ if (inp_p != NULL) {
+ *inp_p = inp;
+ }
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "retval is %p\n", retval);
+ return (retval);
+}
+
+/*
+ * lookup an association by an ASCONF lookup address.
+ * if the lookup address is 0.0.0.0 or ::0, use the vtag to do the lookup
+ */
+struct sctp_tcb *
+sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
+{
+ struct sctp_tcb *stcb;
+ struct sockaddr_in *sin;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+
+#endif
+ struct sockaddr_storage local_store, remote_store;
+ struct sockaddr *to;
+ struct ip *iph;
+
+#ifdef INET6
+ struct ip6_hdr *ip6;
+
+#endif
+ struct sctp_paramhdr parm_buf, *phdr;
+ int ptype;
+ int zero_address = 0;
+
+
+ memset(&local_store, 0, sizeof(local_store));
+ memset(&remote_store, 0, sizeof(remote_store));
+ to = (struct sockaddr *)&local_store;
+ /* First get the destination address setup too. */
+ iph = mtod(m, struct ip *);
+ switch (iph->ip_v) {
+ case IPVERSION:
+		/* it's IPv4 */
+ sin = (struct sockaddr_in *)&local_store;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = sh->dest_port;
+ sin->sin_addr.s_addr = iph->ip_dst.s_addr;
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+		/* it's IPv6 */
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6 = (struct sockaddr_in6 *)&local_store;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = sh->dest_port;
+ sin6->sin6_addr = ip6->ip6_dst;
+ break;
+#endif
+ default:
+ return NULL;
+ }
+
+ phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk),
+ &parm_buf, sizeof(struct sctp_paramhdr));
+ if (phdr == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n",
+ __FUNCTION__);
+ return NULL;
+ }
+ ptype = (int)((uint32_t) ntohs(phdr->param_type));
+ /* get the correlation address */
+ switch (ptype) {
+#ifdef INET6
+ case SCTP_IPV6_ADDRESS:
+ {
+ /* ipv6 address param */
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv6addr_param)) {
+ return NULL;
+ }
+ p6 = (struct sctp_ipv6addr_param *)sctp_get_next_param(m,
+ offset + sizeof(struct sctp_asconf_chunk),
+ &p6_buf.ph, sizeof(*p6));
+ if (p6 == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n",
+ __FUNCTION__);
+ return (NULL);
+ }
+ sin6 = (struct sockaddr_in6 *)&remote_store;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = sh->src_port;
+ memcpy(&sin6->sin6_addr, &p6->addr, sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ break;
+ }
+#endif
+ case SCTP_IPV4_ADDRESS:
+ {
+ /* ipv4 address param */
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv4addr_param)) {
+ return NULL;
+ }
+ p4 = (struct sctp_ipv4addr_param *)sctp_get_next_param(m,
+ offset + sizeof(struct sctp_asconf_chunk),
+ &p4_buf.ph, sizeof(*p4));
+ if (p4 == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n",
+ __FUNCTION__);
+ return (NULL);
+ }
+ sin = (struct sockaddr_in *)&remote_store;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = sh->src_port;
+ memcpy(&sin->sin_addr, &p4->addr, sizeof(struct in_addr));
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ break;
+ }
+ default:
+ /* invalid address param type */
+ return NULL;
+ }
+
+ if (zero_address) {
+ stcb = sctp_findassoc_by_vtag(NULL, to, ntohl(sh->v_tag), inp_p,
+ netp, sh->src_port, sh->dest_port, 1, vrf_id, 0);
+ /*
+ * printf("findassociation_ep_asconf: zero lookup address
+ * finds stcb 0x%x\n", (uint32_t)stcb);
+ */
+ } else {
+ stcb = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&remote_store, netp,
+ to, NULL);
+ }
+ return (stcb);
+}
+
+
+/*
+ * allocate an sctp_inpcb and set up a temporary binding to a port/all
+ * addresses. This way, if we don't get a bind, we by default pick an
+ * ephemeral port with all addresses bound.
+ */
+int
+sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
+{
+ /*
+ * we get called when a new endpoint starts up. We need to allocate
+ * the sctp_inpcb structure from the zone and init it. Mark it as
+ * unbound and find a port that we can use as an ephemeral with
+	 * INADDR_ANY. If the user binds later, no problem; we can then add
+	 * in the specific addresses. And set up the default parameters for
+	 * the EP.
+ */
+ int i, error;
+ struct sctp_inpcb *inp;
+ struct sctp_pcb *m;
+ struct timeval time;
+ sctp_sharedkey_t *null_key;
+
+ error = 0;
+
+ SCTP_INP_INFO_WLOCK();
+ inp = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_ep), struct sctp_inpcb);
+ if (inp == NULL) {
+ SCTP_PRINTF("Out of SCTP-INPCB structures - no resources\n");
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ return (ENOBUFS);
+ }
+ /* zap it */
+ bzero(inp, sizeof(*inp));
+
+ /* bump generations */
+ /* setup socket pointers */
+ inp->sctp_socket = so;
+ inp->ip_inp.inp.inp_socket = so;
+ inp->sctp_associd_counter = 1;
+ inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT;
+ inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ inp->sctp_cmt_on_off = SCTP_BASE_SYSCTL(sctp_cmt_on_off);
+ /* init the small hash table we use to track asocid <-> tcb */
+ inp->sctp_asocidhash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &inp->hashasocidmark);
+ if (inp->sctp_asocidhash == NULL) {
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (ENOBUFS);
+ }
+#ifdef IPSEC
+ {
+ struct inpcbpolicy *pcb_sp = NULL;
+
+ error = ipsec_init_policy(so, &pcb_sp);
+ /* Arrange to share the policy */
+ inp->ip_inp.inp.inp_sp = pcb_sp;
+ ((struct in6pcb *)(&inp->ip_inp.inp))->in6p_sp = pcb_sp;
+ }
+ if (error != 0) {
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return error;
+ }
+#endif /* IPSEC */
+ SCTP_INCR_EP_COUNT();
+ inp->ip_inp.inp.inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
+ SCTP_INP_INFO_WUNLOCK();
+
+ so->so_pcb = (caddr_t)inp;
+
+ if ((SCTP_SO_TYPE(so) == SOCK_DGRAM) ||
+ (SCTP_SO_TYPE(so) == SOCK_SEQPACKET)) {
+ /* UDP style socket */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_UNBOUND);
+ /* Be sure it is NON-BLOCKING IO for UDP */
+ /* SCTP_SET_SO_NBIO(so); */
+ } else if (SCTP_SO_TYPE(so) == SOCK_STREAM) {
+ /* TCP style socket */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE |
+ SCTP_PCB_FLAGS_UNBOUND);
+ /* Be sure we have blocking IO by default */
+ SCTP_CLEAR_SO_NBIO(so);
+ } else {
+ /*
+ * unsupported socket type (RAW, etc)- in case we missed it
+ * in protosw
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP);
+ so->so_pcb = NULL;
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
+ return (EOPNOTSUPP);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_1) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_2) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ }
+ inp->sctp_tcbhash = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_pcbtblsize),
+ &inp->sctp_hashmark);
+ if (inp->sctp_tcbhash == NULL) {
+ SCTP_PRINTF("Out of SCTP-INPCB->hashinit - no resources\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ so->so_pcb = NULL;
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
+ return (ENOBUFS);
+ }
+ inp->def_vrf_id = vrf_id;
+
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_LOCK_INIT(inp);
+ INP_LOCK_INIT(&inp->ip_inp.inp, "inp", "sctpinp");
+ SCTP_INP_READ_INIT(inp);
+ SCTP_ASOC_CREATE_LOCK_INIT(inp);
+ /* lock the new ep */
+ SCTP_INP_WLOCK(inp);
+
+ /* add it to the info area */
+ LIST_INSERT_HEAD(&SCTP_BASE_INFO(listhead), inp, sctp_list);
+ SCTP_INP_INFO_WUNLOCK();
+
+ TAILQ_INIT(&inp->read_queue);
+ LIST_INIT(&inp->sctp_addr_list);
+
+ LIST_INIT(&inp->sctp_asoc_list);
+
+#ifdef SCTP_TRACK_FREED_ASOCS
+ /* TEMP CODE */
+ LIST_INIT(&inp->sctp_asoc_free_list);
+#endif
+ /* Init the timer structure for signature change */
+ SCTP_OS_TIMER_INIT(&inp->sctp_ep.signature_change.timer);
+ inp->sctp_ep.signature_change.type = SCTP_TIMER_TYPE_NEWCOOKIE;
+
+ /* now init the actual endpoint default data */
+ m = &inp->sctp_ep;
+
+ /* setup the base timeout information */
+ m->sctp_timeoutticks[SCTP_TIMER_SEND] = SEC_TO_TICKS(SCTP_SEND_SEC); /* needed ? */
+ m->sctp_timeoutticks[SCTP_TIMER_INIT] = SEC_TO_TICKS(SCTP_INIT_SEC); /* needed ? */
+ m->sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default));
+ m->sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default));
+ m->sctp_timeoutticks[SCTP_TIMER_PMTU] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default));
+ m->sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default));
+ m->sctp_timeoutticks[SCTP_TIMER_SIGNATURE] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_secret_lifetime_default));
+ /* all max/min max are in ms */
+ m->sctp_maxrto = SCTP_BASE_SYSCTL(sctp_rto_max_default);
+ m->sctp_minrto = SCTP_BASE_SYSCTL(sctp_rto_min_default);
+ m->initial_rto = SCTP_BASE_SYSCTL(sctp_rto_initial_default);
+ m->initial_init_rto_max = SCTP_BASE_SYSCTL(sctp_init_rto_max_default);
+ m->sctp_sack_freq = SCTP_BASE_SYSCTL(sctp_sack_freq_default);
+
+ m->max_open_streams_intome = MAX_SCTP_STREAMS;
+
+ m->max_init_times = SCTP_BASE_SYSCTL(sctp_init_rtx_max_default);
+ m->max_send_times = SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default);
+ m->def_net_failure = SCTP_BASE_SYSCTL(sctp_path_rtx_max_default);
+ m->sctp_sws_sender = SCTP_SWS_SENDER_DEF;
+ m->sctp_sws_receiver = SCTP_SWS_RECEIVER_DEF;
+ m->max_burst = SCTP_BASE_SYSCTL(sctp_max_burst_default);
+ if ((SCTP_BASE_SYSCTL(sctp_default_cc_module) >= SCTP_CC_RFC2581) &&
+ (SCTP_BASE_SYSCTL(sctp_default_cc_module) <= SCTP_CC_HTCP)) {
+ m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module);
+ } else {
+ /* sysctl done with invalid value, set to 2581 */
+ m->sctp_default_cc_module = SCTP_CC_RFC2581;
+ }
+	/* number of streams to pre-open on an association */
+ m->pre_open_stream_count = SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default);
+
+ /* Add adaptation cookie */
+ m->adaptation_layer_indicator = 0x504C5253;
+
+ /* seed random number generator */
+ m->random_counter = 1;
+ m->store_at = SCTP_SIGNATURE_SIZE;
+ SCTP_READ_RANDOM(m->random_numbers, sizeof(m->random_numbers));
+ sctp_fill_random_store(m);
+
+ /* Minimum cookie size */
+ m->size_of_a_cookie = (sizeof(struct sctp_init_msg) * 2) +
+ sizeof(struct sctp_state_cookie);
+ m->size_of_a_cookie += SCTP_SIGNATURE_SIZE;
+
+ /* Setup the initial secret */
+ (void)SCTP_GETTIME_TIMEVAL(&time);
+ m->time_of_secret_change = time.tv_sec;
+
+ for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) {
+ m->secret_key[0][i] = sctp_select_initial_TSN(m);
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL);
+
+ /* How long is a cookie good for ? */
+ m->def_cookie_life = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default));
+ /*
+ * Initialize authentication parameters
+ */
+ m->local_hmacs = sctp_default_supported_hmaclist();
+ m->local_auth_chunks = sctp_alloc_chunklist();
+ sctp_auth_set_default_chunks(m->local_auth_chunks);
+ LIST_INIT(&m->shared_keys);
+ /* add default NULL key as key id 0 */
+ null_key = sctp_alloc_sharedkey();
+ sctp_insert_sharedkey(&m->shared_keys, null_key);
+ SCTP_INP_WUNLOCK(inp);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 12);
+#endif
+ return (error);
+}
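+
+/*
+ * Minimal call sketch (hypothetical attach-style caller): on success the
+ * new endpoint hangs off so->so_pcb and remains unbound until
+ * sctp_inpcb_bind() is called:
+ *
+ *	error = sctp_inpcb_alloc(so, vrf_id);
+ *	if (error == 0)
+ *		inp = (struct sctp_inpcb *)so->so_pcb;
+ */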
+
+
+void
+sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+ uint16_t lport, rport;
+ struct sctppcbhead *head;
+ struct sctp_laddr *laddr, *oladdr;
+
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(old_inp);
+ SCTP_INP_WLOCK(new_inp);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+
+ new_inp->sctp_ep.time_of_secret_change =
+ old_inp->sctp_ep.time_of_secret_change;
+ memcpy(new_inp->sctp_ep.secret_key, old_inp->sctp_ep.secret_key,
+ sizeof(old_inp->sctp_ep.secret_key));
+ new_inp->sctp_ep.current_secret_number =
+ old_inp->sctp_ep.current_secret_number;
+ new_inp->sctp_ep.last_secret_number =
+ old_inp->sctp_ep.last_secret_number;
+ new_inp->sctp_ep.size_of_a_cookie = old_inp->sctp_ep.size_of_a_cookie;
+
+ /* make it so new data pours into the new socket */
+ stcb->sctp_socket = new_inp->sctp_socket;
+ stcb->sctp_ep = new_inp;
+
+ /* Copy the port across */
+ lport = new_inp->sctp_lport = old_inp->sctp_lport;
+ rport = stcb->rport;
+ /* Pull the tcb from the old association */
+ LIST_REMOVE(stcb, sctp_tcbhash);
+ LIST_REMOVE(stcb, sctp_tcblist);
+ if (stcb->asoc.in_asocid_hash) {
+ LIST_REMOVE(stcb, sctp_tcbasocidhash);
+ }
+ /* Now insert the new_inp into the TCP connected hash */
+ head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))];
+
+ LIST_INSERT_HEAD(head, new_inp, sctp_hash);
+	/* It's safe to access */
+ new_inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND;
+
+ /* Now move the tcb into the endpoint list */
+ LIST_INSERT_HEAD(&new_inp->sctp_asoc_list, stcb, sctp_tcblist);
+ /*
+ * Question, do we even need to worry about the ep-hash since we
+	 * only have one connection? Probably not :> so let's get rid of it
+ * and not suck up any kernel memory in that.
+ */
+ if (stcb->asoc.in_asocid_hash) {
+ struct sctpasochead *lhd;
+
+ lhd = &new_inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(stcb->asoc.assoc_id,
+ new_inp->hashasocidmark)];
+ LIST_INSERT_HEAD(lhd, stcb, sctp_tcbasocidhash);
+ }
+ /* Ok. Let's restart timer. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, new_inp,
+ stcb, net);
+ }
+
+ SCTP_INP_INFO_WUNLOCK();
+ if (new_inp->sctp_tcbhash != NULL) {
+ SCTP_HASH_FREE(new_inp->sctp_tcbhash, new_inp->sctp_hashmark);
+ new_inp->sctp_tcbhash = NULL;
+ }
+ if ((new_inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* Subset bound, so copy in the laddr list from the old_inp */
+ LIST_FOREACH(oladdr, &old_inp->sctp_addr_list, sctp_nxt_addr) {
+ laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+ if (laddr == NULL) {
+ /*
+ * Gak, what can we do? This assoc is really
+ * HOSED. We probably should send an abort
+ * here.
+ */
+ SCTPDBG(SCTP_DEBUG_PCB1, "Association hosed in TCP model, out of laddr memory\n");
+ continue;
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(laddr, sizeof(*laddr));
+ (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+ laddr->ifa = oladdr->ifa;
+ atomic_add_int(&laddr->ifa->refcount, 1);
+ LIST_INSERT_HEAD(&new_inp->sctp_addr_list, laddr,
+ sctp_nxt_addr);
+ new_inp->laddr_count++;
+ }
+ }
+ /*
+ * Now any running timers need to be adjusted since we really don't
+	 * care if they are running or not; just blast the new_inp into
+	 * all of them.
+ */
+
+ stcb->asoc.hb_timer.ep = (void *)new_inp;
+ stcb->asoc.dack_timer.ep = (void *)new_inp;
+ stcb->asoc.asconf_timer.ep = (void *)new_inp;
+ stcb->asoc.strreset_timer.ep = (void *)new_inp;
+ stcb->asoc.shut_guard_timer.ep = (void *)new_inp;
+ stcb->asoc.autoclose_timer.ep = (void *)new_inp;
+ stcb->asoc.delayed_event_timer.ep = (void *)new_inp;
+ stcb->asoc.delete_prim_timer.ep = (void *)new_inp;
+ /* now what about the nets? */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ net->pmtu_timer.ep = (void *)new_inp;
+ net->rxt_timer.ep = (void *)new_inp;
+ net->fr_timer.ep = (void *)new_inp;
+ }
+ SCTP_INP_WUNLOCK(new_inp);
+ SCTP_INP_WUNLOCK(old_inp);
+}
+
+
+
+
+/* sctp_ifap is used to bypass normal local address validation checks */
+int
+sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
+ struct sctp_ifa *sctp_ifap, struct thread *p)
+{
+ /* bind a ep to a socket address */
+ struct sctppcbhead *head;
+ struct sctp_inpcb *inp, *inp_tmp;
+ struct inpcb *ip_inp;
+ int port_reuse_active = 0;
+ int bindall;
+ uint16_t lport;
+ int error;
+ uint32_t vrf_id;
+
+ lport = 0;
+ error = 0;
+ bindall = 1;
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ ip_inp = (struct inpcb *)so->so_pcb;
+#ifdef SCTP_DEBUG
+ if (addr) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port:%d\n",
+ ntohs(((struct sockaddr_in *)addr)->sin_port));
+ SCTPDBG(SCTP_DEBUG_PCB1, "Addr :");
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
+ }
+#endif
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) {
+ /* already did a bind, subsequent binds NOT allowed ! */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+#ifdef INVARIANTS
+ if (p == NULL)
+ panic("null proc/thread");
+#endif
+ if (addr != NULL) {
+ switch (addr->sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+
+ /* IPV6_V6ONLY socket? */
+ if (SCTP_IPV6_V6ONLY(ip_inp)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ if (addr->sa_len != sizeof(*sin)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ sin = (struct sockaddr_in *)addr;
+ lport = sin->sin_port;
+ /*
+ * For LOOPBACK the prison_local_ip4() call
+ * will transmute the ip address to the
+ * proper value.
+ */
+ if (p && (error = prison_local_ip4(p->td_ucred, &sin->sin_addr)) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
+ return (error);
+ }
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
+ bindall = 0;
+ }
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ /*
+ * Only for pure IPv6 Address. (No IPv4
+ * Mapped!)
+ */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+
+ if (addr->sa_len != sizeof(*sin6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ lport = sin6->sin6_port;
+
+ /*
+ * For LOOPBACK the prison_local_ip6() call
+ * will transmute the ipv6 address to the
+ * proper value.
+ */
+ if (p && (error = prison_local_ip6(p->td_ucred, &sin6->sin6_addr,
+ (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
+ return (error);
+ }
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ bindall = 0;
+ /* KAME hack: embed scopeid */
+ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ }
+ /* this must be cleared for ifa_ifwithaddr() */
+ sin6->sin6_scope_id = 0;
+ break;
+ }
+#endif
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EAFNOSUPPORT);
+ return (EAFNOSUPPORT);
+ }
+ }
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ /* Setup a vrf_id to be the default for the non-bind-all case. */
+ vrf_id = inp->def_vrf_id;
+
+ /* increase our count due to the unlock we do */
+ SCTP_INP_INCR_REF(inp);
+ if (lport) {
+ /*
+		 * Did the caller specify a port? If so we must see if an ep
+ * already has this one bound.
+ */
+ /* got to be root to get at low ports */
+ if (ntohs(lport) < IPPORT_RESERVED) {
+ if (p && (error =
+ priv_check(p, PRIV_NETINET_RESERVEDPORT)
+ )) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (error);
+ }
+ }
+ if (p == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
+ return (error);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ if (bindall) {
+ vrf_id = inp->def_vrf_id;
+ inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id);
+ if (inp_tmp != NULL) {
+ /*
+				 * The lock guy returned; lower the count. Note
+				 * that we are not bound, so inp_tmp should
+ * NEVER be inp. And it is this inp
+ * (inp_tmp) that gets the reference bump,
+ * so we must lower it.
+ */
+ SCTP_INP_DECR_REF(inp_tmp);
+ /* unlock info */
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
+ (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) {
+ /*
+ * Ok, must be one-2-one and
+ * allowing port re-use
+ */
+ port_reuse_active = 1;
+ goto continue_anyway;
+ }
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ } else {
+ inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id);
+ if (inp_tmp != NULL) {
+ /*
+				 * The lock guy returned; lower the count. Note
+				 * that we are not bound, so inp_tmp should
+ * NEVER be inp. And it is this inp
+ * (inp_tmp) that gets the reference bump,
+ * so we must lower it.
+ */
+ SCTP_INP_DECR_REF(inp_tmp);
+ /* unlock info */
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
+ (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) {
+ /*
+ * Ok, must be one-2-one and
+ * allowing port re-use
+ */
+ port_reuse_active = 1;
+ goto continue_anyway;
+ }
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ }
+continue_anyway:
+ SCTP_INP_WLOCK(inp);
+ if (bindall) {
+			/* verify that the lport is not used by a singleton */
+ if ((port_reuse_active == 0) &&
+ (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id))
+ ) {
+ /* Sorry someone already has this one bound */
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
+ (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) {
+ port_reuse_active = 1;
+ } else {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ }
+ }
+ } else {
+ uint16_t first, last, candidate;
+ uint16_t count;
+ int done;
+
+ if (ip_inp->inp_flags & INP_HIGHPORT) {
+ first = MODULE_GLOBAL(ipport_hifirstauto);
+ last = MODULE_GLOBAL(ipport_hilastauto);
+ } else if (ip_inp->inp_flags & INP_LOWPORT) {
+ if (p && (error =
+ priv_check(p, PRIV_NETINET_RESERVEDPORT)
+ )) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
+ return (error);
+ }
+ first = MODULE_GLOBAL(ipport_lowfirstauto);
+ last = MODULE_GLOBAL(ipport_lowlastauto);
+ } else {
+ first = MODULE_GLOBAL(ipport_firstauto);
+ last = MODULE_GLOBAL(ipport_lastauto);
+ }
+ if (first > last) {
+ uint16_t temp;
+
+ temp = first;
+ first = last;
+ last = temp;
+ }
+ count = last - first + 1; /* number of candidates */
+ candidate = first + sctp_select_initial_TSN(&inp->sctp_ep) % (count);
+
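+		/*
+		 * Worked example (assumed range, for illustration only):
+		 * with first = 49152 and last = 65535, count = 16384.  If
+		 * the pseudo-random value modulo count is 100, the scan
+		 * below starts at candidate = 49252 and walks forward,
+		 * wrapping back to first after last, until
+		 * sctp_isport_inuse() reports the port free or all count
+		 * candidates are exhausted (EADDRINUSE).
+		 */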
+ done = 0;
+ while (!done) {
+ if (sctp_isport_inuse(inp, htons(candidate), inp->def_vrf_id) == NULL) {
+ done = 1;
+ }
+ if (!done) {
+ if (--count == 0) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ if (candidate == last)
+ candidate = first;
+ else
+ candidate = candidate + 1;
+ }
+ }
+ lport = htons(candidate);
+ }
+ SCTP_INP_DECR_REF(inp);
+ if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE |
+ SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /*
+ * this really should not happen. The guy did a non-blocking
+ * bind and then did a close at the same time.
+ */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+	/* ok we look clear to give out this port, so let's set up the binding */
+ if (bindall) {
+ /* binding to all addresses, so just set in the proper flags */
+ inp->sctp_flags |= SCTP_PCB_FLAGS_BOUNDALL;
+ /* set the automatic addr changes from kernel flag */
+ if (SCTP_BASE_SYSCTL(sctp_auto_asconf) == 0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ } else {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_multiple_asconfs) == 0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS);
+ } else {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS);
+ }
+ /*
+ * set the automatic mobility_base from kernel flag (by
+ * micchie)
+ */
+ if (SCTP_BASE_SYSCTL(sctp_mobility_base) == 0) {
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_BASE);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ } else {
+ sctp_mobility_feature_on(inp, SCTP_MOBILITY_BASE);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ }
+ /*
+ * set the automatic mobility_fasthandoff from kernel flag
+ * (by micchie)
+ */
+ if (SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff) == 0) {
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_FASTHANDOFF);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ } else {
+ sctp_mobility_feature_on(inp, SCTP_MOBILITY_FASTHANDOFF);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ }
+ } else {
+ /*
+		 * bind specific: make sure the BOUNDALL flag is off and add a new
+ * address structure to the sctp_addr_list inside the ep
+ * structure.
+ *
+ * We will need to allocate one and insert it at the head. The
+ * socketopt call can just insert new addresses in there as
+ * well. It will also have to do the embed scope kame hack
+ * too (before adding).
+ */
+ struct sctp_ifa *ifa;
+ struct sockaddr_storage store_sa;
+
+ memset(&store_sa, 0, sizeof(store_sa));
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&store_sa;
+ memcpy(sin, addr, sizeof(struct sockaddr_in));
+ sin->sin_port = 0;
+ } else if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&store_sa;
+ memcpy(sin6, addr, sizeof(struct sockaddr_in6));
+ sin6->sin6_port = 0;
+ }
+ /*
+		 * first find the interface with the bound address; we need
+		 * to zero out the port to find the address (yuck!). We can't
+		 * do this earlier since we need the port for sctp_pcb_findep().
+ */
+ if (sctp_ifap != NULL)
+ ifa = sctp_ifap;
+ else {
+ /*
+			 * Note: for BSD we always hit here; other O/S's will
+ * pass things in via the sctp_ifap argument
+ * (Panda).
+ */
+ ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa,
+ vrf_id, SCTP_ADDR_NOT_LOCKED);
+ }
+ if (ifa == NULL) {
+ /* Can't find an interface with that address */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+ if (addr->sa_family == AF_INET6) {
+ /* GAK, more FIXME IFA lock? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-existent addr. */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ }
+ /* we're not bound all */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUNDALL;
+ /* allow bindx() to send ASCONF's for binding changes */
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ /* clear automatic addr changes from kernel flag */
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+
+ /* add this address to the endpoint list */
+ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, 0);
+ if (error != 0) {
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (error);
+ }
+ inp->laddr_count++;
+ }
+ /* find the bucket */
+ if (port_reuse_active) {
+ /* Put it into tcp 1-2-1 hash */
+ head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashtcpmark))];
+ inp->sctp_flags |= SCTP_PCB_FLAGS_IN_TCPPOOL;
+ } else {
+ head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashmark))];
+ }
+ /* put it in the bucket */
+ LIST_INSERT_HEAD(head, inp, sctp_hash);
+ SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d - in tcp_pool=%d\n",
+ head, ntohs(lport), port_reuse_active);
+ /* set in the port */
+ inp->sctp_lport = lport;
+
+ /* turn off just the unbound flag */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND;
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+}
+
+
+static void
+sctp_iterator_inp_being_freed(struct sctp_inpcb *inp)
+{
+ struct sctp_iterator *it, *nit;
+
+ /*
+	 * We enter with only the ITERATOR_LOCK in place and a write
+ * lock on the inp_info stuff.
+ */
+ it = sctp_it_ctl.cur_it;
+ if (it && (it->vn != curvnet)) {
+		/* It's not looking at our VNET */
+ return;
+ }
+ if (it && (it->inp == inp)) {
+ /*
+ * This is tricky and we hold the iterator lock, but when it
+ * returns and gets the lock (when we release it) the
+ * iterator will try to operate on inp. We need to stop that
+ * from happening. But of course the iterator has a
+ * reference on the stcb and inp. We can mark it and it will
+ * stop.
+ *
+		 * If it's a single iterator situation, we set the end iterator
+ * flag. Otherwise we set the iterator to go to the next
+ * inp.
+ *
+ */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
+ } else {
+ sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_INP;
+ }
+ }
+ /*
+ * Now go through and remove any single reference to our inp that
+	 * may still be pending on the list
+ */
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
+ while (it) {
+ nit = TAILQ_NEXT(it, sctp_nxt_itr);
+ if (it->vn != curvnet) {
+ it = nit;
+ continue;
+ }
+ if (it->inp == inp) {
+			/* This one points to me; is it inp specific? */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ /* Remove and free this one */
+ TAILQ_REMOVE(&sctp_it_ctl.iteratorhead,
+ it, sctp_nxt_itr);
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ SCTP_FREE(it, SCTP_M_ITER);
+ } else {
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ if (it->inp) {
+ SCTP_INP_INCR_REF(it->inp);
+ }
+ }
+ /*
+			 * When it's put in, the refcnt is incremented, so
+			 * decrement it.
+ */
+ SCTP_INP_DECR_REF(inp);
+ }
+ it = nit;
+ }
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+}
+
+/* release sctp_inpcb unbind the port */
+void
+sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
+{
+ /*
+	 * Here we free an endpoint. We must find it (if it is in the Hash
+ * table) and remove it from there. Then we must also find it in the
+ * overall list and remove it from there. After all removals are
+	 * complete, any timer has to be stopped. Then start the actual
+ * freeing. a) Any local lists. b) Any associations. c) The hash of
+ * all associations. d) finally the ep itself.
+ */
+ struct sctp_pcb *m;
+ struct sctp_tcb *asoc, *nasoc;
+ struct sctp_laddr *laddr, *nladdr;
+ struct inpcb *ip_pcb;
+ struct socket *so;
+ int being_refed = 0;
+ struct sctp_queued_to_read *sq;
+
+
+ int cnt;
+ sctp_sharedkey_t *shared_key;
+
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 0);
+#endif
+ SCTP_ITERATOR_LOCK();
+ /* mark any iterators on the list or being processed */
+ sctp_iterator_inp_being_freed(inp);
+ SCTP_ITERATOR_UNLOCK();
+ so = inp->sctp_socket;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ /* been here before.. eeks.. get out of here */
+ SCTP_PRINTF("This conflict in free SHOULD not be happening! from %d, imm %d\n", from, immediate);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 1);
+#endif
+ return;
+ }
+ SCTP_ASOC_CREATE_LOCK(inp);
+ SCTP_INP_INFO_WLOCK();
+
+ SCTP_INP_WLOCK(inp);
+ if (from == SCTP_CALLED_AFTER_CMPSET_OFCLOSE) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_CLOSE_IP;
+ /* socket is gone, so no more wakeups allowed */
+ inp->sctp_flags |= SCTP_PCB_FLAGS_DONT_WAKE;
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
+
+ }
+ /* First time through we have the socket lock, after that no more. */
+ sctp_timer_stop(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_1);
+
+ if (inp->control) {
+ sctp_m_freem(inp->control);
+ inp->control = NULL;
+ }
+ if (inp->pkt) {
+ sctp_m_freem(inp->pkt);
+ inp->pkt = NULL;
+ }
+ m = &inp->sctp_ep;
+ ip_pcb = &inp->ip_inp.inp; /* we could just cast the main pointer
+ * here but I will be nice :> (i.e.
+ * ip_pcb = ep;) */
+ if (immediate == SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE) {
+ int cnt_in_sd;
+
+ cnt_in_sd = 0;
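+ /*
+ * Walk every association on this endpoint: let those that still
+ * need to drain or shut down proceed (counting them in cnt_in_sd)
+ * and free the rest now.
+ */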
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL;
+ asoc = nasoc) {
+ SCTP_TCB_LOCK(asoc);
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* Skip guys being freed */
+ cnt_in_sd++;
+ if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) {
+ /*
+ * Special case - we did not start a
+ * kill timer on the asoc because it
+ * was not closed. So go ahead and
+ * start it now.
+ */
+ asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL);
+ }
+ SCTP_TCB_UNLOCK(asoc);
+ continue;
+ }
+ if (((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_ECHOED)) &&
+ (asoc->asoc.total_output_queue_size == 0)) {
+ /*
+ * If we have data in the queue, we don't want
+ * to just free it, since the app may have done
+ * send()/close or connect/send/close and
+ * wants the data to get across first.
+ */
+ /* Just abandon things in the front states */
+ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_2) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ }
+ /* Disconnect the socket please */
+ asoc->sctp_socket = NULL;
+ asoc->asoc.state |= SCTP_STATE_CLOSED_SOCKET;
+ if ((asoc->asoc.size_on_reasm_queue > 0) ||
+ (asoc->asoc.control_pdapi) ||
+ (asoc->asoc.size_on_all_streams > 0) ||
+ (so && (so->so_rcv.sb_cc > 0))
+ ) {
+ /* Left with Data unread */
+ struct mbuf *op_err;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3);
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc,
+ SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_4) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ } else if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
+ TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
+ (asoc->asoc.stream_queue_cnt == 0)
+ ) {
+ if (asoc->asoc.locked_on_sending) {
+ goto abort_anyway;
+ }
+ if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /*
+ * there is nothing queued to send,
+ * so we send a SHUTDOWN
+ */
+ sctp_send_shutdown(asoc, asoc->asoc.primary_destination);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_LOCKED);
+ }
+ } else {
+ /* mark into shutdown pending */
+ struct sctp_stream_queue_pending *sp;
+
+ asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ if (asoc->asoc.locked_on_sending) {
+ sp = TAILQ_LAST(&((asoc->asoc.locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n",
+ asoc->asoc.locked_on_sending,
+ asoc->asoc.locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
+ TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
+ (asoc->asoc.state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ (sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t));
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5);
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc,
+ SCTP_PCBFREE_NOFORCE,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_6) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ } else {
+ sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ cnt_in_sd++;
+ SCTP_TCB_UNLOCK(asoc);
+ }
+ /* now, are there any still left in our SHUTDOWN state? */
+ if (cnt_in_sd) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 2);
+#endif
+ inp->sctp_socket = NULL;
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return;
+ }
+ }
+ inp->sctp_socket = NULL;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) !=
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /*
+ * ok, this guy has been bound. Its port is somewhere in
+ * the SCTP_BASE_INFO(hash table). Remove it!
+ */
+ LIST_REMOVE(inp, sctp_hash);
+ inp->sctp_flags |= SCTP_PCB_FLAGS_UNBOUND;
+ }
+ /*
+ * If there is a timer running to kill us, forget it, since it may
+ * be contending for the INP lock, which would cause us to die...
+ */
+ cnt = 0;
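+ /*
+ * Second pass: abort and forcefully free any associations that
+ * remain; those already marked for freeing are only counted.
+ */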
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL;
+ asoc = nasoc) {
+ SCTP_TCB_LOCK(asoc);
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) {
+ asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL);
+ }
+ cnt++;
+ SCTP_TCB_UNLOCK(asoc);
+ continue;
+ }
+ /* Free associations that are NOT killing us */
+ if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ ((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) {
+ struct mbuf *op_err;
+ uint32_t *ippp;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t));
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_7);
+
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ } else if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ cnt++;
+ SCTP_TCB_UNLOCK(asoc);
+ continue;
+ }
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
+ cnt++;
+ }
+ }
+ if (cnt) {
+ /* Ok we have someone out there that will kill us */
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 3);
+#endif
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return;
+ }
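+ /*
+ * If anyone still references the endpoint, is contending for one of
+ * its locks, or a close is still pending, defer the final free to
+ * the INPKILL timer.
+ */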
+#ifndef __rtems__
+ if (SCTP_INP_LOCK_CONTENDED(inp))
+ being_refed++;
+ if (SCTP_INP_READ_CONTENDED(inp))
+ being_refed++;
+ if (SCTP_ASOC_CREATE_LOCK_CONTENDED(inp))
+ being_refed++;
+#endif
+
+ if ((inp->refcount) ||
+ (being_refed) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CLOSE_IP)) {
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 4);
+#endif
+ sctp_timer_start(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return;
+ }
+ inp->sctp_ep.signature_change.type = 0;
+ inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_ALLGONE;
+ /*
+ * Remove it from the list .. last thing we need a lock for.
+ */
+ LIST_REMOVE(inp, sctp_list);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ /*
+ * Now we release all locks. This INP cannot be found anymore,
+ * except possibly by the kill timer that might be running, so we
+ * call the drain function here. It should hit the case where it
+ * sees the ACTIVE flag cleared and exit, freeing us to proceed and
+ * destroy everything.
+ */
+ if (from != SCTP_CALLED_FROM_INPKILL_TIMER) {
+ (void)SCTP_OS_TIMER_STOP_DRAIN(&inp->sctp_ep.signature_change.timer);
+ } else {
+ /* Probably un-needed */
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ }
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 5);
+#endif
+
+
+ if ((inp->sctp_asocidhash) != NULL) {
+ SCTP_HASH_FREE(inp->sctp_asocidhash, inp->hashasocidmark);
+ inp->sctp_asocidhash = NULL;
+ }
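+ /* Drain whatever is still sitting on the endpoint's read queue. */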
+ /* sa_ignore FREED_MEMORY */
+ while ((sq = TAILQ_FIRST(&inp->read_queue)) != NULL) {
+ /* It is only abandoned if it had data left */
+ if (sq->length)
+ SCTP_STAT_INCR(sctps_left_abandon);
+
+ TAILQ_REMOVE(&inp->read_queue, sq, next);
+ sctp_free_remote_addr(sq->whoFrom);
+ if (so)
+ so->so_rcv.sb_cc -= sq->length;
+ if (sq->data) {
+ sctp_m_freem(sq->data);
+ sq->data = NULL;
+ }
+ /*
+ * no need to free the net count, since at this point all
+ * assoc's are gone.
+ */
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq);
+ SCTP_DECR_READQ_COUNT();
+ }
+ /* Now the sctp_pcb things */
+ /*
+ * Free each asoc if it is not already closed/freed. We can't use
+ * the macro here since le_next will get freed as part of the
+ * sctp_free_assoc() call.
+ */
+ cnt = 0;
+ if (so) {
+#ifdef IPSEC
+ ipsec_delete_pcbpolicy(ip_pcb);
+#endif /* IPSEC */
+
+ /* Unlocks not needed since the socket is gone now */
+ }
+ if (ip_pcb->inp_options) {
+ (void)sctp_m_free(ip_pcb->inp_options);
+ ip_pcb->inp_options = 0;
+ }
+ if (ip_pcb->inp_moptions) {
+ inp_freemoptions(ip_pcb->inp_moptions);
+ ip_pcb->inp_moptions = 0;
+ }
+#ifdef INET6
+ if (ip_pcb->inp_vflag & INP_IPV6) {
+ struct in6pcb *in6p;
+
+ in6p = (struct in6pcb *)inp;
+ ip6_freepcbopts(in6p->in6p_outputopts);
+ }
+#endif /* INET6 */
+ ip_pcb->inp_vflag = 0;
+ /* free up authentication fields */
+ if (inp->sctp_ep.local_auth_chunks != NULL)
+ sctp_free_chunklist(inp->sctp_ep.local_auth_chunks);
+ if (inp->sctp_ep.local_hmacs != NULL)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+
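+ /* Release the endpoint's shared authentication keys. */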
+ shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
+ while (shared_key) {
+ LIST_REMOVE(shared_key, next);
+ sctp_free_sharedkey(shared_key);
+ /* sa_ignore FREED_MEMORY */
+ shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
+ }
+
+ /*
+ * If we have an address list, the following frees the list of
+ * ifaddrs that are set into this ep. Again, macro limitations here,
+ * since using LIST_FOREACH could be a bad idea.
+ */
+ for ((laddr = LIST_FIRST(&inp->sctp_addr_list)); laddr != NULL;
+ laddr = nladdr) {
+ nladdr = LIST_NEXT(laddr, sctp_nxt_addr);
+ sctp_remove_laddr(laddr);
+ }
+
+#ifdef SCTP_TRACK_FREED_ASOCS
+ /* TEMP CODE */
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_free_list)); asoc != NULL;
+ asoc = nasoc) {
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ LIST_REMOVE(asoc, sctp_tcblist);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), asoc);
+ SCTP_DECR_ASOC_COUNT();
+ }
+ /* *** END TEMP CODE *** */
+#endif
+ /* Now let's see about freeing the EP hash table. */
+ if (inp->sctp_tcbhash != NULL) {
+ SCTP_HASH_FREE(inp->sctp_tcbhash, inp->sctp_hashmark);
+ inp->sctp_tcbhash = NULL;
+ }
+ /* Now we must put the ep memory back into the zone pool */
+ INP_LOCK_DESTROY(&inp->ip_inp.inp);
+ SCTP_INP_LOCK_DESTROY(inp);
+ SCTP_INP_READ_DESTROY(inp);
+ SCTP_ASOC_CREATE_LOCK_DESTROY(inp);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
+ SCTP_DECR_EP_COUNT();
+}
+
+
+struct sctp_nets *
+sctp_findnet(struct sctp_tcb *stcb, struct sockaddr *addr)
+{
+ struct sctp_nets *net;
+
+ /* locate the address */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (sctp_cmpaddr(addr, (struct sockaddr *)&net->ro._l_addr))
+ return (net);
+ }
+ return (NULL);
+}
+
+
+int
+sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id)
+{
+ struct sctp_ifa *sctp_ifa;
+
+ sctp_ifa = sctp_find_ifa_by_addr(addr, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (sctp_ifa) {
+ return (1);
+ } else {
+ return (0);
+ }
+}
+
+/*
+ * Adds a remote endpoint address, done with the INIT/INIT-ACK as well as
+ * when an ASCONF arrives that adds it. It will also initialize all the
+ * cwnd stats.
+ */
+int
+sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
+ int set_scope, int from)
+{
+ /*
+ * The following is redundant to the same lines in the
+ * sctp_aloc_assoc() but is needed since other callers use this
+ * add-address function as well.
+ */
+ struct sctp_nets *net, *netfirst;
+ int addr_inscope;
+
+ SCTPDBG(SCTP_DEBUG_PCB1, "Adding an address (from:%d) to the peer: ",
+ from);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, newaddr);
+
+ netfirst = sctp_findnet(stcb, newaddr);
+ if (netfirst) {
+ /*
+ * Lie and return ok; we don't want to make the association
+ * go away because of this behavior. It will happen in the TCP
+ * model in a connected socket. It does not reach the hash
+ * table until after the association is built so it can't be
+ * found. Mark as reachable, since the initial creation will
+ * have been cleared and the NOT_IN_ASSOC flag will have
+ * been added... and we don't want to end up removing it
+ * back out.
+ */
+ if (netfirst->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ netfirst->dest_state = (SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_UNCONFIRMED);
+ } else {
+ netfirst->dest_state = SCTP_ADDR_REACHABLE;
+ }
+
+ return (0);
+ }
+ addr_inscope = 1;
+ if (newaddr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)newaddr;
+ if (sin->sin_addr.s_addr == 0) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* zero out the bzero area */
+ memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+
+ /* assure len is set */
+ sin->sin_len = sizeof(struct sockaddr_in);
+ if (set_scope) {
+#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
+ stcb->asoc.ipv4_local_scope = 1;
+#else
+ if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ stcb->asoc.ipv4_local_scope = 1;
+ }
+#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
+ } else {
+ /* Validate the address is in scope */
+ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) &&
+ (stcb->asoc.ipv4_local_scope == 0)) {
+ addr_inscope = 0;
+ }
+ }
+#ifdef INET6
+ } else if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)newaddr;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* assure len is set */
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ if (set_scope) {
+ if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) {
+ stcb->asoc.loopback_scope = 1;
+ stcb->asoc.local_scope = 0;
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.site_scope = 1;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is a LINK_LOCAL we
+ * must have common site scope. Don't set
+ * the local scope since we may not share
+ * all links, only loopback can do this.
+ * Links on the local network would also be
+ * on our private network for v4 too.
+ */
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.site_scope = 1;
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is SITE_LOCAL then
+ * we must have site scope in common.
+ */
+ stcb->asoc.site_scope = 1;
+ }
+ } else {
+ /* Validate the address is in scope */
+ if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) &&
+ (stcb->asoc.loopback_scope == 0)) {
+ addr_inscope = 0;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
+ (stcb->asoc.local_scope == 0)) {
+ addr_inscope = 0;
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
+ (stcb->asoc.site_scope == 0)) {
+ addr_inscope = 0;
+ }
+ }
+#endif
+ } else {
+ /* not supported family type */
+ return (-1);
+ }
+ net = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_net), struct sctp_nets);
+ if (net == NULL) {
+ return (-1);
+ }
+ SCTP_INCR_RADDR_COUNT();
+ bzero(net, sizeof(*net));
+ (void)SCTP_GETTIME_TIMEVAL(&net->start_time);
+ memcpy(&net->ro._l_addr, newaddr, newaddr->sa_len);
+ if (newaddr->sa_family == AF_INET) {
+ ((struct sockaddr_in *)&net->ro._l_addr)->sin_port = stcb->rport;
+ } else if (newaddr->sa_family == AF_INET6) {
+ ((struct sockaddr_in6 *)&net->ro._l_addr)->sin6_port = stcb->rport;
+ }
+ net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id);
+ if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) {
+ stcb->asoc.loopback_scope = 1;
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.local_scope = 0;
+ stcb->asoc.site_scope = 1;
+ addr_inscope = 1;
+ }
+ net->failure_threshold = stcb->asoc.def_net_failure;
+ if (addr_inscope == 0) {
+ net->dest_state = (SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_OUT_OF_SCOPE);
+ } else {
+ if (from == SCTP_ADDR_IS_CONFIRMED)
+ /* SCTP_ADDR_IS_CONFIRMED is passed by connect_x */
+ net->dest_state = SCTP_ADDR_REACHABLE;
+ else
+ net->dest_state = SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_UNCONFIRMED;
+ }
+ /*
+ * We set this to 0; the timer code knows that this means it is an
+ * initial value.
+ */
+ net->RTO = 0;
+ net->RTO_measured = 0;
+ stcb->asoc.numnets++;
+ *(&net->ref_count) = 1;
+ net->tos_flowlabel = 0;
+ if (SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable)) {
+ net->port = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
+ } else {
+ net->port = 0;
+ }
+#ifdef INET
+ if (newaddr->sa_family == AF_INET)
+ net->tos_flowlabel = stcb->asoc.default_tos;
+#endif
+#ifdef INET6
+ if (newaddr->sa_family == AF_INET6)
+ net->tos_flowlabel = stcb->asoc.default_flowlabel;
+#endif
+ /* Init the timer structure */
+ SCTP_OS_TIMER_INIT(&net->rxt_timer.timer);
+ SCTP_OS_TIMER_INIT(&net->fr_timer.timer);
+ SCTP_OS_TIMER_INIT(&net->pmtu_timer.timer);
+
+ /* Now generate a route for this guy */
+#ifdef INET6
+ /* KAME hack: embed scopeid */
+ if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ (void)sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
+ sin6->sin6_scope_id = 0;
+ }
+#endif
+ SCTP_RTALLOC((sctp_route_t *) & net->ro, stcb->asoc.vrf_id);
+
+ if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) {
+ /* Get source address */
+ net->ro._s_addr = sctp_source_address_selection(stcb->sctp_ep,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net,
+ 0,
+ stcb->asoc.vrf_id);
+ /* Now get the interface MTU */
+ if (net->ro._s_addr && net->ro._s_addr->ifn_p) {
+ net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ } else {
+ net->mtu = 0;
+ }
+ if (net->mtu == 0) {
+ /* Huh ?? */
+ net->mtu = SCTP_DEFAULT_MTU;
+ } else {
+ uint32_t rmtu;
+
+ rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt);
+ if (rmtu == 0) {
+ /*
+ * Start things off to match the MTU of
+ * the interface.
+ */
+ SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa,
+ net->ro.ro_rt, net->mtu);
+ } else {
+ /*
+ * We take the route MTU over the interface MTU,
+ * since the route may be leading out the
+ * loopback or a different interface.
+ */
+ net->mtu = rmtu;
+ }
+ }
+ if (from == SCTP_ALLOC_ASOC) {
+ stcb->asoc.smallest_mtu = net->mtu;
+ }
+ } else {
+ net->mtu = stcb->asoc.smallest_mtu;
+ }
+#ifdef INET6
+ if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ (void)sa6_recoverscope(sin6);
+ }
+#endif
+ if (net->port) {
+ net->mtu -= sizeof(struct udphdr);
+ }
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ stcb->asoc.smallest_mtu = net->mtu;
+ }
+ /* JRS - Use the congestion control given in the CC module */
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+
+ /*
+ * CMT: CUC algo - set find_pseudo_cumack to TRUE (1) at beginning
+ * of assoc (2005/06/27, iyengar@cis.udel.edu)
+ */
+ net->find_pseudo_cumack = 1;
+ net->find_rtx_pseudo_cumack = 1;
+ net->src_addr_selected = 0;
+ netfirst = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net->ro.ro_rt == NULL) {
+ /* Since we have no route put it at the back */
+ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
+ } else if (netfirst == NULL) {
+ /* We are the first one in the pool. */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else if (netfirst->ro.ro_rt == NULL) {
+ /*
+ * First one has NO route. Place this one ahead of the first
+ * one.
+ */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else if (net->ro.ro_rt->rt_ifp != netfirst->ro.ro_rt->rt_ifp) {
+ /*
+ * This one has a different interface than the one at the
+ * top of the list. Place it ahead.
+ */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else {
+ /*
+ * Ok, we have the same interface as the first one. Move
+ * forward until we find either a) one with a NULL route, and
+ * insert ahead of that; b) one with a different ifp, and insert
+ * after that; or c) the end of the list, and insert at the tail.
+ */
+ struct sctp_nets *netlook;
+
+ do {
+ netlook = TAILQ_NEXT(netfirst, sctp_next);
+ if (netlook == NULL) {
+ /* End of the list */
+ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
+ break;
+ } else if (netlook->ro.ro_rt == NULL) {
+ /* next one has NO route */
+ TAILQ_INSERT_BEFORE(netfirst, net, sctp_next);
+ break;
+ } else if (netlook->ro.ro_rt->rt_ifp != net->ro.ro_rt->rt_ifp) {
+ TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook,
+ net, sctp_next);
+ break;
+ }
+ /* Shift forward */
+ netfirst = netlook;
+ } while (netlook != NULL);
+ }
+
+ /* got to have a primary set */
+ if (stcb->asoc.primary_destination == 0) {
+ stcb->asoc.primary_destination = net;
+ } else if ((stcb->asoc.primary_destination->ro.ro_rt == NULL) &&
+ (net->ro.ro_rt) &&
+ ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) {
+ /* No route to the current primary; adopt a new primary */
+ stcb->asoc.primary_destination = net;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb,
+ net);
+ /* Validate primary is first */
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if ((net != stcb->asoc.primary_destination) &&
+ (stcb->asoc.primary_destination)) {
+ /*
+ * The first one on the list is NOT the primary. sctp_cmpaddr()
+ * is much more efficient if the primary is first on the
+ * list, so make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets,
+ stcb->asoc.primary_destination, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets,
+ stcb->asoc.primary_destination, sctp_next);
+ }
+ return (0);
+}
+
+
+static uint32_t
+sctp_aloc_a_assoc_id(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ uint32_t id;
+ struct sctpasochead *head;
+ struct sctp_tcb *lstcb;
+
+ SCTP_INP_WLOCK(inp);
+try_again:
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ /* TSNH */
+ SCTP_INP_WUNLOCK(inp);
+ return (0);
+ }
+ /*
+ * We don't allow the assoc id to be 0; otherwise, if the id
+ * were to wrap, we would have issues with some socket options.
+ */
+ if (inp->sctp_associd_counter == 0) {
+ inp->sctp_associd_counter++;
+ }
+ id = inp->sctp_associd_counter;
+ inp->sctp_associd_counter++;
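+ /* If the candidate id is already in use (possible after a wrap), try again. */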
+ lstcb = sctp_findasoc_ep_asocid_locked(inp, (sctp_assoc_t) id, 0);
+ if (lstcb) {
+ goto try_again;
+ }
+ head = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(id, inp->hashasocidmark)];
+ LIST_INSERT_HEAD(head, stcb, sctp_tcbasocidhash);
+ stcb->asoc.in_asocid_hash = 1;
+ SCTP_INP_WUNLOCK(inp);
+ return id;
+}
+
+/*
+ * Allocate an association and add it to the endpoint. The caller must be
+ * careful to add all additional addresses right away, once they are known,
+ * or else the assoc may experience a blackout scenario.
+ */
+struct sctp_tcb *
+sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
+ int *error, uint32_t override_tag, uint32_t vrf_id,
+ struct thread *p
+)
+{
+ /* note the p argument is only valid in unbound sockets */
+
+ struct sctp_tcb *stcb;
+ struct sctp_association *asoc;
+ struct sctpasochead *head;
+ uint16_t rport;
+ int err;
+
+ /*
+ * Assumption made here: Caller has done a
+ * sctp_findassociation_ep_addr(ep, addr's); to make sure the
+ * address does not exist already.
+ */
+ if (SCTP_BASE_INFO(ipi_count_asoc) >= SCTP_MAX_NUM_OF_ASOC) {
+ /* Hit max assoc, sorry no more */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ *error = ENOBUFS;
+ return (NULL);
+ }
+ if (firstaddr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_INP_RLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
+ ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) {
+ /*
+ * If it is in the TCP pool, it is NOT allowed to create an
+ * association. The parent listener (or the one-to-many
+ * socket) needs to call sctp_aloc_assoc; if a peeled-off or
+ * connected one does this, it is an error.
+ */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTPDBG(SCTP_DEBUG_PCB3, "Allocate an association for peer:");
+#ifdef SCTP_DEBUG
+ if (firstaddr) {
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB3, firstaddr);
+ SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n",
+ ntohs(((struct sockaddr_in *)firstaddr)->sin_port));
+ } else {
+ SCTPDBG(SCTP_DEBUG_PCB3, "None\n");
+ }
+#endif /* SCTP_DEBUG */
+ if (firstaddr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)firstaddr;
+ if ((sin->sin_port == 0) || (sin->sin_addr.s_addr == 0)) {
+ /* Invalid address */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ rport = sin->sin_port;
+ } else if (firstaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)firstaddr;
+ if ((sin6->sin6_port == 0) ||
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) {
+ /* Invalid address */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ rport = sin6->sin6_port;
+ } else {
+ /* not supported family type */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ /*
+ * If you have not performed a bind, then we need to do the
+ * ephemeral bind for you.
+ */
+ if ((err = sctp_inpcb_bind(inp->sctp_socket,
+ (struct sockaddr *)NULL,
+ (struct sctp_ifa *)NULL,
+ p
+ ))) {
+ /* bind error, probably perm */
+ *error = err;
+ return (NULL);
+ }
+ }
+ stcb = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_asoc), struct sctp_tcb);
+ if (stcb == NULL) {
+ /* out of memory? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM);
+ *error = ENOMEM;
+ return (NULL);
+ }
+ SCTP_INCR_ASOC_COUNT();
+
+ bzero(stcb, sizeof(*stcb));
+ asoc = &stcb->asoc;
+
+ asoc->assoc_id = sctp_aloc_a_assoc_id(inp, stcb);
+ SCTP_TCB_LOCK_INIT(stcb);
+ SCTP_TCB_SEND_LOCK_INIT(stcb);
+ stcb->rport = rport;
+ /* set up back pointers */
+ stcb->sctp_ep = inp;
+ stcb->sctp_socket = inp->sctp_socket;
+ if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id))) {
+ /* failed */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ LIST_REMOVE(stcb, sctp_tcbasocidhash);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
+ SCTP_DECR_ASOC_COUNT();
+ *error = err;
+ return (NULL);
+ }
+ /* and the port */
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /* inpcb freed while alloc going on */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ LIST_REMOVE(stcb, sctp_tcbasocidhash);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_DECR_ASOC_COUNT();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_TCB_LOCK(stcb);
+
+ /* now that my_vtag is set, add it to the hash */
+ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
+ /* put it in the bucket in the vtag hash of assoc's for the system */
+ LIST_INSERT_HEAD(head, stcb, sctp_asocs);
+ SCTP_INP_INFO_WUNLOCK();
+
+ if ((err = sctp_add_remote_addr(stcb, firstaddr, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) {
+ /* failure.. memory error? */
+ if (asoc->strmout) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = NULL;
+ }
+ if (asoc->mapping_array) {
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = NULL;
+ }
+ if (asoc->nr_mapping_array) {
+ SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP);
+ asoc->nr_mapping_array = NULL;
+ }
+ SCTP_DECR_ASOC_COUNT();
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ LIST_REMOVE(stcb, sctp_tcbasocidhash);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ *error = ENOBUFS;
+ return (NULL);
+ }
+ /* Init all the timers */
+ SCTP_OS_TIMER_INIT(&asoc->hb_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->dack_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->strreset_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->asconf_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->shut_guard_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->autoclose_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->delayed_event_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->delete_prim_timer.timer);
+
+ LIST_INSERT_HEAD(&inp->sctp_asoc_list, stcb, sctp_tcblist);
+ /* now file the port under the hash as well */
+ if (inp->sctp_tcbhash != NULL) {
+ head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(stcb->rport,
+ inp->sctp_hashmark)];
+ LIST_INSERT_HEAD(head, stcb, sctp_tcbhash);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", stcb);
+ return (stcb);
+}
+
+
+void
+sctp_remove_net(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+ asoc = &stcb->asoc;
+ asoc->numnets--;
+ TAILQ_REMOVE(&asoc->nets, net, sctp_next);
+ if (net == asoc->primary_destination) {
+ /* Reset primary */
+ struct sctp_nets *lnet;
+
+ lnet = TAILQ_FIRST(&asoc->nets);
+ /*
+ * Mobility adaptation: ideally, if the deleted destination is
+ * the primary, it becomes a fast retransmission trigger by
+ * the subsequent SET PRIMARY. (by micchie)
+ */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: primary dst is deleting\n");
+ if (asoc->deleted_primary != NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: deleted primary may be already stored\n");
+ goto out;
+ }
+ asoc->deleted_primary = net;
+ atomic_add_int(&net->ref_count, 1);
+ memset(&net->lastsa, 0, sizeof(net->lastsa));
+ memset(&net->lastsv, 0, sizeof(net->lastsv));
+ sctp_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED);
+ sctp_timer_start(SCTP_TIMER_TYPE_PRIM_DELETED,
+ stcb->sctp_ep, stcb, NULL);
+ }
+out:
+ /* Try to find a confirmed primary */
+ asoc->primary_destination = sctp_find_alternate_net(stcb, lnet, 0);
+ }
+ if (net == asoc->last_data_chunk_from) {
+ /* Reset last_data_chunk_from */
+ asoc->last_data_chunk_from = TAILQ_FIRST(&asoc->nets);
+ }
+ if (net == asoc->last_control_chunk_from) {
+ /* Clear net */
+ asoc->last_control_chunk_from = NULL;
+ }
+ sctp_free_remote_addr(net);
+}
+
+/*
+ * remove a remote endpoint address from an association, it will fail if the
+ * address does not exist.
+ */
+int
+sctp_del_remote_addr(struct sctp_tcb *stcb, struct sockaddr *remaddr)
+{
+ /*
+ * Here we need to remove a remote address. This is quite simple: we
+ * first find it in the list of addresses for the association
+ * (stcb->asoc.nets) and then, if it is there, we remove that entry.
+ * Note we do not allow it to be removed if there are no other
+ * addresses.
+ */
+ struct sctp_association *asoc;
+ struct sctp_nets *net, *net_tmp;
+
+ asoc = &stcb->asoc;
+
+ /* locate the address */
+ for (net = TAILQ_FIRST(&asoc->nets); net != NULL; net = net_tmp) {
+ net_tmp = TAILQ_NEXT(net, sctp_next);
+ if (net->ro._l_addr.sa.sa_family != remaddr->sa_family) {
+ continue;
+ }
+ if (sctp_cmpaddr((struct sockaddr *)&net->ro._l_addr,
+ remaddr)) {
+ /* we found the guy */
+ if (asoc->numnets < 2) {
+ /* Must have at LEAST two remote addresses */
+ return (-1);
+ } else {
+ sctp_remove_net(stcb, net);
+ return (0);
+ }
+ }
+ }
+ /* not found. */
+ return (-2);
+}
+
+void
+sctp_delete_from_timewait(uint32_t tag, uint16_t lport, uint16_t rport)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ int found = 0;
+ int i;
+
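+ /* Find the matching (tag, lport, rport) entry in the time-wait hash chain and clear it. */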
+ chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ if (!LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+ }
+}
+
+int
+sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ int found = 0;
+ int i;
+
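+ /* Search the time-wait hash chain for a matching (tag, lport, rport) entry. */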
+ SCTP_INP_INFO_WLOCK();
+ chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ if (!LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+ }
+ SCTP_INP_INFO_WUNLOCK();
+ return (found);
+}
+
+
+void
+sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time, uint16_t lport, uint16_t rport)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ struct timeval now;
+ int set, i;
+
+ if (time == 0) {
+ /* It is disabled */
+ return;
+ }
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ set = 0;
+ if (!LIST_EMPTY(chain)) {
+ /* Block(s) present; let's find space, expiring stale entries on the fly */
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == 0) &&
+ !set) {
+ twait_block->vtag_block[i].tv_sec_at_expire =
+ now.tv_sec + time;
+ twait_block->vtag_block[i].v_tag = tag;
+ twait_block->vtag_block[i].lport = lport;
+ twait_block->vtag_block[i].rport = rport;
+ set = 1;
+ } else if ((twait_block->vtag_block[i].v_tag) &&
+ ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ if (set == 0) {
+ /* Reuse it for my new tag */
+ twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
+ twait_block->vtag_block[i].v_tag = tag;
+ twait_block->vtag_block[i].lport = lport;
+ twait_block->vtag_block[i].rport = rport;
+ set = 1;
+ }
+ }
+ }
+ if (set) {
+ /*
+ * We only do up to the block where we can
+ * place our tag for audits
+ */
+ break;
+ }
+ }
+ }
+ /* Need to add a new block to chain */
+ if (!set) {
+ SCTP_MALLOC(twait_block, struct sctp_tagblock *,
+ sizeof(struct sctp_tagblock), SCTP_M_TIMW);
+ if (twait_block == NULL) {
+#ifdef INVARIANTS
+ panic("Can not alloc tagblock");
+#endif
+ return;
+ }
+ memset(twait_block, 0, sizeof(struct sctp_tagblock));
+ LIST_INSERT_HEAD(chain, twait_block, sctp_nxt_tagblock);
+ twait_block->vtag_block[0].tv_sec_at_expire = now.tv_sec + time;
+ twait_block->vtag_block[0].v_tag = tag;
+ twait_block->vtag_block[0].lport = lport;
+ twait_block->vtag_block[0].rport = rport;
+ }
+}
+
+
+
+/*-
+ * Free the association after un-hashing the remote port. This
+ * function ALWAYS returns holding NO LOCK on the stcb. It DOES
+ * expect that the input to this function IS a locked TCB.
+ * It will return 0 if it did NOT destroy the association (instead
+ * it unlocks it). It will return NON-zero if it either destroyed
+ * the association OR the association was already destroyed.
+ */
+int
+sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfree, int from_location)
+{
+ int i;
+ struct sctp_association *asoc;
+ struct sctp_nets *net, *prev;
+ struct sctp_laddr *laddr;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_asconf_addr *aparam;
+ struct sctp_asconf_ack *aack;
+ struct sctp_stream_reset_list *liste;
+ struct sctp_queued_to_read *sq;
+ struct sctp_stream_queue_pending *sp;
+ sctp_sharedkey_t *shared_key;
+ struct socket *so;
+ int ccnt = 0;
+ int cnt = 0;
+
+ /* first, let's purge the entry from the hash table. */
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 6);
+#endif
+ if (stcb->asoc.state == 0) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 7);
+#endif
+ /* there is no asoc, really TSNH :-0 */
+ return (1);
+ }
+ /* TEMP CODE */
+ if (stcb->freed_from_where == 0) {
+ /* Only record the first place free happened from */
+ stcb->freed_from_where = from_location;
+ }
+ /* TEMP CODE */
+
+ asoc = &stcb->asoc;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+ else
+ so = inp->sctp_socket;
+
+ /*
+ * We use timer-based freeing if a reader or writer is in the way.
+ * So we first check if we are actually being called from the timer;
+ * if so, we abort early if a reader or writer is still in the way.
+ */
+ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) &&
+ (from_inpcbfree == SCTP_NORMAL_PROC)) {
+ /*
+ * Is it the timer driving us? If so, are the readers/writers
+ * gone?
+ */
+ if (stcb->asoc.refcnt) {
+ /* nope, reader or writer in the way */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
+ /* no asoc destroyed */
+ SCTP_TCB_UNLOCK(stcb);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 8);
+#endif
+ return (0);
+ }
+ }
+ /* now clean up any other timers */
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ asoc->hb_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ asoc->dack_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ /*-
+ * For stream reset we don't blast this unless
+ * it is a str-reset timer; it might be the
+ * free-asoc timer, which we DON'T want to
+ * disturb.
+ */
+ if (asoc->strreset_timer.type == SCTP_TIMER_TYPE_STRRESET)
+ asoc->strreset_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ asoc->asconf_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ asoc->autoclose_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer);
+ asoc->shut_guard_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ asoc->delayed_event_timer.self = NULL;
+ /* Mobility adaptation */
+ (void)SCTP_OS_TIMER_STOP(&asoc->delete_prim_timer.timer);
+ asoc->delete_prim_timer.self = NULL;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ net->fr_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer);
+ net->rxt_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ net->pmtu_timer.self = NULL;
+ }
+ /* Now the read queue needs to be cleaned up (only once) */
+ cnt = 0;
+ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) {
+ stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED;
+ SCTP_INP_READ_LOCK(inp);
+ TAILQ_FOREACH(sq, &inp->read_queue, next) {
+ if (sq->stcb == stcb) {
+ sq->do_not_ref_stcb = 1;
+ sq->sinfo_cumtsn = stcb->asoc.cumulative_tsn;
+ /*
+ * If there is no end, there never will be
+ * now.
+ */
+ if (sq->end_added == 0) {
+ /* Held for PD-API; clear that. */
+ sq->pdapi_aborted = 1;
+ sq->held_length = 0;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) {
+ /*
+ * Need to add a PD-API
+ * aborted indication.
+ * Setting the control_pdapi
+ * assures that it will be
+ * added right after this
+ * msg.
+ */
+ uint32_t strseq;
+
+ stcb->asoc.control_pdapi = sq;
+ strseq = (sq->sinfo_stream << 16) | sq->sinfo_ssn;
+ sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION,
+ stcb,
+ SCTP_PARTIAL_DELIVERY_ABORTED,
+ (void *)&strseq,
+ SCTP_SO_LOCKED);
+ stcb->asoc.control_pdapi = NULL;
+ }
+ }
+ /* Add an end to wake them */
+ sq->end_added = 1;
+ cnt++;
+ }
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ if (stcb->block_entry) {
+ cnt++;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PCB, ECONNRESET);
+ stcb->block_entry->error = ECONNRESET;
+ stcb->block_entry = NULL;
+ }
+ }
+ if ((stcb->asoc.refcnt) || (stcb->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE)) {
+ /*
+ * Someone holds a reference OR the socket has not been
+ * accepted yet.
+ */
+ if ((stcb->asoc.refcnt) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+ if (so) {
+ /* Wake any reader/writers */
+ sctp_sorwakeup(inp, so);
+ sctp_sowwakeup(inp, so);
+ }
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 9);
+#endif
+ /* no asoc destroyed */
+ return (0);
+ }
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 10);
+#endif
+ /*
+ * When I reach here, no others want to kill the assoc yet and I
+ * own the lock. Now it is possible an abort comes in while I do the
+ * lock exchange below to grab all the locks for the final take-out.
+ * To prevent this we increment the refcount, which makes any such
+ * attempt start a timer and bail out above, thus assuring us that
+ * we hold exclusive killing of the asoc. Note that after getting
+ * back the TCB lock we will drop that count again and stop any
+ * timer a passing stranger may have started :-S
+ */
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ SCTP_TCB_LOCK(stcb);
+ }
+ /* Double check the GONE flag */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * For TCP type we need special handling when we are
+ * connected. We also include the peeled-off ones too.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_CONNECTED;
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAS_CONNECTED;
+ if (so) {
+ SOCK_LOCK(so);
+ if (so->so_rcv.sb_cc == 0) {
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ }
+ socantrcvmore_locked(so);
+ sctp_sowwakeup(inp, so);
+ sctp_sorwakeup(inp, so);
+ SCTP_SOWAKEUP(so);
+ }
+ }
+ }
+ /*
+ * Make it invalid too; that way, if it is about to run, it will
+ * abort and return.
+ */
+ /* drop the extra reference taken above */
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+ if (stcb->asoc.refcnt) {
+ stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_INP_WUNLOCK(inp);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ return (0);
+ }
+ asoc->state = 0;
+ if (inp->sctp_tcbhash) {
+ LIST_REMOVE(stcb, sctp_tcbhash);
+ }
+ if (stcb->asoc.in_asocid_hash) {
+ LIST_REMOVE(stcb, sctp_tcbasocidhash);
+ }
+ /* Now let's remove it from the list of ALL associations in the EP */
+ LIST_REMOVE(stcb, sctp_tcblist);
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ /* pull from vtag hash */
+ LIST_REMOVE(stcb, sctp_asocs);
+ sctp_add_vtag_to_timewait(asoc->my_vtag, SCTP_BASE_SYSCTL(sctp_vtag_time_wait),
+ inp->sctp_lport, stcb->rport);
+
+ /*
+ * Now re-stop the timers to be sure; this is paranoia at its finest!
+ */
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ }
+
+ asoc->strreset_timer.type = SCTP_TIMER_TYPE_NONE;
+ prev = NULL;
+ /*
+ * The chunk lists and such SHOULD be empty but we check them just
+ * in case.
+ */
+ /* anything on the wheel needs to be removed */
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ struct sctp_stream_out *outs;
+
+ outs = &asoc->strmout[i];
+ /* now clean up any chunks here */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ TAILQ_REMOVE(&outs->outqueue, sp, next);
+ if (sp->data) {
+ if (so) {
+ /* Still an open socket - report */
+ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_UNSENT,
+ (void *)sp, SCTP_SO_LOCKED);
+ }
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ sp->tail_mbuf = NULL;
+ }
+ }
+ if (sp->net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ }
+ sctp_free_spbufspace(stcb, asoc, sp);
+ if (sp->holds_key_ref)
+ sctp_auth_key_release(stcb, sp->auth_keyid);
+ /* Free the zone stuff */
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_strmoq), sp);
+ SCTP_DECR_STRMOQ_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+
+ /* sa_ignore FREED_MEMORY */
+ while ((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) {
+ TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+ SCTP_FREE(liste, SCTP_M_STRESET);
+ }
+
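+ /* Release any queued-to-read entries still held on the pending reply queue. */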
+ sq = TAILQ_FIRST(&asoc->pending_reply_queue);
+ while (sq) {
+ TAILQ_REMOVE(&asoc->pending_reply_queue, sq, next);
+ if (sq->data) {
+ sctp_m_freem(sq->data);
+ sq->data = NULL;
+ }
+ sctp_free_remote_addr(sq->whoFrom);
+ sq->whoFrom = NULL;
+ sq->stcb = NULL;
+ /* Free the ctl entry */
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq);
+ SCTP_DECR_READQ_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ sq = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+
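+ /* Return any chunks cached on the association's free-chunk list to the zone. */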
+ chk = TAILQ_FIRST(&asoc->free_chunks);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->free_chunks, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ if (chk->holds_key_ref)
+ sctp_auth_key_release(stcb, chk->auth_keyid);
+ ccnt++;
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
+ SCTP_DECR_CHK_COUNT();
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1);
+ asoc->free_chunk_cnt--;
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->free_chunks);
+ }
+ /* pending send queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->send_queue)) {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
+ if (chk->data) {
+ if (so) {
+ /* Still a socket? */
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_UNSENT, chk, SCTP_SO_LOCKED);
+ }
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ }
+ if (chk->holds_key_ref)
+ sctp_auth_key_release(stcb, chk->auth_keyid);
+ ccnt++;
+ if (chk->whoTo) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = NULL;
+ }
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ }
+ }
+/*
+ if (ccnt) {
+ printf("Freed %d from send_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ /* sent queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ if (chk->data) {
+ if (so) {
+ /* Still a socket? */
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_SENT, chk, SCTP_SO_LOCKED);
+ }
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ }
+ if (chk->holds_key_ref)
+ sctp_auth_key_release(stcb, chk->auth_keyid);
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ }
+/*
+ if (ccnt) {
+ printf("Freed %d from sent_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ /* control queue MAY not be empty */
+ if (!TAILQ_EMPTY(&asoc->control_send_queue)) {
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ if (chk->holds_key_ref)
+ sctp_auth_key_release(stcb, chk->auth_keyid);
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ }
+ }
+/*
+ if (ccnt) {
+ printf("Freed %d from ctrl_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+
+ /* ASCONF queue MAY not be empty */
+ if (!TAILQ_EMPTY(&asoc->asconf_send_queue)) {
+ chk = TAILQ_FIRST(&asoc->asconf_send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ if (chk->holds_key_ref)
+ sctp_auth_key_release(stcb, chk->auth_keyid);
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->asconf_send_queue);
+ }
+ }
+/*
+ if (ccnt) {
+ printf("Freed %d from asconf_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ if (chk->holds_key_ref)
+ sctp_auth_key_release(stcb, chk->auth_keyid);
+ sctp_free_remote_addr(chk->whoTo);
+ ccnt++;
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ }
+ }
+/*
+ if (ccnt) {
+ printf("Freed %d from reasm_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ if (asoc->mapping_array) {
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = NULL;
+ }
+ if (asoc->nr_mapping_array) {
+ SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP);
+ asoc->nr_mapping_array = NULL;
+ }
+ /* the stream outs */
+ if (asoc->strmout) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = NULL;
+ }
+ asoc->strm_realoutsize = asoc->streamoutcnt = 0;
+ if (asoc->strmin) {
+ struct sctp_queued_to_read *ctl;
+
+ for (i = 0; i < asoc->streamincnt; i++) {
+ if (!TAILQ_EMPTY(&asoc->strmin[i].inqueue)) {
+ /* We have something on the stream-in queue */
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->strmin[i].inqueue,
+ ctl, next);
+ sctp_free_remote_addr(ctl->whoFrom);
+ if (ctl->data) {
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ }
+ /*
+ * We don't free the address here
+ * since all the net's were freed
+ * above.
+ */
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl);
+ SCTP_DECR_READQ_COUNT();
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ }
+ }
+ }
+ SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
+ asoc->strmin = NULL;
+ }
+ asoc->streamincnt = 0;
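+ /* Finally, release every remote address (net) still attached to the association. */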
+ while (!TAILQ_EMPTY(&asoc->nets)) {
+ /* sa_ignore FREED_MEMORY */
+ net = TAILQ_FIRST(&asoc->nets);
+ /* pull from list */
+ if ((SCTP_BASE_INFO(ipi_count_raddr) == 0) || (prev == net)) {
+#ifdef INVARIANTS
+ panic("no net's left alloc'ed, or list points to itself");
+#endif
+ break;
+ }
+ prev = net;
+ TAILQ_REMOVE(&asoc->nets, net, sctp_next);
+ sctp_free_remote_addr(net);
+ }
+
+ while (!LIST_EMPTY(&asoc->sctp_restricted_addrs)) {
+ /* sa_ignore FREED_MEMORY */
+ laddr = LIST_FIRST(&asoc->sctp_restricted_addrs);
+ sctp_remove_laddr(laddr);
+ }
+
+ /* pending asconf (address) parameters */
+ while (!TAILQ_EMPTY(&asoc->asconf_queue)) {
+ /* sa_ignore FREED_MEMORY */
+ aparam = TAILQ_FIRST(&asoc->asconf_queue);
+ TAILQ_REMOVE(&asoc->asconf_queue, aparam, next);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+ }
+ while (!TAILQ_EMPTY(&asoc->asconf_ack_sent)) {
+ /* sa_ignore FREED_MEMORY */
+ aack = TAILQ_FIRST(&asoc->asconf_ack_sent);
+ TAILQ_REMOVE(&asoc->asconf_ack_sent, aack, next);
+ if (aack->data != NULL) {
+ sctp_m_freem(aack->data);
+ }
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asconf_ack), aack);
+ }
+ /* clean up auth stuff */
+ if (asoc->local_hmacs)
+ sctp_free_hmaclist(asoc->local_hmacs);
+ if (asoc->peer_hmacs)
+ sctp_free_hmaclist(asoc->peer_hmacs);
+
+ if (asoc->local_auth_chunks)
+ sctp_free_chunklist(asoc->local_auth_chunks);
+ if (asoc->peer_auth_chunks)
+ sctp_free_chunklist(asoc->peer_auth_chunks);
+
+ sctp_free_authinfo(&asoc->authinfo);
+
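+ /* Release all shared keys installed on the association. */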
+ shared_key = LIST_FIRST(&asoc->shared_keys);
+ while (shared_key) {
+ LIST_REMOVE(shared_key, next);
+ sctp_free_sharedkey(shared_key);
+ /* sa_ignore FREED_MEMORY */
+ shared_key = LIST_FIRST(&asoc->shared_keys);
+ }
+
+ /* Insert new items here :> */
+
+ /* Get rid of LOCK */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+#ifdef SCTP_TRACK_FREED_ASOCS
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* now clean up the asoc itself */
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
+ SCTP_DECR_ASOC_COUNT();
+ } else {
+ LIST_INSERT_HEAD(&inp->sctp_asoc_free_list, stcb, sctp_tcblist);
+ }
+#else
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
+ SCTP_DECR_ASOC_COUNT();
+#endif
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /*
+ * If it is NOT the inp_free calling us AND sctp_close
+ * has been called, we call back...
+ */
+ SCTP_INP_RUNLOCK(inp);
+ /*
+ * This will start the kill timer (if we are the
+ * last one) since we still hold an increment. But
+ * this is the only safe way to do this since
+ * otherwise if the socket closes at the same time
+ * we are here we might collide in the cleanup.
+ */
+ sctp_inpcb_free(inp,
+ SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ SCTP_INP_DECR_REF(inp);
+ goto out_of;
+ } else {
+ /* The socket is still open. */
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+out_of:
+ /* destroyed the asoc */
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 11);
+#endif
+ return (1);
+}
+
+
+
+/*
+ * determine if a destination is "reachable" based upon the addresses bound
+ * to the current endpoint (e.g. only v4 or v6 currently bound)
+ */
+/*
+ * FIX: if we allow assoc-level bindx(), then this needs to be fixed to use
+ * assoc level v4/v6 flags, as the assoc *may* not have the same address
+ * types bound as its endpoint
+ */
+int
+sctp_destination_is_reachable(struct sctp_tcb *stcb, struct sockaddr *destaddr)
+{
+ struct sctp_inpcb *inp;
+ int answer;
+
+ /*
+	 * No locks here; in all cases the TCB is already locked and an
+	 * assoc is up. Either an INP lock is applied by the caller (in the
+	 * asconf case when deleting an address) or NOT (in the HB case);
+	 * however, in the HB case the INP refcount is up and the INP will
+	 * not be removed (on top of the fact that we have a TCB lock). We
+	 * only want to read sctp_flags, which is either bound-all or not;
+	 * no protection is needed since once an assoc is up you can't
+	 * change your binding.
+ */
+ inp = stcb->sctp_ep;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* if bound all, destination is not restricted */
+ /*
+ * RRS: Question during lock work: Is this correct? If you
+ * are bound-all you still might need to obey the V4--V6
+ * flags??? IMO this bound-all stuff needs to be removed!
+ */
+ return (1);
+ }
+ /* NOTE: all "scope" checks are done when local addresses are added */
+ if (destaddr->sa_family == AF_INET6) {
+ answer = inp->ip_inp.inp.inp_vflag & INP_IPV6;
+ } else if (destaddr->sa_family == AF_INET) {
+ answer = inp->ip_inp.inp.inp_vflag & INP_IPV4;
+ } else {
+ /* invalid family, so it's unreachable */
+ answer = 0;
+ }
+ return (answer);
+}
+
+/*
+ * update the inp_vflags on an endpoint
+ */
+static void
+sctp_update_ep_vflag(struct sctp_inpcb *inp)
+{
+ struct sctp_laddr *laddr;
+
+ /* first clear the flag */
+ inp->ip_inp.inp.inp_vflag = 0;
+ /* set the flag based on addresses on the ep list */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family == AF_INET6) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ } else if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ }
+ }
+}
+
+/*
+ * Add the address to the endpoint local address list. There is nothing to be
+ * done if we are bound to all addresses.
+ */
+void
+sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t action)
+{
+ struct sctp_laddr *laddr;
+ int fnd, error = 0;
+
+ fnd = 0;
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* You are already bound to all. You have it already */
+ return;
+ }
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-useable addr. */
+ return;
+ }
+ }
+ /* first, is it already present? */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ fnd = 1;
+ break;
+ }
+ }
+
+ if (fnd == 0) {
+ /* Not in the ep list */
+ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, action);
+ if (error != 0)
+ return;
+ inp->laddr_count++;
+ /* update inp_vflag flags */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ }
+ }
+ return;
+}
+
+
+/*
+ * select a new (hopefully reachable) destination net (should only be used
+ * when we deleted an ep addr that is the only usable source address to reach
+ * the destination net)
+ */
+static void
+sctp_select_primary_destination(struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* for now, we'll just pick the first reachable one we find */
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED)
+ continue;
+ if (sctp_destination_is_reachable(stcb,
+ (struct sockaddr *)&net->ro._l_addr)) {
+ /* found a reachable destination */
+ stcb->asoc.primary_destination = net;
+ }
+ }
+	/* I can't get there from here! ...we're gonna die shortly... */
+}
+
+
+/*
+ * Delete the address from the endpoint local address list. There is nothing
+ * to be done if we are bound to all addresses.
+ */
+void
+sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+ int fnd;
+
+ fnd = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* You are already bound to all. You have it already */
+ return;
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ fnd = 1;
+ break;
+ }
+ }
+ if (fnd && (inp->laddr_count < 2)) {
+ /* can't delete unless there are at LEAST 2 addresses */
+ return;
+ }
+ if (fnd) {
+ /*
+		 * Clean up any use of this address: go through our
+		 * associations and clear any last_used_address that matches
+		 * this one; for each assoc, see if a new primary_destination
+		 * is needed.
+ */
+ struct sctp_tcb *stcb;
+
+ /* clean up "next_addr_touse" */
+ if (inp->next_addr_touse == laddr)
+ /* delete this address */
+ inp->next_addr_touse = NULL;
+
+ /* clean up "last_used_address" */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ struct sctp_nets *net;
+
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.last_used_address == laddr)
+ /* delete this address */
+ stcb->asoc.last_used_address = NULL;
+ /*
+ * Now spin through all the nets and purge any ref
+ * to laddr
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net->ro._s_addr &&
+ (net->ro._s_addr->ifa == laddr->ifa)) {
+ /* Yep, purge src address selected */
+ sctp_rtentry_t *rt;
+
+ /* delete this address if cached */
+ rt = net->ro.ro_rt;
+ if (rt != NULL) {
+ RTFREE(rt);
+ net->ro.ro_rt = NULL;
+ }
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } /* for each tcb */
+ /* remove it from the ep list */
+ sctp_remove_laddr(laddr);
+ inp->laddr_count--;
+ /* update inp_vflag flags */
+ sctp_update_ep_vflag(inp);
+ }
+ return;
+}
+
+/*
+ * Add the address to the TCB local address restricted list.
+ * This is a "pending" address list (eg. addresses waiting for an
+ * ASCONF-ACK response) and cannot be used as a valid source address.
+ */
+void
+sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+ struct sctpladdr *list;
+
+ /*
+	 * Assumes the TCB is locked (and possibly the INP). May need to
+	 * confirm/fix that if we need it and that is not the case.
+ */
+ list = &stcb->asoc.sctp_restricted_addrs;
+
+ inp = stcb->sctp_ep;
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-existent addr. */
+ return;
+ }
+ }
+ /* does the address already exist? */
+ LIST_FOREACH(laddr, list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ return;
+ }
+ }
+
+ /* add to the list */
+ (void)sctp_insert_laddr(list, ifa, 0);
+ return;
+}
+
+/*
+ * insert an laddr entry with the given ifa for the desired list
+ */
+int
+sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
+{
+ struct sctp_laddr *laddr;
+
+ laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+ if (laddr == NULL) {
+ /* out of memory? */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(laddr, sizeof(*laddr));
+ (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+ laddr->ifa = ifa;
+ laddr->action = act;
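+	/* hold a reference on the ifa for as long as it sits on this list */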
+ atomic_add_int(&ifa->refcount, 1);
+ /* insert it */
+ LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
+
+ return (0);
+}
+
+/*
+ * Remove an laddr entry from the local address list (on an assoc)
+ */
+void
+sctp_remove_laddr(struct sctp_laddr *laddr)
+{
+
+ /* remove from the list */
+ LIST_REMOVE(laddr, sctp_nxt_addr);
+ sctp_free_ifa(laddr->ifa);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr);
+ SCTP_DECR_LADDR_COUNT();
+}
+
+/*
+ * Remove a local address from the TCB local address restricted list
+ */
+void
+sctp_del_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+
+ /*
+	 * This is called by the asconf work. It is assumed that a) the TCB
+	 * is locked and b) the INP is locked. This is true insofar as I can
+	 * trace through the asconf entry code where these locks are taken.
+	 * Again, the ASCONF code is a bit different in that it often does
+	 * lock the INP during its work. This must be so, since we don't
+	 * want other processes looking up things while what they are
+	 * looking up is changing :-D
+ */
+
+ inp = stcb->sctp_ep;
+ /* if subset bound and don't allow ASCONF's, can't delete last */
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ if (stcb->sctp_ep->laddr_count < 2) {
+ /* can't delete last address */
+ return;
+ }
+ }
+ LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
+ /* remove the address if it exists */
+ if (laddr->ifa == NULL)
+ continue;
+ if (laddr->ifa == ifa) {
+ sctp_remove_laddr(laddr);
+ return;
+ }
+ }
+
+ /* address not found! */
+ return;
+}
+
+/*
+ * Temporarily remove for __APPLE__ until we use the Tiger equivalents
+ */
+/* sysctl */
+static int sctp_max_number_of_assoc = SCTP_MAX_NUM_OF_ASOC;
+static int sctp_scale_up_for_address = SCTP_SCALE_FOR_ADDR;
+
+void
+sctp_pcb_init()
+{
+ /*
+	 * SCTP initialization for the PCB structures; should be called by
+	 * the sctp_init() function.
+ */
+ int i;
+ struct timeval tv;
+
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) != 0) {
+ /* error I was called twice */
+ return;
+ }
+ SCTP_BASE_VAR(sctp_pcb_initialized) = 1;
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+ bzero(&SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log));
+#endif
+ (void)SCTP_GETTIME_TIMEVAL(&tv);
+#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
+ SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_sec = (uint32_t) tv.tv_sec;
+ SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_usec = (uint32_t) tv.tv_usec;
+#else
+ SCTP_BASE_STAT(sctps_discontinuitytime).tv_sec = (uint32_t) tv.tv_sec;
+ SCTP_BASE_STAT(sctps_discontinuitytime).tv_usec = (uint32_t) tv.tv_usec;
+#endif
+ /* init the empty list of (All) Endpoints */
+ LIST_INIT(&SCTP_BASE_INFO(listhead));
+
+
+ /* init the hash table of endpoints */
+ TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &SCTP_BASE_SYSCTL(sctp_hashtblsize));
+ TUNABLE_INT_FETCH("net.inet.sctp.pcbhashsize", &SCTP_BASE_SYSCTL(sctp_pcbtblsize));
+ TUNABLE_INT_FETCH("net.inet.sctp.chunkscale", &SCTP_BASE_SYSCTL(sctp_chunkscale));
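+	/*
+	 * Three lookup tables: associations hashed by vtag, endpoints hashed
+	 * by local port, and a separate hash for TCP-model (connected)
+	 * sockets.
+	 */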
+ SCTP_BASE_INFO(sctp_asochash) = SCTP_HASH_INIT((SCTP_BASE_SYSCTL(sctp_hashtblsize) * 31),
+ &SCTP_BASE_INFO(hashasocmark));
+ SCTP_BASE_INFO(sctp_ephash) = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_hashtblsize),
+ &SCTP_BASE_INFO(hashmark));
+ SCTP_BASE_INFO(sctp_tcpephash) = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_hashtblsize),
+ &SCTP_BASE_INFO(hashtcpmark));
+ SCTP_BASE_INFO(hashtblsize) = SCTP_BASE_SYSCTL(sctp_hashtblsize);
+
+
+ SCTP_BASE_INFO(sctp_vrfhash) = SCTP_HASH_INIT(SCTP_SIZE_OF_VRF_HASH,
+ &SCTP_BASE_INFO(hashvrfmark));
+
+ SCTP_BASE_INFO(vrf_ifn_hash) = SCTP_HASH_INIT(SCTP_VRF_IFN_HASH_SIZE,
+ &SCTP_BASE_INFO(vrf_ifn_hashmark));
+ /* init the zones */
+ /*
+ * FIX ME: Should check for NULL returns, but if it does fail we are
+ * doomed to panic anyways... add later maybe.
+ */
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_ep), "sctp_ep",
+ sizeof(struct sctp_inpcb), maxsockets);
+
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asoc), "sctp_asoc",
+ sizeof(struct sctp_tcb), sctp_max_number_of_assoc);
+
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_laddr), "sctp_laddr",
+ sizeof(struct sctp_laddr),
+ (sctp_max_number_of_assoc * sctp_scale_up_for_address));
+
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_net), "sctp_raddr",
+ sizeof(struct sctp_nets),
+ (sctp_max_number_of_assoc * sctp_scale_up_for_address));
+
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_chunk), "sctp_chunk",
+ sizeof(struct sctp_tmit_chunk),
+ (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
+
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_readq), "sctp_readq",
+ sizeof(struct sctp_queued_to_read),
+ (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
+
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_strmoq), "sctp_stream_msg_out",
+ sizeof(struct sctp_stream_queue_pending),
+ (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
+
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asconf), "sctp_asconf",
+ sizeof(struct sctp_asconf),
+ (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
+
+ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asconf_ack), "sctp_asconf_ack",
+ sizeof(struct sctp_asconf_ack),
+ (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
+
+
+ /* Master Lock INIT for info structure */
+ SCTP_INP_INFO_LOCK_INIT();
+ SCTP_STATLOG_INIT_LOCK();
+
+ SCTP_IPI_COUNT_INIT();
+ SCTP_IPI_ADDR_INIT();
+#ifdef SCTP_PACKET_LOGGING
+ SCTP_IP_PKTLOG_INIT();
+#endif
+ LIST_INIT(&SCTP_BASE_INFO(addr_wq));
+
+ SCTP_WQ_ADDR_INIT();
+ /* not sure if we need all the counts */
+ SCTP_BASE_INFO(ipi_count_ep) = 0;
+ /* assoc/tcb zone info */
+ SCTP_BASE_INFO(ipi_count_asoc) = 0;
+ /* local addrlist zone info */
+ SCTP_BASE_INFO(ipi_count_laddr) = 0;
+ /* remote addrlist zone info */
+ SCTP_BASE_INFO(ipi_count_raddr) = 0;
+ /* chunk info */
+ SCTP_BASE_INFO(ipi_count_chunk) = 0;
+
+ /* socket queue zone info */
+ SCTP_BASE_INFO(ipi_count_readq) = 0;
+
+ /* stream out queue cont */
+ SCTP_BASE_INFO(ipi_count_strmoq) = 0;
+
+ SCTP_BASE_INFO(ipi_free_strmoq) = 0;
+ SCTP_BASE_INFO(ipi_free_chunks) = 0;
+
+ SCTP_OS_TIMER_INIT(&SCTP_BASE_INFO(addr_wq_timer.timer));
+
+ /* Init the TIMEWAIT list */
+ for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) {
+ LIST_INIT(&SCTP_BASE_INFO(vtag_timewait)[i]);
+ }
+
+ sctp_startup_iterator();
+
+ /*
+ * INIT the default VRF which for BSD is the only one, other O/S's
+ * may have more. But initially they must start with one and then
+ * add the VRF's as addresses are added.
+ */
+ sctp_init_vrf_list(SCTP_DEFAULT_VRF);
+
+}
+
+/*
+ * Assumes that the SCTP_BASE_INFO() lock is NOT held.
+ */
+void
+sctp_pcb_finish(void)
+{
+ struct sctp_vrflist *vrf_bucket;
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *ifn;
+ struct sctp_ifa *ifa;
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block, *prev_twait_block;
+ struct sctp_laddr *wi;
+ int i;
+
+ /*
+	 * On FreeBSD the iterator thread never exits, but we do clean up
+	 * here. The only way FreeBSD reaches here is if we have VRF's, but
+	 * we still add the ifdef to make it compile on old versions.
+ */
+ {
+ struct sctp_iterator *it, *nit;
+
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
+ while (it) {
+ nit = TAILQ_NEXT(it, sctp_nxt_itr);
+ if (it->vn != curvnet) {
+ it = nit;
+ continue;
+ }
+ TAILQ_REMOVE(&sctp_it_ctl.iteratorhead,
+ it, sctp_nxt_itr);
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ SCTP_FREE(it, SCTP_M_ITER);
+ it = nit;
+ }
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ SCTP_ITERATOR_LOCK();
+ if ((sctp_it_ctl.cur_it) &&
+ (sctp_it_ctl.cur_it->vn == curvnet)) {
+ sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
+ }
+ SCTP_ITERATOR_UNLOCK();
+ }
+
+ SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer));
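+	/* drain any pending entries off the address work queue */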
+ SCTP_WQ_ADDR_LOCK();
+ while ((wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq))) != NULL) {
+ LIST_REMOVE(wi, sctp_nxt_addr);
+ SCTP_DECR_LADDR_COUNT();
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi);
+ }
+ SCTP_WQ_ADDR_UNLOCK();
+
+ /*
+ * free the vrf/ifn/ifa lists and hashes (be sure address monitor is
+ * destroyed first).
+ */
+ vrf_bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(SCTP_DEFAULT_VRFID & SCTP_BASE_INFO(hashvrfmark))];
+ while ((vrf = LIST_FIRST(vrf_bucket)) != NULL) {
+ while ((ifn = LIST_FIRST(&vrf->ifnlist)) != NULL) {
+ while ((ifa = LIST_FIRST(&ifn->ifalist)) != NULL) {
+ /* free the ifa */
+ LIST_REMOVE(ifa, next_bucket);
+ LIST_REMOVE(ifa, next_ifa);
+ SCTP_FREE(ifa, SCTP_M_IFA);
+ }
+ /* free the ifn */
+ LIST_REMOVE(ifn, next_bucket);
+ LIST_REMOVE(ifn, next_ifn);
+ SCTP_FREE(ifn, SCTP_M_IFN);
+ }
+ SCTP_HASH_FREE(vrf->vrf_addr_hash, vrf->vrf_addr_hashmark);
+ /* free the vrf */
+ LIST_REMOVE(vrf, next_vrf);
+ SCTP_FREE(vrf, SCTP_M_VRF);
+ }
+ /* free the vrf hashes */
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_vrfhash), SCTP_BASE_INFO(hashvrfmark));
+ SCTP_HASH_FREE(SCTP_BASE_INFO(vrf_ifn_hash), SCTP_BASE_INFO(vrf_ifn_hashmark));
+
+ /*
+ * free the TIMEWAIT list elements malloc'd in the function
+ * sctp_add_vtag_to_timewait()...
+ */
+ for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) {
+ chain = &SCTP_BASE_INFO(vtag_timewait)[i];
+ if (!LIST_EMPTY(chain)) {
+ prev_twait_block = NULL;
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ if (prev_twait_block) {
+ SCTP_FREE(prev_twait_block, SCTP_M_TIMW);
+ }
+ prev_twait_block = twait_block;
+ }
+ SCTP_FREE(prev_twait_block, SCTP_M_TIMW);
+ }
+ }
+
+ /* free the locks and mutexes */
+#ifdef SCTP_PACKET_LOGGING
+ SCTP_IP_PKTLOG_DESTROY();
+#endif
+ SCTP_IPI_ADDR_DESTROY();
+ SCTP_STATLOG_DESTROY();
+ SCTP_INP_INFO_LOCK_DESTROY();
+
+ SCTP_WQ_ADDR_DESTROY();
+
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep));
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc));
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr));
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_net));
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_chunk));
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_readq));
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_strmoq));
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf));
+ SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf_ack));
+	/* Get rid of other stuff too */
+ if (SCTP_BASE_INFO(sctp_asochash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark));
+ if (SCTP_BASE_INFO(sctp_ephash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark));
+ if (SCTP_BASE_INFO(sctp_tcpephash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark));
+
+}
+
+
+int
+sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
+ int iphlen, int offset, int limit, struct sctphdr *sh,
+ struct sockaddr *altsa)
+{
+ /*
+ * grub through the INIT pulling addresses and loading them to the
+ * nets structure in the asoc. The from address in the mbuf should
+ * also be loaded (if it is not already). This routine can be called
+ * with either INIT or INIT-ACK's as long as the m points to the IP
+ * packet and the offset points to the beginning of the parameters.
+ */
+ struct sctp_inpcb *inp, *l_inp;
+ struct sctp_nets *net, *net_tmp;
+ struct ip *iph;
+ struct sctp_paramhdr *phdr, parm_buf;
+ struct sctp_tcb *stcb_tmp;
+ uint16_t ptype, plen;
+ struct sockaddr *sa;
+ struct sockaddr_storage dest_store;
+ struct sockaddr *local_sa = (struct sockaddr *)&dest_store;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_random *p_random = NULL;
+ uint16_t random_len = 0;
+ uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs = NULL;
+ uint16_t hmacs_len = 0;
+ uint8_t saw_asconf = 0;
+ uint8_t saw_asconf_ack = 0;
+ uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_chunk_list *chunks = NULL;
+ uint16_t num_chunks = 0;
+ sctp_key_t *new_key;
+ uint32_t keylen;
+ int got_random = 0, got_hmacs = 0, got_chklist = 0;
+
+ /* First get the destination address setup too. */
+ memset(&sin, 0, sizeof(sin));
+ memset(&sin6, 0, sizeof(sin6));
+
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ sin.sin_port = stcb->rport;
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_port = stcb->rport;
+ if (altsa == NULL) {
+ iph = mtod(m, struct ip *);
+ switch (iph->ip_v) {
+ case IPVERSION:
+ {
+ /* its IPv4 */
+ struct sockaddr_in *sin_2;
+
+ sin_2 = (struct sockaddr_in *)(local_sa);
+ memset(sin_2, 0, sizeof(sin));
+ sin_2->sin_family = AF_INET;
+ sin_2->sin_len = sizeof(sin);
+ sin_2->sin_port = sh->dest_port;
+ sin_2->sin_addr.s_addr = iph->ip_dst.s_addr;
+ sin.sin_addr = iph->ip_src;
+ sa = (struct sockaddr *)&sin;
+ break;
+ }
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *sin6_2;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6_2 = (struct sockaddr_in6 *)(local_sa);
+ memset(sin6_2, 0, sizeof(sin6));
+ sin6_2->sin6_family = AF_INET6;
+ sin6_2->sin6_len = sizeof(struct sockaddr_in6);
+ sin6_2->sin6_port = sh->dest_port;
+ sin6.sin6_addr = ip6->ip6_src;
+ sa = (struct sockaddr *)&sin6;
+ break;
+ }
+#endif
+ default:
+ return (-1);
+ break;
+ }
+ } else {
+ /*
+ * For cookies we use the src address NOT from the packet
+ * but from the original INIT
+ */
+ sa = altsa;
+ }
+ /* Turn off ECN until we get through all params */
+ stcb->asoc.ecn_allowed = 0;
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* mark all addresses that we have currently on the list */
+ net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ /* does the source address already exist? if so skip it */
+ l_inp = inp = stcb->sctp_ep;
+
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+
+ if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) {
+ /* we must add the source address */
+ /* no scope set here since we have a tcb already. */
+ if ((sa->sa_family == AF_INET) &&
+ (stcb->asoc.ipv4_addr_legal)) {
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) {
+ return (-1);
+ }
+ } else if ((sa->sa_family == AF_INET6) &&
+ (stcb->asoc.ipv6_addr_legal)) {
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) {
+ return (-2);
+ }
+ }
+ } else {
+ if (net_tmp != NULL && stcb_tmp == stcb) {
+ net_tmp->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC;
+ } else if (stcb_tmp != stcb) {
+ /* It belongs to another association? */
+ if (stcb_tmp)
+ SCTP_TCB_UNLOCK(stcb_tmp);
+ return (-3);
+ }
+ }
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-4);
+ }
+ /*
+ * peer must explicitly turn this on. This may have been initialized
+ * to be "on" in order to allow local addr changes while INIT's are
+ * in flight.
+ */
+ stcb->asoc.peer_supports_asconf = 0;
+ /* now we must go through each of the params. */
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ /*
+ * printf("ptype => %0x, plen => %d\n", (uint32_t)ptype,
+ * (int)plen);
+ */
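+		/* stop on a truncated or zero-length parameter */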
+ if (offset + plen > limit) {
+ break;
+ }
+ if (plen == 0) {
+ break;
+ }
+ if (ptype == SCTP_IPV4_ADDRESS) {
+ if (stcb->asoc.ipv4_addr_legal) {
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ /* ok get the v4 address and check/add */
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&p4_buf,
+ sizeof(p4_buf));
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ phdr == NULL) {
+ return (-5);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin.sin_addr.s_addr = p4->addr;
+ if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ /* Skip multi-cast addresses */
+ goto next_param;
+ }
+ if ((sin.sin_addr.s_addr == INADDR_BROADCAST) ||
+ (sin.sin_addr.s_addr == INADDR_ANY)) {
+ goto next_param;
+ }
+ sa = (struct sockaddr *)&sin;
+ inp = stcb->sctp_ep;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
+ local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+
+ if ((stcb_tmp == NULL && inp == stcb->sctp_ep) ||
+ inp == NULL) {
+ /* we must add the source address */
+ /*
+ * no scope set since we have a tcb
+ * already
+ */
+
+ /*
+ * we must validate the state again
+ * here
+ */
+ add_it_now:
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-7);
+ }
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) {
+ return (-8);
+ }
+ } else if (stcb_tmp == stcb) {
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-10);
+ }
+ if (net != NULL) {
+ /* clear flag */
+ net->dest_state &=
+ ~SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ } else {
+ /*
+ * strange, address is in another
+ * assoc? straighten out locks.
+ */
+ if (stcb_tmp) {
+ if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ /*
+ * in setup state we
+ * abort this guy
+ */
+ sctp_abort_an_association(stcb_tmp->sctp_ep,
+ stcb_tmp, 1, NULL, 0);
+ goto add_it_now;
+ }
+ SCTP_TCB_UNLOCK(stcb_tmp);
+ }
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-12);
+ }
+ return (-13);
+ }
+ }
+ } else if (ptype == SCTP_IPV6_ADDRESS) {
+ if (stcb->asoc.ipv6_addr_legal) {
+ /* ok get the v6 address and check/add */
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&p6_buf,
+ sizeof(p6_buf));
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ phdr == NULL) {
+ return (-14);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
+ /* Skip multi-cast addresses */
+ goto next_param;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
+ /*
+					 * Link local makes no sense without
+ * scope
+ */
+ goto next_param;
+ }
+ sa = (struct sockaddr *)&sin6;
+ inp = stcb->sctp_ep;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
+ local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb_tmp == NULL &&
+ (inp == stcb->sctp_ep || inp == NULL)) {
+ /*
+ * we must validate the state again
+ * here
+ */
+ add_it_now6:
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-16);
+ }
+ /*
+ * we must add the address, no scope
+ * set
+ */
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) {
+ return (-17);
+ }
+ } else if (stcb_tmp == stcb) {
+ /*
+ * we must validate the state again
+ * here
+ */
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-19);
+ }
+ if (net != NULL) {
+ /* clear flag */
+ net->dest_state &=
+ ~SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ } else {
+ /*
+ * strange, address is in another
+ * assoc? straighten out locks.
+ */
+ if (stcb_tmp)
+ if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ /*
+ * in setup state we
+ * abort this guy
+ */
+ sctp_abort_an_association(stcb_tmp->sctp_ep,
+ stcb_tmp, 1, NULL, 0);
+ goto add_it_now6;
+ }
+ SCTP_TCB_UNLOCK(stcb_tmp);
+
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-21);
+ }
+ return (-22);
+ }
+ }
+ } else if (ptype == SCTP_ECN_CAPABLE) {
+ stcb->asoc.ecn_allowed = 1;
+ } else if (ptype == SCTP_ULP_ADAPTATION) {
+ if (stcb->asoc.state != SCTP_STATE_OPEN) {
+ struct sctp_adaptation_layer_indication ai,
+ *aip;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ai, sizeof(ai));
+ aip = (struct sctp_adaptation_layer_indication *)phdr;
+ if (aip) {
+ stcb->asoc.peers_adaptation = ntohl(aip->indication);
+ stcb->asoc.adaptation_needed = 1;
+ }
+ }
+ } else if (ptype == SCTP_SET_PRIM_ADDR) {
+ struct sctp_asconf_addr_param lstore, *fee;
+ struct sctp_asconf_addrv4_param *fii;
+ int lptype;
+ struct sockaddr *lsa = NULL;
+
+ stcb->asoc.peer_supports_asconf = 1;
+ if (plen > sizeof(lstore)) {
+ return (-23);
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&lstore,
+ min(plen, sizeof(lstore)));
+ if (phdr == NULL) {
+ return (-24);
+ }
+ fee = (struct sctp_asconf_addr_param *)phdr;
+ lptype = ntohs(fee->addrp.ph.param_type);
+ if (lptype == SCTP_IPV4_ADDRESS) {
+ if (plen !=
+ sizeof(struct sctp_asconf_addrv4_param)) {
+ SCTP_PRINTF("Sizeof setprim in init/init ack not %d but %d - ignored\n",
+ (int)sizeof(struct sctp_asconf_addrv4_param),
+ plen);
+ } else {
+ fii = (struct sctp_asconf_addrv4_param *)fee;
+ sin.sin_addr.s_addr = fii->addrp.addr;
+ lsa = (struct sockaddr *)&sin;
+ }
+ } else if (lptype == SCTP_IPV6_ADDRESS) {
+ if (plen !=
+ sizeof(struct sctp_asconf_addr_param)) {
+ SCTP_PRINTF("Sizeof setprim (v6) in init/init ack not %d but %d - ignored\n",
+ (int)sizeof(struct sctp_asconf_addr_param),
+ plen);
+ } else {
+ memcpy(sin6.sin6_addr.s6_addr,
+ fee->addrp.addr,
+ sizeof(fee->addrp.addr));
+ lsa = (struct sockaddr *)&sin6;
+ }
+ }
+ if (lsa) {
+ (void)sctp_set_primary_addr(stcb, sa, NULL);
+ }
+ } else if (ptype == SCTP_HAS_NAT_SUPPORT) {
+ stcb->asoc.peer_supports_nat = 1;
+ } else if (ptype == SCTP_PRSCTP_SUPPORTED) {
+ /* Peer supports pr-sctp */
+ stcb->asoc.peer_supports_prsctp = 1;
+ } else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
+ /* A supported extension chunk */
+ struct sctp_supported_chunk_types_param *pr_supported;
+ uint8_t local_store[SCTP_PARAM_BUFFER_SIZE];
+ int num_ent, i;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&local_store, min(sizeof(local_store), plen));
+ if (phdr == NULL) {
+ return (-25);
+ }
+ stcb->asoc.peer_supports_asconf = 0;
+ stcb->asoc.peer_supports_prsctp = 0;
+ stcb->asoc.peer_supports_pktdrop = 0;
+ stcb->asoc.peer_supports_strreset = 0;
+ stcb->asoc.peer_supports_nr_sack = 0;
+ stcb->asoc.peer_supports_auth = 0;
+ pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
+ num_ent = plen - sizeof(struct sctp_paramhdr);
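+			/*
+			 * each supported chunk type is a single byte, so the
+			 * remaining parameter length equals the entry count
+			 */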
+ for (i = 0; i < num_ent; i++) {
+ switch (pr_supported->chunk_types[i]) {
+ case SCTP_ASCONF:
+ case SCTP_ASCONF_ACK:
+ stcb->asoc.peer_supports_asconf = 1;
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ stcb->asoc.peer_supports_prsctp = 1;
+ break;
+ case SCTP_PACKET_DROPPED:
+ stcb->asoc.peer_supports_pktdrop = 1;
+ break;
+ case SCTP_NR_SELECTIVE_ACK:
+ stcb->asoc.peer_supports_nr_sack = 1;
+ break;
+ case SCTP_STREAM_RESET:
+ stcb->asoc.peer_supports_strreset = 1;
+ break;
+ case SCTP_AUTHENTICATION:
+ stcb->asoc.peer_supports_auth = 1;
+ break;
+ default:
+ /* one I have not learned yet */
+ break;
+
+ }
+ }
+ } else if (ptype == SCTP_ECN_NONCE_SUPPORTED) {
+ /* Peer supports ECN-nonce */
+ stcb->asoc.peer_supports_ecn_nonce = 1;
+ stcb->asoc.ecn_nonce_allowed = 1;
+ } else if (ptype == SCTP_RANDOM) {
+ if (plen > sizeof(random_store))
+ break;
+ if (got_random) {
+ /* already processed a RANDOM */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)random_store,
+ min(sizeof(random_store), plen));
+ if (phdr == NULL)
+ return (-26);
+ p_random = (struct sctp_auth_random *)phdr;
+ random_len = plen - sizeof(*p_random);
+ /* enforce the random length */
+ if (random_len != SCTP_AUTH_RANDOM_SIZE_REQUIRED) {
+ SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: invalid RANDOM len\n");
+ return (-27);
+ }
+ got_random = 1;
+ } else if (ptype == SCTP_HMAC_LIST) {
+ int num_hmacs;
+ int i;
+
+ if (plen > sizeof(hmacs_store))
+ break;
+ if (got_hmacs) {
+ /* already processed a HMAC list */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)hmacs_store,
+ min(plen, sizeof(hmacs_store)));
+ if (phdr == NULL)
+ return (-28);
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ hmacs_len = plen - sizeof(*hmacs);
+ num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]);
+ /* validate the hmac list */
+ if (sctp_verify_hmac_param(hmacs, num_hmacs)) {
+ return (-29);
+ }
+ if (stcb->asoc.peer_hmacs != NULL)
+ sctp_free_hmaclist(stcb->asoc.peer_hmacs);
+ stcb->asoc.peer_hmacs = sctp_alloc_hmaclist(num_hmacs);
+ if (stcb->asoc.peer_hmacs != NULL) {
+ for (i = 0; i < num_hmacs; i++) {
+ (void)sctp_auth_add_hmacid(stcb->asoc.peer_hmacs,
+ ntohs(hmacs->hmac_ids[i]));
+ }
+ }
+ got_hmacs = 1;
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i;
+
+ if (plen > sizeof(chunks_store))
+ break;
+ if (got_chklist) {
+ /* already processed a Chunks list */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store,
+ min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return (-30);
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ if (stcb->asoc.peer_auth_chunks != NULL)
+ sctp_clear_chunklist(stcb->asoc.peer_auth_chunks);
+ else
+ stcb->asoc.peer_auth_chunks = sctp_alloc_chunklist();
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(chunks->chunk_types[i],
+ stcb->asoc.peer_auth_chunks);
+ /* record asconf/asconf-ack if listed */
+ if (chunks->chunk_types[i] == SCTP_ASCONF)
+ saw_asconf = 1;
+ if (chunks->chunk_types[i] == SCTP_ASCONF_ACK)
+ saw_asconf_ack = 1;
+
+ }
+ got_chklist = 1;
+ } else if ((ptype == SCTP_HEARTBEAT_INFO) ||
+ (ptype == SCTP_STATE_COOKIE) ||
+ (ptype == SCTP_UNRECOG_PARAM) ||
+ (ptype == SCTP_COOKIE_PRESERVE) ||
+ (ptype == SCTP_SUPPORTED_ADDRTYPE) ||
+ (ptype == SCTP_ADD_IP_ADDRESS) ||
+ (ptype == SCTP_DEL_IP_ADDRESS) ||
+ (ptype == SCTP_ERROR_CAUSE_IND) ||
+ (ptype == SCTP_SUCCESS_REPORT)) {
+ /* don't care */ ;
+ } else {
+ if ((ptype & 0x8000) == 0x0000) {
+ /*
+ * must stop processing the rest of the
+ * param's. Any report bits were handled
+ * with the call to
+ * sctp_arethere_unrecognized_parameters()
+ * when the INIT or INIT-ACK was first seen.
+ */
+ break;
+ }
+ }
+
+next_param:
+ offset += SCTP_SIZE32(plen);
+ if (offset >= limit) {
+ break;
+ }
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ /* Now check to see if we need to purge any addresses */
+ for (net = TAILQ_FIRST(&stcb->asoc.nets); net != NULL; net = net_tmp) {
+ net_tmp = TAILQ_NEXT(net, sctp_next);
+ if ((net->dest_state & SCTP_ADDR_NOT_IN_ASSOC) ==
+ SCTP_ADDR_NOT_IN_ASSOC) {
+ /* This address has been removed from the asoc */
+ /* remove and free it */
+ stcb->asoc.numnets--;
+ TAILQ_REMOVE(&stcb->asoc.nets, net, sctp_next);
+ sctp_free_remote_addr(net);
+ if (net == stcb->asoc.primary_destination) {
+ stcb->asoc.primary_destination = NULL;
+ sctp_select_primary_destination(stcb);
+ }
+ }
+ }
+ /* validate authentication required parameters */
+ if (got_random && got_hmacs) {
+ stcb->asoc.peer_supports_auth = 1;
+ } else {
+ stcb->asoc.peer_supports_auth = 0;
+ }
+ if (!stcb->asoc.peer_supports_auth && got_chklist) {
+ /* peer does not support auth but sent a chunks list? */
+ return (-31);
+ }
+ if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && stcb->asoc.peer_supports_asconf &&
+ !stcb->asoc.peer_supports_auth) {
+ /* peer supports asconf but not auth? */
+ return (-32);
+ } else if ((stcb->asoc.peer_supports_asconf) && (stcb->asoc.peer_supports_auth) &&
+ ((saw_asconf == 0) || (saw_asconf_ack == 0))) {
+ return (-33);
+ }
+ /* concatenate the full random key */
+ keylen = sizeof(*p_random) + random_len + sizeof(*hmacs) + hmacs_len;
+ if (chunks != NULL) {
+ keylen += sizeof(*chunks) + num_chunks;
+ }
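+	/*
+	 * key layout: the RANDOM parameter, then the peer's chunk list (if
+	 * any), then the HMAC list, matching the copies below
+	 */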
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL) {
+ keylen = sizeof(*p_random) + random_len;
+ bcopy(p_random, new_key->key, keylen);
+ }
+ /* append in the AUTH chunks */
+ if (chunks != NULL) {
+ bcopy(chunks, new_key->key + keylen,
+ sizeof(*chunks) + num_chunks);
+ keylen += sizeof(*chunks) + num_chunks;
+ }
+ /* append in the HMACs */
+ if (hmacs != NULL) {
+ bcopy(hmacs, new_key->key + keylen,
+ sizeof(*hmacs) + hmacs_len);
+ }
+ } else {
+ /* failed to get memory for the key */
+ return (-34);
+ }
+ if (stcb->asoc.authinfo.peer_random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.peer_random);
+ stcb->asoc.authinfo.peer_random = new_key;
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid);
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid);
+
+ return (0);
+}
+
+int
+sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa,
+ struct sctp_nets *net)
+{
+ /* make sure the requested primary address exists in the assoc */
+ if (net == NULL && sa)
+ net = sctp_findnet(stcb, sa);
+
+ if (net == NULL) {
+ /* didn't find the requested primary address! */
+ return (-1);
+ } else {
+ /* set the primary address */
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ /* Must be confirmed, so queue to set */
+ net->dest_state |= SCTP_ADDR_REQ_PRIMARY;
+ return (0);
+ }
+ stcb->asoc.primary_destination = net;
+ net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY;
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net != stcb->asoc.primary_destination) {
+ /*
+ * first one on the list is NOT the primary
+ * sctp_cmpaddr() is much more efficient if the
+ * primary is the first on the list, make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ }
+ return (0);
+ }
+}
+
+int
+sctp_is_vtag_good(struct sctp_inpcb *inp, uint32_t tag, uint16_t lport, uint16_t rport, struct timeval *now, int save_in_twait)
+{
+ /*
+	 * This function serves two purposes. It will check whether a TAG
+	 * can be re-used and return 1 for "yes, it is ok" and 0 for "don't
+	 * use that tag". Secondarily, it will purge out old tags that can
+	 * be removed.
+ */
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ struct sctpasochead *head;
+ struct sctp_tcb *stcb;
+ int i;
+
+ SCTP_INP_INFO_RLOCK();
+ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag,
+ SCTP_BASE_INFO(hashasocmark))];
+ if (head == NULL) {
+ /* invalid vtag */
+ goto skip_vtag_check;
+ }
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+ /*
+ * We choose not to lock anything here. TCB's can't be
+ * removed since we have the read lock, so they can't be
+ * freed on us, same thing for the INP. I may be wrong with
+ * this assumption, but we will go with it for now :-)
+ */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ continue;
+ }
+ if (stcb->asoc.my_vtag == tag) {
+ /* candidate */
+ if (stcb->rport != rport) {
+ continue;
+ }
+ if (stcb->sctp_ep->sctp_lport != lport) {
+ continue;
+ }
+ /* Its a used tag set */
+ SCTP_INP_INFO_RUNLOCK();
+ return (0);
+ }
+ }
+skip_vtag_check:
+
+ chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ /* Now what about timed wait ? */
+ if (!LIST_EMPTY(chain)) {
+ /*
+		 * Block(s) are present, let's see if we have this tag in the
+ * list
+ */
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == 0) {
+ /* not used */
+ continue;
+ } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
+ now->tv_sec) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ } else if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ /* Bad tag, sorry :< */
+ SCTP_INP_INFO_RUNLOCK();
+ return (0);
+ }
+ }
+ }
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (1);
+}
+
+
+static sctp_assoc_t reneged_asoc_ids[256];
+static uint8_t reneged_at = 0;
+
+
+static void
+sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ /*
+ * We must hunt this association for MBUF's past the cumack (i.e.
+ * out of order data that we can renege on).
+ */
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *nchk;
+ uint32_t cumulative_tsn_p1;
+ struct sctp_queued_to_read *ctl, *nctl;
+ int cnt, strmat;
+ uint32_t gap, i;
+ int fnd = 0;
+
+ /* We look for anything larger than the cum-ack + 1 */
+
+ asoc = &stcb->asoc;
+ if (asoc->cumulative_tsn == asoc->highest_tsn_inside_map) {
+ /* none we can reneg on. */
+ return;
+ }
+ SCTP_STAT_INCR(sctps_protocol_drains_done);
+ cumulative_tsn_p1 = asoc->cumulative_tsn + 1;
+ cnt = 0;
+ /* First look in the re-assembly queue */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ /* Get the next one */
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (compare_with_wrap(chk->rec.data.TSN_seq,
+ cumulative_tsn_p1, MAX_TSN)) {
+ /* Yep it is above cum-ack */
+ cnt++;
+ SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.TSN_seq, asoc->mapping_array_base_tsn);
+ asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size);
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ }
+ chk = nchk;
+ }
+ /* Ok that was fun, now we will drain all the inbound streams? */
+ for (strmat = 0; strmat < asoc->streamincnt; strmat++) {
+ ctl = TAILQ_FIRST(&asoc->strmin[strmat].inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (compare_with_wrap(ctl->sinfo_tsn,
+ cumulative_tsn_p1, MAX_TSN)) {
+ /* Yep it is above cum-ack */
+ cnt++;
+ SCTP_CALC_TSN_TO_GAP(gap, ctl->sinfo_tsn, asoc->mapping_array_base_tsn);
+ asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length);
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ TAILQ_REMOVE(&asoc->strmin[strmat].inqueue, ctl, next);
+ if (ctl->data) {
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ }
+ sctp_free_remote_addr(ctl->whoFrom);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl);
+ SCTP_DECR_READQ_COUNT();
+ }
+ ctl = nctl;
+ }
+ }
+ if (cnt) {
+ /* We must back down to see what the new highest is */
+ for (i = asoc->highest_tsn_inside_map;
+ (compare_with_wrap(i, asoc->mapping_array_base_tsn, MAX_TSN) || (i == asoc->mapping_array_base_tsn));
+ i--) {
+ SCTP_CALC_TSN_TO_GAP(gap, i, asoc->mapping_array_base_tsn);
+ if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
+ asoc->highest_tsn_inside_map = i;
+ fnd = 1;
+ break;
+ }
+ }
+ if (!fnd) {
+ asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn - 1;
+ }
+ /*
+ * Question, should we go through the delivery queue? The
+ * only reason things are on here is the app not reading OR
+ * a p-d-api up. An attacker COULD send enough in to
+ * initiate the PD-API and then send a bunch of stuff to
+ * other streams... these would wind up on the delivery
+ * queue.. and then we would not get to them. But in order
+ * to do this I then have to back-track and un-deliver
+ * sequence numbers in streams.. el-yucko. I think for now
+ * we will NOT look at the delivery queue and leave it to be
+ * something to consider later. An alternative would be to
+ * abort the P-D-API with a notification and then deliver
+ * the data.... Or another method might be to keep track of
+ * how many times the situation occurs and if we see a
+ * possible attack underway just abort the association.
+ */
+#ifdef SCTP_DEBUG
+ SCTPDBG(SCTP_DEBUG_PCB1, "Freed %d chunks from reneg harvest\n", cnt);
+#endif
+ /*
+ * Now do we need to find a new
+ * asoc->highest_tsn_inside_map?
+ */
+ asoc->last_revoke_count = cnt;
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ /* sa_ignore NO_NULL_CHK */
+ sctp_send_sack(stcb);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_DRAIN, SCTP_SO_NOT_LOCKED);
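+		/*
+		 * remember which assocs we reneged on in a small debug ring;
+		 * the uint8_t index wraps automatically at 256 entries
+		 */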
+ reneged_asoc_ids[reneged_at] = sctp_get_associd(stcb);
+ reneged_at++;
+ }
+ /*
+ * Another issue, in un-setting the TSN's in the mapping array we
+ * DID NOT adjust the highest_tsn marker. This will cause one of
+ * two things to occur. It may cause us to do extra work in checking
+ * for our mapping array movement. More importantly it may cause us
+ * to SACK every datagram. This may not be a bad thing though since
+ * we will recover once we get our cum-ack above and all this stuff
+ * we dumped recovered.
+ */
+}
+
+void
+sctp_drain()
+{
+ /*
+ * We must walk the PCB lists for ALL associations here. The system
+ * is LOW on MBUF's and needs help. This is where reneging will
+ * occur. We really hope this does NOT happen!
+ */
+ VNET_ITERATOR_DECL(vnet_iter);
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+
+ SCTP_STAT_INCR(sctps_protocol_drain_calls);
+ if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) {
+#ifdef VIMAGE
+ continue;
+#else
+ return;
+#endif
+ }
+ SCTP_INP_INFO_RLOCK();
+ LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
+ /* For each endpoint */
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ /* For each association */
+ SCTP_TCB_LOCK(stcb);
+ sctp_drain_mbufs(inp, stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
+
+/*
+ * Start a new iterator. It iterates through all endpoints and associations
+ * based on the pcb_state flags and asoc_state. "af" (mandatory) is executed
+ * for all matching assocs and "ef" (optional) is executed when the iterator
+ * completes. "inpf" (optional) is executed for each new endpoint as it is
+ * being iterated through. "inpe" (optional) is called when the inp completes
+ * its way through all the stcbs.
+ */
+int
+sctp_initiate_iterator(inp_func inpf,
+ asoc_func af,
+ inp_func inpe,
+ uint32_t pcb_state,
+ uint32_t pcb_features,
+ uint32_t asoc_state,
+ void *argp,
+ uint32_t argi,
+ end_func ef,
+ struct sctp_inpcb *s_inp,
+ uint8_t chunk_output_off)
+{
+ struct sctp_iterator *it = NULL;
+
+ if (af == NULL) {
+ return (-1);
+ }
+ SCTP_MALLOC(it, struct sctp_iterator *, sizeof(struct sctp_iterator),
+ SCTP_M_ITER);
+ if (it == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(it, 0, sizeof(*it));
+ it->function_assoc = af;
+ it->function_inp = inpf;
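+	/* with no per-endpoint function, mark the endpoint step already done */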
+ if (inpf)
+ it->done_current_ep = 0;
+ else
+ it->done_current_ep = 1;
+ it->function_atend = ef;
+ it->pointer = argp;
+ it->val = argi;
+ it->pcb_flags = pcb_state;
+ it->pcb_features = pcb_features;
+ it->asoc_state = asoc_state;
+ it->function_inp_end = inpe;
+ it->no_chunk_output = chunk_output_off;
+ it->vn = curvnet;
+ if (s_inp) {
+ /* Assume lock is held here */
+ it->inp = s_inp;
+ SCTP_INP_INCR_REF(it->inp);
+ it->iterator_flags = SCTP_ITERATOR_DO_SINGLE_INP;
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ it->inp = LIST_FIRST(&SCTP_BASE_INFO(listhead));
+ if (it->inp) {
+ SCTP_INP_INCR_REF(it->inp);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ it->iterator_flags = SCTP_ITERATOR_DO_ALL_INP;
+
+ }
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+
+ TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
+ if (sctp_it_ctl.iterator_running == 0) {
+ sctp_wakeup_iterator();
+ }
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ /* sa_ignore MEMLEAK {memory is put on the tailq for the iterator} */
+ return (0);
+}
diff --git a/freebsd/sys/netinet/sctp_pcb.h b/freebsd/sys/netinet/sctp_pcb.h
new file mode 100644
index 00000000..a4f4d30c
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_pcb.h
@@ -0,0 +1,632 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_pcb.h,v 1.21 2005/07/16 01:18:47 suz Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_pcb_h__
+#define __sctp_pcb_h__
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_constants.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+
+LIST_HEAD(sctppcbhead, sctp_inpcb);
+LIST_HEAD(sctpasochead, sctp_tcb);
+LIST_HEAD(sctpladdr, sctp_laddr);
+LIST_HEAD(sctpvtaghead, sctp_tagblock);
+LIST_HEAD(sctp_vrflist, sctp_vrf);
+LIST_HEAD(sctp_ifnlist, sctp_ifn);
+LIST_HEAD(sctp_ifalist, sctp_ifa);
+TAILQ_HEAD(sctp_readhead, sctp_queued_to_read);
+TAILQ_HEAD(sctp_streamhead, sctp_stream_queue_pending);
+
+#include <freebsd/netinet/sctp_structs.h>
+#include <freebsd/netinet/sctp_auth.h>
+
+#define SCTP_PCBHASH_ALLADDR(port, mask) (port & mask)
+#define SCTP_PCBHASH_ASOC(tag, mask) (tag & mask)
+
+struct sctp_vrf {
+ LIST_ENTRY(sctp_vrf) next_vrf;
+ struct sctp_ifalist *vrf_addr_hash;
+ struct sctp_ifnlist ifnlist;
+ uint32_t vrf_id;
+ uint32_t tbl_id_v4; /* default v4 table id */
+ uint32_t tbl_id_v6; /* default v6 table id */
+ uint32_t total_ifa_count;
+ u_long vrf_addr_hashmark;
+ uint32_t refcount;
+};
+
+struct sctp_ifn {
+ struct sctp_ifalist ifalist;
+ struct sctp_vrf *vrf;
+ LIST_ENTRY(sctp_ifn) next_ifn;
+ LIST_ENTRY(sctp_ifn) next_bucket;
+ void *ifn_p; /* never access without appropriate lock */
+ uint32_t ifn_mtu;
+ uint32_t ifn_type;
+ uint32_t ifn_index; /* shorthand way to look at ifn for reference */
+	uint32_t refcount;	/* number of references held; should be >=
+				 * ifa_count */
+ uint32_t ifa_count; /* IFA's we hold (in our list - ifalist) */
+ uint32_t num_v6; /* number of v6 addresses */
+ uint32_t num_v4; /* number of v4 addresses */
+ uint32_t registered_af; /* registered address family for i/f events */
+ char ifn_name[SCTP_IFNAMSIZ];
+};
+
+/* SCTP local IFA flags */
+#define SCTP_ADDR_VALID 0x00000001 /* its up and active */
+#define SCTP_BEING_DELETED 0x00000002 /* being deleted, when
+ * refcount = 0. Note that it
+ * is pulled from the ifn list
+ * and ifa_p is nulled right
+ * away but it cannot be freed
+ * until the last *net
+ * pointing to it is deleted. */
+#define SCTP_ADDR_DEFER_USE 0x00000004 /* Hold off using this one */
+#define SCTP_ADDR_IFA_UNUSEABLE 0x00000008
+
+struct sctp_ifa {
+ LIST_ENTRY(sctp_ifa) next_ifa;
+ LIST_ENTRY(sctp_ifa) next_bucket;
+ struct sctp_ifn *ifn_p; /* back pointer to parent ifn */
+	void *ifa;		/* pointer to ifa, needed for flag updates, for
+				 * which we MUST hold the appropriate locks.
+				 * This is for V6. */
+ union sctp_sockstore address;
+	uint32_t refcount;	/* number of folks referring to this */
+ uint32_t flags;
+ uint32_t localifa_flags;
+ uint32_t vrf_id; /* vrf_id of this addr (for deleting) */
+ uint8_t src_is_loop;
+ uint8_t src_is_priv;
+ uint8_t src_is_glob;
+ uint8_t resv;
+};
+
+struct sctp_laddr {
+ LIST_ENTRY(sctp_laddr) sctp_nxt_addr; /* next in list */
+ struct sctp_ifa *ifa;
+	uint32_t action;	/* Used during asconf and adding; if non-zero,
+				 * src-addr selection will not consider this
+				 * address. */
+ struct timeval start_time; /* time when this address was created */
+};
+
+struct sctp_block_entry {
+ int error;
+};
+
+struct sctp_timewait {
+ uint32_t tv_sec_at_expire; /* the seconds from boot to expire */
+ uint32_t v_tag; /* the vtag that can not be reused */
+ uint16_t lport; /* the local port used in vtag */
+ uint16_t rport; /* the remote port used in vtag */
+};
+
+struct sctp_tagblock {
+ LIST_ENTRY(sctp_tagblock) sctp_nxt_tagblock;
+ struct sctp_timewait vtag_block[SCTP_NUMBER_IN_VTAG_BLOCK];
+};
+
+
+struct sctp_epinfo {
+ struct socket *udp_tun_socket;
+ struct sctpasochead *sctp_asochash;
+ u_long hashasocmark;
+
+ struct sctppcbhead *sctp_ephash;
+ u_long hashmark;
+
+ /*-
+ * The TCP model represents a substantial overhead in that we get an
+ * additional hash table to keep explicit connections in. The
+ * listening TCP endpoint will exist in the usual ephash above and
+ * accept only INIT's. It will be incapable of sending off an INIT.
+ * When a dg arrives we must look in the normal ephash. If we find a
+ * TCP endpoint that will tell us to go to the specific endpoint
+ * hash and re-hash to find the right assoc/socket. If we find a UDP
+ * model socket we then must complete the lookup. If this fails,
+ * i.e. no association can be found then we must continue to see if
+ * a sctp_peeloff()'d socket is in the tcpephash (a spun off socket
+ * acts like a TCP model connected socket).
+ */
+ struct sctppcbhead *sctp_tcpephash;
+ u_long hashtcpmark;
+ uint32_t hashtblsize;
+
+ struct sctp_vrflist *sctp_vrfhash;
+ u_long hashvrfmark;
+
+ struct sctp_ifnlist *vrf_ifn_hash;
+ u_long vrf_ifn_hashmark;
+
+ struct sctppcbhead listhead;
+ struct sctpladdr addr_wq;
+
+ /* ep zone info */
+ sctp_zone_t ipi_zone_ep;
+ sctp_zone_t ipi_zone_asoc;
+ sctp_zone_t ipi_zone_laddr;
+ sctp_zone_t ipi_zone_net;
+ sctp_zone_t ipi_zone_chunk;
+ sctp_zone_t ipi_zone_readq;
+ sctp_zone_t ipi_zone_strmoq;
+ sctp_zone_t ipi_zone_asconf;
+ sctp_zone_t ipi_zone_asconf_ack;
+
+ struct rwlock ipi_ep_mtx;
+ struct mtx ipi_iterator_wq_mtx;
+ struct rwlock ipi_addr_mtx;
+ struct mtx ipi_pktlog_mtx;
+ struct mtx wq_addr_mtx;
+ uint32_t ipi_count_ep;
+
+ /* assoc/tcb zone info */
+ uint32_t ipi_count_asoc;
+
+ /* local addrlist zone info */
+ uint32_t ipi_count_laddr;
+
+ /* remote addrlist zone info */
+ uint32_t ipi_count_raddr;
+
+ /* chunk structure list for output */
+ uint32_t ipi_count_chunk;
+
+ /* socket queue zone info */
+ uint32_t ipi_count_readq;
+
+ /* socket queue zone info */
+ uint32_t ipi_count_strmoq;
+
+ /* Number of vrfs */
+ uint32_t ipi_count_vrfs;
+
+ /* Number of ifns */
+ uint32_t ipi_count_ifns;
+
+ /* Number of ifas */
+ uint32_t ipi_count_ifas;
+
+ /* system wide number of free chunks hanging around */
+ uint32_t ipi_free_chunks;
+ uint32_t ipi_free_strmoq;
+
+ struct sctpvtaghead vtag_timewait[SCTP_STACK_VTAG_HASH_SIZE];
+
+ /* address work queue handling */
+ struct sctp_timer addr_wq_timer;
+
+};
+
+
+struct sctp_base_info {
+ /*
+ * All static structures that anchor the system must be here.
+ */
+ struct sctp_epinfo sctppcbinfo;
+#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
+ struct sctpstat sctpstat[MAXCPU];
+#else
+ struct sctpstat sctpstat;
+#endif
+ struct sctp_sysctl sctpsysctl;
+ uint8_t first_time;
+ char sctp_pcb_initialized;
+#if defined(SCTP_PACKET_LOGGING)
+ int packet_log_writers;
+ int packet_log_end;
+ uint8_t packet_log_buffer[SCTP_PACKET_LOG_SIZE];
+#endif
+};
+
+/*-
+ * Here we have all the relevant information for each SCTP entity created. We
+ * will need to modify this as appropriate. We also need to figure out how to
+ * access /dev/random.
+ */
+struct sctp_pcb {
+ unsigned int time_of_secret_change; /* number of seconds from
+ * timeval.tv_sec */
+ uint32_t secret_key[SCTP_HOW_MANY_SECRETS][SCTP_NUMBER_OF_SECRETS];
+ unsigned int size_of_a_cookie;
+
+ unsigned int sctp_timeoutticks[SCTP_NUM_TMRS];
+ unsigned int sctp_minrto;
+ unsigned int sctp_maxrto;
+ unsigned int initial_rto;
+ int initial_init_rto_max;
+
+ unsigned int sctp_sack_freq;
+ uint32_t sctp_sws_sender;
+ uint32_t sctp_sws_receiver;
+
+ uint32_t sctp_default_cc_module;
+ /* authentication related fields */
+ struct sctp_keyhead shared_keys;
+ sctp_auth_chklist_t *local_auth_chunks;
+ sctp_hmaclist_t *local_hmacs;
+ uint16_t default_keyid;
+
+ /* various thresholds */
+ /* Max times I will init at a guy */
+ uint16_t max_init_times;
+
+ /* Max times I will send before we consider someone dead */
+ uint16_t max_send_times;
+
+ uint16_t def_net_failure;
+
+	/* number of streams to pre-open on an association */
+ uint16_t pre_open_stream_count;
+ uint16_t max_open_streams_intome;
+
+ /* random number generator */
+ uint32_t random_counter;
+ uint8_t random_numbers[SCTP_SIGNATURE_ALOC_SIZE];
+ uint8_t random_store[SCTP_SIGNATURE_ALOC_SIZE];
+
+ /*
+ * This timer is kept running per endpoint. When it fires it will
+	 * change the secret key. The default is once an hour.
+ */
+ struct sctp_timer signature_change;
+
+ /* Zero copy full buffer timer */
+ struct sctp_timer zero_copy_timer;
+ /* Zero copy app to transport (sendq) read repulse timer */
+ struct sctp_timer zero_copy_sendq_timer;
+ uint32_t def_cookie_life;
+ /* defaults to 0 */
+ int auto_close_time;
+ uint32_t initial_sequence_debug;
+ uint32_t adaptation_layer_indicator;
+ uint32_t store_at;
+ uint8_t max_burst;
+ char current_secret_number;
+ char last_secret_number;
+};
+
+#ifndef SCTP_ALIGNMENT
+#define SCTP_ALIGNMENT 32
+#endif
+
+#ifndef SCTP_ALIGNM1
+#define SCTP_ALIGNM1 (SCTP_ALIGNMENT-1)
+#endif
+
+#define sctp_lport ip_inp.inp.inp_lport
+
+struct sctp_pcbtsn_rlog {
+ uint32_t vtag;
+ uint16_t strm;
+ uint16_t seq;
+ uint16_t sz;
+ uint16_t flgs;
+};
+
+#define SCTP_READ_LOG_SIZE 135 /* we choose the number to make a pcb a page */
+
+
+struct sctp_inpcb {
+ /*-
+	 * put an inpcb in front of it all, kind of a waste but we need it
+	 * for compatibility with all the other stuff.
+ */
+ union {
+ struct inpcb inp;
+ char align[(sizeof(struct in6pcb) + SCTP_ALIGNM1) &
+ ~SCTP_ALIGNM1];
+ } ip_inp;
+
+
+ /* Socket buffer lock protects read_queue and of course sb_cc */
+ struct sctp_readhead read_queue;
+
+ LIST_ENTRY(sctp_inpcb) sctp_list; /* lists all endpoints */
+ /* hash of all endpoints for model */
+ LIST_ENTRY(sctp_inpcb) sctp_hash;
+ /* count of local addresses bound, 0 if bound all */
+ int laddr_count;
+
+ /* list of addrs in use by the EP, NULL if bound-all */
+ struct sctpladdr sctp_addr_list;
+ /*
+ * used for source address selection rotation when we are subset
+ * bound
+ */
+ struct sctp_laddr *next_addr_touse;
+
+ /* back pointer to our socket */
+ struct socket *sctp_socket;
+ uint32_t sctp_flags; /* INP state flag set */
+ uint32_t sctp_features; /* Feature flags */
+ uint32_t sctp_mobility_features; /* Mobility Feature flags */
+ struct sctp_pcb sctp_ep;/* SCTP ep data */
+ /* head of the hash of all associations */
+ struct sctpasochead *sctp_tcbhash;
+ u_long sctp_hashmark;
+ /* head of the list of all associations */
+ struct sctpasochead sctp_asoc_list;
+#ifdef SCTP_TRACK_FREED_ASOCS
+ struct sctpasochead sctp_asoc_free_list;
+#endif
+ struct sctp_iterator *inp_starting_point_for_iterator;
+ uint32_t sctp_frag_point;
+ uint32_t partial_delivery_point;
+ uint32_t sctp_context;
+ uint32_t sctp_cmt_on_off;
+ struct sctp_nonpad_sndrcvinfo def_send;
+ /*-
+	 * These three (pkt, pkt_last and control)
+	 * are here for the sosend_dgram routine.
+	 * However, I don't think anyone in the
+	 * current FreeBSD kernel calls this, so
+	 * together with sctp_sendm they are
+	 * candidates for de-supporting.
+ */
+ struct mbuf *pkt, *pkt_last;
+ struct mbuf *control;
+ struct mtx inp_mtx;
+ struct mtx inp_create_mtx;
+ struct mtx inp_rdata_mtx;
+ int32_t refcount;
+ uint32_t def_vrf_id;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t last_abort_code;
+ uint32_t total_nospaces;
+ struct sctpasochead *sctp_asocidhash;
+ u_long hashasocidmark;
+ uint32_t sctp_associd_counter;
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ struct sctp_pcbtsn_rlog readlog[SCTP_READ_LOG_SIZE];
+ uint32_t readlog_index;
+#endif
+};
+
+struct sctp_tcb {
+ struct socket *sctp_socket; /* back pointer to socket */
+ struct sctp_inpcb *sctp_ep; /* back pointer to ep */
+ LIST_ENTRY(sctp_tcb) sctp_tcbhash; /* next link in hash
+ * table */
+ LIST_ENTRY(sctp_tcb) sctp_tcblist; /* list of all of the
+ * TCB's */
+ LIST_ENTRY(sctp_tcb) sctp_tcbasocidhash; /* next link in asocid
+ * hash table */
+ LIST_ENTRY(sctp_tcb) sctp_asocs; /* vtag hash list */
+ struct sctp_block_entry *block_entry; /* pointer locked by socket
+ * send buffer */
+ struct sctp_association asoc;
+ /*-
+ * freed_by_sorcv_sincelast is protected by the sockbuf_lock NOT the
+	 * tcb_lock. It's special in this way to help avoid extra mutex calls
+ * in the reading of data.
+ */
+ uint32_t freed_by_sorcv_sincelast;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ int freed_from_where;
+ uint16_t rport; /* remote port in network format */
+ uint16_t resv;
+ struct mtx tcb_mtx;
+ struct mtx tcb_send_mtx;
+};
+
+
+
+#include <freebsd/netinet/sctp_lock_bsd.h>
+
+
+/* TODO where to put non-_KERNEL things for __Userspace__? */
+#if defined(_KERNEL) || defined(__Userspace__)
+
+/* Attention Julian, this is the extern that
+ * goes with the base info. sctp_pcb.c has
+ * the real definition.
+ */
+VNET_DECLARE(struct sctp_base_info, system_base_info);
+
+#ifdef INET6
+int SCTP6_ARE_ADDR_EQUAL(struct sockaddr_in6 *a, struct sockaddr_in6 *b);
+
+#endif
+
+void sctp_fill_pcbinfo(struct sctp_pcbinfo *);
+
+struct sctp_ifn *
+ sctp_find_ifn(void *ifn, uint32_t ifn_index);
+
+struct sctp_vrf *sctp_allocate_vrf(int vrfid);
+struct sctp_vrf *sctp_find_vrf(uint32_t vrfid);
+void sctp_free_vrf(struct sctp_vrf *vrf);
+
+/*-
+ * Change address state, can be used if
+ * O/S supports telling transports about
+ * changes to IFA/IFN's (link layer triggers).
+ * If an ifn goes down, we will do src-addr-selection
+ * and NOT use it as a source address. This does
+ * not stop the routing system from routing out
+ * that interface, but we won't put it as a source.
+ */
+void sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index);
+void sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index);
+
+struct sctp_ifa *
+sctp_add_addr_to_vrf(uint32_t vrfid,
+ void *ifn, uint32_t ifn_index, uint32_t ifn_type,
+ const char *if_name,
+ void *ifa, struct sockaddr *addr, uint32_t ifa_flags,
+ int dynamic_add);
+
+void sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu);
+
+void sctp_free_ifn(struct sctp_ifn *sctp_ifnp);
+void sctp_free_ifa(struct sctp_ifa *sctp_ifap);
+
+
+void
+sctp_del_addr_from_vrf(uint32_t vrfid, struct sockaddr *addr,
+ uint32_t ifn_index, const char *if_name);
+
+
+
+struct sctp_nets *sctp_findnet(struct sctp_tcb *, struct sockaddr *);
+
+struct sctp_inpcb *sctp_pcb_findep(struct sockaddr *, int, int, uint32_t);
+
+int
+sctp_inpcb_bind(struct socket *, struct sockaddr *,
+ struct sctp_ifa *, struct thread *);
+
+struct sctp_tcb *
+sctp_findassociation_addr(struct mbuf *, int, int,
+ struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb **,
+ struct sctp_nets **, uint32_t vrf_id);
+
+struct sctp_tcb *
+sctp_findassociation_addr_sa(struct sockaddr *,
+ struct sockaddr *, struct sctp_inpcb **, struct sctp_nets **, int, uint32_t);
+
+void
+sctp_move_pcb_and_assoc(struct sctp_inpcb *, struct sctp_inpcb *,
+ struct sctp_tcb *);
+
+/*-
+ * For this call ep_addr, the to is the destination endpoint address of the
+ * peer (relative to outbound). The from field is only used if the TCP model
+ * is enabled and helps distinguish amongst the subset bound (non-boundall).
+ * The TCP model MAY change the actual ep field, this is why it is passed.
+ */
+struct sctp_tcb *
+sctp_findassociation_ep_addr(struct sctp_inpcb **,
+ struct sockaddr *, struct sctp_nets **, struct sockaddr *,
+ struct sctp_tcb *);
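+
+/*-
+ * Minimal usage sketch (editor's illustration, not part of the FreeBSD
+ * sources).  'remote' names the peer address; passing NULL for the from
+ * address and for the locked tcb is an assumption covering the simple,
+ * non-TCP-model case.  The endpoint pointer goes by reference because the
+ * TCP model may replace it, as the comment above notes.
+ *
+ *	struct sctp_inpcb *inp = my_endpoint;
+ *	struct sctp_nets *net = NULL;
+ *	struct sctp_tcb *stcb;
+ *
+ *	stcb = sctp_findassociation_ep_addr(&inp, remote, &net, NULL, NULL);
+ *	if (stcb != NULL) {
+ *		... work with stcb/net, then release it as the lookup
+ *		    routines in this file require ...
+ *	}
+ */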
+
+struct sctp_tcb *
+ sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock);
+
+struct sctp_tcb *
+sctp_findassociation_ep_asocid(struct sctp_inpcb *,
+ sctp_assoc_t, int);
+
+struct sctp_tcb *
+sctp_findassociation_ep_asconf(struct mbuf *, int, int,
+ struct sctphdr *, struct sctp_inpcb **, struct sctp_nets **, uint32_t vrf_id);
+
+int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id);
+
+int sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id);
+
+void sctp_inpcb_free(struct sctp_inpcb *, int, int);
+
+struct sctp_tcb *
+sctp_aloc_assoc(struct sctp_inpcb *, struct sockaddr *,
+ int *, uint32_t, uint32_t, struct thread *);
+
+int sctp_free_assoc(struct sctp_inpcb *, struct sctp_tcb *, int, int);
+
+
+void sctp_delete_from_timewait(uint32_t, uint16_t, uint16_t);
+
+int sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport);
+
+void
+ sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time, uint16_t lport, uint16_t rport);
+
+void sctp_add_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *, uint32_t);
+
+int sctp_insert_laddr(struct sctpladdr *, struct sctp_ifa *, uint32_t);
+
+void sctp_remove_laddr(struct sctp_laddr *);
+
+void sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *);
+
+int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, int, int);
+
+void sctp_remove_net(struct sctp_tcb *, struct sctp_nets *);
+
+int sctp_del_remote_addr(struct sctp_tcb *, struct sockaddr *);
+
+void sctp_pcb_init(void);
+
+void sctp_pcb_finish(void);
+
+void sctp_add_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+void sctp_del_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+
+int
+sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int,
+ int, struct sctphdr *, struct sockaddr *);
+
+int
+sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *,
+ struct sctp_nets *);
+
+int sctp_is_vtag_good(struct sctp_inpcb *, uint32_t, uint16_t lport, uint16_t rport, struct timeval *, int);
+
+/* void sctp_drain(void); */
+
+int sctp_destination_is_reachable(struct sctp_tcb *, struct sockaddr *);
+
+int sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp);
+
+/*-
+ * A NULL inpcb in the last arg indicates run on ALL ep's. A specific inp in
+ * the last arg indicates run on ONLY the assoc's of that endpoint.
+ */
+int
+sctp_initiate_iterator(inp_func inpf,
+ asoc_func af,
+ inp_func inpe,
+ uint32_t, uint32_t,
+ uint32_t, void *,
+ uint32_t,
+ end_func ef,
+ struct sctp_inpcb *,
+ uint8_t co_off);
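+
+/*-
+ * Illustrative call form only (editor's sketch): my_asoc_func, my_end_func
+ * and my_arg are hypothetical and must match the typedefs in
+ * sctp_structs.h.  With NULL as the inpcb argument every endpoint is
+ * walked; passing a specific inp restricts the walk to that endpoint's
+ * associations.
+ *
+ *	(void)sctp_initiate_iterator(NULL, my_asoc_func, NULL,
+ *	    SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE,
+ *	    my_arg, 0, my_end_func, NULL, 0);
+ */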
+
+#ifdef INVARIANTS
+void
+ sctp_validate_no_locks(struct sctp_inpcb *inp);
+
+#endif
+
+#endif /* _KERNEL */
+#endif /* !__sctp_pcb_h__ */
diff --git a/freebsd/sys/netinet/sctp_peeloff.c b/freebsd/sys/netinet/sctp_peeloff.c
new file mode 100644
index 00000000..7b859bba
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_peeloff.c
@@ -0,0 +1,240 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/* $KAME: sctp_peeloff.c,v 1.13 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_uio.h>
+#include <freebsd/netinet/sctp_peeloff.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_auth.h>
+
+
+int
+sctp_can_peel_off(struct socket *head, sctp_assoc_t assoc_id)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ uint32_t state;
+
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ return (EFAULT);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOENT);
+ return (ENOENT);
+ }
+ state = SCTP_GET_STATE((&stcb->asoc));
+ if ((state == SCTP_STATE_EMPTY) ||
+ (state == SCTP_STATE_INUSE) ||
+ (state == SCTP_STATE_COOKIE_WAIT) ||
+ (state == SCTP_STATE_COOKIE_ECHOED)) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ /* We are clear to peel this one off */
+ return (0);
+}
+
+int
+sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id)
+{
+ struct sctp_inpcb *inp, *n_inp;
+ struct sctp_tcb *stcb;
+ uint32_t state;
+
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ return (EFAULT);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ state = SCTP_GET_STATE((&stcb->asoc));
+ if ((state == SCTP_STATE_EMPTY) ||
+ (state == SCTP_STATE_INUSE) ||
+ (state == SCTP_STATE_COOKIE_WAIT) ||
+ (state == SCTP_STATE_COOKIE_ECHOED)) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ n_inp = (struct sctp_inpcb *)so->so_pcb;
+ n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */
+ (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
+ n_inp->sctp_socket = so;
+ n_inp->sctp_features = inp->sctp_features;
+ n_inp->sctp_mobility_features = inp->sctp_mobility_features;
+ n_inp->sctp_frag_point = inp->sctp_frag_point;
+ n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off;
+ n_inp->partial_delivery_point = inp->partial_delivery_point;
+ n_inp->sctp_context = inp->sctp_context;
+ n_inp->inp_starting_point_for_iterator = NULL;
+ /* copy in the authentication parameters from the original endpoint */
+ if (n_inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
+ n_inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (n_inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
+ n_inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
+ &n_inp->sctp_ep.shared_keys);
+ /*
+ * Now we must move it from one hash table to another and get the
+ * stcb in the right place.
+ */
+ sctp_move_pcb_and_assoc(inp, n_inp, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+
+ sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+
+ return (0);
+}
+
+
+struct socket *
+sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error)
+{
+ struct socket *newso;
+ struct sctp_inpcb *inp, *n_inp;
+ struct sctp_tcb *stcb;
+
+ SCTPDBG(SCTP_DEBUG_PEEL1, "SCTP peel-off called\n");
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ *error = EFAULT;
+ return (NULL);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ *error = ENOTCONN;
+ return (NULL);
+ }
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+	newso = sonewconn(head, SS_ISCONNECTED);
+ if (newso == NULL) {
+ SCTPDBG(SCTP_DEBUG_PEEL1, "sctp_peeloff:sonewconn failed\n");
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOMEM);
+ *error = ENOMEM;
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+
+ }
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ n_inp = (struct sctp_inpcb *)newso->so_pcb;
+ SOCK_LOCK(head);
+ n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */
+ (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
+ n_inp->sctp_features = inp->sctp_features;
+ n_inp->sctp_frag_point = inp->sctp_frag_point;
+ n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off;
+ n_inp->partial_delivery_point = inp->partial_delivery_point;
+ n_inp->sctp_context = inp->sctp_context;
+ n_inp->inp_starting_point_for_iterator = NULL;
+
+ /* copy in the authentication parameters from the original endpoint */
+ if (n_inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
+ n_inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (n_inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
+ n_inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
+ &n_inp->sctp_ep.shared_keys);
+
+ n_inp->sctp_socket = newso;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_feature_off(n_inp, SCTP_PCB_FLAGS_AUTOCLOSE);
+ n_inp->sctp_ep.auto_close_time = 0;
+ sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, n_inp, stcb, NULL,
+ SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1);
+ }
+ /* Turn off any non-blocking semantic. */
+ SCTP_CLEAR_SO_NBIO(newso);
+ newso->so_state |= SS_ISCONNECTED;
+ /* We remove it right away */
+
+#ifdef SCTP_LOCK_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
+ sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
+ }
+#endif
+ TAILQ_REMOVE(&head->so_comp, newso, so_list);
+ head->so_qlen--;
+ SOCK_UNLOCK(head);
+ /*
+ * Now we must move it from one hash table to another and get the
+ * stcb in the right place.
+ */
+ sctp_move_pcb_and_assoc(inp, n_inp, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ /*
+ * And now the final hack. We move data in the pending side i.e.
+ * head to the new socket buffer. Let the GRUBBING begin :-0
+ */
+ sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (newso);
+}
diff --git a/freebsd/sys/netinet/sctp_peeloff.h b/freebsd/sys/netinet/sctp_peeloff.h
new file mode 100644
index 00000000..57fd5fef
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_peeloff.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_peeloff.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_peeloff_h__
+#define __sctp_peeloff_h__
+
+
+
+
+#if defined(_KERNEL)
+
+int sctp_can_peel_off(struct socket *, sctp_assoc_t);
+int sctp_do_peeloff(struct socket *, struct socket *, sctp_assoc_t);
+struct socket *sctp_get_peeloff(struct socket *, sctp_assoc_t, int *);
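+
+/*-
+ * Illustrative flow (editor's sketch, not part of the original header):
+ * a caller holding the one-to-many socket 'head' first checks that the
+ * association may be peeled off and then obtains a dedicated socket for
+ * it.  Error handling is abbreviated.
+ *
+ *	int error;
+ *	struct socket *so = NULL;
+ *
+ *	error = sctp_can_peel_off(head, assoc_id);
+ *	if (error == 0)
+ *		so = sctp_get_peeloff(head, assoc_id, &error);
+ */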
+
+
+
+#endif /* _KERNEL */
+
+#endif
diff --git a/freebsd/sys/netinet/sctp_structs.h b/freebsd/sys/netinet/sctp_structs.h
new file mode 100644
index 00000000..2050c581
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_structs.h
@@ -0,0 +1,1094 @@
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_structs.h,v 1.13 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_structs_h__
+#define __sctp_structs_h__
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctp_auth.h>
+
+struct sctp_timer {
+ sctp_os_timer_t timer;
+
+ int type;
+ /*
+ * Depending on the timer type these will be setup and cast with the
+ * appropriate entity.
+ */
+ void *ep;
+ void *tcb;
+ void *net;
+ void *vnet;
+
+ /* for sanity checking */
+ void *self;
+ uint32_t ticks;
+ uint32_t stopped_from;
+};
+
+
+struct sctp_foo_stuff {
+ struct sctp_inpcb *inp;
+ uint32_t lineno;
+ uint32_t ticks;
+ int updown;
+};
+
+
+/*
+ * This is the information we track on each interface that we know about from
+ * the distant end.
+ */
+TAILQ_HEAD(sctpnetlisthead, sctp_nets);
+
+struct sctp_stream_reset_list {
+ TAILQ_ENTRY(sctp_stream_reset_list) next_resp;
+ uint32_t tsn;
+ int number_entries;
+ struct sctp_stream_reset_out_request req;
+};
+
+TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list);
+
+/*
+ * Users of the iterator need to malloc an iterator with a call to
+ * sctp_initiate_iterator(inp_func, assoc_func, inp_func, pcb_flags, pcb_features,
+ * asoc_state, void-ptr-arg, uint32-arg, end_func, inp);
+ *
+ * Use the following two defines if you don't care what pcb flags are on the EP
+ * and/or you don't care what state the association is in.
+ *
+ * Note that if you specify an INP as the last argument then ONLY each
+ * association of that single INP will be executed upon. Note that the pcb
+ * flags STILL apply, so if the inp you specify has different pcb_flags than
+ * what you put in pcb_flags, nothing will happen. Use SCTP_PCB_ANY_FLAGS to
+ * assure the inp you specify gets treated (a usage sketch follows the
+ * callback typedefs below).
+ */
+#define SCTP_PCB_ANY_FLAGS 0x00000000
+#define SCTP_PCB_ANY_FEATURES 0x00000000
+#define SCTP_ASOC_ANY_STATE 0x00000000
+
+typedef void (*asoc_func) (struct sctp_inpcb *, struct sctp_tcb *, void *ptr,
+ uint32_t val);
+typedef int (*inp_func) (struct sctp_inpcb *, void *ptr, uint32_t val);
+typedef void (*end_func) (void *ptr, uint32_t val);
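+
+/*-
+ * Sketch of matching callbacks (editor's illustration; the names are made
+ * up).  my_asoc_func runs once per association selected by the iterator
+ * and my_end_func runs once when the walk completes; both receive the
+ * pointer and value handed to sctp_initiate_iterator().
+ *
+ *	static void
+ *	my_asoc_func(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ *	    void *ptr, uint32_t val)
+ *	{
+ *		... per-association work ...
+ *	}
+ *
+ *	static void
+ *	my_end_func(void *ptr, uint32_t val)
+ *	{
+ *		... cleanup after the iterator has finished ...
+ *	}
+ *
+ * Pass SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES and SCTP_ASOC_ANY_STATE
+ * when no filtering is wanted.
+ */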
+
+struct sctp_iterator {
+ TAILQ_ENTRY(sctp_iterator) sctp_nxt_itr;
+ struct vnet *vn;
+ struct sctp_timer tmr;
+ struct sctp_inpcb *inp; /* current endpoint */
+	struct sctp_tcb *stcb;	/* current assoc */
+ struct sctp_inpcb *next_inp; /* special hook to skip to */
+ asoc_func function_assoc; /* per assoc function */
+ inp_func function_inp; /* per endpoint function */
+ inp_func function_inp_end; /* end INP function */
+ end_func function_atend;/* iterator completion function */
+ void *pointer; /* pointer for apply func to use */
+ uint32_t val; /* value for apply func to use */
+ uint32_t pcb_flags; /* endpoint flags being checked */
+ uint32_t pcb_features; /* endpoint features being checked */
+ uint32_t asoc_state; /* assoc state being checked */
+ uint32_t iterator_flags;
+ uint8_t no_chunk_output;
+ uint8_t done_current_ep;
+};
+
+/* iterator_flags values */
+#define SCTP_ITERATOR_DO_ALL_INP 0x00000001
+#define SCTP_ITERATOR_DO_SINGLE_INP 0x00000002
+
+
+TAILQ_HEAD(sctpiterators, sctp_iterator);
+
+struct sctp_copy_all {
+ struct sctp_inpcb *inp; /* ep */
+ struct mbuf *m;
+ struct sctp_sndrcvinfo sndrcv;
+ int sndlen;
+ int cnt_sent;
+ int cnt_failed;
+};
+
+struct sctp_asconf_iterator {
+ struct sctpladdr list_of_work;
+ int cnt;
+};
+
+struct iterator_control {
+ struct mtx ipi_iterator_wq_mtx;
+ struct mtx it_mtx;
+ SCTP_PROCESS_STRUCT thread_proc;
+ struct sctpiterators iteratorhead;
+ struct sctp_iterator *cur_it;
+ uint32_t iterator_running;
+ uint32_t iterator_flags;
+};
+
+#define SCTP_ITERATOR_MUST_EXIT 0x00000001
+#define SCTP_ITERATOR_STOP_CUR_IT 0x00000002
+#define SCTP_ITERATOR_STOP_CUR_INP 0x00000004
+
+struct sctp_net_route {
+ sctp_rtentry_t *ro_rt;
+ void *ro_lle;
+ union sctp_sockstore _l_addr; /* remote peer addr */
+ struct sctp_ifa *_s_addr; /* our selected src addr */
+};
+
+struct htcp {
+ uint16_t alpha; /* Fixed point arith, << 7 */
+ uint8_t beta; /* Fixed point arith, << 7 */
+ uint8_t modeswitch; /* Delay modeswitch until we had at least one
+ * congestion event */
+ uint32_t last_cong; /* Time since last congestion event end */
+ uint32_t undo_last_cong;
+ uint16_t bytes_acked;
+ uint32_t bytecount;
+ uint32_t minRTT;
+ uint32_t maxRTT;
+
+ uint32_t undo_maxRTT;
+ uint32_t undo_old_maxB;
+
+ /* Bandwidth estimation */
+ uint32_t minB;
+ uint32_t maxB;
+ uint32_t old_maxB;
+ uint32_t Bi;
+ uint32_t lasttime;
+};
+
+
+struct sctp_nets {
+ TAILQ_ENTRY(sctp_nets) sctp_next; /* next link */
+
+ /*
+ * Things on the top half may be able to be split into a common
+ * structure shared by all.
+ */
+ struct sctp_timer pmtu_timer;
+
+ /*
+ * The following two in combination equate to a route entry for v6
+ * or v4.
+ */
+ struct sctp_net_route ro;
+
+ /* mtu discovered so far */
+ uint32_t mtu;
+ uint32_t ssthresh; /* not sure about this one for split */
+
+ /* smoothed average things for RTT and RTO itself */
+ int lastsa;
+ int lastsv;
+ int rtt; /* last measured rtt value in ms */
+ unsigned int RTO;
+
+ /* This is used for SHUTDOWN/SHUTDOWN-ACK/SEND or INIT timers */
+ struct sctp_timer rxt_timer;
+ struct sctp_timer fr_timer; /* for early fr */
+
+ /* last time in seconds I sent to it */
+ struct timeval last_sent_time;
+ int ref_count;
+
+ /* Congestion stats per destination */
+ /*
+ * flight size variables and such, sorry Vern, I could not avoid
+ * this if I wanted performance :>
+ */
+ uint32_t flight_size;
+ uint32_t cwnd; /* actual cwnd */
+ uint32_t prev_cwnd; /* cwnd before any processing */
+ uint32_t partial_bytes_acked; /* in CA tracks when to incr a MTU */
+ uint32_t prev_rtt;
+ /* tracking variables to avoid the aloc/free in sack processing */
+ unsigned int net_ack;
+ unsigned int net_ack2;
+
+ /*
+ * JRS - 5/8/07 - Variable to track last time a destination was
+ * active for CMT PF
+ */
+ uint32_t last_active;
+
+ /*
+ * CMT variables (iyengar@cis.udel.edu)
+ */
+ uint32_t this_sack_highest_newack; /* tracks highest TSN newly
+ * acked for a given dest in
+ * the current SACK. Used in
+ * SFR and HTNA algos */
+ uint32_t pseudo_cumack; /* CMT CUC algorithm. Maintains next expected
+ * pseudo-cumack for this destination */
+ uint32_t rtx_pseudo_cumack; /* CMT CUC algorithm. Maintains next
+ * expected pseudo-cumack for this
+ * destination */
+
+ /* CMT fast recovery variables */
+ uint32_t fast_recovery_tsn;
+ uint32_t heartbeat_random1;
+ uint32_t heartbeat_random2;
+ uint32_t tos_flowlabel;
+
+ struct timeval start_time; /* time when this net was created */
+
+	uint32_t marked_retrans;/* number of DATA chunks marked for timer
+ * based retransmissions */
+ uint32_t marked_fastretrans;
+
+ /* if this guy is ok or not ... status */
+ uint16_t dest_state;
+ /* number of transmit failures to down this guy */
+ uint16_t failure_threshold;
+ /* error stats on destination */
+ uint16_t error_count;
+ /* UDP port number in case of UDP tunneling */
+ uint16_t port;
+
+ uint8_t fast_retran_loss_recovery;
+ uint8_t will_exit_fast_recovery;
+ /* Flags that probably can be combined into dest_state */
+ uint8_t fast_retran_ip; /* fast retransmit in progress */
+ uint8_t hb_responded;
+ uint8_t saw_newack; /* CMT's SFR algorithm flag */
+ uint8_t src_addr_selected; /* if we split we move */
+ uint8_t indx_of_eligible_next_to_use;
+	uint8_t addr_is_local;	/* it's a local address (if known) could move
+ * in split */
+
+ /*
+ * CMT variables (iyengar@cis.udel.edu)
+ */
+ uint8_t find_pseudo_cumack; /* CMT CUC algorithm. Flag used to
+ * find a new pseudocumack. This flag
+ * is set after a new pseudo-cumack
+ * has been received and indicates
+ * that the sender should find the
+ * next pseudo-cumack expected for
+ * this destination */
+ uint8_t find_rtx_pseudo_cumack; /* CMT CUCv2 algorithm. Flag used to
+ * find a new rtx-pseudocumack. This
+ * flag is set after a new
+ * rtx-pseudo-cumack has been received
+ * and indicates that the sender
+ * should find the next
+ * rtx-pseudo-cumack expected for this
+ * destination */
+ uint8_t new_pseudo_cumack; /* CMT CUC algorithm. Flag used to
+ * indicate if a new pseudo-cumack or
+ * rtx-pseudo-cumack has been received */
+ uint8_t window_probe; /* Doing a window probe? */
+ uint8_t RTO_measured; /* Have we done the first measure */
+ uint8_t last_hs_used; /* index into the last HS table entry we used */
+ /* JRS - struct used in HTCP algorithm */
+ struct htcp htcp_ca;
+};
+
+
+struct sctp_data_chunkrec {
+ uint32_t TSN_seq; /* the TSN of this transmit */
+ uint16_t stream_seq; /* the stream sequence number of this transmit */
+ uint16_t stream_number; /* the stream number of this guy */
+ uint32_t payloadtype;
+ uint32_t context; /* from send */
+
+ /* ECN Nonce: Nonce Value for this chunk */
+ uint8_t ect_nonce;
+ uint8_t fwd_tsn_cnt;
+ /*
+ * part of the Highest sacked algorithm to be able to stroke counts
+ * on ones that are FR'd.
+ */
+ uint32_t fast_retran_tsn; /* sending_seq at the time of FR */
+ struct timeval timetodrop; /* time we drop it from queue */
+ uint8_t doing_fast_retransmit;
+	uint8_t rcv_flags;	/* flags pulled from data chunk on inbound; for
+				 * outbound it holds sending flags for PR-SCTP. */
+ uint8_t state_flags;
+ uint8_t chunk_was_revoked;
+};
+
+TAILQ_HEAD(sctpchunk_listhead, sctp_tmit_chunk);
+
+/* The lower byte is used to enumerate PR_SCTP policies */
+#define CHUNK_FLAGS_PR_SCTP_TTL SCTP_PR_SCTP_TTL
+#define CHUNK_FLAGS_PR_SCTP_BUF SCTP_PR_SCTP_BUF
+#define CHUNK_FLAGS_PR_SCTP_RTX SCTP_PR_SCTP_RTX
+
+/* The upper byte is used as a bit mask */
+#define CHUNK_FLAGS_FRAGMENT_OK 0x0100
+
+struct chk_id {
+ uint16_t id;
+ uint16_t can_take_data;
+};
+
+
+struct sctp_tmit_chunk {
+ union {
+ struct sctp_data_chunkrec data;
+ struct chk_id chunk_id;
+ } rec;
+ struct sctp_association *asoc; /* bp to asoc this belongs to */
+ struct timeval sent_rcv_time; /* filled in if RTT being calculated */
+ struct mbuf *data; /* pointer to mbuf chain of data */
+ struct mbuf *last_mbuf; /* pointer to last mbuf in chain */
+ struct sctp_nets *whoTo;
+ TAILQ_ENTRY(sctp_tmit_chunk) sctp_next; /* next link */
+ int32_t sent; /* the send status */
+ uint16_t snd_count; /* number of times I sent */
+ uint16_t flags; /* flags, such as FRAGMENT_OK */
+ uint16_t send_size;
+ uint16_t book_size;
+ uint16_t mbcnt;
+ uint16_t auth_keyid;
+ uint8_t holds_key_ref; /* flag if auth keyid refcount is held */
+ uint8_t pad_inplace;
+ uint8_t do_rtt;
+ uint8_t book_size_scale;
+ uint8_t no_fr_allowed;
+ uint8_t pr_sctp_on;
+ uint8_t copy_by_ref;
+ uint8_t window_probe;
+};
+
+/*
+ * The first part of this structure MUST be the entire sinfo structure. Maybe
+ * I should have made it a sub structure... we can circle back later and do
+ * that if we want.
+ */
+struct sctp_queued_to_read {	/* sinfo structure plus more */
+ uint16_t sinfo_stream; /* off the wire */
+ uint16_t sinfo_ssn; /* off the wire */
+ uint16_t sinfo_flags; /* SCTP_UNORDERED from wire use SCTP_EOF for
+ * EOR */
+ uint32_t sinfo_ppid; /* off the wire */
+ uint32_t sinfo_context; /* pick this up from assoc def context? */
+ uint32_t sinfo_timetolive; /* not used by kernel */
+ uint32_t sinfo_tsn; /* Use this in reassembly as first TSN */
+ uint32_t sinfo_cumtsn; /* Use this in reassembly as last TSN */
+ sctp_assoc_t sinfo_assoc_id; /* our assoc id */
+ /* Non sinfo stuff */
+ uint32_t length; /* length of data */
+ uint32_t held_length; /* length held in sb */
+ struct sctp_nets *whoFrom; /* where it came from */
+ struct mbuf *data; /* front of the mbuf chain of data with
+ * PKT_HDR */
+ struct mbuf *tail_mbuf; /* used for multi-part data */
+ struct mbuf *aux_data; /* used to hold/cache control if o/s does not
+ * take it from us */
+ struct sctp_tcb *stcb; /* assoc, used for window update */
+ TAILQ_ENTRY(sctp_queued_to_read) next;
+ uint16_t port_from;
+ uint16_t spec_flags; /* Flags to hold the notification field */
+ uint8_t do_not_ref_stcb;
+ uint8_t end_added;
+ uint8_t pdapi_aborted;
+ uint8_t some_taken;
+};
+
+/* This data structure will be on the outbound
+ * stream queues. Data will be pulled off from
+ * the front of the mbuf data and chunk-ified
+ * by the output routines. We will custom
+ * fit every chunk we pull to the send/sent
+ * queue to make up the next full packet
+ * if we can. An entry cannot be removed
+ * from the stream_out queue until
+ * the msg_is_complete flag is set. This
+ * means at times data/tail_mbuf MIGHT
+ * be NULL.. If that occurs it happens
+ * for one of two reasons. Either the user
+ * is blocked on a send() call and has not
+ * awoken to copy more data down... OR
+ * the user is in the explicit MSG_EOR mode
+ * and wrote some data, but has not completed
+ * sending. (A dequeue sketch follows the
+ * structure below.)
+ */
+struct sctp_stream_queue_pending {
+ struct mbuf *data;
+ struct mbuf *tail_mbuf;
+ struct timeval ts;
+ struct sctp_nets *net;
+ TAILQ_ENTRY(sctp_stream_queue_pending) next;
+ uint32_t length;
+ uint32_t timetolive;
+ uint32_t ppid;
+ uint32_t context;
+ uint16_t sinfo_flags;
+ uint16_t stream;
+ uint16_t strseq;
+ uint16_t act_flags;
+ uint16_t auth_keyid;
+ uint8_t holds_key_ref;
+ uint8_t msg_is_complete;
+ uint8_t some_taken;
+ uint8_t pr_sctp_on;
+ uint8_t sender_all_done;
+ uint8_t put_last_out;
+ uint8_t discard_rest;
+};
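+
+/*-
+ * Illustrative check (editor's sketch), assuming sctp_streamhead is the
+ * TAILQ of these entries hanging off struct sctp_stream_out below: an
+ * output routine must leave the head entry queued while msg_is_complete
+ * is still 0, since data/tail_mbuf may legitimately be NULL at that point.
+ *
+ *	struct sctp_stream_queue_pending *sp;
+ *
+ *	sp = TAILQ_FIRST(&strq->outqueue);
+ *	if (sp != NULL && sp->msg_is_complete) {
+ *		... safe to chunk-ify and move toward the send queue ...
+ *	}
+ */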
+
+/*
+ * this struct contains info that is used to track inbound stream data and
+ * help with ordering.
+ */
+TAILQ_HEAD(sctpwheelunrel_listhead, sctp_stream_in);
+struct sctp_stream_in {
+ struct sctp_readhead inqueue;
+ uint16_t stream_no;
+ uint16_t last_sequence_delivered; /* used for re-order */
+ uint8_t delivery_started;
+};
+
+/* This struct is used to track the traffic on outbound streams */
+TAILQ_HEAD(sctpwheel_listhead, sctp_stream_out);
+struct sctp_stream_out {
+ struct sctp_streamhead outqueue;
+ TAILQ_ENTRY(sctp_stream_out) next_spoke; /* next link in wheel */
+ uint16_t stream_no;
+ uint16_t next_sequence_sent; /* next one I expect to send out */
+ uint8_t last_msg_incomplete;
+};
+
+/* used to keep track of the addresses yet to try to add/delete */
+TAILQ_HEAD(sctp_asconf_addrhead, sctp_asconf_addr);
+struct sctp_asconf_addr {
+ TAILQ_ENTRY(sctp_asconf_addr) next;
+ struct sctp_asconf_addr_param ap;
+ struct sctp_ifa *ifa; /* save the ifa for add/del ip */
+ uint8_t sent; /* has this been sent yet? */
+ uint8_t special_del; /* not to be used in lookup */
+};
+
+struct sctp_scoping {
+ uint8_t ipv4_addr_legal;
+ uint8_t ipv6_addr_legal;
+ uint8_t loopback_scope;
+ uint8_t ipv4_local_scope;
+ uint8_t local_scope;
+ uint8_t site_scope;
+};
+
+#define SCTP_TSN_LOG_SIZE 40
+
+struct sctp_tsn_log {
+ void *stcb;
+ uint32_t tsn;
+ uint16_t strm;
+ uint16_t seq;
+ uint16_t sz;
+ uint16_t flgs;
+ uint16_t in_pos;
+ uint16_t in_out;
+};
+
+#define SCTP_FS_SPEC_LOG_SIZE 200
+struct sctp_fs_spec_log {
+ uint32_t sent;
+ uint32_t total_flight;
+ uint32_t tsn;
+ uint16_t book;
+ uint8_t incr;
+ uint8_t decr;
+};
+
+/* This struct is here to cut out the compatibility
+ * pad that bulks up both the inp and stcb. The non
+ * pad portion MUST stay in complete sync with
+ * sctp_sndrcvinfo... i.e. if sinfo_xxxx is added
+ * this must be done here too.
+ */
+struct sctp_nonpad_sndrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+};
+
+/*
+ * JRS - Structure to hold function pointers to the functions responsible
+ * for congestion control.
+ */
+
+struct sctp_cc_functions {
+ void (*sctp_set_initial_cc_param) (struct sctp_tcb *stcb, struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+ void (*sctp_cwnd_update_after_fr) (struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+ void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_packet_dropped) (struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue);
+ void (*sctp_cwnd_update_after_output) (struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit);
+ void (*sctp_cwnd_update_after_fr_timer) (struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+};
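+
+/*-
+ * Illustrative only (hypothetical module, not part of the original header):
+ * an alternate congestion control implementation provides its behaviour by
+ * filling such a table with its own handlers, which the stack then invokes
+ * through asoc->cc_functions.
+ *
+ *	static struct sctp_cc_functions my_cc = {
+ *		.sctp_set_initial_cc_param = my_set_initial_cc_param,
+ *		.sctp_cwnd_update_after_sack = my_cwnd_update_after_sack,
+ *		.sctp_cwnd_update_after_fr = my_cwnd_update_after_fr,
+ *		.sctp_cwnd_update_after_timeout = my_cwnd_update_after_timeout,
+ *		...
+ *	};
+ */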
+
+/* used to save ASCONF chunks for retransmission */
+TAILQ_HEAD(sctp_asconf_head, sctp_asconf);
+struct sctp_asconf {
+ TAILQ_ENTRY(sctp_asconf) next;
+ uint32_t serial_number;
+ uint16_t snd_count;
+ struct mbuf *data;
+ uint16_t len;
+};
+
+/* used to save ASCONF-ACK chunks for retransmission */
+TAILQ_HEAD(sctp_asconf_ackhead, sctp_asconf_ack);
+struct sctp_asconf_ack {
+ TAILQ_ENTRY(sctp_asconf_ack) next;
+ uint32_t serial_number;
+ struct sctp_nets *last_sent_to;
+ struct mbuf *data;
+ uint16_t len;
+};
+
+/*
+ * Here we have information about each individual association that we track.
+ * In production we would probably be more dynamic. But for ease of
+ * implementation we will have a fixed array that we hunt for in a linear
+ * fashion.
+ */
+struct sctp_association {
+ /* association state */
+ int state;
+
+ /* queue of pending addrs to add/delete */
+ struct sctp_asconf_addrhead asconf_queue;
+
+ struct timeval time_entered; /* time we entered state */
+ struct timeval time_last_rcvd;
+ struct timeval time_last_sent;
+ struct timeval time_last_sat_advance;
+ struct sctp_nonpad_sndrcvinfo def_send;
+
+ /* timers and such */
+ struct sctp_timer hb_timer; /* hb timer */
+ struct sctp_timer dack_timer; /* Delayed ack timer */
+ struct sctp_timer asconf_timer; /* asconf */
+ struct sctp_timer strreset_timer; /* stream reset */
+ struct sctp_timer shut_guard_timer; /* shutdown guard */
+ struct sctp_timer autoclose_timer; /* automatic close timer */
+ struct sctp_timer delayed_event_timer; /* timer for delayed events */
+ struct sctp_timer delete_prim_timer; /* deleting primary dst */
+
+ /* list of restricted local addresses */
+ struct sctpladdr sctp_restricted_addrs;
+
+ /* last local address pending deletion (waiting for an address add) */
+ struct sctp_ifa *asconf_addr_del_pending;
+ /* Deleted primary destination (used to stop timer) */
+ struct sctp_nets *deleted_primary;
+
+ struct sctpnetlisthead nets; /* remote address list */
+
+ /* Free chunk list */
+ struct sctpchunk_listhead free_chunks;
+
+ /* Control chunk queue */
+ struct sctpchunk_listhead control_send_queue;
+
+ /* ASCONF chunk queue */
+ struct sctpchunk_listhead asconf_send_queue;
+
+ /*
+ * Once a TSN hits the wire it is moved to the sent_queue. We
+ * maintain two counts here (don't know if any but retran_cnt is
+ * needed). The idea is that the sent_queue_retran_cnt reflects how
+	 * many chunks have been marked for retransmission by either T3-rxt
+ * or FR.
+ */
+ struct sctpchunk_listhead sent_queue;
+ struct sctpchunk_listhead send_queue;
+
+ /* re-assembly queue for fragmented chunks on the inbound path */
+ struct sctpchunk_listhead reasmqueue;
+
+ /*
+ * this queue is used when we reach a condition that we can NOT put
+ * data into the socket buffer. We track the size of this queue and
+ * set our rwnd to the space in the socket minus also the
+ * size_on_delivery_queue.
+ */
+ struct sctpwheel_listhead out_wheel;
+
+ /*
+ * This pointer will be set to NULL most of the time. But when we
+ * have a fragmented message, where we could not get out all of the
+ * message at the last send then this will point to the stream to go
+ * get data from.
+ */
+ struct sctp_stream_out *locked_on_sending;
+
+ /* If an iterator is looking at me, this is it */
+ struct sctp_iterator *stcb_starting_point_for_iterator;
+
+ /* ASCONF save the last ASCONF-ACK so we can resend it if necessary */
+ struct sctp_asconf_ackhead asconf_ack_sent;
+
+ /*
+ * pointer to last stream reset queued to control queue by us with
+ * requests.
+ */
+ struct sctp_tmit_chunk *str_reset;
+ /*
+ * if Source Address Selection happening, this will rotate through
+ * the link list.
+ */
+ struct sctp_laddr *last_used_address;
+
+ /* stream arrays */
+ struct sctp_stream_in *strmin;
+ struct sctp_stream_out *strmout;
+ uint8_t *mapping_array;
+ /* primary destination to use */
+ struct sctp_nets *primary_destination;
+ /* For CMT */
+ struct sctp_nets *last_net_cmt_send_started;
+ /* last place I got a data chunk from */
+ struct sctp_nets *last_data_chunk_from;
+ /* last place I got a control from */
+ struct sctp_nets *last_control_chunk_from;
+
+ /* circular looking for output selection */
+ struct sctp_stream_out *last_out_stream;
+
+ /*
+ * wait to the point the cum-ack passes req->send_reset_at_tsn for
+ * any req on the list.
+ */
+ struct sctp_resethead resetHead;
+
+ /* queue of chunks waiting to be sent into the local stack */
+ struct sctp_readhead pending_reply_queue;
+
+ /* JRS - the congestion control functions are in this struct */
+ struct sctp_cc_functions cc_functions;
+ /*
+ * JRS - value to store the currently loaded congestion control
+ * module
+ */
+ uint32_t congestion_control_module;
+
+ uint32_t vrf_id;
+
+ uint32_t cookie_preserve_req;
+ /* ASCONF next seq I am sending out, inits at init-tsn */
+ uint32_t asconf_seq_out;
+ uint32_t asconf_seq_out_acked;
+ /* ASCONF last received ASCONF from peer, starts at peer's TSN-1 */
+ uint32_t asconf_seq_in;
+
+ /* next seq I am sending in str reset messages */
+ uint32_t str_reset_seq_out;
+ /* next seq I am expecting in str reset messages */
+ uint32_t str_reset_seq_in;
+
+ /* various verification tag information */
+	uint32_t my_vtag;	/* The tag to be used. If the assoc is re-initiated
+				 * by the remote end and I have unlocked, this
+				 * will be regenerated to a new random value. */
+ uint32_t peer_vtag; /* The peers last tag */
+
+ uint32_t my_vtag_nonce;
+ uint32_t peer_vtag_nonce;
+
+ uint32_t assoc_id;
+
+ /* This is the SCTP fragmentation threshold */
+ uint32_t smallest_mtu;
+
+ /*
+ * Special hook for Fast retransmit, allows us to track the highest
+ * TSN that is NEW in this SACK if gap ack blocks are present.
+ */
+ uint32_t this_sack_highest_gap;
+
+ /*
+ * The highest consecutive TSN that has been acked by peer on my
+ * sends
+ */
+ uint32_t last_acked_seq;
+
+ /* The next TSN that I will use in sending. */
+ uint32_t sending_seq;
+
+ /* Original seq number I used ??questionable to keep?? */
+ uint32_t init_seq_number;
+
+
+ /* The Advanced Peer Ack Point, as required by the PR-SCTP */
+ /* (A1 in Section 4.2) */
+ uint32_t advanced_peer_ack_point;
+
+ /*
+	 * The highest consecutive TSN at the bottom of the mapping array
+ * (for his sends).
+ */
+ uint32_t cumulative_tsn;
+ /*
+ * Used to track the mapping array and its offset bits. This MAY be
+	 * lower than cumulative_tsn.
+ */
+ uint32_t mapping_array_base_tsn;
+ /*
+ * used to track highest TSN we have received and is listed in the
+ * mapping array.
+ */
+ uint32_t highest_tsn_inside_map;
+
+ /* EY - new NR variables used for nr_sack based on mapping_array */
+ uint8_t *nr_mapping_array;
+ uint32_t highest_tsn_inside_nr_map;
+
+ uint32_t last_echo_tsn;
+ uint32_t last_cwr_tsn;
+ uint32_t fast_recovery_tsn;
+ uint32_t sat_t3_recovery_tsn;
+ uint32_t tsn_last_delivered;
+ /*
+	 * For the pd-api we should re-write this a bit more efficiently. We
+ * could have multiple sctp_queued_to_read's that we are building at
+ * once. Now we only do this when we get ready to deliver to the
+ * socket buffer. Note that we depend on the fact that the struct is
+ * "stuck" on the read queue until we finish all the pd-api.
+ */
+ struct sctp_queued_to_read *control_pdapi;
+
+ uint32_t tsn_of_pdapi_last_delivered;
+ uint32_t pdapi_ppid;
+ uint32_t context;
+ uint32_t last_reset_action[SCTP_MAX_RESET_PARAMS];
+ uint32_t last_sending_seq[SCTP_MAX_RESET_PARAMS];
+ uint32_t last_base_tsnsent[SCTP_MAX_RESET_PARAMS];
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ /*
+ * special log - This adds considerable size to the asoc, but
+ * provides a log that you can use to detect problems via kgdb.
+ */
+ struct sctp_tsn_log in_tsnlog[SCTP_TSN_LOG_SIZE];
+ struct sctp_tsn_log out_tsnlog[SCTP_TSN_LOG_SIZE];
+ uint32_t cumack_log[SCTP_TSN_LOG_SIZE];
+ uint32_t cumack_logsnt[SCTP_TSN_LOG_SIZE];
+ uint16_t tsn_in_at;
+ uint16_t tsn_out_at;
+ uint16_t tsn_in_wrapped;
+ uint16_t tsn_out_wrapped;
+ uint16_t cumack_log_at;
+ uint16_t cumack_log_atsnt;
+#endif /* SCTP_ASOCLOG_OF_TSNS */
+#ifdef SCTP_FS_SPEC_LOG
+ struct sctp_fs_spec_log fslog[SCTP_FS_SPEC_LOG_SIZE];
+ uint16_t fs_index;
+#endif
+
+ /*
+ * window state information and smallest MTU that I use to bound
+ * segmentation
+ */
+ uint32_t peers_rwnd;
+ uint32_t my_rwnd;
+ uint32_t my_last_reported_rwnd;
+ uint32_t sctp_frag_point;
+
+ uint32_t total_output_queue_size;
+
+ uint32_t sb_cc; /* shadow of sb_cc */
+ uint32_t sb_send_resv; /* amount reserved on a send */
+ uint32_t my_rwnd_control_len; /* shadow of sb_mbcnt used for rwnd
+ * control */
+ /* 32 bit nonce stuff */
+ uint32_t nonce_resync_tsn;
+ uint32_t nonce_wait_tsn;
+ uint32_t default_flowlabel;
+ uint32_t pr_sctp_cnt;
+ int ctrl_queue_cnt; /* could be removed REM */
+ /*
+ * All outbound datagrams queue into this list from the individual
+ * stream queue. Here they get assigned a TSN and then await
+ * sending. The stream seq comes when it is first put in the
+ * individual str queue
+ */
+ unsigned int stream_queue_cnt;
+ unsigned int send_queue_cnt;
+ unsigned int sent_queue_cnt;
+ unsigned int sent_queue_cnt_removeable;
+ /*
+	 * Number on sent queue that are marked for retran; until this value
+ * is 0 we only send one packet of retran'ed data.
+ */
+ unsigned int sent_queue_retran_cnt;
+
+ unsigned int size_on_reasm_queue;
+ unsigned int cnt_on_reasm_queue;
+ unsigned int fwd_tsn_cnt;
+ /* amount of data (bytes) currently in flight (on all destinations) */
+ unsigned int total_flight;
+ /* Total book size in flight */
+ unsigned int total_flight_count; /* count of chunks used with
+ * book total */
+	/* count of destination nets and list of destination nets */
+ unsigned int numnets;
+
+ /* Total error count on this association */
+ unsigned int overall_error_count;
+
+ unsigned int cnt_msg_on_sb;
+
+ /* All stream count of chunks for delivery */
+ unsigned int size_on_all_streams;
+ unsigned int cnt_on_all_streams;
+
+ /* Heart Beat delay in ticks */
+ unsigned int heart_beat_delay;
+
+ /* autoclose */
+ unsigned int sctp_autoclose_ticks;
+
+ /* how many preopen streams we have */
+ unsigned int pre_open_streams;
+
+ /* How many streams I support coming into me */
+ unsigned int max_inbound_streams;
+
+ /* the cookie life I award for any cookie, in seconds */
+ unsigned int cookie_life;
+ /* time to delay acks for */
+ unsigned int delayed_ack;
+ unsigned int old_delayed_ack;
+ unsigned int sack_freq;
+ unsigned int data_pkts_seen;
+
+ unsigned int numduptsns;
+ int dup_tsns[SCTP_MAX_DUP_TSNS];
+ unsigned int initial_init_rto_max; /* initial RTO for INIT's */
+ unsigned int initial_rto; /* initial send RTO */
+ unsigned int minrto; /* per assoc RTO-MIN */
+ unsigned int maxrto; /* per assoc RTO-MAX */
+
+ /* authentication fields */
+ sctp_auth_chklist_t *local_auth_chunks;
+ sctp_auth_chklist_t *peer_auth_chunks;
+ sctp_hmaclist_t *local_hmacs; /* local HMACs supported */
+ sctp_hmaclist_t *peer_hmacs; /* peer HMACs supported */
+ struct sctp_keyhead shared_keys; /* assoc's shared keys */
+ sctp_authinfo_t authinfo; /* randoms, cached keys */
+ /*
+	 * refcnt to block freeing when a sender or receiver is off copying
+ * user data in.
+ */
+ uint32_t refcnt;
+ uint32_t chunks_on_out_queue; /* total chunks floating around,
+ * locked by send socket buffer */
+ uint32_t peers_adaptation;
+ uint16_t peer_hmac_id; /* peer HMAC id to send */
+
+ /*
+ * Being that we have no bag to collect stale cookies, and that we
+ * really would not want to anyway.. we will count them in this
+ * counter. We of course feed them to the pigeons right away (I have
+ * always thought of pigeons as flying rats).
+ */
+ uint16_t stale_cookie_count;
+
+ /*
+ * For the partial delivery API, if up, invoked this is what last
+ * TSN I delivered
+ */
+ uint16_t str_of_pdapi;
+ uint16_t ssn_of_pdapi;
+
+ /* counts of actual built streams. Allocation may be more however */
+ /* could re-arrange to optimize space here. */
+ uint16_t streamincnt;
+ uint16_t streamoutcnt;
+ uint16_t strm_realoutsize;
+ /* my maximum number of retrans of INIT and SEND */
+	/* copied from SCTP but should be individually settable */
+ uint16_t max_init_times;
+ uint16_t max_send_times;
+
+ uint16_t def_net_failure;
+
+ /*
+ * lock flag: 0 is ok to send, 1+ (duals as a retran count) is
+ * awaiting ACK
+ */
+ uint16_t mapping_array_size;
+
+ uint16_t last_strm_seq_delivered;
+ uint16_t last_strm_no_delivered;
+
+ uint16_t last_revoke_count;
+ int16_t num_send_timers_up;
+
+ uint16_t stream_locked_on;
+ uint16_t ecn_echo_cnt_onq;
+
+ uint16_t free_chunk_cnt;
+
+ uint8_t stream_locked;
+ uint8_t authenticated; /* packet authenticated ok */
+ /*
+	 * This flag indicates that a SACK needs to be sent. Initially this
+	 * is 1 to send the first SACK immediately.
+ */
+ uint8_t send_sack;
+
+ /* max burst after fast retransmit completes */
+ uint8_t max_burst;
+
+ uint8_t sat_network; /* RTT is in range of sat net or greater */
+ uint8_t sat_network_lockout; /* lockout code */
+ uint8_t burst_limit_applied; /* Burst limit in effect at last send? */
+ /* flag goes on when we are doing a partial delivery api */
+ uint8_t hb_random_values[4];
+ uint8_t fragmented_delivery_inprogress;
+ uint8_t fragment_flags;
+ uint8_t last_flags_delivered;
+ uint8_t hb_ect_randombit;
+ uint8_t hb_random_idx;
+ uint8_t hb_is_disabled; /* is the hb disabled? */
+ uint8_t default_tos;
+ uint8_t asconf_del_pending; /* asconf delete last addr pending */
+
+ /* ECN Nonce stuff */
+ uint8_t receiver_nonce_sum; /* nonce I sum and put in my sack */
+ uint8_t ecn_nonce_allowed; /* Tells us if ECN nonce is on */
+ uint8_t nonce_sum_check;/* On off switch used during re-sync */
+ uint8_t nonce_wait_for_ecne; /* flag when we expect a ECN */
+ uint8_t peer_supports_ecn_nonce;
+
+ /*
+ * This value, plus all other ack'd but above cum-ack is added
+ * together to cross check against the bit that we have yet to
+ * define (probably in the SACK). When the cum-ack is updated, this
+ * sum is updated as well.
+ */
+ uint8_t nonce_sum_expect_base;
+ /* Flag to tell if ECN is allowed */
+ uint8_t ecn_allowed;
+
+ /* flag to indicate if peer can do asconf */
+ uint8_t peer_supports_asconf;
+ /* EY - flag to indicate if peer can do nr_sack */
+ uint8_t peer_supports_nr_sack;
+ /* pr-sctp support flag */
+ uint8_t peer_supports_prsctp;
+ /* peer authentication support flag */
+ uint8_t peer_supports_auth;
+ /* stream resets are supported by the peer */
+ uint8_t peer_supports_strreset;
+
+ uint8_t peer_supports_nat;
+ /*
+ * packet drop's are supported by the peer, we don't really care
+ * about this but we bookkeep it anyway.
+ */
+ uint8_t peer_supports_pktdrop;
+
+ /* Do we allow V6/V4? */
+ uint8_t ipv4_addr_legal;
+ uint8_t ipv6_addr_legal;
+ /* Address scoping flags */
+ /* scope value for IPv4 */
+ uint8_t ipv4_local_scope;
+ /* scope values for IPv6 */
+ uint8_t local_scope;
+ uint8_t site_scope;
+ /* loopback scope */
+ uint8_t loopback_scope;
+ /* flags to handle send alternate net tracking */
+ uint8_t used_alt_onsack;
+ uint8_t used_alt_asconfack;
+ uint8_t fast_retran_loss_recovery;
+ uint8_t sat_t3_loss_recovery;
+ uint8_t dropped_special_cnt;
+ uint8_t seen_a_sack_this_pkt;
+ uint8_t stream_reset_outstanding;
+ uint8_t stream_reset_out_is_outstanding;
+ uint8_t delayed_connection;
+ uint8_t ifp_had_enobuf;
+ uint8_t saw_sack_with_frags;
+ uint8_t saw_sack_with_nr_frags;
+ uint8_t in_asocid_hash;
+ uint8_t assoc_up_sent;
+ uint8_t adaptation_needed;
+ uint8_t adaptation_sent;
+ /* CMT variables */
+ uint8_t cmt_dac_pkts_rcvd;
+ uint8_t sctp_cmt_on_off;
+ uint8_t iam_blocking;
+ uint8_t cookie_how[8];
+ /* EY 05/05/08 - NR_SACK variable */
+ uint8_t sctp_nr_sack_on_off;
+ /* JRS 5/21/07 - CMT PF variable */
+ uint8_t sctp_cmt_pf;
+ /*
+ * The mapping array is used to track out of order sequences above
+	 * last_acked_seq. 0 indicates packet missing, 1 indicates packet
+	 * rec'd. We slide it up every time we raise last_acked_seq and zero
+	 * trailing locations out. If I get a TSN above the array
+ * mappingArraySz, I discard the datagram and let retransmit happen.
+ */
+ uint32_t marked_retrans;
+ uint32_t timoinit;
+ uint32_t timodata;
+ uint32_t timosack;
+ uint32_t timoshutdown;
+ uint32_t timoheartbeat;
+ uint32_t timocookie;
+ uint32_t timoshutdownack;
+ struct timeval start_time;
+ struct timeval discontinuity_time;
+};
+
+#endif
diff --git a/freebsd/sys/netinet/sctp_sysctl.c b/freebsd/sys/netinet/sctp_sysctl.c
new file mode 100644
index 00000000..b5700e4e
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_sysctl.c
@@ -0,0 +1,1108 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_constants.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/sys/smp.h>
+
+/*
+ * sysctl tunable variables
+ */
+
+void
+sctp_init_sysctls()
+{
+ SCTP_BASE_SYSCTL(sctp_sendspace) = SCTPCTL_MAXDGRAM_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_recvspace) = SCTPCTL_RECVSPACE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_auto_asconf) = SCTPCTL_AUTOASCONF_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_multiple_asconfs) = SCTPCTL_MULTIPLEASCONFS_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_ecn_enable) = SCTPCTL_ECN_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_ecn_nonce) = SCTPCTL_ECN_NONCE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_strict_sacks) = SCTPCTL_STRICT_SACKS_DEFAULT;
+#if !defined(SCTP_WITH_NO_CSUM)
+ SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback) = SCTPCTL_LOOPBACK_NOCSUM_DEFAULT;
+#endif
+ SCTP_BASE_SYSCTL(sctp_strict_init) = SCTPCTL_STRICT_INIT_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_peer_chunk_oh) = SCTPCTL_PEER_CHKOH_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_max_burst_default) = SCTPCTL_MAXBURST_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue) = SCTPCTL_MAXCHUNKS_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_hashtblsize) = SCTPCTL_TCBHASHSIZE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_pcbtblsize) = SCTPCTL_PCBHASHSIZE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_min_split_point) = SCTPCTL_MIN_SPLIT_POINT_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_chunkscale) = SCTPCTL_CHUNKSCALE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default) = SCTPCTL_DELAYED_SACK_TIME_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_sack_freq_default) = SCTPCTL_SACK_FREQ_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_system_free_resc_limit) = SCTPCTL_SYS_RESOURCE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit) = SCTPCTL_ASOC_RESOURCE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default) = SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default) = SCTPCTL_PMTU_RAISE_TIME_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default) = SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_secret_lifetime_default) = SCTPCTL_SECRET_LIFETIME_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_rto_max_default) = SCTPCTL_RTO_MAX_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_rto_min_default) = SCTPCTL_RTO_MIN_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_rto_initial_default) = SCTPCTL_RTO_INITIAL_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_init_rto_max_default) = SCTPCTL_INIT_RTO_MAX_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default) = SCTPCTL_VALID_COOKIE_LIFE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_init_rtx_max_default) = SCTPCTL_INIT_RTX_MAX_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default) = SCTPCTL_ASSOC_RTX_MAX_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_path_rtx_max_default) = SCTPCTL_PATH_RTX_MAX_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_add_more_threshold) = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default) = SCTPCTL_OUTGOING_STREAMS_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_cmt_on_off) = SCTPCTL_CMT_ON_OFF_DEFAULT;
+ /* EY */
+ SCTP_BASE_SYSCTL(sctp_nr_sack_on_off) = SCTPCTL_NR_SACK_ON_OFF_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_cmt_use_dac) = SCTPCTL_CMT_USE_DAC_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_cmt_pf) = SCTPCTL_CMT_PF_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) = SCTPCTL_CWND_MAXBURST_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_early_fr) = SCTPCTL_EARLY_FAST_RETRAN_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_early_fr_msec) = SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) = SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_auth_disable) = SCTPCTL_AUTH_DISABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_nat_friendly) = SCTPCTL_NAT_FRIENDLY_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_L2_abc_variable) = SCTPCTL_ABC_L_VAR_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) = SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_do_drain) = SCTPCTL_DO_SCTP_DRAIN_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_hb_maxburst) = SCTPCTL_HB_MAX_BURST_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit) = SCTPCTL_ABORT_AT_LIMIT_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_strict_data_order) = SCTPCTL_STRICT_DATA_ORDER_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_min_residual) = SCTPCTL_MIN_RESIDUAL_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_max_retran_chunk) = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_logging_level) = SCTPCTL_LOGGING_LEVEL_DEFAULT;
+ /* JRS - Variable for default congestion control module */
+ SCTP_BASE_SYSCTL(sctp_default_cc_module) = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_default_frag_interleave) = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_mobility_base) = SCTPCTL_MOBILITY_BASE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff) = SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_vtag_time_wait) = SCTPCTL_TIME_WAIT_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_buffer_splitting) = SCTPCTL_BUFFER_SPLITTING_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_initial_cwnd) = SCTPCTL_INITIAL_CWND_DEFAULT;
+#if defined(SCTP_LOCAL_TRACE_BUF)
+ memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log));
+#endif
+ SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable) = SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = SCTPCTL_UDP_TUNNELING_PORT_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) = SCTPCTL_SACK_IMMEDIATELY_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly) = SCTPCTL_NAT_FRIENDLY_INITS_DEFAULT;
+#if defined(SCTP_DEBUG)
+ SCTP_BASE_SYSCTL(sctp_debug_on) = SCTPCTL_DEBUG_DEFAULT;
+#endif
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_BASE_SYSCTL(sctp_output_unlocked) = SCTPCTL_OUTPUT_UNLOCKED_DEFAULT;
+#endif
+}
+
+
+/* It returns an upper limit. No filtering is done here */
+static unsigned int
+number_of_addresses(struct sctp_inpcb *inp)
+{
+ int cnt;
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ struct sctp_laddr *laddr;
+
+ cnt = 0;
+ /* neither Mac OS X nor FreeBSD supports multiple routing functions */
+ if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) ||
+ (sctp_ifa->address.sa.sa_family == AF_INET6)) {
+ cnt++;
+ }
+ }
+ }
+ } else {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if ((laddr->ifa->address.sa.sa_family == AF_INET) ||
+ (laddr->ifa->address.sa.sa_family == AF_INET6)) {
+ cnt++;
+ }
+ }
+ }
+ return (cnt);
+}
+
+static int
+copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
+{
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+ struct xsctp_laddr xladdr;
+ struct sctp_laddr *laddr;
+ int error;
+
+ /* Turn on all the appropriate scope flags */
+ if (stcb) {
+ /* use association specific values */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ } else {
+ /* use generic values for endpoints */
+ loopback_scope = 1;
+ ipv4_local_scope = 1;
+ local_scope = 1;
+ site_scope = 1;
+ }
+
+ /* use only address families of interest */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(inp)) {
+ ipv4_addr_legal = 0;
+ } else {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ ipv6_addr_legal = 0;
+ }
+
+ /* neither Mac OS X nor FreeBSD supports multiple routing functions */
+ if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (-1);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifn))
+ /* Skip loopback if loopback_scope not set */
+ continue;
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (stcb) {
+ /*
+ * ignore if blacklisted at
+ * association level
+ */
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+ }
+ switch (sctp_ifa->address.sa.sa_family) {
+ case AF_INET:
+ if (ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0)
+ continue;
+ if ((ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)))
+ continue;
+ } else {
+ continue;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ continue;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ /*
+ * bad link
+ * local
+ * address
+ */
+ if (sa6_recoverscope(sin6) != 0)
+ continue;
+ }
+ }
+ if ((site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)))
+ continue;
+ } else {
+ continue;
+ }
+ break;
+#endif
+ default:
+ continue;
+ }
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ memcpy((void *)&xladdr.address, (const void *)&sctp_ifa->address, sizeof(union sctp_sockstore));
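+ /*
+ * Release the INP and INP_INFO locks before SYSCTL_OUT copies
+ * the entry out to user space (which may block); they are
+ * reacquired below unless the copy fails.
+ */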
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ }
+ }
+ } else {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ /* ignore if blacklisted at association level */
+ if (stcb && sctp_is_addr_restricted(stcb, laddr->ifa))
+ continue;
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ memcpy((void *)&xladdr.address, (const void *)&laddr->ifa->address, sizeof(union sctp_sockstore));
+ xladdr.start_time.tv_sec = (uint32_t) laddr->start_time.tv_sec;
+ xladdr.start_time.tv_usec = (uint32_t) laddr->start_time.tv_usec;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ }
+ }
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ xladdr.last = 1;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ return (0);
+ }
+}
+
+/*
+ * sysctl functions
+ */
+static int
+sctp_assoclist(SYSCTL_HANDLER_ARGS)
+{
+ unsigned int number_of_endpoints;
+ unsigned int number_of_local_addresses;
+ unsigned int number_of_associations;
+ unsigned int number_of_remote_addresses;
+ unsigned int n;
+ int error;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct xsctp_inpcb xinpcb;
+ struct xsctp_tcb xstcb;
+ struct xsctp_raddr xraddr;
+ struct socket *so;
+
+ number_of_endpoints = 0;
+ number_of_local_addresses = 0;
+ number_of_associations = 0;
+ number_of_remote_addresses = 0;
+
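+ /*
+ * This handler runs in two passes: when the caller supplies no
+ * output buffer (oldptr is NULL), only an upper bound on the
+ * required size is computed and reported via oldidx; otherwise
+ * the endpoint, association and address records are copied out
+ * one by one.
+ */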
+ SCTP_INP_INFO_RLOCK();
+ if (req->oldptr == USER_ADDR_NULL) {
+ LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
+ SCTP_INP_RLOCK(inp);
+ number_of_endpoints++;
+ number_of_local_addresses += number_of_addresses(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ number_of_associations++;
+ number_of_local_addresses += number_of_addresses(inp);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ number_of_remote_addresses++;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ n = (number_of_endpoints + 1) * sizeof(struct xsctp_inpcb) +
+ (number_of_local_addresses + number_of_endpoints + number_of_associations) * sizeof(struct xsctp_laddr) +
+ (number_of_associations + number_of_endpoints) * sizeof(struct xsctp_tcb) +
+ (number_of_remote_addresses + number_of_associations) * sizeof(struct xsctp_raddr);
+
+ /* request some more memory than needed */
+ req->oldidx = (n + n / 8);
+ return 0;
+ }
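+ /* This node is read-only; reject any attempt to write to it. */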
+ if (req->newptr != USER_ADDR_NULL) {
+ SCTP_INP_INFO_RUNLOCK();
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM);
+ return EPERM;
+ }
+ LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ /* if the socket is all gone it is being freed - skip it */
+ goto skip;
+ }
+ xinpcb.last = 0;
+ xinpcb.local_port = ntohs(inp->sctp_lport);
+ xinpcb.flags = inp->sctp_flags;
+ xinpcb.features = inp->sctp_features;
+ xinpcb.total_sends = inp->total_sends;
+ xinpcb.total_recvs = inp->total_recvs;
+ xinpcb.total_nospaces = inp->total_nospaces;
+ xinpcb.fragmentation_point = inp->sctp_frag_point;
+ so = inp->sctp_socket;
+ if ((so == NULL) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ xinpcb.qlen = 0;
+ xinpcb.maxqlen = 0;
+ } else {
+ xinpcb.qlen = so->so_qlen;
+ xinpcb.maxqlen = so->so_qlimit;
+ }
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ error = copy_out_local_addresses(inp, NULL, req);
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ xstcb.last = 0;
+ xstcb.local_port = ntohs(inp->sctp_lport);
+ xstcb.remote_port = ntohs(stcb->rport);
+ if (stcb->asoc.primary_destination != NULL)
+ xstcb.primary_addr = stcb->asoc.primary_destination->ro._l_addr;
+ xstcb.heartbeat_interval = stcb->asoc.heart_beat_delay;
+ xstcb.state = SCTP_GET_STATE(&stcb->asoc); /* FIXME */
+ /* 7.0 does not support these */
+ xstcb.assoc_id = sctp_get_associd(stcb);
+ xstcb.peers_rwnd = stcb->asoc.peers_rwnd;
+ xstcb.in_streams = stcb->asoc.streamincnt;
+ xstcb.out_streams = stcb->asoc.streamoutcnt;
+ xstcb.max_nr_retrans = stcb->asoc.overall_error_count;
+ xstcb.primary_process = 0; /* not really supported
+ * yet */
+ xstcb.T1_expireries = stcb->asoc.timoinit + stcb->asoc.timocookie;
+ xstcb.T2_expireries = stcb->asoc.timoshutdown + stcb->asoc.timoshutdownack;
+ xstcb.retransmitted_tsns = stcb->asoc.marked_retrans;
+ xstcb.start_time.tv_sec = (uint32_t) stcb->asoc.start_time.tv_sec;
+ xstcb.start_time.tv_usec = (uint32_t) stcb->asoc.start_time.tv_usec;
+ xstcb.discontinuity_time.tv_sec = (uint32_t) stcb->asoc.discontinuity_time.tv_sec;
+ xstcb.discontinuity_time.tv_usec = (uint32_t) stcb->asoc.discontinuity_time.tv_usec;
+ xstcb.total_sends = stcb->total_sends;
+ xstcb.total_recvs = stcb->total_recvs;
+ xstcb.local_tag = stcb->asoc.my_vtag;
+ xstcb.remote_tag = stcb->asoc.peer_vtag;
+ xstcb.initial_tsn = stcb->asoc.init_seq_number;
+ xstcb.highest_tsn = stcb->asoc.sending_seq - 1;
+ xstcb.cumulative_tsn = stcb->asoc.last_acked_seq;
+ xstcb.cumulative_tsn_ack = stcb->asoc.cumulative_tsn;
+ xstcb.mtu = stcb->asoc.smallest_mtu;
+ xstcb.refcnt = stcb->asoc.refcnt;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ error = copy_out_local_addresses(inp, stcb, req);
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ xraddr.last = 0;
+ xraddr.address = net->ro._l_addr;
+ xraddr.active = ((net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE);
+ xraddr.confirmed = ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0);
+ xraddr.heartbeat_enabled = ((net->dest_state & SCTP_ADDR_NOHB) == 0);
+ xraddr.rto = net->RTO;
+ xraddr.max_path_rtx = net->failure_threshold;
+ xraddr.rtx = net->marked_retrans;
+ xraddr.error_counter = net->error_count;
+ xraddr.cwnd = net->cwnd;
+ xraddr.flight_size = net->flight_size;
+ xraddr.mtu = net->mtu;
+ xraddr.rtt = net->rtt;
+ xraddr.start_time.tv_sec = (uint32_t) net->start_time.tv_sec;
+ xraddr.start_time.tv_usec = (uint32_t) net->start_time.tv_usec;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ memset((void *)&xraddr, 0, sizeof(struct xsctp_raddr));
+ xraddr.last = 1;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ memset((void *)&xstcb, 0, sizeof(struct xsctp_tcb));
+ xstcb.last = 1;
+ error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb));
+ if (error) {
+ return error;
+ }
+skip:
+ SCTP_INP_INFO_RLOCK();
+ }
+ SCTP_INP_INFO_RUNLOCK();
+
+ memset((void *)&xinpcb, 0, sizeof(struct xsctp_inpcb));
+ xinpcb.last = 1;
+ error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb));
+ return error;
+}
+
+
+#define RANGECHK(var, min, max) \
+ if ((var) < (min)) { (var) = (min); } \
+ else if ((var) > (max)) { (var) = (max); }
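+
+/*
+ * RANGECHK clamps a variable into [min, max]: for example,
+ * RANGECHK(SCTP_BASE_SYSCTL(sctp_sendspace), SCTPCTL_MAXDGRAM_MIN,
+ * SCTPCTL_MAXDGRAM_MAX) leaves an in-range value untouched and pins an
+ * out-of-range value to the nearest bound.
+ */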
+
+static int
+sysctl_sctp_udp_tunneling_check(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ uint32_t old_sctp_udp_tunneling_port;
+
+ SCTP_INP_INFO_RLOCK();
+ old_sctp_udp_tunneling_port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
+ SCTP_INP_INFO_RUNLOCK();
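+ /*
+ * Remember the old port so that UDP tunneling is only stopped and
+ * restarted below when the value actually changes.
+ */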
+ error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ if (error == 0) {
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), SCTPCTL_UDP_TUNNELING_PORT_MIN, SCTPCTL_UDP_TUNNELING_PORT_MAX);
+ if (old_sctp_udp_tunneling_port == SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) {
+ error = 0;
+ goto out;
+ }
+ SCTP_INP_INFO_WLOCK();
+ if (old_sctp_udp_tunneling_port) {
+ sctp_over_udp_stop();
+ }
+ if (SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) {
+ if (sctp_over_udp_start()) {
+ SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = 0;
+ }
+ }
+ SCTP_INP_INFO_WUNLOCK();
+ }
+out:
+ return (error);
+}
+
+
+static int
+sysctl_sctp_check(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+
+ error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ if (error == 0) {
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_sendspace), SCTPCTL_MAXDGRAM_MIN, SCTPCTL_MAXDGRAM_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_recvspace), SCTPCTL_RECVSPACE_MIN, SCTPCTL_RECVSPACE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_auto_asconf), SCTPCTL_AUTOASCONF_MIN, SCTPCTL_AUTOASCONF_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_ecn_enable), SCTPCTL_ECN_ENABLE_MIN, SCTPCTL_ECN_ENABLE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_ecn_nonce), SCTPCTL_ECN_NONCE_MIN, SCTPCTL_ECN_NONCE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_sacks), SCTPCTL_STRICT_SACKS_MIN, SCTPCTL_STRICT_SACKS_MAX);
+#if !defined(SCTP_WITH_NO_CSUM)
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback), SCTPCTL_LOOPBACK_NOCSUM_MIN, SCTPCTL_LOOPBACK_NOCSUM_MAX);
+#endif
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_init), SCTPCTL_STRICT_INIT_MIN, SCTPCTL_STRICT_INIT_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), SCTPCTL_PEER_CHKOH_MIN, SCTPCTL_PEER_CHKOH_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_max_burst_default), SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), SCTPCTL_MAXCHUNKS_MIN, SCTPCTL_MAXCHUNKS_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_hashtblsize), SCTPCTL_TCBHASHSIZE_MIN, SCTPCTL_TCBHASHSIZE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_pcbtblsize), SCTPCTL_PCBHASHSIZE_MIN, SCTPCTL_PCBHASHSIZE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_min_split_point), SCTPCTL_MIN_SPLIT_POINT_MIN, SCTPCTL_MIN_SPLIT_POINT_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_chunkscale), SCTPCTL_CHUNKSCALE_MIN, SCTPCTL_CHUNKSCALE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), SCTPCTL_DELAYED_SACK_TIME_MIN, SCTPCTL_DELAYED_SACK_TIME_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_sack_freq_default), SCTPCTL_SACK_FREQ_MIN, SCTPCTL_SACK_FREQ_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), SCTPCTL_SYS_RESOURCE_MIN, SCTPCTL_SYS_RESOURCE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), SCTPCTL_ASOC_RESOURCE_MIN, SCTPCTL_ASOC_RESOURCE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), SCTPCTL_HEARTBEAT_INTERVAL_MIN, SCTPCTL_HEARTBEAT_INTERVAL_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), SCTPCTL_PMTU_RAISE_TIME_MIN, SCTPCTL_PMTU_RAISE_TIME_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), SCTPCTL_SHUTDOWN_GUARD_TIME_MIN, SCTPCTL_SHUTDOWN_GUARD_TIME_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), SCTPCTL_SECRET_LIFETIME_MIN, SCTPCTL_SECRET_LIFETIME_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_max_default), SCTPCTL_RTO_MAX_MIN, SCTPCTL_RTO_MAX_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_min_default), SCTPCTL_RTO_MIN_MIN, SCTPCTL_RTO_MIN_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_initial_default), SCTPCTL_RTO_INITIAL_MIN, SCTPCTL_RTO_INITIAL_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rto_max_default), SCTPCTL_INIT_RTO_MAX_MIN, SCTPCTL_INIT_RTO_MAX_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), SCTPCTL_VALID_COOKIE_LIFE_MIN, SCTPCTL_VALID_COOKIE_LIFE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), SCTPCTL_INIT_RTX_MAX_MIN, SCTPCTL_INIT_RTX_MAX_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), SCTPCTL_ASSOC_RTX_MAX_MIN, SCTPCTL_ASSOC_RTX_MAX_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), SCTPCTL_PATH_RTX_MAX_MIN, SCTPCTL_PATH_RTX_MAX_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTPCTL_ADD_MORE_ON_OUTPUT_MIN, SCTPCTL_ADD_MORE_ON_OUTPUT_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), SCTPCTL_OUTGOING_STREAMS_MIN, SCTPCTL_OUTGOING_STREAMS_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_on_off), SCTPCTL_CMT_ON_OFF_MIN, SCTPCTL_CMT_ON_OFF_MAX);
+ /* EY */
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), SCTPCTL_NR_SACK_ON_OFF_MIN, SCTPCTL_NR_SACK_ON_OFF_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_use_dac), SCTPCTL_CMT_USE_DAC_MIN, SCTPCTL_CMT_USE_DAC_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_pf), SCTPCTL_CMT_PF_MIN, SCTPCTL_CMT_PF_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), SCTPCTL_CWND_MAXBURST_MIN, SCTPCTL_CWND_MAXBURST_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_early_fr), SCTPCTL_EARLY_FAST_RETRAN_MIN, SCTPCTL_EARLY_FAST_RETRAN_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_early_fr_msec), SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN, SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), SCTPCTL_ASCONF_AUTH_NOCHK_MIN, SCTPCTL_ASCONF_AUTH_NOCHK_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_auth_disable), SCTPCTL_AUTH_DISABLE_MIN, SCTPCTL_AUTH_DISABLE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_nat_friendly), SCTPCTL_NAT_FRIENDLY_MIN, SCTPCTL_NAT_FRIENDLY_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_L2_abc_variable), SCTPCTL_ABC_L_VAR_MIN, SCTPCTL_ABC_L_VAR_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), SCTPCTL_MAX_CHAINED_MBUFS_MIN, SCTPCTL_MAX_CHAINED_MBUFS_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_do_drain), SCTPCTL_DO_SCTP_DRAIN_MIN, SCTPCTL_DO_SCTP_DRAIN_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_hb_maxburst), SCTPCTL_HB_MAX_BURST_MIN, SCTPCTL_HB_MAX_BURST_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), SCTPCTL_ABORT_AT_LIMIT_MIN, SCTPCTL_ABORT_AT_LIMIT_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_data_order), SCTPCTL_STRICT_DATA_ORDER_MIN, SCTPCTL_STRICT_DATA_ORDER_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_min_residual), SCTPCTL_MIN_RESIDUAL_MIN, SCTPCTL_MIN_RESIDUAL_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_max_retran_chunk), SCTPCTL_MAX_RETRAN_CHUNK_MIN, SCTPCTL_MAX_RETRAN_CHUNK_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_logging_level), SCTPCTL_LOGGING_LEVEL_MIN, SCTPCTL_LOGGING_LEVEL_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_default_cc_module), SCTPCTL_DEFAULT_CC_MODULE_MIN, SCTPCTL_DEFAULT_CC_MODULE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_default_frag_interleave), SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_vtag_time_wait), SCTPCTL_TIME_WAIT_MIN, SCTPCTL_TIME_WAIT_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_buffer_splitting), SCTPCTL_BUFFER_SPLITTING_MIN, SCTPCTL_BUFFER_SPLITTING_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_initial_cwnd), SCTPCTL_INITIAL_CWND_MIN, SCTPCTL_INITIAL_CWND_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_base), SCTPCTL_MOBILITY_BASE_MIN, SCTPCTL_MOBILITY_BASE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), SCTPCTL_MOBILITY_FASTHANDOFF_MIN, SCTPCTL_MOBILITY_FASTHANDOFF_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable), SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MIN, SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN, SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), SCTPCTL_NAT_FRIENDLY_INITS_MIN, SCTPCTL_NAT_FRIENDLY_INITS_MAX);
+
+#ifdef SCTP_DEBUG
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_debug_on), SCTPCTL_DEBUG_MIN, SCTPCTL_DEBUG_MAX);
+#endif
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_output_unlocked), SCTPCTL_OUTPUT_UNLOCKED_MIN, SCTPCTL_OUTPUT_UNLOCKED_MAX);
+#endif
+ }
+ return (error);
+}
+
+#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
+static int
+sysctl_stat_get(SYSCTL_HANDLER_ARGS)
+{
+ int cpu, error;
+ struct sctpstat sb, *sarry;
+
+ memset(&sb, 0, sizeof(sb));
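+ /*
+ * Sum the per-CPU statistics into a single struct sctpstat, keeping
+ * the most recent discontinuity time seen on any CPU.
+ */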
+ for (cpu = 0; cpu < mp_ncpus; cpu++) {
+ sarry = &SCTP_BASE_STATS[cpu];
+ if (sarry->sctps_discontinuitytime.tv_sec > sb.sctps_discontinuitytime.tv_sec) {
+ sb.sctps_discontinuitytime.tv_sec = sarry->sctps_discontinuitytime.tv_sec;
+ sb.sctps_discontinuitytime.tv_usec = sarry->sctps_discontinuitytime.tv_usec;
+ }
+ sb.sctps_currestab += sarry->sctps_currestab;
+ sb.sctps_activeestab += sarry->sctps_activeestab;
+ sb.sctps_restartestab += sarry->sctps_restartestab;
+ sb.sctps_collisionestab += sarry->sctps_collisionestab;
+ sb.sctps_passiveestab += sarry->sctps_passiveestab;
+ sb.sctps_aborted += sarry->sctps_aborted;
+ sb.sctps_shutdown += sarry->sctps_shutdown;
+ sb.sctps_outoftheblue += sarry->sctps_outoftheblue;
+ sb.sctps_checksumerrors += sarry->sctps_checksumerrors;
+ sb.sctps_outcontrolchunks += sarry->sctps_outcontrolchunks;
+ sb.sctps_outorderchunks += sarry->sctps_outorderchunks;
+ sb.sctps_outunorderchunks += sarry->sctps_outunorderchunks;
+ sb.sctps_incontrolchunks += sarry->sctps_incontrolchunks;
+ sb.sctps_inorderchunks += sarry->sctps_inorderchunks;
+ sb.sctps_inunorderchunks += sarry->sctps_inunorderchunks;
+ sb.sctps_fragusrmsgs += sarry->sctps_fragusrmsgs;
+ sb.sctps_reasmusrmsgs += sarry->sctps_reasmusrmsgs;
+ sb.sctps_outpackets += sarry->sctps_outpackets;
+ sb.sctps_inpackets += sarry->sctps_inpackets;
+ sb.sctps_recvpackets += sarry->sctps_recvpackets;
+ sb.sctps_recvdatagrams += sarry->sctps_recvdatagrams;
+ sb.sctps_recvpktwithdata += sarry->sctps_recvpktwithdata;
+ sb.sctps_recvsacks += sarry->sctps_recvsacks;
+ sb.sctps_recvdata += sarry->sctps_recvdata;
+ sb.sctps_recvdupdata += sarry->sctps_recvdupdata;
+ sb.sctps_recvheartbeat += sarry->sctps_recvheartbeat;
+ sb.sctps_recvheartbeatack += sarry->sctps_recvheartbeatack;
+ sb.sctps_recvecne += sarry->sctps_recvecne;
+ sb.sctps_recvauth += sarry->sctps_recvauth;
+ sb.sctps_recvauthmissing += sarry->sctps_recvauthmissing;
+ sb.sctps_recvivalhmacid += sarry->sctps_recvivalhmacid;
+ sb.sctps_recvivalkeyid += sarry->sctps_recvivalkeyid;
+ sb.sctps_recvauthfailed += sarry->sctps_recvauthfailed;
+ sb.sctps_recvexpress += sarry->sctps_recvexpress;
+ sb.sctps_recvexpressm += sarry->sctps_recvexpressm;
+ sb.sctps_recvnocrc += sarry->sctps_recvnocrc;
+ sb.sctps_recvswcrc += sarry->sctps_recvswcrc;
+ sb.sctps_recvhwcrc += sarry->sctps_recvhwcrc;
+ sb.sctps_sendpackets += sarry->sctps_sendpackets;
+ sb.sctps_sendsacks += sarry->sctps_sendsacks;
+ sb.sctps_senddata += sarry->sctps_senddata;
+ sb.sctps_sendretransdata += sarry->sctps_sendretransdata;
+ sb.sctps_sendfastretrans += sarry->sctps_sendfastretrans;
+ sb.sctps_sendmultfastretrans += sarry->sctps_sendmultfastretrans;
+ sb.sctps_sendheartbeat += sarry->sctps_sendheartbeat;
+ sb.sctps_sendecne += sarry->sctps_sendecne;
+ sb.sctps_sendauth += sarry->sctps_sendauth;
+ sb.sctps_senderrors += sarry->sctps_senderrors;
+ sb.sctps_sendnocrc += sarry->sctps_sendnocrc;
+ sb.sctps_sendswcrc += sarry->sctps_sendswcrc;
+ sb.sctps_sendhwcrc += sarry->sctps_sendhwcrc;
+ sb.sctps_pdrpfmbox += sarry->sctps_pdrpfmbox;
+ sb.sctps_pdrpfehos += sarry->sctps_pdrpfehos;
+ sb.sctps_pdrpmbda += sarry->sctps_pdrpmbda;
+ sb.sctps_pdrpmbct += sarry->sctps_pdrpmbct;
+ sb.sctps_pdrpbwrpt += sarry->sctps_pdrpbwrpt;
+ sb.sctps_pdrpcrupt += sarry->sctps_pdrpcrupt;
+ sb.sctps_pdrpnedat += sarry->sctps_pdrpnedat;
+ sb.sctps_pdrppdbrk += sarry->sctps_pdrppdbrk;
+ sb.sctps_pdrptsnnf += sarry->sctps_pdrptsnnf;
+ sb.sctps_pdrpdnfnd += sarry->sctps_pdrpdnfnd;
+ sb.sctps_pdrpdiwnp += sarry->sctps_pdrpdiwnp;
+ sb.sctps_pdrpdizrw += sarry->sctps_pdrpdizrw;
+ sb.sctps_pdrpbadd += sarry->sctps_pdrpbadd;
+ sb.sctps_pdrpmark += sarry->sctps_pdrpmark;
+ sb.sctps_timoiterator += sarry->sctps_timoiterator;
+ sb.sctps_timodata += sarry->sctps_timodata;
+ sb.sctps_timowindowprobe += sarry->sctps_timowindowprobe;
+ sb.sctps_timoinit += sarry->sctps_timoinit;
+ sb.sctps_timosack += sarry->sctps_timosack;
+ sb.sctps_timoshutdown += sarry->sctps_timoshutdown;
+ sb.sctps_timoheartbeat += sarry->sctps_timoheartbeat;
+ sb.sctps_timocookie += sarry->sctps_timocookie;
+ sb.sctps_timosecret += sarry->sctps_timosecret;
+ sb.sctps_timopathmtu += sarry->sctps_timopathmtu;
+ sb.sctps_timoshutdownack += sarry->sctps_timoshutdownack;
+ sb.sctps_timoshutdownguard += sarry->sctps_timoshutdownguard;
+ sb.sctps_timostrmrst += sarry->sctps_timostrmrst;
+ sb.sctps_timoearlyfr += sarry->sctps_timoearlyfr;
+ sb.sctps_timoasconf += sarry->sctps_timoasconf;
+ sb.sctps_timodelprim += sarry->sctps_timodelprim;
+ sb.sctps_timoautoclose += sarry->sctps_timoautoclose;
+ sb.sctps_timoassockill += sarry->sctps_timoassockill;
+ sb.sctps_timoinpkill += sarry->sctps_timoinpkill;
+ sb.sctps_earlyfrstart += sarry->sctps_earlyfrstart;
+ sb.sctps_earlyfrstop += sarry->sctps_earlyfrstop;
+ sb.sctps_earlyfrmrkretrans += sarry->sctps_earlyfrmrkretrans;
+ sb.sctps_earlyfrstpout += sarry->sctps_earlyfrstpout;
+ sb.sctps_earlyfrstpidsck1 += sarry->sctps_earlyfrstpidsck1;
+ sb.sctps_earlyfrstpidsck2 += sarry->sctps_earlyfrstpidsck2;
+ sb.sctps_earlyfrstpidsck3 += sarry->sctps_earlyfrstpidsck3;
+ sb.sctps_earlyfrstpidsck4 += sarry->sctps_earlyfrstpidsck4;
+ sb.sctps_earlyfrstrid += sarry->sctps_earlyfrstrid;
+ sb.sctps_earlyfrstrout += sarry->sctps_earlyfrstrout;
+ sb.sctps_earlyfrstrtmr += sarry->sctps_earlyfrstrtmr;
+ sb.sctps_hdrops += sarry->sctps_hdrops;
+ sb.sctps_badsum += sarry->sctps_badsum;
+ sb.sctps_noport += sarry->sctps_noport;
+ sb.sctps_badvtag += sarry->sctps_badvtag;
+ sb.sctps_badsid += sarry->sctps_badsid;
+ sb.sctps_nomem += sarry->sctps_nomem;
+ sb.sctps_fastretransinrtt += sarry->sctps_fastretransinrtt;
+ sb.sctps_markedretrans += sarry->sctps_markedretrans;
+ sb.sctps_naglesent += sarry->sctps_naglesent;
+ sb.sctps_naglequeued += sarry->sctps_naglequeued;
+ sb.sctps_maxburstqueued += sarry->sctps_maxburstqueued;
+ sb.sctps_ifnomemqueued += sarry->sctps_ifnomemqueued;
+ sb.sctps_windowprobed += sarry->sctps_windowprobed;
+ sb.sctps_lowlevelerr += sarry->sctps_lowlevelerr;
+ sb.sctps_lowlevelerrusr += sarry->sctps_lowlevelerrusr;
+ sb.sctps_datadropchklmt += sarry->sctps_datadropchklmt;
+ sb.sctps_datadroprwnd += sarry->sctps_datadroprwnd;
+ sb.sctps_ecnereducedcwnd += sarry->sctps_ecnereducedcwnd;
+ sb.sctps_vtagexpress += sarry->sctps_vtagexpress;
+ sb.sctps_vtagbogus += sarry->sctps_vtagbogus;
+ sb.sctps_primary_randry += sarry->sctps_primary_randry;
+ sb.sctps_cmt_randry += sarry->sctps_cmt_randry;
+ sb.sctps_slowpath_sack += sarry->sctps_slowpath_sack;
+ sb.sctps_wu_sacks_sent += sarry->sctps_wu_sacks_sent;
+ sb.sctps_sends_with_flags += sarry->sctps_sends_with_flags;
+ sb.sctps_sends_with_unord += sarry->sctps_sends_with_unord;
+ sb.sctps_sends_with_eof += sarry->sctps_sends_with_eof;
+ sb.sctps_sends_with_abort += sarry->sctps_sends_with_abort;
+ sb.sctps_protocol_drain_calls += sarry->sctps_protocol_drain_calls;
+ sb.sctps_protocol_drains_done += sarry->sctps_protocol_drains_done;
+ sb.sctps_read_peeks += sarry->sctps_read_peeks;
+ sb.sctps_cached_chk += sarry->sctps_cached_chk;
+ sb.sctps_cached_strmoq += sarry->sctps_cached_strmoq;
+ sb.sctps_left_abandon += sarry->sctps_left_abandon;
+ sb.sctps_send_burst_avoid += sarry->sctps_send_burst_avoid;
+ sb.sctps_send_cwnd_avoid += sarry->sctps_send_cwnd_avoid;
+ sb.sctps_fwdtsn_map_over += sarry->sctps_fwdtsn_map_over;
+ }
+ error = SYSCTL_OUT(req, &sb, sizeof(sb));
+ return (error);
+}
+
+#endif
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+static int
+sysctl_sctp_cleartrace(SYSCTL_HANDLER_ARGS)
+{
+ int error = 0;
+
+ memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log));
+ return (error);
+}
+
+#endif
+
+
+/*
+ * sysctl definitions
+ */
+
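+/*
+ * The SYSCTL_PROC entries below expose the struct sctp_sysctl fields under
+ * the net.inet.sctp tree. Writable tunables route writes through
+ * sysctl_sctp_check (or sysctl_sctp_udp_tunneling_check for the tunneling
+ * port) so the new value is clamped to its SCTPCTL_*_MIN / SCTPCTL_*_MAX
+ * bounds; the stats and assoclist nodes use their own handlers.
+ */
+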
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sendspace, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_sendspace), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXDGRAM_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, recvspace, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_recvspace), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RECVSPACE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auto_asconf, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_auto_asconf), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_AUTOASCONF_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, ecn_enable, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_ecn_enable), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ECN_ENABLE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, ecn_nonce, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_ecn_nonce), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ECN_NONCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_sacks, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_strict_sacks), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_SACKS_DESC);
+
+#if !defined(SCTP_WITH_NO_CSUM)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, loopback_nocsum, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_no_csum_on_loopback), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_LOOPBACK_NOCSUM_DESC);
+#endif
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_init, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_strict_init), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_INIT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PEER_CHKOH_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, maxburst, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_max_burst_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXBURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, maxchunks, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXCHUNKS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_hashtblsize), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_TCBHASHSIZE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_pcbtblsize), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PCBHASHSIZE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, min_split_point, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_min_split_point), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MIN_SPLIT_POINT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, chunkscale, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_chunkscale), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CHUNKSCALE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DELAYED_SACK_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sack_freq, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_sack_freq_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SACK_FREQ_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sys_resource, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SYS_RESOURCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asoc_resource, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASOC_RESOURCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_HEARTBEAT_INTERVAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PMTU_RAISE_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SHUTDOWN_GUARD_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SECRET_LIFETIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_max, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_rto_max_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_min, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_rto_min_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_MIN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_initial, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_rto_initial_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_INITIAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, init_rto_max, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_init_rto_max_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_INIT_RTO_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_VALID_COOKIE_LIFE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_INIT_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASSOC_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PATH_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_add_more_threshold), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ADD_MORE_ON_OUTPUT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_OUTGOING_STREAMS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_cmt_on_off), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_ON_OFF_DESC);
+
+/* EY */
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, nr_sack_on_off, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_NR_SACK_ON_OFF_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_cmt_use_dac), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_USE_DAC_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_pf, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_cmt_pf), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_PF_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CWND_MAXBURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, early_fast_retran, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_early_fr), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_EARLY_FAST_RETRAN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, early_fast_retran_msec, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_early_fr_msec), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASCONF_AUTH_NOCHK_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_auth_disable), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_AUTH_DISABLE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, nat_friendly, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_nat_friendly), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_NAT_FRIENDLY_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, abc_l_var, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_L2_abc_variable), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ABC_L_VAR_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAX_CHAINED_MBUFS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_do_drain), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DO_SCTP_DRAIN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_hb_maxburst), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_HB_MAX_BURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ABORT_AT_LIMIT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_data_order, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_strict_data_order), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_DATA_ORDER_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, min_residual, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_min_residual), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MIN_RESIDUAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, max_retran_chunk, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_max_retran_chunk), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAX_RETRAN_CHUNK_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log_level, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_logging_level), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_LOGGING_LEVEL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, default_cc_module, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_default_cc_module), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEFAULT_CC_MODULE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, default_frag_interleave, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_default_frag_interleave), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mobility_base, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_mobility_base), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MOBILITY_BASE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mobility_fasthandoff, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MOBILITY_FASTHANDOFF_DESC);
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, log, CTLFLAG_RD,
+ &SCTP_BASE_SYSCTL(sctp_log), sctp_log,
+ "SCTP logging (struct sctp_log)");
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLTYPE_OPAQUE | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_log), 0, sysctl_sctp_cleartrace, "IU",
+ "Clear SCTP Logging buffer");
+
+
+
+#endif
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_for_client_enable, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), 0, sysctl_sctp_udp_tunneling_check, "IU",
+ SCTPCTL_UDP_TUNNELING_PORT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, enable_sack_immediately, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, nat_friendly_init, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_NAT_FRIENDLY_INITS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, vtag_time_wait, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_vtag_time_wait), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_TIME_WAIT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, buffer_splitting, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_buffer_splitting), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_BUFFER_SPLITTING_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, initial_cwnd, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_initial_cwnd), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_INITIAL_CWND_DESC);
+
+#ifdef SCTP_DEBUG
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, debug, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_debug_on), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEBUG_DESC);
+#endif /* SCTP_DEBUG */
+
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, output_unlocked, CTLTYPE_INT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_output_unlocked), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_OUTPUT_UNLOCKED_DESC);
+#endif
+#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, stats,
+ CTLTYPE_STRUCT | CTLFLAG_RD,
+ 0, 0, sysctl_stat_get, "S,sctpstat",
+ "SCTP statistics (struct sctp_stat)");
+#else
+SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW,
+ &SCTP_BASE_STATS_SYSCTL, sctpstat,
+ "SCTP statistics (struct sctp_stat)");
+#endif
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_RD,
+ 0, 0, sctp_assoclist,
+ "S,xassoc", "List of active SCTP associations");
diff --git a/freebsd/sys/netinet/sctp_sysctl.h b/freebsd/sys/netinet/sctp_sysctl.h
new file mode 100644
index 00000000..5f7f270d
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_sysctl.h
@@ -0,0 +1,532 @@
+/*-
+ * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_sysctl_h__
+#define __sctp_sysctl_h__
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_constants.h>
+
+struct sctp_sysctl {
+ uint32_t sctp_sendspace;
+ uint32_t sctp_recvspace;
+ uint32_t sctp_auto_asconf;
+ uint32_t sctp_multiple_asconfs;
+ uint32_t sctp_ecn_enable;
+ uint32_t sctp_ecn_nonce;
+ uint32_t sctp_strict_sacks;
+#if !defined(SCTP_WITH_NO_CSUM)
+ uint32_t sctp_no_csum_on_loopback;
+#endif
+ uint32_t sctp_strict_init;
+ uint32_t sctp_peer_chunk_oh;
+ uint32_t sctp_max_burst_default;
+ uint32_t sctp_max_chunks_on_queue;
+ uint32_t sctp_hashtblsize;
+ uint32_t sctp_pcbtblsize;
+ uint32_t sctp_min_split_point;
+ uint32_t sctp_chunkscale;
+ uint32_t sctp_delayed_sack_time_default;
+ uint32_t sctp_sack_freq_default;
+ uint32_t sctp_system_free_resc_limit;
+ uint32_t sctp_asoc_free_resc_limit;
+ uint32_t sctp_heartbeat_interval_default;
+ uint32_t sctp_pmtu_raise_time_default;
+ uint32_t sctp_shutdown_guard_time_default;
+ uint32_t sctp_secret_lifetime_default;
+ uint32_t sctp_rto_max_default;
+ uint32_t sctp_rto_min_default;
+ uint32_t sctp_rto_initial_default;
+ uint32_t sctp_init_rto_max_default;
+ uint32_t sctp_valid_cookie_life_default;
+ uint32_t sctp_init_rtx_max_default;
+ uint32_t sctp_assoc_rtx_max_default;
+ uint32_t sctp_path_rtx_max_default;
+ uint32_t sctp_add_more_threshold;
+ uint32_t sctp_nr_outgoing_streams_default;
+ uint32_t sctp_cmt_on_off;
+ uint32_t sctp_cmt_use_dac;
+ /* EY 5/5/08 - nr_sack flag variable */
+ uint32_t sctp_nr_sack_on_off;
+ uint32_t sctp_cmt_pf;
+ uint32_t sctp_use_cwnd_based_maxburst;
+ uint32_t sctp_early_fr;
+ uint32_t sctp_early_fr_msec;
+ uint32_t sctp_asconf_auth_nochk;
+ uint32_t sctp_auth_disable;
+ uint32_t sctp_nat_friendly;
+ uint32_t sctp_L2_abc_variable;
+ uint32_t sctp_mbuf_threshold_count;
+ uint32_t sctp_do_drain;
+ uint32_t sctp_hb_maxburst;
+ uint32_t sctp_abort_if_one_2_one_hits_limit;
+ uint32_t sctp_strict_data_order;
+ uint32_t sctp_min_residual;
+ uint32_t sctp_max_retran_chunk;
+ uint32_t sctp_logging_level;
+ /* JRS - Variable for default congestion control module */
+ uint32_t sctp_default_cc_module;
+ uint32_t sctp_default_frag_interleave;
+ uint32_t sctp_mobility_base;
+ uint32_t sctp_mobility_fasthandoff;
+ uint32_t sctp_inits_include_nat_friendly;
+#if defined(SCTP_LOCAL_TRACE_BUF)
+ struct sctp_log sctp_log;
+#endif
+ uint32_t sctp_udp_tunneling_for_client_enable;
+ uint32_t sctp_udp_tunneling_port;
+ uint32_t sctp_enable_sack_immediately;
+ uint32_t sctp_vtag_time_wait;
+ uint32_t sctp_buffer_splitting;
+ uint32_t sctp_initial_cwnd;
+#if defined(SCTP_DEBUG)
+ uint32_t sctp_debug_on;
+#endif
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ uint32_t sctp_output_unlocked;
+#endif
+};
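+
+/*
+ * The fields above are read and written through the SCTP_BASE_SYSCTL()
+ * accessor, as used throughout sctp_sysctl.c.
+ */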
+
+/*
+ * limits for the sysctl variables
+ */
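+/*
+ * Each tunable below is described by four macros: a _DESC string used as
+ * the sysctl description, _MIN and _MAX bounds enforced by RANGECHK() in
+ * sctp_sysctl.c, and a _DEFAULT installed by sctp_init_sysctls().
+ */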
+/* maxdgram: Maximum outgoing SCTP buffer size */
+#define SCTPCTL_MAXDGRAM_DESC "Maximum outgoing SCTP buffer size"
+#define SCTPCTL_MAXDGRAM_MIN 0
+#define SCTPCTL_MAXDGRAM_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXDGRAM_DEFAULT 262144 /* 256k */
+
+/* recvspace: Maximum incoming SCTP buffer size */
+#define SCTPCTL_RECVSPACE_DESC "Maximum incoming SCTP buffer size"
+#define SCTPCTL_RECVSPACE_MIN 0
+#define SCTPCTL_RECVSPACE_MAX 0xFFFFFFFF
+#define SCTPCTL_RECVSPACE_DEFAULT 262144 /* 256k */
+
+/* autoasconf: Enable SCTP Auto-ASCONF */
+#define SCTPCTL_AUTOASCONF_DESC "Enable SCTP Auto-ASCONF"
+#define SCTPCTL_AUTOASCONF_MIN 0
+#define SCTPCTL_AUTOASCONF_MAX 1
+#define SCTPCTL_AUTOASCONF_DEFAULT SCTP_DEFAULT_AUTO_ASCONF
+
+/* multiple_asconfs: Enable SCTP Multiple-ASCONFs */
+#define SCTPCTL_MULTIPLEASCONFS_DESC "Enable SCTP Multiple-ASCONFs"
+#define SCTPCTL_MULTIPLEASCONFS_MIN 0
+#define SCTPCTL_MULTIPLEASCONFS_MAX 1
+#define SCTPCTL_MULTIPLEASCONFS_DEFAULT SCTP_DEFAULT_MULTIPLE_ASCONFS
+
+/* ecn_enable: Enable SCTP ECN */
+#define SCTPCTL_ECN_ENABLE_DESC "Enable SCTP ECN"
+#define SCTPCTL_ECN_ENABLE_MIN 0
+#define SCTPCTL_ECN_ENABLE_MAX 1
+#define SCTPCTL_ECN_ENABLE_DEFAULT 1
+
+/* ecn_nonce: Enable SCTP ECN Nonce */
+#define SCTPCTL_ECN_NONCE_DESC "Enable SCTP ECN Nonce"
+#define SCTPCTL_ECN_NONCE_MIN 0
+#define SCTPCTL_ECN_NONCE_MAX 1
+#define SCTPCTL_ECN_NONCE_DEFAULT 0
+
+/* strict_sacks: Enable SCTP Strict SACK checking */
+#define SCTPCTL_STRICT_SACKS_DESC "Enable SCTP Strict SACK checking"
+#define SCTPCTL_STRICT_SACKS_MIN 0
+#define SCTPCTL_STRICT_SACKS_MAX 1
+#define SCTPCTL_STRICT_SACKS_DEFAULT 1
+
+/* loopback_nocsum: Enable NO Csum on packets sent on loopback */
+#define SCTPCTL_LOOPBACK_NOCSUM_DESC "Enable NO Csum on packets sent on loopback"
+#define SCTPCTL_LOOPBACK_NOCSUM_MIN 0
+#define SCTPCTL_LOOPBACK_NOCSUM_MAX 1
+#define SCTPCTL_LOOPBACK_NOCSUM_DEFAULT 1
+
+/* strict_init: Enable strict INIT/INIT-ACK singleton enforcement */
+#define SCTPCTL_STRICT_INIT_DESC "Enable strict INIT/INIT-ACK singleton enforcement"
+#define SCTPCTL_STRICT_INIT_MIN 0
+#define SCTPCTL_STRICT_INIT_MAX 1
+#define SCTPCTL_STRICT_INIT_DEFAULT 1
+
+/* peer_chkoh: Amount to debit peers rwnd per chunk sent */
+#define SCTPCTL_PEER_CHKOH_DESC "Amount to debit peers rwnd per chunk sent"
+#define SCTPCTL_PEER_CHKOH_MIN 0
+#define SCTPCTL_PEER_CHKOH_MAX 0xFFFFFFFF
+#define SCTPCTL_PEER_CHKOH_DEFAULT 256
+
+/* maxburst: Default max burst for sctp endpoints */
+#define SCTPCTL_MAXBURST_DESC "Default max burst for sctp endpoints"
+#define SCTPCTL_MAXBURST_MIN 1
+#define SCTPCTL_MAXBURST_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXBURST_DEFAULT SCTP_DEF_MAX_BURST
+
+/* maxchunks: Default max chunks on queue per asoc */
+#define SCTPCTL_MAXCHUNKS_DESC "Default max chunks on queue per asoc"
+#define SCTPCTL_MAXCHUNKS_MIN 0
+#define SCTPCTL_MAXCHUNKS_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXCHUNKS_DEFAULT SCTP_ASOC_MAX_CHUNKS_ON_QUEUE
+
+/* tcbhashsize: Tuneable for Hash table sizes */
+#define SCTPCTL_TCBHASHSIZE_DESC "Tunable for TCB hash table sizes"
+#define SCTPCTL_TCBHASHSIZE_MIN 1
+#define SCTPCTL_TCBHASHSIZE_MAX 0xFFFFFFFF
+#define SCTPCTL_TCBHASHSIZE_DEFAULT SCTP_TCBHASHSIZE
+
+/* pcbhashsize: Tuneable for PCB Hash table sizes */
+#define SCTPCTL_PCBHASHSIZE_DESC "Tunable for PCB hash table sizes"
+#define SCTPCTL_PCBHASHSIZE_MIN 1
+#define SCTPCTL_PCBHASHSIZE_MAX 0xFFFFFFFF
+#define SCTPCTL_PCBHASHSIZE_DEFAULT SCTP_PCBHASHSIZE
+
+/* min_split_point: Minimum size when splitting a chunk */
+#define SCTPCTL_MIN_SPLIT_POINT_DESC "Minimum size when splitting a chunk"
+#define SCTPCTL_MIN_SPLIT_POINT_MIN 0
+#define SCTPCTL_MIN_SPLIT_POINT_MAX 0xFFFFFFFF
+#define SCTPCTL_MIN_SPLIT_POINT_DEFAULT SCTP_DEFAULT_SPLIT_POINT_MIN
+
+/* chunkscale: Tuneable for Scaling of number of chunks and messages */
+#define SCTPCTL_CHUNKSCALE_DESC "Tuneable for Scaling of number of chunks and messages"
+#define SCTPCTL_CHUNKSCALE_MIN 1
+#define SCTPCTL_CHUNKSCALE_MAX 0xFFFFFFFF
+#define SCTPCTL_CHUNKSCALE_DEFAULT SCTP_CHUNKQUEUE_SCALE
+
+/* delayed_sack_time: Default delayed SACK timer in msec */
+#define SCTPCTL_DELAYED_SACK_TIME_DESC "Default delayed SACK timer in msec"
+#define SCTPCTL_DELAYED_SACK_TIME_MIN 0
+#define SCTPCTL_DELAYED_SACK_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_DELAYED_SACK_TIME_DEFAULT SCTP_RECV_MSEC
+
+/* sack_freq: Default SACK frequency */
+#define SCTPCTL_SACK_FREQ_DESC "Default SACK frequency"
+#define SCTPCTL_SACK_FREQ_MIN 0
+#define SCTPCTL_SACK_FREQ_MAX 0xFFFFFFFF
+#define SCTPCTL_SACK_FREQ_DEFAULT SCTP_DEFAULT_SACK_FREQ
+
+/* sys_resource: Max number of cached resources in the system */
+#define SCTPCTL_SYS_RESOURCE_DESC "Max number of cached resources in the system"
+#define SCTPCTL_SYS_RESOURCE_MIN 0
+#define SCTPCTL_SYS_RESOURCE_MAX 0xFFFFFFFF
+#define SCTPCTL_SYS_RESOURCE_DEFAULT SCTP_DEF_SYSTEM_RESC_LIMIT
+
+/* asoc_resource: Max number of cached resources in an asoc */
+#define SCTPCTL_ASOC_RESOURCE_DESC "Max number of cached resources in an asoc"
+#define SCTPCTL_ASOC_RESOURCE_MIN 0
+#define SCTPCTL_ASOC_RESOURCE_MAX 0xFFFFFFFF
+#define SCTPCTL_ASOC_RESOURCE_DEFAULT SCTP_DEF_ASOC_RESC_LIMIT
+
+/* heartbeat_interval: Default heartbeat interval in msec */
+#define SCTPCTL_HEARTBEAT_INTERVAL_DESC "Default heartbeat interval in msec"
+#define SCTPCTL_HEARTBEAT_INTERVAL_MIN 0
+#define SCTPCTL_HEARTBEAT_INTERVAL_MAX 0xFFFFFFFF
+#define SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT SCTP_HB_DEFAULT_MSEC
+
+/* pmtu_raise_time: Default PMTU raise timer in sec */
+#define SCTPCTL_PMTU_RAISE_TIME_DESC "Default PMTU raise timer in sec"
+#define SCTPCTL_PMTU_RAISE_TIME_MIN 0
+#define SCTPCTL_PMTU_RAISE_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_PMTU_RAISE_TIME_DEFAULT SCTP_DEF_PMTU_RAISE_SEC
+
+/* shutdown_guard_time: Default shutdown guard timer in sec */
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Default shutdown guard timer in sec"
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_MIN 0
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT SCTP_DEF_MAX_SHUTDOWN_SEC
+
+/* secret_lifetime: Default secret lifetime in sec */
+#define SCTPCTL_SECRET_LIFETIME_DESC "Default secret lifetime in sec"
+#define SCTPCTL_SECRET_LIFETIME_MIN 0
+#define SCTPCTL_SECRET_LIFETIME_MAX 0xFFFFFFFF
+#define SCTPCTL_SECRET_LIFETIME_DEFAULT SCTP_DEFAULT_SECRET_LIFE_SEC
+
+/* rto_max: Default maximum retransmission timeout in msec */
+#define SCTPCTL_RTO_MAX_DESC "Default maximum retransmission timeout in msec"
+#define SCTPCTL_RTO_MAX_MIN 0
+#define SCTPCTL_RTO_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND
+
+/* rto_min: Default minimum retransmission timeout in msec */
+#define SCTPCTL_RTO_MIN_DESC "Default minimum retransmission timeout in msec"
+#define SCTPCTL_RTO_MIN_MIN 0
+#define SCTPCTL_RTO_MIN_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_MIN_DEFAULT SCTP_RTO_LOWER_BOUND
+
+/* rto_initial: Default initial retransmission timeout in msec */
+#define SCTPCTL_RTO_INITIAL_DESC "Default initial retransmission timeout in msec"
+#define SCTPCTL_RTO_INITIAL_MIN 0
+#define SCTPCTL_RTO_INITIAL_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_INITIAL_DEFAULT SCTP_RTO_INITIAL
+
+/* init_rto_max: Default maximum retransmission timeout during association setup in msec */
+#define SCTPCTL_INIT_RTO_MAX_DESC "Default maximum retransmission timeout during association setup in msec"
+#define SCTPCTL_INIT_RTO_MAX_MIN 0
+#define SCTPCTL_INIT_RTO_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_INIT_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND
+
+/* valid_cookie_life: Default cookie lifetime in sec */
+#define SCTPCTL_VALID_COOKIE_LIFE_DESC "Default cookie lifetime in sec"
+#define SCTPCTL_VALID_COOKIE_LIFE_MIN 0
+#define SCTPCTL_VALID_COOKIE_LIFE_MAX 0xFFFFFFFF
+#define SCTPCTL_VALID_COOKIE_LIFE_DEFAULT SCTP_DEFAULT_COOKIE_LIFE
+
+/* init_rtx_max: Default maximum number of retransmission for INIT chunks */
+#define SCTPCTL_INIT_RTX_MAX_DESC "Default maximum number of retransmission for INIT chunks"
+#define SCTPCTL_INIT_RTX_MAX_MIN 0
+#define SCTPCTL_INIT_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_INIT_RTX_MAX_DEFAULT SCTP_DEF_MAX_INIT
+
+/* assoc_rtx_max: Default maximum number of retransmissions per association */
+#define SCTPCTL_ASSOC_RTX_MAX_DESC "Default maximum number of retransmissions per association"
+#define SCTPCTL_ASSOC_RTX_MAX_MIN 0
+#define SCTPCTL_ASSOC_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_ASSOC_RTX_MAX_DEFAULT SCTP_DEF_MAX_SEND
+
+/* path_rtx_max: Default maximum of retransmissions per path */
+#define SCTPCTL_PATH_RTX_MAX_DESC "Default maximum of retransmissions per path"
+#define SCTPCTL_PATH_RTX_MAX_MIN 0
+#define SCTPCTL_PATH_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_PATH_RTX_MAX_DEFAULT SCTP_DEF_MAX_PATH_RTX
+
+/* add_more_on_output: When space wise is it worthwhile to try to add more to a socket send buffer */
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_DESC "When space wise is it worthwhile to try to add more to a socket send buffer"
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_MIN 0
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_MAX 0xFFFFFFFF
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT SCTP_DEFAULT_ADD_MORE
+
+/* outgoing_streams: Default number of outgoing streams */
+#define SCTPCTL_OUTGOING_STREAMS_DESC "Default number of outgoing streams"
+#define SCTPCTL_OUTGOING_STREAMS_MIN 1
+#define SCTPCTL_OUTGOING_STREAMS_MAX 65535
+#define SCTPCTL_OUTGOING_STREAMS_DEFAULT SCTP_OSTREAM_INITIAL
+
+/* cmt_on_off: CMT on/off flag */
+#define SCTPCTL_CMT_ON_OFF_DESC "CMT on/off flag"
+#define SCTPCTL_CMT_ON_OFF_MIN 0
+#define SCTPCTL_CMT_ON_OFF_MAX 1
+#define SCTPCTL_CMT_ON_OFF_DEFAULT 0
+
+/* EY - nr_sack_on_off: NR_SACK on/off flag */
+#define SCTPCTL_NR_SACK_ON_OFF_DESC "NR_SACK on/off flag"
+#define SCTPCTL_NR_SACK_ON_OFF_MIN 0
+#define SCTPCTL_NR_SACK_ON_OFF_MAX 1
+#define SCTPCTL_NR_SACK_ON_OFF_DEFAULT 0
+
+/* cmt_use_dac: CMT DAC on/off flag */
+#define SCTPCTL_CMT_USE_DAC_DESC "CMT DAC on/off flag"
+#define SCTPCTL_CMT_USE_DAC_MIN 0
+#define SCTPCTL_CMT_USE_DAC_MAX 1
+#define SCTPCTL_CMT_USE_DAC_DEFAULT 0
+
+/* JRS 5/2007 - CMT PF type flag */
+#define SCTPCTL_CMT_PF_DESC "CMT PF type flag"
+#define SCTPCTL_CMT_PF_MIN 0
+#define SCTPCTL_CMT_PF_MAX 2
+#define SCTPCTL_CMT_PF_DEFAULT 0
+
+/* cwnd_maxburst: Use a CWND adjusting maxburst */
+#define SCTPCTL_CWND_MAXBURST_DESC "Use a CWND adjusting maxburst"
+#define SCTPCTL_CWND_MAXBURST_MIN 0
+#define SCTPCTL_CWND_MAXBURST_MAX 1
+#define SCTPCTL_CWND_MAXBURST_DEFAULT 1
+
+/* early_fast_retran: Early Fast Retransmit with timer */
+#define SCTPCTL_EARLY_FAST_RETRAN_DESC "Early Fast Retransmit with timer"
+#define SCTPCTL_EARLY_FAST_RETRAN_MIN 0
+#define SCTPCTL_EARLY_FAST_RETRAN_MAX 0xFFFFFFFF
+#define SCTPCTL_EARLY_FAST_RETRAN_DEFAULT 0
+
+/* early_fast_retran_msec: Early Fast Retransmit minimum timer value */
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC "Early Fast Retransmit minimum timer value"
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN 0
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX 0xFFFFFFFF
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT SCTP_MINFR_MSEC_TIMER
+
+/* asconf_auth_nochk: Disable SCTP ASCONF AUTH requirement */
+#define SCTPCTL_ASCONF_AUTH_NOCHK_DESC "Disable SCTP ASCONF AUTH requirement"
+#define SCTPCTL_ASCONF_AUTH_NOCHK_MIN 0
+#define SCTPCTL_ASCONF_AUTH_NOCHK_MAX 1
+#define SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT 0
+
+/* auth_disable: Disable SCTP AUTH function */
+#define SCTPCTL_AUTH_DISABLE_DESC "Disable SCTP AUTH function"
+#define SCTPCTL_AUTH_DISABLE_MIN 0
+#define SCTPCTL_AUTH_DISABLE_MAX 1
+#define SCTPCTL_AUTH_DISABLE_DEFAULT 0
+
+/* nat_friendly: SCTP NAT friendly operation */
+#define SCTPCTL_NAT_FRIENDLY_DESC "SCTP NAT friendly operation"
+#define SCTPCTL_NAT_FRIENDLY_MIN 0
+#define SCTPCTL_NAT_FRIENDLY_MAX 1
+#define SCTPCTL_NAT_FRIENDLY_DEFAULT 1
+
+/* abc_l_var: SCTP ABC max increase per SACK (L) */
+#define SCTPCTL_ABC_L_VAR_DESC "SCTP ABC max increase per SACK (L)"
+#define SCTPCTL_ABC_L_VAR_MIN 0
+#define SCTPCTL_ABC_L_VAR_MAX 0xFFFFFFFF
+#define SCTPCTL_ABC_L_VAR_DEFAULT 1
+
+/* max_chained_mbufs: Default max number of small mbufs on a chain */
+#define SCTPCTL_MAX_CHAINED_MBUFS_DESC "Default max number of small mbufs on a chain"
+#define SCTPCTL_MAX_CHAINED_MBUFS_MIN 0
+#define SCTPCTL_MAX_CHAINED_MBUFS_MAX 0xFFFFFFFF
+#define SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT SCTP_DEFAULT_MBUFS_IN_CHAIN
+
+/* do_sctp_drain: Should SCTP respond to the drain calls */
+#define SCTPCTL_DO_SCTP_DRAIN_DESC "Should SCTP respond to the drain calls"
+#define SCTPCTL_DO_SCTP_DRAIN_MIN 0
+#define SCTPCTL_DO_SCTP_DRAIN_MAX 1
+#define SCTPCTL_DO_SCTP_DRAIN_DEFAULT 1
+
+/* hb_max_burst: Confirmation Heartbeat max burst */
+#define SCTPCTL_HB_MAX_BURST_DESC "Confirmation Heartbeat max burst"
+#define SCTPCTL_HB_MAX_BURST_MIN 1
+#define SCTPCTL_HB_MAX_BURST_MAX 0xFFFFFFFF
+#define SCTPCTL_HB_MAX_BURST_DEFAULT SCTP_DEF_MAX_BURST
+
+/* abort_at_limit: When one-2-one hits qlimit abort */
+#define SCTPCTL_ABORT_AT_LIMIT_DESC "When one-2-one hits qlimit abort"
+#define SCTPCTL_ABORT_AT_LIMIT_MIN 0
+#define SCTPCTL_ABORT_AT_LIMIT_MAX 1
+#define SCTPCTL_ABORT_AT_LIMIT_DEFAULT 0
+
+/* strict_data_order: Enforce strict data ordering, abort if control inside data */
+#define SCTPCTL_STRICT_DATA_ORDER_DESC "Enforce strict data ordering, abort if control inside data"
+#define SCTPCTL_STRICT_DATA_ORDER_MIN 0
+#define SCTPCTL_STRICT_DATA_ORDER_MAX 1
+#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT 0
+
+/* min_residual: minimum residual data chunk in the second part of a split */
+#define SCTPCTL_MIN_RESIDUAL_DESC "Minimum residual data chunk in second part of split"
+#define SCTPCTL_MIN_RESIDUAL_MIN 20
+#define SCTPCTL_MIN_RESIDUAL_MAX 65535
+#define SCTPCTL_MIN_RESIDUAL_DEFAULT 1452
+
+/* max_retran_chunk: max chunk retransmissions */
+#define SCTPCTL_MAX_RETRAN_CHUNK_DESC "Maximum times an unlucky chunk can be retran'd before assoc abort"
+#define SCTPCTL_MAX_RETRAN_CHUNK_MIN 0
+#define SCTPCTL_MAX_RETRAN_CHUNK_MAX 65535
+#define SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT 30
+
+/* sctp_logging: This gives us logging when the options are enabled */
+#define SCTPCTL_LOGGING_LEVEL_DESC "Ltrace/KTR trace logging level"
+#define SCTPCTL_LOGGING_LEVEL_MIN 0
+#define SCTPCTL_LOGGING_LEVEL_MAX 0xffffffff
+#define SCTPCTL_LOGGING_LEVEL_DEFAULT 0
+
+/* JRS - default congestion control module sysctl */
+#define SCTPCTL_DEFAULT_CC_MODULE_DESC "Default congestion control module"
+#define SCTPCTL_DEFAULT_CC_MODULE_MIN 0
+#define SCTPCTL_DEFAULT_CC_MODULE_MAX 2
+#define SCTPCTL_DEFAULT_CC_MODULE_DEFAULT 0
+
+/* RRS - default fragment interleave */
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC "Default fragment interleave level"
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN 0
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX 2
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT 1
+
+/* mobility_base: Enable SCTP mobility support */
+#define SCTPCTL_MOBILITY_BASE_DESC "Enable SCTP base mobility"
+#define SCTPCTL_MOBILITY_BASE_MIN 0
+#define SCTPCTL_MOBILITY_BASE_MAX 1
+#define SCTPCTL_MOBILITY_BASE_DEFAULT SCTP_DEFAULT_MOBILITY_BASE
+
+/* mobility_fasthandoff: Enable SCTP fast handoff support */
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DESC "Enable SCTP fast handoff"
+#define SCTPCTL_MOBILITY_FASTHANDOFF_MIN 0
+#define SCTPCTL_MOBILITY_FASTHANDOFF_MAX 1
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT SCTP_DEFAULT_MOBILITY_FASTHANDOFF
+
+/* Enable SCTP/UDP tunneling for clients */
+#define SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_DESC "Enable SCTP/UDP tunneling for client"
+#define SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MIN 0
+#define SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MAX 1
+#define SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_DEFAULT SCTPCTL_UDP_TUNNELING_FOR_CLIENT_ENABLE_MIN
+
+/* Set the SCTP/UDP tunneling port */
+#define SCTPCTL_UDP_TUNNELING_PORT_DESC "Set the SCTP/UDP tunneling port"
+#define SCTPCTL_UDP_TUNNELING_PORT_MIN 0
+#define SCTPCTL_UDP_TUNNELING_PORT_MAX 65535
+#define SCTPCTL_UDP_TUNNELING_PORT_DEFAULT SCTP_OVER_UDP_TUNNELING_PORT
+
+/* Enable sending of the SACK-IMMEDIATELY bit */
+#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC "Enable sending of the SACK-IMMEDIATELY-bit."
+#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN 0
+#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX 1
+#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DEFAULT SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN
+
+/* Enable sending of the NAT-FRIENDLY message */
+#define SCTPCTL_NAT_FRIENDLY_INITS_DESC "Enable sending of the nat-friendly SCTP option on INITs."
+#define SCTPCTL_NAT_FRIENDLY_INITS_MIN 0
+#define SCTPCTL_NAT_FRIENDLY_INITS_MAX 1
+#define SCTPCTL_NAT_FRIENDLY_INITS_DEFAULT SCTPCTL_NAT_FRIENDLY_INITS_MIN
+
+/* Vtag time wait in seconds */
+#define SCTPCTL_TIME_WAIT_DESC "Vtag time wait time in seconds, 0 disables it."
+#define SCTPCTL_TIME_WAIT_MIN 0
+#define SCTPCTL_TIME_WAIT_MAX 0xffffffff
+#define SCTPCTL_TIME_WAIT_DEFAULT SCTP_TIME_WAIT
+
+/* Enable Send/Receive buffer splitting */
+#define SCTPCTL_BUFFER_SPLITTING_DESC "Enable send/receive buffer splitting."
+#define SCTPCTL_BUFFER_SPLITTING_MIN 0
+#define SCTPCTL_BUFFER_SPLITTING_MAX 0x3
+#define SCTPCTL_BUFFER_SPLITTING_DEFAULT SCTPCTL_BUFFER_SPLITTING_MIN
+
+/* Initial congestion window in MTUs */
+#define SCTPCTL_INITIAL_CWND_DESC "Initial congestion window in MTUs"
+#define SCTPCTL_INITIAL_CWND_MIN 1
+#define SCTPCTL_INITIAL_CWND_MAX 0xffffffff
+#define SCTPCTL_INITIAL_CWND_DEFAULT 3
+
+#if defined(SCTP_DEBUG)
+/* debug: Configure debug output */
+#define SCTPCTL_DEBUG_DESC "Configure debug output"
+#define SCTPCTL_DEBUG_MIN 0
+#define SCTPCTL_DEBUG_MAX 0xFFFFFFFF
+#define SCTPCTL_DEBUG_DEFAULT 0
+#endif
+
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#define SCTPCTL_OUTPUT_UNLOCKED_DESC "Unlock socket when sending packets down to IP."
+#define SCTPCTL_OUTPUT_UNLOCKED_MIN 0
+#define SCTPCTL_OUTPUT_UNLOCKED_MAX 1
+#define SCTPCTL_OUTPUT_UNLOCKED_DEFAULT SCTPCTL_OUTPUT_UNLOCKED_MIN
+#endif
+
+
+#if defined(_KERNEL) || defined(__Userspace__)
+#if defined(SYSCTL_DECL)
+SYSCTL_DECL(_net_inet_sctp);
+#endif
+
+void sctp_init_sysctls(void);
+
+#endif /* _KERNEL || __Userspace__ */
+#endif /* __sctp_sysctl_h__ */
diff --git a/freebsd/sys/netinet/sctp_timer.c b/freebsd/sys/netinet/sctp_timer.c
new file mode 100644
index 00000000..090689b1
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_timer.c
@@ -0,0 +1,1804 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_timer.c,v 1.29 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#define _IP_VHL
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#ifdef INET6
+#endif
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_timer.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctp_indata.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctp_input.h>
+#include <freebsd/netinet/sctp.h>
+#include <freebsd/netinet/sctp_uio.h>
+#include <freebsd/netinet/udp.h>
+
+
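+/*
+ * Early Fast Retransmit timer for a destination: mark chunks on the sent
+ * queue for this net that have been outstanding longer than the current
+ * RTO (bounded below by sctp_early_fr_msec) for retransmission, limited by
+ * what cwnd allows, then let the congestion control module react (or kick
+ * the output path if resends are already pending) and restart the timer
+ * while the flight size stays below cwnd.
+ */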
+void
+sctp_early_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_tmit_chunk *chk, *tp2;
+ struct timeval now, min_wait, tv;
+ unsigned int cur_rtt, cnt = 0, cnt_resend = 0;
+
+ /* an early FR is occurring. */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* get cur rto in micro-seconds */
+ if (net->lastsa == 0) {
+ /* Hmm no rtt estimate yet? */
+ cur_rtt = stcb->asoc.initial_rto >> 2;
+ } else {
+
+ cur_rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ }
+ if (cur_rtt < SCTP_BASE_SYSCTL(sctp_early_fr_msec)) {
+ cur_rtt = SCTP_BASE_SYSCTL(sctp_early_fr_msec);
+ }
+ cur_rtt *= 1000;
+ tv.tv_sec = cur_rtt / 1000000;
+ tv.tv_usec = cur_rtt % 1000000;
+ min_wait = now;
+ timevalsub(&min_wait, &tv);
+ if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
+ /*
+ * if we hit here, we don't have enough seconds on the clock
+ * to account for the RTO. We just let the lower seconds be
+ * the bounds and don't worry about it. This may mean we
+ * will mark a lot more than we should.
+ */
+ min_wait.tv_sec = min_wait.tv_usec = 0;
+ }
+ chk = TAILQ_LAST(&stcb->asoc.sent_queue, sctpchunk_listhead);
+ for (; chk != NULL; chk = tp2) {
+ tp2 = TAILQ_PREV(chk, sctpchunk_listhead, sctp_next);
+ if (chk->whoTo != net) {
+ continue;
+ }
+ if (chk->sent == SCTP_DATAGRAM_RESEND)
+ cnt_resend++;
+ else if ((chk->sent > SCTP_DATAGRAM_UNSENT) &&
+ (chk->sent < SCTP_DATAGRAM_RESEND)) {
+ /* pending, may need retran */
+ if (chk->sent_rcv_time.tv_sec > min_wait.tv_sec) {
+ /*
+ * we have reached a chunk that was sent
+ * some seconds past our min; forget it, we
+ * will find no more to send.
+ */
+ continue;
+ } else if (chk->sent_rcv_time.tv_sec == min_wait.tv_sec) {
+ /*
+ * we must look at the micro seconds to
+ * know.
+ */
+ if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
+ /*
+ * ok it was sent after our boundary
+ * time.
+ */
+ continue;
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_EARLYFR_LOGGING_ENABLE) {
+ sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
+ 4, SCTP_FR_MARKED_EARLY);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrmrkretrans);
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ /* double book size since we are doing an early FR */
+ chk->book_size_scale++;
+ cnt += chk->send_size;
+ if ((cnt + net->flight_size) > net->cwnd) {
+ /* Mark all we could possibly resend */
+ break;
+ }
+ }
+ }
+ if (cnt) {
+ /*
+ * JRS - Use the congestion control given in the congestion
+ * control module
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer(inp, stcb, net);
+ } else if (cnt_resend) {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ }
+ /* Restart it? */
+ if (net->flight_size < net->cwnd) {
+ SCTP_STAT_INCR(sctps_earlyfrstrtmr);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ }
+}
+
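+/*
+ * Recompute sent_queue_cnt from the sent queue and sent_queue_retran_cnt
+ * from the sent, control and asconf send queues; used to re-sync the
+ * counters when they are suspected to be wrong.
+ */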
+void
+sctp_audit_retranmission_queue(struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Audit invoked on send queue cnt:%d onqueue:%d\n",
+ asoc->sent_queue_retran_cnt,
+ asoc->sent_queue_cnt);
+ asoc->sent_queue_retran_cnt = 0;
+ asoc->sent_queue_cnt = 0;
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ asoc->sent_queue_cnt++;
+ }
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ }
+ TAILQ_FOREACH(chk, &asoc->asconf_send_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Audit completes retran:%d onqueue:%d\n",
+ asoc->sent_queue_retran_cnt,
+ asoc->sent_queue_cnt);
+}
+
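+/*
+ * Bump the error counters for the given net (if any) and for the whole
+ * association, mark the destination unreachable once its failure threshold
+ * is crossed, and abort the association once the overall error count
+ * exceeds the supplied threshold. Returns 1 if the association was
+ * destroyed, 0 otherwise.
+ */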
+int
+sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint16_t threshold)
+{
+ if (net) {
+ net->error_count++;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Error count for %p now %d thresh:%d\n",
+ net, net->error_count,
+ net->failure_threshold);
+ if (net->error_count > net->failure_threshold) {
+ /* We had a threshold failure */
+ if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
+ if (net == stcb->asoc.primary_destination) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ }
+ /*
+ * JRS 5/14/07 - If a destination is
+ * unreachable, the PF bit is turned off.
+ * This allows an unambiguous use of the PF
+ * bit for destinations that are reachable
+ * but potentially failed. If the
+ * destination is set to the unreachable
+ * state, also set the destination to the PF
+ * state.
+ */
+ /*
+ * Add debug message here if destination is
+ * not in PF state.
+ */
+ /* Stop any running T3 timers here? */
+ if ((stcb->asoc.sctp_cmt_on_off == 1) &&
+ (stcb->asoc.sctp_cmt_pf > 0)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb,
+ SCTP_FAILED_THRESHOLD,
+ (void *)net, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ /*********HOLD THIS COMMENT FOR PATCH OF ALTERNATE
+ *********ROUTING CODE
+ */
+ /*********HOLD THIS COMMENT FOR END OF PATCH OF ALTERNATE
+ *********ROUTING CODE
+ */
+ }
+ if (stcb == NULL)
+ return (0);
+
+ if (net) {
+ if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_INCR,
+ stcb->asoc.overall_error_count,
+ (stcb->asoc.overall_error_count + 1),
+ SCTP_FROM_SCTP_TIMER,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count++;
+ }
+ } else {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_INCR,
+ stcb->asoc.overall_error_count,
+ (stcb->asoc.overall_error_count + 1),
+ SCTP_FROM_SCTP_TIMER,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count++;
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Overall error count for %p now %d thresh:%u state:%x\n",
+ &stcb->asoc, stcb->asoc.overall_error_count,
+ (uint32_t) threshold,
+ ((net == NULL) ? (uint32_t) 0 : (uint32_t) net->dest_state));
+ /*
+ * We specifically do not do >= to give the assoc one more chance
+ * before we fail it.
+ */
+ if (stcb->asoc.overall_error_count > threshold) {
+ /* Abort notification sends a ULP notify */
+ struct mbuf *oper;
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
+ }
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
+ sctp_abort_an_association(inp, stcb, SCTP_FAILED_THRESHOLD, oper, SCTP_SO_NOT_LOCKED);
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * sctp_find_alternate_net() returns a non-NULL pointer as long as
+ * the argument net is non-NULL.
+ */
+struct sctp_nets *
+sctp_find_alternate_net(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ int mode)
+{
+ /* Find and return an alternate network if possible */
+ struct sctp_nets *alt, *mnet, *min_errors_net = NULL, *max_cwnd_net = NULL;
+ int once;
+
+ /* JRS 5/14/07 - Initialize min_errors to an impossible value. */
+ int min_errors = -1;
+ uint32_t max_cwnd = 0;
+
+ if (stcb->asoc.numnets == 1) {
+ /* No others but net */
+ return (TAILQ_FIRST(&stcb->asoc.nets));
+ }
+ /*
+ * JRS 5/14/07 - If mode is set to 2, use the CMT PF find alternate
+ * net algorithm. This algorithm chooses the active destination (not
+ * in PF state) with the largest cwnd value. If all destinations are
+ * in PF state, unreachable, or unconfirmed, choose the destination
+ * that is in PF state with the lowest error count. In case of a
+ * tie, choose the destination that was most recently active.
+ */
+ if (mode == 2) {
+ TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
+ /*
+ * JRS 5/14/07 - If the destination is unreachable
+ * or unconfirmed, skip it.
+ */
+ if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
+ (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ continue;
+ }
+ /*
+ * JRS 5/14/07 - If the destination is reachable
+ * but in PF state, compare the error count of the
+ * destination to the minimum error count seen thus
+ * far. Store the destination with the lower error
+ * count. If the error counts are equal, store the
+ * destination that was most recently active.
+ */
+ if (mnet->dest_state & SCTP_ADDR_PF) {
+ /*
+ * JRS 5/14/07 - If the destination under
+ * consideration is the current destination,
+ * work as if the error count is one higher.
+ * The actual error count will not be
+ * incremented until later in the t3
+ * handler.
+ */
+ if (mnet == net) {
+ if (min_errors == -1) {
+ min_errors = mnet->error_count + 1;
+ min_errors_net = mnet;
+ } else if (mnet->error_count + 1 < min_errors) {
+ min_errors = mnet->error_count + 1;
+ min_errors_net = mnet;
+ } else if (mnet->error_count + 1 == min_errors
+ && mnet->last_active > min_errors_net->last_active) {
+ min_errors_net = mnet;
+ min_errors = mnet->error_count + 1;
+ }
+ continue;
+ } else {
+ if (min_errors == -1) {
+ min_errors = mnet->error_count;
+ min_errors_net = mnet;
+ } else if (mnet->error_count < min_errors) {
+ min_errors = mnet->error_count;
+ min_errors_net = mnet;
+ } else if (mnet->error_count == min_errors
+ && mnet->last_active > min_errors_net->last_active) {
+ min_errors_net = mnet;
+ min_errors = mnet->error_count;
+ }
+ continue;
+ }
+ }
+ /*
+ * JRS 5/14/07 - If the destination is reachable and
+ * not in PF state, compare the cwnd of the
+ * destination to the highest cwnd seen thus far.
+ * Store the destination with the higher cwnd value.
+ * If the cwnd values are equal, randomly choose one
+ * of the two destinations.
+ */
+ if (max_cwnd < mnet->cwnd) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ } else if (max_cwnd == mnet->cwnd) {
+ uint32_t rndval;
+ uint8_t this_random;
+
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval, sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ }
+ if (this_random % 2 == 1) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd; /* Useless? */
+ }
+ }
+ }
+ /*
+ * JRS 5/14/07 - After all destinations have been considered
+ * as alternates, check to see if there was some active
+ * destination (not in PF state). If not, check to see if
+ * there was some PF destination with the minimum number of
+ * errors. If not, return the original destination. If
+ * there is a min_errors_net, remove the PF flag from that
+ * destination, set the cwnd to one or two MTUs, and return
+ * the destination as an alt. If there was some active
+ * destination with a highest cwnd, return the destination
+ * as an alt.
+ */
+ if (max_cwnd_net == NULL) {
+ if (min_errors_net == NULL) {
+ return (net);
+ }
+ min_errors_net->dest_state &= ~SCTP_ADDR_PF;
+ min_errors_net->cwnd = min_errors_net->mtu * stcb->asoc.sctp_cmt_pf;
+ if (SCTP_OS_TIMER_PENDING(&min_errors_net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, min_errors_net,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_2);
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to active with %d errors.\n",
+ min_errors_net, min_errors_net->error_count);
+ return (min_errors_net);
+ } else {
+ return (max_cwnd_net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If mode is set to 1, use the CMT policy for
+ * choosing an alternate net.
+ */
+ else if (mode == 1) {
+ TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
+ if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
+ (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ /*
+ * will skip ones that are not-reachable or
+ * unconfirmed
+ */
+ continue;
+ }
+ if (max_cwnd < mnet->cwnd) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ } else if (max_cwnd == mnet->cwnd) {
+ uint32_t rndval;
+ uint8_t this_random;
+
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx = 0;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ }
+ if (this_random % 2) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ }
+ }
+ }
+ if (max_cwnd_net) {
+ return (max_cwnd_net);
+ }
+ }
+ mnet = net;
+ once = 0;
+
+ if (mnet == NULL) {
+ mnet = TAILQ_FIRST(&stcb->asoc.nets);
+ if (mnet == NULL) {
+ return (NULL);
+ }
+ }
+ do {
+ alt = TAILQ_NEXT(mnet, sctp_next);
+ if (alt == NULL) {
+ once++;
+ if (once > 1) {
+ break;
+ }
+ alt = TAILQ_FIRST(&stcb->asoc.nets);
+ if (alt == NULL) {
+ return (NULL);
+ }
+ }
+ if (alt->ro.ro_rt == NULL) {
+ if (alt->ro._s_addr) {
+ sctp_free_ifa(alt->ro._s_addr);
+ alt->ro._s_addr = NULL;
+ }
+ alt->src_addr_selected = 0;
+ }
+ /* sa_ignore NO_NULL_CHK */
+ if (((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
+ (alt->ro.ro_rt != NULL) &&
+ (!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))) {
+ /* Found a reachable address */
+ break;
+ }
+ mnet = alt;
+ } while (alt != NULL);
+
+ if (alt == NULL) {
+ /* Case where NO in-service network exists (dormant state) */
+ /* we rotate destinations */
+ once = 0;
+ mnet = net;
+ do {
+ if (mnet == NULL) {
+ return (TAILQ_FIRST(&stcb->asoc.nets));
+ }
+ alt = TAILQ_NEXT(mnet, sctp_next);
+ if (alt == NULL) {
+ once++;
+ if (once > 1) {
+ break;
+ }
+ alt = TAILQ_FIRST(&stcb->asoc.nets);
+ }
+ /* sa_ignore NO_NULL_CHK */
+ if ((!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
+ (alt != net)) {
+ /* Found an alternate address */
+ break;
+ }
+ mnet = alt;
+ } while (alt != NULL);
+ }
+ if (alt == NULL) {
+ return (net);
+ }
+ return (alt);
+}
+
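+/*
+ * Double the net's RTO (seeded with minrto and bounded by maxrto) and,
+ * unless this was a window probe or nothing was marked or abandoned, let
+ * the congestion control module adjust cwnd after the timeout.
+ */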
+static void
+sctp_backoff_on_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ int win_probe,
+ int num_marked, int num_abandoned)
+{
+ if (net->RTO == 0) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ net->RTO <<= 1;
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ if ((win_probe == 0) && (num_marked || num_abandoned)) {
+ /* We don't apply penalty to window probe scenarios */
+ /* JRS - Use the congestion control given in the CC module */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout(stcb, net);
+ }
+}
+
+#ifndef INVARIANTS
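+/*
+ * Drop chunks from the sent queue whose TSN is at or below the cumulative
+ * ack point; called from sctp_mark_all_for_resend() when the sent queue is
+ * found to be out of order and INVARIANTS is not compiled in.
+ */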
+static void
+sctp_recover_sent_list(struct sctp_tcb *stcb)
+{
+ struct sctp_tmit_chunk *chk, *tp2;
+ struct sctp_association *asoc;
+
+ asoc = &stcb->asoc;
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ for (; chk != NULL; chk = tp2) {
+ tp2 = TAILQ_NEXT(chk, sctp_next);
+ if ((compare_with_wrap(stcb->asoc.last_acked_seq,
+ chk->rec.data.TSN_seq,
+ MAX_TSN)) ||
+ (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) {
+
+ SCTP_PRINTF("Found chk:%p tsn:%x <= last_acked_seq:%x\n",
+ chk, chk->rec.data.TSN_seq, stcb->asoc.last_acked_seq);
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ if (chk->pr_sctp_on) {
+ if (asoc->pr_sctp_cnt != 0)
+ asoc->pr_sctp_cnt--;
+ }
+ if (chk->data) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_free_bufspace(stcb, asoc, chk, 1);
+ sctp_m_freem(chk->data);
+ if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(chk->flags)) {
+ asoc->sent_queue_cnt_removeable--;
+ }
+ }
+ chk->data = NULL;
+ asoc->sent_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ }
+ }
+ SCTP_PRINTF("after recover order is as follows\n");
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ for (; chk != NULL; chk = tp2) {
+ tp2 = TAILQ_NEXT(chk, sctp_next);
+ SCTP_PRINTF("chk:%p TSN:%x\n", chk, chk->rec.data.TSN_seq);
+ }
+}
+
+#endif
+
+static int
+sctp_mark_all_for_resend(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct sctp_nets *alt,
+ int window_probe,
+ int *num_marked,
+ int *num_abandoned)
+{
+
+ /*
+ * Mark all chunks (well not all) that were sent to *net for
+ * retransmission. Move them to alt for their destination as well...
+ * We only mark chunks that have been outstanding long enough to
+ * have received feedback.
+ */
+ struct sctp_tmit_chunk *chk, *tp2;
+ struct sctp_nets *lnets;
+ struct timeval now, min_wait, tv;
+ int cur_rtt;
+ int cnt_abandoned;
+ int audit_tf, num_mk, fir;
+ unsigned int cnt_mk;
+ uint32_t orig_flight, orig_tf;
+ uint32_t tsnlast, tsnfirst;
+ int recovery_cnt = 0;
+
+
+ /* none in flight now */
+ audit_tf = 0;
+ fir = 0;
+ /*
+ * figure out how long a data chunk must be pending before we can
+ * mark it.
+ */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* get cur rto in micro-seconds */
+ cur_rtt = (((net->lastsa >> 2) + net->lastsv) >> 1);
+ cur_rtt *= 1000;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(cur_rtt,
+ stcb->asoc.peers_rwnd,
+ window_probe,
+ SCTP_FR_T3_MARK_TIME);
+ sctp_log_fr(net->flight_size,
+ SCTP_OS_TIMER_PENDING(&net->fr_timer.timer),
+ SCTP_OS_TIMER_ACTIVE(&net->fr_timer.timer),
+ SCTP_FR_CWND_REPORT);
+ sctp_log_fr(net->flight_size, net->cwnd, stcb->asoc.total_flight, SCTP_FR_CWND_REPORT);
+ }
+ tv.tv_sec = cur_rtt / 1000000;
+ tv.tv_usec = cur_rtt % 1000000;
+ min_wait = now;
+ timevalsub(&min_wait, &tv);
+ if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
+ /*
+ * if we hit here, we don't have enough seconds on the clock
+ * to account for the RTO. We just let the lower seconds be
+ * the bounds and don't worry about it. This may mean we
+ * will mark a lot more than we should.
+ */
+ min_wait.tv_sec = min_wait.tv_usec = 0;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(cur_rtt, now.tv_sec, now.tv_usec, SCTP_FR_T3_MARK_TIME);
+ sctp_log_fr(0, min_wait.tv_sec, min_wait.tv_usec, SCTP_FR_T3_MARK_TIME);
+ }
+ /*
+ * Our rwnd will be incorrect here since we are not adding back the
+ * cnt * mbuf but we will fix that down below.
+ */
+ orig_flight = net->flight_size;
+ orig_tf = stcb->asoc.total_flight;
+
+ net->fast_retran_ip = 0;
+ /* Now on to each chunk */
+ cnt_abandoned = 0;
+ num_mk = cnt_mk = 0;
+ tsnfirst = tsnlast = 0;
+#ifndef INVARIANTS
+start_again:
+#endif
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ for (; chk != NULL; chk = tp2) {
+ tp2 = TAILQ_NEXT(chk, sctp_next);
+ if ((compare_with_wrap(stcb->asoc.last_acked_seq,
+ chk->rec.data.TSN_seq,
+ MAX_TSN)) ||
+ (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) {
+ /* Strange case our list got out of order? */
+ SCTP_PRINTF("Our list is out of order? last_acked:%x chk:%x",
+ (unsigned int)stcb->asoc.last_acked_seq, (unsigned int)chk->rec.data.TSN_seq);
+ recovery_cnt++;
+#ifdef INVARIANTS
+ panic("last acked >= chk on sent-Q");
+#else
+ SCTP_PRINTF("Recover attempts a restart cnt:%d\n", recovery_cnt);
+ sctp_recover_sent_list(stcb);
+ if (recovery_cnt < 10) {
+ goto start_again;
+ } else {
+ SCTP_PRINTF("Recovery fails %d times??\n", recovery_cnt);
+ }
+#endif
+ }
+ if ((chk->whoTo == net) && (chk->sent < SCTP_DATAGRAM_ACKED)) {
+ /*
+ * found one to mark: If it is less than
+ * DATAGRAM_ACKED it MUST not be a skipped or marked
+ * TSN but instead one that is either already set
+ * for retransmission OR one that needs
+ * retransmission.
+ */
+
+ /* validate its been outstanding long enough */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(chk->rec.data.TSN_seq,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_MARK_TIME);
+ }
+ if ((chk->sent_rcv_time.tv_sec > min_wait.tv_sec) && (window_probe == 0)) {
+ /*
+ * we have reached a chunk that was sent
+ * some seconds past our min; forget it, we
+ * will find no more to send.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(0,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_STOPPED);
+ }
+ continue;
+ } else if ((chk->sent_rcv_time.tv_sec == min_wait.tv_sec) &&
+ (window_probe == 0)) {
+ /*
+ * we must look at the microseconds to
+ * know.
+ */
+ if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
+ /*
+ * ok it was sent after our boundary
+ * time.
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(0,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_STOPPED);
+ }
+ continue;
+ }
+ }
+ if (stcb->asoc.peer_supports_prsctp && PR_SCTP_TTL_ENABLED(chk->flags)) {
+ /* Is it expired? */
+ if (timevalcmp(&now, &chk->rec.data.timetodrop, >)) {
+ /* Yes so drop it */
+ if (chk->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb,
+ chk,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ SCTP_SO_NOT_LOCKED);
+ cnt_abandoned++;
+ }
+ continue;
+ }
+ }
+ if (stcb->asoc.peer_supports_prsctp && PR_SCTP_RTX_ENABLED(chk->flags)) {
+ /* Has it been retransmitted tv_sec times? */
+ if (chk->snd_count > chk->rec.data.timetodrop.tv_sec) {
+ if (chk->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb,
+ chk,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ SCTP_SO_NOT_LOCKED);
+ cnt_abandoned++;
+ }
+ continue;
+ }
+ }
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ num_mk++;
+ if (fir == 0) {
+ fir = 1;
+ tsnfirst = chk->rec.data.TSN_seq;
+ }
+ tsnlast = chk->rec.data.TSN_seq;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
+ 0, SCTP_FR_T3_MARKED);
+ }
+ if (chk->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ chk->whoTo->cwnd -= chk->book_size;
+ chk->rec.data.chunk_was_revoked = 0;
+ }
+ net->marked_retrans++;
+ stcb->asoc.marked_retrans++;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND_TO,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ stcb->asoc.peers_rwnd += chk->send_size;
+ stcb->asoc.peers_rwnd += SCTP_BASE_SYSCTL(sctp_peer_chunk_oh);
+ }
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ SCTP_STAT_INCR(sctps_markedretrans);
+
+ /* reset the TSN for striking and other FR stuff */
+ chk->rec.data.doing_fast_retransmit = 0;
+ /* Clear any time so NO RTT is being done */
+ chk->do_rtt = 0;
+ if (alt != net) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->no_fr_allowed = 1;
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ } else {
+ chk->no_fr_allowed = 0;
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
+ chk->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
+ } else {
+ chk->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.TSN_seq;
+ }
+ }
+ /*
+ * CMT: Do not allow FRs on retransmitted TSNs.
+ */
+ if (stcb->asoc.sctp_cmt_on_off == 1) {
+ chk->no_fr_allowed = 1;
+ }
+#ifdef THIS_SHOULD_NOT_BE_DONE
+ } else if (chk->sent == SCTP_DATAGRAM_ACKED) {
+ /* remember highest acked one */
+ could_be_sent = chk;
+#endif
+ }
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ cnt_mk++;
+ }
+ }
+ if ((orig_flight - net->flight_size) != (orig_tf - stcb->asoc.total_flight)) {
+ /* we did not subtract the same things? */
+ audit_tf = 1;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(tsnfirst, tsnlast, num_mk, SCTP_FR_T3_TIMEOUT);
+ }
+#ifdef SCTP_DEBUG
+ if (num_mk) {
+ SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
+ tsnlast);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%ld\n",
+ num_mk, (u_long)stcb->asoc.peers_rwnd);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
+ tsnlast);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%d\n",
+ num_mk,
+ (int)stcb->asoc.peers_rwnd);
+ }
+#endif
+ *num_marked = num_mk;
+ *num_abandoned = cnt_abandoned;
+ /*
+ * Now check for an ECN Echo that may be stranded, and include the
+ * cnt_mk'd to have all resends in the control queue.
+ */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ cnt_mk++;
+ }
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ cnt_mk++;
+ }
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+#ifdef THIS_SHOULD_NOT_BE_DONE
+ if ((stcb->asoc.sent_queue_retran_cnt == 0) && (could_be_sent)) {
+ /* fix it so we retransmit the highest acked anyway */
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ cnt_mk++;
+ could_be_sent->sent = SCTP_DATAGRAM_RESEND;
+ }
+#endif
+ if (stcb->asoc.sent_queue_retran_cnt != cnt_mk) {
+#ifdef INVARIANTS
+ SCTP_PRINTF("Local Audit says there are %d for retran asoc cnt:%d we marked:%d this time\n",
+ cnt_mk, stcb->asoc.sent_queue_retran_cnt, num_mk);
+#endif
+#ifndef SCTP_AUDITING_ENABLED
+ stcb->asoc.sent_queue_retran_cnt = cnt_mk;
+#endif
+ }
+ if (audit_tf) {
+ SCTPDBG(SCTP_DEBUG_TIMER4,
+ "Audit total flight due to negative value net:%p\n",
+ net);
+ stcb->asoc.total_flight = 0;
+ stcb->asoc.total_flight_count = 0;
+ /* Clear all networks flight size */
+ TAILQ_FOREACH(lnets, &stcb->asoc.nets, sctp_next) {
+ lnets->flight_size = 0;
+ SCTPDBG(SCTP_DEBUG_TIMER4,
+ "Net:%p c-f cwnd:%d ssthresh:%d\n",
+ lnets, lnets->cwnd, lnets->ssthresh);
+ }
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(chk);
+ sctp_total_flight_increase(stcb, chk);
+ }
+ }
+ }
+ /*
+ * Set up the ECN nonce re-sync point. We do this since
+ * retransmissions are NOT set up for ECN. This means that due to
+ * Karn's rule, we don't know the total of the peer's ECN bits.
+ */
+ chk = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (chk == NULL) {
+ stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq;
+ } else {
+ stcb->asoc.nonce_resync_tsn = chk->rec.data.TSN_seq;
+ }
+ stcb->asoc.nonce_wait_for_ecne = 0;
+ stcb->asoc.nonce_sum_check = 0;
+ /* The return value is ignored by the caller; we always return 0 here. */
+ return (0);
+}
+
+
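+/*
+ * T3-rxt timer expiry: pick an alternate destination (CMT/PF aware), mark
+ * outstanding chunks on the timed-out net for retransmission, back off the
+ * RTO and cwnd, and apply threshold management. Returns 1 if the
+ * association was destroyed, 0 otherwise.
+ */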
+int
+sctp_t3rxt_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ int win_probe, num_mk, num_abandoned;
+
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(0, 0, 0, SCTP_FR_T3_TIMEOUT);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
+ struct sctp_nets *lnet;
+
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if (net == lnet) {
+ sctp_log_cwnd(stcb, lnet, 1, SCTP_CWND_LOG_FROM_T3);
+ } else {
+ sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_LOG_FROM_T3);
+ }
+ }
+ }
+ /* Find an alternate and mark those for retransmission */
+ if ((stcb->asoc.peers_rwnd == 0) &&
+ (stcb->asoc.total_flight < net->mtu)) {
+ SCTP_STAT_INCR(sctps_timowindowprobe);
+ win_probe = 1;
+ } else {
+ win_probe = 0;
+ }
+
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination is not already
+ * in PF state, set the destination to PF state and store the
+ * current time as the time that the destination was last active. In
+ * addition, find an alternate destination with PF-based
+ * find_alt_net().
+ */
+ if ((stcb->asoc.sctp_cmt_on_off == 1) &&
+ (stcb->asoc.sctp_cmt_pf > 0)) {
+ if ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF) {
+ net->dest_state |= SCTP_ADDR_PF;
+ net->last_active = sctp_get_tick_count();
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from active to PF.\n",
+ net);
+ }
+ alt = sctp_find_alternate_net(stcb, net, 2);
+ } else if (stcb->asoc.sctp_cmt_on_off == 1) {
+ /*
+ * CMT: Using RTX_SSTHRESH policy for CMT. If CMT is being
+ * used, then pick dest with largest ssthresh for any
+ * retransmission.
+ */
+ alt = sctp_find_alternate_net(stcb, net, 1);
+ /*
+ * CUCv2: If a different dest is picked for the
+ * retransmission, then new (rtx-)pseudo_cumack needs to be
+ * tracked for orig dest. Let CUCv2 track new (rtx-)
+ * pseudo-cumack always.
+ */
+ net->find_pseudo_cumack = 1;
+ net->find_rtx_pseudo_cumack = 1;
+ } else { /* CMT is OFF */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+ }
+ num_mk = 0;
+ num_abandoned = 0;
+ (void)sctp_mark_all_for_resend(stcb, net, alt, win_probe,
+ &num_mk, &num_abandoned);
+ /* FR Loss recovery just ended with the T3. */
+ stcb->asoc.fast_retran_loss_recovery = 0;
+
+ /* CMT FR loss recovery ended with the T3 */
+ net->fast_retran_loss_recovery = 0;
+
+ /*
+ * setup the sat loss recovery that prevents satellite cwnd advance.
+ */
+ stcb->asoc.sat_t3_loss_recovery = 1;
+ stcb->asoc.sat_t3_recovery_tsn = stcb->asoc.sending_seq;
+
+ /* Backoff the timer and cwnd */
+ sctp_backoff_on_timeout(stcb, net, win_probe, num_mk, num_abandoned);
+ if (win_probe == 0) {
+ /* We don't do normal threshold management on window probes */
+ if (sctp_threshold_management(inp, stcb, net,
+ stcb->asoc.max_send_times)) {
+ /* Association was destroyed */
+ return (1);
+ } else {
+ if (net != stcb->asoc.primary_destination) {
+ /* send an immediate HB if our RTO is stale */
+ struct timeval now;
+ unsigned int ms_goneby;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ if (net->last_sent_time.tv_sec) {
+ ms_goneby = (now.tv_sec - net->last_sent_time.tv_sec) * 1000;
+ } else {
+ ms_goneby = 0;
+ }
+ if ((ms_goneby > net->RTO) || (net->RTO == 0)) {
+ /*
+ * no recent feedback in an RTO or
+ * more, request an RTT update
+ */
+ if (sctp_send_hb(stcb, 1, net) < 0)
+ /*
+ * Less than 0 means we lost
+ * the assoc
+ */
+ return (1);
+ }
+ }
+ }
+ } else {
+ /*
+ * For a window probe we don't penalize the nets but only
+ * the association. This may fail it if SACKs are not coming
+ * back. If SACKs are coming with the rwnd locked at 0, we will
+ * continue to hold things waiting for the rwnd to rise.
+ */
+ if (sctp_threshold_management(inp, stcb, NULL,
+ stcb->asoc.max_send_times)) {
+ /* Association was destroyed */
+ return (1);
+ }
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /* Move all pending over too */
+ sctp_move_chunks_from_net(stcb, net);
+
+ /*
+ * Get the address that failed, to force a new src address
+ * selection and a route allocation.
+ */
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+
+ /* Force a route allocation too */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ /* Was it our primary? */
+ if ((stcb->asoc.primary_destination == net) && (alt != net)) {
+ /*
+ * Yes, note it as such and find an alternate. Note:
+ * this means the HB code must use this to reset the
+ * primary if it goes active AND if someone does a
+ * change-primary then this flag must be cleared
+ * from any net structures.
+ */
+ if (sctp_set_primary_addr(stcb,
+ (struct sockaddr *)NULL,
+ alt) == 0) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ }
+ }
+ } else if ((stcb->asoc.sctp_cmt_on_off == 1) &&
+ (stcb->asoc.sctp_cmt_pf > 0) &&
+ ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
+ /*
+ * JRS 5/14/07 - If the destination hasn't failed completely
+ * but is in PF state, a PF-heartbeat needs to be sent
+ * manually.
+ */
+ if (sctp_send_hb(stcb, 1, net) < 0)
+ /* Return less than 0 means we lost the association */
+ return (1);
+ }
+ /*
+ * Special case for the cookie-echoed state: we don't do output but must
+ * await the COOKIE-ACK before retransmission
+ */
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /*
+ * Here we just reset the timer and start again since we
+ * have not established the asoc
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ return (0);
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc);
+ /* C3. See if we need to send a Fwd-TSN */
+ if (compare_with_wrap(stcb->asoc.advanced_peer_ack_point,
+ stcb->asoc.last_acked_seq, MAX_TSN)) {
+ /*
+ * ISSUE with ECN, see FWD-TSN processing for notes
+ * on issues that will occur when the ECN NONCE
+ * stuff is put into SCTP for cross checking.
+ */
+ send_forward_tsn(stcb, &stcb->asoc);
+ if (lchk) {
+ /* Assure a timer is up */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, lchk->whoTo);
+ }
+ }
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+ return (0);
+}
+
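+/*
+ * T1-init timer expiry: apply threshold management, back off the RTO
+ * (bounded by initial_init_rto_max), possibly switch the primary
+ * destination, and retransmit the INIT. Returns 1 if the association was
+ * destroyed, 0 otherwise.
+ */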
+int
+sctp_t1init_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /* bump the thresholds */
+ if (stcb->asoc.delayed_connection) {
+ /*
+ * special hook for delayed connection. The library did NOT
+ * complete the rest of its sends.
+ */
+ stcb->asoc.delayed_connection = 0;
+ sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
+ return (0);
+ }
+ if (SCTP_GET_STATE((&stcb->asoc)) != SCTP_STATE_COOKIE_WAIT) {
+ return (0);
+ }
+ if (sctp_threshold_management(inp, stcb, net,
+ stcb->asoc.max_init_times)) {
+ /* Association was destroyed */
+ return (1);
+ }
+ stcb->asoc.dropped_special_cnt = 0;
+ sctp_backoff_on_timeout(stcb, stcb->asoc.primary_destination, 1, 0, 0);
+ if (stcb->asoc.initial_init_rto_max < net->RTO) {
+ net->RTO = stcb->asoc.initial_init_rto_max;
+ }
+ if (stcb->asoc.numnets > 1) {
+ /* If we have more than one addr use it */
+ struct sctp_nets *alt;
+
+ alt = sctp_find_alternate_net(stcb, stcb->asoc.primary_destination, 0);
+ if (alt != stcb->asoc.primary_destination) {
+ sctp_move_chunks_from_net(stcb, stcb->asoc.primary_destination);
+ stcb->asoc.primary_destination = alt;
+ }
+ }
+ /* Send out a new init */
+ sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
+ return (0);
+}
+
+/*
+ * For cookie and asconf we actually need to find and mark for resend, then
+ * increment the resend counter (after all the threshold management stuff of
+ * course).
+ */
+int
+sctp_cookie_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *cookie;
+
+ /* first before all else we must find the cookie */
+ TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue, sctp_next) {
+ if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ break;
+ }
+ }
+ if (cookie == NULL) {
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /* FOOBAR! */
+ struct mbuf *oper;
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ }
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
+ sctp_abort_an_association(inp, stcb, SCTP_INTERNAL_ERROR,
+ oper, SCTP_SO_NOT_LOCKED);
+ } else {
+#ifdef INVARIANTS
+ panic("Cookie timer expires in wrong state?");
+#else
+ SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(&stcb->asoc));
+ return (0);
+#endif
+ }
+ return (0);
+ }
+ /* Ok we found the cookie, threshold management next */
+ if (sctp_threshold_management(inp, stcb, cookie->whoTo,
+ stcb->asoc.max_init_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /*
+ * cleared threshold management, now let's back off the address & select
+ * an alternate
+ */
+ stcb->asoc.dropped_special_cnt = 0;
+ sctp_backoff_on_timeout(stcb, cookie->whoTo, 1, 0, 0);
+ alt = sctp_find_alternate_net(stcb, cookie->whoTo, 0);
+ if (alt != cookie->whoTo) {
+ sctp_free_remote_addr(cookie->whoTo);
+ cookie->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ /* Now mark the retran info */
+ if (cookie->sent != SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ cookie->sent = SCTP_DATAGRAM_RESEND;
+ /*
+ * Now call the output routine to kick out the cookie again. Note we
+ * don't mark any chunks for retran so that FR will need to kick in
+ * to move these (or a send timer).
+ */
+ return (0);
+}
+
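+/*
+ * Stream reset timer: locate the outstanding STRRESET chunk, apply
+ * threshold management and RTO backoff, move it (and any stranded ECN Echo)
+ * to an alternate destination, mark it for retransmission and restart the
+ * timer.
+ */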
+int
+sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *strrst = NULL, *chk = NULL;
+
+ if (stcb->asoc.stream_reset_outstanding == 0) {
+ return (0);
+ }
+ /* find the existing STRRESET, we use the seq number we sent out on */
+ (void)sctp_find_stream_reset(stcb, stcb->asoc.str_reset_seq_out, &strrst);
+ if (strrst == NULL) {
+ return (0);
+ }
+ /* do threshold management */
+ if (sctp_threshold_management(inp, stcb, strrst->whoTo,
+ stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /*
+ * cleared threshold management, now let's back off the address & select
+ * an alternate
+ */
+ sctp_backoff_on_timeout(stcb, strrst->whoTo, 1, 0, 0);
+ alt = sctp_find_alternate_net(stcb, strrst->whoTo, 0);
+ sctp_free_remote_addr(strrst->whoTo);
+ strrst->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+
+ /* See if a ECN Echo is also stranded */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /*
+ * If the address went un-reachable, we need to move to
+ * alternates for ALL chk's in queue
+ */
+ sctp_move_chunks_from_net(stcb, net);
+ }
+ /* mark the retran info */
+ if (strrst->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ strrst->sent = SCTP_DATAGRAM_RESEND;
+
+ /* restart the timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, inp, stcb, strrst->whoTo);
+ return (0);
+}
+
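+/*
+ * ASCONF timer: send a new ASCONF if none is queued; otherwise apply
+ * threshold management, back off the RTO, move the queued ASCONF chunks
+ * (and any stranded ECN Echo) to an alternate destination and mark them for
+ * retransmission, giving up and marking the peer ASCONF-incapable if it
+ * never responds.
+ */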
+int
+sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *asconf, *chk, *nchk;
+
+ /* is this a first send, or a retransmission? */
+ if (TAILQ_EMPTY(&stcb->asoc.asconf_send_queue)) {
+ /* compose a new ASCONF chunk and send it */
+ sctp_send_asconf(stcb, net, SCTP_ADDR_NOT_LOCKED);
+ } else {
+ /*
+ * Retransmission of the existing ASCONF is needed
+ */
+
+ /* find the existing ASCONF */
+ asconf = TAILQ_FIRST(&stcb->asoc.asconf_send_queue);
+ if (asconf == NULL) {
+ return (0);
+ }
+ /* do threshold management */
+ if (sctp_threshold_management(inp, stcb, asconf->whoTo,
+ stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ if (asconf->snd_count > stcb->asoc.max_send_times) {
+ /*
+ * Something is rotten: our peer is not responding
+ * to ASCONFs but apparently is to other chunks.
+ * i.e. it is not properly handling the chunk type
+ * upper bits. Mark this peer as ASCONF incapable
+ * and cleanup.
+ */
+ SCTPDBG(SCTP_DEBUG_TIMER1, "asconf_timer: Peer has not responded to our repeated ASCONFs\n");
+ sctp_asconf_cleanup(stcb, net);
+ return (0);
+ }
+ /*
+ * cleared threshold management, so now backoff the net and
+ * select an alternate
+ */
+ sctp_backoff_on_timeout(stcb, asconf->whoTo, 1, 0, 0);
+ alt = sctp_find_alternate_net(stcb, asconf->whoTo, 0);
+ if (asconf->whoTo != alt) {
+ sctp_free_remote_addr(asconf->whoTo);
+ asconf->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ /* See if an ECN Echo is also stranded */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ for (chk = asconf; chk; chk = nchk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->whoTo != alt) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ if (asconf->sent != SCTP_DATAGRAM_RESEND && chk->sent != SCTP_DATAGRAM_UNSENT)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /*
+ * If the address went un-reachable, we need to move
+ * to the alternate for ALL chunks in queue
+ */
+ sctp_move_chunks_from_net(stcb, net);
+ }
+ /* mark the retran info */
+ if (asconf->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ asconf->sent = SCTP_DATAGRAM_RESEND;
+
+ /* send another ASCONF if any are queued and we are able to */
+ sctp_send_asconf(stcb, alt, SCTP_ADDR_NOT_LOCKED);
+ }
+ return (0);
+}
+
+/* Mobility adaptation */
+void
+sctp_delete_prim_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ if (stcb->asoc.deleted_primary == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: deleted_primary is not stored...\n");
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ return;
+ }
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: finished to keep deleted primary ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
+ sctp_free_remote_addr(stcb->asoc.deleted_primary);
+ stcb->asoc.deleted_primary = NULL;
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ return;
+}
+
+/*
+ * For the shutdown and shutdown-ack, we do not keep one around on the
+ * control queue. This means we must generate a new one and call the general
+ * chunk output routine, AFTER having done threshold management.
+ * It is assumed that net is non-NULL.
+ */
+int
+sctp_shutdown_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+
+ /* first do threshold management */
+ if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ sctp_backoff_on_timeout(stcb, net, 1, 0, 0);
+ /* second select an alternative */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+
+ /* third generate a shutdown into the queue for our net */
+ sctp_send_shutdown(stcb, alt);
+
+ /* fourth restart timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, inp, stcb, alt);
+ return (0);
+}
+
+int
+sctp_shutdownack_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+
+ /* first do threshold management */
+ if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ sctp_backoff_on_timeout(stcb, net, 1, 0, 0);
+ /* second select an alternative */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+
+ /* third generate a shutdown into the queue for our net */
+ sctp_send_shutdown_ack(stcb, alt);
+
+ /* fourth restart timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, inp, stcb, alt);
+ return (0);
+}
+
+static void
+sctp_audit_stream_queues_for_size(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_stream_out *outs;
+ struct sctp_stream_queue_pending *sp;
+ unsigned int chks_in_queue = 0;
+ int being_filled = 0;
+
+ /*
+ * This function is ONLY called when the send/sent queues are empty.
+ */
+ if ((stcb == NULL) || (inp == NULL))
+ return;
+
+ if (stcb->asoc.sent_queue_retran_cnt) {
+ SCTP_PRINTF("Hmm, sent_queue_retran_cnt is non-zero %d\n",
+ stcb->asoc.sent_queue_retran_cnt);
+ stcb->asoc.sent_queue_retran_cnt = 0;
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (TAILQ_EMPTY(&stcb->asoc.out_wheel)) {
+ int i, cnt = 0;
+
+ /* Check to see if a spoke fell off the wheel */
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ sctp_insert_on_wheel(stcb, &stcb->asoc, &stcb->asoc.strmout[i], 1);
+ cnt++;
+ }
+ }
+ if (cnt) {
+ /* yep, we lost a spoke or two */
+ SCTP_PRINTF("Found an additional %d streams NOT on outwheel, corrected\n", cnt);
+ } else {
+ /* no spokes lost, */
+ stcb->asoc.total_output_queue_size = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ return;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ /* Check to see if some data queued, if so report it */
+ TAILQ_FOREACH(outs, &stcb->asoc.out_wheel, next_spoke) {
+ if (!TAILQ_EMPTY(&outs->outqueue)) {
+ TAILQ_FOREACH(sp, &outs->outqueue, next) {
+ if (sp->msg_is_complete)
+ being_filled++;
+ chks_in_queue++;
+ }
+ }
+ }
+ if (chks_in_queue != stcb->asoc.stream_queue_cnt) {
+ SCTP_PRINTF("Hmm, stream queue cnt at %d I counted %d in stream out wheel\n",
+ stcb->asoc.stream_queue_cnt, chks_in_queue);
+ }
+ if (chks_in_queue) {
+ /* call the output queue function */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
+ (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
+ /*
+ * Probably should go in and make it go back through
+ * and add fragments allowed
+ */
+ if (being_filled == 0) {
+ SCTP_PRINTF("Still nothing moved %d chunks are stuck\n",
+ chks_in_queue);
+ }
+ }
+ } else {
+ SCTP_PRINTF("Found no chunks on any queue tot:%lu\n",
+ (u_long)stcb->asoc.total_output_queue_size);
+ stcb->asoc.total_output_queue_size = 0;
+ }
+}
+
+int
+sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int cnt_of_unconf)
+{
+ int ret;
+
+ if (net) {
+ if (net->hb_responded == 0) {
+ if (net->ro._s_addr) {
+ /*
+ * Invalidate the src address if we did not
+ * get a response last time.
+ */
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ sctp_backoff_on_timeout(stcb, net, 1, 0, 0);
+ }
+ /* Zero PBA, if it needs it */
+ if (net->partial_bytes_acked) {
+ net->partial_bytes_acked = 0;
+ }
+ }
+ if ((stcb->asoc.total_output_queue_size > 0) &&
+ (TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
+ (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
+ sctp_audit_stream_queues_for_size(inp, stcb);
+ }
+ /* Send a new HB, this will do threshold management, pick a new dest */
+ if (cnt_of_unconf == 0) {
+ if (sctp_send_hb(stcb, 0, NULL) < 0) {
+ return (1);
+ }
+ } else {
+ /*
+ * this will send out extra hb's up to maxburst if there are
+ * any unconfirmed addresses.
+ */
+ uint32_t cnt_sent = 0;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (net->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_sent++;
+ if (net->hb_responded == 0) {
+ /* Did we get a response last time? */
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+ ret = sctp_send_hb(stcb, 1, net);
+ if (ret < 0)
+ return 1;
+ else if (ret == 0) {
+ break;
+ }
+ if (cnt_sent >= SCTP_BASE_SYSCTL(sctp_hb_maxburst))
+ break;
+ }
+ }
+ }
+ return (0);
+}
+
+void
+sctp_pathmtu_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ uint32_t next_mtu, mtu;
+
+ next_mtu = sctp_get_next_mtu(inp, net->mtu);
+
+ if ((next_mtu > net->mtu) && (net->port == 0)) {
+ if ((net->src_addr_selected == 0) ||
+ (net->ro._s_addr == NULL) ||
+ (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
+ if ((net->ro._s_addr != NULL) && (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ } else if (net->ro._s_addr == NULL) {
+#if defined(INET6) && defined(SCTP_EMBEDDED_V6_SCOPE)
+ if (net->ro._l_addr.sa.sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+
+ /* KAME hack: embed scopeid */
+ (void)sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
+ }
+#endif
+
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net, 0, stcb->asoc.vrf_id);
+#if defined(INET6) && defined(SCTP_EMBEDDED_V6_SCOPE)
+ if (net->ro._l_addr.sa.sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+
+ (void)sa6_recoverscope(sin6);
+ }
+#endif /* INET6 */
+ }
+ if (net->ro._s_addr)
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr) {
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt);
+ if (net->port) {
+ mtu -= sizeof(struct udphdr);
+ }
+ if (mtu > next_mtu) {
+ net->mtu = next_mtu;
+ }
+ }
+ }
+ /* restart the timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+}
+
+void
+sctp_autoclose_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct timeval tn, *tim_touse;
+ struct sctp_association *asoc;
+ int ticks_gone_by;
+
+ (void)SCTP_GETTIME_TIMEVAL(&tn);
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ /* Auto close is on */
+ asoc = &stcb->asoc;
+ /* pick the time to use */
+ if (asoc->time_last_rcvd.tv_sec >
+ asoc->time_last_sent.tv_sec) {
+ tim_touse = &asoc->time_last_rcvd;
+ } else {
+ tim_touse = &asoc->time_last_sent;
+ }
+ /* Has enough time transpired to autoclose? */
+ ticks_gone_by = SEC_TO_TICKS(tn.tv_sec - tim_touse->tv_sec);
+ if ((ticks_gone_by > 0) &&
+ (ticks_gone_by >= (int)asoc->sctp_autoclose_ticks)) {
+ /*
+ * autoclose time has hit, call the output routine,
+ * which should do nothing just to be SURE we don't
+ * have hanging data. We can then safely check the
+ * queues and know that we are clear to send
+ * shutdown
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR, SCTP_SO_NOT_LOCKED);
+ /* Are we clean? */
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue)) {
+ /*
+ * there is nothing queued to send, so I'm
+ * done...
+ */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ /* only send SHUTDOWN 1st time thru */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ }
+ } else {
+ /*
+ * No auto close at this time, reset t-o to check
+ * later
+ */
+ int tmp;
+
+ /* fool the timer startup to use the time left */
+ tmp = asoc->sctp_autoclose_ticks;
+ asoc->sctp_autoclose_ticks -= ticks_gone_by;
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
+ net);
+ /* restore the real tick value */
+ asoc->sctp_autoclose_ticks = tmp;
+ }
+ }
+}
diff --git a/freebsd/sys/netinet/sctp_timer.h b/freebsd/sys/netinet/sctp_timer.h
new file mode 100644
index 00000000..34abbace
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_timer.h
@@ -0,0 +1,101 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_timer.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_timer_h__
+#define __sctp_timer_h__
+
+#if defined(_KERNEL) || defined(__Userspace__)
+
+#define SCTP_RTT_SHIFT 3
+#define SCTP_RTT_VAR_SHIFT 2
+
+void
+sctp_early_fr_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+struct sctp_nets *
+sctp_find_alternate_net(struct sctp_tcb *,
+ struct sctp_nets *, int mode);
+
+int
+sctp_threshold_management(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint16_t);
+
+int
+sctp_t3rxt_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_t1init_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_shutdown_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_heartbeat_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, int);
+
+int
+sctp_cookie_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_pathmtu_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+int
+sctp_shutdownack_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+int
+sctp_asconf_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_delete_prim_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_autoclose_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *net);
+
+void sctp_audit_retranmission_queue(struct sctp_association *);
+
+void sctp_iterator_timer(struct sctp_iterator *it);
+
+
+#endif
+#endif
diff --git a/freebsd/sys/netinet/sctp_uio.h b/freebsd/sys/netinet/sctp_uio.h
new file mode 100644
index 00000000..734447ed
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_uio.h
@@ -0,0 +1,1166 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_uio.h,v 1.11 2005/03/06 16:04:18 itojun Exp $ */
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef __sctp_uio_h__
+#define __sctp_uio_h__
+
+
+#if ! defined(_KERNEL)
+#include <freebsd/stdint.h>
+#endif
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/netinet/in.h>
+
+typedef uint32_t sctp_assoc_t;
+
+/* Compatibility to previous define's */
+#define sctp_stream_reset_events sctp_stream_reset_event
+
+/* On/Off setup for subscription to events */
+struct sctp_event_subscribe {
+ uint8_t sctp_data_io_event;
+ uint8_t sctp_association_event;
+ uint8_t sctp_address_event;
+ uint8_t sctp_send_failure_event;
+ uint8_t sctp_peer_error_event;
+ uint8_t sctp_shutdown_event;
+ uint8_t sctp_partial_delivery_event;
+ uint8_t sctp_adaptation_layer_event;
+ uint8_t sctp_authentication_event;
+ uint8_t sctp_sender_dry_event;
+ uint8_t sctp_stream_reset_event;
+};
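+
+/*
+ * Usage sketch (not part of this header): an application typically enables
+ * the notifications it wants via the SCTP_EVENTS socket option, which is
+ * defined in netinet/sctp.h, e.g.
+ *
+ *	struct sctp_event_subscribe ev;
+ *
+ *	memset(&ev, 0, sizeof(ev));
+ *	ev.sctp_association_event = 1;
+ *	ev.sctp_shutdown_event = 1;
+ *	(void)setsockopt(sd, IPPROTO_SCTP, SCTP_EVENTS, &ev, sizeof(ev));
+ */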
+
+/* ancillary data types */
+#define SCTP_INIT 0x0001
+#define SCTP_SNDRCV 0x0002
+#define SCTP_EXTRCV 0x0003
+/*
+ * ancillary data structures
+ */
+struct sctp_initmsg {
+ uint16_t sinit_num_ostreams;
+ uint16_t sinit_max_instreams;
+ uint16_t sinit_max_attempts;
+ uint16_t sinit_max_init_timeo;
+};
+
+/* We add 96 bytes to the size of sctp_sndrcvinfo.
+ * This makes the current structure 128 bytes long
+ * which is nicely 64 bit aligned but also has room
+ * for us to add more and keep ABI compatibility.
+ * For example, already we have the sctp_extrcvinfo
+ * when enabled which is 48 bytes.
+ */
+
+/*
+ * The assoc up needs a verfid;
+ * all sendrcvinfo's need a verfid for SENDING only.
+ */
+
+
+#define SCTP_ALIGN_RESV_PAD 96
+#define SCTP_ALIGN_RESV_PAD_SHORT 80
+
+struct sctp_sndrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+ uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD];
+};
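+
+/*
+ * Size check for the comment above (a sketch, assuming 4-byte alignment of
+ * uint32_t): 3 * 2 bytes of uint16_t fields, 2 bytes of alignment padding,
+ * and 6 * 4 bytes of 32-bit fields give 32 bytes, so adding the
+ * SCTP_ALIGN_RESV_PAD (96) reserve yields the 128 bytes mentioned.
+ */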
+
+struct sctp_extrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint16_t sinfo_pr_policy;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+ uint16_t sreinfo_next_flags;
+ uint16_t sreinfo_next_stream;
+ uint32_t sreinfo_next_aid;
+ uint32_t sreinfo_next_length;
+ uint32_t sreinfo_next_ppid;
+ uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT];
+};
+
+#define SCTP_NO_NEXT_MSG 0x0000
+#define SCTP_NEXT_MSG_AVAIL 0x0001
+#define SCTP_NEXT_MSG_ISCOMPLETE 0x0002
+#define SCTP_NEXT_MSG_IS_UNORDERED 0x0004
+#define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008
+
+struct sctp_snd_all_completes {
+ uint16_t sall_stream;
+ uint16_t sall_flags;
+ uint32_t sall_ppid;
+ uint32_t sall_context;
+ uint32_t sall_num_sent;
+ uint32_t sall_num_failed;
+};
+
+/* Flags that go into the sinfo->sinfo_flags field */
+#define SCTP_EOF 0x0100 /* Start shutdown procedures */
+#define SCTP_ABORT 0x0200 /* Send an ABORT to peer */
+#define SCTP_UNORDERED 0x0400 /* Message is un-ordered */
+#define SCTP_ADDR_OVER 0x0800 /* Override the primary-address */
+#define SCTP_SENDALL 0x1000 /* Send this on all associations */
+#define SCTP_EOR 0x2000 /* end of message signal */
+#define SCTP_SACK_IMMEDIATELY 0x4000 /* Set I-Bit */
+
+#define INVALID_SINFO_FLAG(x) (((x) & 0xffffff00 \
+ & ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\
+ SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR |\
+ SCTP_SACK_IMMEDIATELY)) != 0)
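+/*
+ * Worked example (illustrative only): with the flag values above the mask
+ * reduces to ((x) & 0xffff8000), so SCTP_UNORDERED | SCTP_EOR (0x2400) is
+ * accepted while an undefined bit such as 0x8000 is rejected; the low byte
+ * is left alone because it carries the PR-SCTP policy defined below.
+ */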
+/* for the endpoint */
+
+/* The lower byte is an enumeration of PR-SCTP policies */
+#define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */
+#define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */
+#define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */
+
+#define PR_SCTP_POLICY(x) ((x) & 0xff)
+#define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != 0)
+#define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL)
+#define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF)
+#define PR_SCTP_RTX_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX)
+#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_RTX)
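+/*
+ * Example (a sketch using the fields declared above): a sender asking for
+ * time-based PR-SCTP with a 3 second lifetime on an unordered message could
+ * set
+ *
+ *	sinfo.sinfo_flags = SCTP_UNORDERED | SCTP_PR_SCTP_TTL;
+ *	sinfo.sinfo_timetolive = 3000;		(assumed to be milliseconds)
+ *
+ * after which PR_SCTP_TTL_ENABLED(sinfo.sinfo_flags) evaluates to true.
+ */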
+/* Stat's */
+struct sctp_pcbinfo {
+ uint32_t ep_count;
+ uint32_t asoc_count;
+ uint32_t laddr_count;
+ uint32_t raddr_count;
+ uint32_t chk_count;
+ uint32_t readq_count;
+ uint32_t free_chunks;
+ uint32_t stream_oque;
+};
+
+struct sctp_sockstat {
+ sctp_assoc_t ss_assoc_id;
+ uint32_t ss_total_sndbuf;
+ uint32_t ss_total_recv_buf;
+};
+
+/*
+ * notification event structures
+ */
+
+/*
+ * association change event
+ */
+struct sctp_assoc_change {
+ uint16_t sac_type;
+ uint16_t sac_flags;
+ uint32_t sac_length;
+ uint16_t sac_state;
+ uint16_t sac_error;
+ uint16_t sac_outbound_streams;
+ uint16_t sac_inbound_streams;
+ sctp_assoc_t sac_assoc_id;
+};
+
+/* sac_state values */
+#define SCTP_COMM_UP 0x0001
+#define SCTP_COMM_LOST 0x0002
+#define SCTP_RESTART 0x0003
+#define SCTP_SHUTDOWN_COMP 0x0004
+#define SCTP_CANT_STR_ASSOC 0x0005
+
+
+/*
+ * Address event
+ */
+struct sctp_paddr_change {
+ uint16_t spc_type;
+ uint16_t spc_flags;
+ uint32_t spc_length;
+ struct sockaddr_storage spc_aaddr;
+ uint32_t spc_state;
+ uint32_t spc_error;
+ sctp_assoc_t spc_assoc_id;
+ uint8_t spc_padding[4];
+};
+
+/* paddr state values */
+#define SCTP_ADDR_AVAILABLE 0x0001
+#define SCTP_ADDR_UNREACHABLE 0x0002
+#define SCTP_ADDR_REMOVED 0x0003
+#define SCTP_ADDR_ADDED 0x0004
+#define SCTP_ADDR_MADE_PRIM 0x0005
+#define SCTP_ADDR_CONFIRMED 0x0006
+
+/*
+ * CAUTION: these are user exposed SCTP addr reachability states must be
+ * compatible with SCTP_ADDR states in sctp_constants.h
+ */
+#ifdef SCTP_ACTIVE
+#undef SCTP_ACTIVE
+#endif
+#define SCTP_ACTIVE 0x0001 /* SCTP_ADDR_REACHABLE */
+
+#ifdef SCTP_INACTIVE
+#undef SCTP_INACTIVE
+#endif
+#define SCTP_INACTIVE 0x0002 /* SCTP_ADDR_NOT_REACHABLE */
+
+#ifdef SCTP_UNCONFIRMED
+#undef SCTP_UNCONFIRMED
+#endif
+#define SCTP_UNCONFIRMED 0x0200 /* SCTP_ADDR_UNCONFIRMED */
+
+#ifdef SCTP_NOHEARTBEAT
+#undef SCTP_NOHEARTBEAT
+#endif
+#define SCTP_NOHEARTBEAT 0x0040 /* SCTP_ADDR_NOHB */
+
+
+/* remote error events */
+struct sctp_remote_error {
+ uint16_t sre_type;
+ uint16_t sre_flags;
+ uint32_t sre_length;
+ uint16_t sre_error;
+ sctp_assoc_t sre_assoc_id;
+ uint8_t sre_data[4];
+};
+
+/* data send failure event */
+struct sctp_send_failed {
+ uint16_t ssf_type;
+ uint16_t ssf_flags;
+ uint32_t ssf_length;
+ uint32_t ssf_error;
+ struct sctp_sndrcvinfo ssf_info;
+ sctp_assoc_t ssf_assoc_id;
+ uint8_t ssf_data[];
+};
+
+/* flag that indicates state of data */
+#define SCTP_DATA_UNSENT 0x0001 /* inqueue never on wire */
+#define SCTP_DATA_SENT 0x0002 /* on wire at failure */
+
+/* shutdown event */
+struct sctp_shutdown_event {
+ uint16_t sse_type;
+ uint16_t sse_flags;
+ uint32_t sse_length;
+ sctp_assoc_t sse_assoc_id;
+};
+
+/* Adaptation layer indication stuff */
+struct sctp_adaptation_event {
+ uint16_t sai_type;
+ uint16_t sai_flags;
+ uint32_t sai_length;
+ uint32_t sai_adaptation_ind;
+ sctp_assoc_t sai_assoc_id;
+};
+
+struct sctp_setadaptation {
+ uint32_t ssb_adaptation_ind;
+};
+
+/* compatible old spelling */
+struct sctp_adaption_event {
+ uint16_t sai_type;
+ uint16_t sai_flags;
+ uint32_t sai_length;
+ uint32_t sai_adaption_ind;
+ sctp_assoc_t sai_assoc_id;
+};
+
+struct sctp_setadaption {
+ uint32_t ssb_adaption_ind;
+};
+
+
+/*
+ * Partial Delivery API event
+ */
+struct sctp_pdapi_event {
+ uint16_t pdapi_type;
+ uint16_t pdapi_flags;
+ uint32_t pdapi_length;
+ uint32_t pdapi_indication;
+ uint16_t pdapi_stream;
+ uint16_t pdapi_seq;
+ sctp_assoc_t pdapi_assoc_id;
+};
+
+/* indication values */
+#define SCTP_PARTIAL_DELIVERY_ABORTED 0x0001
+
+
+/*
+ * authentication key event
+ */
+struct sctp_authkey_event {
+ uint16_t auth_type;
+ uint16_t auth_flags;
+ uint32_t auth_length;
+ uint16_t auth_keynumber;
+ uint16_t auth_altkeynumber;
+ uint32_t auth_indication;
+ sctp_assoc_t auth_assoc_id;
+};
+
+/* indication values */
+#define SCTP_AUTH_NEWKEY 0x0001
+#define SCTP_AUTH_NO_AUTH 0x0002
+#define SCTP_AUTH_FREE_KEY 0x0003
+
+
+struct sctp_sender_dry_event {
+ uint16_t sender_dry_type;
+ uint16_t sender_dry_flags;
+ uint32_t sender_dry_length;
+ sctp_assoc_t sender_dry_assoc_id;
+};
+
+
+/*
+ * stream reset event
+ */
+struct sctp_stream_reset_event {
+ uint16_t strreset_type;
+ uint16_t strreset_flags;
+ uint32_t strreset_length;
+ sctp_assoc_t strreset_assoc_id;
+ uint16_t strreset_list[];
+};
+
+/* flags in strreset_flags field */
+#define SCTP_STRRESET_INBOUND_STR 0x0001
+#define SCTP_STRRESET_OUTBOUND_STR 0x0002
+#define SCTP_STRRESET_ALL_STREAMS 0x0004
+#define SCTP_STRRESET_STREAM_LIST 0x0008
+#define SCTP_STRRESET_FAILED 0x0010
+#define SCTP_STRRESET_ADD_STREAM 0x0020
+
+/* SCTP notification event */
+struct sctp_tlv {
+ uint16_t sn_type;
+ uint16_t sn_flags;
+ uint32_t sn_length;
+};
+
+union sctp_notification {
+ struct sctp_tlv sn_header;
+ struct sctp_assoc_change sn_assoc_change;
+ struct sctp_paddr_change sn_paddr_change;
+ struct sctp_remote_error sn_remote_error;
+ struct sctp_send_failed sn_send_failed;
+ struct sctp_shutdown_event sn_shutdown_event;
+ struct sctp_adaptation_event sn_adaptation_event;
+ /* compatibility same as above */
+ struct sctp_adaption_event sn_adaption_event;
+ struct sctp_pdapi_event sn_pdapi_event;
+ struct sctp_authkey_event sn_auth_event;
+ struct sctp_sender_dry_event sn_sender_dry_event;
+ struct sctp_stream_reset_event sn_strreset_event;
+};
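+
+/*
+ * Receive-side sketch (not part of this header): a notification arrives as
+ * a message with MSG_NOTIFICATION set in the returned msg_flags; the buffer
+ * can then be interpreted through this union, using the types defined below:
+ *
+ *	union sctp_notification *snp = (union sctp_notification *)buf;
+ *
+ *	switch (snp->sn_header.sn_type) {
+ *	case SCTP_ASSOC_CHANGE:
+ *		... use snp->sn_assoc_change ...
+ *		break;
+ *	case SCTP_SHUTDOWN_EVENT:
+ *		... use snp->sn_shutdown_event ...
+ *		break;
+ *	}
+ */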
+
+/* notification types */
+#define SCTP_ASSOC_CHANGE 0x0001
+#define SCTP_PEER_ADDR_CHANGE 0x0002
+#define SCTP_REMOTE_ERROR 0x0003
+#define SCTP_SEND_FAILED 0x0004
+#define SCTP_SHUTDOWN_EVENT 0x0005
+#define SCTP_ADAPTATION_INDICATION 0x0006
+/* same as above */
+#define SCTP_ADAPTION_INDICATION 0x0006
+#define SCTP_PARTIAL_DELIVERY_EVENT 0x0007
+#define SCTP_AUTHENTICATION_EVENT 0x0008
+#define SCTP_STREAM_RESET_EVENT 0x0009
+#define SCTP_SENDER_DRY_EVENT 0x000a
+#define SCTP_NOTIFICATIONS_STOPPED_EVENT 0x000b /* we don't send this */
+/*
+ * socket option structs
+ */
+
+struct sctp_paddrparams {
+ struct sockaddr_storage spp_address;
+ sctp_assoc_t spp_assoc_id;
+ uint32_t spp_hbinterval;
+ uint32_t spp_pathmtu;
+ uint32_t spp_flags;
+ uint32_t spp_ipv6_flowlabel;
+ uint16_t spp_pathmaxrxt;
+ uint8_t spp_ipv4_tos;
+};
+
+#define SPP_HB_ENABLE 0x00000001
+#define SPP_HB_DISABLE 0x00000002
+#define SPP_HB_DEMAND 0x00000004
+#define SPP_PMTUD_ENABLE 0x00000008
+#define SPP_PMTUD_DISABLE 0x00000010
+#define SPP_HB_TIME_IS_ZERO 0x00000080
+#define SPP_IPV6_FLOWLABEL 0x00000100
+#define SPP_IPV4_TOS 0x00000200
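+
+/*
+ * Usage sketch (SCTP_PEER_ADDR_PARAMS itself is defined in netinet/sctp.h):
+ * to request a 5 second heartbeat interval on every path of an association
+ * one would typically do
+ *
+ *	struct sctp_paddrparams spp;
+ *
+ *	memset(&spp, 0, sizeof(spp));
+ *	spp.spp_assoc_id = assoc_id;
+ *	spp.spp_flags = SPP_HB_ENABLE;
+ *	spp.spp_hbinterval = 5000;		(milliseconds)
+ *	(void)setsockopt(sd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
+ *	    &spp, sizeof(spp));
+ */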
+
+struct sctp_paddrinfo {
+ struct sockaddr_storage spinfo_address;
+ sctp_assoc_t spinfo_assoc_id;
+ int32_t spinfo_state;
+ uint32_t spinfo_cwnd;
+ uint32_t spinfo_srtt;
+ uint32_t spinfo_rto;
+ uint32_t spinfo_mtu;
+};
+
+struct sctp_rtoinfo {
+ sctp_assoc_t srto_assoc_id;
+ uint32_t srto_initial;
+ uint32_t srto_max;
+ uint32_t srto_min;
+};
+
+struct sctp_assocparams {
+ sctp_assoc_t sasoc_assoc_id;
+ uint32_t sasoc_peer_rwnd;
+ uint32_t sasoc_local_rwnd;
+ uint32_t sasoc_cookie_life;
+ uint16_t sasoc_asocmaxrxt;
+ uint16_t sasoc_number_peer_destinations;
+};
+
+struct sctp_setprim {
+ struct sockaddr_storage ssp_addr;
+ sctp_assoc_t ssp_assoc_id;
+ uint8_t ssp_padding[4];
+};
+
+struct sctp_setpeerprim {
+ struct sockaddr_storage sspp_addr;
+ sctp_assoc_t sspp_assoc_id;
+ uint8_t sspp_padding[4];
+};
+
+struct sctp_getaddresses {
+ sctp_assoc_t sget_assoc_id;
+ /* addr is filled in for N * sockaddr_storage */
+ struct sockaddr addr[1];
+};
+
+struct sctp_setstrm_timeout {
+ sctp_assoc_t ssto_assoc_id;
+ uint32_t ssto_timeout;
+ uint32_t ssto_streamid_start;
+ uint32_t ssto_streamid_end;
+};
+
+struct sctp_status {
+ sctp_assoc_t sstat_assoc_id;
+ int32_t sstat_state;
+ uint32_t sstat_rwnd;
+ uint16_t sstat_unackdata;
+ uint16_t sstat_penddata;
+ uint16_t sstat_instrms;
+ uint16_t sstat_outstrms;
+ uint32_t sstat_fragmentation_point;
+ struct sctp_paddrinfo sstat_primary;
+};
+
+/*
+ * AUTHENTICATION support
+ */
+/* SCTP_AUTH_CHUNK */
+struct sctp_authchunk {
+ uint8_t sauth_chunk;
+};
+
+/* SCTP_AUTH_KEY */
+struct sctp_authkey {
+ sctp_assoc_t sca_assoc_id;
+ uint16_t sca_keynumber;
+ uint8_t sca_key[];
+};
+
+/* SCTP_HMAC_IDENT */
+struct sctp_hmacalgo {
+ uint32_t shmac_number_of_idents;
+ uint16_t shmac_idents[];
+};
+
+/* AUTH hmac_id */
+#define SCTP_AUTH_HMAC_ID_RSVD 0x0000
+#define SCTP_AUTH_HMAC_ID_SHA1 0x0001 /* default, mandatory */
+#define SCTP_AUTH_HMAC_ID_SHA256 0x0003
+#define SCTP_AUTH_HMAC_ID_SHA224 0x0004
+#define SCTP_AUTH_HMAC_ID_SHA384 0x0005
+#define SCTP_AUTH_HMAC_ID_SHA512 0x0006
+
+
+/* SCTP_AUTH_ACTIVE_KEY / SCTP_AUTH_DELETE_KEY */
+struct sctp_authkeyid {
+ sctp_assoc_t scact_assoc_id;
+ uint16_t scact_keynumber;
+};
+
+/* SCTP_PEER_AUTH_CHUNKS / SCTP_LOCAL_AUTH_CHUNKS */
+struct sctp_authchunks {
+ sctp_assoc_t gauth_assoc_id;
+ uint8_t gauth_chunks[];
+};
+
+struct sctp_assoc_value {
+ sctp_assoc_t assoc_id;
+ uint32_t assoc_value;
+};
+
+struct sctp_assoc_ids {
+ uint32_t gaids_number_of_ids;
+ sctp_assoc_t gaids_assoc_id[];
+};
+
+struct sctp_sack_info {
+ sctp_assoc_t sack_assoc_id;
+ uint32_t sack_delay;
+ uint32_t sack_freq;
+};
+
+struct sctp_timeouts {
+ sctp_assoc_t stimo_assoc_id;
+ uint32_t stimo_init;
+ uint32_t stimo_data;
+ uint32_t stimo_sack;
+ uint32_t stimo_shutdown;
+ uint32_t stimo_heartbeat;
+ uint32_t stimo_cookie;
+ uint32_t stimo_shutdownack;
+};
+
+struct sctp_cwnd_args {
+ struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */
+ uint32_t cwnd_new_value;/* cwnd in k */
+ uint32_t pseudo_cumack;
+ uint16_t inflight; /* flightsize in k */
+ uint16_t cwnd_augment; /* increment to it */
+ uint8_t meets_pseudo_cumack;
+ uint8_t need_new_pseudo_cumack;
+ uint8_t cnt_in_send;
+ uint8_t cnt_in_str;
+};
+
+struct sctp_blk_args {
+ uint32_t onsb; /* in 1k bytes */
+ uint32_t sndlen; /* len of send being attempted */
+ uint32_t peer_rwnd; /* rwnd of peer */
+ uint16_t send_sent_qcnt;/* chnk cnt */
+ uint16_t stream_qcnt; /* chnk cnt */
+ uint16_t chunks_on_oque;/* chunks out */
+ uint16_t flight_size; /* flight size in k */
+};
+
+/*
+ * Max we can reset in one setting; note this is dictated not by the define
+ * but by the size of an mbuf cluster, so don't change this define and think
+ * you can specify more. You must do multiple resets if you want to reset more
+ * than SCTP_MAX_EXPLICIT_STR_RESET.
+ */
+#define SCTP_MAX_EXPLICT_STR_RESET 1000
+
+#define SCTP_RESET_LOCAL_RECV 0x0001
+#define SCTP_RESET_LOCAL_SEND 0x0002
+#define SCTP_RESET_BOTH 0x0003
+#define SCTP_RESET_TSN 0x0004
+#define SCTP_RESET_ADD_STREAMS 0x0005
+
+struct sctp_stream_reset {
+ sctp_assoc_t strrst_assoc_id;
+ uint16_t strrst_flags;
+ uint16_t strrst_num_streams; /* 0 == ALL */
+ uint16_t strrst_list[]; /* list if strrst_num_streams is not 0 */
+};
+
+
+struct sctp_get_nonce_values {
+ sctp_assoc_t gn_assoc_id;
+ uint32_t gn_peers_tag;
+ uint32_t gn_local_tag;
+};
+
+/* Debugging logs */
+struct sctp_str_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t n_tsn;
+ uint32_t e_tsn;
+ uint16_t n_sseq;
+ uint16_t e_sseq;
+ uint16_t strm;
+};
+
+struct sctp_sb_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t so_sbcc;
+ uint32_t stcb_sbcc;
+ uint32_t incr;
+};
+
+struct sctp_fr_log {
+ uint32_t largest_tsn;
+ uint32_t largest_new_tsn;
+ uint32_t tsn;
+};
+
+struct sctp_fr_map {
+ uint32_t base;
+ uint32_t cum;
+ uint32_t high;
+};
+
+struct sctp_rwnd_log {
+ uint32_t rwnd;
+ uint32_t send_size;
+ uint32_t overhead;
+ uint32_t new_rwnd;
+};
+
+struct sctp_mbcnt_log {
+ uint32_t total_queue_size;
+ uint32_t size_change;
+ uint32_t total_queue_mb_size;
+ uint32_t mbcnt_change;
+};
+
+struct sctp_sack_log {
+ uint32_t cumack;
+ uint32_t oldcumack;
+ uint32_t tsn;
+ uint16_t numGaps;
+ uint16_t numDups;
+};
+
+struct sctp_lock_log {
+ void *sock; /* FIXME: LP64 issue */
+ void *inp; /* FIXME: LP64 issue */
+ uint8_t tcb_lock;
+ uint8_t inp_lock;
+ uint8_t info_lock;
+ uint8_t sock_lock;
+ uint8_t sockrcvbuf_lock;
+ uint8_t socksndbuf_lock;
+ uint8_t create_lock;
+ uint8_t resv;
+};
+
+struct sctp_rto_log {
+ void *net; /* FIXME: LP64 issue */
+ uint32_t rtt;
+};
+
+struct sctp_nagle_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t total_flight;
+ uint32_t total_in_queue;
+ uint16_t count_in_queue;
+ uint16_t count_in_flight;
+};
+
+struct sctp_sbwake_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint16_t send_q;
+ uint16_t sent_q;
+ uint16_t flight;
+ uint16_t wake_cnt;
+ uint8_t stream_qcnt; /* chnk cnt */
+ uint8_t chunks_on_oque; /* chunks out */
+ uint8_t sbflags;
+ uint8_t sctpflags;
+};
+
+struct sctp_misc_info {
+ uint32_t log1;
+ uint32_t log2;
+ uint32_t log3;
+ uint32_t log4;
+};
+
+struct sctp_log_closing {
+ void *inp; /* FIXME: LP64 issue */
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t sctp_flags;
+ uint16_t state;
+ int16_t loc;
+};
+
+struct sctp_mbuf_log {
+ struct mbuf *mp; /* FIXME: LP64 issue */
+ caddr_t ext;
+ caddr_t data;
+ uint16_t size;
+ uint8_t refcnt;
+ uint8_t mbuf_flags;
+};
+
+struct sctp_cwnd_log {
+ uint64_t time_event;
+ uint8_t from;
+ uint8_t event_type;
+ uint8_t resv[2];
+ union {
+ struct sctp_log_closing close;
+ struct sctp_blk_args blk;
+ struct sctp_cwnd_args cwnd;
+ struct sctp_str_log strlog;
+ struct sctp_fr_log fr;
+ struct sctp_fr_map map;
+ struct sctp_rwnd_log rwnd;
+ struct sctp_mbcnt_log mbcnt;
+ struct sctp_sack_log sack;
+ struct sctp_lock_log lock;
+ struct sctp_rto_log rto;
+ struct sctp_sb_log sb;
+ struct sctp_nagle_log nagle;
+ struct sctp_sbwake_log wake;
+ struct sctp_mbuf_log mb;
+ struct sctp_misc_info misc;
+ } x;
+};
+
+struct sctp_cwnd_log_req {
+ int32_t num_in_log; /* Number in log */
+ int32_t num_ret; /* Number returned */
+ int32_t start_at; /* start at this one */
+ int32_t end_at; /* end at this one */
+ struct sctp_cwnd_log log[];
+};
+
+struct sctp_timeval {
+ uint32_t tv_sec;
+ uint32_t tv_usec;
+};
+
+struct sctpstat {
+ struct sctp_timeval sctps_discontinuitytime; /* sctpStats 18
+ * (TimeStamp) */
+ /* MIB according to RFC 3873 */
+ uint32_t sctps_currestab; /* sctpStats 1 (Gauge32) */
+ uint32_t sctps_activeestab; /* sctpStats 2 (Counter32) */
+ uint32_t sctps_restartestab;
+ uint32_t sctps_collisionestab;
+ uint32_t sctps_passiveestab; /* sctpStats 3 (Counter32) */
+ uint32_t sctps_aborted; /* sctpStats 4 (Counter32) */
+ uint32_t sctps_shutdown;/* sctpStats 5 (Counter32) */
+ uint32_t sctps_outoftheblue; /* sctpStats 6 (Counter32) */
+ uint32_t sctps_checksumerrors; /* sctpStats 7 (Counter32) */
+ uint32_t sctps_outcontrolchunks; /* sctpStats 8 (Counter64) */
+ uint32_t sctps_outorderchunks; /* sctpStats 9 (Counter64) */
+ uint32_t sctps_outunorderchunks; /* sctpStats 10 (Counter64) */
+ uint32_t sctps_incontrolchunks; /* sctpStats 11 (Counter64) */
+ uint32_t sctps_inorderchunks; /* sctpStats 12 (Counter64) */
+ uint32_t sctps_inunorderchunks; /* sctpStats 13 (Counter64) */
+ uint32_t sctps_fragusrmsgs; /* sctpStats 14 (Counter64) */
+ uint32_t sctps_reasmusrmsgs; /* sctpStats 15 (Counter64) */
+ uint32_t sctps_outpackets; /* sctpStats 16 (Counter64) */
+ uint32_t sctps_inpackets; /* sctpStats 17 (Counter64) */
+
+ /* input statistics: */
+ uint32_t sctps_recvpackets; /* total input packets */
+ uint32_t sctps_recvdatagrams; /* total input datagrams */
+ uint32_t sctps_recvpktwithdata; /* total packets that had data */
+ uint32_t sctps_recvsacks; /* total input SACK chunks */
+ uint32_t sctps_recvdata;/* total input DATA chunks */
+ uint32_t sctps_recvdupdata; /* total input duplicate DATA chunks */
+ uint32_t sctps_recvheartbeat; /* total input HB chunks */
+ uint32_t sctps_recvheartbeatack; /* total input HB-ACK chunks */
+ uint32_t sctps_recvecne;/* total input ECNE chunks */
+ uint32_t sctps_recvauth;/* total input AUTH chunks */
+ uint32_t sctps_recvauthmissing; /* total input chunks missing AUTH */
+ uint32_t sctps_recvivalhmacid; /* total number of invalid HMAC ids
+ * received */
+ uint32_t sctps_recvivalkeyid; /* total number of invalid secret ids
+ * received */
+ uint32_t sctps_recvauthfailed; /* total number of auth failed */
+ uint32_t sctps_recvexpress; /* total fast path receives all one
+ * chunk */
+ uint32_t sctps_recvexpressm; /* total fast path multi-part data */
+ uint32_t sctps_recvnocrc;
+ uint32_t sctps_recvswcrc;
+ uint32_t sctps_recvhwcrc;
+
+ /* output statistics: */
+ uint32_t sctps_sendpackets; /* total output packets */
+ uint32_t sctps_sendsacks; /* total output SACKs */
+ uint32_t sctps_senddata;/* total output DATA chunks */
+ uint32_t sctps_sendretransdata; /* total output retransmitted DATA
+ * chunks */
+ uint32_t sctps_sendfastretrans; /* total output fast retransmitted
+ * DATA chunks */
+ uint32_t sctps_sendmultfastretrans; /* total FR's that happened
+ * more than once to same
+ * chunk (u-del multi-fr
+ * algo). */
+ uint32_t sctps_sendheartbeat; /* total output HB chunks */
+ uint32_t sctps_sendecne;/* total output ECNE chunks */
+ uint32_t sctps_sendauth;/* total output AUTH chunks FIXME */
+ uint32_t sctps_senderrors; /* ip_output error counter */
+ uint32_t sctps_sendnocrc;
+ uint32_t sctps_sendswcrc;
+ uint32_t sctps_sendhwcrc;
+ /* PCKDROPREP statistics: */
+ uint32_t sctps_pdrpfmbox; /* Packet drop from middle box */
+ uint32_t sctps_pdrpfehos; /* P-drop from end host */
+ uint32_t sctps_pdrpmbda;/* P-drops with data */
+ uint32_t sctps_pdrpmbct;/* P-drops, non-data, non-endhost */
+ uint32_t sctps_pdrpbwrpt; /* P-drop, non-endhost, bandwidth rep
+ * only */
+ uint32_t sctps_pdrpcrupt; /* P-drop, not enough for chunk header */
+ uint32_t sctps_pdrpnedat; /* P-drop, not enough data to confirm */
+ uint32_t sctps_pdrppdbrk; /* P-drop, where process_chunk_drop
+ * said break */
+ uint32_t sctps_pdrptsnnf; /* P-drop, could not find TSN */
+ uint32_t sctps_pdrpdnfnd; /* P-drop, attempt reverse TSN lookup */
+ uint32_t sctps_pdrpdiwnp; /* P-drop, e-host confirms zero-rwnd */
+ uint32_t sctps_pdrpdizrw; /* P-drop, midbox confirms no space */
+ uint32_t sctps_pdrpbadd;/* P-drop, data did not match TSN */
+ uint32_t sctps_pdrpmark;/* P-drop, TSN's marked for Fast Retran */
+ /* timeouts */
+ uint32_t sctps_timoiterator; /* Number of iterator timers that
+ * fired */
+ uint32_t sctps_timodata;/* Number of T3 data time outs */
+ uint32_t sctps_timowindowprobe; /* Number of window probe (T3) timers
+ * that fired */
+ uint32_t sctps_timoinit;/* Number of INIT timers that fired */
+ uint32_t sctps_timosack;/* Number of sack timers that fired */
+ uint32_t sctps_timoshutdown; /* Number of shutdown timers that
+ * fired */
+ uint32_t sctps_timoheartbeat; /* Number of heartbeat timers that
+ * fired */
+ uint32_t sctps_timocookie; /* Number of times a cookie timeout
+ * fired */
+ uint32_t sctps_timosecret; /* Number of times an endpoint changed
+ * its cookie secret */
+ uint32_t sctps_timopathmtu; /* Number of PMTU timers that fired */
+ uint32_t sctps_timoshutdownack; /* Number of shutdown ack timers that
+ * fired */
+ uint32_t sctps_timoshutdownguard; /* Number of shutdown guard
+ * timers that fired */
+ uint32_t sctps_timostrmrst; /* Number of stream reset timers that
+ * fired */
+ uint32_t sctps_timoearlyfr; /* Number of early FR timers that
+ * fired */
+ uint32_t sctps_timoasconf; /* Number of times an asconf timer
+ * fired */
+ uint32_t sctps_timodelprim; /* Number of times a prim_deleted
+ * timer fired */
+ uint32_t sctps_timoautoclose; /* Number of times auto close timer
+ * fired */
+ uint32_t sctps_timoassockill; /* Number of asoc free timers expired */
+ uint32_t sctps_timoinpkill; /* Number of inp free timers expired */
+ /* Early fast retransmission counters */
+ uint32_t sctps_earlyfrstart;
+ uint32_t sctps_earlyfrstop;
+ uint32_t sctps_earlyfrmrkretrans;
+ uint32_t sctps_earlyfrstpout;
+ uint32_t sctps_earlyfrstpidsck1;
+ uint32_t sctps_earlyfrstpidsck2;
+ uint32_t sctps_earlyfrstpidsck3;
+ uint32_t sctps_earlyfrstpidsck4;
+ uint32_t sctps_earlyfrstrid;
+ uint32_t sctps_earlyfrstrout;
+ uint32_t sctps_earlyfrstrtmr;
+ /* others */
+ uint32_t sctps_hdrops; /* packet shorter than header */
+ uint32_t sctps_badsum; /* checksum error */
+ uint32_t sctps_noport; /* no endpoint for port */
+ uint32_t sctps_badvtag; /* bad v-tag */
+ uint32_t sctps_badsid; /* bad SID */
+ uint32_t sctps_nomem; /* no memory */
+ uint32_t sctps_fastretransinrtt; /* number of multiple FR in a
+ * RTT window */
+ uint32_t sctps_markedretrans;
+ uint32_t sctps_naglesent; /* nagle allowed sending */
+ uint32_t sctps_naglequeued; /* nagle doesn't allow sending */
+ uint32_t sctps_maxburstqueued; /* max burst doesn't allow sending */
+ uint32_t sctps_ifnomemqueued; /* look ahead tells us no memory in
+ * interface ring buffer OR we had a
+ * send error and are queuing one
+ * send. */
+ uint32_t sctps_windowprobed; /* total number of window probes sent */
+ uint32_t sctps_lowlevelerr; /* total times an output error causes
+ * us to clamp down on next user send. */
+ uint32_t sctps_lowlevelerrusr; /* total times sctp_senderrors were
+ * caused from a user send from a user
+ * invoked send not a sack response */
+ uint32_t sctps_datadropchklmt; /* Number of in data drops due to
+ * chunk limit reached */
+ uint32_t sctps_datadroprwnd; /* Number of in data drops due to rwnd
+ * limit reached */
+ uint32_t sctps_ecnereducedcwnd; /* Number of times a ECN reduced the
+ * cwnd */
+ uint32_t sctps_vtagexpress; /* Used express lookup via vtag */
+ uint32_t sctps_vtagbogus; /* Collision in express lookup. */
+ uint32_t sctps_primary_randry; /* Number of times the sender ran dry
+ * of user data on primary */
+ uint32_t sctps_cmt_randry; /* Same for above */
+ uint32_t sctps_slowpath_sack; /* Sacks the slow way */
+ uint32_t sctps_wu_sacks_sent; /* Window Update only sacks sent */
+ uint32_t sctps_sends_with_flags; /* number of sends with
+ * sinfo_flags !=0 */
+ uint32_t sctps_sends_with_unord; /* number of unordered sends */
+ uint32_t sctps_sends_with_eof; /* number of sends with EOF flag set */
+ uint32_t sctps_sends_with_abort; /* number of sends with ABORT
+ * flag set */
+ uint32_t sctps_protocol_drain_calls; /* number of times protocol
+ * drain called */
+ uint32_t sctps_protocol_drains_done; /* number of times we did a
+ * protocol drain */
+ uint32_t sctps_read_peeks; /* Number of times recv was called
+ * with peek */
+ uint32_t sctps_cached_chk; /* Number of cached chunks used */
+ uint32_t sctps_cached_strmoq; /* Number of cached stream oq's used */
+ uint32_t sctps_left_abandon; /* Number of unread messages abandoned
+ * by close */
+ uint32_t sctps_send_burst_avoid; /* Unused */
+ uint32_t sctps_send_cwnd_avoid; /* Send cwnd full avoidance, already
+ * max burst inflight to net */
+ uint32_t sctps_fwdtsn_map_over; /* number of map array over-runs via
+ * fwd-tsn's */
+
+ uint32_t sctps_reserved[32]; /* Future ABI compat - remove int's
+ * from here when adding new */
+};
+
+#define SCTP_STAT_INCR(_x) SCTP_STAT_INCR_BY(_x,1)
+#define SCTP_STAT_DECR(_x) SCTP_STAT_DECR_BY(_x,1)
+#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
+#define SCTP_STAT_INCR_BY(_x,_d) (SCTP_BASE_STATS[PCPU_GET(cpuid)]._x += _d)
+#define SCTP_STAT_DECR_BY(_x,_d) (SCTP_BASE_STATS[PCPU_GET(cpuid)]._x -= _d)
+#else
+#define SCTP_STAT_INCR_BY(_x,_d) atomic_add_int(&SCTP_BASE_STAT(_x), _d)
+#define SCTP_STAT_DECR_BY(_x,_d) atomic_subtract_int(&SCTP_BASE_STAT(_x), _d)
+#endif
+/* The following macros are for handling MIB values, */
+#define SCTP_STAT_INCR_COUNTER32(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_INCR_COUNTER64(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_INCR_GAUGE32(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_DECR_COUNTER32(_x) SCTP_STAT_DECR(_x)
+#define SCTP_STAT_DECR_COUNTER64(_x) SCTP_STAT_DECR(_x)
+#define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x)
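+
+/*
+ * For example, SCTP_STAT_INCR(sctps_recvdata) expands in the common
+ * (non-per-CPU) case to atomic_add_int(&SCTP_BASE_STAT(sctps_recvdata), 1);
+ * when SCTP_USE_PERCPU_STAT is configured, each CPU updates its own copy of
+ * struct sctpstat instead, avoiding the atomic operation.
+ */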
+
+union sctp_sockstore {
+#if defined(INET) || !defined(_KERNEL)
+ struct sockaddr_in sin;
+#endif
+#if defined(INET6) || !defined(_KERNEL)
+ struct sockaddr_in6 sin6;
+#endif
+ struct sockaddr sa;
+};
+
+
+/***********************************/
+/* And something for us old timers */
+/***********************************/
+
+#ifndef ntohll
+#include <freebsd/sys/endian.h>
+#define ntohll(x) be64toh(x)
+#endif
+
+#ifndef htonll
+#include <freebsd/sys/endian.h>
+#define htonll(x) htobe64(x)
+#endif
+/***********************************/
+
+
+struct xsctp_inpcb {
+ uint32_t last;
+ uint32_t flags;
+ uint32_t features;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t total_nospaces;
+ uint32_t fragmentation_point;
+ uint16_t local_port;
+ uint16_t qlen;
+ uint16_t maxqlen;
+ uint32_t extra_padding[32]; /* future */
+};
+
+struct xsctp_tcb {
+ union sctp_sockstore primary_addr; /* sctpAssocEntry 5/6 */
+ uint32_t last;
+ uint32_t heartbeat_interval; /* sctpAssocEntry 7 */
+ uint32_t state; /* sctpAssocEntry 8 */
+ uint32_t in_streams; /* sctpAssocEntry 9 */
+ uint32_t out_streams; /* sctpAssocEntry 10 */
+ uint32_t max_nr_retrans;/* sctpAssocEntry 11 */
+ uint32_t primary_process; /* sctpAssocEntry 12 */
+ uint32_t T1_expireries; /* sctpAssocEntry 13 */
+ uint32_t T2_expireries; /* sctpAssocEntry 14 */
+ uint32_t retransmitted_tsns; /* sctpAssocEntry 15 */
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t local_tag;
+ uint32_t remote_tag;
+ uint32_t initial_tsn;
+ uint32_t highest_tsn;
+ uint32_t cumulative_tsn;
+ uint32_t cumulative_tsn_ack;
+ uint32_t mtu;
+ uint32_t refcnt;
+ uint16_t local_port; /* sctpAssocEntry 3 */
+ uint16_t remote_port; /* sctpAssocEntry 4 */
+ struct sctp_timeval start_time; /* sctpAssocEntry 16 */
+ struct sctp_timeval discontinuity_time; /* sctpAssocEntry 17 */
+ uint32_t peers_rwnd;
+ sctp_assoc_t assoc_id; /* sctpAssocEntry 1 */
+ uint32_t extra_padding[32]; /* future */
+};
+
+struct xsctp_laddr {
+ union sctp_sockstore address; /* sctpAssocLocalAddrEntry 1/2 */
+ uint32_t last;
+ struct sctp_timeval start_time; /* sctpAssocLocalAddrEntry 3 */
+ uint32_t extra_padding[32]; /* future */
+};
+
+struct xsctp_raddr {
+ union sctp_sockstore address; /* sctpAssocLocalRemEntry 1/2 */
+ uint32_t last;
+ uint32_t rto; /* sctpAssocLocalRemEntry 5 */
+ uint32_t max_path_rtx; /* sctpAssocLocalRemEntry 6 */
+ uint32_t rtx; /* sctpAssocLocalRemEntry 7 */
+ uint32_t error_counter; /* */
+ uint32_t cwnd; /* */
+ uint32_t flight_size; /* */
+ uint32_t mtu; /* */
+ uint8_t active; /* sctpAssocLocalRemEntry 3 */
+ uint8_t confirmed; /* */
+ uint8_t heartbeat_enabled; /* sctpAssocLocalRemEntry 4 */
+ struct sctp_timeval start_time; /* sctpAssocLocalRemEntry 8 */
+ uint32_t rtt;
+ uint32_t extra_padding[32]; /* future */
+};
+
+#define SCTP_MAX_LOGGING_SIZE 30000
+#define SCTP_TRACE_PARAMS 6 /* This number MUST be even */
+
+struct sctp_log_entry {
+ uint64_t timestamp;
+ uint32_t subsys;
+ uint32_t padding;
+ uint32_t params[SCTP_TRACE_PARAMS];
+};
+
+struct sctp_log {
+ struct sctp_log_entry entry[SCTP_MAX_LOGGING_SIZE];
+ uint32_t index;
+ uint32_t padding;
+};
+
+/*
+ * Kernel defined for sctp_send
+ */
+#if defined(_KERNEL) || defined(__Userspace__)
+int
+sctp_lower_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *i_pak,
+ struct mbuf *control,
+ int flags,
+ struct sctp_sndrcvinfo *srcv
+ ,struct thread *p
+);
+
+int
+sctp_sorecvmsg(struct socket *so,
+ struct uio *uio,
+ struct mbuf **mp,
+ struct sockaddr *from,
+ int fromlen,
+ int *msg_flags,
+ struct sctp_sndrcvinfo *sinfo,
+ int filling_sinfo);
+
+#endif
+
+/*
+ * API system calls
+ */
+#if !(defined(_KERNEL)) && !(defined(__Userspace__))
+
+__BEGIN_DECLS
+int sctp_peeloff __P((int, sctp_assoc_t));
+int sctp_bindx __P((int, struct sockaddr *, int, int));
+int sctp_connectx __P((int, const struct sockaddr *, int, sctp_assoc_t *));
+int sctp_getaddrlen __P((sa_family_t));
+int sctp_getpaddrs __P((int, sctp_assoc_t, struct sockaddr **));
+void sctp_freepaddrs __P((struct sockaddr *));
+int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **));
+void sctp_freeladdrs __P((struct sockaddr *));
+int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *));
+
+ssize_t sctp_sendmsg
+__P((int, const void *, size_t,
+ const struct sockaddr *,
+ socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+
+ ssize_t sctp_send __P((int sd, const void *msg, size_t len,
+ const struct sctp_sndrcvinfo *sinfo, int flags));
+
+ ssize_t sctp_sendx __P((int sd, const void *msg, size_t len,
+ struct sockaddr *addrs, int addrcnt,
+ struct sctp_sndrcvinfo *sinfo, int flags));
+
+ ssize_t sctp_sendmsgx __P((int sd, const void *, size_t,
+ struct sockaddr *, int,
+ uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+
+ sctp_assoc_t sctp_getassocid __P((int sd, struct sockaddr *sa));
+
+ ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *,
+ socklen_t *, struct sctp_sndrcvinfo *, int *));
+
+__END_DECLS
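+
+/*
+ * Example (a sketch; error handling omitted): sending one ordered message
+ * on stream 5 with payload protocol id "ppid" (network byte order) to a
+ * peer address "peer":
+ *
+ *	ssize_t n = sctp_sendmsg(sd, buf, len,
+ *	    (struct sockaddr *)&peer, sizeof(peer),
+ *	    htonl(ppid), 0, 5, 0, 0);
+ */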
+
+#endif /* !_KERNEL */
+#endif /* !__sctp_uio_h__ */
diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c
new file mode 100644
index 00000000..bb60795c
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_usrreq.c
@@ -0,0 +1,4918 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_usrreq.c,v 1.48 2005/03/07 23:26:08 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctp_var.h>
+#if defined(INET6)
+#endif
+#include <freebsd/netinet/sctp_sysctl.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_uio.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_indata.h>
+#include <freebsd/netinet/sctp_timer.h>
+#include <freebsd/netinet/sctp_auth.h>
+#include <freebsd/netinet/sctp_bsd_addr.h>
+#include <freebsd/netinet/sctp_cc_functions.h>
+#include <freebsd/netinet/udp.h>
+
+
+
+
+void
+sctp_init(void)
+{
+ u_long sb_max_adj;
+
+ bzero(&SCTP_BASE_STATS, sizeof(struct sctpstat));
+
+ /* Initialize and modify the sysctled variables */
+ sctp_init_sysctls();
+ if ((nmbclusters / 8) > SCTP_ASOC_MAX_CHUNKS_ON_QUEUE)
+ SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue) = (nmbclusters / 8);
+ /*
+ * Allow a user to take no more than 1/2 the number of clusters or
+ * the SB_MAX whichever is smaller for the send window.
+ */
+ sb_max_adj = (u_long)((u_quad_t) (SB_MAX) * MCLBYTES / (MSIZE + MCLBYTES));
+ SCTP_BASE_SYSCTL(sctp_sendspace) = min(sb_max_adj,
+ (((uint32_t) nmbclusters / 2) * SCTP_DEFAULT_MAXSEGMENT));
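+ /*
+ * The MCLBYTES / (MSIZE + MCLBYTES) factor above estimates how much of the
+ * socket buffer limit can actually hold user data: every cluster of payload
+ * also costs one mbuf header, so only that fraction of SB_MAX is usable
+ * (the same adjustment other protocols apply to their send spaces).
+ */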
+ /*
+ * Now for the recv window, should we take the same amount? or
+ * should I do 1/2 the SB_MAX instead in the SB_MAX min above. For
+ * now I will just copy.
+ */
+ SCTP_BASE_SYSCTL(sctp_recvspace) = SCTP_BASE_SYSCTL(sctp_sendspace);
+
+ SCTP_BASE_VAR(first_time) = 0;
+ SCTP_BASE_VAR(sctp_pcb_initialized) = 0;
+ sctp_pcb_init();
+#if defined(SCTP_PACKET_LOGGING)
+ SCTP_BASE_VAR(packet_log_writers) = 0;
+ SCTP_BASE_VAR(packet_log_end) = 0;
+ bzero(&SCTP_BASE_VAR(packet_log_buffer), SCTP_PACKET_LOG_SIZE);
+#endif
+
+
+}
+
+void
+sctp_finish(void)
+{
+ sctp_pcb_finish();
+}
+
+
+
+void
+sctp_pathmtu_adjustment(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint16_t nxtsz)
+{
+ struct sctp_tmit_chunk *chk;
+ uint16_t overhead;
+
+ /* Adjust that too */
+ stcb->asoc.smallest_mtu = nxtsz;
+ /* now off to subtract IP_DF flag if needed */
+ overhead = IP_HDR_SIZE;
+ if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
+ overhead += sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ }
+ TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
+ if ((chk->send_size + overhead) > nxtsz) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if ((chk->send_size + overhead) > nxtsz) {
+ /*
+ * For this chunk we also mark for immediate resend
+ * since we sent too big a chunk
+ */
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ }
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ chk->rec.data.doing_fast_retransmit = 0;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ /* Clear any time so NO RTT is being done */
+ chk->do_rtt = 0;
+ }
+ }
+}
+
+static void
+sctp_notify_mbuf(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct ip *ip,
+ struct sctphdr *sh)
+{
+ struct icmp *icmph;
+ int totsz, tmr_stopped = 0;
+ uint16_t nxtsz;
+
+ /* protection */
+ if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
+ (ip == NULL) || (sh == NULL)) {
+ if (stcb != NULL) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ return;
+ }
+ /* First job is to verify the vtag matches what I would send */
+ if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) -
+ sizeof(struct ip)));
+ if (icmph->icmp_type != ICMP_UNREACH) {
+ /* We only care about unreachable */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if (icmph->icmp_code != ICMP_UNREACH_NEEDFRAG) {
+ /* not an unreachable message due to fragmentation */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ totsz = ip->ip_len;
+
+ nxtsz = ntohs(icmph->icmp_nextmtu);
+ if (nxtsz == 0) {
+ /*
+ * old-style router that does not tell us what the next
+ * MTU is, so we will have to guess (in an educated fashion,
+ * of course)
+ */
+ nxtsz = sctp_get_prev_mtu(totsz);
+ }
+ /* Stop any PMTU timer */
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ tmr_stopped = 1;
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1);
+ }
+ /* Adjust destination size limit */
+ if (net->mtu > nxtsz) {
+ net->mtu = nxtsz;
+ if (net->port) {
+ net->mtu -= sizeof(struct udphdr);
+ }
+ }
+ /* now what about the ep? */
+ if (stcb->asoc.smallest_mtu > nxtsz) {
+ sctp_pathmtu_adjustment(inp, stcb, net, nxtsz);
+ }
+ if (tmr_stopped)
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+
+ SCTP_TCB_UNLOCK(stcb);
+}
+
+
+void
+sctp_notify(struct sctp_inpcb *inp,
+ struct ip *ip,
+ struct sctphdr *sh,
+ struct sockaddr *to,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ /* protection */
+ int reason;
+ struct icmp *icmph;
+
+
+ if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
+ (sh == NULL) || (to == NULL)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ /* First job is to verify the vtag matches what I would send */
+ if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) -
+ sizeof(struct ip)));
+ if (icmph->icmp_type != ICMP_UNREACH) {
+ /* We only care about unreachable */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if ((icmph->icmp_code == ICMP_UNREACH_NET) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST) ||
+ (icmph->icmp_code == ICMP_UNREACH_NET_UNKNOWN) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST_UNKNOWN) ||
+ (icmph->icmp_code == ICMP_UNREACH_ISOLATED) ||
+ (icmph->icmp_code == ICMP_UNREACH_NET_PROHIB) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST_PROHIB) ||
+ (icmph->icmp_code == ICMP_UNREACH_FILTER_PROHIB)) {
+
+ /*
+ * Hmm, reachability problems we must examine closely. If it is
+ * not reachable, we may have lost a network. Or if there is
+ * NO protocol at the other end named SCTP, we consider
+ * it an OOTB abort.
+ */
+ if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ /* Ok that destination is NOT reachable */
+ SCTP_PRINTF("ICMP (thresh %d/%d) takes interface %p down\n",
+ net->error_count,
+ net->failure_threshold,
+ net);
+
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ /*
+ * JRS 5/14/07 - If a destination is unreachable,
+ * the PF bit is turned off. This allows an
+ * unambiguous use of the PF bit for destinations
+ * that are reachable but potentially failed. If the
+ * destination is set to the unreachable state, also
+ * set the destination to the PF state.
+ */
+ /*
+ * Add debug message here if destination is not in
+ * PF state.
+ */
+ /* Stop any running T3 timers here? */
+ if ((stcb->asoc.sctp_cmt_on_off == 1) &&
+ (stcb->asoc.sctp_cmt_pf > 0)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ net->error_count = net->failure_threshold + 1;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb, SCTP_FAILED_THRESHOLD,
+ (void *)net, SCTP_SO_NOT_LOCKED);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else if ((icmph->icmp_code == ICMP_UNREACH_PROTOCOL) ||
+ (icmph->icmp_code == ICMP_UNREACH_PORT)) {
+ /*
+ * Here the peer is either playing tricks on us (including
+ * using an address that belongs to someone who does not
+ * support SCTP), or it was a userland implementation that
+ * shut down and is now dead. In either case treat it like
+ * an OOTB abort with no TCB.
+ */
+ reason = SCTP_PEER_FAULTY;
+ sctp_abort_notification(stcb, reason, SCTP_SO_NOT_LOCKED);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+ /* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
+#endif
+ /* no need to unlock here, since the TCB is gone */
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+}
+
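+/*
+ * Control-input handler called by the IP layer for ICMP errors: look up
+ * the association for the offending packet and dispatch to sctp_notify()
+ * or, for PRC_MSGSIZE, to sctp_notify_mbuf().
+ */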
+void
+sctp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+ struct ip *ip = vip;
+ struct sctphdr *sh;
+ uint32_t vrf_id;
+
+ /* FIX, for non-bsd is this right? */
+ vrf_id = SCTP_DEFAULT_VRFID;
+ if (sa->sa_family != AF_INET ||
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr == INADDR_ANY) {
+ return;
+ }
+ if (PRC_IS_REDIRECT(cmd)) {
+ ip = 0;
+ } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
+ return;
+ }
+ if (ip) {
+ struct sctp_inpcb *inp = NULL;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_nets *net = NULL;
+ struct sockaddr_in to, from;
+
+ sh = (struct sctphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+ bzero(&to, sizeof(to));
+ bzero(&from, sizeof(from));
+ from.sin_family = to.sin_family = AF_INET;
+ from.sin_len = to.sin_len = sizeof(to);
+ from.sin_port = sh->src_port;
+ from.sin_addr = ip->ip_src;
+ to.sin_port = sh->dest_port;
+ to.sin_addr = ip->ip_dst;
+
+ /*
+ * 'to' holds the dest of the packet that failed to be sent.
+ * 'from' holds our local endpoint address. Thus we reverse
+ * the to and the from in the lookup.
+ */
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&from,
+ (struct sockaddr *)&to,
+ &inp, &net, 1, vrf_id);
+ if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
+ if (cmd != PRC_MSGSIZE) {
+ sctp_notify(inp, ip, sh,
+ (struct sockaddr *)&to, stcb,
+ net);
+ } else {
+ /* handle possible ICMP size messages */
+ sctp_notify_mbuf(inp, stcb, net, ip, sh);
+ }
+ } else {
+ if ((stcb == NULL) && (inp != NULL)) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ }
+ return;
+}
+
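+/*
+ * Sysctl helper: given a local/remote address pair, look up the matching
+ * association and return the credentials of the owning socket.
+ */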
+static int
+sctp_getcred(SYSCTL_HANDLER_ARGS)
+{
+ struct xucred xuc;
+ struct sockaddr_in addrs[2];
+ struct sctp_inpcb *inp;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb;
+ int error;
+ uint32_t vrf_id;
+
+ /* FIX, for non-bsd is this right? */
+ vrf_id = SCTP_DEFAULT_VRFID;
+
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
+
+ if (error)
+ return (error);
+
+ error = SYSCTL_IN(req, addrs, sizeof(addrs));
+ if (error)
+ return (error);
+
+ stcb = sctp_findassociation_addr_sa(sintosa(&addrs[0]),
+ sintosa(&addrs[1]),
+ &inp, &net, 1, vrf_id);
+ if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
+ if ((inp != NULL) && (stcb == NULL)) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ goto cred_can_cont;
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ goto out;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ /*
+ * We use the write lock here only because the error leg needs it.
+ * If we used RLOCK, then we would have to
+ * wlock/decr/unlock/rlock, which in theory could create a hole.
+ * Better to take the stronger wlock up front.
+ */
+ SCTP_INP_WLOCK(inp);
+cred_can_cont:
+ error = cr_canseesocket(req->td->td_ucred, inp->sctp_socket);
+ if (error) {
+ SCTP_INP_WUNLOCK(inp);
+ goto out;
+ }
+ cru2x(inp->sctp_socket->so_cred, &xuc);
+ SCTP_INP_WUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+out:
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW,
+ 0, 0, sctp_getcred, "S,ucred", "Get the ucred of a SCTP connection");
+
+
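+/*
+ * Abort handling for the socket: free the endpoint with an ABORT, clear
+ * the send and receive buffers, and detach the PCB from the socket.
+ */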
+static void
+sctp_abort(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+ uint32_t flags;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ return;
+ }
+sctp_must_try_again:
+ flags = inp->sctp_flags;
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 16);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ SOCK_LOCK(so);
+ SCTP_SB_CLEAR(so->so_snd);
+ /*
+ * same for the rcv ones, they are only here for the
+ * accounting/select.
+ */
+ SCTP_SB_CLEAR(so->so_rcv);
+
+ /* Now null out the reference, we are completely detached. */
+ so->so_pcb = NULL;
+ SOCK_UNLOCK(so);
+ } else {
+ flags = inp->sctp_flags;
+ if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
+ goto sctp_must_try_again;
+ }
+ }
+ return;
+}
+
+static int
+sctp_attach(struct socket *so, int proto, struct thread *p)
+{
+ struct sctp_inpcb *inp;
+ struct inpcb *ip_inp;
+ int error;
+ uint32_t vrf_id = SCTP_DEFAULT_VRFID;
+
+#ifdef IPSEC
+ uint32_t flags;
+
+#endif
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+ error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace));
+ if (error) {
+ return error;
+ }
+ }
+ error = sctp_inpcb_alloc(so, vrf_id);
+ if (error) {
+ return error;
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUND_V6; /* I'm not v6! */
+ ip_inp = &inp->ip_inp.inp;
+ ip_inp->inp_vflag |= INP_IPV4;
+ ip_inp->inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
+#ifdef IPSEC
+ error = ipsec_init_policy(so, &ip_inp->inp_sp);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (error != 0) {
+try_again:
+ flags = inp->sctp_flags;
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 15);
+#endif
+ SCTP_INP_WUNLOCK(inp);
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ } else {
+ flags = inp->sctp_flags;
+ if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
+ goto try_again;
+ } else {
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ return error;
+ }
+#endif /* IPSEC */
+ SCTP_INP_WUNLOCK(inp);
+ return 0;
+}
+
+static int
+sctp_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
+{
+ struct sctp_inpcb *inp = NULL;
+ int error;
+
+#ifdef INET6
+ if (addr && addr->sa_family != AF_INET) {
+ /* must be a v4 address! */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+#endif /* INET6 */
+ if (addr && (addr->sa_len != sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ error = sctp_inpcb_bind(so, addr, NULL, p);
+ return error;
+}
+
+void
+sctp_close(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+ uint32_t flags;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0)
+ return;
+
+ /*
+ * Inform all the lower layer assoc that we are done.
+ */
+sctp_must_try_again:
+ flags = inp->sctp_flags;
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+ if (((so->so_options & SO_LINGER) && (so->so_linger == 0)) ||
+ (so->so_rcv.sb_cc > 0)) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 13);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ } else {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 14);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ }
+ /*
+ * The socket is now detached, no matter what the state of
+ * the SCTP association.
+ */
+ SOCK_LOCK(so);
+ SCTP_SB_CLEAR(so->so_snd);
+ /*
+ * same for the rcv ones, they are only here for the
+ * accounting/select.
+ */
+ SCTP_SB_CLEAR(so->so_rcv);
+
+ /* Now null out the reference, we are completely detached. */
+ so->so_pcb = NULL;
+ SOCK_UNLOCK(so);
+ } else {
+ flags = inp->sctp_flags;
+ if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
+ goto sctp_must_try_again;
+ }
+ }
+ return;
+}
+
+
+int
+sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *p);
+
+
+int
+sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *p)
+{
+ struct sctp_inpcb *inp;
+ int error;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ sctp_m_freem(m);
+ return EINVAL;
+ }
+ /* Must have a destination address if we are NOT a connected socket */
+ if ((addr == NULL) &&
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE))
+ ) {
+ goto connected_type;
+ } else if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ);
+ error = EDESTADDRREQ;
+ sctp_m_freem(m);
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ return (error);
+ }
+#ifdef INET6
+ if (addr->sa_family != AF_INET) {
+ /* must be a v4 address! */
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ);
+ sctp_m_freem(m);
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ error = EDESTADDRREQ;
+ return EDESTADDRREQ;
+ }
+#endif /* INET6 */
+connected_type:
+ /* now what about control */
+ if (control) {
+ if (inp->control) {
+ SCTP_PRINTF("huh? control set?\n");
+ sctp_m_freem(inp->control);
+ inp->control = NULL;
+ }
+ inp->control = control;
+ }
+ /* Place the data */
+ if (inp->pkt) {
+ SCTP_BUF_NEXT(inp->pkt_last) = m;
+ inp->pkt_last = m;
+ } else {
+ inp->pkt_last = inp->pkt = m;
+ }
+ if (
+ /* FreeBSD uses a flag passed */
+ ((flags & PRUS_MORETOCOME) == 0)
+ ) {
+ /*
+ * Note: with the current version this code will only be used
+ * by OpenBSD -- NetBSD, FreeBSD, and MacOS have methods for
+ * redefining sosend to use sctp_sosend. One can optionally
+ * switch back to this code (by changing back the
+ * definitions), but this is not advisable. FreeBSD does use
+ * this code when sending a file with sendfile(), though.
+ */
+ int ret;
+
+ ret = sctp_output(inp, inp->pkt, addr, inp->control, p, flags);
+ inp->pkt = NULL;
+ inp->control = NULL;
+ return (ret);
+ } else {
+ return (0);
+ }
+}
+
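+/*
+ * Disconnect a TCP-model socket. With nothing queued the association is
+ * shut down gracefully; unread data or SO_LINGER with a zero timeout
+ * forces an ABORT instead, and pending outbound data moves the
+ * association to SHUTDOWN-PENDING. The UDP model does not support this.
+ */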
+int
+sctp_disconnect(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_INP_RLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ if (LIST_EMPTY(&inp->sctp_asoc_list)) {
+ /* No connection */
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ } else {
+ struct sctp_association *asoc;
+ struct sctp_tcb *stcb;
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_TCB_LOCK(stcb);
+ asoc = &stcb->asoc;
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* We are about to be freed, out of here */
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ if (((so->so_options & SO_LINGER) &&
+ (so->so_linger == 0)) ||
+ (so->so_rcv.sb_cc > 0)) {
+ if (SCTP_GET_STATE(asoc) !=
+ SCTP_STATE_COOKIE_WAIT) {
+ /* Left with Data unread */
+ struct mbuf *err;
+
+ err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+ if (err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+
+ ph = mtod(err, struct sctp_paramhdr *);
+ SCTP_BUF_LEN(err) = sizeof(struct sctp_paramhdr);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(err));
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("disconnect does an abort");
+#endif
+ sctp_send_abort_tcb(stcb, err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
+ /* No unlock tcb assoc is gone */
+ return (0);
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->stream_queue_cnt == 0)) {
+ /* there is nothing queued to send, so done */
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* only send SHUTDOWN 1st time thru */
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send,
+ * so set SHUTDOWN_PENDING
+ */
+ /*
+ * XXX The sockets draft says that SCTP_EOF
+ * should be sent with no data. Currently,
+ * we allow user data to be sent first and
+ * then move to SHUTDOWN-PENDING.
+ */
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is non-null strm:%d\n",
+ asoc->locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ (sizeof(struct sctp_paramhdr) + sizeof(uint32_t));
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4);
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("disconnect does an abort");
+#endif
+
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
+ sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
+ return (0);
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ soisdisconnecting(so);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ /* not reached */
+ } else {
+ /* UDP model does not support this */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return EOPNOTSUPP;
+ }
+}
+
+int
+sctp_flush(struct socket *so, int how)
+{
+ /*
+ * We will just clear out the values and let subsequent close clear
+ * out the data, if any. Note if the user did a shutdown(SHUT_RD)
+ * they will not be able to read the data, the socket will block
+ * that from happening.
+ */
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ SCTP_INP_RLOCK(inp);
+ /* For the 1 to many model this does nothing */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if ((how == PRU_FLUSH_RD) || (how == PRU_FLUSH_RDWR)) {
+ /*
+ * First make sure the socket buffer will be consistent; we
+ * don't use these fields except maybe the count.
+ */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_READ_LOCK(inp);
+ inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_CANT_READ;
+ SCTP_INP_READ_UNLOCK(inp);
+ SCTP_INP_WUNLOCK(inp);
+ so->so_rcv.sb_cc = 0;
+ so->so_rcv.sb_mbcnt = 0;
+ so->so_rcv.sb_mb = NULL;
+ }
+ if ((how == PRU_FLUSH_WR) || (how == PRU_FLUSH_RDWR)) {
+ /*
+ * First make sure the socket buffer will be consistent; we
+ * don't use these fields except maybe the count.
+ */
+ so->so_snd.sb_cc = 0;
+ so->so_snd.sb_mbcnt = 0;
+ so->so_snd.sb_mb = NULL;
+
+ }
+ return (0);
+}
+
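+/*
+ * shutdown(2) handling: rejected for the UDP model (the receive side is
+ * re-enabled and EOPNOTSUPP returned). For the TCP model a SHUTDOWN is
+ * sent once the queues drain, otherwise SHUTDOWN-PENDING is set; a
+ * partially sent message forces an ABORT instead.
+ */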
+int
+sctp_shutdown(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ SCTP_INP_RLOCK(inp);
+ /* For the UDP model this is an invalid call */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ /* Restore the flags that the soshutdown took away. */
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ /* This proc will wakeup for read and do nothing (I hope) */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return (EOPNOTSUPP);
+ }
+ /*
+ * OK, if we reach here it's the TCP model and it is either a SHUT_WR
+ * or SHUT_RDWR. This means we put the shutdown flag against it.
+ */
+ {
+ struct sctp_tcb *stcb;
+ struct sctp_association *asoc;
+
+ if ((so->so_state &
+ (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ return (ENOTCONN);
+ }
+ socantsendmore(so);
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ /*
+ * Ok we hit the case that the shutdown call was
+ * made after an abort or something. Nothing to do
+ * now.
+ */
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ SCTP_TCB_LOCK(stcb);
+ asoc = &stcb->asoc;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /* there is nothing queued to send, so I'm done... */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ /* only send SHUTDOWN the first time through */
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send, so set
+ * SHUTDOWN_PENDING
+ */
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is non-null strm:%d\n",
+ asoc->locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0)) {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("shutdown does an abort");
+#endif
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ op_err, SCTP_SO_LOCKED);
+ goto skip_unlock;
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+skip_unlock:
+ SCTP_INP_RUNLOCK(inp);
+ return 0;
+}
+
+/*
+ * copies a "user"-presentable address and removes embedded scope, etc.
+ * returns 0 on success, 1 on error
+ */
+static uint32_t
+sctp_fill_user_address(struct sockaddr_storage *ss, struct sockaddr *sa)
+{
+#ifdef INET6
+ struct sockaddr_in6 lsa6;
+
+ sa = (struct sockaddr *)sctp_recover_scope((struct sockaddr_in6 *)sa,
+ &lsa6);
+#endif
+ memcpy(ss, sa, sa->sa_len);
+ return (0);
+}
+
+
+
+/*
+ * NOTE: assumes addr lock is held
+ */
+static size_t
+sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ size_t limit,
+ struct sockaddr_storage *sas,
+ uint32_t vrf_id)
+{
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ size_t actual;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+
+ actual = 0;
+ if (limit <= 0)
+ return (actual);
+
+ if (stcb) {
+ /* Turn on all the appropriate scope */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ } else {
+ /* Turn on ALL scope, since we look at the EP */
+ loopback_scope = ipv4_local_scope = local_scope =
+ site_scope = 1;
+ }
+ ipv4_addr_legal = ipv6_addr_legal = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(inp) == 0) {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ }
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* Skip loopback if loopback_scope not set */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (stcb) {
+ /*
+ * For the BOUND-ALL case, the list
+ * associated with a TCB is always
+ * considered a reverse list, i.e.
+ * it lists addresses that are NOT
+ * part of the association. If this
+ * is one of those we must skip it.
+ */
+ if (sctp_is_addr_restricted(stcb,
+ sctp_ifa)) {
+ continue;
+ }
+ }
+ switch (sctp_ifa->address.sa.sa_family) {
+ case AF_INET:
+ if (ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* we skip unspecified addresses */
+ continue;
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ continue;
+ }
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ in6_sin_2_v4mapsin6(sin, (struct sockaddr_in6 *)sas);
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(struct sockaddr_in6));
+ actual += sizeof(struct sockaddr_in6);
+ } else {
+#endif
+ memcpy(sas, sin, sizeof(*sin));
+ ((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin));
+ actual += sizeof(*sin);
+#ifdef INET6
+ }
+#endif
+ if (actual >= limit) {
+ return (actual);
+ }
+ } else {
+ continue;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* we skip unspecified addresses */
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ if (sa6_recoverscope(sin6) != 0)
+ /* bad link-local address */
+ continue;
+ }
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ continue;
+ }
+ memcpy(sas, sin6, sizeof(*sin6));
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin6));
+ actual += sizeof(*sin6);
+ if (actual >= limit) {
+ return (actual);
+ }
+ } else {
+ continue;
+ }
+ break;
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (stcb) {
+ if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
+ continue;
+ }
+ }
+ if (sctp_fill_user_address(sas, &laddr->ifa->address.sa))
+ continue;
+
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas +
+ laddr->ifa->address.sa.sa_len);
+ actual += laddr->ifa->address.sa.sa_len;
+ if (actual >= limit) {
+ return (actual);
+ }
+ }
+ }
+ return (actual);
+}
+
+static size_t
+sctp_fill_up_addresses(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ size_t limit,
+ struct sockaddr_storage *sas)
+{
+ size_t size = 0;
+
+ SCTP_IPI_ADDR_RLOCK();
+ /* fill up addresses for the endpoint's default vrf */
+ size = sctp_fill_up_addresses_vrf(inp, stcb, limit, sas,
+ inp->def_vrf_id);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (size);
+}
+
+/*
+ * NOTE: assumes addr lock is held
+ */
+static int
+sctp_count_max_addresses_vrf(struct sctp_inpcb *inp, uint32_t vrf_id)
+{
+ int cnt = 0;
+ struct sctp_vrf *vrf = NULL;
+
+ /*
+ * In both the subset-bound and bound-all cases we return the MAXIMUM
+ * number of addresses that you COULD get. In reality the subset-bound
+ * endpoint may have an exclusion list for a given TCB, OR in the
+ * bound-all case a TCB may NOT include the loopback or other
+ * addresses as well.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ /* Count them if they are the right type */
+ if (sctp_ifa->address.sa.sa_family == AF_INET) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
+ cnt += sizeof(struct sockaddr_in6);
+ else
+ cnt += sizeof(struct sockaddr_in);
+
+ } else if (sctp_ifa->address.sa.sa_family == AF_INET6)
+ cnt += sizeof(struct sockaddr_in6);
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
+ cnt += sizeof(struct sockaddr_in6);
+ else
+ cnt += sizeof(struct sockaddr_in);
+
+ } else if (laddr->ifa->address.sa.sa_family == AF_INET6)
+ cnt += sizeof(struct sockaddr_in6);
+ }
+ }
+ return (cnt);
+}
+
+static int
+sctp_count_max_addresses(struct sctp_inpcb *inp)
+{
+ int cnt = 0;
+
+ SCTP_IPI_ADDR_RLOCK();
+ /* count addresses for the endpoint's default VRF */
+ cnt = sctp_count_max_addresses_vrf(inp, inp->def_vrf_id);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (cnt);
+}
+
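+/*
+ * Worker for the connectx() call path: parse the packed address list
+ * handed in through optval, create the association, send (or delay) the
+ * INIT, and return the new association id back through optval.
+ */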
+static int
+sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
+ size_t optsize, void *p, int delay)
+{
+ int error = 0;
+ int creat_lock_on = 0;
+ struct sctp_tcb *stcb = NULL;
+ struct sockaddr *sa;
+ int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
+ int added = 0;
+ uint32_t vrf_id;
+ int bad_addresses = 0;
+ sctp_assoc_t *a_id;
+
+ SCTPDBG(SCTP_DEBUG_PCB1, "Connectx called\n");
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND the TCP model */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
+ (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ return (EALREADY);
+ }
+ SCTP_INP_INCR_REF(inp);
+ SCTP_ASOC_CREATE_LOCK(inp);
+ creat_lock_on = 1;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
+ error = EFAULT;
+ goto out_now;
+ }
+ totaddrp = (int *)optval;
+ totaddr = *totaddrp;
+ sa = (struct sockaddr *)(totaddrp + 1);
+ stcb = sctp_connectx_helper_find(inp, sa, &totaddr, &num_v4, &num_v6, &error, (optsize - sizeof(int)), &bad_addresses);
+ if ((stcb != NULL) || bad_addresses) {
+ /* Already have or am bringing up an association */
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ creat_lock_on = 0;
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ if (bad_addresses == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ }
+ goto out_now;
+ }
+#ifdef INET6
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (num_v6 > 0)) {
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ (num_v4 > 0)) {
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)inp;
+ if (SCTP_IPV6_V6ONLY(inp6)) {
+ /*
+ * if IPV6_V6ONLY flag, ignore connections destined
+ * to a v4 addr or v4-mapped addr
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ }
+#endif /* INET6 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /* Bind an ephemeral port */
+ error = sctp_inpcb_bind(so, NULL, NULL, p);
+ if (error) {
+ goto out_now;
+ }
+ }
+ /* FIX ME: do we want to pass in a vrf on the connect call? */
+ vrf_id = inp->def_vrf_id;
+
+
+ /* We are GOOD to go */
+ stcb = sctp_aloc_assoc(inp, sa, &error, 0, vrf_id,
+ (struct thread *)p
+ );
+ if (stcb == NULL) {
+ /* Gak! no memory */
+ goto out_now;
+ }
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ /* move to second address */
+ if (sa->sa_family == AF_INET)
+ sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in));
+ else
+ sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in6));
+
+ error = 0;
+ added = sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
+ /* Fill in the return id */
+ if (error) {
+ (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
+ goto out_now;
+ }
+ a_id = (sctp_assoc_t *) optval;
+ *a_id = sctp_get_associd(stcb);
+
+ /* initialize authentication parameters for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ if (delay) {
+ /* doing delayed connection */
+ stcb->asoc.delayed_connection = 1;
+ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, stcb->asoc.primary_destination);
+ } else {
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+ /* Set the connected flag so we can queue data */
+ soisconnecting(so);
+ }
+out_now:
+ if (creat_lock_on) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ return error;
+}
+
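+/*
+ * Locate and lock the TCB a socket option refers to: TCP-model sockets
+ * use their single association, otherwise the association id is looked
+ * up; an id of zero leaves stcb NULL for endpoint-level options.
+ */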
+#define SCTP_FIND_STCB(inp, stcb, assoc_id) { \
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||\
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { \
+ SCTP_INP_RLOCK(inp); \
+ stcb = LIST_FIRST(&inp->sctp_asoc_list); \
+ if (stcb) { \
+ SCTP_TCB_LOCK(stcb); \
+ } \
+ SCTP_INP_RUNLOCK(inp); \
+ } else if (assoc_id != 0) { \
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); \
+ if (stcb == NULL) { \
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); \
+ error = ENOENT; \
+ break; \
+ } \
+ } else { \
+ stcb = NULL; \
+ } \
+ }
+
+
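+/*
+ * Check that the supplied option buffer is large enough for the expected
+ * structure and cast it; otherwise flag EINVAL and break out of the
+ * option switch.
+ */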
+#define SCTP_CHECK_AND_CAST(destp, srcp, type, size) {\
+ if (size < sizeof(type)) { \
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); \
+ error = EINVAL; \
+ break; \
+ } else { \
+ destp = (type *)srcp; \
+ } \
+ }
+
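+/*
+ * getsockopt() back end: optval/optsize describe a caller-supplied buffer
+ * that each case below fills in, updating *optsize to the number of
+ * bytes written.
+ */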
+static int
+sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize,
+ void *p)
+{
+ struct sctp_inpcb *inp = NULL;
+ int error, val = 0;
+ struct sctp_tcb *stcb = NULL;
+
+ if (optval == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ error = 0;
+
+ switch (optname) {
+ case SCTP_NODELAY:
+ case SCTP_AUTOCLOSE:
+ case SCTP_EXPLICIT_EOR:
+ case SCTP_AUTO_ASCONF:
+ case SCTP_DISABLE_FRAGMENTS:
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ case SCTP_USE_EXT_RCVINFO:
+ SCTP_INP_RLOCK(inp);
+ switch (optname) {
+ case SCTP_DISABLE_FRAGMENTS:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT);
+ break;
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4);
+ break;
+ case SCTP_AUTO_ASCONF:
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* only valid for bound all sockets */
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto flags_out;
+ }
+ break;
+ case SCTP_EXPLICIT_EOR:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ break;
+ case SCTP_NODELAY:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY);
+ break;
+ case SCTP_USE_EXT_RCVINFO:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO);
+ break;
+ case SCTP_AUTOCLOSE:
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))
+ val = TICKS_TO_SEC(inp->sctp_ep.auto_close_time);
+ else
+ val = 0;
+ break;
+
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ } /* end switch (sopt->sopt_name) */
+ if (optname != SCTP_AUTOCLOSE) {
+ /* make it an "on/off" value */
+ val = (val != 0);
+ }
+ if (*optsize < sizeof(val)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+flags_out:
+ SCTP_INP_RUNLOCK(inp);
+ if (error == 0) {
+ /* return the option value */
+ *(int *)optval = val;
+ *optsize = sizeof(val);
+ }
+ break;
+ case SCTP_GET_PACKET_LOG:
+ {
+#ifdef SCTP_PACKET_LOGGING
+ uint8_t *target;
+ int ret;
+
+ SCTP_CHECK_AND_CAST(target, optval, uint8_t, *optsize);
+ ret = sctp_copy_out_packet_log(target, (int)*optsize);
+ *optsize = ret;
+#else
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+#endif
+ break;
+ }
+ case SCTP_REUSE_PORT:
+ {
+ uint32_t *value;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
+ /* Can't do this for a 1-m socket */
+ error = EINVAL;
+ break;
+ }
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ *value = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ *value = inp->partial_delivery_point;
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) {
+ *value = SCTP_FRAG_LEVEL_2;
+ } else {
+ *value = SCTP_FRAG_LEVEL_1;
+ }
+ } else {
+ *value = SCTP_FRAG_LEVEL_0;
+ }
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_CMT_ON_OFF:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ av->assoc_value = stcb->asoc.sctp_cmt_on_off;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_cmt_on_off;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ /* JRS - Get socket option for pluggable congestion control */
+ case SCTP_PLUGGABLE_CC:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ av->assoc_value = stcb->asoc.congestion_control_module;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ av->assoc_value = inp->sctp_ep.sctp_default_cc_module;
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_GET_ADDR_LEN:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ error = EINVAL;
+#ifdef INET
+ if (av->assoc_value == AF_INET) {
+ av->assoc_value = sizeof(struct sockaddr_in);
+ error = 0;
+ }
+#endif
+#ifdef INET6
+ if (av->assoc_value == AF_INET6) {
+ av->assoc_value = sizeof(struct sockaddr_in6);
+ error = 0;
+ }
+#endif
+ if (error) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_GET_ASSOC_NUMBER:
+ {
+ uint32_t *value, cnt;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ cnt = 0;
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ cnt++;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ *value = cnt;
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+
+ case SCTP_GET_ASSOC_ID_LIST:
+ {
+ struct sctp_assoc_ids *ids;
+ unsigned int at, limit;
+
+ SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize);
+ at = 0;
+ limit = (*optsize - sizeof(uint32_t)) / sizeof(sctp_assoc_t);
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ if (at < limit) {
+ ids->gaids_assoc_id[at++] = sctp_get_associd(stcb);
+ } else {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ ids->gaids_number_of_ids = at;
+ *optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t));
+ }
+ break;
+ case SCTP_CONTEXT:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.context;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_context;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_VRF_ID:
+ {
+ uint32_t *default_vrfid;
+
+ SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, *optsize);
+ *default_vrfid = inp->def_vrf_id;
+ break;
+ }
+ case SCTP_GET_ASOC_VRF:
+ {
+ struct sctp_assoc_value *id;
+
+ SCTP_CHECK_AND_CAST(id, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, id->assoc_id);
+ if (stcb == NULL) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ id->assoc_value = stcb->asoc.vrf_id;
+ break;
+ }
+ case SCTP_GET_VRF_IDS:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
+ case SCTP_GET_NONCE_VALUES:
+ {
+ struct sctp_get_nonce_values *gnv;
+
+ SCTP_CHECK_AND_CAST(gnv, optval, struct sctp_get_nonce_values, *optsize);
+ SCTP_FIND_STCB(inp, stcb, gnv->gn_assoc_id);
+
+ if (stcb) {
+ gnv->gn_peers_tag = stcb->asoc.peer_vtag;
+ gnv->gn_local_tag = stcb->asoc.my_vtag;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(*gnv);
+ }
+ break;
+ case SCTP_DELAYED_SACK:
+ {
+ struct sctp_sack_info *sack;
+
+ SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id);
+ if (stcb) {
+ sack->sack_delay = stcb->asoc.delayed_ack;
+ sack->sack_freq = stcb->asoc.sack_freq;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+ sack->sack_freq = inp->sctp_ep.sctp_sack_freq;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*sack);
+ }
+ break;
+
+ case SCTP_GET_SNDBUF_USE:
+ {
+ struct sctp_sockstat *ss;
+
+ SCTP_CHECK_AND_CAST(ss, optval, struct sctp_sockstat, *optsize);
+ SCTP_FIND_STCB(inp, stcb, ss->ss_assoc_id);
+
+ if (stcb) {
+ ss->ss_total_sndbuf = stcb->asoc.total_output_queue_size;
+ ss->ss_total_recv_buf = (stcb->asoc.size_on_reasm_queue +
+ stcb->asoc.size_on_all_streams);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(struct sctp_sockstat);
+ }
+ break;
+ case SCTP_MAX_BURST:
+ {
+ uint8_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint8_t, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.max_burst;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint8_t);
+ }
+ break;
+ case SCTP_MAXSEG:
+ {
+ struct sctp_assoc_value *av;
+ int ovh;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = sctp_get_frag_point(stcb, &stcb->asoc);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+ if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT)
+ av->assoc_value = 0;
+ else
+ av->assoc_value = inp->sctp_frag_point - ovh;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ case SCTP_GET_STAT_LOG:
+ error = sctp_fill_stat_log(optval, optsize);
+ break;
+ case SCTP_EVENTS:
+ {
+ struct sctp_event_subscribe *events;
+
+ SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, *optsize);
+ memset(events, 0, sizeof(*events));
+ SCTP_INP_RLOCK(inp);
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT))
+ events->sctp_data_io_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT))
+ events->sctp_association_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT))
+ events->sctp_address_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT))
+ events->sctp_send_failure_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR))
+ events->sctp_peer_error_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT))
+ events->sctp_shutdown_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT))
+ events->sctp_partial_delivery_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT))
+ events->sctp_adaptation_layer_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT))
+ events->sctp_authentication_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT))
+ events->sctp_sender_dry_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT))
+ events->sctp_stream_reset_event = 1;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(struct sctp_event_subscribe);
+ }
+ break;
+
+ case SCTP_ADAPTATION_LAYER:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.adaptation_layer_indicator;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_SET_INITIAL_DBG_SEQ:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.initial_sequence_debug;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_LOCAL_ADDR_SIZE:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ SCTP_INP_RLOCK(inp);
+ *value = sctp_count_max_addresses(inp);
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_REMOTE_ADDR_SIZE:
+ {
+ uint32_t *value;
+ size_t size;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ /* FIXME MT: change to sctp_assoc_value? */
+ SCTP_FIND_STCB(inp, stcb, (sctp_assoc_t) * value);
+
+ if (stcb) {
+ size = 0;
+ /* Count the sizes */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) ||
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET6)) {
+ size += sizeof(struct sockaddr_in6);
+ } else if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ size += sizeof(struct sockaddr_in);
+ } else {
+ /* huh */
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ *value = (uint32_t) size;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_PEER_ADDRESSES:
+ /*
+ * Get the address information; an array is passed in to
+ * fill up and we pack it.
+ */
+ {
+ size_t cpsz, left;
+ struct sockaddr_storage *sas;
+ struct sctp_nets *net;
+ struct sctp_getaddresses *saddr;
+
+ SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize);
+ SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
+
+ if (stcb) {
+ left = (*optsize) - sizeof(struct sctp_getaddresses);
+ *optsize = sizeof(struct sctp_getaddresses);
+ sas = (struct sockaddr_storage *)&saddr->addr[0];
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) ||
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET6)) {
+ cpsz = sizeof(struct sockaddr_in6);
+ } else if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ cpsz = sizeof(struct sockaddr_in);
+ } else {
+ /* huh */
+ break;
+ }
+ if (left < cpsz) {
+ /* not enough room. */
+ break;
+ }
+#ifdef INET6
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET)) {
+ /* Must map the address */
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)&net->ro._l_addr,
+ (struct sockaddr_in6 *)sas);
+ } else {
+#endif
+ memcpy(sas, &net->ro._l_addr, cpsz);
+#ifdef INET6
+ }
+#endif
+ ((struct sockaddr_in *)sas)->sin_port = stcb->rport;
+
+ sas = (struct sockaddr_storage *)((caddr_t)sas + cpsz);
+ left -= cpsz;
+ *optsize += cpsz;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ }
+ break;
+ case SCTP_GET_LOCAL_ADDRESSES:
+ {
+ size_t limit, actual;
+ struct sockaddr_storage *sas;
+ struct sctp_getaddresses *saddr;
+
+ SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize);
+ SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
+
+ sas = (struct sockaddr_storage *)&saddr->addr[0];
+ limit = *optsize - sizeof(sctp_assoc_t);
+ actual = sctp_fill_up_addresses(inp, stcb, limit, sas);
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ *optsize = sizeof(struct sockaddr_storage) + actual;
+ }
+ break;
+ case SCTP_PEER_ADDR_PARAMS:
+ {
+ struct sctp_paddrparams *paddrp;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, *optsize);
+ SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb, as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddrp->spp_address, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (stcb && (net == NULL)) {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *)&paddrp->spp_address;
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr) {
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ } else {
+ error = EAFNOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ if (stcb) {
+ /* Applies to the specific association */
+ paddrp->spp_flags = 0;
+ if (net) {
+ int ovh;
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+
+ paddrp->spp_pathmaxrxt = net->failure_threshold;
+ paddrp->spp_pathmtu = net->mtu - ovh;
+ /* get flags for HB */
+ if (net->dest_state & SCTP_ADDR_NOHB)
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ else
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ /* get flags for PMTU */
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ paddrp->spp_flags |= SPP_PMTUD_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_PMTUD_DISABLE;
+ }
+#ifdef INET
+ if (net->ro._l_addr.sin.sin_family == AF_INET) {
+ paddrp->spp_ipv4_tos = net->tos_flowlabel & 0x000000fc;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+ }
+#endif
+#ifdef INET6
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ paddrp->spp_ipv6_flowlabel = net->tos_flowlabel;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+ }
+#endif
+ } else {
+ /*
+ * No destination so return default
+ * value
+ */
+ int cnt = 0;
+
+ paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure;
+ paddrp->spp_pathmtu = sctp_get_frag_point(stcb, &stcb->asoc);
+#ifdef INET
+ paddrp->spp_ipv4_tos = stcb->asoc.default_tos & 0x000000fc;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+#endif
+#ifdef INET6
+ paddrp->spp_ipv6_flowlabel = stcb->asoc.default_flowlabel;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+#endif
+ /* default settings should be these */
+ if (stcb->asoc.hb_is_disabled == 0) {
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ }
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ cnt++;
+ }
+ }
+ if (cnt) {
+ paddrp->spp_flags |= SPP_PMTUD_ENABLE;
+ }
+ }
+ paddrp->spp_hbinterval = stcb->asoc.heart_beat_delay;
+ paddrp->spp_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* Use endpoint defaults */
+ SCTP_INP_RLOCK(inp);
+ paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure;
+ paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+ paddrp->spp_assoc_id = (sctp_assoc_t) 0;
+ /* get inp's default */
+#ifdef INET
+ paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+#endif
+#ifdef INET6
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+ }
+#endif
+ /* can't return this */
+ paddrp->spp_pathmtu = 0;
+
+ /* default behavior, no stcb */
+ paddrp->spp_flags = SPP_PMTUD_ENABLE;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) {
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_paddrparams);
+ }
+ break;
+ case SCTP_GET_PEER_ADDR_INFO:
+ {
+ struct sctp_paddrinfo *paddri;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddri, optval, struct sctp_paddrinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, paddri->spinfo_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddri->spinfo_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb, as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddri->spinfo_address, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if ((stcb) && (net)) {
+ paddri->spinfo_state = net->dest_state & (SCTP_REACHABLE_MASK | SCTP_ADDR_NOHB);
+ paddri->spinfo_cwnd = net->cwnd;
+ paddri->spinfo_srtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ paddri->spinfo_rto = net->RTO;
+ paddri->spinfo_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ *optsize = sizeof(struct sctp_paddrinfo);
+ }
+ break;
+ case SCTP_PCB_STATUS:
+ {
+ struct sctp_pcbinfo *spcb;
+
+ SCTP_CHECK_AND_CAST(spcb, optval, struct sctp_pcbinfo, *optsize);
+ sctp_fill_pcbinfo(spcb);
+ *optsize = sizeof(struct sctp_pcbinfo);
+ }
+ break;
+
+ case SCTP_STATUS:
+ {
+ struct sctp_nets *net;
+ struct sctp_status *sstat;
+
+ SCTP_CHECK_AND_CAST(sstat, optval, struct sctp_status, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sstat->sstat_assoc_id);
+
+ if (stcb == NULL) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ /*
+ * I think passing the state is fine since
+ * sctp_constants.h will be available to
+ * userland.
+ */
+ sstat->sstat_state = stcb->asoc.state;
+ sstat->sstat_assoc_id = sctp_get_associd(stcb);
+ sstat->sstat_rwnd = stcb->asoc.peers_rwnd;
+ sstat->sstat_unackdata = stcb->asoc.sent_queue_cnt;
+ /*
+ * We can't include chunks that have been passed to
+ * the socket layer. Only things in queue.
+ */
+ sstat->sstat_penddata = (stcb->asoc.cnt_on_reasm_queue +
+ stcb->asoc.cnt_on_all_streams);
+
+
+ sstat->sstat_instrms = stcb->asoc.streamincnt;
+ sstat->sstat_outstrms = stcb->asoc.streamoutcnt;
+ sstat->sstat_fragmentation_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ memcpy(&sstat->sstat_primary.spinfo_address,
+ &stcb->asoc.primary_destination->ro._l_addr,
+ ((struct sockaddr *)(&stcb->asoc.primary_destination->ro._l_addr))->sa_len);
+ net = stcb->asoc.primary_destination;
+ ((struct sockaddr_in *)&sstat->sstat_primary.spinfo_address)->sin_port = stcb->rport;
+ /*
+ * Again, the user can consult sctp_constants.h
+ * for the meaning of the network state.
+ */
+ sstat->sstat_primary.spinfo_state = net->dest_state & SCTP_REACHABLE_MASK;
+ sstat->sstat_primary.spinfo_cwnd = net->cwnd;
+ sstat->sstat_primary.spinfo_srtt = net->lastsa;
+ sstat->sstat_primary.spinfo_rto = net->RTO;
+ sstat->sstat_primary.spinfo_mtu = net->mtu;
+ sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(*sstat);
+ }
+ break;
+ case SCTP_RTOINFO:
+ {
+ struct sctp_rtoinfo *srto;
+
+ SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id);
+
+ if (stcb) {
+ srto->srto_initial = stcb->asoc.initial_rto;
+ srto->srto_max = stcb->asoc.maxrto;
+ srto->srto_min = stcb->asoc.minrto;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ srto->srto_initial = inp->sctp_ep.initial_rto;
+ srto->srto_max = inp->sctp_ep.sctp_maxrto;
+ srto->srto_min = inp->sctp_ep.sctp_minrto;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*srto);
+ }
+ break;
+ case SCTP_TIMEOUTS:
+ {
+ struct sctp_timeouts *stimo;
+
+ SCTP_CHECK_AND_CAST(stimo, optval, struct sctp_timeouts, *optsize);
+ SCTP_FIND_STCB(inp, stcb, stimo->stimo_assoc_id);
+
+ if (stcb) {
+ stimo->stimo_init = stcb->asoc.timoinit;
+ stimo->stimo_data = stcb->asoc.timodata;
+ stimo->stimo_sack = stcb->asoc.timosack;
+ stimo->stimo_shutdown = stcb->asoc.timoshutdown;
+ stimo->stimo_heartbeat = stcb->asoc.timoheartbeat;
+ stimo->stimo_cookie = stcb->asoc.timocookie;
+ stimo->stimo_shutdownack = stcb->asoc.timoshutdownack;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ *optsize = sizeof(*stimo);
+ }
+ break;
+ case SCTP_ASSOCINFO:
+ {
+ struct sctp_assocparams *sasoc;
+ uint32_t oldval;
+
+ SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
+
+ if (stcb) {
+ oldval = sasoc->sasoc_cookie_life;
+ sasoc->sasoc_cookie_life = TICKS_TO_MSEC(stcb->asoc.cookie_life);
+ sasoc->sasoc_asocmaxrxt = stcb->asoc.max_send_times;
+ sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
+ sasoc->sasoc_peer_rwnd = stcb->asoc.peers_rwnd;
+ sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life);
+ sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times;
+ sasoc->sasoc_number_peer_destinations = 0;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*sasoc);
+ }
+ break;
+ case SCTP_DEFAULT_SEND_PARAM:
+ {
+ struct sctp_sndrcvinfo *s_info;
+
+ SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
+
+ if (stcb) {
+ memcpy(s_info, &stcb->asoc.def_send, sizeof(stcb->asoc.def_send));
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ memcpy(s_info, &inp->def_send, sizeof(inp->def_send));
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*s_info);
+ }
+ break;
+ case SCTP_INITMSG:
+ {
+ struct sctp_initmsg *sinit;
+
+ SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, *optsize);
+ SCTP_INP_RLOCK(inp);
+ sinit->sinit_num_ostreams = inp->sctp_ep.pre_open_stream_count;
+ sinit->sinit_max_instreams = inp->sctp_ep.max_open_streams_intome;
+ sinit->sinit_max_attempts = inp->sctp_ep.max_init_times;
+ sinit->sinit_max_init_timeo = inp->sctp_ep.initial_init_rto_max;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(*sinit);
+ }
+ break;
+ case SCTP_PRIMARY_ADDR:
+ /* we allow a "get" operation on this */
+ {
+ struct sctp_setprim *ssp;
+
+ SCTP_CHECK_AND_CAST(ssp, optval, struct sctp_setprim, *optsize);
+ SCTP_FIND_STCB(inp, stcb, ssp->ssp_assoc_id);
+
+ if (stcb) {
+ /* simply copy out the sockaddr_storage... */
+ int len;
+
+ len = *optsize;
+ if (len > stcb->asoc.primary_destination->ro._l_addr.sa.sa_len)
+ len = stcb->asoc.primary_destination->ro._l_addr.sa.sa_len;
+
+ memcpy(&ssp->ssp_addr,
+ &stcb->asoc.primary_destination->ro._l_addr,
+ len);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ *optsize = sizeof(*ssp);
+ }
+ break;
+
+ case SCTP_HMAC_IDENT:
+ {
+ struct sctp_hmacalgo *shmac;
+ sctp_hmaclist_t *hmaclist;
+ uint32_t size;
+ int i;
+
+ SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ hmaclist = inp->sctp_ep.local_hmacs;
+ if (hmaclist == NULL) {
+ /* no HMACs to return */
+ *optsize = sizeof(*shmac);
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
+ /* is there room for all of the hmac ids? */
+ size = sizeof(*shmac) + (hmaclist->num_algo *
+ sizeof(shmac->shmac_idents[0]));
+ if ((size_t)(*optsize) < size) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
+ /* copy in the list */
+ shmac->shmac_number_of_idents = hmaclist->num_algo;
+ for (i = 0; i < hmaclist->num_algo; i++) {
+ shmac->shmac_idents[i] = hmaclist->hmac[i];
+ }
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = size;
+ break;
+ }
+ case SCTP_AUTH_ACTIVE_KEY:
+ {
+ struct sctp_authkeyid *scact;
+
+ SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, *optsize);
+ SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
+
+ if (stcb) {
+ /* get the active key on the assoc */
+ scact->scact_keynumber = stcb->asoc.authinfo.active_keyid;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* get the endpoint active key */
+ SCTP_INP_RLOCK(inp);
+ scact->scact_keynumber = inp->sctp_ep.default_keyid;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*scact);
+ break;
+ }
+ case SCTP_LOCAL_AUTH_CHUNKS:
+ {
+ struct sctp_authchunks *sac;
+ sctp_auth_chklist_t *chklist = NULL;
+ size_t size = 0;
+
+ SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id);
+
+ if (stcb) {
+ /* get off the assoc */
+ chklist = stcb->asoc.local_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy in the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* get off the endpoint */
+ SCTP_INP_RLOCK(inp);
+ chklist = inp->sctp_ep.local_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy in the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_authchunks) + size;
+ break;
+ }
+ case SCTP_PEER_AUTH_CHUNKS:
+ {
+ struct sctp_authchunks *sac;
+ sctp_auth_chklist_t *chklist = NULL;
+ size_t size = 0;
+
+ SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id);
+
+ if (stcb) {
+ /* get off the assoc */
+ chklist = stcb->asoc.peer_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy in the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ *optsize = sizeof(struct sctp_authchunks) + size;
+ break;
+ }
+
+
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ *optsize = 0;
+ break;
+ } /* end switch (sopt->sopt_name) */
+ return (error);
+}
+
+static int
+sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
+ void *p)
+{
+ int error, set_opt;
+ uint32_t *mopt;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_inpcb *inp = NULL;
+ uint32_t vrf_id;
+
+ if (optval == NULL) {
+ SCTP_PRINTF("optval is NULL\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_PRINTF("inp is NULL?\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ vrf_id = inp->def_vrf_id;
+
+ error = 0;
+ switch (optname) {
+ case SCTP_NODELAY:
+ case SCTP_AUTOCLOSE:
+ case SCTP_AUTO_ASCONF:
+ case SCTP_EXPLICIT_EOR:
+ case SCTP_DISABLE_FRAGMENTS:
+ case SCTP_USE_EXT_RCVINFO:
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
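+ /*
+ * These options are simple on/off switches backed by
+ * PCB feature flags: pick the matching flag below and
+ * then turn it on or off according to the value given.
+ */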
+ /* copy in the option value */
+ SCTP_CHECK_AND_CAST(mopt, optval, uint32_t, optsize);
+ set_opt = 0;
+ if (error)
+ break;
+ switch (optname) {
+ case SCTP_DISABLE_FRAGMENTS:
+ set_opt = SCTP_PCB_FLAGS_NO_FRAGMENT;
+ break;
+ case SCTP_AUTO_ASCONF:
+ /*
+ * NOTE: we don't really support this flag
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* only valid for bound all sockets */
+ set_opt = SCTP_PCB_FLAGS_AUTO_ASCONF;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ break;
+ case SCTP_EXPLICIT_EOR:
+ set_opt = SCTP_PCB_FLAGS_EXPLICIT_EOR;
+ break;
+ case SCTP_USE_EXT_RCVINFO:
+ set_opt = SCTP_PCB_FLAGS_EXT_RCVINFO;
+ break;
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ set_opt = SCTP_PCB_FLAGS_NEEDS_MAPPED_V4;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ break;
+ case SCTP_NODELAY:
+ set_opt = SCTP_PCB_FLAGS_NODELAY;
+ break;
+ case SCTP_AUTOCLOSE:
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ set_opt = SCTP_PCB_FLAGS_AUTOCLOSE;
+ /*
+ * The value is given in seconds and converted to
+ * ticks here.  Note this does not affect existing
+ * associations, only new ones.
+ */
+ inp->sctp_ep.auto_close_time = SEC_TO_TICKS(*mopt);
+ break;
+ }
+ SCTP_INP_WLOCK(inp);
+ if (*mopt != 0) {
+ sctp_feature_on(inp, set_opt);
+ } else {
+ sctp_feature_off(inp, set_opt);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ case SCTP_REUSE_PORT:
+ {
+ SCTP_CHECK_AND_CAST(mopt, optval, uint32_t, optsize);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) {
+ /* Can't set it after we are bound */
+ error = EINVAL;
+ break;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
+ /* Can't do this for a 1-m socket */
+ error = EINVAL;
+ break;
+ }
+ if (*mopt != 0)
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE);
+ else
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE);
+ }
+ break;
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize);
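+ /*
+ * The partial delivery point may not exceed the
+ * receive socket buffer limit.
+ */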
+ if (*value > SCTP_SB_LIMIT_RCV(so)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ inp->partial_delivery_point = *value;
+ }
+ break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ /* not yet until we re-write sctp_recvmsg() */
+ {
+ uint32_t *level;
+
+ SCTP_CHECK_AND_CAST(level, optval, uint32_t, optsize);
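+ /*
+ * Map the requested level onto the two feature flags:
+ * level 2 enables both fragment and stream
+ * interleaving, level 1 only fragment interleaving,
+ * and level 0 disables both.
+ */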
+ if (*level == SCTP_FRAG_LEVEL_2) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (*level == SCTP_FRAG_LEVEL_1) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (*level == SCTP_FRAG_LEVEL_0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ case SCTP_CMT_ON_OFF:
+ if (SCTP_BASE_SYSCTL(sctp_cmt_on_off)) {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ if (av->assoc_value != 0)
+ stcb->asoc.sctp_cmt_on_off = 1;
+ else
+ stcb->asoc.sctp_cmt_on_off = 0;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value != 0)
+ inp->sctp_cmt_on_off = 1;
+ else
+ inp->sctp_cmt_on_off = 0;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ }
+ break;
+ /* JRS - Set socket option for pluggable congestion control */
+ case SCTP_PLUGGABLE_CC:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
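+ /*
+ * With an association, install the selected
+ * module's callbacks into its cc_functions
+ * vector; without one, just record the default
+ * module on the endpoint further below.
+ */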
+ switch (av->assoc_value) {
+ /*
+ * JRS - Standard TCP congestion
+ * control
+ */
+ case SCTP_CC_RFC2581:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /*
+ * JRS - High Speed TCP congestion
+ * control (Floyd)
+ */
+ case SCTP_CC_HSTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HSTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /* JRS - HTCP congestion control */
+ case SCTP_CC_HTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /*
+ * JRS - All other values are
+ * invalid
+ */
+ default:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ }
+ } else {
+ switch (av->assoc_value) {
+ case SCTP_CC_RFC2581:
+ case SCTP_CC_HSTCP:
+ case SCTP_CC_HTCP:
+ inp->sctp_ep.sctp_default_cc_module = av->assoc_value;
+ break;
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ }
+ }
+ break;
+ case SCTP_CLR_STAT_LOG:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ case SCTP_CONTEXT:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ stcb->asoc.context = av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_context = av->assoc_value;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_VRF_ID:
+ {
+ uint32_t *default_vrfid;
+
+ SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, optsize);
+ if (*default_vrfid > SCTP_MAX_VRF_ID) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ inp->def_vrf_id = *default_vrfid;
+ break;
+ }
+ case SCTP_DEL_VRF_ID:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
+ case SCTP_ADD_VRF_ID:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
+ case SCTP_DELAYED_SACK:
+ {
+ struct sctp_sack_info *sack;
+
+ SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, optsize);
+ SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id);
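+ /* Clamp the requested SACK delay to the allowed maximum. */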
+ if (sack->sack_delay) {
+ if (sack->sack_delay > SCTP_MAX_SACK_DELAY)
+ sack->sack_delay = SCTP_MAX_SACK_DELAY;
+ }
+ if (stcb) {
+ if (sack->sack_delay) {
+ if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
+ sack->sack_delay = TICKS_TO_MSEC(1);
+ }
+ stcb->asoc.delayed_ack = sack->sack_delay;
+ }
+ if (sack->sack_freq) {
+ stcb->asoc.sack_freq = sack->sack_freq;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sack->sack_delay) {
+ if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
+ sack->sack_delay = TICKS_TO_MSEC(1);
+ }
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay);
+ }
+ if (sack->sack_freq) {
+ inp->sctp_ep.sctp_sack_freq = sack->sack_freq;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_AUTH_CHUNK:
+ {
+ struct sctp_authchunk *sauth;
+
+ SCTP_CHECK_AND_CAST(sauth, optval, struct sctp_authchunk, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (sctp_auth_add_chunk(sauth->sauth_chunk, inp->sctp_ep.local_auth_chunks)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ case SCTP_AUTH_KEY:
+ {
+ struct sctp_authkey *sca;
+ struct sctp_keyhead *shared_keys;
+ sctp_sharedkey_t *shared_key;
+ sctp_key_t *key = NULL;
+ size_t size;
+
+ SCTP_CHECK_AND_CAST(sca, optval, struct sctp_authkey, optsize);
+ SCTP_FIND_STCB(inp, stcb, sca->sca_assoc_id);
+ size = optsize - sizeof(*sca);
+
+ if (stcb) {
+ /* set it on the assoc */
+ shared_keys = &stcb->asoc.shared_keys;
+ /* clear the cached keys for this key id */
+ sctp_clear_cachedkeys(stcb, sca->sca_keynumber);
+ /*
+ * create the new shared key and
+ * insert/replace it
+ */
+ if (size > 0) {
+ key = sctp_set_key(sca->sca_key, (uint32_t) size);
+ if (key == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ }
+ shared_key = sctp_alloc_sharedkey();
+ if (shared_key == NULL) {
+ sctp_free_key(key);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ shared_key->key = key;
+ shared_key->keyid = sca->sca_keynumber;
+ error = sctp_insert_sharedkey(shared_keys, shared_key);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* set it on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ shared_keys = &inp->sctp_ep.shared_keys;
+ /*
+ * clear the cached keys on all assocs for
+ * this key id
+ */
+ sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber);
+ /*
+ * create the new shared key and
+ * insert/replace it
+ */
+ if (size > 0) {
+ key = sctp_set_key(sca->sca_key, (uint32_t) size);
+ if (key == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ }
+ shared_key = sctp_alloc_sharedkey();
+ if (shared_key == NULL) {
+ sctp_free_key(key);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ shared_key->key = key;
+ shared_key->keyid = sca->sca_keynumber;
+ error = sctp_insert_sharedkey(shared_keys, shared_key);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_HMAC_IDENT:
+ {
+ struct sctp_hmacalgo *shmac;
+ sctp_hmaclist_t *hmaclist;
+ uint16_t hmacid;
+ uint32_t i;
+
+ size_t found;
+
+ SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize);
+ if (optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ hmaclist = sctp_alloc_hmaclist(shmac->shmac_number_of_idents);
+ if (hmaclist == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ break;
+ }
+ for (i = 0; i < shmac->shmac_number_of_idents; i++) {
+ hmacid = shmac->shmac_idents[i];
+ if (sctp_auth_add_hmacid(hmaclist, hmacid)) {
+ /* invalid HMACs were found */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ sctp_free_hmaclist(hmaclist);
+ goto sctp_set_hmac_done;
+ }
+ }
+ found = 0;
+ for (i = 0; i < hmaclist->num_algo; i++) {
+ if (hmaclist->hmac[i] == SCTP_AUTH_HMAC_ID_SHA1) {
+ /* already in list */
+ found = 1;
+ }
+ }
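+ /*
+ * SHA-1 is the mandatory-to-support HMAC, so reject
+ * any list that leaves it out.
+ */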
+ if (!found) {
+ sctp_free_hmaclist(hmaclist);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ /* set it on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+ inp->sctp_ep.local_hmacs = hmaclist;
+ SCTP_INP_WUNLOCK(inp);
+ sctp_set_hmac_done:
+ break;
+ }
+ case SCTP_AUTH_ACTIVE_KEY:
+ {
+ struct sctp_authkeyid *scact;
+
+ SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid,
+ optsize);
+ SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
+
+ /* set the active key on the right place */
+ if (stcb) {
+ /* set the active key on the assoc */
+ if (sctp_auth_setactivekey(stcb,
+ scact->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
+ SCTP_FROM_SCTP_USRREQ,
+ EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* set the active key on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ if (sctp_auth_setactivekey_ep(inp,
+ scact->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
+ SCTP_FROM_SCTP_USRREQ,
+ EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_AUTH_DELETE_KEY:
+ {
+ struct sctp_authkeyid *scdel;
+
+ SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid,
+ optsize);
+ SCTP_FIND_STCB(inp, stcb, scdel->scact_assoc_id);
+
+ /* delete the key from the right place */
+ if (stcb) {
+ if (sctp_delete_sharedkey(stcb,
+ scdel->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
+ SCTP_FROM_SCTP_USRREQ,
+ EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sctp_delete_sharedkey_ep(inp,
+ scdel->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
+ SCTP_FROM_SCTP_USRREQ,
+ EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_AUTH_DEACTIVATE_KEY:
+ {
+ struct sctp_authkeyid *keyid;
+
+ SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid,
+ optsize);
+ SCTP_FIND_STCB(inp, stcb, keyid->scact_assoc_id);
+
+ /* deactivate the key from the right place */
+ if (stcb) {
+ if (sctp_deact_sharedkey(stcb,
+ keyid->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
+ SCTP_FROM_SCTP_USRREQ,
+ EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sctp_deact_sharedkey_ep(inp,
+ keyid->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
+ SCTP_FROM_SCTP_USRREQ,
+ EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+
+ case SCTP_RESET_STREAMS:
+ {
+ struct sctp_stream_reset *strrst;
+ uint8_t send_in = 0, send_tsn = 0, send_out = 0,
+ addstream = 0;
+ uint16_t addstrmcnt = 0;
+ int i;
+
+ SCTP_CHECK_AND_CAST(strrst, optval, struct sctp_stream_reset, optsize);
+ SCTP_FIND_STCB(inp, stcb, strrst->strrst_assoc_id);
+
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ break;
+ }
+ if (stcb->asoc.peer_supports_strreset == 0) {
+ /*
+ * The peer does not support stream resets, so
+ * return "protocol not supported": the limitation
+ * lies with this feature and this peer, not with
+ * the socket request in general.
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPROTONOSUPPORT);
+ error = EPROTONOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ if (stcb->asoc.stream_reset_outstanding) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ if (strrst->strrst_flags == SCTP_RESET_LOCAL_RECV) {
+ send_in = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_LOCAL_SEND) {
+ send_out = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_BOTH) {
+ send_in = 1;
+ send_out = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_TSN) {
+ send_tsn = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_ADD_STREAMS) {
+ if (send_tsn ||
+ send_in ||
+ send_out) {
+ /* We can't do that and add streams */
+ error = EINVAL;
+ goto skip_stuff;
+ }
+ if (stcb->asoc.stream_reset_outstanding) {
+ error = EBUSY;
+ goto skip_stuff;
+ }
+ addstream = 1;
+ /* We allocate here */
+ addstrmcnt = strrst->strrst_num_streams;
+ if ((int)(addstrmcnt + stcb->asoc.streamoutcnt) > 0xffff) {
+ /* You can't have more than 64k streams */
+ error = EINVAL;
+ goto skip_stuff;
+ }
+ if ((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < addstrmcnt) {
+ /* Need to allocate more */
+ struct sctp_stream_out *oldstream;
+ struct sctp_stream_queue_pending *sp;
+ int removed;
+
+ oldstream = stcb->asoc.strmout;
+ /* get some more */
+ SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *,
+ ((stcb->asoc.streamoutcnt + addstrmcnt) * sizeof(struct sctp_stream_out)),
+ SCTP_M_STRMO);
+ if (stcb->asoc.strmout == NULL) {
+ stcb->asoc.strmout = oldstream;
+ error = ENOMEM;
+ goto skip_stuff;
+ }
+ /*
+ * Now copy over the existing outbound
+ * streams and initialize the newly
+ * added ones.
+ */
+ SCTP_TCB_SEND_LOCK(stcb);
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
+ stcb->asoc.strmout[i].next_sequence_sent = oldstream[i].next_sequence_sent;
+ stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete;
+ stcb->asoc.strmout[i].stream_no = i;
+ if (oldstream[i].next_spoke.tqe_next) {
+ sctp_remove_from_wheel(stcb, &stcb->asoc, &oldstream[i], 1);
+ stcb->asoc.strmout[i].next_spoke.tqe_next = NULL;
+ stcb->asoc.strmout[i].next_spoke.tqe_prev = NULL;
+ removed = 1;
+ } else {
+ /* not on out wheel */
+ stcb->asoc.strmout[i].next_spoke.tqe_next = NULL;
+ stcb->asoc.strmout[i].next_spoke.tqe_prev = NULL;
+ removed = 0;
+ }
+ /*
+ * now anything on those
+ * queues?
+ */
+ while (TAILQ_EMPTY(&oldstream[i].outqueue) == 0) {
+ sp = TAILQ_FIRST(&oldstream[i].outqueue);
+ TAILQ_REMOVE(&oldstream[i].outqueue, sp, next);
+ TAILQ_INSERT_TAIL(&stcb->asoc.strmout[i].outqueue, sp, next);
+ }
+ /* Did we disrupt the wheel? */
+ if (removed) {
+ sctp_insert_on_wheel(stcb,
+ &stcb->asoc,
+ &stcb->asoc.strmout[i],
+ 1);
+ }
+ /*
+ * Now move assoc pointers
+ * too
+ */
+ if (stcb->asoc.last_out_stream == &oldstream[i]) {
+ stcb->asoc.last_out_stream = &stcb->asoc.strmout[i];
+ }
+ if (stcb->asoc.locked_on_sending == &oldstream[i]) {
+ stcb->asoc.locked_on_sending = &stcb->asoc.strmout[i];
+ }
+ }
+ /* now the new streams */
+ for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + addstrmcnt); i++) {
+ stcb->asoc.strmout[i].next_sequence_sent = 0x0;
+ TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
+ stcb->asoc.strmout[i].stream_no = i;
+ stcb->asoc.strmout[i].last_msg_incomplete = 0;
+ stcb->asoc.strmout[i].next_spoke.tqe_next = NULL;
+ stcb->asoc.strmout[i].next_spoke.tqe_prev = NULL;
+ }
+ stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + addstrmcnt;
+ SCTP_FREE(oldstream, SCTP_M_STRMO);
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ goto skip_stuff;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ for (i = 0; i < strrst->strrst_num_streams; i++) {
+ if ((send_in) &&
+ (strrst->strrst_list[i] > stcb->asoc.streamincnt)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto get_out;
+ }
+ if ((send_out) &&
+ (strrst->strrst_list[i] > stcb->asoc.streamoutcnt)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto get_out;
+ }
+ }
+ skip_stuff:
+ if (error) {
+ get_out:
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ error = sctp_send_str_reset_req(stcb, strrst->strrst_num_streams,
+ strrst->strrst_list,
+ send_out, (stcb->asoc.str_reset_seq_in - 3),
+ send_in, send_tsn, addstream, addstrmcnt);
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ break;
+
+ case SCTP_CONNECT_X:
+ if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ error = sctp_do_connect_x(so, inp, optval, optsize, p, 0);
+ break;
+
+ case SCTP_CONNECT_X_DELAYED:
+ if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ error = sctp_do_connect_x(so, inp, optval, optsize, p, 1);
+ break;
+
+ case SCTP_CONNECT_X_COMPLETE:
+ {
+ struct sockaddr *sa;
+ struct sctp_nets *net;
+
+ /* FIXME MT: check correct? */
+ SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize);
+
+ /* find tcb */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ net = sctp_findnet(stcb, sa);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. is NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, sa, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ break;
+ }
+ if (stcb->asoc.delayed_connection == 1) {
+ stcb->asoc.delayed_connection = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb,
+ stcb->asoc.primary_destination,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ } else {
+ /*
+ * already expired or did not use delayed
+ * connectx
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ break;
+ case SCTP_MAX_BURST:
+ {
+ uint8_t *burst;
+
+ SCTP_CHECK_AND_CAST(burst, optval, uint8_t, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (*burst) {
+ inp->sctp_ep.max_burst = *burst;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ case SCTP_MAXSEG:
+ {
+ struct sctp_assoc_value *av;
+ int ovh;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
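+ /*
+ * Add the per-address-family overhead to the supplied
+ * value before storing it as the fragmentation point.
+ */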
+ if (stcb) {
+ if (av->assoc_value) {
+ stcb->asoc.sctp_frag_point = (av->assoc_value + ovh);
+ } else {
+ stcb->asoc.sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ /*
+ * FIXME MT: I think this is not in tune
+ * with the API ID
+ */
+ if (av->assoc_value) {
+ inp->sctp_frag_point = (av->assoc_value + ovh);
+ } else {
+ inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_EVENTS:
+ {
+ struct sctp_event_subscribe *events;
+
+ SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (events->sctp_data_io_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT);
+ }
+
+ if (events->sctp_association_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT);
+ }
+
+ if (events->sctp_address_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPADDREVNT);
+ }
+
+ if (events->sctp_send_failure_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
+ }
+
+ if (events->sctp_peer_error_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPEERERR);
+ }
+
+ if (events->sctp_shutdown_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
+ }
+
+ if (events->sctp_partial_delivery_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_PDAPIEVNT);
+ }
+
+ if (events->sctp_adaptation_layer_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
+ }
+
+ if (events->sctp_authentication_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTHEVNT);
+ }
+
+ if (events->sctp_sender_dry_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT);
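+ /*
+ * For 1-to-1 style sockets that already have
+ * nothing queued, deliver the sender-dry
+ * notification right away.
+ */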
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ if (stcb &&
+ TAILQ_EMPTY(&stcb->asoc.send_queue) &&
+ TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
+ (stcb->asoc.stream_queue_cnt == 0)) {
+ sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED);
+ }
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_DRYEVNT);
+ }
+
+ if (events->sctp_stream_reset_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+
+ case SCTP_ADAPTATION_LAYER:
+ {
+ struct sctp_setadaptation *adap_bits;
+
+ SCTP_CHECK_AND_CAST(adap_bits, optval, struct sctp_setadaptation, optsize);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+#ifdef SCTP_DEBUG
+ case SCTP_SET_INITIAL_DBG_SEQ:
+ {
+ uint32_t *vvv;
+
+ SCTP_CHECK_AND_CAST(vvv, optval, uint32_t, optsize);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.initial_sequence_debug = *vvv;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+#endif
+ case SCTP_DEFAULT_SEND_PARAM:
+ {
+ struct sctp_sndrcvinfo *s_info;
+
+ SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, optsize);
+ SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
+
+ if (stcb) {
+ if (s_info->sinfo_stream <= stcb->asoc.streamoutcnt) {
+ memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send)));
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send)));
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_PEER_ADDR_PARAMS:
+ /* Applies to the specific association */
+ {
+ struct sctp_paddrparams *paddrp;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, optsize);
+ SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. is NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp,
+ (struct sockaddr *)&paddrp->spp_address,
+ &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (stcb && (net == NULL)) {
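+ /*
+ * An assoc was located but the supplied address is
+ * not one of its destinations; only the wildcard
+ * address is acceptable here, meaning the settings
+ * apply to the association as a whole.
+ */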
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *)&paddrp->spp_address;
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ SCTP_TCB_UNLOCK(stcb);
+ error = EINVAL;
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ SCTP_TCB_UNLOCK(stcb);
+ error = EINVAL;
+ break;
+ }
+ } else {
+ error = EAFNOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ /* sanity checks */
+ if ((paddrp->spp_flags & SPP_HB_ENABLE) && (paddrp->spp_flags & SPP_HB_DISABLE)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_ENABLE) && (paddrp->spp_flags & SPP_PMTUD_DISABLE)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if (stcb) {
+ /************************TCB SPECIFIC SET ******************/
+ /*
+ * Do we change the timer for HB?  We run
+ * only one.
+ */
+ int ovh = 0;
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+ if (paddrp->spp_hbinterval)
+ stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
+ else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
+ stcb->asoc.heart_beat_delay = 0;
+
+ /* net-specific settings? */
+ if (net) {
+ /************************NET SPECIFIC SET ******************/
+ if (paddrp->spp_flags & SPP_HB_DEMAND) {
+ /* on demand HB */
+ if (sctp_send_hb(stcb, 1, net) < 0) {
+ /* asoc destroyed */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ net->dest_state |= SCTP_ADDR_NOHB;
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ net->dest_state &= ~SCTP_ADDR_NOHB;
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ }
+ if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
+ net->mtu = paddrp->spp_pathmtu + ovh;
+ if (net->mtu < stcb->asoc.smallest_mtu) {
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
+ }
+ if (paddrp->spp_pathmaxrxt)
+ net->failure_threshold = paddrp->spp_pathmaxrxt;
+#ifdef INET
+ if (paddrp->spp_flags & SPP_IPV4_TOS) {
+ if (net->ro._l_addr.sin.sin_family == AF_INET) {
+ net->tos_flowlabel = paddrp->spp_ipv4_tos & 0x000000fc;
+ }
+ }
+#endif
+#ifdef INET6
+ if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) {
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ net->tos_flowlabel = paddrp->spp_ipv6_flowlabel;
+ }
+ }
+#endif
+ } else {
+ /************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/
+ if (paddrp->spp_pathmaxrxt)
+ stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt;
+
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ /* Turn back on the timer */
+ stcb->asoc.hb_is_disabled = 0;
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ }
+ if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
+ net->mtu = paddrp->spp_pathmtu + ovh;
+ if (net->mtu < stcb->asoc.smallest_mtu) {
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu);
+ }
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ int cnt_of_unconf = 0;
+ struct sctp_nets *lnet;
+
+ stcb->asoc.hb_is_disabled = 1;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if (lnet->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ cnt_of_unconf++;
+ }
+ }
+ /*
+ * stop the timer ONLY if we
+ * have no unconfirmed
+ * addresses
+ */
+ if (cnt_of_unconf == 0) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ /* start up the timer. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
+ }
+ }
+#ifdef INET
+ if (paddrp->spp_flags & SPP_IPV4_TOS)
+ stcb->asoc.default_tos = paddrp->spp_ipv4_tos & 0x000000fc;
+#endif
+#ifdef INET6
+ if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL)
+ stcb->asoc.default_flowlabel = paddrp->spp_ipv6_flowlabel;
+#endif
+
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /************************NO TCB, SET TO default stuff ******************/
+ SCTP_INP_WLOCK(inp);
+ /*
+ * The TOS/FLOWLABEL defaults are set with the
+ * IP-level options on the socket.
+ */
+ if (paddrp->spp_pathmaxrxt) {
+ inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
+ }
+ if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
+ else if (paddrp->spp_hbinterval) {
+ if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
+ paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+
+ } else if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_RTOINFO:
+ {
+ struct sctp_rtoinfo *srto;
+ uint32_t new_init, new_min, new_max;
+
+ SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, optsize);
+ SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id);
+
+ if (stcb) {
+ if (srto->srto_initial)
+ new_init = srto->srto_initial;
+ else
+ new_init = stcb->asoc.initial_rto;
+ if (srto->srto_max)
+ new_max = srto->srto_max;
+ else
+ new_max = stcb->asoc.maxrto;
+ if (srto->srto_min)
+ new_min = srto->srto_min;
+ else
+ new_min = stcb->asoc.minrto;
+ if ((new_min <= new_init) && (new_init <= new_max)) {
+ stcb->asoc.initial_rto = new_init;
+ stcb->asoc.maxrto = new_max;
+ stcb->asoc.minrto = new_min;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (srto->srto_initial)
+ new_init = srto->srto_initial;
+ else
+ new_init = inp->sctp_ep.initial_rto;
+ if (srto->srto_max)
+ new_max = srto->srto_max;
+ else
+ new_max = inp->sctp_ep.sctp_maxrto;
+ if (srto->srto_min)
+ new_min = srto->srto_min;
+ else
+ new_min = inp->sctp_ep.sctp_minrto;
+ if ((new_min <= new_init) && (new_init <= new_max)) {
+ inp->sctp_ep.initial_rto = new_init;
+ inp->sctp_ep.sctp_maxrto = new_max;
+ inp->sctp_ep.sctp_minrto = new_min;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_ASSOCINFO:
+ {
+ struct sctp_assocparams *sasoc;
+
+ SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, optsize);
+ SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
+ if (sasoc->sasoc_cookie_life) {
+ /* boundary check the cookie life */
+ if (sasoc->sasoc_cookie_life < 1000)
+ sasoc->sasoc_cookie_life = 1000;
+ if (sasoc->sasoc_cookie_life > SCTP_MAX_COOKIE_LIFE) {
+ sasoc->sasoc_cookie_life = SCTP_MAX_COOKIE_LIFE;
+ }
+ }
+ if (stcb) {
+ if (sasoc->sasoc_asocmaxrxt)
+ stcb->asoc.max_send_times = sasoc->sasoc_asocmaxrxt;
+ sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = 0;
+ if (sasoc->sasoc_cookie_life) {
+ stcb->asoc.cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sasoc->sasoc_asocmaxrxt)
+ inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt;
+ sasoc->sasoc_number_peer_destinations = 0;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = 0;
+ if (sasoc->sasoc_cookie_life) {
+ inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_INITMSG:
+ {
+ struct sctp_initmsg *sinit;
+
+ SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, optsize);
+ SCTP_INP_WLOCK(inp);
+ if (sinit->sinit_num_ostreams)
+ inp->sctp_ep.pre_open_stream_count = sinit->sinit_num_ostreams;
+
+ if (sinit->sinit_max_instreams)
+ inp->sctp_ep.max_open_streams_intome = sinit->sinit_max_instreams;
+
+ if (sinit->sinit_max_attempts)
+ inp->sctp_ep.max_init_times = sinit->sinit_max_attempts;
+
+ if (sinit->sinit_max_init_timeo)
+ inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ case SCTP_PRIMARY_ADDR:
+ {
+ struct sctp_setprim *spa;
+ struct sctp_nets *net, *lnet;
+
+ SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize);
+ SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&spa->ssp_addr);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. is NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp,
+ (struct sockaddr *)&spa->ssp_addr,
+ &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if ((stcb) && (net)) {
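+ /*
+ * Only switch the primary if the requested
+ * destination differs from the current one and has
+ * been confirmed reachable.
+ */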
+ if ((net != stcb->asoc.primary_destination) &&
+ (!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
+ /* Ok we need to set it */
+ lnet = stcb->asoc.primary_destination;
+ if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
+ if (net->dest_state & SCTP_ADDR_SWITCH_PRIMARY) {
+ net->dest_state |= SCTP_ADDR_DOUBLE_SWITCH;
+ }
+ net->dest_state |= SCTP_ADDR_SWITCH_PRIMARY;
+ }
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ break;
+ case SCTP_SET_DYNAMIC_PRIMARY:
+ {
+ union sctp_sockstore *ss;
+
+ error = priv_check(curthread,
+ PRIV_NETINET_RESERVEDPORT);
+ if (error)
+ break;
+
+ SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize);
+ /* SUPER USER CHECK? */
+ error = sctp_dynamic_set_primary(&ss->sa, vrf_id);
+ }
+ break;
+ case SCTP_SET_PEER_PRIMARY_ADDR:
+ {
+ struct sctp_setpeerprim *sspp;
+
+ SCTP_CHECK_AND_CAST(sspp, optval, struct sctp_setpeerprim, optsize);
+ SCTP_FIND_STCB(inp, stcb, sspp->sspp_assoc_id);
+ if (stcb != NULL) {
+ struct sctp_ifa *ifa;
+
+ ifa = sctp_find_ifa_by_addr((struct sockaddr *)&sspp->sspp_addr,
+ stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (ifa == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /*
+ * Must validate the ifa found is in
+ * our ep
+ */
+ struct sctp_laddr *laddr;
+ int found = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa == ifa) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
+ }
+ if (sctp_set_primary_ip_address_sa(stcb,
+ (struct sockaddr *)&sspp->sspp_addr) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ out_of_it:
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+
+ }
+ break;
+ case SCTP_BINDX_ADD_ADDR:
+ {
+ struct sctp_getaddresses *addrs;
+ size_t sz;
+ struct thread *td;
+
+ td = (struct thread *)p;
+ SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses,
+ optsize);
+ if (addrs->addr->sa_family == AF_INET) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)(addrs->addr))->sin_addr)))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+#ifdef INET6
+ } else if (addrs->addr->sa_family == AF_INET6) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (td != NULL && (error = prison_local_ip6(td->td_ucred, &(((struct sockaddr_in6 *)(addrs->addr))->sin6_addr),
+ (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+#endif
+ } else {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ sctp_bindx_add_address(so, inp, addrs->addr,
+ addrs->sget_assoc_id, vrf_id,
+ &error, p);
+ }
+ break;
+ case SCTP_BINDX_REM_ADDR:
+ {
+ struct sctp_getaddresses *addrs;
+ size_t sz;
+ struct thread *td;
+
+ td = (struct thread *)p;
+
+ SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, optsize);
+ if (addrs->addr->sa_family == AF_INET) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)(addrs->addr))->sin_addr)))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+#ifdef INET6
+ } else if (addrs->addr->sa_family == AF_INET6) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (td != NULL && (error = prison_local_ip6(td->td_ucred, &(((struct sockaddr_in6 *)(addrs->addr))->sin6_addr),
+ (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+#endif
+ } else {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ sctp_bindx_delete_address(so, inp, addrs->addr,
+ addrs->sget_assoc_id, vrf_id,
+ &error);
+ }
+ break;
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ break;
+ } /* end switch (opt) */
+ return (error);
+}
+
+int
+sctp_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ void *optval = NULL;
+ size_t optsize = 0;
+ struct sctp_inpcb *inp;
+ void *p;
+ int error = 0;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ /* Behave as TCP does when the PCB is not set up. */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ if (sopt->sopt_level != IPPROTO_SCTP) {
+ /* wrong proto level... send back up to IP */
+#ifdef INET6
+ if (INP_CHECK_SOCKAF(so, AF_INET6))
+ error = ip6_ctloutput(so, sopt);
+ else
+#endif /* INET6 */
+ error = ip_ctloutput(so, sopt);
+ return (error);
+ }
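+ /*
+ * Copy the user's option value into a temporary kernel buffer,
+ * dispatch to the set or get handler, and for a get copy the
+ * result back out to the user.
+ */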
+ optsize = sopt->sopt_valsize;
+ if (optsize) {
+ SCTP_MALLOC(optval, void *, optsize, SCTP_M_SOCKOPT);
+ if (optval == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
+ return (ENOBUFS);
+ }
+ error = sooptcopyin(sopt, optval, optsize, optsize);
+ if (error) {
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ goto out;
+ }
+ }
+ p = (void *)sopt->sopt_td;
+ if (sopt->sopt_dir == SOPT_SET) {
+ error = sctp_setopt(so, sopt->sopt_name, optval, optsize, p);
+ } else if (sopt->sopt_dir == SOPT_GET) {
+ error = sctp_getopt(so, sopt->sopt_name, optval, &optsize, p);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ if ((error == 0) && (optval != NULL)) {
+ error = sooptcopyout(sopt, optval, optsize);
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ } else if (optval != NULL) {
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ }
+out:
+ return (error);
+}
+
+
+static int
+sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
+{
+ int error = 0;
+ int create_lock_on = 0;
+ uint32_t vrf_id;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ /* Behave as TCP does when the PCB is not set up. */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+#ifdef INET6
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6p;
+
+ if (addr->sa_len != sizeof(struct sockaddr_in6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ sin6p = (struct sockaddr_in6 *)addr;
+ if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6p->sin6_addr)) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ return (error);
+ }
+ } else
+#endif
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sinp;
+
+ if (addr->sa_len != sizeof(struct sockaddr_in)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ sinp = (struct sockaddr_in *)addr;
+ if (p != NULL && (error = prison_remote_ip4(p->td_ucred, &sinp->sin_addr)) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ return (error);
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EAFNOSUPPORT);
+ return (EAFNOSUPPORT);
+ }
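+ /*
+ * Take a reference on the PCB and hold the association-create
+ * lock while the new association is being set up.
+ */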
+ SCTP_INP_INCR_REF(inp);
+ SCTP_ASOC_CREATE_LOCK(inp);
+ create_lock_on = 1;
+
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ /* Should I really unlock ? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
+ error = EFAULT;
+ goto out_now;
+ }
+#ifdef INET6
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (addr->sa_family == AF_INET6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+#endif /* INET6 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /* Bind an ephemeral port */
+ error = sctp_inpcb_bind(so, NULL, NULL, p);
+ if (error) {
+ goto out_now;
+ }
+ }
+ /* Now do we connect? */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
+ (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND the TCP model */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ error = EADDRINUSE;
+ goto out_now;
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ /*
+ * We increment here since sctp_findassociation_ep_addr()
+ * will do a decrement if it finds the stcb as long as the
+ * locked tcb (last argument) is NOT a TCB.. aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, NULL, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ if (stcb != NULL) {
+		/* Already have or am bringing up an association */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ goto out_now;
+ }
+ vrf_id = inp->def_vrf_id;
+ /* We are GOOD to go */
+ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p);
+ if (stcb == NULL) {
+ /* Gak! no memory */
+ goto out_now;
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+ /* Set the connected flag so we can queue data */
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_state &= ~SBS_CANTSENDMORE;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_ISDISCONNECTING;
+ SOCK_UNLOCK(so);
+ soisconnecting(so);
+ }
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+
+ /* initialize authentication parameters for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_TCB_UNLOCK(stcb);
+out_now:
+ if (create_lock_on) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ return error;
+}
+
+int
+sctp_listen(struct socket *so, int backlog, struct thread *p)
+{
+ /*
+ * Note this module depends on the protocol processing being called
+ * AFTER any socket level flags and backlog are applied to the
+ * socket. The traditional way that the socket flags are applied is
+ * AFTER protocol processing. We have made a change to the
+ * sys/kern/uipc_socket.c module to reverse this but this MUST be in
+ * place if the socket API for SCTP is to work properly.
+ */
+
+ int error = 0;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+		/* I made this the same as TCP since we are not set up. */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
+ /* See if we have a listener */
+ struct sctp_inpcb *tinp;
+ union sctp_sockstore store, *sp;
+
+ sp = &store;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* not bound all */
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ memcpy(&store, &laddr->ifa->address, sizeof(store));
+ sp->sin.sin_port = inp->sctp_lport;
+ tinp = sctp_pcb_findep(&sp->sa, 0, 0, inp->def_vrf_id);
+ if (tinp && (tinp != inp) &&
+ ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
+ ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (tinp->sctp_socket->so_qlimit)) {
+ /*
+ * we have a listener already and
+					 * it's not this inp.
+ */
+ SCTP_INP_DECR_REF(tinp);
+ return (EADDRINUSE);
+ } else if (tinp) {
+ SCTP_INP_DECR_REF(tinp);
+ }
+ }
+ } else {
+ /* Setup a local addr bound all */
+ memset(&store, 0, sizeof(store));
+ store.sin.sin_port = inp->sctp_lport;
+#ifdef INET6
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ store.sa.sa_family = AF_INET6;
+ store.sa.sa_len = sizeof(struct sockaddr_in6);
+ }
+#endif
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ store.sa.sa_family = AF_INET;
+ store.sa.sa_len = sizeof(struct sockaddr_in);
+ }
+ tinp = sctp_pcb_findep(&sp->sa, 0, 0, inp->def_vrf_id);
+ if (tinp && (tinp != inp) &&
+ ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
+ ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (tinp->sctp_socket->so_qlimit)) {
+ /*
+				 * we have a listener already and it's not
+ * this inp.
+ */
+ SCTP_INP_DECR_REF(tinp);
+ return (EADDRINUSE);
+ } else if (tinp) {
+				SCTP_INP_DECR_REF(tinp);
+ }
+ }
+ }
+ SCTP_INP_RLOCK(inp);
+#ifdef SCTP_LOCK_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
+ sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
+ }
+#endif
+ SOCK_LOCK(so);
+ error = solisten_proto_check(so);
+ if (error) {
+ SOCK_UNLOCK(so);
+ SCTP_INP_RUNLOCK(inp);
+ return (error);
+ }
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+		/*
+		 * The unlucky case:
+		 * - We are in the TCP pool with this guy.
+		 * - Someone else is in the main inp slot.
+		 * - We must move this guy (the listener) to the main slot.
+		 * - We must then move the guy that was the listener to the
+		 *   TCP pool.
+		 */
+ if (sctp_swap_inpcb_for_listen(inp)) {
+ goto in_use;
+ }
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND the TCP model */
+in_use:
+ SCTP_INP_RUNLOCK(inp);
+ SOCK_UNLOCK(so);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ /* We must do a bind. */
+ SOCK_UNLOCK(so);
+ if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
+			/* bind error, probably a permissions problem */
+ return (error);
+ }
+ SOCK_LOCK(so);
+ }
+ /* It appears for 7.0 and on, we must always call this. */
+ solisten_proto(so, backlog);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ /* remove the ACCEPTCONN flag for one-to-many sockets */
+ so->so_options &= ~SO_ACCEPTCONN;
+ }
+ if (backlog == 0) {
+ /* turning off listen */
+ so->so_options &= ~SO_ACCEPTCONN;
+ }
+ SOCK_UNLOCK(so);
+ return (error);
+}
+
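+/*
+ * Count of receive wakeups that were deferred (SCTP_PCB_FLAGS_DONT_WAKE)
+ * and later delivered from sctp_accept(); only incremented here, apparently
+ * as a debugging statistic.
+ */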
+static int sctp_defered_wakeup_cnt = 0;
+
+int
+sctp_accept(struct socket *so, struct sockaddr **addr)
+{
+ struct sctp_tcb *stcb;
+ struct sctp_inpcb *inp;
+ union sctp_sockstore store;
+
+#ifdef INET6
+ int error;
+
+#endif
+ inp = (struct sctp_inpcb *)so->so_pcb;
+
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return (EOPNOTSUPP);
+ }
+ if (so->so_state & SS_ISDISCONNECTED) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNABORTED);
+ return (ECONNABORTED);
+ }
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ SCTP_TCB_LOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ store = stcb->asoc.primary_destination->ro._l_addr;
+ stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_TCB_UNLOCK(stcb);
+ switch (store.sa.sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ if (sin == NULL)
+ return (ENOMEM);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = ((struct sockaddr_in *)&store)->sin_port;
+ sin->sin_addr = ((struct sockaddr_in *)&store)->sin_addr;
+ *addr = (struct sockaddr *)sin;
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6;
+
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ if (sin6 == NULL)
+ return (ENOMEM);
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = ((struct sockaddr_in6 *)&store)->sin6_port;
+
+ sin6->sin6_addr = ((struct sockaddr_in6 *)&store)->sin6_addr;
+ if ((error = sa6_recoverscope(sin6)) != 0) {
+ SCTP_FREE_SONAME(sin6);
+ return (error);
+ }
+ *addr = (struct sockaddr *)sin6;
+ break;
+ }
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ /* Wake any delayed sleep action */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_DONT_WAKE;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
+ SCTP_INP_WUNLOCK(inp);
+ SOCKBUF_LOCK(&inp->sctp_socket->so_snd);
+ if (sowriteable(inp->sctp_socket)) {
+ sowwakeup_locked(inp->sctp_socket);
+ } else {
+ SOCKBUF_UNLOCK(&inp->sctp_socket->so_snd);
+ }
+ SCTP_INP_WLOCK(inp);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
+ SCTP_INP_WUNLOCK(inp);
+ SOCKBUF_LOCK(&inp->sctp_socket->so_rcv);
+ if (soreadable(inp->sctp_socket)) {
+ sctp_defered_wakeup_cnt++;
+ sorwakeup_locked(inp->sctp_socket);
+ } else {
+ SOCKBUF_UNLOCK(&inp->sctp_socket->so_rcv);
+ }
+ SCTP_INP_WLOCK(inp);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_LOCK(stcb);
+ sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ }
+ return (0);
+}
+
+int
+sctp_ingetaddr(struct socket *so, struct sockaddr **addr)
+{
+ struct sockaddr_in *sin;
+ uint32_t vrf_id;
+ struct sctp_inpcb *inp;
+ struct sctp_ifa *sctp_ifa;
+
+ /*
+ * Do the malloc first in case it blocks.
+ */
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ if (sin == NULL)
+ return (ENOMEM);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (!inp) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ SCTP_INP_RLOCK(inp);
+ sin->sin_port = inp->sctp_lport;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ struct sctp_tcb *stcb;
+ struct sockaddr_in *sin_a;
+ struct sctp_nets *net;
+ int fnd;
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ goto notConn;
+ }
+ fnd = 0;
+ sin_a = NULL;
+ SCTP_TCB_LOCK(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sin_a = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin_a == NULL)
+ /* this will make coverity happy */
+ continue;
+
+ if (sin_a->sin_family == AF_INET) {
+ fnd = 1;
+ break;
+ }
+ }
+ if ((!fnd) || (sin_a == NULL)) {
+ /* punt */
+ SCTP_TCB_UNLOCK(stcb);
+ goto notConn;
+ }
+ vrf_id = inp->def_vrf_id;
+ sctp_ifa = sctp_source_address_selection(inp,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (sctp_ifa) {
+ sin->sin_addr = sctp_ifa->address.sin.sin_addr;
+ sctp_free_ifa(sctp_ifa);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* For the bound all case you get back 0 */
+ notConn:
+ sin->sin_addr.s_addr = 0;
+ }
+
+ } else {
+ /* Take the first IPv4 address in the list */
+ struct sctp_laddr *laddr;
+ int fnd = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin_a;
+
+ sin_a = (struct sockaddr_in *)&laddr->ifa->address.sa;
+ sin->sin_addr = sin_a->sin_addr;
+ fnd = 1;
+ break;
+ }
+ }
+ if (!fnd) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ return ENOENT;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ (*addr) = (struct sockaddr *)sin;
+ return (0);
+}
+
+int
+sctp_peeraddr(struct socket *so, struct sockaddr **addr)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)*addr;
+ int fnd;
+ struct sockaddr_in *sin_a;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+
+ /* Do the malloc first in case it blocks. */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if ((inp == NULL) ||
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) {
+ /* UDP type and listeners will drop out here */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ if (sin == NULL)
+ return (ENOMEM);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+
+	/* We must recapture in case we blocked */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (!inp) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if (stcb == NULL) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ fnd = 0;
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sin_a = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin_a->sin_family == AF_INET) {
+ fnd = 1;
+ sin->sin_port = stcb->rport;
+ sin->sin_addr = sin_a->sin_addr;
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ if (!fnd) {
+ /* No IPv4 address */
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ return ENOENT;
+ }
+ (*addr) = (struct sockaddr *)sin;
+ return (0);
+}
+
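+/*
+ * User-request vector handed to the protocol switch: the socket layer
+ * dispatches operations such as connect(2), listen(2) and accept(2) to the
+ * SCTP-specific handlers defined above through this table.
+ */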
+struct pr_usrreqs sctp_usrreqs = {
+ .pru_abort = sctp_abort,
+ .pru_accept = sctp_accept,
+ .pru_attach = sctp_attach,
+ .pru_bind = sctp_bind,
+ .pru_connect = sctp_connect,
+ .pru_control = in_control,
+ .pru_close = sctp_close,
+ .pru_detach = sctp_close,
+ .pru_sopoll = sopoll_generic,
+ .pru_flush = sctp_flush,
+ .pru_disconnect = sctp_disconnect,
+ .pru_listen = sctp_listen,
+ .pru_peeraddr = sctp_peeraddr,
+ .pru_send = sctp_sendm,
+ .pru_shutdown = sctp_shutdown,
+ .pru_sockaddr = sctp_ingetaddr,
+ .pru_sosend = sctp_sosend,
+ .pru_soreceive = sctp_soreceive
+};
diff --git a/freebsd/sys/netinet/sctp_var.h b/freebsd/sys/netinet/sctp_var.h
new file mode 100644
index 00000000..93b92038
--- /dev/null
+++ b/freebsd/sys/netinet/sctp_var.h
@@ -0,0 +1,336 @@
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_var.h,v 1.24 2005/03/06 16:04:19 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef _NETINET_SCTP_VAR_HH_
+#define _NETINET_SCTP_VAR_HH_
+
+#include <freebsd/netinet/sctp_uio.h>
+
+#if defined(_KERNEL) || defined(__Userspace__)
+
+extern struct pr_usrreqs sctp_usrreqs;
+
+
+#define sctp_feature_on(inp, feature) (inp->sctp_features |= feature)
+#define sctp_feature_off(inp, feature) (inp->sctp_features &= ~feature)
+#define sctp_is_feature_on(inp, feature) ((inp->sctp_features & feature) == feature)
+#define sctp_is_feature_off(inp, feature) ((inp->sctp_features & feature) == 0)
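+/*
+ * sctp_is_feature_on() is true when all of the given feature bits are set,
+ * sctp_is_feature_off() when none of them are (see their use with
+ * SCTP_PCB_FLAGS_PORTREUSE in sctp_listen()).
+ */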
+
+
+/* managing mobility_feature in inpcb (by micchie) */
+#define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature)
+#define sctp_mobility_feature_off(inp, feature) (inp->sctp_mobility_features &= ~feature)
+#define sctp_is_mobility_feature_on(inp, feature) (inp->sctp_mobility_features & feature)
+#define sctp_is_mobility_feature_off(inp, feature) ((inp->sctp_mobility_features & feature) == 0)
+
+#define sctp_maxspace(sb) (max((sb)->sb_hiwat,SCTP_MINIMAL_RWND))
+
+#define sctp_sbspace(asoc, sb) ((long) ((sctp_maxspace(sb) > (asoc)->sb_cc) ? (sctp_maxspace(sb) - (asoc)->sb_cc) : 0))
+
+#define sctp_sbspace_failedmsgs(sb) ((long) ((sctp_maxspace(sb) > (sb)->sb_cc) ? (sctp_maxspace(sb) - (sb)->sb_cc) : 0))
+
+#define sctp_sbspace_sub(a,b) ((a > b) ? (a - b) : 0)
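+
+/*
+ * sctp_sbspace() reports how much room the given association still has in a
+ * socket buffer: the buffer limit (never taken as smaller than
+ * SCTP_MINIMAL_RWND) minus what the association already has queued, clamped
+ * at zero.  The _failedmsgs and _sub variants do the same arithmetic on a
+ * raw sockbuf and on plain numbers.
+ */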
+
+/*
+ * I tried to cache the readq entries at one point. But the reality
+ * is that it did not add any performance since this meant we had to
+ * lock the STCB on read. And at that point once you have to do an
+ * extra lock, it really does not matter if the lock is in the ZONE
+ * stuff or in our code. Note that this same problem would occur with
+ * an mbuf cache as well so it is not really worth doing, at least
+ * right now :-D
+ */
+
+#define sctp_free_a_readq(_stcb, _readq) { \
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), (_readq)); \
+ SCTP_DECR_READQ_COUNT(); \
+}
+
+#define sctp_alloc_a_readq(_stcb, _readq) { \
+ (_readq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_readq), struct sctp_queued_to_read); \
+ if ((_readq)) { \
+ SCTP_INCR_READQ_COUNT(); \
+ } \
+}
+
+#define sctp_free_a_strmoq(_stcb, _strmoq) { \
+ if ((_strmoq)->holds_key_ref) { \
+		sctp_auth_key_release((_stcb), (_strmoq)->auth_keyid); \
+ (_strmoq)->holds_key_ref = 0; \
+ } \
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_strmoq), (_strmoq)); \
+ SCTP_DECR_STRMOQ_COUNT(); \
+}
+
+#define sctp_alloc_a_strmoq(_stcb, _strmoq) { \
+ (_strmoq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_strmoq), struct sctp_stream_queue_pending); \
+ if ((_strmoq)) { \
+ memset(_strmoq, 0, sizeof(struct sctp_stream_queue_pending)); \
+ SCTP_INCR_STRMOQ_COUNT(); \
+ (_strmoq)->holds_key_ref = 0; \
+ } \
+}
+
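+/*
+ * sctp_free_a_chunk()/sctp_alloc_a_chunk() below maintain a small
+ * per-association free list: a released chunk is parked on asoc.free_chunks
+ * unless the per-association (sctp_asoc_free_resc_limit) or system-wide
+ * (sctp_system_free_resc_limit) limit is exceeded, and the allocator reuses
+ * those cached chunks before going back to the zone.
+ */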
+#define sctp_free_a_chunk(_stcb, _chk) { \
+ if ((_chk)->holds_key_ref) {\
+ sctp_auth_key_release((_stcb), (_chk)->auth_keyid); \
+ (_chk)->holds_key_ref = 0; \
+ } \
+ if (_stcb) { \
+ SCTP_TCB_LOCK_ASSERT((_stcb)); \
+ if ((_chk)->whoTo) { \
+ sctp_free_remote_addr((_chk)->whoTo); \
+ (_chk)->whoTo = NULL; \
+ } \
+ if (((_stcb)->asoc.free_chunk_cnt > SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit)) || \
+ (SCTP_BASE_INFO(ipi_free_chunks) > SCTP_BASE_SYSCTL(sctp_system_free_resc_limit))) { \
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
+ SCTP_DECR_CHK_COUNT(); \
+ } else { \
+ TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+ (_stcb)->asoc.free_chunk_cnt++; \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
+ } \
+ } else { \
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
+ SCTP_DECR_CHK_COUNT(); \
+ } \
+}
+
+#define sctp_alloc_a_chunk(_stcb, _chk) { \
+ if (TAILQ_EMPTY(&(_stcb)->asoc.free_chunks)) { \
+ (_chk) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_chunk), struct sctp_tmit_chunk); \
+ if ((_chk)) { \
+ SCTP_INCR_CHK_COUNT(); \
+ (_chk)->whoTo = NULL; \
+ (_chk)->holds_key_ref = 0; \
+ } \
+ } else { \
+ (_chk) = TAILQ_FIRST(&(_stcb)->asoc.free_chunks); \
+ TAILQ_REMOVE(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+ atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
+ (_chk)->holds_key_ref = 0; \
+ SCTP_STAT_INCR(sctps_cached_chk); \
+ (_stcb)->asoc.free_chunk_cnt--; \
+ } \
+}
+
+
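+/*
+ * Drop one reference on a remote address (sctp_nets) entry; only when the
+ * last reference goes away are its timers stopped, the cached route and
+ * source address released, and the structure returned to its zone.
+ */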
+#define sctp_free_remote_addr(__net) { \
+ if ((__net)) { \
+ if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&(__net)->ref_count)) { \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->pmtu_timer.timer); \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->fr_timer.timer); \
+ if ((__net)->ro.ro_rt) { \
+ RTFREE((__net)->ro.ro_rt); \
+ (__net)->ro.ro_rt = NULL; \
+ } \
+ if ((__net)->src_addr_selected) { \
+ sctp_free_ifa((__net)->ro._s_addr); \
+ (__net)->ro._s_addr = NULL; \
+ } \
+ (__net)->src_addr_selected = 0; \
+ (__net)->dest_state = SCTP_ADDR_NOT_REACHABLE; \
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_net), (__net)); \
+ SCTP_DECR_RADDR_COUNT(); \
+ } \
+ } \
+}
+
+#define sctp_sbfree(ctl, stcb, sb, m) { \
+ SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_cc, SCTP_BUF_LEN((m))); \
+ SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_mbcnt, MSIZE); \
+ if (((ctl)->do_not_ref_stcb == 0) && stcb) {\
+ SCTP_SAVE_ATOMIC_DECREMENT(&(stcb)->asoc.sb_cc, SCTP_BUF_LEN((m))); \
+ SCTP_SAVE_ATOMIC_DECREMENT(&(stcb)->asoc.my_rwnd_control_len, MSIZE); \
+ } \
+ if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \
+ SCTP_BUF_TYPE(m) != MT_OOBDATA) \
+ atomic_subtract_int(&(sb)->sb_ctl,SCTP_BUF_LEN((m))); \
+}
+
+#define sctp_sballoc(stcb, sb, m) { \
+ atomic_add_int(&(sb)->sb_cc,SCTP_BUF_LEN((m))); \
+ atomic_add_int(&(sb)->sb_mbcnt, MSIZE); \
+ if (stcb) { \
+ atomic_add_int(&(stcb)->asoc.sb_cc,SCTP_BUF_LEN((m))); \
+ atomic_add_int(&(stcb)->asoc.my_rwnd_control_len, MSIZE); \
+ } \
+ if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \
+ SCTP_BUF_TYPE(m) != MT_OOBDATA) \
+ atomic_add_int(&(sb)->sb_ctl,SCTP_BUF_LEN((m))); \
+}
+
+
+#define sctp_ucount_incr(val) { \
+ val++; \
+}
+
+#define sctp_ucount_decr(val) { \
+ if (val > 0) { \
+ val--; \
+ } else { \
+ val = 0; \
+ } \
+}
+
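+/*
+ * Strip leading zero-length mbufs off a chain: each empty mbuf at the head
+ * is unlinked and freed, and (data) is advanced to the first mbuf that
+ * actually carries data (or NULL).
+ */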
+#define sctp_mbuf_crush(data) do { \
+ struct mbuf *_m; \
+ _m = (data); \
+ while(_m && (SCTP_BUF_LEN(_m) == 0)) { \
+ (data) = SCTP_BUF_NEXT(_m); \
+ SCTP_BUF_NEXT(_m) = NULL; \
+ sctp_m_free(_m); \
+ _m = (data); \
+ } \
+} while (0)
+
+#define sctp_flight_size_decrease(tp1) do { \
+ if (tp1->whoTo->flight_size >= tp1->book_size) \
+ tp1->whoTo->flight_size -= tp1->book_size; \
+ else \
+ tp1->whoTo->flight_size = 0; \
+} while (0)
+
+#define sctp_flight_size_increase(tp1) do { \
+ (tp1)->whoTo->flight_size += (tp1)->book_size; \
+} while (0)
+
+#ifdef SCTP_FS_SPEC_LOG
+#define sctp_total_flight_decrease(stcb, tp1) do { \
+ if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ stcb->asoc.fs_index = 0;\
+ stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].incr = 0; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].decr = 1; \
+ stcb->asoc.fs_index++; \
+ tp1->window_probe = 0; \
+ if (stcb->asoc.total_flight >= tp1->book_size) { \
+ stcb->asoc.total_flight -= tp1->book_size; \
+ if (stcb->asoc.total_flight_count > 0) \
+ stcb->asoc.total_flight_count--; \
+ } else { \
+ stcb->asoc.total_flight = 0; \
+ stcb->asoc.total_flight_count = 0; \
+ } \
+} while (0)
+
+#define sctp_total_flight_increase(stcb, tp1) do { \
+ if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ stcb->asoc.fs_index = 0;\
+ stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].incr = 1; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].decr = 0; \
+ stcb->asoc.fs_index++; \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
+} while (0)
+
+#else
+
+#define sctp_total_flight_decrease(stcb, tp1) do { \
+ tp1->window_probe = 0; \
+ if (stcb->asoc.total_flight >= tp1->book_size) { \
+ stcb->asoc.total_flight -= tp1->book_size; \
+ if (stcb->asoc.total_flight_count > 0) \
+ stcb->asoc.total_flight_count--; \
+ } else { \
+ stcb->asoc.total_flight = 0; \
+ stcb->asoc.total_flight_count = 0; \
+ } \
+} while (0)
+
+#define sctp_total_flight_increase(stcb, tp1) do { \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
+} while (0)
+
+#endif
+
+
+struct sctp_nets;
+struct sctp_inpcb;
+struct sctp_tcb;
+struct sctphdr;
+
+
+void sctp_close(struct socket *so);
+int sctp_disconnect(struct socket *so);
+
+void sctp_ctlinput __P((int, struct sockaddr *, void *));
+int sctp_ctloutput __P((struct socket *, struct sockopt *));
+void sctp_input_with_port __P((struct mbuf *, int, uint16_t));
+void sctp_input __P((struct mbuf *, int));
+void sctp_pathmtu_adjustment __P((struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, uint16_t));
+void sctp_drain __P((void));
+void sctp_init __P((void));
+
+void sctp_finish(void);
+
+int sctp_flush(struct socket *, int);
+int sctp_shutdown __P((struct socket *));
+void sctp_notify
+__P((struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
+ struct sockaddr *, struct sctp_tcb *,
+ struct sctp_nets *));
+
+ int sctp_bindx(struct socket *, int, struct sockaddr_storage *,
+ int, int, struct proc *);
+
+/* can't use sctp_assoc_t here */
+ int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
+
+ int sctp_ingetaddr(struct socket *,
+ struct sockaddr **
+);
+
+ int sctp_peeraddr(struct socket *,
+ struct sockaddr **
+);
+
+ int sctp_listen(struct socket *, int, struct thread *);
+
+ int sctp_accept(struct socket *, struct sockaddr **);
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_SCTP_VAR_HH_ */
diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c
new file mode 100644
index 00000000..7e8ac1ea
--- /dev/null
+++ b/freebsd/sys/netinet/sctputil.c
@@ -0,0 +1,6977 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctputil.c,v 1.37 2005/03/07 23:26:09 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/netinet/sctp_os.h>
+#include <freebsd/netinet/sctp_pcb.h>
+#include <freebsd/netinet/sctputil.h>
+#include <freebsd/netinet/sctp_var.h>
+#include <freebsd/netinet/sctp_sysctl.h>
+#ifdef INET6
+#endif
+#include <freebsd/netinet/sctp_header.h>
+#include <freebsd/netinet/sctp_output.h>
+#include <freebsd/netinet/sctp_uio.h>
+#include <freebsd/netinet/sctp_timer.h>
+#include <freebsd/netinet/sctp_indata.h>/* for sctp_deliver_data() */
+#include <freebsd/netinet/sctp_auth.h>
+#include <freebsd/netinet/sctp_asconf.h>
+#include <freebsd/netinet/sctp_cc_functions.h>
+#include <freebsd/netinet/sctp_bsd_addr.h>
+
+
+#ifndef KTR_SCTP
+#define KTR_SCTP KTR_SUBSYS
+#endif
+
+void
+sctp_sblog(struct sockbuf *sb,
+ struct sctp_tcb *stcb, int from, int incr)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.sb.stcb = stcb;
+ sctp_clog.x.sb.so_sbcc = sb->sb_cc;
+ if (stcb)
+ sctp_clog.x.sb.stcb_sbcc = stcb->asoc.sb_cc;
+ else
+ sctp_clog.x.sb.stcb_sbcc = 0;
+ sctp_clog.x.sb.incr = incr;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_SB,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.close.inp = (void *)inp;
+ sctp_clog.x.close.sctp_flags = inp->sctp_flags;
+ if (stcb) {
+ sctp_clog.x.close.stcb = (void *)stcb;
+ sctp_clog.x.close.state = (uint16_t) stcb->asoc.state;
+ } else {
+ sctp_clog.x.close.stcb = 0;
+ sctp_clog.x.close.state = 0;
+ }
+ sctp_clog.x.close.loc = loc;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_CLOSE,
+ 0,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+rto_logging(struct sctp_nets *net, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ memset(&sctp_clog, 0, sizeof(sctp_clog));
+ sctp_clog.x.rto.net = (void *)net;
+ sctp_clog.x.rto.rtt = net->prev_rtt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RTT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t tsn, uint16_t sseq, uint16_t stream, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.strlog.stcb = stcb;
+ sctp_clog.x.strlog.n_tsn = tsn;
+ sctp_clog.x.strlog.n_sseq = sseq;
+ sctp_clog.x.strlog.e_tsn = 0;
+ sctp_clog.x.strlog.e_sseq = 0;
+ sctp_clog.x.strlog.strm = stream;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_STRM,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_nagle_event(struct sctp_tcb *stcb, int action)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.nagle.stcb = (void *)stcb;
+ sctp_clog.x.nagle.total_flight = stcb->asoc.total_flight;
+ sctp_clog.x.nagle.total_in_queue = stcb->asoc.total_output_queue_size;
+ sctp_clog.x.nagle.count_in_queue = stcb->asoc.chunks_on_out_queue;
+ sctp_clog.x.nagle.count_in_flight = stcb->asoc.total_flight_count;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_NAGLE,
+ action,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+sctp_log_sack(uint32_t old_cumack, uint32_t cumack, uint32_t tsn, uint16_t gaps, uint16_t dups, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.sack.cumack = cumack;
+ sctp_clog.x.sack.oldcumack = old_cumack;
+ sctp_clog.x.sack.tsn = tsn;
+ sctp_clog.x.sack.numGaps = gaps;
+ sctp_clog.x.sack.numDups = dups;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_SACK,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_map(uint32_t map, uint32_t cum, uint32_t high, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ memset(&sctp_clog, 0, sizeof(sctp_clog));
+ sctp_clog.x.map.base = map;
+ sctp_clog.x.map.cum = cum;
+ sctp_clog.x.map.high = high;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MAP,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn,
+ int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ memset(&sctp_clog, 0, sizeof(sctp_clog));
+ sctp_clog.x.fr.largest_tsn = biggest_tsn;
+ sctp_clog.x.fr.largest_new_tsn = biggest_new_tsn;
+ sctp_clog.x.fr.tsn = tsn;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_FR,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+
+void
+sctp_log_mb(struct mbuf *m, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.mb.mp = m;
+ sctp_clog.x.mb.mbuf_flags = (uint8_t) (SCTP_BUF_GET_FLAGS(m));
+ sctp_clog.x.mb.size = (uint16_t) (SCTP_BUF_LEN(m));
+ sctp_clog.x.mb.data = SCTP_BUF_AT(m, 0);
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_clog.x.mb.ext = SCTP_BUF_EXTEND_BASE(m);
+ sctp_clog.x.mb.refcnt = (uint8_t) (SCTP_BUF_EXTEND_REFCNT(m));
+ } else {
+ sctp_clog.x.mb.ext = 0;
+ sctp_clog.x.mb.refcnt = 0;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MBUF,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk,
+ int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ if (control == NULL) {
+ SCTP_PRINTF("Gak log of NULL?\n");
+ return;
+ }
+ sctp_clog.x.strlog.stcb = control->stcb;
+ sctp_clog.x.strlog.n_tsn = control->sinfo_tsn;
+ sctp_clog.x.strlog.n_sseq = control->sinfo_ssn;
+ sctp_clog.x.strlog.strm = control->sinfo_stream;
+ if (poschk != NULL) {
+ sctp_clog.x.strlog.e_tsn = poschk->sinfo_tsn;
+ sctp_clog.x.strlog.e_sseq = poschk->sinfo_ssn;
+ } else {
+ sctp_clog.x.strlog.e_tsn = 0;
+ sctp_clog.x.strlog.e_sseq = 0;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_STRM,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net, int augment, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.cwnd.net = net;
+ if (stcb->asoc.send_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_send = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_send = stcb->asoc.send_queue_cnt;
+ if (stcb->asoc.stream_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_str = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_str = stcb->asoc.stream_queue_cnt;
+
+ if (net) {
+ sctp_clog.x.cwnd.cwnd_new_value = net->cwnd;
+ sctp_clog.x.cwnd.inflight = net->flight_size;
+ sctp_clog.x.cwnd.pseudo_cumack = net->pseudo_cumack;
+ sctp_clog.x.cwnd.meets_pseudo_cumack = net->new_pseudo_cumack;
+ sctp_clog.x.cwnd.need_new_pseudo_cumack = net->find_pseudo_cumack;
+ }
+ if (SCTP_CWNDLOG_PRESEND == from) {
+ sctp_clog.x.cwnd.meets_pseudo_cumack = stcb->asoc.peers_rwnd;
+ }
+ sctp_clog.x.cwnd.cwnd_augment = augment;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_CWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ memset(&sctp_clog, 0, sizeof(sctp_clog));
+ if (inp) {
+ sctp_clog.x.lock.sock = (void *)inp->sctp_socket;
+
+ } else {
+ sctp_clog.x.lock.sock = (void *)NULL;
+ }
+ sctp_clog.x.lock.inp = (void *)inp;
+ if (stcb) {
+ sctp_clog.x.lock.tcb_lock = mtx_owned(&stcb->tcb_mtx);
+ } else {
+ sctp_clog.x.lock.tcb_lock = SCTP_LOCK_UNKNOWN;
+ }
+ if (inp) {
+ sctp_clog.x.lock.inp_lock = mtx_owned(&inp->inp_mtx);
+ sctp_clog.x.lock.create_lock = mtx_owned(&inp->inp_create_mtx);
+ } else {
+ sctp_clog.x.lock.inp_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.create_lock = SCTP_LOCK_UNKNOWN;
+ }
+ sctp_clog.x.lock.info_lock = rw_wowned(&SCTP_BASE_INFO(ipi_ep_mtx));
+ if (inp && (inp->sctp_socket)) {
+ sctp_clog.x.lock.sock_lock = mtx_owned(&(inp->sctp_socket->so_rcv.sb_mtx));
+ sctp_clog.x.lock.sockrcvbuf_lock = mtx_owned(&(inp->sctp_socket->so_rcv.sb_mtx));
+ sctp_clog.x.lock.socksndbuf_lock = mtx_owned(&(inp->sctp_socket->so_snd.sb_mtx));
+ } else {
+ sctp_clog.x.lock.sock_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.sockrcvbuf_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.socksndbuf_lock = SCTP_LOCK_UNKNOWN;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_LOCK_EVENT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *net, int error, int burst, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ memset(&sctp_clog, 0, sizeof(sctp_clog));
+ sctp_clog.x.cwnd.net = net;
+ sctp_clog.x.cwnd.cwnd_new_value = error;
+ sctp_clog.x.cwnd.inflight = net->flight_size;
+ sctp_clog.x.cwnd.cwnd_augment = burst;
+ if (stcb->asoc.send_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_send = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_send = stcb->asoc.send_queue_cnt;
+ if (stcb->asoc.stream_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_str = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_str = stcb->asoc.stream_queue_cnt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MAXBURST,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_rwnd(uint8_t from, uint32_t peers_rwnd, uint32_t snd_size, uint32_t overhead)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.rwnd.rwnd = peers_rwnd;
+ sctp_clog.x.rwnd.send_size = snd_size;
+ sctp_clog.x.rwnd.overhead = overhead;
+ sctp_clog.x.rwnd.new_rwnd = 0;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_rwnd_set(uint8_t from, uint32_t peers_rwnd, uint32_t flight_size, uint32_t overhead, uint32_t a_rwndval)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.rwnd.rwnd = peers_rwnd;
+ sctp_clog.x.rwnd.send_size = flight_size;
+ sctp_clog.x.rwnd.overhead = overhead;
+ sctp_clog.x.rwnd.new_rwnd = a_rwndval;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mbcnt_q, uint32_t mbcnt)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.mbcnt.total_queue_size = total_oq;
+ sctp_clog.x.mbcnt.size_change = book;
+ sctp_clog.x.mbcnt.total_queue_mb_size = total_mbcnt_q;
+ sctp_clog.x.mbcnt.mbcnt_change = mbcnt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MBCNT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_MISC_EVENT,
+ from,
+ a, b, c, d);
+}
+
+void
+sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t cumtsn, uint32_t wake_cnt, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.wake.stcb = (void *)stcb;
+ sctp_clog.x.wake.wake_cnt = wake_cnt;
+ sctp_clog.x.wake.flight = stcb->asoc.total_flight_count;
+ sctp_clog.x.wake.send_q = stcb->asoc.send_queue_cnt;
+ sctp_clog.x.wake.sent_q = stcb->asoc.sent_queue_cnt;
+
+ if (stcb->asoc.stream_queue_cnt < 0xff)
+ sctp_clog.x.wake.stream_qcnt = (uint8_t) stcb->asoc.stream_queue_cnt;
+ else
+ sctp_clog.x.wake.stream_qcnt = 0xff;
+
+ if (stcb->asoc.chunks_on_out_queue < 0xff)
+ sctp_clog.x.wake.chunks_on_oque = (uint8_t) stcb->asoc.chunks_on_out_queue;
+ else
+ sctp_clog.x.wake.chunks_on_oque = 0xff;
+
+ sctp_clog.x.wake.sctpflags = 0;
+	/* set in the deferred mode stuff */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE)
+ sctp_clog.x.wake.sctpflags |= 1;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT)
+ sctp_clog.x.wake.sctpflags |= 2;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT)
+ sctp_clog.x.wake.sctpflags |= 4;
+ /* what about the sb */
+ if (stcb->sctp_socket) {
+ struct socket *so = stcb->sctp_socket;
+
+ sctp_clog.x.wake.sbflags = (uint8_t) ((so->so_snd.sb_flags & 0x00ff));
+ } else {
+ sctp_clog.x.wake.sbflags = 0xff;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_WAKE,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_block(uint8_t from, struct socket *so, struct sctp_association *asoc, int sendlen)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.blk.onsb = asoc->total_output_queue_size;
+ sctp_clog.x.blk.send_sent_qcnt = (uint16_t) (asoc->send_queue_cnt + asoc->sent_queue_cnt);
+ sctp_clog.x.blk.peer_rwnd = asoc->peers_rwnd;
+ sctp_clog.x.blk.stream_qcnt = (uint16_t) asoc->stream_queue_cnt;
+ sctp_clog.x.blk.chunks_on_oque = (uint16_t) asoc->chunks_on_out_queue;
+ sctp_clog.x.blk.flight_size = (uint16_t) (asoc->total_flight / 1024);
+ sctp_clog.x.blk.sndlen = sendlen;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_BLOCK,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+int
+sctp_fill_stat_log(void *optval, size_t *optsize)
+{
+ /* May need to fix this if ktrdump does not work */
+ return (0);
+}
+
+#ifdef SCTP_AUDITING_ENABLED
+uint8_t sctp_audit_data[SCTP_AUDIT_SIZE][2];
+static int sctp_audit_indx = 0;
+
+static
+void
+sctp_print_audit_report(void)
+{
+ int i;
+ int cnt;
+
+ cnt = 0;
+ for (i = sctp_audit_indx; i < SCTP_AUDIT_SIZE; i++) {
+ if ((sctp_audit_data[i][0] == 0xe0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if (sctp_audit_data[i][0] == 0xf0) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if ((sctp_audit_data[i][0] == 0xc0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ SCTP_PRINTF("\n");
+ cnt = 0;
+ }
+ SCTP_PRINTF("%2.2x%2.2x ", (uint32_t) sctp_audit_data[i][0],
+ (uint32_t) sctp_audit_data[i][1]);
+ cnt++;
+ if ((cnt % 14) == 0)
+ SCTP_PRINTF("\n");
+ }
+ for (i = 0; i < sctp_audit_indx; i++) {
+ if ((sctp_audit_data[i][0] == 0xe0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if (sctp_audit_data[i][0] == 0xf0) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if ((sctp_audit_data[i][0] == 0xc0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ SCTP_PRINTF("\n");
+ cnt = 0;
+ }
+ SCTP_PRINTF("%2.2x%2.2x ", (uint32_t) sctp_audit_data[i][0],
+ (uint32_t) sctp_audit_data[i][1]);
+ cnt++;
+ if ((cnt % 14) == 0)
+ SCTP_PRINTF("\n");
+ }
+ SCTP_PRINTF("\n");
+}
+
+void
+sctp_auditing(int from, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int resend_cnt, tot_out, rep, tot_book_cnt;
+ struct sctp_nets *lnet;
+ struct sctp_tmit_chunk *chk;
+
+ sctp_audit_data[sctp_audit_indx][0] = 0xAA;
+ sctp_audit_data[sctp_audit_indx][1] = 0x000000ff & from;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ if (inp == NULL) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0x01;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ return;
+ }
+ if (stcb == NULL) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0x02;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ return;
+ }
+ sctp_audit_data[sctp_audit_indx][0] = 0xA1;
+ sctp_audit_data[sctp_audit_indx][1] =
+ (0x000000ff & stcb->asoc.sent_queue_retran_cnt);
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 0;
+ tot_book_cnt = 0;
+ resend_cnt = tot_out = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ resend_cnt++;
+ } else if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ tot_out += chk->book_size;
+ tot_book_cnt++;
+ }
+ }
+ if (resend_cnt != stcb->asoc.sent_queue_retran_cnt) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA1;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ SCTP_PRINTF("resend_cnt:%d asoc-tot:%d\n",
+ resend_cnt, stcb->asoc.sent_queue_retran_cnt);
+ rep = 1;
+ stcb->asoc.sent_queue_retran_cnt = resend_cnt;
+ sctp_audit_data[sctp_audit_indx][0] = 0xA2;
+ sctp_audit_data[sctp_audit_indx][1] =
+ (0x000000ff & stcb->asoc.sent_queue_retran_cnt);
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ }
+ if (tot_out != stcb->asoc.total_flight) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA2;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("tot_flt:%d asoc_tot:%d\n", tot_out,
+ (int)stcb->asoc.total_flight);
+ stcb->asoc.total_flight = tot_out;
+ }
+ if (tot_book_cnt != stcb->asoc.total_flight_count) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA5;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("tot_flt_book:%d\n", tot_book_cnt);
+
+ stcb->asoc.total_flight_count = tot_book_cnt;
+ }
+ tot_out = 0;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ tot_out += lnet->flight_size;
+ }
+ if (tot_out != stcb->asoc.total_flight) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA3;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("real flight:%d net total was %d\n",
+ stcb->asoc.total_flight, tot_out);
+ /* now corrective action */
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+
+ tot_out = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if ((chk->whoTo == lnet) &&
+ (chk->sent < SCTP_DATAGRAM_RESEND)) {
+ tot_out += chk->book_size;
+ }
+ }
+ if (lnet->flight_size != tot_out) {
+ SCTP_PRINTF("net:%p flight was %d corrected to %d\n",
+ lnet, lnet->flight_size,
+ tot_out);
+ lnet->flight_size = tot_out;
+ }
+ }
+ }
+ if (rep) {
+ sctp_print_audit_report();
+ }
+}
+
+void
+sctp_audit_log(uint8_t ev, uint8_t fd)
+{
+
+ sctp_audit_data[sctp_audit_indx][0] = ev;
+ sctp_audit_data[sctp_audit_indx][1] = fd;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+}
+
+#endif
+
+/*
+ * sctp_stop_timers_for_shutdown() should be called
+ * when entering the SHUTDOWN_SENT or SHUTDOWN_ACK_SENT
+ * state to make sure that all timers are stopped.
+ */
+void
+sctp_stop_timers_for_shutdown(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_nets *net;
+
+ asoc = &stcb->asoc;
+
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ }
+}
+
+/*
+ * A list of sizes based on typical MTUs, used only if the next-hop size is
+ * not returned.
+ */
+static uint32_t sctp_mtu_sizes[] = {
+ 68,
+ 296,
+ 508,
+ 512,
+ 544,
+ 576,
+ 1006,
+ 1492,
+ 1500,
+ 1536,
+ 2002,
+ 2048,
+ 4352,
+ 4464,
+ 8166,
+ 17914,
+ 32000,
+ 65535
+};
+
+/*
+ * Return the largest MTU smaller than val. If there is no
+ * entry, just return val.
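+ * For example, with the table above sctp_get_prev_mtu(1500) returns 1492.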
+ */
+uint32_t
+sctp_get_prev_mtu(uint32_t val)
+{
+ uint32_t i;
+
+ if (val <= sctp_mtu_sizes[0]) {
+ return (val);
+ }
+ for (i = 1; i < (sizeof(sctp_mtu_sizes) / sizeof(uint32_t)); i++) {
+ if (val <= sctp_mtu_sizes[i]) {
+ break;
+ }
+ }
+ return (sctp_mtu_sizes[i - 1]);
+}
+
+/*
+ * Return the smallest MTU larger than val. If there is no
+ * entry, just return val.
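+ * For example, with the table above sctp_get_next_mtu(inp, 1500) returns 1536.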
+ */
+uint32_t
+sctp_get_next_mtu(struct sctp_inpcb *inp, uint32_t val)
+{
+ /* select another MTU that is just bigger than this one */
+ uint32_t i;
+
+ for (i = 0; i < (sizeof(sctp_mtu_sizes) / sizeof(uint32_t)); i++) {
+ if (val < sctp_mtu_sizes[i]) {
+ return (sctp_mtu_sizes[i]);
+ }
+ }
+ return (val);
+}
+
+void
+sctp_fill_random_store(struct sctp_pcb *m)
+{
+ /*
+	 * Here we use MD5/SHA-1 to hash our good random numbers together with
+	 * our counter. The result becomes our new pool of good random numbers
+	 * and we then set up to hand these out. Note that we do no locking to
+	 * protect this. That is ok, since if competing callers get here we
+	 * will just get more gobbledygook in the random store, which is what
+	 * we want anyway. There is a danger that two callers will use the
+	 * same random numbers, but that's ok too since that is random as
+	 * well :->
+ */
+ m->store_at = 0;
+ (void)sctp_hmac(SCTP_HMAC, (uint8_t *) m->random_numbers,
+ sizeof(m->random_numbers), (uint8_t *) & m->random_counter,
+ sizeof(m->random_counter), (uint8_t *) m->random_store);
+ m->random_counter++;
+}
+
+uint32_t
+sctp_select_initial_TSN(struct sctp_pcb *inp)
+{
+ /*
+	 * A true implementation should use a random selection process to get
+	 * the initial stream sequence number, using RFC 1750 as a good
+	 * guideline.
+ */
+ uint32_t x, *xp;
+ uint8_t *p;
+ int store_at, new_store;
+
+ if (inp->initial_sequence_debug != 0) {
+ uint32_t ret;
+
+ ret = inp->initial_sequence_debug;
+ inp->initial_sequence_debug++;
+ return (ret);
+ }
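+	/*
+	 * Carve the next four bytes out of the random store without taking a
+	 * lock: the store_at cursor is advanced with atomic_cmpset and the
+	 * loser of a race simply retries; wrapping back to offset 0 triggers
+	 * a refill of the store.
+	 */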
+retry:
+ store_at = inp->store_at;
+ new_store = store_at + sizeof(uint32_t);
+ if (new_store >= (SCTP_SIGNATURE_SIZE - 3)) {
+ new_store = 0;
+ }
+ if (!atomic_cmpset_int(&inp->store_at, store_at, new_store)) {
+ goto retry;
+ }
+ if (new_store == 0) {
+ /* Refill the random store */
+ sctp_fill_random_store(inp);
+ }
+ p = &inp->random_store[store_at];
+ xp = (uint32_t *) p;
+ x = *xp;
+ return (x);
+}
+
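+/*
+ * Draw 32-bit verification tags from the random store until one is found
+ * that is non-zero and that sctp_is_vtag_good() accepts for this
+ * local/remote port pair.
+ */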
+uint32_t
+sctp_select_a_tag(struct sctp_inpcb *inp, uint16_t lport, uint16_t rport, int save_in_twait)
+{
+ uint32_t x, not_done;
+ struct timeval now;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ not_done = 1;
+ while (not_done) {
+ x = sctp_select_initial_TSN(&inp->sctp_ep);
+ if (x == 0) {
+ /* we never use 0 */
+ continue;
+ }
+ if (sctp_is_vtag_good(inp, x, lport, rport, &now, save_in_twait)) {
+ not_done = 0;
+ }
+ }
+ return (x);
+}
+
+int
+sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb,
+ uint32_t override_tag, uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+
+ /*
+ * Anything set to zero is taken care of by the allocation routine's
+ * bzero
+ */
+
+ /*
+	 * Up front, select what scoping to apply to the addresses I tell my
+	 * peer. Not sure what to do with these right now; we will need to
+	 * come up with a way to set them. We may need to pass them through
+	 * from the caller in the sctp_aloc_assoc() function.
+ */
+ int i;
+
+ asoc = &stcb->asoc;
+ /* init all variables to a known value. */
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE);
+ asoc->max_burst = m->sctp_ep.max_burst;
+ asoc->heart_beat_delay = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+ asoc->cookie_life = m->sctp_ep.def_cookie_life;
+ asoc->sctp_cmt_on_off = m->sctp_cmt_on_off;
+ asoc->sctp_nr_sack_on_off = (uint8_t) SCTP_BASE_SYSCTL(sctp_nr_sack_on_off);
+ asoc->sctp_cmt_pf = (uint8_t) SCTP_BASE_SYSCTL(sctp_cmt_pf);
+ asoc->sctp_frag_point = m->sctp_frag_point;
+#ifdef INET
+ asoc->default_tos = m->ip_inp.inp.inp_ip_tos;
+#else
+ asoc->default_tos = 0;
+#endif
+
+#ifdef INET6
+ asoc->default_flowlabel = ((struct in6pcb *)m)->in6p_flowinfo;
+#else
+ asoc->default_flowlabel = 0;
+#endif
+ asoc->sb_send_resv = 0;
+ if (override_tag) {
+ asoc->my_vtag = override_tag;
+ } else {
+ asoc->my_vtag = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+ }
+ /* Get the nonce tags */
+ asoc->my_vtag_nonce = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
+ asoc->peer_vtag_nonce = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
+ asoc->vrf_id = vrf_id;
+
+ if (sctp_is_feature_on(m, SCTP_PCB_FLAGS_DONOT_HEARTBEAT))
+ asoc->hb_is_disabled = 1;
+ else
+ asoc->hb_is_disabled = 0;
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ asoc->tsn_in_at = 0;
+ asoc->tsn_out_at = 0;
+ asoc->tsn_in_wrapped = 0;
+ asoc->tsn_out_wrapped = 0;
+ asoc->cumack_log_at = 0;
+ asoc->cumack_log_atsnt = 0;
+#endif
+#ifdef SCTP_FS_SPEC_LOG
+ asoc->fs_index = 0;
+#endif
+ asoc->refcnt = 0;
+ asoc->assoc_up_sent = 0;
+ asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number = asoc->sending_seq =
+ sctp_select_initial_TSN(&m->sctp_ep);
+ asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1;
+	/* we are optimistic here */
+ asoc->peer_supports_pktdrop = 1;
+ asoc->peer_supports_nat = 0;
+ asoc->sent_queue_retran_cnt = 0;
+
+ /* for CMT */
+ asoc->last_net_cmt_send_started = NULL;
+
+ /* This will need to be adjusted */
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->last_acked_seq = asoc->init_seq_number - 1;
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ asoc->asconf_seq_in = asoc->last_acked_seq;
+
+ /* here we are different, we hold the next one we expect */
+ asoc->str_reset_seq_in = asoc->last_acked_seq + 1;
+
+ asoc->initial_init_rto_max = m->sctp_ep.initial_init_rto_max;
+ asoc->initial_rto = m->sctp_ep.initial_rto;
+
+ asoc->max_init_times = m->sctp_ep.max_init_times;
+ asoc->max_send_times = m->sctp_ep.max_send_times;
+ asoc->def_net_failure = m->sctp_ep.def_net_failure;
+ asoc->free_chunk_cnt = 0;
+
+ asoc->iam_blocking = 0;
+ /* ECN Nonce initialization */
+ asoc->context = m->sctp_context;
+ asoc->def_send = m->def_send;
+ asoc->ecn_nonce_allowed = 0;
+ asoc->receiver_nonce_sum = 1;
+ asoc->nonce_sum_expect_base = 1;
+ asoc->nonce_sum_check = 1;
+ asoc->nonce_resync_tsn = 0;
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_wait_tsn = 0;
+ asoc->delayed_ack = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+ asoc->sack_freq = m->sctp_ep.sctp_sack_freq;
+ asoc->pr_sctp_cnt = 0;
+ asoc->total_output_queue_size = 0;
+
+ if (m->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ struct in6pcb *inp6;
+
+		/* It's a V6 socket */
+ inp6 = (struct in6pcb *)m;
+ asoc->ipv6_addr_legal = 1;
+ /* Now look at the binding flag to see if V4 will be legal */
+ if (SCTP_IPV6_V6ONLY(inp6) == 0) {
+ asoc->ipv4_addr_legal = 1;
+ } else {
+ /* V4 addresses are NOT legal on the association */
+ asoc->ipv4_addr_legal = 0;
+ }
+ } else {
+		/* It's a V4 socket, not V6 */
+ asoc->ipv4_addr_legal = 1;
+ asoc->ipv6_addr_legal = 0;
+ }
+
+ asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(m->sctp_socket), SCTP_MINIMAL_RWND);
+ asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(m->sctp_socket);
+
+ asoc->smallest_mtu = m->sctp_frag_point;
+ asoc->minrto = m->sctp_ep.sctp_minrto;
+ asoc->maxrto = m->sctp_ep.sctp_maxrto;
+
+ asoc->locked_on_sending = NULL;
+ asoc->stream_locked_on = 0;
+ asoc->ecn_echo_cnt_onq = 0;
+ asoc->stream_locked = 0;
+
+ asoc->send_sack = 1;
+
+ LIST_INIT(&asoc->sctp_restricted_addrs);
+
+ TAILQ_INIT(&asoc->nets);
+ TAILQ_INIT(&asoc->pending_reply_queue);
+ TAILQ_INIT(&asoc->asconf_ack_sent);
+ /* Setup to fill the hb random cache at first HB */
+ asoc->hb_random_idx = 4;
+
+ asoc->sctp_autoclose_ticks = m->sctp_ep.auto_close_time;
+
+ /*
+ * JRS - Pick the default congestion control module based on the
+ * sysctl.
+ */
+ switch (m->sctp_ep.sctp_default_cc_module) {
+ /* JRS - Standard TCP congestion control */
+ case SCTP_CC_RFC2581:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - High Speed TCP congestion control (Floyd) */
+ case SCTP_CC_HSTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HSTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - HTCP congestion control */
+ case SCTP_CC_HTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - By default, use RFC2581 */
+ default:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ }
+
+ /*
+ * Now the stream parameters, here we allocate space for all streams
+ * that we request by default.
+ */
+ asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams =
+ m->sctp_ep.pre_open_stream_count;
+ SCTP_MALLOC(asoc->strmout, struct sctp_stream_out *,
+ asoc->streamoutcnt * sizeof(struct sctp_stream_out),
+ SCTP_M_STRMO);
+ if (asoc->strmout == NULL) {
+ /* big trouble no memory */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ /*
+		 * The inbound side must be set to 0xffff. Also NOTE that when
+		 * we get the INIT-ACK back (for the INIT sender) we MUST
+		 * reduce the count (streamoutcnt), but first check whether we
+		 * sent to any of the upper streams that were dropped (if some
+		 * were). Those that were dropped must be notified to the
+		 * upper layer as failed to send.
+ */
+ asoc->strmout[i].next_sequence_sent = 0x0;
+ TAILQ_INIT(&asoc->strmout[i].outqueue);
+ asoc->strmout[i].stream_no = i;
+ asoc->strmout[i].last_msg_incomplete = 0;
+ asoc->strmout[i].next_spoke.tqe_next = 0;
+ asoc->strmout[i].next_spoke.tqe_prev = 0;
+ }
+ /* Now the mapping array */
+ asoc->mapping_array_size = SCTP_INITIAL_MAPPING_ARRAY;
+ SCTP_MALLOC(asoc->mapping_array, uint8_t *, asoc->mapping_array_size,
+ SCTP_M_MAP);
+ if (asoc->mapping_array == NULL) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(asoc->mapping_array, 0, asoc->mapping_array_size);
+ SCTP_MALLOC(asoc->nr_mapping_array, uint8_t *, asoc->mapping_array_size,
+ SCTP_M_MAP);
+ if (asoc->nr_mapping_array == NULL) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(asoc->nr_mapping_array, 0, asoc->mapping_array_size);
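+	/*
+	 * Two parallel bitmaps are kept from here on: mapping_array tracks
+	 * renegable TSNs and nr_mapping_array tracks non-renegable ones
+	 * (see sctp_print_mapping_array below). Both are allocated with the
+	 * same size and are always grown together.
+	 */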
+
+ /* Now the init of the other outqueues */
+ TAILQ_INIT(&asoc->free_chunks);
+ TAILQ_INIT(&asoc->out_wheel);
+ TAILQ_INIT(&asoc->control_send_queue);
+ TAILQ_INIT(&asoc->asconf_send_queue);
+ TAILQ_INIT(&asoc->send_queue);
+ TAILQ_INIT(&asoc->sent_queue);
+ TAILQ_INIT(&asoc->reasmqueue);
+ TAILQ_INIT(&asoc->resetHead);
+ asoc->max_inbound_streams = m->sctp_ep.max_open_streams_intome;
+ TAILQ_INIT(&asoc->asconf_queue);
+ /* authentication fields */
+ asoc->authinfo.random = NULL;
+ asoc->authinfo.active_keyid = 0;
+ asoc->authinfo.assoc_key = NULL;
+ asoc->authinfo.assoc_keyid = 0;
+ asoc->authinfo.recv_key = NULL;
+ asoc->authinfo.recv_keyid = 0;
+ LIST_INIT(&asoc->shared_keys);
+ asoc->marked_retrans = 0;
+ asoc->timoinit = 0;
+ asoc->timodata = 0;
+ asoc->timosack = 0;
+ asoc->timoshutdown = 0;
+ asoc->timoheartbeat = 0;
+ asoc->timocookie = 0;
+ asoc->timoshutdownack = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->start_time);
+ asoc->discontinuity_time = asoc->start_time;
+ /*
+ * sa_ignore MEMLEAK {memory is put in the assoc mapping array and
+ * freed later when the association is freed.
+ */
+ return (0);
+}
+
+void
+sctp_print_mapping_array(struct sctp_association *asoc)
+{
+ unsigned int i, limit;
+
+ printf("Mapping array size: %d, baseTSN: %8.8x, cumAck: %8.8x, highestTSN: (%8.8x, %8.8x).\n",
+ asoc->mapping_array_size,
+ asoc->mapping_array_base_tsn,
+ asoc->cumulative_tsn,
+ asoc->highest_tsn_inside_map,
+ asoc->highest_tsn_inside_nr_map);
+ for (limit = asoc->mapping_array_size; limit > 1; limit--) {
+ if (asoc->mapping_array[limit - 1]) {
+ break;
+ }
+ }
+ printf("Renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit);
+ for (i = 0; i < limit; i++) {
+ printf("%2.2x%c", asoc->mapping_array[i], ((i + 1) % 16) ? ' ' : '\n');
+ }
+ if (limit % 16)
+ printf("\n");
+ for (limit = asoc->mapping_array_size; limit > 1; limit--) {
+ if (asoc->nr_mapping_array[limit - 1]) {
+ break;
+ }
+ }
+ printf("Non renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit);
+ for (i = 0; i < limit; i++) {
+ printf("%2.2x%c", asoc->nr_mapping_array[i], ((i + 1) % 16) ? ' ' : '\n');
+ }
+ if (limit % 16)
+ printf("\n");
+}
+
+int
+sctp_expand_mapping_array(struct sctp_association *asoc, uint32_t needed)
+{
+ /* mapping array needs to grow */
+ uint8_t *new_array1, *new_array2;
+ uint32_t new_size;
+
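+	/*
+	 * The mapping array holds one bit per TSN, so round the extra TSNs
+	 * needed up to whole bytes and add the configured increment.
+	 */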
+ new_size = asoc->mapping_array_size + ((needed + 7) / 8 + SCTP_MAPPING_ARRAY_INCR);
+ SCTP_MALLOC(new_array1, uint8_t *, new_size, SCTP_M_MAP);
+ SCTP_MALLOC(new_array2, uint8_t *, new_size, SCTP_M_MAP);
+ if ((new_array1 == NULL) || (new_array2 == NULL)) {
+ /* can't get more, forget it */
+ SCTP_PRINTF("No memory for expansion of SCTP mapping array %d\n", new_size);
+ if (new_array1) {
+ SCTP_FREE(new_array1, SCTP_M_MAP);
+ }
+ if (new_array2) {
+ SCTP_FREE(new_array2, SCTP_M_MAP);
+ }
+ return (-1);
+ }
+ memset(new_array1, 0, new_size);
+ memset(new_array2, 0, new_size);
+ memcpy(new_array1, asoc->mapping_array, asoc->mapping_array_size);
+ memcpy(new_array2, asoc->nr_mapping_array, asoc->mapping_array_size);
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = new_array1;
+ asoc->nr_mapping_array = new_array2;
+ asoc->mapping_array_size = new_size;
+ return (0);
+}
+
+
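+/*
+ * Walk every endpoint (or a single one, with SCTP_ITERATOR_DO_SINGLE_INP)
+ * whose flags/features match, applying the caller-supplied function_inp,
+ * function_assoc and function_inp_end callbacks. After
+ * SCTP_ITERATOR_MAX_AT_ONCE associations the INFO and ITERATOR locks are
+ * dropped and re-taken so other threads can make progress.
+ */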
+static void
+sctp_iterator_work(struct sctp_iterator *it)
+{
+ int iteration_count = 0;
+ int inp_skip = 0;
+ int first_in = 1;
+ struct sctp_inpcb *tinp;
+
+ SCTP_INP_INFO_RLOCK();
+ SCTP_ITERATOR_LOCK();
+ if (it->inp) {
+ SCTP_INP_RLOCK(it->inp);
+ SCTP_INP_DECR_REF(it->inp);
+ }
+ if (it->inp == NULL) {
+ /* iterator is complete */
+done_with_iterator:
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_INP_INFO_RUNLOCK();
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ SCTP_FREE(it, SCTP_M_ITER);
+ return;
+ }
+select_a_new_ep:
+ if (first_in) {
+ first_in = 0;
+ } else {
+ SCTP_INP_RLOCK(it->inp);
+ }
+ while (((it->pcb_flags) &&
+ ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
+ ((it->pcb_features) &&
+ ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
+ /* endpoint flags or features don't match, so keep looking */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ SCTP_INP_RUNLOCK(it->inp);
+ goto done_with_iterator;
+ }
+ tinp = it->inp;
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ SCTP_INP_RUNLOCK(tinp);
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ SCTP_INP_RLOCK(it->inp);
+ }
+ /* now go through each assoc which is in the desired state */
+ if (it->done_current_ep == 0) {
+ if (it->function_inp != NULL)
+ inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
+ it->done_current_ep = 1;
+ }
+ if (it->stcb == NULL) {
+ /* run the per instance function */
+ it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
+ }
+ if ((inp_skip) || it->stcb == NULL) {
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ SCTP_INP_RUNLOCK(it->inp);
+ goto no_stcb;
+ }
+ while (it->stcb) {
+ SCTP_TCB_LOCK(it->stcb);
+ if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
+ /* not in the right state... keep looking */
+ SCTP_TCB_UNLOCK(it->stcb);
+ goto next_assoc;
+ }
+ /* see if we have limited out the iterator loop */
+ iteration_count++;
+ if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
+ /* Pause to let others grab the lock */
+ atomic_add_int(&it->stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(it->stcb);
+ SCTP_INP_INCR_REF(it->inp);
+ SCTP_INP_RUNLOCK(it->inp);
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_INP_INFO_RUNLOCK();
+ SCTP_INP_INFO_RLOCK();
+ SCTP_ITERATOR_LOCK();
+ if (sctp_it_ctl.iterator_flags) {
+ /* We won't be staying here */
+ SCTP_INP_DECR_REF(it->inp);
+ atomic_add_int(&it->stcb->asoc.refcnt, -1);
+ if (sctp_it_ctl.iterator_flags &
+ SCTP_ITERATOR_MUST_EXIT) {
+ goto done_with_iterator;
+ }
+ if (sctp_it_ctl.iterator_flags &
+ SCTP_ITERATOR_STOP_CUR_IT) {
+ sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_IT;
+ goto done_with_iterator;
+ }
+ if (sctp_it_ctl.iterator_flags &
+ SCTP_ITERATOR_STOP_CUR_INP) {
+ sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_INP;
+ goto no_stcb;
+ }
+				/* Should not happen: unknown iterator control flag */
+ printf("Unknown it ctl flag %x\n",
+ sctp_it_ctl.iterator_flags);
+ sctp_it_ctl.iterator_flags = 0;
+ }
+ SCTP_INP_RLOCK(it->inp);
+ SCTP_INP_DECR_REF(it->inp);
+ SCTP_TCB_LOCK(it->stcb);
+ atomic_add_int(&it->stcb->asoc.refcnt, -1);
+ iteration_count = 0;
+ }
+ /* run function on this one */
+ (*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
+
+ /*
+		 * We lie here; it really needs to have its own type, but
+		 * first we must verify that this won't affect things :-0
+ */
+ if (it->no_chunk_output == 0)
+ sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+
+ SCTP_TCB_UNLOCK(it->stcb);
+next_assoc:
+ it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
+ if (it->stcb == NULL) {
+ /* Run last function */
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ }
+ }
+ SCTP_INP_RUNLOCK(it->inp);
+no_stcb:
+ /* done with all assocs on this endpoint, move on to next endpoint */
+ it->done_current_ep = 0;
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ }
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ goto select_a_new_ep;
+}
+
+void
+sctp_iterator_worker(void)
+{
+ struct sctp_iterator *it = NULL;
+
+ /* This function is called with the WQ lock in place */
+
+ sctp_it_ctl.iterator_running = 1;
+ sctp_it_ctl.cur_it = it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
+ while (it) {
+ /* now lets work on this one */
+ TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ CURVNET_SET(it->vn);
+ sctp_iterator_work(it);
+
+ CURVNET_RESTORE();
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_MUST_EXIT) {
+ sctp_it_ctl.cur_it = NULL;
+ break;
+ }
+ /* sa_ignore FREED_MEMORY */
+ sctp_it_ctl.cur_it = it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
+ }
+ sctp_it_ctl.iterator_running = 0;
+ return;
+}
+
+
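+/*
+ * Drain the global address work queue (filled from rtsock address events)
+ * into an sctp_asconf_iterator work list and kick off an iterator over all
+ * bound-all endpoints; if the allocation fails we simply re-arm the
+ * ADDR_WQ timer and try again later.
+ */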
+static void
+sctp_handle_addr_wq(void)
+{
+ /* deal with the ADDR wq from the rtsock calls */
+ struct sctp_laddr *wi;
+ struct sctp_asconf_iterator *asc;
+
+ SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
+ sizeof(struct sctp_asconf_iterator), SCTP_M_ASC_IT);
+ if (asc == NULL) {
+ /* Try later, no memory */
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ return;
+ }
+ LIST_INIT(&asc->list_of_work);
+ asc->cnt = 0;
+
+ SCTP_WQ_ADDR_LOCK();
+ wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq));
+ while (wi != NULL) {
+ LIST_REMOVE(wi, sctp_nxt_addr);
+ LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
+ asc->cnt++;
+ wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq));
+ }
+ SCTP_WQ_ADDR_UNLOCK();
+
+ if (asc->cnt == 0) {
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ } else {
+ (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ sctp_asconf_iterator_stcb,
+ NULL, /* No ep end for boundall */
+ SCTP_PCB_FLAGS_BOUNDALL,
+ SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)asc, 0,
+ sctp_asconf_iterator_end, NULL, 0);
+ }
+}
+
+int retcode = 0;
+int cur_oerr = 0;
+
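+/*
+ * Generic callout handler: validate the timer (self pointer, type, still
+ * active), take references on the inp/stcb, then dispatch on tmr->type.
+ * The get_out/out_decr/out_no_decr labels below undo the locking and
+ * reference counts on the various exit paths.
+ */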
+void
+sctp_timeout_handler(void *t)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct sctp_timer *tmr;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ int did_output, type;
+
+ tmr = (struct sctp_timer *)t;
+ inp = (struct sctp_inpcb *)tmr->ep;
+ stcb = (struct sctp_tcb *)tmr->tcb;
+ net = (struct sctp_nets *)tmr->net;
+ CURVNET_SET((struct vnet *)tmr->vnet);
+ did_output = 1;
+
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xF0, (uint8_t) tmr->type);
+ sctp_auditing(3, inp, stcb, net);
+#endif
+
+ /* sanity checks... */
+ if (tmr->self != (void *)tmr) {
+ /*
+ * SCTP_PRINTF("Stale SCTP timer fired (%p), ignoring...\n",
+ * tmr);
+ */
+ CURVNET_RESTORE();
+ return;
+ }
+ tmr->stopped_from = 0xa001;
+ if (!SCTP_IS_TIMER_TYPE_VALID(tmr->type)) {
+ /*
+ * SCTP_PRINTF("SCTP timer fired with invalid type: 0x%x\n",
+ * tmr->type);
+ */
+ CURVNET_RESTORE();
+ return;
+ }
+ tmr->stopped_from = 0xa002;
+ if ((tmr->type != SCTP_TIMER_TYPE_ADDR_WQ) && (inp == NULL)) {
+ CURVNET_RESTORE();
+ return;
+ }
+ /* if this is an iterator timeout, get the struct and clear inp */
+ tmr->stopped_from = 0xa003;
+ type = tmr->type;
+ if (inp) {
+ SCTP_INP_INCR_REF(inp);
+ if ((inp->sctp_socket == 0) &&
+ ((tmr->type != SCTP_TIMER_TYPE_INPKILL) &&
+ (tmr->type != SCTP_TIMER_TYPE_INIT) &&
+ (tmr->type != SCTP_TIMER_TYPE_SEND) &&
+ (tmr->type != SCTP_TIMER_TYPE_RECV) &&
+ (tmr->type != SCTP_TIMER_TYPE_HEARTBEAT) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWN) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWNACK) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWNGUARD) &&
+ (tmr->type != SCTP_TIMER_TYPE_ASOCKILL))
+ ) {
+ SCTP_INP_DECR_REF(inp);
+ CURVNET_RESTORE();
+ return;
+ }
+ }
+ tmr->stopped_from = 0xa004;
+ if (stcb) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state == 0) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ CURVNET_RESTORE();
+ return;
+ }
+ }
+ tmr->stopped_from = 0xa005;
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", tmr->type);
+ if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ if (stcb) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+ CURVNET_RESTORE();
+ return;
+ }
+ tmr->stopped_from = 0xa006;
+
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if ((tmr->type != SCTP_TIMER_TYPE_ASOCKILL) &&
+ ((stcb->asoc.state == 0) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED))) {
+ SCTP_TCB_UNLOCK(stcb);
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ CURVNET_RESTORE();
+ return;
+ }
+ }
+	/* record in stopped_from which timeout occurred */
+ tmr->stopped_from = tmr->type;
+
+ /* mark as being serviced now */
+ if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
+ /*
+ * Callout has been rescheduled.
+ */
+ goto get_out;
+ }
+ if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
+ /*
+ * Not active, so no action.
+ */
+ goto get_out;
+ }
+ SCTP_OS_TIMER_DEACTIVATE(&tmr->timer);
+
+ /* call the handler for the appropriate timer type */
+ switch (tmr->type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ if (inp == NULL) {
+ break;
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ }
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ if (inp == NULL) {
+ break;
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_SENDQ_EVENT(inp, inp->sctp_socket);
+ }
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ sctp_handle_addr_wq();
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timodata);
+ stcb->asoc.timodata++;
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ cur_oerr = stcb->asoc.overall_error_count;
+ retcode = sctp_t3rxt_timer(inp, stcb, net);
+ if (retcode) {
+			/* no need to unlock the tcb, it's gone */
+
+ goto out_decr;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((stcb->asoc.num_send_timers_up == 0) &&
+ (stcb->asoc.sent_queue_cnt > 0)
+ ) {
+ struct sctp_tmit_chunk *chk;
+
+ /*
+			 * Safeguard: if there are chunks on the sent queue
+			 * but no timers running, something is wrong, so we
+			 * start a timer on the first chunk of the sent queue
+			 * on whatever net it was sent to.
+ */
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb,
+ chk->whoTo);
+ }
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoinit);
+ stcb->asoc.timoinit++;
+ if (sctp_t1init_timer(inp, stcb, net)) {
+			/* no need to unlock the tcb, it's gone */
+ goto out_decr;
+ }
+ /* We do output but not here */
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ } {
+ SCTP_STAT_INCR(sctps_timosack);
+ stcb->asoc.timosack++;
+ sctp_send_sack(stcb);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SACK_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_shutdown_timer(inp, stcb, net)) {
+			/* no need to unlock the tcb, it's gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdown);
+ stcb->asoc.timoshutdown++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ {
+ struct sctp_nets *lnet;
+ int cnt_of_unconf = 0;
+
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoheartbeat);
+ stcb->asoc.timoheartbeat++;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_of_unconf++;
+ }
+ }
+ if (cnt_of_unconf == 0) {
+ if (sctp_heartbeat_timer(inp, stcb, lnet,
+ cnt_of_unconf)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, lnet);
+#endif
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, lnet);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_HB_TMR, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_cookie_timer(inp, stcb, net)) {
+			/* no need to unlock the tcb, it's gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timocookie);
+ stcb->asoc.timocookie++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ /*
+		 * We consider the T3 and Cookie timers pretty much the same
+		 * with respect to the 'from' argument passed to chunk_output.
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ {
+ struct timeval tv;
+ int i, secret;
+
+ if (inp == NULL) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timosecret);
+ (void)SCTP_GETTIME_TIMEVAL(&tv);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.time_of_secret_change = tv.tv_sec;
+ inp->sctp_ep.last_secret_number =
+ inp->sctp_ep.current_secret_number;
+ inp->sctp_ep.current_secret_number++;
+ if (inp->sctp_ep.current_secret_number >=
+ SCTP_HOW_MANY_SECRETS) {
+ inp->sctp_ep.current_secret_number = 0;
+ }
+ secret = (int)inp->sctp_ep.current_secret_number;
+ for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) {
+ inp->sctp_ep.secret_key[secret][i] =
+ sctp_select_initial_TSN(&inp->sctp_ep);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, stcb, net);
+ }
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timopathmtu);
+ sctp_pathmtu_timer(inp, stcb, net);
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_shutdownack_timer(inp, stcb, net)) {
+			/* no need to unlock the tcb, it's gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdownack);
+ stcb->asoc.timoshutdownack++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_ACK_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdownguard);
+ sctp_abort_an_association(inp, stcb,
+ SCTP_SHUTDOWN_GUARD_EXPIRES, NULL, SCTP_SO_NOT_LOCKED);
+		/* no need to unlock the tcb, it's gone */
+ goto out_decr;
+
+ case SCTP_TIMER_TYPE_STRRESET:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_strreset_timer(inp, stcb, net)) {
+			/* no need to unlock the tcb, it's gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timostrmrst);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_EARLYFR:
+ /* Need to do FR of things for net */
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoearlyfr);
+ sctp_early_fr_timer(inp, stcb, net);
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_asconf_timer(inp, stcb, net)) {
+			/* no need to unlock the tcb, it's gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoasconf);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_ASCONF_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ sctp_delete_prim_timer(inp, stcb, net);
+ SCTP_STAT_INCR(sctps_timodelprim);
+ break;
+
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoautoclose);
+ sctp_autoclose_timer(inp, stcb, net);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR, SCTP_SO_NOT_LOCKED);
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoassockill);
+ /* Can we free it yet? */
+ SCTP_INP_DECR_REF(inp);
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ /*
+		 * free asoc always unlocks (or destroys) the lock, so prevent
+		 * a duplicate unlock or an unlock of a freed mutex :-0
+ */
+ stcb = NULL;
+ goto out_no_decr;
+ case SCTP_TIMER_TYPE_INPKILL:
+ SCTP_STAT_INCR(sctps_timoinpkill);
+ if (inp == NULL) {
+ break;
+ }
+ /*
+ * special case, take away our increment since WE are the
+ * killer
+ */
+ SCTP_INP_DECR_REF(inp);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_FROM_INPKILL_TIMER);
+ inp = NULL;
+ goto out_no_decr;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "sctp_timeout_handler:unknown timer %d\n",
+ tmr->type);
+ break;
+ };
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xF1, (uint8_t) tmr->type);
+ if (inp)
+ sctp_auditing(5, inp, stcb, net);
+#endif
+ if ((did_output) && stcb) {
+ /*
+ * Now we need to clean up the control chunk chain if an
+ * ECNE is on it. It must be marked as UNSENT again so next
+ * call will continue to send it until such time that we get
+ * a CWR, to remove it. It is, however, less likely that we
+ * will find a ecn echo on the chain though.
+ */
+ sctp_fix_ecn_echo(&stcb->asoc);
+ }
+get_out:
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+out_decr:
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+out_no_decr:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type %d)\n",
+ type);
+ CURVNET_RESTORE();
+}
+
+void
+sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int to_ticks;
+ struct sctp_timer *tmr;
+
+ if ((t_type != SCTP_TIMER_TYPE_ADDR_WQ) && (inp == NULL))
+ return;
+
+ to_ticks = 0;
+
+ tmr = NULL;
+ if (stcb) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ }
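+	/*
+	 * Each case picks the sctp_timer to use (tmr) and the timeout in
+	 * ticks (to_ticks); the common code after the switch refuses to
+	 * restart a timer that is already pending and then arms the callout
+	 * with sctp_timeout_handler. Note that the per-destination SEND,
+	 * INIT, SHUTDOWN, COOKIE and SHUTDOWN-ACK timers all share
+	 * net->rxt_timer.
+	 */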
+ switch (t_type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ tmr = &inp->sctp_ep.zero_copy_timer;
+ to_ticks = SCTP_ZERO_COPY_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ tmr = &inp->sctp_ep.zero_copy_sendq_timer;
+ to_ticks = SCTP_ZERO_COPY_SENDQ_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ /* Only 1 tick away :-) */
+ tmr = &SCTP_BASE_INFO(addr_wq_timer);
+ to_ticks = SCTP_ADDRESS_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ /* Here we use the RTO timer */
+ {
+ int rto_val;
+
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ if (net->RTO == 0) {
+ rto_val = stcb->asoc.initial_rto;
+ } else {
+ rto_val = net->RTO;
+ }
+ to_ticks = MSEC_TO_TICKS(rto_val);
+ }
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ /*
+		 * Here we use the INIT timer default, usually about 1
+		 * minute.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ /*
+		 * Here we use the Delayed-Ack timer value from the inp,
+		 * usually about 200 ms.
+ */
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.dack_timer;
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.delayed_ack);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ /* Here we use the RTO of the destination. */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ /*
+		 * The net is used here so that we can add in the RTO, even
+		 * though we use a different timer. We also add the HB timer
+		 * delay PLUS a random jitter.
+ */
+ if ((inp == NULL) || (stcb == NULL)) {
+ return;
+ } else {
+ uint32_t rndval;
+ uint8_t this_random;
+ int cnt_of_unconf = 0;
+ struct sctp_nets *lnet;
+
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_of_unconf++;
+ }
+ }
+ if (cnt_of_unconf) {
+ net = lnet = NULL;
+ (void)sctp_heartbeat_timer(inp, stcb, lnet, cnt_of_unconf);
+ }
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&inp->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ stcb->asoc.hb_random_idx = 0;
+ }
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ /*
+			 * this_random will be 0 - 255 ms; the RTO is in ms.
+ */
+ if ((stcb->asoc.hb_is_disabled) &&
+ (cnt_of_unconf == 0)) {
+ return;
+ }
+ if (net) {
+ int delay;
+
+ delay = stcb->asoc.heart_beat_delay;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ ((lnet->dest_state & SCTP_ADDR_OUT_OF_SCOPE) == 0) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ delay = 0;
+ }
+ }
+ if (net->RTO == 0) {
+ /* Never been checked */
+ to_ticks = this_random + stcb->asoc.initial_rto + delay;
+ } else {
+ /* set rto_val to the ms */
+ to_ticks = delay + net->RTO + this_random;
+ }
+ } else {
+ if (cnt_of_unconf) {
+ to_ticks = this_random + stcb->asoc.initial_rto;
+ } else {
+ to_ticks = stcb->asoc.heart_beat_delay + this_random + stcb->asoc.initial_rto;
+ }
+ }
+ /*
+			 * Now convert to_ticks, currently in ms, to ticks.
+ */
+ to_ticks = MSEC_TO_TICKS(to_ticks);
+ tmr = &stcb->asoc.hb_timer;
+ }
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ /*
+		 * Here we can use the RTO timer from the network since one
+		 * RTT was complete. If a retransmission happened then we will
+		 * be using the initial RTO value.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ /*
+		 * Nothing needed but the endpoint here; usually about 60
+		 * minutes.
+ */
+ if (inp == NULL) {
+ return;
+ }
+ tmr = &inp->sctp_ep.signature_change;
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_SIGNATURE];
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ to_ticks = MSEC_TO_TICKS(SCTP_ASOC_KILL_TIMEOUT);
+ break;
+ case SCTP_TIMER_TYPE_INPKILL:
+ /*
+		 * The inp is set up to die. We re-use the signature_change
+		 * timer since that has stopped and we are in the GONE
+		 * state.
+ */
+ if (inp == NULL) {
+ return;
+ }
+ tmr = &inp->sctp_ep.signature_change;
+ to_ticks = MSEC_TO_TICKS(SCTP_INP_KILL_TIMEOUT);
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ /*
+		 * Here we use the value found in the EP for PMTU, usually
+		 * about 10 minutes.
+ */
+ if ((stcb == NULL) || (inp == NULL)) {
+ return;
+ }
+ if (net == NULL) {
+ return;
+ }
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_PMTU];
+ tmr = &net->pmtu_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ /* Here we use the RTO of the destination */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ /*
+		 * Here we use the endpoint's shutdown guard timer, usually
+		 * about 3 minutes.
+ */
+ if ((inp == NULL) || (stcb == NULL)) {
+ return;
+ }
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+ tmr = &stcb->asoc.shut_guard_timer;
+ break;
+ case SCTP_TIMER_TYPE_STRRESET:
+ /*
+ * Here the timer comes from the stcb but its value is from
+ * the net's RTO.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+
+ case SCTP_TIMER_TYPE_EARLYFR:
+ {
+ unsigned int msec;
+
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->flight_size > net->cwnd) {
+ /* no need to start */
+ return;
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstart);
+ if (net->lastsa == 0) {
+ /* Hmm no rtt estimate yet? */
+ msec = stcb->asoc.initial_rto >> 2;
+ } else {
+ msec = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ }
+ if (msec < SCTP_BASE_SYSCTL(sctp_early_fr_msec)) {
+ msec = SCTP_BASE_SYSCTL(sctp_early_fr_msec);
+ if (msec < SCTP_MINFR_MSEC_FLOOR) {
+ msec = SCTP_MINFR_MSEC_FLOOR;
+ }
+ }
+ to_ticks = MSEC_TO_TICKS(msec);
+ tmr = &net->fr_timer;
+ }
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ /*
+ * Here the timer comes from the stcb but its value is from
+ * the net's RTO.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &stcb->asoc.asconf_timer;
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if ((stcb == NULL) || (net != NULL)) {
+ return;
+ }
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ tmr = &stcb->asoc.delete_prim_timer;
+ break;
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if (stcb == NULL) {
+ return;
+ }
+ if (stcb->asoc.sctp_autoclose_ticks == 0) {
+ /*
+ * Really an error since stcb is NOT set to
+ * autoclose
+ */
+ return;
+ }
+ to_ticks = stcb->asoc.sctp_autoclose_ticks;
+ tmr = &stcb->asoc.autoclose_timer;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
+ __FUNCTION__, t_type);
+ return;
+ break;
+ };
+ if ((to_ticks <= 0) || (tmr == NULL)) {
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: %d:software error to_ticks:%d tmr:%p not set ??\n",
+ __FUNCTION__, t_type, to_ticks, tmr);
+ return;
+ }
+ if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
+ /*
+		 * We do NOT allow the timer to already be running; if it is,
+		 * we leave the current one up unchanged.
+ */
+ return;
+ }
+ /* At this point we can proceed */
+ if (t_type == SCTP_TIMER_TYPE_SEND) {
+ stcb->asoc.num_send_timers_up++;
+ }
+ tmr->stopped_from = 0;
+ tmr->type = t_type;
+ tmr->ep = (void *)inp;
+ tmr->tcb = (void *)stcb;
+ tmr->net = (void *)net;
+ tmr->self = (void *)tmr;
+ tmr->vnet = (void *)curvnet;
+ tmr->ticks = sctp_get_tick_count();
+ (void)SCTP_OS_TIMER_START(&tmr->timer, to_ticks, sctp_timeout_handler, tmr);
+ return;
+}
+
+void
+sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint32_t from)
+{
+ struct sctp_timer *tmr;
+
+ if ((t_type != SCTP_TIMER_TYPE_ADDR_WQ) &&
+ (inp == NULL))
+ return;
+
+ tmr = NULL;
+ if (stcb) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ }
+ switch (t_type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ tmr = &inp->sctp_ep.zero_copy_timer;
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ tmr = &inp->sctp_ep.zero_copy_sendq_timer;
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ tmr = &SCTP_BASE_INFO(addr_wq_timer);
+ break;
+ case SCTP_TIMER_TYPE_EARLYFR:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->fr_timer;
+ SCTP_STAT_INCR(sctps_earlyfrstop);
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.dack_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.hb_timer;
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ /* nothing needed but the endpoint here */
+ tmr = &inp->sctp_ep.signature_change;
+ /*
+		 * We re-use the newcookie timer for the INP kill timer. We
+		 * must ensure that we do not kill it by accident.
+ */
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ /*
+ * Stop the asoc kill timer.
+ */
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+
+ case SCTP_TIMER_TYPE_INPKILL:
+ /*
+		 * The inp is set up to die. We re-use the signature_change
+		 * timer since that has stopped and we are in the GONE
+		 * state.
+ */
+ tmr = &inp->sctp_ep.signature_change;
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->pmtu_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.shut_guard_timer;
+ break;
+ case SCTP_TIMER_TYPE_STRRESET:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.asconf_timer;
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.delete_prim_timer;
+ break;
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.autoclose_timer;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
+ __FUNCTION__, t_type);
+ break;
+ };
+ if (tmr == NULL) {
+ return;
+ }
+ if ((tmr->type != t_type) && tmr->type) {
+ /*
+		 * Ok, we have a timer that is under joint use, perhaps the
+		 * Cookie timer sharing with the SEND timer. We therefore are
+		 * NOT running the timer that the caller wants stopped, so
+		 * just return.
+ */
+ return;
+ }
+ if ((t_type == SCTP_TIMER_TYPE_SEND) && (stcb != NULL)) {
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ }
+ tmr->self = NULL;
+ tmr->stopped_from = from;
+ (void)SCTP_OS_TIMER_STOP(&tmr->timer);
+ return;
+}
+
+uint32_t
+sctp_calculate_len(struct mbuf *m)
+{
+ uint32_t tlen = 0;
+ struct mbuf *at;
+
+ at = m;
+ while (at) {
+ tlen += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ return (tlen);
+}
+
+void
+sctp_mtu_size_reset(struct sctp_inpcb *inp,
+ struct sctp_association *asoc, uint32_t mtu)
+{
+ /*
+	 * Reset the P-MTU size on this association. This involves changing
+	 * the asoc MTU and going through ANY chunk whose size plus overhead
+	 * is larger than mtu, to allow the DF flag to be cleared.
+ */
+ struct sctp_tmit_chunk *chk;
+ unsigned int eff_mtu, ovh;
+
+ asoc->smallest_mtu = mtu;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MIN_OVERHEAD;
+ } else {
+ ovh = SCTP_MIN_V4_OVERHEAD;
+ }
+ eff_mtu = mtu - ovh;
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+ if (chk->send_size > eff_mtu) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->send_size > eff_mtu) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+}
+
+
+/*
+ * Given an association and the starting time of the current RTT period,
+ * return the RTO in msecs. net should point to the current network.
+ */
+uint32_t
+sctp_calculate_rto(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_nets *net,
+ struct timeval *told,
+ int safe)
+{
+ /*-
+ * given an association and the starting time of the current RTT
+ * period (in value1/value2) return RTO in number of msecs.
+ */
+ int calc_time = 0;
+ int o_calctime;
+ uint32_t new_rto = 0;
+ int first_measure = 0;
+ struct timeval now, then, *old;
+
+ /* Copy it out for sparc64 */
+ if (safe == sctp_align_unsafe_makecopy) {
+ old = &then;
+ memcpy(&then, told, sizeof(struct timeval));
+ } else if (safe == sctp_align_safe_nocopy) {
+ old = told;
+ } else {
+ /* error */
+ SCTP_PRINTF("Huh, bad rto calc call\n");
+ return (0);
+ }
+ /************************/
+ /* 1. calculate new RTT */
+ /************************/
+ /* get the current time */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* compute the RTT value */
+ if ((u_long)now.tv_sec > (u_long)old->tv_sec) {
+ calc_time = ((u_long)now.tv_sec - (u_long)old->tv_sec) * 1000;
+ if ((u_long)now.tv_usec > (u_long)old->tv_usec) {
+ calc_time += (((u_long)now.tv_usec -
+ (u_long)old->tv_usec) / 1000);
+ } else if ((u_long)now.tv_usec < (u_long)old->tv_usec) {
+ /* Borrow 1,000ms from current calculation */
+ calc_time -= 1000;
+ /* Add in the slop over */
+ calc_time += ((int)now.tv_usec / 1000);
+ /* Add in the pre-second ms's */
+ calc_time += (((int)1000000 - (int)old->tv_usec) / 1000);
+ }
+ } else if ((u_long)now.tv_sec == (u_long)old->tv_sec) {
+ if ((u_long)now.tv_usec > (u_long)old->tv_usec) {
+ calc_time = ((u_long)now.tv_usec -
+ (u_long)old->tv_usec) / 1000;
+ } else if ((u_long)now.tv_usec < (u_long)old->tv_usec) {
+ /* impossible .. garbage in nothing out */
+ goto calc_rto;
+ } else if ((u_long)now.tv_usec == (u_long)old->tv_usec) {
+ /*
+ * We have to have 1 usec :-D this must be the
+ * loopback.
+ */
+ calc_time = 1;
+ } else {
+ /* impossible .. garbage in nothing out */
+ goto calc_rto;
+ }
+ } else {
+ /* Clock wrapped? */
+ goto calc_rto;
+ }
+ /***************************/
+ /* 2. update RTTVAR & SRTT */
+ /***************************/
+ net->rtt = o_calctime = calc_time;
+ /* this is Van Jacobson's integer version */
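+	/*
+	 * lastsa holds SRTT scaled by 8 (shift 3) and lastsv holds RTTVAR
+	 * scaled by 4 (shift 2), so the value computed at calc_rto below,
+	 * (lastsa >> 3) + lastsv, is effectively SRTT + 4 * RTTVAR.
+	 */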
+ if (net->RTO_measured) {
+ calc_time -= (net->lastsa >> SCTP_RTT_SHIFT); /* take away 1/8th when
+ * shift=3 */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RTTVAR_LOGGING_ENABLE) {
+ rto_logging(net, SCTP_LOG_RTTVAR);
+ }
+ net->prev_rtt = o_calctime;
+ net->lastsa += calc_time; /* add 7/8th into sa when
+ * shift=3 */
+ if (calc_time < 0) {
+ calc_time = -calc_time;
+ }
+ calc_time -= (net->lastsv >> SCTP_RTT_VAR_SHIFT); /* take away 1/4 when
+ * VAR shift=2 */
+ net->lastsv += calc_time;
+ if (net->lastsv == 0) {
+ net->lastsv = SCTP_CLOCK_GRANULARITY;
+ }
+ } else {
+		/* First RTO measurement */
+ net->RTO_measured = 1;
+ net->lastsa = calc_time << SCTP_RTT_SHIFT; /* Multiply by 8 when
+ * shift=3 */
+ net->lastsv = calc_time;
+ if (net->lastsv == 0) {
+ net->lastsv = SCTP_CLOCK_GRANULARITY;
+ }
+ first_measure = 1;
+ net->prev_rtt = o_calctime;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RTTVAR_LOGGING_ENABLE) {
+ rto_logging(net, SCTP_LOG_INITIAL_RTT);
+ }
+ }
+calc_rto:
+ new_rto = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
+ if ((new_rto > SCTP_SAT_NETWORK_MIN) &&
+ (stcb->asoc.sat_network_lockout == 0)) {
+ stcb->asoc.sat_network = 1;
+ } else if ((!first_measure) && stcb->asoc.sat_network) {
+ stcb->asoc.sat_network = 0;
+ stcb->asoc.sat_network_lockout = 1;
+ }
+ /* bound it, per C6/C7 in Section 5.3.1 */
+ if (new_rto < stcb->asoc.minrto) {
+ new_rto = stcb->asoc.minrto;
+ }
+ if (new_rto > stcb->asoc.maxrto) {
+ new_rto = stcb->asoc.maxrto;
+ }
+ /* we are now returning the RTO */
+ return (new_rto);
+}
+
+/*
+ * Return a pointer to a contiguous piece of data from the given mbuf chain
+ * starting at 'off' for 'len' bytes. If the desired piece spans more than
+ * one mbuf, a copy is made at 'ptr'. The caller must ensure that the buffer
+ * size is >= 'len'. Returns NULL if there aren't 'len' bytes in the chain.
+ */
+caddr_t
+sctp_m_getptr(struct mbuf *m, int off, int len, uint8_t * in_ptr)
+{
+ uint32_t count;
+ uint8_t *ptr;
+
+ ptr = in_ptr;
+ if ((off < 0) || (len <= 0))
+ return (NULL);
+
+ /* find the desired start location */
+ while ((m != NULL) && (off > 0)) {
+ if (off < SCTP_BUF_LEN(m))
+ break;
+ off -= SCTP_BUF_LEN(m);
+ m = SCTP_BUF_NEXT(m);
+ }
+ if (m == NULL)
+ return (NULL);
+
+ /* is the current mbuf large enough (eg. contiguous)? */
+ if ((SCTP_BUF_LEN(m) - off) >= len) {
+ return (mtod(m, caddr_t)+off);
+ } else {
+ /* else, it spans more than one mbuf, so save a temp copy... */
+ while ((m != NULL) && (len > 0)) {
+ count = min(SCTP_BUF_LEN(m) - off, len);
+ bcopy(mtod(m, caddr_t)+off, ptr, count);
+ len -= count;
+ ptr += count;
+ off = 0;
+ m = SCTP_BUF_NEXT(m);
+ }
+ if ((m == NULL) && (len > 0))
+ return (NULL);
+ else
+ return ((caddr_t)in_ptr);
+ }
+}
+
+
+
+struct sctp_paramhdr *
+sctp_get_next_param(struct mbuf *m,
+ int offset,
+ struct sctp_paramhdr *pull,
+ int pull_limit)
+{
+ /* This just provides a typed signature to Peter's Pull routine */
+ return ((struct sctp_paramhdr *)sctp_m_getptr(m, offset, pull_limit,
+ (uint8_t *) pull));
+}
+
+
+int
+sctp_add_pad_tombuf(struct mbuf *m, int padlen)
+{
+ /*
+	 * Add padlen bytes of zero-filled padding to the end of the mbuf.
+	 * If padlen is > 3 this routine will fail.
+ */
+ uint8_t *dp;
+ int i;
+
+ if (padlen > 3) {
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ return (ENOBUFS);
+ }
+ if (padlen <= M_TRAILINGSPACE(m)) {
+ /*
+ * The easy way. We hope the majority of the time we hit
+ * here :)
+ */
+ dp = (uint8_t *) (mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ SCTP_BUF_LEN(m) += padlen;
+ } else {
+ /* Hard way we must grow the mbuf */
+ struct mbuf *tmp;
+
+ tmp = sctp_get_mbuf_for_msg(padlen, 0, M_DONTWAIT, 1, MT_DATA);
+ if (tmp == NULL) {
+ /* Out of space GAK! we are in big trouble. */
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (ENOSPC);
+ }
+ /* setup and insert in middle */
+ SCTP_BUF_LEN(tmp) = padlen;
+ SCTP_BUF_NEXT(tmp) = NULL;
+ SCTP_BUF_NEXT(m) = tmp;
+ dp = mtod(tmp, uint8_t *);
+ }
+ /* zero out the pad */
+ for (i = 0; i < padlen; i++) {
+ *dp = 0;
+ dp++;
+ }
+ return (0);
+}
+
+int
+sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf)
+{
+ /* find the last mbuf in chain and pad it */
+ struct mbuf *m_at;
+
+ m_at = m;
+ if (last_mbuf) {
+ return (sctp_add_pad_tombuf(last_mbuf, padval));
+ } else {
+ while (m_at) {
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ return (sctp_add_pad_tombuf(m_at, padval));
+ }
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
+ return (EFAULT);
+}
+
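+/*
+ * Most of the sctp_notify_*() routines below follow the same pattern:
+ * build the event structure in an mbuf, wrap it in a sctp_queued_to_read
+ * control marked M_NOTIFICATION and append it to the socket receive queue
+ * via sctp_add_to_readq() so the application reads it as a notification.
+ */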
+static void
+sctp_notify_assoc_change(uint32_t event, struct sctp_tcb *stcb,
+ uint32_t error, void *data, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_assoc_change *sac;
+ struct sctp_queued_to_read *control;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ /*
+ * For TCP model AND UDP connected sockets we will send an error up
+ * when an ABORT comes in.
+ */
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ ((event == SCTP_COMM_LOST) || (event == SCTP_CANT_STR_ASSOC))) {
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
+ stcb->sctp_socket->so_error = ECONNREFUSED;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ stcb->sctp_socket->so_error = ECONNRESET;
+ }
+ /* Wake ANY sleepers */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ socantrcvmore(stcb->sctp_socket);
+ sorwakeup(stcb->sctp_socket);
+ sowwakeup(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_change), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+
+ sac = mtod(m_notify, struct sctp_assoc_change *);
+ sac->sac_type = SCTP_ASSOC_CHANGE;
+ sac->sac_flags = 0;
+ sac->sac_length = sizeof(struct sctp_assoc_change);
+ sac->sac_state = event;
+ sac->sac_error = error;
+ /* XXX verify these stream counts */
+ sac->sac_outbound_streams = stcb->asoc.streamoutcnt;
+ sac->sac_inbound_streams = stcb->asoc.streamincnt;
+ sac->sac_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_assoc_change);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD,
+ so_locked);
+ if (event == SCTP_COMM_LOST) {
+ /* Wake up any sleeper */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+}
+
+static void
+sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
+ struct sockaddr *sa, uint32_t error)
+{
+ struct mbuf *m_notify;
+ struct sctp_paddr_change *spc;
+ struct sctp_queued_to_read *control;
+
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVPADDREVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ spc = mtod(m_notify, struct sctp_paddr_change *);
+ spc->spc_type = SCTP_PEER_ADDR_CHANGE;
+ spc->spc_flags = 0;
+ spc->spc_length = sizeof(struct sctp_paddr_change);
+ switch (sa->sa_family) {
+ case AF_INET:
+ memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+ break;
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6;
+
+ memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in6));
+
+ sin6 = (struct sockaddr_in6 *)&spc->spc_aaddr;
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
+ if (sin6->sin6_scope_id == 0) {
+ /* recover scope_id for user */
+ (void)sa6_recoverscope(sin6);
+ } else {
+ /* clear embedded scope_id for user */
+ in6_clearscope(&sin6->sin6_addr);
+ }
+ }
+ break;
+ }
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ spc->spc_state = state;
+ spc->spc_error = error;
+ spc->spc_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_paddr_change);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ control->spec_flags = M_NOTIFICATION;
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1,
+ SCTP_READ_LOCK_NOT_HELD,
+ SCTP_SO_NOT_LOCKED);
+}
+
+
+static void
+sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error,
+ struct sctp_tmit_chunk *chk, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_send_failed *ssf;
+ struct sctp_queued_to_read *control;
+ int length;
+
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ length = sizeof(struct sctp_send_failed) + chk->send_size;
+ length -= sizeof(struct sctp_data_chunk);
+ SCTP_BUF_LEN(m_notify) = 0;
+ ssf = mtod(m_notify, struct sctp_send_failed *);
+ ssf->ssf_type = SCTP_SEND_FAILED;
+ if (error == SCTP_NOTIFY_DATAGRAM_UNSENT)
+ ssf->ssf_flags = SCTP_DATA_UNSENT;
+ else
+ ssf->ssf_flags = SCTP_DATA_SENT;
+ ssf->ssf_length = length;
+ ssf->ssf_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
+ ssf->ssf_info.sinfo_stream = chk->rec.data.stream_number;
+ ssf->ssf_info.sinfo_ssn = chk->rec.data.stream_seq;
+ ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags;
+ ssf->ssf_info.sinfo_ppid = chk->rec.data.payloadtype;
+ ssf->ssf_info.sinfo_context = chk->rec.data.context;
+ ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_assoc_id = sctp_get_associd(stcb);
+
+ if (chk->data) {
+ /*
+		 * trim off the sctp chunk header (it should be there)
+ */
+ if (chk->send_size >= sizeof(struct sctp_data_chunk)) {
+ m_adj(chk->data, sizeof(struct sctp_data_chunk));
+ sctp_mbuf_crush(chk->data);
+ chk->send_size -= sizeof(struct sctp_data_chunk);
+ }
+ }
+ SCTP_BUF_NEXT(m_notify) = chk->data;
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
+ /* Steal off the mbuf */
+ chk->data = NULL;
+ /*
+	 * For this case we check the actual socket buffer, since the assoc
+	 * is going away and we don't want to overfill the socket buffer for
+	 * a non-reader.
+ */
+ if (sctp_sbspace_failedmsgs(&stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1,
+ SCTP_READ_LOCK_NOT_HELD,
+ so_locked);
+}
+
+
+static void
+sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
+ struct sctp_stream_queue_pending *sp, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_send_failed *ssf;
+ struct sctp_queued_to_read *control;
+ int length;
+
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ length = sizeof(struct sctp_send_failed) + sp->length;
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ ssf = mtod(m_notify, struct sctp_send_failed *);
+ ssf->ssf_type = SCTP_SEND_FAILED;
+ if (error == SCTP_NOTIFY_DATAGRAM_UNSENT)
+ ssf->ssf_flags = SCTP_DATA_UNSENT;
+ else
+ ssf->ssf_flags = SCTP_DATA_SENT;
+ ssf->ssf_length = length;
+ ssf->ssf_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
+ ssf->ssf_info.sinfo_stream = sp->stream;
+ ssf->ssf_info.sinfo_ssn = sp->strseq;
+ if (sp->some_taken) {
+ ssf->ssf_info.sinfo_flags = SCTP_DATA_LAST_FRAG;
+ } else {
+ ssf->ssf_info.sinfo_flags = SCTP_DATA_NOT_FRAG;
+ }
+ ssf->ssf_info.sinfo_ppid = sp->ppid;
+ ssf->ssf_info.sinfo_context = sp->context;
+ ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_NEXT(m_notify) = sp->data;
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
+
+ /* Steal off the mbuf */
+ sp->data = NULL;
+ /*
+	 * For this case we check the actual socket buffer, since the assoc
+	 * is going away and we don't want to overfill the socket buffer for
+	 * a non-reader.
+ */
+ if (sctp_sbspace_failedmsgs(&stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, so_locked);
+}
+
+
+
+static void
+sctp_notify_adaptation_layer(struct sctp_tcb *stcb,
+ uint32_t error)
+{
+ struct mbuf *m_notify;
+ struct sctp_adaptation_event *sai;
+ struct sctp_queued_to_read *control;
+
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ sai = mtod(m_notify, struct sctp_adaptation_event *);
+ sai->sai_type = SCTP_ADAPTATION_INDICATION;
+ sai->sai_flags = 0;
+ sai->sai_length = sizeof(struct sctp_adaptation_event);
+ sai->sai_adaptation_ind = stcb->asoc.peers_adaptation;
+ sai->sai_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_adaptation_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ control->spec_flags = M_NOTIFICATION;
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+}
+
+/* This always must be called with the read-queue LOCKED in the INP */
+static void
+sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error,
+ uint32_t val, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_pdapi_event *pdapi;
+ struct sctp_queued_to_read *control;
+ struct sockbuf *sb;
+
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_PDAPIEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ pdapi = mtod(m_notify, struct sctp_pdapi_event *);
+ pdapi->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
+ pdapi->pdapi_flags = 0;
+ pdapi->pdapi_length = sizeof(struct sctp_pdapi_event);
+ pdapi->pdapi_indication = error;
+ pdapi->pdapi_stream = (val >> 16);
+ pdapi->pdapi_seq = (val & 0x0000ffff);
+ pdapi->pdapi_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_pdapi_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ control->held_length = 0;
+ control->length = 0;
+ sb = &stcb->sctp_socket->so_rcv;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m_notify));
+ }
+ sctp_sballoc(stcb, sb, m_notify);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_add_int(&control->length, SCTP_BUF_LEN(m_notify));
+ control->end_added = 1;
+ if (stcb->asoc.control_pdapi)
+ TAILQ_INSERT_AFTER(&stcb->sctp_ep->read_queue, stcb->asoc.control_pdapi, control, next);
+ else {
+ /* we really should not see this case */
+ TAILQ_INSERT_TAIL(&stcb->sctp_ep->read_queue, control, next);
+ }
+ if (stcb->sctp_ep && stcb->sctp_socket) {
+ /* This should always be the case */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+}
+
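+/*
+ * For TCP-model sockets mark the socket as unable to send any more data,
+ * then queue an SCTP_SHUTDOWN_EVENT notification if the event is enabled.
+ */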
+static void
+sctp_notify_shutdown_event(struct sctp_tcb *stcb)
+{
+ struct mbuf *m_notify;
+ struct sctp_shutdown_event *sse;
+ struct sctp_queued_to_read *control;
+
+ /*
+ * For TCP model AND UDP connected sockets we will send an error up
+	 * when a SHUTDOWN completes
+ */
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* mark socket closed for read/write and wakeup! */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ socantsendmore(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ sse = mtod(m_notify, struct sctp_shutdown_event *);
+ sse->sse_type = SCTP_SHUTDOWN_EVENT;
+ sse->sse_flags = 0;
+ sse->sse_length = sizeof(struct sctp_shutdown_event);
+ sse->sse_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_shutdown_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+}
+
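+/*
+ * Queue an SCTP_SENDER_DRY_EVENT notification (no user data left queued
+ * for transmission or retransmission), if the application enabled it.
+ */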
+static void
+sctp_notify_sender_dry_event(struct sctp_tcb *stcb,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_sender_dry_event *event;
+ struct sctp_queued_to_read *control;
+
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_DRYEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL) {
+ /* no space left */
+ return;
+ }
+ SCTP_BUF_LEN(m_notify) = 0;
+ event = mtod(m_notify, struct sctp_sender_dry_event *);
+ event->sender_dry_type = SCTP_SENDER_DRY_EVENT;
+ event->sender_dry_flags = 0;
+ event->sender_dry_length = sizeof(struct sctp_sender_dry_event);
+ event->sender_dry_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_sender_dry_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0, m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ control->spec_flags = M_NOTIFICATION;
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb, control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, so_locked);
+}
+
+
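+/*
+ * Queue an SCTP_STREAM_RESET_EVENT notification reporting how many
+ * streams were added, with the direction encoded in the flag.
+ */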
+static void
+sctp_notify_stream_reset_add(struct sctp_tcb *stcb, int number_entries, int flag)
+{
+ struct mbuf *m_notify;
+ struct sctp_queued_to_read *control;
+ struct sctp_stream_reset_event *strreset;
+ int len;
+
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_STREAM_RESETEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ len = sizeof(struct sctp_stream_reset_event) + (number_entries * sizeof(uint16_t));
+ if (len > M_TRAILINGSPACE(m_notify)) {
+ /* never enough room */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ strreset = mtod(m_notify, struct sctp_stream_reset_event *);
+ strreset->strreset_type = SCTP_STREAM_RESET_EVENT;
+ strreset->strreset_flags = SCTP_STRRESET_ADD_STREAM | flag;
+ strreset->strreset_length = len;
+ strreset->strreset_assoc_id = sctp_get_associd(stcb);
+ strreset->strreset_list[0] = number_entries;
+
+ SCTP_BUF_LEN(m_notify) = len;
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ /* no space */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+}
+
+
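+/*
+ * Queue an SCTP_STREAM_RESET_EVENT notification listing the streams that
+ * were reset, or flagging all streams when no list is supplied.
+ */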
+static void
+sctp_notify_stream_reset(struct sctp_tcb *stcb,
+ int number_entries, uint16_t * list, int flag)
+{
+ struct mbuf *m_notify;
+ struct sctp_queued_to_read *control;
+ struct sctp_stream_reset_event *strreset;
+ int len;
+
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_STREAM_RESETEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ len = sizeof(struct sctp_stream_reset_event) + (number_entries * sizeof(uint16_t));
+ if (len > M_TRAILINGSPACE(m_notify)) {
+ /* never enough room */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ strreset = mtod(m_notify, struct sctp_stream_reset_event *);
+ strreset->strreset_type = SCTP_STREAM_RESET_EVENT;
+ if (number_entries == 0) {
+ strreset->strreset_flags = flag | SCTP_STRRESET_ALL_STREAMS;
+ } else {
+ strreset->strreset_flags = flag | SCTP_STRRESET_STREAM_LIST;
+ }
+ strreset->strreset_length = len;
+ strreset->strreset_assoc_id = sctp_get_associd(stcb);
+ if (number_entries) {
+ int i;
+
+ for (i = 0; i < number_entries; i++) {
+ strreset->strreset_list[i] = ntohs(list[i]);
+ }
+ }
+ SCTP_BUF_LEN(m_notify) = len;
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ /* no space */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+}
+
+
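+/*
+ * Central notification dispatcher: map an internal notification code to
+ * the corresponding socket-level event and hand it to the right helper.
+ */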
+void
+sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
+ uint32_t error, void *data, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ if ((stcb == NULL) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ /* If the socket is gone we are out of here */
+ return;
+ }
+ if (stcb->sctp_socket->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ return;
+ }
+ if (stcb && ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
+ (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED))) {
+ if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
+ (notification == SCTP_NOTIFY_INTERFACE_UP) ||
+ (notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
+ /* Don't report these in front states */
+ return;
+ }
+ }
+ switch (notification) {
+ case SCTP_NOTIFY_ASSOC_UP:
+ if (stcb->asoc.assoc_up_sent == 0) {
+ sctp_notify_assoc_change(SCTP_COMM_UP, stcb, error, NULL, so_locked);
+ stcb->asoc.assoc_up_sent = 1;
+ }
+ if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) {
+ sctp_notify_adaptation_layer(stcb, error);
+ }
+ if (stcb->asoc.peer_supports_auth == 0) {
+ sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
+ NULL, so_locked);
+ }
+ break;
+ case SCTP_NOTIFY_ASSOC_DOWN:
+ sctp_notify_assoc_change(SCTP_SHUTDOWN_COMP, stcb, error, NULL, so_locked);
+ break;
+ case SCTP_NOTIFY_INTERFACE_DOWN:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_UNREACHABLE,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_INTERFACE_UP:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_AVAILABLE,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_INTERFACE_CONFIRMED:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_CONFIRMED,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_SPECIAL_SP_FAIL:
+ sctp_notify_send_failed2(stcb, error,
+ (struct sctp_stream_queue_pending *)data, so_locked);
+ break;
+ case SCTP_NOTIFY_DG_FAIL:
+ sctp_notify_send_failed(stcb, error,
+ (struct sctp_tmit_chunk *)data, so_locked);
+ break;
+ case SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION:
+ {
+ uint32_t val;
+
+ val = *((uint32_t *) data);
+
+ sctp_notify_partial_delivery_indication(stcb, error, val, so_locked);
+ break;
+ }
+ case SCTP_NOTIFY_STRDATA_ERR:
+ break;
+ case SCTP_NOTIFY_ASSOC_ABORTED:
+ if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+ ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+ sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, NULL, so_locked);
+ } else {
+ sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, NULL, so_locked);
+ }
+ break;
+ case SCTP_NOTIFY_PEER_OPENED_STREAM:
+ break;
+ case SCTP_NOTIFY_STREAM_OPENED_OK:
+ break;
+ case SCTP_NOTIFY_ASSOC_RESTART:
+ sctp_notify_assoc_change(SCTP_RESTART, stcb, error, data, so_locked);
+ if (stcb->asoc.peer_supports_auth == 0) {
+ sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
+ NULL, so_locked);
+ }
+ break;
+ case SCTP_NOTIFY_HB_RESP:
+ break;
+ case SCTP_NOTIFY_STR_RESET_INSTREAM_ADD_OK:
+ sctp_notify_stream_reset_add(stcb, error, SCTP_STRRESET_INBOUND_STR);
+ break;
+ case SCTP_NOTIFY_STR_RESET_ADD_OK:
+ sctp_notify_stream_reset_add(stcb, error, SCTP_STRRESET_OUTBOUND_STR);
+ break;
+ case SCTP_NOTIFY_STR_RESET_ADD_FAIL:
+ sctp_notify_stream_reset_add(stcb, error, (SCTP_STRRESET_FAILED | SCTP_STRRESET_OUTBOUND_STR));
+ break;
+
+ case SCTP_NOTIFY_STR_RESET_SEND:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STRRESET_OUTBOUND_STR);
+ break;
+ case SCTP_NOTIFY_STR_RESET_RECV:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STRRESET_INBOUND_STR);
+ break;
+ case SCTP_NOTIFY_STR_RESET_FAILED_OUT:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), (SCTP_STRRESET_OUTBOUND_STR | SCTP_STRRESET_FAILED));
+ break;
+ case SCTP_NOTIFY_STR_RESET_FAILED_IN:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), (SCTP_STRRESET_INBOUND_STR | SCTP_STRRESET_FAILED));
+ break;
+ case SCTP_NOTIFY_ASCONF_ADD_IP:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_DELETE_IP:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_REMOVED, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_SET_PRIMARY:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_SUCCESS:
+ break;
+ case SCTP_NOTIFY_ASCONF_FAILED:
+ break;
+ case SCTP_NOTIFY_PEER_SHUTDOWN:
+ sctp_notify_shutdown_event(stcb);
+ break;
+ case SCTP_NOTIFY_AUTH_NEW_KEY:
+ sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY, error,
+ (uint16_t) (uintptr_t) data,
+ so_locked);
+ break;
+ case SCTP_NOTIFY_AUTH_FREE_KEY:
+ sctp_notify_authentication(stcb, SCTP_AUTH_FREE_KEY, error,
+ (uint16_t) (uintptr_t) data,
+ so_locked);
+ break;
+ case SCTP_NOTIFY_NO_PEER_AUTH:
+ sctp_notify_authentication(stcb, SCTP_AUTH_NO_AUTH, error,
+ (uint16_t) (uintptr_t) data,
+ so_locked);
+ break;
+ case SCTP_NOTIFY_SENDER_DRY:
+ sctp_notify_sender_dry_event(stcb, so_locked);
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n",
+ __FUNCTION__, notification, notification);
+ break;
+ } /* end switch */
+}
+
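+/*
+ * Flush everything still queued for sending on this association (sent
+ * queue, send queue and the per-stream queues), notifying the ULP of
+ * each failed datagram as it goes.
+ */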
+void
+sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *outs;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_queue_pending *sp;
+ int i;
+
+	/* must have a valid TCB before touching the association */
+	if (stcb == NULL) {
+		return;
+	}
+	asoc = &stcb->asoc;
+
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* already being freed */
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ return;
+ }
+	/* now go through all the gunk, freeing chunks */
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ }
+ /* sent queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ asoc->sent_queue_cnt--;
+ if (chk->data != NULL) {
+ sctp_free_bufspace(stcb, asoc, chk, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_SENT, chk, so_locked);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ }
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ }
+ /* pending send queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->send_queue)) {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
+ asoc->send_queue_cnt--;
+ if (chk->data != NULL) {
+ sctp_free_bufspace(stcb, asoc, chk, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_UNSENT, chk, so_locked);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ }
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ }
+ }
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ /* For each stream */
+ outs = &stcb->asoc.strmout[i];
+ /* clean up any sends there */
+ stcb->asoc.locked_on_sending = NULL;
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ stcb->asoc.stream_queue_cnt--;
+ TAILQ_REMOVE(&outs->outqueue, sp, next);
+ sctp_free_spbufspace(stcb, asoc, sp);
+ if (sp->data) {
+ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_UNSENT, (void *)sp, so_locked);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ }
+ if (sp->net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ }
+ /* Free the chunk */
+ sctp_free_a_strmoq(stcb, sp);
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+}
+
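+/*
+ * Tell the ULP the association was aborted: report all outbound data as
+ * failed, mark TCP-model endpoints as aborted and deliver the
+ * SCTP_NOTIFY_ASSOC_ABORTED notification.
+ */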
+void
+sctp_abort_notification(struct sctp_tcb *stcb, int error, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+
+ if (stcb == NULL) {
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ return;
+ }
+ /* Tell them we lost the asoc */
+ sctp_report_all_outbound(stcb, 1, so_locked);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_ABORTED, stcb, error, NULL, so_locked);
+}
+
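+/*
+ * Abort in response to an inbound packet: notify the ULP if we have a
+ * TCB, send an ABORT back to the peer and then free the association.
+ */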
+void
+sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct mbuf *m, int iphlen, struct sctphdr *sh, struct mbuf *op_err,
+ uint32_t vrf_id, uint16_t port)
+{
+ uint32_t vtag;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ vtag = 0;
+ if (stcb != NULL) {
+ /* We have a TCB to abort, send notification too */
+ vtag = stcb->asoc.peer_vtag;
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* get the assoc vrf id and table id */
+ vrf_id = stcb->asoc.vrf_id;
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ }
+ sctp_send_abort(m, iphlen, sh, vtag, op_err, vrf_id, port);
+ if (stcb != NULL) {
+ /* Ok, now lets free it */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+}
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+void
+sctp_print_out_track_log(struct sctp_tcb *stcb)
+{
+#ifdef NOSIY_PRINTS
+ int i;
+
+ SCTP_PRINTF("Last ep reason:%x\n", stcb->sctp_ep->last_abort_code);
+ SCTP_PRINTF("IN bound TSN log-aaa\n");
+ if ((stcb->asoc.tsn_in_at == 0) && (stcb->asoc.tsn_in_wrapped == 0)) {
+ SCTP_PRINTF("None rcvd\n");
+ goto none_in;
+ }
+ if (stcb->asoc.tsn_in_wrapped) {
+ for (i = stcb->asoc.tsn_in_at; i < SCTP_TSN_LOG_SIZE; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.in_tsnlog[i].tsn,
+ stcb->asoc.in_tsnlog[i].strm,
+ stcb->asoc.in_tsnlog[i].seq,
+ stcb->asoc.in_tsnlog[i].flgs,
+ stcb->asoc.in_tsnlog[i].sz);
+ }
+ }
+ if (stcb->asoc.tsn_in_at) {
+ for (i = 0; i < stcb->asoc.tsn_in_at; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.in_tsnlog[i].tsn,
+ stcb->asoc.in_tsnlog[i].strm,
+ stcb->asoc.in_tsnlog[i].seq,
+ stcb->asoc.in_tsnlog[i].flgs,
+ stcb->asoc.in_tsnlog[i].sz);
+ }
+ }
+none_in:
+ SCTP_PRINTF("OUT bound TSN log-aaa\n");
+ if ((stcb->asoc.tsn_out_at == 0) &&
+ (stcb->asoc.tsn_out_wrapped == 0)) {
+ SCTP_PRINTF("None sent\n");
+ }
+ if (stcb->asoc.tsn_out_wrapped) {
+ for (i = stcb->asoc.tsn_out_at; i < SCTP_TSN_LOG_SIZE; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.out_tsnlog[i].tsn,
+ stcb->asoc.out_tsnlog[i].strm,
+ stcb->asoc.out_tsnlog[i].seq,
+ stcb->asoc.out_tsnlog[i].flgs,
+ stcb->asoc.out_tsnlog[i].sz);
+ }
+ }
+ if (stcb->asoc.tsn_out_at) {
+ for (i = 0; i < stcb->asoc.tsn_out_at; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.out_tsnlog[i].tsn,
+ stcb->asoc.out_tsnlog[i].strm,
+ stcb->asoc.out_tsnlog[i].seq,
+ stcb->asoc.out_tsnlog[i].flgs,
+ stcb->asoc.out_tsnlog[i].sz);
+ }
+ }
+#endif
+}
+
+#endif
+
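+/*
+ * Locally abort an association: notify the ULP, send an ABORT chunk to
+ * the peer and free the association state.
+ */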
+void
+sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ int error, struct mbuf *op_err,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ uint32_t vtag;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+#endif
+ if (stcb == NULL) {
+ /* Got to have a TCB */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ }
+ }
+ return;
+ } else {
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ }
+ vtag = stcb->asoc.peer_vtag;
+ /* notify the ulp */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0)
+ sctp_abort_notification(stcb, error, so_locked);
+ /* notify the peer */
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("aborting an association");
+#endif
+ sctp_send_abort_tcb(stcb, op_err, so_locked);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ /* now free the asoc */
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ sctp_print_out_track_log(stcb);
+#endif
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ }
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+}
+
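+/*
+ * Handle an out-of-the-blue packet: stay silent for chunk types that
+ * must not be answered (ABORT, SHUTDOWN COMPLETE, ...), reply to a
+ * SHUTDOWN ACK with a SHUTDOWN COMPLETE, and send an ABORT otherwise.
+ */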
+void
+sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_inpcb *inp, struct mbuf *op_err, uint32_t vrf_id, uint16_t port)
+{
+ struct sctp_chunkhdr *ch, chunk_buf;
+ unsigned int chk_length;
+
+ SCTP_STAT_INCR_COUNTER32(sctps_outoftheblue);
+ /* Generate a TO address for future reference */
+ if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ }
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* break to abort land */
+ break;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_COOKIE_ECHO:
+ /* We hit here only if the assoc is being freed */
+ return;
+ case SCTP_PACKET_DROPPED:
+ /* we don't respond to pkt-dropped */
+ return;
+ case SCTP_ABORT_ASSOCIATION:
+ /* we don't respond with an ABORT to an ABORT */
+ return;
+ case SCTP_SHUTDOWN_COMPLETE:
+ /*
+ * we ignore it since we are not waiting for it and
+ * peer is gone
+ */
+ return;
+ case SCTP_SHUTDOWN_ACK:
+ sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id, port);
+ return;
+ default:
+ break;
+ }
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+ sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id, port);
+}
+
+/*
+ * check the inbound datagram to make sure there is not an abort inside it,
+ * if there is return 1, else return 0.
+ */
+int
+sctp_is_there_an_abort_here(struct mbuf *m, int iphlen, uint32_t * vtagfill)
+{
+ struct sctp_chunkhdr *ch;
+ struct sctp_init_chunk *init_chk, chunk_buf;
+ int offset;
+ unsigned int chk_length;
+
+ offset = iphlen + sizeof(struct sctphdr);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset, sizeof(*ch),
+ (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* packet is probably corrupt */
+ break;
+ }
+ /* we seem to be ok, is it an abort? */
+ if (ch->chunk_type == SCTP_ABORT_ASSOCIATION) {
+ /* yep, tell them */
+ return (1);
+ }
+ if (ch->chunk_type == SCTP_INITIATION) {
+ /* need to update the Vtag */
+ init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
+ offset, sizeof(*init_chk), (uint8_t *) & chunk_buf);
+ if (init_chk != NULL) {
+ *vtagfill = ntohl(init_chk->init.initiate_tag);
+ }
+ }
+ /* Nope, move to the next chunk */
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+ return (0);
+}
+
+/*
+ * currently (2/02), ifa_addr embeds scope_ids and doesn't have sin6_scope_id
+ * set (i.e. it's 0), so create this function to compare link-local scopes
+ */
+#ifdef INET6
+uint32_t
+sctp_is_same_scope(struct sockaddr_in6 *addr1, struct sockaddr_in6 *addr2)
+{
+ struct sockaddr_in6 a, b;
+
+ /* save copies */
+ a = *addr1;
+ b = *addr2;
+
+ if (a.sin6_scope_id == 0)
+ if (sa6_recoverscope(&a)) {
+ /* can't get scope, so can't match */
+ return (0);
+ }
+ if (b.sin6_scope_id == 0)
+ if (sa6_recoverscope(&b)) {
+ /* can't get scope, so can't match */
+ return (0);
+ }
+ if (a.sin6_scope_id != b.sin6_scope_id)
+ return (0);
+
+ return (1);
+}
+
+/*
+ * returns a sockaddr_in6 with embedded scope recovered and removed
+ */
+struct sockaddr_in6 *
+sctp_recover_scope(struct sockaddr_in6 *addr, struct sockaddr_in6 *store)
+{
+ /* check and strip embedded scope junk */
+ if (addr->sin6_family == AF_INET6) {
+ if (IN6_IS_SCOPE_LINKLOCAL(&addr->sin6_addr)) {
+ if (addr->sin6_scope_id == 0) {
+ *store = *addr;
+ if (!sa6_recoverscope(store)) {
+ /* use the recovered scope */
+ addr = store;
+ }
+ } else {
+ /* else, return the original "to" addr */
+ in6_clearscope(&addr->sin6_addr);
+ }
+ }
+ }
+ return (addr);
+}
+
+#endif
+
+/*
+ * are the two addresses the same? currently a "scopeless" check returns: 1
+ * if same, 0 if not
+ */
+int
+sctp_cmpaddr(struct sockaddr *sa1, struct sockaddr *sa2)
+{
+
+ /* must be valid */
+ if (sa1 == NULL || sa2 == NULL)
+ return (0);
+
+ /* must be the same family */
+ if (sa1->sa_family != sa2->sa_family)
+ return (0);
+
+ switch (sa1->sa_family) {
+#ifdef INET6
+ case AF_INET6:
+ {
+ /* IPv6 addresses */
+ struct sockaddr_in6 *sin6_1, *sin6_2;
+
+ sin6_1 = (struct sockaddr_in6 *)sa1;
+ sin6_2 = (struct sockaddr_in6 *)sa2;
+ return (SCTP6_ARE_ADDR_EQUAL(sin6_1,
+ sin6_2));
+ }
+#endif
+ case AF_INET:
+ {
+ /* IPv4 addresses */
+ struct sockaddr_in *sin_1, *sin_2;
+
+ sin_1 = (struct sockaddr_in *)sa1;
+ sin_2 = (struct sockaddr_in *)sa2;
+ return (sin_1->sin_addr.s_addr == sin_2->sin_addr.s_addr);
+ }
+ default:
+ /* we don't do these... */
+ return (0);
+ }
+}
+
+void
+sctp_print_address(struct sockaddr *sa)
+{
+#ifdef INET6
+ char ip6buf[INET6_ADDRSTRLEN];
+
+ ip6buf[0] = 0;
+#endif
+
+ switch (sa->sa_family) {
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ SCTP_PRINTF("IPv6 address: %s:port:%d scope:%u\n",
+ ip6_sprintf(ip6buf, &sin6->sin6_addr),
+ ntohs(sin6->sin6_port),
+ sin6->sin6_scope_id);
+ break;
+ }
+#endif
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+ unsigned char *p;
+
+ sin = (struct sockaddr_in *)sa;
+ p = (unsigned char *)&sin->sin_addr;
+ SCTP_PRINTF("IPv4 address: %u.%u.%u.%u:%d\n",
+ p[0], p[1], p[2], p[3], ntohs(sin->sin_port));
+ break;
+ }
+ default:
+ SCTP_PRINTF("?\n");
+ break;
+ }
+}
+
+void
+sctp_print_address_pkt(struct ip *iph, struct sctphdr *sh)
+{
+ switch (iph->ip_v) {
+ case IPVERSION:
+ {
+ struct sockaddr_in lsa, fsa;
+
+ bzero(&lsa, sizeof(lsa));
+ lsa.sin_len = sizeof(lsa);
+ lsa.sin_family = AF_INET;
+ lsa.sin_addr = iph->ip_src;
+ lsa.sin_port = sh->src_port;
+ bzero(&fsa, sizeof(fsa));
+ fsa.sin_len = sizeof(fsa);
+ fsa.sin_family = AF_INET;
+ fsa.sin_addr = iph->ip_dst;
+ fsa.sin_port = sh->dest_port;
+ SCTP_PRINTF("src: ");
+ sctp_print_address((struct sockaddr *)&lsa);
+ SCTP_PRINTF("dest: ");
+ sctp_print_address((struct sockaddr *)&fsa);
+ break;
+ }
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 lsa6, fsa6;
+
+ ip6 = (struct ip6_hdr *)iph;
+ bzero(&lsa6, sizeof(lsa6));
+ lsa6.sin6_len = sizeof(lsa6);
+ lsa6.sin6_family = AF_INET6;
+ lsa6.sin6_addr = ip6->ip6_src;
+ lsa6.sin6_port = sh->src_port;
+ bzero(&fsa6, sizeof(fsa6));
+ fsa6.sin6_len = sizeof(fsa6);
+ fsa6.sin6_family = AF_INET6;
+ fsa6.sin6_addr = ip6->ip6_dst;
+ fsa6.sin6_port = sh->dest_port;
+ SCTP_PRINTF("src: ");
+ sctp_print_address((struct sockaddr *)&lsa6);
+ SCTP_PRINTF("dest: ");
+ sctp_print_address((struct sockaddr *)&fsa6);
+ break;
+ }
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+}
+
+void
+sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
+ struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb,
+ int waitflags)
+{
+ /*
+ * go through our old INP and pull off any control structures that
+	 * belong to stcb and move them to the new inp.
+ */
+ struct socket *old_so, *new_so;
+ struct sctp_queued_to_read *control, *nctl;
+ struct sctp_readhead tmp_queue;
+ struct mbuf *m;
+ int error = 0;
+
+ old_so = old_inp->sctp_socket;
+ new_so = new_inp->sctp_socket;
+ TAILQ_INIT(&tmp_queue);
+ error = sblock(&old_so->so_rcv, waitflags);
+ if (error) {
+ /*
+		 * Gak, can't get sblock, we have a problem. Data will be
+		 * left stranded, and we don't dare look at it since the
+		 * other thread may be reading something. Oh well, it's a
+		 * screwed up app that does a peeloff OR an accept while
+		 * reading from the main socket... actually it's only the
+		 * peeloff() case, since I think read will fail on a
+		 * listening socket.
+ */
+ return;
+ }
+ /* lock the socket buffers */
+ SCTP_INP_READ_LOCK(old_inp);
+ control = TAILQ_FIRST(&old_inp->read_queue);
+	/* Pull off all for our target stcb */
+ while (control) {
+ nctl = TAILQ_NEXT(control, next);
+ if (control->stcb == stcb) {
+ /* remove it we want it */
+ TAILQ_REMOVE(&old_inp->read_queue, control, next);
+ TAILQ_INSERT_TAIL(&tmp_queue, control, next);
+ m = control->data;
+ while (m) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&old_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &old_so->so_rcv, m);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&old_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ }
+ control = nctl;
+ }
+ SCTP_INP_READ_UNLOCK(old_inp);
+ /* Remove the sb-lock on the old socket */
+
+ sbunlock(&old_so->so_rcv);
+ /* Now we move them over to the new socket buffer */
+ control = TAILQ_FIRST(&tmp_queue);
+ SCTP_INP_READ_LOCK(new_inp);
+ while (control) {
+ nctl = TAILQ_NEXT(control, next);
+ TAILQ_INSERT_TAIL(&new_inp->read_queue, control, next);
+ m = control->data;
+ while (m) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&new_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
+ }
+ sctp_sballoc(stcb, &new_so->so_rcv, m);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&new_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ control = nctl;
+ }
+ SCTP_INP_READ_UNLOCK(new_inp);
+}
+
+void
+sctp_add_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct sockbuf *sb,
+ int end,
+ int inp_read_lock_held,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*
+ * Here we must place the control on the end of the socket read
+ * queue AND increment sb_cc so that select will work properly on
+ * read.
+ */
+ struct mbuf *m, *prev = NULL;
+
+ if (inp == NULL) {
+ /* Gak, TSNH!! */
+#ifdef INVARIANTS
+ panic("Gak, inp NULL on add_to_readq");
+#endif
+ return;
+ }
+ if (inp_read_lock_held == 0)
+ SCTP_INP_READ_LOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
+ sctp_free_remote_addr(control->whoFrom);
+ if (control->data) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ }
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), control);
+ if (inp_read_lock_held == 0)
+ SCTP_INP_READ_UNLOCK(inp);
+ return;
+ }
+ if (!(control->spec_flags & M_NOTIFICATION)) {
+ atomic_add_int(&inp->total_recvs, 1);
+ if (!control->do_not_ref_stcb) {
+ atomic_add_int(&stcb->total_recvs, 1);
+ }
+ }
+ m = control->data;
+ control->held_length = 0;
+ control->length = 0;
+ while (m) {
+ if (SCTP_BUF_LEN(m) == 0) {
+ /* Skip mbufs with NO length */
+ if (prev == NULL) {
+ /* First one */
+ control->data = sctp_m_free(m);
+ m = control->data;
+ } else {
+ SCTP_BUF_NEXT(prev) = sctp_m_free(m);
+ m = SCTP_BUF_NEXT(prev);
+ }
+ if (m == NULL) {
+ control->tail_mbuf = prev;
+ }
+ continue;
+ }
+ prev = m;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
+ }
+ sctp_sballoc(stcb, sb, m);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_add_int(&control->length, SCTP_BUF_LEN(m));
+ m = SCTP_BUF_NEXT(m);
+ }
+ if (prev != NULL) {
+ control->tail_mbuf = prev;
+ } else {
+ /* Everything got collapsed out?? */
+ sctp_free_remote_addr(control->whoFrom);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), control);
+ if (inp_read_lock_held == 0)
+ SCTP_INP_READ_UNLOCK(inp);
+ return;
+ }
+ if (end) {
+ control->end_added = 1;
+ }
+ TAILQ_INSERT_TAIL(&inp->read_queue, control, next);
+ if (inp_read_lock_held == 0)
+ SCTP_INP_READ_UNLOCK(inp);
+ if (inp && inp->sctp_socket) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ } else {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sctp_sorwakeup(inp, inp->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+ }
+}
+
+
+int
+sctp_append_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct mbuf *m,
+ int end,
+ int ctls_cumack,
+ struct sockbuf *sb)
+{
+ /*
+ * A partial delivery API event is underway. OR we are appending on
+ * the reassembly queue.
+ *
+ * If PDAPI this means we need to add m to the end of the data.
+ * Increase the length in the control AND increment the sb_cc.
+ * Otherwise sb is NULL and all we need to do is put it at the end
+ * of the mbuf chain.
+ */
+ int len = 0;
+ struct mbuf *mm, *tail = NULL, *prev = NULL;
+
+ if (inp) {
+ SCTP_INP_READ_LOCK(inp);
+ }
+ if (control == NULL) {
+get_out:
+ if (inp) {
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ return (-1);
+ }
+ if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ)) {
+ SCTP_INP_READ_UNLOCK(inp);
+ return 0;
+ }
+ if (control->end_added) {
+ /* huh this one is complete? */
+ goto get_out;
+ }
+ mm = m;
+ if (mm == NULL) {
+ goto get_out;
+ }
+ while (mm) {
+ if (SCTP_BUF_LEN(mm) == 0) {
+			/* Skip mbufs with NO length */
+ if (prev == NULL) {
+ /* First one */
+ m = sctp_m_free(mm);
+ mm = m;
+ } else {
+ SCTP_BUF_NEXT(prev) = sctp_m_free(mm);
+ mm = SCTP_BUF_NEXT(prev);
+ }
+ continue;
+ }
+ prev = mm;
+ len += SCTP_BUF_LEN(mm);
+ if (sb) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(mm));
+ }
+ sctp_sballoc(stcb, sb, mm);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ }
+ mm = SCTP_BUF_NEXT(mm);
+ }
+ if (prev) {
+ tail = prev;
+ } else {
+ /* Really there should always be a prev */
+ if (m == NULL) {
+ /* Huh nothing left? */
+#ifdef INVARIANTS
+ panic("Nothing left to add?");
+#else
+ goto get_out;
+#endif
+ }
+ tail = m;
+ }
+ if (control->tail_mbuf) {
+ /* append */
+ SCTP_BUF_NEXT(control->tail_mbuf) = m;
+ control->tail_mbuf = tail;
+ } else {
+ /* nothing there */
+#ifdef INVARIANTS
+ if (control->data != NULL) {
+ panic("This should NOT happen");
+ }
+#endif
+ control->data = m;
+ control->tail_mbuf = tail;
+ }
+ atomic_add_int(&control->length, len);
+ if (end) {
+ /* message is complete */
+ if (stcb && (control == stcb->asoc.control_pdapi)) {
+ stcb->asoc.control_pdapi = NULL;
+ }
+ control->held_length = 0;
+ control->end_added = 1;
+ }
+ if (stcb == NULL) {
+ control->do_not_ref_stcb = 1;
+ }
+ /*
+ * When we are appending in partial delivery, the cum-ack is used
+ * for the actual pd-api highest tsn on this mbuf. The true cum-ack
+ * is populated in the outbound sinfo structure from the true cumack
+ * if the association exists...
+ */
+ control->sinfo_tsn = control->sinfo_cumtsn = ctls_cumack;
+ if (inp) {
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ if (inp && inp->sctp_socket) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ } else {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (0);
+ }
+#endif
+ sctp_sorwakeup(inp, inp->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ }
+ return (0);
+}
+
+
+
+/*************HOLD THIS COMMENT FOR PATCH FILE OF
+ *************ALTERNATE ROUTING CODE
+ */
+
+/*************HOLD THIS COMMENT FOR END OF PATCH FILE OF
+ *************ALTERNATE ROUTING CODE
+ */
+
+struct mbuf *
+sctp_generate_invmanparam(int err)
+{
+	/* Return an mbuf with an invalid mandatory parameter */
+ struct mbuf *m;
+
+ m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m) {
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
+ ph = mtod(m, struct sctp_paramhdr *);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ ph->param_type = htons(err);
+ }
+ return (m);
+}
+
+#ifdef SCTP_MBCNT_LOGGING
+void
+sctp_free_bufspace(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_tmit_chunk *tp1, int chk_cnt)
+{
+ if (tp1->data == NULL) {
+ return;
+ }
+ asoc->chunks_on_out_queue -= chk_cnt;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBCNT_LOGGING_ENABLE) {
+ sctp_log_mbcnt(SCTP_LOG_MBCNT_DECREASE,
+ asoc->total_output_queue_size,
+ tp1->book_size,
+ 0,
+ tp1->mbcnt);
+ }
+ if (asoc->total_output_queue_size >= tp1->book_size) {
+ atomic_add_int(&asoc->total_output_queue_size, -tp1->book_size);
+ } else {
+ asoc->total_output_queue_size = 0;
+ }
+
+ if (stcb->sctp_socket && (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) ||
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE)))) {
+ if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) {
+ stcb->sctp_socket->so_snd.sb_cc -= tp1->book_size;
+ } else {
+ stcb->sctp_socket->so_snd.sb_cc = 0;
+
+ }
+ }
+}
+
+#endif
+
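+/*
+ * Release a PR-SCTP message: free every fragment of it across the sent,
+ * send and stream queues, mark the chunks to be skipped via FORWARD-TSN,
+ * notify the ULP and return the number of bytes released.
+ */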
+int
+sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1,
+ int reason, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct sctp_stream_out *strq;
+ struct sctp_tmit_chunk *chk = NULL;
+ struct sctp_stream_queue_pending *sp;
+ uint16_t stream = 0, seq = 0;
+ uint8_t foundeom = 0;
+ int ret_sz = 0;
+ int notdone;
+ int do_wakeup_routine = 0;
+
+ stream = tp1->rec.data.stream_number;
+ seq = tp1->rec.data.stream_seq;
+ do {
+ ret_sz += tp1->book_size;
+ if (tp1->data != NULL) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ }
+ sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
+ stcb->asoc.peers_rwnd += tp1->send_size;
+ stcb->asoc.peers_rwnd += SCTP_BASE_SYSCTL(sctp_peer_chunk_oh);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, reason, tp1, so_locked);
+ if (tp1->data) {
+ sctp_m_freem(tp1->data);
+ tp1->data = NULL;
+ }
+ do_wakeup_routine = 1;
+ if (PR_SCTP_BUF_ENABLED(tp1->flags)) {
+ stcb->asoc.sent_queue_cnt_removeable--;
+ }
+ }
+ tp1->sent = SCTP_FORWARD_TSN_SKIP;
+ if ((tp1->rec.data.rcv_flags & SCTP_DATA_NOT_FRAG) ==
+ SCTP_DATA_NOT_FRAG) {
+			/* not frag'ed, we are done */
+ notdone = 0;
+ foundeom = 1;
+ } else if (tp1->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ /* end of frag, we are done */
+ notdone = 0;
+ foundeom = 1;
+ } else {
+ /*
+ * Its a begin or middle piece, we must mark all of
+ * it
+ */
+ notdone = 1;
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ } while (tp1 && notdone);
+ if (foundeom == 0) {
+ /*
+ * The multi-part message was scattered across the send and
+ * sent queue.
+ */
+next_on_sent:
+ tp1 = TAILQ_FIRST(&stcb->asoc.send_queue);
+ /*
+	 * recurse through the send_queue too, starting at the
+ * beginning.
+ */
+ if ((tp1) &&
+ (tp1->rec.data.stream_number == stream) &&
+ (tp1->rec.data.stream_seq == seq)) {
+ /*
+ * save to chk in case we have some on stream out
+ * queue. If so and we have an un-transmitted one we
+ * don't have to fudge the TSN.
+ */
+ chk = tp1;
+ ret_sz += tp1->book_size;
+ sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, reason, tp1, so_locked);
+ if (tp1->data) {
+ sctp_m_freem(tp1->data);
+ tp1->data = NULL;
+ }
+ /* No flight involved here book the size to 0 */
+ tp1->book_size = 0;
+ if (tp1->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ foundeom = 1;
+ }
+ do_wakeup_routine = 1;
+ tp1->sent = SCTP_FORWARD_TSN_SKIP;
+ TAILQ_REMOVE(&stcb->asoc.send_queue, tp1, sctp_next);
+ /*
+ * on to the sent queue so we can wait for it to be
+ * passed by.
+ */
+ TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, tp1,
+ sctp_next);
+ stcb->asoc.send_queue_cnt--;
+ stcb->asoc.sent_queue_cnt++;
+ goto next_on_sent;
+ }
+ }
+ if (foundeom == 0) {
+ /*
+ * Still no eom found. That means there is stuff left on the
+ * stream out queue.. yuck.
+ */
+ strq = &stcb->asoc.strmout[stream];
+ SCTP_TCB_SEND_LOCK(stcb);
+ sp = TAILQ_FIRST(&strq->outqueue);
+ while (sp->strseq <= seq) {
+ /* Check if its our SEQ */
+ if (sp->strseq == seq) {
+ sp->discard_rest = 1;
+ /*
+ * We may need to put a chunk on the queue
+ * that holds the TSN that would have been
+ * sent with the LAST bit.
+ */
+ if (chk == NULL) {
+ /* Yep, we have to */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /*
+ * we are hosed. All we can
+ * do is nothing.. which
+ * will cause an abort if
+ * the peer is paying
+ * attention.
+ */
+ goto oh_well;
+ }
+ memset(chk, 0, sizeof(*chk));
+ chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG;
+ chk->sent = SCTP_FORWARD_TSN_SKIP;
+ chk->asoc = &stcb->asoc;
+ chk->rec.data.stream_seq = sp->strseq;
+ chk->rec.data.stream_number = sp->stream;
+ chk->rec.data.payloadtype = sp->ppid;
+ chk->rec.data.context = sp->context;
+ chk->flags = sp->act_flags;
+ if (sp->net)
+ chk->whoTo = sp->net;
+ else
+ chk->whoTo = stcb->asoc.primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ chk->rec.data.TSN_seq = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1);
+ stcb->asoc.pr_sctp_cnt++;
+ chk->pr_sctp_on = 1;
+ TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next);
+ stcb->asoc.sent_queue_cnt++;
+ stcb->asoc.pr_sctp_cnt++;
+ } else {
+ chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG;
+ }
+ oh_well:
+ if (sp->data) {
+ /*
+ * Pull any data to free up the SB
+				 * and allow the sender to "add more"
+				 * while we throw this one away :-)
+ */
+ sctp_free_spbufspace(stcb, &stcb->asoc,
+ sp);
+ ret_sz += sp->length;
+ do_wakeup_routine = 1;
+ sp->some_taken = 1;
+ sctp_m_freem(sp->data);
+ sp->length = 0;
+ sp->data = NULL;
+ sp->tail_mbuf = NULL;
+ }
+ break;
+ } else {
+ /* Next one please */
+ sp = TAILQ_NEXT(sp, next);
+ }
+ } /* End while */
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+ if (do_wakeup_routine) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (ret_sz);
+ }
+ }
+#endif
+ sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+ return (ret_sz);
+}
+
+/*
+ * Checks to see if the given address, sa, is one that is currently known
+ * by the kernel.
+ * Note: can't distinguish the same address on multiple interfaces and
+ * doesn't handle multiple addresses with different zone/scope ids.
+ * Note: ifa_ifwithaddr() compares the entire sockaddr struct.
+ */
+struct sctp_ifa *
+sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int holds_lock)
+{
+ struct sctp_laddr *laddr;
+
+ if (holds_lock == 0) {
+ SCTP_INP_RLOCK(inp);
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL)
+ continue;
+ if (addr->sa_family != laddr->ifa->address.sa.sa_family)
+ continue;
+ if (addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)addr)->sin_addr.s_addr ==
+ laddr->ifa->address.sin.sin_addr.s_addr) {
+ /* found him. */
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (laddr->ifa);
+ break;
+ }
+ }
+#ifdef INET6
+ if (addr->sa_family == AF_INET6) {
+ if (SCTP6_ARE_ADDR_EQUAL((struct sockaddr_in6 *)addr,
+ &laddr->ifa->address.sin6)) {
+ /* found him. */
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (laddr->ifa);
+ break;
+ }
+ }
+#endif
+ }
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
+uint32_t
+sctp_get_ifa_hash_val(struct sockaddr *addr)
+{
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)addr;
+ return (sin->sin_addr.s_addr ^ (sin->sin_addr.s_addr >> 16));
+ } else if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+ uint32_t hash_of_addr;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+ hash_of_addr = (sin6->sin6_addr.s6_addr32[0] +
+ sin6->sin6_addr.s6_addr32[1] +
+ sin6->sin6_addr.s6_addr32[2] +
+ sin6->sin6_addr.s6_addr32[3]);
+ hash_of_addr = (hash_of_addr ^ (hash_of_addr >> 16));
+ return (hash_of_addr);
+ }
+ return (0);
+}
+
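+/*
+ * Look up an address in the per-VRF address hash table and return the
+ * matching sctp_ifa, or NULL if the address is not known.
+ */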
+struct sctp_ifa *
+sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock)
+{
+ struct sctp_ifa *sctp_ifap;
+ struct sctp_vrf *vrf;
+ struct sctp_ifalist *hash_head;
+ uint32_t hash_of_addr;
+
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RLOCK();
+
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+stage_right:
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+ }
+ hash_of_addr = sctp_get_ifa_hash_val(addr);
+
+ hash_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
+ if (hash_head == NULL) {
+ SCTP_PRINTF("hash_of_addr:%x mask:%x table:%x - ",
+ hash_of_addr, (uint32_t) vrf->vrf_addr_hashmark,
+ (uint32_t) (hash_of_addr & vrf->vrf_addr_hashmark));
+ sctp_print_address(addr);
+ SCTP_PRINTF("No such bucket for address\n");
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+
+ return (NULL);
+ }
+ LIST_FOREACH(sctp_ifap, hash_head, next_bucket) {
+ if (sctp_ifap == NULL) {
+#ifdef INVARIANTS
+ panic("Huh LIST_FOREACH corrupt");
+ goto stage_right;
+#else
+ SCTP_PRINTF("LIST corrupt of sctp_ifap's?\n");
+ goto stage_right;
+#endif
+ }
+ if (addr->sa_family != sctp_ifap->address.sa.sa_family)
+ continue;
+ if (addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)addr)->sin_addr.s_addr ==
+ sctp_ifap->address.sin.sin_addr.s_addr) {
+ /* found him. */
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (sctp_ifap);
+ break;
+ }
+ }
+#ifdef INET6
+ if (addr->sa_family == AF_INET6) {
+ if (SCTP6_ARE_ADDR_EQUAL((struct sockaddr_in6 *)addr,
+ &sctp_ifap->address.sin6)) {
+ /* found him. */
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (sctp_ifap);
+ break;
+ }
+ }
+#endif
+ }
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+}
+
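+/*
+ * Called as the application consumes data: once enough receive window
+ * has opened up since the last report, send a window-update SACK and
+ * kick the output path.
+ */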
+static void
+sctp_user_rcvd(struct sctp_tcb *stcb, uint32_t * freed_so_far, int hold_rlock,
+ uint32_t rwnd_req)
+{
+ /* User pulled some data, do we need a rwnd update? */
+ int r_unlocked = 0;
+ uint32_t dif, rwnd;
+ struct socket *so = NULL;
+
+ if (stcb == NULL)
+ return;
+
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+
+ if (stcb->asoc.state & (SCTP_STATE_ABOUT_TO_BE_FREED |
+ SCTP_STATE_SHUTDOWN_RECEIVED |
+ SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+		/* Pre-check: if we are freeing, no update */
+ goto no_lock;
+ }
+ SCTP_INP_INCR_REF(stcb->sctp_ep);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ goto out;
+ }
+ so = stcb->sctp_socket;
+ if (so == NULL) {
+ goto out;
+ }
+ atomic_add_int(&stcb->freed_by_sorcv_sincelast, *freed_so_far);
+	/* Have you freed enough to look? */
+ *freed_so_far = 0;
+	/* Yep, it's worth a look and the lock overhead */
+
+ /* Figure out what the rwnd would be */
+ rwnd = sctp_calc_rwnd(stcb, &stcb->asoc);
+ if (rwnd >= stcb->asoc.my_last_reported_rwnd) {
+ dif = rwnd - stcb->asoc.my_last_reported_rwnd;
+ } else {
+ dif = 0;
+ }
+ if (dif >= rwnd_req) {
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+ r_unlocked = 1;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /*
+			 * One last check before we allow the guy possibly
+			 * to get in. There is a race where the guy has not
+			 * reached the gate; in that case we just bail out.
+ */
+ goto out;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* No reports here */
+ SCTP_TCB_UNLOCK(stcb);
+ goto out;
+ }
+ SCTP_STAT_INCR(sctps_wu_sacks_sent);
+ sctp_send_sack(stcb);
+
+ sctp_chunk_output(stcb->sctp_ep, stcb,
+ SCTP_OUTPUT_FROM_USR_RCVD, SCTP_SO_LOCKED);
+ /* make sure no timer is running */
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* Update how much we have pending */
+ stcb->freed_by_sorcv_sincelast = dif;
+ }
+out:
+ if (so && r_unlocked && hold_rlock) {
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ }
+ SCTP_INP_DECR_REF(stcb->sctp_ep);
+no_lock:
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ return;
+}
+
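+/*
+ * Core receive path: pull the next message (complete or partially
+ * delivered) off the endpoint read queue and copy it out to the user,
+ * handling blocking, MSG_PEEK and notification messages.
+ */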
+int
+sctp_sorecvmsg(struct socket *so,
+ struct uio *uio,
+ struct mbuf **mp,
+ struct sockaddr *from,
+ int fromlen,
+ int *msg_flags,
+ struct sctp_sndrcvinfo *sinfo,
+ int filling_sinfo)
+{
+ /*
+	 * MSG flags we will look at:
+	 * MSG_DONTWAIT - non-blocking IO.
+	 * MSG_PEEK - look, don't touch :-D (only valid with OUT mbuf copy
+	 * mp=NULL, thus uio is the copy method to userland).
+	 * MSG_WAITALL - ??
+	 * On the way out we may send out any combination of:
+	 * MSG_NOTIFICATION MSG_EOR
+ *
+ */
+ struct sctp_inpcb *inp = NULL;
+ int my_len = 0;
+ int cp_len = 0, error = 0;
+ struct sctp_queued_to_read *control = NULL, *ctl = NULL, *nxt = NULL;
+ struct mbuf *m = NULL;
+ struct sctp_tcb *stcb = NULL;
+ int wakeup_read_socket = 0;
+ int freecnt_applied = 0;
+ int out_flags = 0, in_flags = 0;
+ int block_allowed = 1;
+ uint32_t freed_so_far = 0;
+ uint32_t copied_so_far = 0;
+ int in_eeor_mode = 0;
+ int no_rcv_needed = 0;
+ uint32_t rwnd_req = 0;
+ int hold_sblock = 0;
+ int hold_rlock = 0;
+ int slen = 0;
+ uint32_t held_length = 0;
+ int sockbuf_lock = 0;
+
+ if (uio == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if (msg_flags) {
+ in_flags = *msg_flags;
+ if (in_flags & MSG_PEEK)
+ SCTP_STAT_INCR(sctps_read_peeks);
+ } else {
+ in_flags = 0;
+ }
+ slen = uio->uio_resid;
+
+ /* Pull in and set up our int flags */
+ if (in_flags & MSG_OOB) {
+ /* Out of band's NOT supported */
+ return (EOPNOTSUPP);
+ }
+ if ((in_flags & MSG_PEEK) && (mp != NULL)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((in_flags & (MSG_DONTWAIT
+ | MSG_NBIO
+ )) ||
+ SCTP_SO_IS_NBIO(so)) {
+ block_allowed = 0;
+ }
+ /* setup the endpoint */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
+ return (EFAULT);
+ }
+ rwnd_req = (SCTP_SB_LIMIT_RCV(so) >> SCTP_RWND_HIWAT_SHIFT);
+ /* Must be at least a MTU's worth */
+ if (rwnd_req < SCTP_MIN_RWND)
+ rwnd_req = SCTP_MIN_RWND;
+ in_eeor_mode = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SORECV_ENTER,
+ rwnd_req, in_eeor_mode, so->so_rcv.sb_cc, uio->uio_resid);
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SORECV_ENTERPL,
+ rwnd_req, block_allowed, so->so_rcv.sb_cc, uio->uio_resid);
+ }
+ error = sblock(&so->so_rcv, (block_allowed ? SBL_WAIT : 0));
+ sockbuf_lock = 1;
+ if (error) {
+ goto release_unlocked;
+ }
+restart:
+
+
+restart_nosblocks:
+ if (hold_sblock == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ hold_sblock = 1;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ goto out;
+ }
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ if (so->so_error) {
+ error = so->so_error;
+ if ((in_flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ goto out;
+ } else {
+ if (so->so_rcv.sb_cc == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ /* indicate EOF */
+ error = 0;
+ goto out;
+ }
+ }
+ }
+ if ((so->so_rcv.sb_cc <= held_length) && block_allowed) {
+ /* we need to wait for data */
+ if ((so->so_rcv.sb_cc == 0) &&
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
+ /*
+ * For the active open side, clear the flags for
+ * re-use; the passive open side is blocked by
+ * connect.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ /*
+ * You were aborted, passive side
+ * always hits here
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ error = ECONNRESET;
+ /*
+ * You get this once if you are
+ * active open side
+ */
+ if (!(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * Remove flag if on the
+ * active open side
+ */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ }
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ if (error == 0) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ error = ENOTCONN;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_CONNECTED;
+ }
+ }
+ goto out;
+ }
+ }
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ goto out;
+ }
+ held_length = 0;
+ goto restart_nosblocks;
+ } else if (so->so_rcv.sb_cc == 0) {
+ if (so->so_error) {
+ error = so->so_error;
+ if ((in_flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
+ /*
+ * For the active open side, clear the
+ * flags for re-use; the passive open side
+ * is blocked by connect.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ /*
+ * You were aborted, passive
+ * side always hits here
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ error = ECONNRESET;
+ /*
+ * You get this once if you
+ * are active open side
+ */
+ if (!(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * Remove flag if on
+ * the active open
+ * side
+ */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ }
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ if (error == 0) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ error = ENOTCONN;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_CONNECTED;
+ }
+ }
+ goto out;
+ }
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EWOULDBLOCK);
+ error = EWOULDBLOCK;
+ }
+ goto out;
+ }
+ if (hold_sblock == 1) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ /* we possibly have data we can read */
+ /* sa_ignore FREED_MEMORY */
+ control = TAILQ_FIRST(&inp->read_queue);
+ if (control == NULL) {
+ /*
+ * This could be happening since the appender did the
+ * increment but has not yet done the tailq insert onto the
+ * read_queue.
+ */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ control = TAILQ_FIRST(&inp->read_queue);
+ if ((control == NULL) && (so->so_rcv.sb_cc != 0)) {
+#ifdef INVARIANTS
+ panic("Huh, its non zero and nothing on control?");
+#endif
+ so->so_rcv.sb_cc = 0;
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ goto restart;
+ }
+ if ((control->length == 0) &&
+ (control->do_not_ref_stcb)) {
+ /*
+ * Clean up code for freeing an assoc that left behind a
+ * pdapi... maybe a peer in EEOR mode that just closed after
+ * sending and never indicated an EOR.
+ */
+ if (hold_rlock == 0) {
+ hold_rlock = 1;
+ SCTP_INP_READ_LOCK(inp);
+ }
+ control->held_length = 0;
+ if (control->data) {
+ /* Hmm there is data here .. fix */
+ struct mbuf *m_tmp;
+ int cnt = 0;
+
+ m_tmp = control->data;
+ while (m_tmp) {
+ cnt += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ control->tail_mbuf = m_tmp;
+ control->end_added = 1;
+ }
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ control->length = cnt;
+ } else {
+ /* remove it */
+ TAILQ_REMOVE(&inp->read_queue, control, next);
+ /* Add back any hidden data */
+ sctp_free_remote_addr(control->whoFrom);
+ sctp_free_a_readq(stcb, control);
+ }
+ if (hold_rlock) {
+ hold_rlock = 0;
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ goto restart;
+ }
+ if ((control->length == 0) &&
+ (control->end_added == 1)) {
+ /*
+ * Do we also need to check for (control->pdapi_aborted ==
+ * 1)?
+ */
+ if (hold_rlock == 0) {
+ hold_rlock = 1;
+ SCTP_INP_READ_LOCK(inp);
+ }
+ TAILQ_REMOVE(&inp->read_queue, control, next);
+ if (control->data) {
+#ifdef INVARIANTS
+ panic("control->data not null but control->length == 0");
+#else
+ SCTP_PRINTF("Strange, data left in the control buffer. Cleaning up.\n");
+ sctp_m_freem(control->data);
+ control->data = NULL;
+#endif
+ }
+ if (control->aux_data) {
+ sctp_m_free(control->aux_data);
+ control->aux_data = NULL;
+ }
+ sctp_free_remote_addr(control->whoFrom);
+ sctp_free_a_readq(stcb, control);
+ if (hold_rlock) {
+ hold_rlock = 0;
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ goto restart;
+ }
+ if (control->length == 0) {
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) &&
+ (filling_sinfo)) {
+ /* find a more suitable one than this */
+ ctl = TAILQ_NEXT(control, next);
+ while (ctl) {
+ if ((ctl->stcb != control->stcb) && (ctl->length) &&
+ (ctl->some_taken ||
+ (ctl->spec_flags & M_NOTIFICATION) ||
+ ((ctl->do_not_ref_stcb == 0) &&
+ (ctl->stcb->asoc.strmin[ctl->sinfo_stream].delivery_started == 0)))
+ ) {
+ /*-
+ * If we have a different TCB next, and there is data
+ * present: if we have already taken some (pdapi), OR we can
+ * ref the tcb and no delivery has started on this stream, we
+ * take it. Note we allow a notification on a different
+ * assoc to be delivered.
+ */
+ control = ctl;
+ goto found_one;
+ } else if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) &&
+ (ctl->length) &&
+ ((ctl->some_taken) ||
+ ((ctl->do_not_ref_stcb == 0) &&
+ ((ctl->spec_flags & M_NOTIFICATION) == 0) &&
+ (ctl->stcb->asoc.strmin[ctl->sinfo_stream].delivery_started == 0)))) {
+ /*-
+ * If we have the same tcb, and there is data present, and we
+ * have the strm interleave feature present: then if we have
+ * taken some (pdapi) or we can refer to that tcb AND we have
+ * not started a delivery for this stream, we can take it.
+ * Note we do NOT allow a notification on the same assoc to
+ * be delivered.
+ */
+ control = ctl;
+ goto found_one;
+ }
+ ctl = TAILQ_NEXT(ctl, next);
+ }
+ }
+ /*
+ * if we reach here, no suitable replacement is available
+ * <or> fragment interleave is NOT on. So stuff the sb_cc
+ * into our held count, and it's time to sleep again.
+ */
+ held_length = so->so_rcv.sb_cc;
+ control->held_length = so->so_rcv.sb_cc;
+ goto restart;
+ }
+ /* Clear the held length since there is something to read */
+ control->held_length = 0;
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+found_one:
+ /*
+ * If we reach here, control has some data for us to read off.
+ * Note that stcb COULD be NULL.
+ */
+ control->some_taken++;
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ stcb = control->stcb;
+ if (stcb) {
+ if ((control->do_not_ref_stcb == 0) &&
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
+ if (freecnt_applied == 0)
+ stcb = NULL;
+ } else if (control->do_not_ref_stcb == 0) {
+ /* you can't free it on me please */
+ /*
+ * The lock on the socket buffer protects us so the
+ * free code will stop. But since we used the
+ * socketbuf lock and the sender uses the tcb_lock
+ * to increment, we need to use the atomic add to
+ * the refcnt
+ */
+ if (freecnt_applied) {
+#ifdef INVARIANTS
+ panic("refcnt already incremented");
+#else
+ printf("refcnt already incremented?\n");
+#endif
+ } else {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ freecnt_applied = 1;
+ }
+ /*
+ * Set up to remember how much we have not yet told
+ * the peer our rwnd has opened up. Note we grab the
+ * value from the tcb from last time. Note too that
+ * SACK sending clears this when a SACK is sent,
+ * which is fine. Once we hit rwnd_req, we then
+ * will go to sctp_user_rcvd(), which will not
+ * lock until it knows it MUST send a WUP-SACK
+ * (window update SACK).
+ */
+ freed_so_far = stcb->freed_by_sorcv_sincelast;
+ stcb->freed_by_sorcv_sincelast = 0;
+ }
+ }
+ if (stcb &&
+ ((control->spec_flags & M_NOTIFICATION) == 0) &&
+ control->do_not_ref_stcb == 0) {
+ stcb->asoc.strmin[control->sinfo_stream].delivery_started = 1;
+ }
+ /* First lets get off the sinfo and sockaddr info */
+ if ((sinfo) && filling_sinfo) {
+ memcpy(sinfo, control, sizeof(struct sctp_nonpad_sndrcvinfo));
+ nxt = TAILQ_NEXT(control, next);
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ struct sctp_extrcvinfo *s_extra;
+
+ s_extra = (struct sctp_extrcvinfo *)sinfo;
+ if ((nxt) &&
+ (nxt->length)) {
+ s_extra->sreinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
+ if (nxt->sinfo_flags & SCTP_UNORDERED) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
+ }
+ if (nxt->spec_flags & M_NOTIFICATION) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
+ }
+ s_extra->sreinfo_next_aid = nxt->sinfo_assoc_id;
+ s_extra->sreinfo_next_length = nxt->length;
+ s_extra->sreinfo_next_ppid = nxt->sinfo_ppid;
+ s_extra->sreinfo_next_stream = nxt->sinfo_stream;
+ if (nxt->tail_mbuf != NULL) {
+ if (nxt->end_added) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
+ }
+ }
+ } else {
+ /*
+ * we explicitly zero these, since the memcpy
+ * picked up other things beyond the older
+ * sinfo_ fields that are in the control
+ * structure
+ */
+ nxt = NULL;
+ s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+ s_extra->sreinfo_next_aid = 0;
+ s_extra->sreinfo_next_length = 0;
+ s_extra->sreinfo_next_ppid = 0;
+ s_extra->sreinfo_next_stream = 0;
+ }
+ }
+ /*
+ * update off the real current cum-ack, if we have an stcb.
+ */
+ if ((control->do_not_ref_stcb == 0) && stcb)
+ sinfo->sinfo_cumtsn = stcb->asoc.cumulative_tsn;
+ /*
+ * mask off the high bits, we keep the actual chunk bits in
+ * there.
+ */
+ sinfo->sinfo_flags &= 0x00ff;
+ if ((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) {
+ sinfo->sinfo_flags |= SCTP_UNORDERED;
+ }
+ }
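+ /*
+ * The block below (compiled when SCTP_ASOCLOG_OF_TSNS is defined)
+ * reserves a slot in the per-endpoint read log without taking a lock:
+ * the compare-and-swap loop claims the current readlog_index and
+ * advances it, wrapping at SCTP_READ_LOG_SIZE, and the claimed entry
+ * is then filled in from the control block being delivered.
+ */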
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ {
+ int index, newindex;
+ struct sctp_pcbtsn_rlog *entry;
+
+ do {
+ index = inp->readlog_index;
+ newindex = index + 1;
+ if (newindex >= SCTP_READ_LOG_SIZE) {
+ newindex = 0;
+ }
+ } while (atomic_cmpset_int(&inp->readlog_index, index, newindex) == 0);
+ entry = &inp->readlog[index];
+ entry->vtag = control->sinfo_assoc_id;
+ entry->strm = control->sinfo_stream;
+ entry->seq = control->sinfo_ssn;
+ entry->sz = control->length;
+ entry->flgs = control->sinfo_flags;
+ }
+#endif
+ if (fromlen && from) {
+ struct sockaddr *to;
+
+#ifdef INET
+ cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sin.sin_len);
+ memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
+ ((struct sockaddr_in *)from)->sin_port = control->port_from;
+#else
+ /* No AF_INET use AF_INET6 */
+ cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sin6.sin6_len);
+ memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
+ ((struct sockaddr_in6 *)from)->sin6_port = control->port_from;
+#endif
+
+ to = from;
+#if defined(INET) && defined(INET6)
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
+ (to->sa_family == AF_INET) &&
+ ((size_t)fromlen >= sizeof(struct sockaddr_in6))) {
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 sin6;
+
+ sin = (struct sockaddr_in *)to;
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+ bcopy(&sin->sin_addr,
+ &sin6.sin6_addr.s6_addr32[3],
+ sizeof(sin6.sin6_addr.s6_addr32[3]));
+ sin6.sin6_port = sin->sin_port;
+ memcpy(from, (caddr_t)&sin6, sizeof(sin6));
+ }
+#endif
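+ /*
+ * Note: when SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 is enabled, the AF_INET
+ * source address filled in above is rewritten as an IPv4-mapped IPv6
+ * address of the form ::ffff:a.b.c.d, provided the caller's buffer is
+ * large enough for a sockaddr_in6.
+ */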
+#if defined(INET6)
+ {
+ struct sockaddr_in6 lsa6, *to6;
+
+ to6 = (struct sockaddr_in6 *)to;
+ sctp_recover_scope_mac(to6, (&lsa6));
+ }
+#endif
+ }
+ /* now copy out what data we can */
+ if (mp == NULL) {
+ /* copy out each mbuf in the chain up to length */
+get_more_data:
+ m = control->data;
+ while (m) {
+ /* Move out all we can */
+ cp_len = (int)uio->uio_resid;
+ my_len = (int)SCTP_BUF_LEN(m);
+ if (cp_len > my_len) {
+ /* not enough in this buf */
+ cp_len = my_len;
+ }
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (cp_len > 0)
+ error = uiomove(mtod(m, char *), cp_len, uio);
+ /* re-read */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ goto release;
+ }
+ if ((control->do_not_ref_stcb == 0) && stcb &&
+ stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ no_rcv_needed = 1;
+ }
+ if (error) {
+ /* error we are out of here */
+ goto release;
+ }
+ if ((SCTP_BUF_NEXT(m) == NULL) &&
+ (cp_len >= SCTP_BUF_LEN(m)) &&
+ ((control->end_added == 0) ||
+ (control->end_added &&
+ (TAILQ_NEXT(control, next) == NULL)))
+ ) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ if (cp_len == SCTP_BUF_LEN(m)) {
+ if ((SCTP_BUF_NEXT(m) == NULL) &&
+ (control->end_added)) {
+ out_flags |= MSG_EOR;
+ if ((control->do_not_ref_stcb == 0) &&
+ (control->stcb != NULL) &&
+ ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ /* we ate up the mbuf */
+ if (in_flags & MSG_PEEK) {
+ /* just looking */
+ m = SCTP_BUF_NEXT(m);
+ copied_so_far += cp_len;
+ } else {
+ /* dispose of the mbuf */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &so->so_rcv, m);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ copied_so_far += cp_len;
+ freed_so_far += cp_len;
+ freed_so_far += MSIZE;
+ atomic_subtract_int(&control->length, cp_len);
+ control->data = sctp_m_free(m);
+ m = control->data;
+ /*
+ * we have been through it all; we must
+ * hold the sb lock, so it is OK to null
+ * the tail
+ */
+ if (control->data == NULL) {
+#ifdef INVARIANTS
+ if ((control->end_added == 0) ||
+ (TAILQ_NEXT(control, next) == NULL)) {
+ /*
+ * If the end is not
+ * added, OR the
+ * next is NOT null
+ * we MUST have the
+ * lock.
+ */
+ if (mtx_owned(&inp->inp_rdata_mtx) == 0) {
+ panic("Hmm we don't own the lock?");
+ }
+ }
+#endif
+ control->tail_mbuf = NULL;
+#ifdef INVARIANTS
+ if ((control->end_added) && ((out_flags & MSG_EOR) == 0)) {
+ panic("end_added, nothing left and no MSG_EOR");
+ }
+#endif
+ }
+ }
+ } else {
+ /* Do we need to trim the mbuf? */
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ if ((in_flags & MSG_PEEK) == 0) {
+ SCTP_BUF_RESV_UF(m, cp_len);
+ SCTP_BUF_LEN(m) -= cp_len;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, cp_len);
+ }
+ atomic_subtract_int(&so->so_rcv.sb_cc, cp_len);
+ if ((control->do_not_ref_stcb == 0) &&
+ stcb) {
+ atomic_subtract_int(&stcb->asoc.sb_cc, cp_len);
+ }
+ copied_so_far += cp_len;
+ freed_so_far += cp_len;
+ freed_so_far += MSIZE;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb,
+ SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_subtract_int(&control->length, cp_len);
+ } else {
+ copied_so_far += cp_len;
+ }
+ }
+ if ((out_flags & MSG_EOR) || (uio->uio_resid == 0)) {
+ break;
+ }
+ if (((stcb) && (in_flags & MSG_PEEK) == 0) &&
+ (control->do_not_ref_stcb == 0) &&
+ (freed_so_far >= rwnd_req)) {
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+ } /* end while(m) */
+ /*
+ * At this point we have looked at it all and we either have
+ * a MSG_EOR, or have read all the user wants... <OR>
+ * control->length == 0.
+ */
+ if ((out_flags & MSG_EOR) && ((in_flags & MSG_PEEK) == 0)) {
+ /* we are done with this control */
+ if (control->length == 0) {
+ if (control->data) {
+#ifdef INVARIANTS
+ panic("control->data not null at read eor?");
+#else
+ SCTP_PRINTF("Strange, data left in the control buffer .. invarients would panic?\n");
+ sctp_m_freem(control->data);
+ control->data = NULL;
+#endif
+ }
+ done_with_control:
+ if (TAILQ_NEXT(control, next) == NULL) {
+ /*
+ * If we don't have a next we need a
+ * lock; if there is a next, the
+ * interrupt is filling ahead of us
+ * and we don't need a lock to
+ * remove this one (which is the
+ * head of the queue).
+ */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ }
+ TAILQ_REMOVE(&inp->read_queue, control, next);
+ /* Add back any hidden data */
+ if (control->held_length) {
+ held_length = 0;
+ control->held_length = 0;
+ wakeup_read_socket = 1;
+ }
+ if (control->aux_data) {
+ sctp_m_free(control->aux_data);
+ control->aux_data = NULL;
+ }
+ no_rcv_needed = control->do_not_ref_stcb;
+ sctp_free_remote_addr(control->whoFrom);
+ control->data = NULL;
+ sctp_free_a_readq(stcb, control);
+ control = NULL;
+ if ((freed_so_far >= rwnd_req) &&
+ (no_rcv_needed == 0))
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+
+ } else {
+ /*
+ * The user did not read all of this
+ * message, turn off the returned MSG_EOR
+ * since we are leaving more behind on the
+ * control to read.
+ */
+#ifdef INVARIANTS
+ if (control->end_added &&
+ (control->data == NULL) &&
+ (control->tail_mbuf == NULL)) {
+ panic("Gak, control->length is corrupt?");
+ }
+#endif
+ no_rcv_needed = control->do_not_ref_stcb;
+ out_flags &= ~MSG_EOR;
+ }
+ }
+ if (out_flags & MSG_EOR) {
+ goto release;
+ }
+ if ((uio->uio_resid == 0) ||
+ ((in_eeor_mode) && (copied_so_far >= max(so->so_rcv.sb_lowat, 1)))
+ ) {
+ goto release;
+ }
+ /*
+ * If I hit here the receiver wants more and this message is
+ * NOT done (pd-api). So two questions. Can we block? if not
+ * we are done. Did the user NOT set MSG_WAITALL?
+ */
+ if (block_allowed == 0) {
+ goto release;
+ }
+ /*
+ * We need to wait for more data; a few things: - We don't
+ * sbunlock(), so we don't get someone else reading. - We
+ * must be sure to account for the case where what is added
+ * is NOT for our control when we wake up.
+ */
+
+ /*
+ * Do we need to tell the transport a rwnd update might be
+ * needed before we go to sleep?
+ */
+ if (((stcb) && (in_flags & MSG_PEEK) == 0) &&
+ ((freed_so_far >= rwnd_req) &&
+ (control->do_not_ref_stcb == 0) &&
+ (no_rcv_needed == 0))) {
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+wait_some_more:
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ goto release;
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)
+ goto release;
+
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ hold_sblock = 1;
+ }
+ if ((copied_so_far) && (control->length == 0) &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE))) {
+ goto release;
+ }
+ if (so->so_rcv.sb_cc <= control->held_length) {
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ goto release;
+ }
+ control->held_length = 0;
+ }
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if (control->length == 0) {
+ /* still nothing here */
+ if (control->end_added == 1) {
+ /* he aborted, or is done, i.e. did a shutdown */
+ out_flags |= MSG_EOR;
+ if (control->pdapi_aborted) {
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+
+ out_flags |= MSG_TRUNC;
+ } else {
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ goto done_with_control;
+ }
+ if (so->so_rcv.sb_cc > held_length) {
+ control->held_length = so->so_rcv.sb_cc;
+ held_length = 0;
+ }
+ goto wait_some_more;
+ } else if (control->data == NULL) {
+ /*
+ * we must re-sync since data is probably being
+ * added
+ */
+ SCTP_INP_READ_LOCK(inp);
+ if ((control->length > 0) && (control->data == NULL)) {
+ /*
+ * big trouble.. we have the lock and it's
+ * corrupt?
+ */
+#ifdef INVARIANTS
+ panic("Impossible data==NULL length !=0");
+#endif
+ out_flags |= MSG_EOR;
+ out_flags |= MSG_TRUNC;
+ control->length = 0;
+ SCTP_INP_READ_UNLOCK(inp);
+ goto done_with_control;
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ /* We will fall around to get more data */
+ }
+ goto get_more_data;
+ } else {
+ /*-
+ * Give caller back the mbuf chain,
+ * store in uio_resid the length
+ */
+ wakeup_read_socket = 0;
+ if ((control->end_added == 0) ||
+ (TAILQ_NEXT(control, next) == NULL)) {
+ /* Need to get rlock */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ }
+ if (control->end_added) {
+ out_flags |= MSG_EOR;
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ uio->uio_resid = control->length;
+ *mp = control->data;
+ m = control->data;
+ while (m) {
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &so->so_rcv, m);
+ freed_so_far += SCTP_BUF_LEN(m);
+ freed_so_far += MSIZE;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ control->data = control->tail_mbuf = NULL;
+ control->length = 0;
+ if (out_flags & MSG_EOR) {
+ /* Done with this control */
+ goto done_with_control;
+ }
+ }
+release:
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock == 1) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ sbunlock(&so->so_rcv);
+ sockbuf_lock = 0;
+
+release_unlocked:
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if ((stcb) && (in_flags & MSG_PEEK) == 0) {
+ if ((freed_so_far >= rwnd_req) &&
+ (control && (control->do_not_ref_stcb == 0)) &&
+ (no_rcv_needed == 0))
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+out:
+ if (msg_flags) {
+ *msg_flags = out_flags;
+ }
+ if (((out_flags & MSG_EOR) == 0) &&
+ ((in_flags & MSG_PEEK) == 0) &&
+ (sinfo) &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO))) {
+ struct sctp_extrcvinfo *s_extra;
+
+ s_extra = (struct sctp_extrcvinfo *)sinfo;
+ s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+ }
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if (sockbuf_lock) {
+ sbunlock(&so->so_rcv);
+ }
+ if (freecnt_applied) {
+ /*
+ * The lock on the socket buffer protects us so the free
+ * code will stop. But since we used the socketbuf lock and
+ * the sender uses the tcb_lock to increment, we need to use
+ * the atomic add to the refcnt.
+ */
+ if (stcb == NULL) {
+#ifdef INVARIANTS
+ panic("stcb for refcnt has gone NULL?");
+ goto stage_left;
+#else
+ goto stage_left;
+#endif
+ }
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ freecnt_applied = 0;
+ /* Save the value back for next time */
+ stcb->freed_by_sorcv_sincelast = freed_so_far;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ if (stcb) {
+ sctp_misc_ints(SCTP_SORECV_DONE,
+ freed_so_far,
+ ((uio) ? (slen - uio->uio_resid) : slen),
+ stcb->asoc.my_rwnd,
+ so->so_rcv.sb_cc);
+ } else {
+ sctp_misc_ints(SCTP_SORECV_DONE,
+ freed_so_far,
+ ((uio) ? (slen - uio->uio_resid) : slen),
+ 0,
+ so->so_rcv.sb_cc);
+ }
+ }
+stage_left:
+ if (wakeup_read_socket) {
+ sctp_sorwakeup(inp, so);
+ }
+ return (error);
+}
+
+
+#ifdef SCTP_MBUF_LOGGING
+struct mbuf *
+sctp_m_free(struct mbuf *m)
+{
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_log_mb(m, SCTP_MBUF_IFREE);
+ }
+ }
+ return (m_free(m));
+}
+
+void
+sctp_m_freem(struct mbuf *mb)
+{
+ while (mb != NULL)
+ mb = sctp_m_free(mb);
+}
+
+#endif
+
+int
+sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id)
+{
+ /*
+ * Given a local address, for all associations that hold the
+ * address, request a peer-set-primary.
+ */
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *wi;
+
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, 0);
+ if (ifa == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+ /*
+ * Now that we have the ifa we must awaken the iterator with this
+ * message.
+ */
+ wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+ if (wi == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+ /* Now incr the count and init the wi structure */
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = ifa;
+ wi->action = SCTP_SET_PRIM_ADDR;
+ atomic_add_int(&ifa->refcount, 1);
+
+ /* Now add it to the work queue */
+ SCTP_WQ_ADDR_LOCK();
+ /*
+ * Should this really be a tailq? As it is we will process the
+ * newest first :-0
+ */
+ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
+ SCTP_WQ_ADDR_UNLOCK();
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ return (0);
+}
+
+
+int
+sctp_soreceive(struct socket *so,
+ struct sockaddr **psa,
+ struct uio *uio,
+ struct mbuf **mp0,
+ struct mbuf **controlp,
+ int *flagsp)
+{
+ int error, fromlen;
+ uint8_t sockbuf[256];
+ struct sockaddr *from;
+ struct sctp_extrcvinfo sinfo;
+ int filling_sinfo = 1;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ /* pick up the assoc we are reading from */
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((sctp_is_feature_off(inp,
+ SCTP_PCB_FLAGS_RECVDATAIOEVNT)) ||
+ (controlp == NULL)) {
+ /* user does not want the sndrcv ctl */
+ filling_sinfo = 0;
+ }
+ if (psa) {
+ from = (struct sockaddr *)sockbuf;
+ fromlen = sizeof(sockbuf);
+ from->sa_len = 0;
+ } else {
+ from = NULL;
+ fromlen = 0;
+ }
+
+ error = sctp_sorecvmsg(so, uio, mp0, from, fromlen, flagsp,
+ (struct sctp_sndrcvinfo *)&sinfo, filling_sinfo);
+ if ((controlp) && (filling_sinfo)) {
+ /* copy back the sinfo in a CMSG format */
+ if (filling_sinfo)
+ *controlp = sctp_build_ctl_nchunk(inp,
+ (struct sctp_sndrcvinfo *)&sinfo);
+ else
+ *controlp = NULL;
+ }
+ if (psa) {
+ /* copy back the address info */
+ if (from && from->sa_len) {
+ *psa = sodupsockaddr(from, M_NOWAIT);
+ } else {
+ *psa = NULL;
+ }
+ }
+ return (error);
+}
+
+
+int
+sctp_l_soreceive(struct socket *so,
+ struct sockaddr **name,
+ struct uio *uio,
+ char **controlp,
+ int *controllen,
+ int *flag)
+{
+ int error, fromlen;
+ uint8_t sockbuf[256];
+ struct sockaddr *from;
+ struct sctp_extrcvinfo sinfo;
+ int filling_sinfo = 1;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ /* pick up the assoc we are reading from */
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((sctp_is_feature_off(inp,
+ SCTP_PCB_FLAGS_RECVDATAIOEVNT)) ||
+ (controlp == NULL)) {
+ /* user does not want the sndrcv ctl */
+ filling_sinfo = 0;
+ }
+ if (name) {
+ from = (struct sockaddr *)sockbuf;
+ fromlen = sizeof(sockbuf);
+ from->sa_len = 0;
+ } else {
+ from = NULL;
+ fromlen = 0;
+ }
+
+ error = sctp_sorecvmsg(so, uio,
+ (struct mbuf **)NULL,
+ from, fromlen, flag,
+ (struct sctp_sndrcvinfo *)&sinfo,
+ filling_sinfo);
+ if ((controlp) && (filling_sinfo)) {
+ /*
+ * copy back the sinfo in CMSG format; note that the caller
+ * has responsibility for freeing the memory.
+ */
+ if (filling_sinfo)
+ *controlp = sctp_build_ctl_cchunk(inp,
+ controllen,
+ (struct sctp_sndrcvinfo *)&sinfo);
+ }
+ if (name) {
+ /* copy back the address info */
+ if (from && from->sa_len) {
+ *name = sodupsockaddr(from, M_WAIT);
+ } else {
+ *name = NULL;
+ }
+ }
+ return (error);
+}
+
+
+
+
+
+
+
+int
+sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
+ int totaddr, int *error)
+{
+ int added = 0;
+ int i;
+ struct sctp_inpcb *inp;
+ struct sockaddr *sa;
+ size_t incr = 0;
+
+ sa = addr;
+ inp = stcb->sctp_ep;
+ *error = 0;
+ for (i = 0; i < totaddr; i++) {
+ if (sa->sa_family == AF_INET) {
+ incr = sizeof(struct sockaddr_in);
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+ /* assoc gone no un-lock */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ *error = ENOBUFS;
+ goto out_now;
+ }
+ added++;
+ } else if (sa->sa_family == AF_INET6) {
+ incr = sizeof(struct sockaddr_in6);
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+ /* assoc gone no un-lock */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+ *error = ENOBUFS;
+ goto out_now;
+ }
+ added++;
+ }
+ sa = (struct sockaddr *)((caddr_t)sa + incr);
+ }
+out_now:
+ return (added);
+}
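+/*
+ * Illustrative sketch (assumption, not part of the upstream source):
+ * the addr argument of the connectx helpers is a packed array of
+ * sockaddrs laid out back to back, e.g.
+ *
+ *	struct sockaddr_in v4;
+ *	struct sockaddr_in6 v6;
+ *	char packed[sizeof(v4) + sizeof(v6)];
+ *
+ *	memcpy(packed, &v4, sizeof(v4));
+ *	memcpy(packed + sizeof(v4), &v6, sizeof(v6));
+ *
+ * with v4.sin_family = AF_INET and v6.sin6_family = AF_INET6 filled in
+ * first; that is why the walk advances sa by sizeof(struct sockaddr_in)
+ * or sizeof(struct sockaddr_in6) depending on sa_family.
+ */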
+
+struct sctp_tcb *
+sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int *totaddr, int *num_v4, int *num_v6, int *error,
+ int limit, int *bad_addr)
+{
+ struct sockaddr *sa;
+ struct sctp_tcb *stcb = NULL;
+ size_t incr, at, i;
+
+ at = incr = 0;
+ sa = addr;
+ *error = *num_v6 = *num_v4 = 0;
+ /* account and validate addresses */
+ for (i = 0; i < (size_t)*totaddr; i++) {
+ if (sa->sa_family == AF_INET) {
+ (*num_v4) += 1;
+ incr = sizeof(struct sockaddr_in);
+ if (sa->sa_len != incr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ /* Must be non-mapped for connectx */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ (*num_v6) += 1;
+ incr = sizeof(struct sockaddr_in6);
+ if (sa->sa_len != incr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ } else {
+ *totaddr = i;
+ /* we are done */
+ break;
+ }
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
+ if (stcb != NULL) {
+ /* Already have or am bringing up an association */
+ return (stcb);
+ } else {
+ SCTP_INP_DECR_REF(inp);
+ }
+ if ((at + incr) > (size_t)limit) {
+ *totaddr = i;
+ break;
+ }
+ sa = (struct sockaddr *)((caddr_t)sa + incr);
+ }
+ return ((struct sctp_tcb *)NULL);
+}
+
+/*
+ * sctp_bindx(ADD) for one address.
+ * assumes all arguments are valid/checked by caller.
+ */
+void
+sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error, void *p)
+{
+ struct sockaddr *addr_touse;
+
+#ifdef INET6
+ struct sockaddr_in sin;
+
+#endif
+
+ /* see if we're bound all already! */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ addr_touse = sa;
+#if defined(INET6) && !defined(__Userspace__) /* TODO port in6_sin6_2_sin */
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ if (sa->sa_len != sizeof(struct sockaddr_in6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ /* can only bind v6 on PF_INET6 sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ sin6 = (struct sockaddr_in6 *)addr_touse;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* can't bind v4-mapped on PF_INET sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ in6_sin6_2_sin(&sin, sin6);
+ addr_touse = (struct sockaddr *)&sin;
+ }
+ }
+#endif
+ if (sa->sa_family == AF_INET) {
+ if (sa->sa_len != sizeof(struct sockaddr_in)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* can't bind v4 on PF_INET sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ if (p == NULL) {
+ /* Can't get proc for Net/Open BSD */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ *error = sctp_inpcb_bind(so, addr_touse, NULL, p);
+ return;
+ }
+ /*
+ * No locks required here since bind and mgmt_ep_sa all do their own
+ * locking. If we do something for the FIX: below we may need to
+ * lock in that case.
+ */
+ if (assoc_id == 0) {
+ /* add the address */
+ struct sctp_inpcb *lep;
+ struct sockaddr_in *lsin = (struct sockaddr_in *)addr_touse;
+
+ /* validate the incoming port */
+ if ((lsin->sin_port != 0) &&
+ (lsin->sin_port != inp->sctp_lport)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ } else {
+ /* user specified 0 port, set it to existing port */
+ lsin->sin_port = inp->sctp_lport;
+ }
+
+ lep = sctp_pcb_findep(addr_touse, 1, 0, vrf_id);
+ if (lep != NULL) {
+ /*
+ * We must decrement the refcount since we have the
+ * ep already and are binding. No remove going on
+ * here.
+ */
+ SCTP_INP_DECR_REF(lep);
+ }
+ if (lep == inp) {
+ /* already bound to it.. ok */
+ return;
+ } else if (lep == NULL) {
+ ((struct sockaddr_in *)addr_touse)->sin_port = 0;
+ *error = sctp_addr_mgmt_ep_sa(inp, addr_touse,
+ SCTP_ADD_IP_ADDRESS,
+ vrf_id, NULL);
+ } else {
+ *error = EADDRINUSE;
+ }
+ if (*error)
+ return;
+ } else {
+ /*
+ * FIX: decide whether we allow assoc based bindx
+ */
+ }
+}
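+/*
+ * Illustrative usage (sketch under assumed values, not part of the
+ * upstream source): a userland sctp_bindx(3) ADD request ends up here,
+ * e.g.
+ *
+ *	struct sockaddr_in addr;
+ *
+ *	memset(&addr, 0, sizeof(addr));
+ *	addr.sin_family = AF_INET;
+ *	addr.sin_len = sizeof(addr);
+ *	addr.sin_port = htons(5001);
+ *	inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);
+ *	(void)sctp_bindx(sd, (struct sockaddr *)&addr, 1,
+ *	    SCTP_BINDX_ADD_ADDR);
+ *
+ * with assoc_id == 0 meaning "apply to the endpoint", as handled above
+ * (the address and port are made up for the example).
+ */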
+
+/*
+ * sctp_bindx(DELETE) for one address.
+ * assumes all arguments are valid/checked by caller.
+ */
+void
+sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error)
+{
+ struct sockaddr *addr_touse;
+
+#ifdef INET6
+ struct sockaddr_in sin;
+
+#endif
+
+ /* see if we're bound all already! */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ addr_touse = sa;
+#if defined(INET6) && !defined(__Userspace__) /* TODO port in6_sin6_2_sin */
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ if (sa->sa_len != sizeof(struct sockaddr_in6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ /* can only bind v6 on PF_INET6 sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ sin6 = (struct sockaddr_in6 *)addr_touse;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* can't bind mapped-v4 on PF_INET sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ in6_sin6_2_sin(&sin, sin6);
+ addr_touse = (struct sockaddr *)&sin;
+ }
+ }
+#endif
+ if (sa->sa_family == AF_INET) {
+ if (sa->sa_len != sizeof(struct sockaddr_in)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* can't bind v4 on PF_INET sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ }
+ /*
+ * No lock required mgmt_ep_sa does its own locking. If the FIX:
+ * below is ever changed we may need to lock before calling
+ * association level binding.
+ */
+ if (assoc_id == 0) {
+ /* delete the address */
+ *error = sctp_addr_mgmt_ep_sa(inp, addr_touse,
+ SCTP_DEL_IP_ADDRESS,
+ vrf_id, NULL);
+ } else {
+ /*
+ * FIX: decide whether we allow assoc based bindx
+ */
+ }
+}
+
+/*
+ * returns the valid local address count for an assoc, taking into account
+ * all scoping rules
+ */
+int
+sctp_local_addr_count(struct sctp_tcb *stcb)
+{
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int count = 0;
+
+ /* Turn on all the appropriate scopes */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ ipv4_addr_legal = ipv6_addr_legal = 0;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ }
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+ if (vrf == NULL) {
+ /* no vrf, no addresses */
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (0);
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /*
+ * bound all case: go through all ifns on the vrf
+ */
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+ switch (sctp_ifa->address.sa.sa_family) {
+ case AF_INET:
+ if (ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /*
+ * skip unspecified
+ * addrs
+ */
+ continue;
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ } else {
+ continue;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ if (sa6_recoverscope(sin6) != 0)
+ /* bad link local address */
+ continue;
+ }
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ }
+ break;
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ }
+ }
+ } else {
+ /*
+ * subset bound case
+ */
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (count);
+}
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+
+void
+sctp_log_trace(uint32_t subsys, const char *str SCTP_UNUSED, uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f)
+{
+ uint32_t saveindex, newindex;
+
+ do {
+ saveindex = SCTP_BASE_SYSCTL(sctp_log).index;
+ if (saveindex >= SCTP_MAX_LOGGING_SIZE) {
+ newindex = 1;
+ } else {
+ newindex = saveindex + 1;
+ }
+ } while (atomic_cmpset_int(&SCTP_BASE_SYSCTL(sctp_log).index, saveindex, newindex) == 0);
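+ /*
+ * The compare-and-swap loop above reserves a trace slot without a
+ * lock: each caller claims the current index and advances it,
+ * restarting at 1 once SCTP_MAX_LOGGING_SIZE is reached; a wrapped
+ * claim is redirected to slot 0 by the adjustment below.
+ */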
+ if (saveindex >= SCTP_MAX_LOGGING_SIZE) {
+ saveindex = 0;
+ }
+ SCTP_BASE_SYSCTL(sctp_log).entry[saveindex].timestamp = SCTP_GET_CYCLECOUNT;
+ SCTP_BASE_SYSCTL(sctp_log).entry[saveindex].subsys = subsys;
+ SCTP_BASE_SYSCTL(sctp_log).entry[saveindex].params[0] = a;
+ SCTP_BASE_SYSCTL(sctp_log).entry[saveindex].params[1] = b;
+ SCTP_BASE_SYSCTL(sctp_log).entry[saveindex].params[2] = c;
+ SCTP_BASE_SYSCTL(sctp_log).entry[saveindex].params[3] = d;
+ SCTP_BASE_SYSCTL(sctp_log).entry[saveindex].params[4] = e;
+ SCTP_BASE_SYSCTL(sctp_log).entry[saveindex].params[5] = f;
+}
+
+#endif
+/* We will need to add support
+ * to bind the ports and such here
+ * so we can do UDP tunneling. In
+ * the meantime, we return an error.
+ */
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/udp_var.h>
+#include <freebsd/sys/proc.h>
+#ifdef INET6
+#include <freebsd/netinet6/sctp6_var.h>
+#endif
+
+static void
+sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *ignored)
+{
+ struct ip *iph;
+ struct mbuf *sp, *last;
+ struct udphdr *uhdr;
+ uint16_t port = 0, len;
+ int header_size = sizeof(struct udphdr) + sizeof(struct sctphdr);
+
+ /*
+ * Split out the mbuf chain. Leave the IP header in m, place the
+ * rest in sp.
+ */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* Can't handle one that is not a pkt hdr */
+ goto out;
+ }
+ /* pull the src port */
+ iph = mtod(m, struct ip *);
+ uhdr = (struct udphdr *)((caddr_t)iph + off);
+
+ port = uhdr->uh_sport;
+ sp = m_split(m, off, M_DONTWAIT);
+ if (sp == NULL) {
+ /* Gak, drop packet, we can't do a split */
+ goto out;
+ }
+ if (sp->m_pkthdr.len < header_size) {
+ /* Gak, packet can't have an SCTP header in it - too small */
+ m_freem(sp);
+ goto out;
+ }
+ /* ok now pull up the UDP header and SCTP header together */
+ sp = m_pullup(sp, header_size);
+ if (sp == NULL) {
+ /* Gak pullup failed */
+ goto out;
+ }
+ /* trim out the UDP header */
+ m_adj(sp, sizeof(struct udphdr));
+
+ /* Now reconstruct the mbuf chain */
+ /* 1) find last one */
+ last = m;
+ while (last->m_next != NULL) {
+ last = last->m_next;
+ }
+ last->m_next = sp;
+ m->m_pkthdr.len += sp->m_pkthdr.len;
+ last = m;
+ while (last != NULL) {
+ last = last->m_next;
+ }
+ /* Now it's ready for sctp_input or sctp6_input */
+ iph = mtod(m, struct ip *);
+ switch (iph->ip_v) {
+ case IPVERSION:
+ {
+ /* it's IPv4 */
+ len = SCTP_GET_IPV4_LENGTH(iph);
+ len -= sizeof(struct udphdr);
+ SCTP_GET_IPV4_LENGTH(iph) = len;
+ sctp_input_with_port(m, off, port);
+ break;
+ }
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+ /* it's IPv6 - NOT supported */
+ goto out;
+ break;
+
+ }
+#endif
+ default:
+ {
+ m_freem(m);
+ break;
+ }
+ }
+ return;
+out:
+ m_freem(m);
+}
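+/*
+ * The handler above undoes the UDP encapsulation of a tunneled SCTP
+ * packet laid out as [IP][UDP][SCTP common header][chunks]: it splits
+ * the chain after the IP header, strips the UDP header, glues the
+ * remainder back onto the IP-header mbuf, shortens the recorded IPv4
+ * length by the size of the UDP header, and hands the result to
+ * sctp_input_with_port() along with the UDP source port.
+ */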
+
+void
+sctp_over_udp_stop(void)
+{
+ struct socket *sop;
+
+ /*
+ * This function assumes sysctl caller holds sctp_sysctl_info_lock()
+ * for writing!
+ */
+ if (SCTP_BASE_INFO(udp_tun_socket) == NULL) {
+ /* Nothing to do */
+ return;
+ }
+ sop = SCTP_BASE_INFO(udp_tun_socket);
+ soclose(sop);
+ SCTP_BASE_INFO(udp_tun_socket) = NULL;
+}
+
+int
+sctp_over_udp_start(void)
+{
+ uint16_t port;
+ int ret;
+ struct sockaddr_in sin;
+ struct socket *sop = NULL;
+ struct thread *th;
+ struct ucred *cred;
+
+ /*
+ * This function assumes sysctl caller holds sctp_sysctl_info_lock()
+ * for writing!
+ */
+ port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
+ if (port == 0) {
+ /* Must have a port set */
+ return (EINVAL);
+ }
+ if (SCTP_BASE_INFO(udp_tun_socket) != NULL) {
+ /* Already running -- must stop first */
+ return (EALREADY);
+ }
+ th = curthread;
+ cred = th->td_ucred;
+ if ((ret = socreate(PF_INET, &sop,
+ SOCK_DGRAM, IPPROTO_UDP, cred, th))) {
+ return (ret);
+ }
+ SCTP_BASE_INFO(udp_tun_socket) = sop;
+ /* call the special UDP hook */
+ ret = udp_set_kernel_tunneling(sop, sctp_recv_udp_tunneled_packet);
+ if (ret) {
+ goto exit_stage_left;
+ }
+ /* Ok we have a socket, bind it to the port */
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_port = htons(port);
+ ret = sobind(sop, (struct sockaddr *)&sin, th);
+ if (ret) {
+ /* Close up, we can't get the port */
+exit_stage_left:
+ sctp_over_udp_stop();
+ return (ret);
+ }
+ /*
+ * Ok we should now get UDP packets directly to our input routine
+ * sctp_recv_udp_tunneled_packet().
+ */
+ return (0);
+}
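+/*
+ * Illustrative note (assumption, not spelled out in this file): the
+ * start/stop pair above is intended to be driven by the sysctl handler
+ * for sctp_udp_tunneling_port, roughly
+ *
+ *	sysctl net.inet.sctp.udp_tunneling_port=9899
+ *
+ * (sysctl name and port chosen for illustration), which would stop any
+ * existing tunneling socket and start a new one bound to the given UDP
+ * port.
+ */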
diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h
new file mode 100644
index 00000000..b1bee3a4
--- /dev/null
+++ b/freebsd/sys/netinet/sctputil.h
@@ -0,0 +1,392 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/* $KAME: sctputil.h,v 1.15 2005/03/06 16:04:19 itojun Exp $ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#ifndef __sctputil_h__
+#define __sctputil_h__
+
+
+#if defined(_KERNEL) || defined(__Userspace__)
+
+#define SCTP_READ_LOCK_HELD 1
+#define SCTP_READ_LOCK_NOT_HELD 0
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+void sctp_print_out_track_log(struct sctp_tcb *stcb);
+
+#endif
+
+#ifdef SCTP_MBUF_LOGGING
+struct mbuf *sctp_m_free(struct mbuf *m);
+void sctp_m_freem(struct mbuf *m);
+
+#else
+#define sctp_m_free m_free
+#define sctp_m_freem m_freem
+#endif
+
+#if defined(SCTP_LOCAL_TRACE_BUF) || defined(__APPLE__)
+void
+ sctp_log_trace(uint32_t fr, const char *str SCTP_UNUSED, uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f);
+
+#endif
+
+#define sctp_get_associd(stcb) ((sctp_assoc_t)stcb->asoc.assoc_id)
+
+
+/*
+ * Function prototypes
+ */
+uint32_t
+sctp_get_ifa_hash_val(struct sockaddr *addr);
+
+struct sctp_ifa *
+ sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int hold_lock);
+
+struct sctp_ifa *
+ sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock);
+
+uint32_t sctp_select_initial_TSN(struct sctp_pcb *);
+
+uint32_t sctp_select_a_tag(struct sctp_inpcb *, uint16_t lport, uint16_t rport, int);
+
+int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, uint32_t, uint32_t);
+
+void sctp_fill_random_store(struct sctp_pcb *);
+
+void
+sctp_timer_start(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_timer_stop(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint32_t);
+
+int
+ sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id);
+
+void
+ sctp_mtu_size_reset(struct sctp_inpcb *, struct sctp_association *, uint32_t);
+
+void
+sctp_add_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct sockbuf *sb,
+ int end,
+ int inpread_locked,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+int
+sctp_append_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct mbuf *m,
+ int end,
+ int new_cumack,
+ struct sockbuf *sb);
+
+
+void sctp_iterator_worker(void);
+
+uint32_t sctp_get_prev_mtu(uint32_t);
+uint32_t sctp_get_next_mtu(struct sctp_inpcb *, uint32_t);
+
+void
+ sctp_timeout_handler(void *);
+
+uint32_t
+sctp_calculate_rto(struct sctp_tcb *, struct sctp_association *,
+ struct sctp_nets *, struct timeval *, int);
+
+uint32_t sctp_calculate_len(struct mbuf *);
+
+caddr_t sctp_m_getptr(struct mbuf *, int, int, uint8_t *);
+
+struct sctp_paramhdr *
+sctp_get_next_param(struct mbuf *, int,
+ struct sctp_paramhdr *, int);
+
+int sctp_add_pad_tombuf(struct mbuf *, int);
+
+int sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
+
+void
+sctp_ulp_notify(uint32_t, struct sctp_tcb *, uint32_t, void *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
+ struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb, int waitflags);
+
+
+void sctp_stop_timers_for_shutdown(struct sctp_tcb *);
+
+void
+sctp_report_all_outbound(struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+int sctp_expand_mapping_array(struct sctp_association *, uint32_t);
+
+void
+sctp_abort_notification(struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+/* We abort responding to an IP packet for some reason */
+void
+sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *,
+ struct mbuf *, int, struct sctphdr *, struct mbuf *, uint32_t, uint16_t);
+
+
+/* We choose to abort via user input */
+void
+sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *, int,
+ struct mbuf *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_handle_ootb(struct mbuf *, int, int, struct sctphdr *,
+ struct sctp_inpcb *, struct mbuf *, uint32_t, uint16_t);
+
+int
+sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
+ int totaddr, int *error);
+
+struct sctp_tcb *
+sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int *totaddr, int *num_v4, int *num_v6, int *error, int limit, int *bad_addr);
+
+int sctp_is_there_an_abort_here(struct mbuf *, int, uint32_t *);
+
+#ifdef INET6
+uint32_t sctp_is_same_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
+
+struct sockaddr_in6 *
+ sctp_recover_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
+
+#define sctp_recover_scope_mac(addr, store) do { \
+ if ((addr->sin6_family == AF_INET6) && \
+ (IN6_IS_SCOPE_LINKLOCAL(&addr->sin6_addr))) { \
+ *store = *addr; \
+ if (addr->sin6_scope_id == 0) { \
+ if (!sa6_recoverscope(store)) { \
+ addr = store; \
+ } \
+ } else { \
+ in6_clearscope(&addr->sin6_addr); \
+ addr = store; \
+ } \
+ } \
+} while (0)
+#endif
+
+int sctp_cmpaddr(struct sockaddr *, struct sockaddr *);
+
+void sctp_print_address(struct sockaddr *);
+void sctp_print_address_pkt(struct ip *, struct sctphdr *);
+
+int
+sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *,
+ int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+struct mbuf *sctp_generate_invmanparam(int);
+
+void
+sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error, void *p);
+void
+sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error);
+
+int sctp_local_addr_count(struct sctp_tcb *stcb);
+
+#ifdef SCTP_MBCNT_LOGGING
+void
+sctp_free_bufspace(struct sctp_tcb *, struct sctp_association *,
+ struct sctp_tmit_chunk *, int);
+
+#else
+#define sctp_free_bufspace(stcb, asoc, tp1, chk_cnt) \
+do { \
+ if (tp1->data != NULL) { \
+ atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
+ if ((asoc)->total_output_queue_size >= tp1->book_size) { \
+ atomic_subtract_int(&((asoc)->total_output_queue_size), tp1->book_size); \
+ } else { \
+ (asoc)->total_output_queue_size = 0; \
+ } \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) { \
+ atomic_subtract_int(&((stcb)->sctp_socket->so_snd.sb_cc), tp1->book_size); \
+ } else { \
+ stcb->sctp_socket->so_snd.sb_cc = 0; \
+ } \
+ } \
+ } \
+} while (0)
+
+#endif
+
+#define sctp_free_spbufspace(stcb, asoc, sp) \
+do { \
+ if (sp->data != NULL) { \
+ if ((asoc)->total_output_queue_size >= sp->length) { \
+ atomic_subtract_int(&(asoc)->total_output_queue_size, sp->length); \
+ } else { \
+ (asoc)->total_output_queue_size = 0; \
+ } \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket->so_snd.sb_cc >= sp->length) { \
+ atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc,sp->length); \
+ } else { \
+ stcb->sctp_socket->so_snd.sb_cc = 0; \
+ } \
+ } \
+ } \
+} while (0)
+
+#define sctp_snd_sb_alloc(stcb, sz) \
+do { \
+ atomic_add_int(&stcb->asoc.total_output_queue_size,sz); \
+ if ((stcb->sctp_socket != NULL) && \
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ atomic_add_int(&stcb->sctp_socket->so_snd.sb_cc,sz); \
+ } \
+} while (0)
+
+/* new functions to start/stop udp tunneling */
+void sctp_over_udp_stop(void);
+int sctp_over_udp_start(void);
+
+int
+sctp_soreceive(struct socket *so, struct sockaddr **psa,
+ struct uio *uio,
+ struct mbuf **mp0,
+ struct mbuf **controlp,
+ int *flagsp);
+
+
+/* For those not passing mbufs, this does the
+ * translations for you. Caller owns memory
+ * of size controllen returned in controlp.
+ */
+int
+sctp_l_soreceive(struct socket *so,
+ struct sockaddr **name,
+ struct uio *uio,
+ char **controlp,
+ int *controllen,
+ int *flag);
+
+
+void
+ sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d);
+
+void
+sctp_wakeup_log(struct sctp_tcb *stcb,
+ uint32_t cumtsn,
+ uint32_t wake_cnt, int from);
+
+void sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t, uint16_t, uint16_t, int);
+
+void sctp_log_nagle_event(struct sctp_tcb *stcb, int action);
+
+
+void
+ sctp_log_mb(struct mbuf *m, int from);
+
+void
+sctp_sblog(struct sockbuf *sb,
+ struct sctp_tcb *stcb, int from, int incr);
+
+void
+sctp_log_strm_del(struct sctp_queued_to_read *control,
+ struct sctp_queued_to_read *poschk,
+ int from);
+void sctp_log_cwnd(struct sctp_tcb *stcb, struct sctp_nets *, int, uint8_t);
+void rto_logging(struct sctp_nets *net, int from);
+
+void sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc);
+
+void sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from);
+void sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *, int, int, uint8_t);
+void sctp_log_block(uint8_t, struct socket *, struct sctp_association *, int);
+void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t);
+void sctp_log_mbcnt(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
+void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
+int sctp_fill_stat_log(void *, size_t *);
+void sctp_log_fr(uint32_t, uint32_t, uint32_t, int);
+void sctp_log_sack(uint32_t, uint32_t, uint32_t, uint16_t, uint16_t, int);
+void sctp_log_map(uint32_t, uint32_t, uint32_t, int);
+void sctp_print_mapping_array(struct sctp_association *asoc);
+void sctp_clr_stat_log(void);
+
+
+#ifdef SCTP_AUDITING_ENABLED
+void
+sctp_auditing(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+void sctp_audit_log(uint8_t, uint8_t);
+
+#endif
+
+
+#endif /* _KERNEL */
+#endif
diff --git a/freebsd/sys/netinet/tcp.h b/freebsd/sys/netinet/tcp.h
new file mode 100644
index 00000000..19b1c57f
--- /dev/null
+++ b/freebsd/sys/netinet/tcp.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/tcp.h>
diff --git a/freebsd/sys/netinet/tcp_debug.c b/freebsd/sys/netinet/tcp_debug.c
new file mode 100644
index 00000000..52a82193
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_debug.c
@@ -0,0 +1,226 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_debug.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#ifdef TCPDEBUG
+/* load symbolic names */
+#define PRUREQUESTS
+#define TCPSTATES
+#define TCPTIMERS
+#define TANAMES
+#endif
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcpip.h>
+#include <freebsd/netinet/tcp_debug.h>
+
+#ifdef TCPDEBUG
+static int tcpconsdebug = 0;
+#endif
+
+/*
+ * Global ring buffer of TCP debugging state. Each entry captures a snapshot
+ * of TCP connection state at a given moment. tcp_debx points at the
+ * next available slot. There is no explicit export of this data structure;
+ * it will be read via /dev/kmem by debugging tools.
+ */
+static struct tcp_debug tcp_debug[TCP_NDEBUG];
+static int tcp_debx;
+
+/*
+ * All global state is protected by tcp_debug_mtx; tcp_trace() is split into
+ * two parts, one of which saves connection and other state into the global
+ * array (locked by tcp_debug_mtx).
+ */
+struct mtx tcp_debug_mtx;
+MTX_SYSINIT(tcp_debug_mtx, &tcp_debug_mtx, "tcp_debug_mtx", MTX_DEF);
+
+/*
+ * Save TCP state at a given moment; optionally, both tcpcb and TCP packet
+ * header state will be saved.
+ */
+void
+tcp_trace(short act, short ostate, struct tcpcb *tp, void *ipgen,
+ struct tcphdr *th, int req)
+{
+#ifdef INET6
+ int isipv6;
+#endif /* INET6 */
+ tcp_seq seq, ack;
+ int len, flags;
+ struct tcp_debug *td;
+
+ mtx_lock(&tcp_debug_mtx);
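+ /* Claim the next ring buffer slot, wrapping the index at TCP_NDEBUG. */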
+ td = &tcp_debug[tcp_debx++];
+ if (tcp_debx == TCP_NDEBUG)
+ tcp_debx = 0;
+ bzero(td, sizeof(*td));
+#ifdef INET6
+ isipv6 = (ipgen != NULL && ((struct ip *)ipgen)->ip_v == 6) ? 1 : 0;
+#endif /* INET6 */
+ td->td_family =
+#ifdef INET6
+ (isipv6 != 0) ? AF_INET6 :
+#endif
+ AF_INET;
+#ifdef INET
+ td->td_time = iptime();
+#endif
+ td->td_act = act;
+ td->td_ostate = ostate;
+ td->td_tcb = (caddr_t)tp;
+ if (tp != NULL)
+ td->td_cb = *tp;
+ if (ipgen != NULL) {
+ switch (td->td_family) {
+#ifdef INET
+ case AF_INET:
+ bcopy(ipgen, &td->td_ti.ti_i, sizeof(td->td_ti.ti_i));
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ bcopy(ipgen, td->td_ip6buf, sizeof(td->td_ip6buf));
+ break;
+#endif
+ }
+ }
+ if (th != NULL) {
+ switch (td->td_family) {
+#ifdef INET
+ case AF_INET:
+ td->td_ti.ti_t = *th;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ td->td_ti6.th = *th;
+ break;
+#endif
+ }
+ }
+ td->td_req = req;
+ mtx_unlock(&tcp_debug_mtx);
+#ifdef TCPDEBUG
+ if (tcpconsdebug == 0)
+ return;
+ if (tp != NULL)
+ printf("%p %s:", tp, tcpstates[ostate]);
+ else
+ printf("???????? ");
+ printf("%s ", tanames[act]);
+ switch (act) {
+ case TA_INPUT:
+ case TA_OUTPUT:
+ case TA_DROP:
+ if (ipgen == NULL || th == NULL)
+ break;
+ seq = th->th_seq;
+ ack = th->th_ack;
+ len =
+#ifdef INET6
+ isipv6 ? ntohs(((struct ip6_hdr *)ipgen)->ip6_plen) :
+#endif
+ ((struct ip *)ipgen)->ip_len;
+ if (act == TA_OUTPUT) {
+ seq = ntohl(seq);
+ ack = ntohl(ack);
+ len = ntohs((u_short)len);
+ }
+ if (act == TA_OUTPUT)
+ len -= sizeof (struct tcphdr);
+ if (len)
+ printf("[%x..%x)", seq, seq+len);
+ else
+ printf("%x", seq);
+ printf("@%x, urp=%x", ack, th->th_urp);
+ flags = th->th_flags;
+ if (flags) {
+ char *cp = "<";
+#define pf(f) { \
+ if (th->th_flags & TH_##f) { \
+ printf("%s%s", cp, #f); \
+ cp = ","; \
+ } \
+}
+ pf(SYN); pf(ACK); pf(FIN); pf(RST); pf(PUSH); pf(URG);
+ printf(">");
+ }
+ break;
+
+ case TA_USER:
+ printf("%s", prurequests[req&0xff]);
+ if ((req & 0xff) == PRU_SLOWTIMO)
+ printf("<%s>", tcptimers[req>>8]);
+ break;
+ }
+ if (tp != NULL)
+ printf(" -> %s", tcpstates[tp->t_state]);
+ /* print out internal state of tp !?! */
+ printf("\n");
+ if (tp == NULL)
+ return;
+ printf(
+ "\trcv_(nxt,wnd,up) (%lx,%lx,%lx) snd_(una,nxt,max) (%lx,%lx,%lx)\n",
+ (u_long)tp->rcv_nxt, tp->rcv_wnd, (u_long)tp->rcv_up,
+ (u_long)tp->snd_una, (u_long)tp->snd_nxt, (u_long)tp->snd_max);
+ printf("\tsnd_(wl1,wl2,wnd) (%lx,%lx,%lx)\n",
+ (u_long)tp->snd_wl1, (u_long)tp->snd_wl2, tp->snd_wnd);
+#endif /* TCPDEBUG */
+}
diff --git a/freebsd/sys/netinet/tcp_debug.h b/freebsd/sys/netinet/tcp_debug.h
new file mode 100644
index 00000000..0c103958
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_debug.h
@@ -0,0 +1,80 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_debug.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_DEBUG_HH_
+#define _NETINET_TCP_DEBUG_HH_
+
+struct tcp_debug {
+ uint32_t td_time; /* network format */
+ short td_act;
+ short td_ostate;
+ caddr_t td_tcb;
+ int td_family;
+ /*
+ * Co-existence of td_ti and td_ti6 below is ugly, but it is necessary
+ * to achieve backward compatibility to some extent.
+ */
+ struct tcpiphdr td_ti;
+ struct {
+#define IP6_HDR_LEN 40 /* sizeof(struct ip6_hdr) */
+#if !defined(_KERNEL) && defined(INET6)
+ struct ip6_hdr ip6;
+#else
+ u_char ip6buf[IP6_HDR_LEN];
+#endif
+ struct tcphdr th;
+ } td_ti6;
+#define td_ip6buf td_ti6.ip6buf
+ short td_req;
+ struct tcpcb td_cb;
+};
+
+#define TA_INPUT 0
+#define TA_OUTPUT 1
+#define TA_USER 2
+#define TA_RESPOND 3
+#define TA_DROP 4
+
+#ifdef TANAMES
+static const char *tanames[] =
+ { "input", "output", "user", "respond", "drop" };
+#endif
+
+#define TCP_NDEBUG 100
+
+#ifndef _KERNEL
+/* XXX common variables for broken applications. */
+struct tcp_debug tcp_debug[TCP_NDEBUG];
+int tcp_debx;
+#endif
+
+#endif /* !_NETINET_TCP_DEBUG_HH_ */
diff --git a/freebsd/sys/netinet/tcp_fsm.h b/freebsd/sys/netinet/tcp_fsm.h
new file mode 100644
index 00000000..253e53d4
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_fsm.h
@@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_FSM_HH_
+#define _NETINET_TCP_FSM_HH_
+
+/*
+ * TCP FSM state definitions.
+ *
+ * Per RFC793, September, 1981.
+ */
+
+#define TCP_NSTATES 11
+
+#define TCPS_CLOSED 0 /* closed */
+#define TCPS_LISTEN 1 /* listening for connection */
+#define TCPS_SYN_SENT 2 /* active, have sent syn */
+#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */
+/* states < TCPS_ESTABLISHED are those where connections are not established */
+#define TCPS_ESTABLISHED 4 /* established */
+#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */
+/* states > TCPS_CLOSE_WAIT are those where user has closed */
+#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */
+#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */
+#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */
+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
+#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */
+#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */
+
+/* for KAME src sync over BSD*'s */
+#define TCP6_NSTATES TCP_NSTATES
+#define TCP6S_CLOSED TCPS_CLOSED
+#define TCP6S_LISTEN TCPS_LISTEN
+#define TCP6S_SYN_SENT TCPS_SYN_SENT
+#define TCP6S_SYN_RECEIVED TCPS_SYN_RECEIVED
+#define TCP6S_ESTABLISHED TCPS_ESTABLISHED
+#define TCP6S_CLOSE_WAIT TCPS_CLOSE_WAIT
+#define TCP6S_FIN_WAIT_1 TCPS_FIN_WAIT_1
+#define TCP6S_CLOSING TCPS_CLOSING
+#define TCP6S_LAST_ACK TCPS_LAST_ACK
+#define TCP6S_FIN_WAIT_2 TCPS_FIN_WAIT_2
+#define TCP6S_TIME_WAIT TCPS_TIME_WAIT
+
+#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED)
+#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED)
+#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT)
+
+#ifdef TCPOUTFLAGS
+/*
+ * Flags used when sending segments in tcp_output. Basic flags (TH_RST,
+ * TH_ACK,TH_SYN,TH_FIN) are totally determined by state, with the proviso
+ * that TH_FIN is sent only if all data queued for output is included in the
+ * segment.
+ */
+static u_char tcp_outflags[TCP_NSTATES] = {
+ TH_RST|TH_ACK, /* 0, CLOSED */
+ 0, /* 1, LISTEN */
+ TH_SYN, /* 2, SYN_SENT */
+ TH_SYN|TH_ACK, /* 3, SYN_RECEIVED */
+ TH_ACK, /* 4, ESTABLISHED */
+ TH_ACK, /* 5, CLOSE_WAIT */
+ TH_FIN|TH_ACK, /* 6, FIN_WAIT_1 */
+ TH_FIN|TH_ACK, /* 7, CLOSING */
+ TH_FIN|TH_ACK, /* 8, LAST_ACK */
+ TH_ACK, /* 9, FIN_WAIT_2 */
+ TH_ACK, /* 10, TIME_WAIT */
+};
+#endif
+
+#ifdef KPROF
+int tcp_acounts[TCP_NSTATES][PRU_NREQ];
+#endif
+
+#ifdef TCPSTATES
+static char const * const tcpstates[] = {
+ "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD",
+ "ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING",
+ "LAST_ACK", "FIN_WAIT_2", "TIME_WAIT",
+};
+#endif
+
+#endif
diff --git a/freebsd/sys/netinet/tcp_hostcache.c b/freebsd/sys/netinet/tcp_hostcache.c
new file mode 100644
index 00000000..07b78cfe
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_hostcache.c
@@ -0,0 +1,693 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2002 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The tcp_hostcache moves the tcp-specific cached metrics from the routing
+ * table to a dedicated structure indexed by the remote IP address. It keeps
+ * information on the measured TCP parameters of past TCP sessions to allow
+ * better initial start values to be used with later connections to/from the
+ * same source. Depending on the network parameters (delay, bandwidth, max
+ * MTU, congestion window) between local and remote sites, this can lead to
+ * significant speed-ups for new TCP connections after the first one.
+ *
+ * Due to the tcp_hostcache, all TCP-specific metrics information in the
+ * routing table has been removed. The inpcb no longer keeps a pointer to
+ * the routing entry, and protocol-initiated route cloning has been removed
+ * as well. With these changes, the routing table has gone back to being
+ * more lightweight and only carries information related to packet forwarding.
+ *
+ * tcp_hostcache is designed for multiple concurrent access in SMP
+ * environments and high contention. All bucket rows have their own lock and
+ * thus multiple lookups and modifies can be done at the same time as long as
+ * they are in different bucket rows. If a request for insertion of a new
+ * record can't be satisfied, it simply returns an empty structure. Nobody
+ * and nothing outside of tcp_hostcache.c will ever point directly to any
+ * entry in the tcp_hostcache. All communication is done in an
+ * object-oriented way and only functions of tcp_hostcache will manipulate
+ * hostcache entries. Otherwise, we are unable to achieve good behaviour in
+ * concurrent access situations. Since tcp_hostcache is only caching
+ * information, there are no fatal consequences if we either can't satisfy
+ * any particular request or have to drop/overwrite an existing entry because
+ * of bucket limit memory constraints.
+ */
+
+/*
+ * Many thanks to jlemon for basic structure of tcp_syncache which is being
+ * followed here.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet6.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip_var.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#endif
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcp_hostcache.h>
+#ifdef INET6
+#include <freebsd/netinet6/tcp6_var.h>
+#endif
+
+#include <freebsd/vm/uma.h>
+
+/* Arbitrary values */
+#define TCP_HOSTCACHE_HASHSIZE 512
+#define TCP_HOSTCACHE_BUCKETLIMIT 30
+#define TCP_HOSTCACHE_EXPIRE 60*60 /* one hour */
+#define TCP_HOSTCACHE_PRUNE 5*60 /* every 5 minutes */
+
+static VNET_DEFINE(struct tcp_hostcache, tcp_hostcache);
+#define V_tcp_hostcache VNET(tcp_hostcache)
+
+static VNET_DEFINE(struct callout, tcp_hc_callout);
+#define V_tcp_hc_callout VNET(tcp_hc_callout)
+
+static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *);
+static struct hc_metrics *tcp_hc_insert(struct in_conninfo *);
+static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS);
+static void tcp_hc_purge_internal(int);
+static void tcp_hc_purge(void *);
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0,
+ "TCP Host cache");
+
+SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
+ &VNET_NAME(tcp_hostcache.cache_limit), 0,
+ "Overall entry limit for hostcache");
+
+SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
+ &VNET_NAME(tcp_hostcache.hashsize), 0,
+ "Size of TCP hostcache hashtable");
+
+SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit,
+ CTLFLAG_RDTUN, &VNET_NAME(tcp_hostcache.bucket_limit), 0,
+ "Per-bucket hash limit for hostcache");
+
+SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD,
+ &VNET_NAME(tcp_hostcache.cache_count), 0,
+ "Current number of entries in hostcache");
+
+SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW,
+ &VNET_NAME(tcp_hostcache.expire), 0,
+ "Expire time of TCP hostcache entries");
+
+SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, prune, CTLFLAG_RW,
+ &VNET_NAME(tcp_hostcache.prune), 0,
+ "Time between purge runs");
+
+SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW,
+ &VNET_NAME(tcp_hostcache.purgeall), 0,
+ "Expire all entires on next purge run");
+
+SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
+ sysctl_tcp_hc_list, "A", "List of all hostcache entries");
+
+
+static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache");
+
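+/*
+ * Hash the foreign IPv4 address by folding in right-shifted copies of it
+ * and masking the result to the size of the hash table.
+ */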
+#define HOSTCACHE_HASH(ip) \
+ (((ip)->s_addr ^ ((ip)->s_addr >> 7) ^ ((ip)->s_addr >> 17)) & \
+ V_tcp_hostcache.hashmask)
+
+/* XXX: What is the recommended hash to get good entropy for IPv6 addresses? */
+#define HOSTCACHE_HASH6(ip6) \
+ (((ip6)->s6_addr32[0] ^ \
+ (ip6)->s6_addr32[1] ^ \
+ (ip6)->s6_addr32[2] ^ \
+ (ip6)->s6_addr32[3]) & \
+ V_tcp_hostcache.hashmask)
+
+#define THC_LOCK(lp) mtx_lock(lp)
+#define THC_UNLOCK(lp) mtx_unlock(lp)
+
+void
+tcp_hc_init(void)
+{
+ int i;
+
+ /*
+ * Initialize hostcache structures.
+ */
+ V_tcp_hostcache.cache_count = 0;
+ V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
+ V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
+ V_tcp_hostcache.cache_limit =
+ V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit;
+ V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
+ V_tcp_hostcache.prune = TCP_HOSTCACHE_PRUNE;
+
+ TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize",
+ &V_tcp_hostcache.hashsize);
+ TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit",
+ &V_tcp_hostcache.cache_limit);
+ TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit",
+ &V_tcp_hostcache.bucket_limit);
+ if (!powerof2(V_tcp_hostcache.hashsize)) {
+ printf("WARNING: hostcache hash size is not a power of 2.\n");
+ V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; /* default */
+ }
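+ /* hashsize is a power of two, so hashsize - 1 yields the bucket index mask. */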
+ V_tcp_hostcache.hashmask = V_tcp_hostcache.hashsize - 1;
+
+ /*
+ * Allocate the hash table.
+ */
+ V_tcp_hostcache.hashbase = (struct hc_head *)
+ malloc(V_tcp_hostcache.hashsize * sizeof(struct hc_head),
+ M_HOSTCACHE, M_WAITOK | M_ZERO);
+
+ /*
+ * Initialize the hash buckets.
+ */
+ for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+ TAILQ_INIT(&V_tcp_hostcache.hashbase[i].hch_bucket);
+ V_tcp_hostcache.hashbase[i].hch_length = 0;
+ mtx_init(&V_tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry",
+ NULL, MTX_DEF);
+ }
+
+ /*
+ * Allocate the hostcache entries.
+ */
+ V_tcp_hostcache.zone =
+ uma_zcreate("hostcache", sizeof(struct hc_metrics),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ uma_zone_set_max(V_tcp_hostcache.zone, V_tcp_hostcache.cache_limit);
+
+ /*
+ * Set up periodic cache cleanup.
+ */
+ callout_init(&V_tcp_hc_callout, CALLOUT_MPSAFE);
+ callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
+ tcp_hc_purge, curvnet);
+}
+
+#ifdef VIMAGE
+void
+tcp_hc_destroy(void)
+{
+ int i;
+
+ callout_drain(&V_tcp_hc_callout);
+
+ /* Purge all hc entries. */
+ tcp_hc_purge_internal(1);
+
+ /* Free the uma zone and the allocated hash table. */
+ uma_zdestroy(V_tcp_hostcache.zone);
+
+ for (i = 0; i < V_tcp_hostcache.hashsize; i++)
+ mtx_destroy(&V_tcp_hostcache.hashbase[i].hch_mtx);
+ free(V_tcp_hostcache.hashbase, M_HOSTCACHE);
+}
+#endif
+
+/*
+ * Internal function: look up an entry in the hostcache or return NULL.
+ *
+ * If an entry has been returned, the caller becomes responsible for
+ * unlocking the bucket row after he is done reading/modifying the entry.
+ */
+static struct hc_metrics *
+tcp_hc_lookup(struct in_conninfo *inc)
+{
+ int hash;
+ struct hc_head *hc_head;
+ struct hc_metrics *hc_entry;
+
+ KASSERT(inc != NULL, ("tcp_hc_lookup with NULL in_conninfo pointer"));
+
+ /*
+ * Hash the foreign ip address.
+ */
+ if (inc->inc_flags & INC_ISIPV6)
+ hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
+ else
+ hash = HOSTCACHE_HASH(&inc->inc_faddr);
+
+ hc_head = &V_tcp_hostcache.hashbase[hash];
+
+ /*
+ * Acquire lock for this bucket row; we release the lock if we don't
+ * find an entry, otherwise the caller has to unlock after he is
+ * done.
+ */
+ THC_LOCK(&hc_head->hch_mtx);
+
+ /*
+ * Iterate through entries in bucket row looking for a match.
+ */
+ TAILQ_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) {
+ if (inc->inc_flags & INC_ISIPV6) {
+ if (memcmp(&inc->inc6_faddr, &hc_entry->ip6,
+ sizeof(inc->inc6_faddr)) == 0)
+ return hc_entry;
+ } else {
+ if (memcmp(&inc->inc_faddr, &hc_entry->ip4,
+ sizeof(inc->inc_faddr)) == 0)
+ return hc_entry;
+ }
+ }
+
+ /*
+ * We were unsuccessful and didn't find anything.
+ */
+ THC_UNLOCK(&hc_head->hch_mtx);
+ return NULL;
+}
+
+/*
+ * Internal function: insert an entry into the hostcache or return NULL if
+ * unable to allocate a new one.
+ *
+ * If an entry has been returned, the caller becomes responsible for
+ * unlocking the bucket row after he is done reading/modifying the entry.
+ */
+static struct hc_metrics *
+tcp_hc_insert(struct in_conninfo *inc)
+{
+ int hash;
+ struct hc_head *hc_head;
+ struct hc_metrics *hc_entry;
+
+ KASSERT(inc != NULL, ("tcp_hc_insert with NULL in_conninfo pointer"));
+
+ /*
+ * Hash the foreign ip address.
+ */
+ if (inc->inc_flags & INC_ISIPV6)
+ hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
+ else
+ hash = HOSTCACHE_HASH(&inc->inc_faddr);
+
+ hc_head = &V_tcp_hostcache.hashbase[hash];
+
+ /*
+ * Acquire lock for this bucket row; we release the lock if we don't
+ * find an entry, otherwise the caller has to unlock after he is
+ * done.
+ */
+ THC_LOCK(&hc_head->hch_mtx);
+
+ /*
+ * If the bucket limit or the overall cache limit is reached,
+ * reuse the least-used element in this bucket row.
+ */
+ if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit ||
+ V_tcp_hostcache.cache_count >= V_tcp_hostcache.cache_limit) {
+ hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
+ /*
+ * At first we were dropping the last element, just to
+ * reacquire it in the next two lines again, which isn't very
+ * efficient. Instead just reuse the least used element.
+ * We may drop something that is still "in-use" but we can be
+ * "lossy".
+ * Just give up if this bucket row is empty and we don't have
+ * anything to replace.
+ */
+ if (hc_entry == NULL) {
+ THC_UNLOCK(&hc_head->hch_mtx);
+ return NULL;
+ }
+ TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
+ V_tcp_hostcache.hashbase[hash].hch_length--;
+ V_tcp_hostcache.cache_count--;
+ TCPSTAT_INC(tcps_hc_bucketoverflow);
+#if 0
+ uma_zfree(V_tcp_hostcache.zone, hc_entry);
+#endif
+ } else {
+ /*
+ * Allocate a new entry, or balk if not possible.
+ */
+ hc_entry = uma_zalloc(V_tcp_hostcache.zone, M_NOWAIT);
+ if (hc_entry == NULL) {
+ THC_UNLOCK(&hc_head->hch_mtx);
+ return NULL;
+ }
+ }
+
+ /*
+ * Initialize basic information of hostcache entry.
+ */
+ bzero(hc_entry, sizeof(*hc_entry));
+ if (inc->inc_flags & INC_ISIPV6)
+ bcopy(&inc->inc6_faddr, &hc_entry->ip6, sizeof(hc_entry->ip6));
+ else
+ hc_entry->ip4 = inc->inc_faddr;
+ hc_entry->rmx_head = hc_head;
+ hc_entry->rmx_expire = V_tcp_hostcache.expire;
+
+ /*
+ * Put it upfront.
+ */
+ TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
+ V_tcp_hostcache.hashbase[hash].hch_length++;
+ V_tcp_hostcache.cache_count++;
+ TCPSTAT_INC(tcps_hc_added);
+
+ return hc_entry;
+}
+
+/*
+ * External function: look up an entry in the hostcache and fill out the
+ * supplied TCP metrics structure. Fills in zeros when no entry was found or
+ * a value is not set.
+ */
+void
+tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite)
+{
+ struct hc_metrics *hc_entry;
+
+ /*
+ * Find the right bucket.
+ */
+ hc_entry = tcp_hc_lookup(inc);
+
+ /*
+ * If we don't have an existing object.
+ */
+ if (hc_entry == NULL) {
+ bzero(hc_metrics_lite, sizeof(*hc_metrics_lite));
+ return;
+ }
+ hc_entry->rmx_hits++;
+ hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
+
+ hc_metrics_lite->rmx_mtu = hc_entry->rmx_mtu;
+ hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh;
+ hc_metrics_lite->rmx_rtt = hc_entry->rmx_rtt;
+ hc_metrics_lite->rmx_rttvar = hc_entry->rmx_rttvar;
+ hc_metrics_lite->rmx_bandwidth = hc_entry->rmx_bandwidth;
+ hc_metrics_lite->rmx_cwnd = hc_entry->rmx_cwnd;
+ hc_metrics_lite->rmx_sendpipe = hc_entry->rmx_sendpipe;
+ hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe;
+
+ /*
+ * Unlock bucket row.
+ */
+ THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
+}
+
+/*
+ * External function: look up an entry in the hostcache and return the
+ * discovered path MTU. Returns 0 if no entry is found or the value is not
+ * set.
+ */
+u_long
+tcp_hc_getmtu(struct in_conninfo *inc)
+{
+ struct hc_metrics *hc_entry;
+ u_long mtu;
+
+ hc_entry = tcp_hc_lookup(inc);
+ if (hc_entry == NULL) {
+ return 0;
+ }
+ hc_entry->rmx_hits++;
+ hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
+
+ mtu = hc_entry->rmx_mtu;
+ THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
+ return mtu;
+}
+
+/*
+ * External function: update the MTU value of an entry in the hostcache.
+ * Creates a new entry if none was found.
+ */
+void
+tcp_hc_updatemtu(struct in_conninfo *inc, u_long mtu)
+{
+ struct hc_metrics *hc_entry;
+
+ /*
+ * Find the right bucket.
+ */
+ hc_entry = tcp_hc_lookup(inc);
+
+ /*
+ * If we don't have an existing object, try to insert a new one.
+ */
+ if (hc_entry == NULL) {
+ hc_entry = tcp_hc_insert(inc);
+ if (hc_entry == NULL)
+ return;
+ }
+ hc_entry->rmx_updates++;
+ hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
+
+ hc_entry->rmx_mtu = mtu;
+
+ /*
+ * Put it upfront so we find it faster next time.
+ */
+ TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
+ TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
+
+ /*
+ * Unlock bucket row.
+ */
+ THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
+}
+
+/*
+ * External function: update the TCP metrics of an entry in the hostcache.
+ * Creates a new entry if none was found.
+ */
+void
+tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml)
+{
+ struct hc_metrics *hc_entry;
+
+ hc_entry = tcp_hc_lookup(inc);
+ if (hc_entry == NULL) {
+ hc_entry = tcp_hc_insert(inc);
+ if (hc_entry == NULL)
+ return;
+ }
+ hc_entry->rmx_updates++;
+ hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
+
+ if (hcml->rmx_rtt != 0) {
+ if (hc_entry->rmx_rtt == 0)
+ hc_entry->rmx_rtt = hcml->rmx_rtt;
+ else
+ hc_entry->rmx_rtt =
+ (hc_entry->rmx_rtt + hcml->rmx_rtt) / 2;
+ TCPSTAT_INC(tcps_cachedrtt);
+ }
+ if (hcml->rmx_rttvar != 0) {
+ if (hc_entry->rmx_rttvar == 0)
+ hc_entry->rmx_rttvar = hcml->rmx_rttvar;
+ else
+ hc_entry->rmx_rttvar =
+ (hc_entry->rmx_rttvar + hcml->rmx_rttvar) / 2;
+ TCPSTAT_INC(tcps_cachedrttvar);
+ }
+ if (hcml->rmx_ssthresh != 0) {
+ if (hc_entry->rmx_ssthresh == 0)
+ hc_entry->rmx_ssthresh = hcml->rmx_ssthresh;
+ else
+ hc_entry->rmx_ssthresh =
+ (hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2;
+ TCPSTAT_INC(tcps_cachedssthresh);
+ }
+ if (hcml->rmx_bandwidth != 0) {
+ if (hc_entry->rmx_bandwidth == 0)
+ hc_entry->rmx_bandwidth = hcml->rmx_bandwidth;
+ else
+ hc_entry->rmx_bandwidth =
+ (hc_entry->rmx_bandwidth + hcml->rmx_bandwidth) / 2;
+ /* TCPSTAT_INC(tcps_cachedbandwidth); */
+ }
+ if (hcml->rmx_cwnd != 0) {
+ if (hc_entry->rmx_cwnd == 0)
+ hc_entry->rmx_cwnd = hcml->rmx_cwnd;
+ else
+ hc_entry->rmx_cwnd =
+ (hc_entry->rmx_cwnd + hcml->rmx_cwnd) / 2;
+ /* TCPSTAT_INC(tcps_cachedcwnd); */
+ }
+ if (hcml->rmx_sendpipe != 0) {
+ if (hc_entry->rmx_sendpipe == 0)
+ hc_entry->rmx_sendpipe = hcml->rmx_sendpipe;
+ else
+ hc_entry->rmx_sendpipe =
+ (hc_entry->rmx_sendpipe + hcml->rmx_sendpipe) /2;
+ /* TCPSTAT_INC(tcps_cachedsendpipe); */
+ }
+ if (hcml->rmx_recvpipe != 0) {
+ if (hc_entry->rmx_recvpipe == 0)
+ hc_entry->rmx_recvpipe = hcml->rmx_recvpipe;
+ else
+ hc_entry->rmx_recvpipe =
+ (hc_entry->rmx_recvpipe + hcml->rmx_recvpipe) /2;
+ /* TCPSTAT_INC(tcps_cachedrecvpipe); */
+ }
+
+ TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
+ TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
+ THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
+}
+
+/*
+ * Sysctl function: prints the list and values of all hostcache entries in
+ * unsorted order.
+ */
+static int
+sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
+{
+ int bufsize;
+ int linesize = 128;
+ char *p, *buf;
+ int len, i, error;
+ struct hc_metrics *hc_entry;
+#ifdef INET6
+ char ip6buf[INET6_ADDRSTRLEN];
+#endif
+
+ bufsize = linesize * (V_tcp_hostcache.cache_count + 1);
+
+ p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
+
+ len = snprintf(p, linesize,
+ "\nIP address MTU SSTRESH RTT RTTVAR BANDWIDTH "
+ " CWND SENDPIPE RECVPIPE HITS UPD EXP\n");
+ p += len;
+
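+/* Convert microseconds to milliseconds, rounding to the nearest value. */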
+#define msec(u) (((u) + 500) / 1000)
+ for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+ THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
+ TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket,
+ rmx_q) {
+ len = snprintf(p, linesize,
+ "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu "
+ "%4lu %4lu %4i\n",
+ hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) :
+#ifdef INET6
+ ip6_sprintf(ip6buf, &hc_entry->ip6),
+#else
+ "IPv6?",
+#endif
+ hc_entry->rmx_mtu,
+ hc_entry->rmx_ssthresh,
+ msec(hc_entry->rmx_rtt *
+ (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
+ msec(hc_entry->rmx_rttvar *
+ (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
+ hc_entry->rmx_bandwidth * 8,
+ hc_entry->rmx_cwnd,
+ hc_entry->rmx_sendpipe,
+ hc_entry->rmx_recvpipe,
+ hc_entry->rmx_hits,
+ hc_entry->rmx_updates,
+ hc_entry->rmx_expire);
+ p += len;
+ }
+ THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
+ }
+#undef msec
+ error = SYSCTL_OUT(req, buf, p - buf);
+ free(buf, M_TEMP);
+ return(error);
+}
+
+/*
+ * Caller has to make sure the curvnet is set properly.
+ */
+static void
+tcp_hc_purge_internal(int all)
+{
+ struct hc_metrics *hc_entry, *hc_next;
+ int i;
+
+ for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+ THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
+ TAILQ_FOREACH_SAFE(hc_entry,
+ &V_tcp_hostcache.hashbase[i].hch_bucket, rmx_q, hc_next) {
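+ /*
+ * Free the entry if it has expired (or if all entries are
+ * being purged); otherwise age it by one prune interval.
+ */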
+ if (all || hc_entry->rmx_expire <= 0) {
+ TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket,
+ hc_entry, rmx_q);
+ uma_zfree(V_tcp_hostcache.zone, hc_entry);
+ V_tcp_hostcache.hashbase[i].hch_length--;
+ V_tcp_hostcache.cache_count--;
+ } else
+ hc_entry->rmx_expire -= V_tcp_hostcache.prune;
+ }
+ THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
+ }
+}
+
+/*
+ * Expire and purge (old|all) entries in the tcp_hostcache. Runs
+ * periodically from the callout.
+ */
+static void
+tcp_hc_purge(void *arg)
+{
+ CURVNET_SET((struct vnet *) arg);
+ int all = 0;
+
+ if (V_tcp_hostcache.purgeall) {
+ all = 1;
+ V_tcp_hostcache.purgeall = 0;
+ }
+
+ tcp_hc_purge_internal(all);
+
+ callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
+ tcp_hc_purge, arg);
+ CURVNET_RESTORE();
+}
diff --git a/freebsd/sys/netinet/tcp_hostcache.h b/freebsd/sys/netinet/tcp_hostcache.h
new file mode 100644
index 00000000..a494ed03
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_hostcache.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 2002 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Many thanks to jlemon for basic structure of tcp_syncache which is being
+ * followed here.
+ */
+
+#ifndef _NETINET_TCP_HOSTCACHE_HH_
+#define _NETINET_TCP_HOSTCACHE_HH_
+
+TAILQ_HEAD(hc_qhead, hc_metrics);
+
+struct hc_head {
+ struct hc_qhead hch_bucket;
+ u_int hch_length;
+ struct mtx hch_mtx;
+};
+
+struct hc_metrics {
+ /* housekeeping */
+ TAILQ_ENTRY(hc_metrics) rmx_q;
+ struct hc_head *rmx_head; /* head of bucket tail queue */
+ struct in_addr ip4; /* IP address */
+ struct in6_addr ip6; /* IP6 address */
+ /* endpoint specific values for tcp */
+ u_long rmx_mtu; /* MTU for this path */
+ u_long rmx_ssthresh; /* outbound gateway buffer limit */
+ u_long rmx_rtt; /* estimated round trip time */
+ u_long rmx_rttvar; /* estimated rtt variance */
+ u_long rmx_bandwidth; /* estimated bandwidth */
+ u_long rmx_cwnd; /* congestion window */
+ u_long rmx_sendpipe; /* outbound delay-bandwidth product */
+ u_long rmx_recvpipe; /* inbound delay-bandwidth product */
+ /* TCP hostcache internal data */
+ int rmx_expire; /* lifetime for object */
+ u_long rmx_hits; /* number of hits */
+ u_long rmx_updates; /* number of updates */
+};
+
+struct tcp_hostcache {
+ struct hc_head *hashbase;
+ uma_zone_t zone;
+ u_int hashsize;
+ u_int hashmask;
+ u_int bucket_limit;
+ u_int cache_count;
+ u_int cache_limit;
+ int expire;
+ int prune;
+ int purgeall;
+};
+
+#endif /* !_NETINET_TCP_HOSTCACHE_HH_*/
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
new file mode 100644
index 00000000..85daf203
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -0,0 +1,3453 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ipfw.h> /* for ipfw_fwd */
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/proc.h> /* for proc0 declaration */
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#define TCPSTATES /* for logging */
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_icmp.h> /* required for icmp_var.h */
+#include <freebsd/netinet/icmp_var.h> /* for ICMP_BANDLIM */
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet/icmp6.h>
+#include <freebsd/netinet6/in6_pcb.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/nd6.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet6/tcp6_var.h>
+#include <freebsd/netinet/tcpip.h>
+#include <freebsd/netinet/tcp_syncache.h>
+#ifdef TCPDEBUG
+#include <freebsd/netinet/tcp_debug.h>
+#endif /* TCPDEBUG */
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#include <freebsd/netipsec/ipsec6.h>
+#endif /*IPSEC*/
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+static const int tcprexmtthresh = 3;
+
+VNET_DEFINE(struct tcpstat, tcpstat);
+SYSCTL_VNET_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW,
+ &VNET_NAME(tcpstat), tcpstat,
+ "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
+
+int tcp_log_in_vain = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
+ &tcp_log_in_vain, 0,
+ "Log all incoming TCP segments to closed ports");
+
+VNET_DEFINE(int, blackhole) = 0;
+#define V_blackhole VNET(blackhole)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
+ &VNET_NAME(blackhole), 0,
+ "Do not send RST on segments to closed ports");
+
+VNET_DEFINE(int, tcp_delack_enabled) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW,
+ &VNET_NAME(tcp_delack_enabled), 0,
+ "Delay ACK to try and piggyback it onto a data packet");
+
+VNET_DEFINE(int, drop_synfin) = 0;
+#define V_drop_synfin VNET(drop_synfin)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
+ &VNET_NAME(drop_synfin), 0,
+ "Drop TCP packets with SYN+FIN set");
+
+VNET_DEFINE(int, tcp_do_rfc3042) = 1;
+#define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_rfc3042), 0,
+ "Enable RFC 3042 (Limited Transmit)");
+
+VNET_DEFINE(int, tcp_do_rfc3390) = 1;
+#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_rfc3390), 0,
+ "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
+
+VNET_DEFINE(int, tcp_do_rfc3465) = 1;
+#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_rfc3465), 0,
+ "Enable RFC 3465 (Appropriate Byte Counting)");
+
+VNET_DEFINE(int, tcp_abc_l_var) = 2;
+#define V_tcp_abc_l_var VNET(tcp_abc_l_var)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW,
+ &VNET_NAME(tcp_abc_l_var), 2,
+ "Cap the max cwnd increment during slow-start to this number of segments");
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
+
+VNET_DEFINE(int, tcp_do_ecn) = 0;
+SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_ecn), 0,
+ "TCP ECN support");
+
+VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_RW,
+ &VNET_NAME(tcp_ecn_maxretries), 0,
+ "Max retries before giving up on ECN");
+
+VNET_DEFINE(int, tcp_insecure_rst) = 0;
+#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW,
+ &VNET_NAME(tcp_insecure_rst), 0,
+ "Follow the old (insecure) criteria for accepting RST packets");
+
+VNET_DEFINE(int, tcp_do_autorcvbuf) = 1;
+#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_autorcvbuf), 0,
+ "Enable automatic receive buffer sizing");
+
+VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024;
+#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
+ &VNET_NAME(tcp_autorcvbuf_inc), 0,
+ "Incrementor step size of automatic receive buffer");
+
+VNET_DEFINE(int, tcp_autorcvbuf_max) = 256*1024;
+#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
+ &VNET_NAME(tcp_autorcvbuf_max), 0,
+ "Max size of automatic receive buffer");
+
+int tcp_read_locking = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW,
+ &tcp_read_locking, 0, "Enable read locking strategy");
+
+VNET_DEFINE(struct inpcbhead, tcb);
+#define tcb6 tcb /* for KAME src sync over BSD*'s */
+VNET_DEFINE(struct inpcbinfo, tcbinfo);
+
+static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
+static void tcp_do_segment(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int, uint8_t,
+ int);
+static void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
+ struct tcpcb *, int, int);
+static void tcp_pulloutofband(struct socket *,
+ struct tcphdr *, struct mbuf *, int);
+static void tcp_xmit_timer(struct tcpcb *, int);
+static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
+static void inline
+ tcp_congestion_exp(struct tcpcb *);
+
+/*
+ * Kernel module interface for updating tcpstat. The argument is an index
+ * into tcpstat treated as an array of u_long. While this encodes the
+ * general layout of tcpstat into the caller, it doesn't encode its location,
+ * so that future changes to add, for example, per-CPU stats support won't
+ * cause binary compatibility problems for kernel modules.
+ */
+void
+kmod_tcpstat_inc(int statnum)
+{
+
+ (*((u_long *)&V_tcpstat + statnum))++;
+}
+
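+/*
+ * Enter congestion recovery: set the slow start threshold to half of the
+ * smaller of the send and congestion windows (at least two segments),
+ * record the recovery point and, if ECN is in use on this connection,
+ * arrange for CWR to be signalled to the peer.
+ */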
+static void inline
+tcp_congestion_exp(struct tcpcb *tp)
+{
+ u_int win;
+
+ win = min(tp->snd_wnd, tp->snd_cwnd) /
+ 2 / tp->t_maxseg;
+ if (win < 2)
+ win = 2;
+ tp->snd_ssthresh = win * tp->t_maxseg;
+ ENTER_FASTRECOVERY(tp);
+ tp->snd_recover = tp->snd_max;
+ if (tp->t_flags & TF_ECN_PERMIT)
+ tp->t_flags |= TF_ECN_SND_CWR;
+}
+
+/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
+#ifdef INET6
+#define ND6_HINT(tp) \
+do { \
+ if ((tp) && (tp)->t_inpcb && \
+ ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
+ nd6_nud_hint(NULL, NULL, 0); \
+} while (0)
+#else
+#define ND6_HINT(tp)
+#endif
+
+/*
+ * Indicate whether this ack should be delayed. We can delay the ack if
+ * - there is no delayed ack timer in progress and
+ * - our last ack wasn't a 0-sized window. We never want to delay
+ * the ack that opens up a 0-sized window and
+ * - delayed acks are enabled or
+ * - this is a half-synchronized T/TCP connection.
+ */
+#define DELAY_ACK(tp) \
+ ((!tcp_timer_active(tp, TT_DELACK) && \
+ (tp->t_flags & TF_RXWIN0SENT) == 0) && \
+ (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+
+/*
+ * TCP input handling is split into multiple parts:
+ * tcp6_input is a thin wrapper around tcp_input for the extended
+ * ip6_protox[] call format in ip6_input
+ * tcp_input handles primary segment validation, inpcb lookup and
+ * SYN processing on listen sockets
+ * tcp_do_segment processes the ACK and text of the segment for
+ * establishing, established and closing connections
+ */
+#ifdef INET6
+int
+tcp6_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct mbuf *m = *mp;
+ struct in6_ifaddr *ia6;
+
+ IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
+
+ /*
+ * draft-itojun-ipv6-tcp-to-anycast
+ * better place to put this in?
+ */
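+ /*
+ * Reject TCP segments addressed to an IPv6 anycast destination and
+ * answer with an ICMPv6 destination unreachable error.
+ */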
+ ia6 = ip6_getdstifaddr(m);
+ if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
+ struct ip6_hdr *ip6;
+
+ ifa_free(&ia6->ia_ifa);
+ ip6 = mtod(m, struct ip6_hdr *);
+ icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
+ (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
+ return IPPROTO_DONE;
+ }
+
+ tcp_input(m, *offp);
+ return IPPROTO_DONE;
+}
+#endif
+
+void
+tcp_input(struct mbuf *m, int off0)
+{
+ struct tcphdr *th;
+ struct ip *ip = NULL;
+ struct ipovly *ipov;
+ struct inpcb *inp = NULL;
+ struct tcpcb *tp = NULL;
+ struct socket *so = NULL;
+ u_char *optp = NULL;
+ int optlen = 0;
+ int len, tlen, off;
+ int drop_hdrlen;
+ int thflags;
+ int rstreason = 0; /* For badport_bandlim accounting purposes */
+ uint8_t iptos;
+#ifdef IPFIREWALL_FORWARD
+ struct m_tag *fwd_tag;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
+ int isipv6;
+#else
+ const void *ip6 = NULL;
+ const int isipv6 = 0;
+#endif
+ struct tcpopt to; /* options in this segment */
+ char *s = NULL; /* address and port logging */
+ int ti_locked;
+#define TI_UNLOCKED 1
+#define TI_RLOCKED 2
+#define TI_WLOCKED 3
+
+#ifdef TCPDEBUG
+ /*
+ * The size of tcp_saveipgen must be the size of the largest IP header,
+ * which is currently the IPv6 header.
+ */
+ u_char tcp_saveipgen[IP6_HDR_LEN];
+ struct tcphdr tcp_savetcp;
+ short ostate = 0;
+#endif
+
+#ifdef INET6
+ isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
+#endif
+
+ to.to_flags = 0;
+ TCPSTAT_INC(tcps_rcvtotal);
+
+ if (isipv6) {
+#ifdef INET6
+ /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
+ ip6 = mtod(m, struct ip6_hdr *);
+ tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
+ if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
+ TCPSTAT_INC(tcps_rcvbadsum);
+ goto drop;
+ }
+ th = (struct tcphdr *)((caddr_t)ip6 + off0);
+
+ /*
+ * Be proactive about an unspecified IPv6 address in the source.
+ * As we use all-zero to indicate an unbound/unconnected pcb,
+ * an unspecified IPv6 address can be used to confuse us.
+ *
+ * Note that packets with an unspecified IPv6 destination are
+ * already dropped in ip6_input.
+ */
+ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
+ /* XXX stat */
+ goto drop;
+ }
+#else
+ th = NULL; /* XXX: Avoid compiler warning. */
+#endif
+ } else {
+ /*
+ * Get IP and TCP header together in first mbuf.
+ * Note: IP leaves IP header in first mbuf.
+ */
+ if (off0 > sizeof (struct ip)) {
+ ip_stripoptions(m, (struct mbuf *)0);
+ off0 = sizeof(struct ip);
+ }
+ if (m->m_len < sizeof (struct tcpiphdr)) {
+ if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
+ == NULL) {
+ TCPSTAT_INC(tcps_rcvshort);
+ return;
+ }
+ }
+ ip = mtod(m, struct ip *);
+ ipov = (struct ipovly *)ip;
+ th = (struct tcphdr *)((caddr_t)ip + off0);
+ tlen = ip->ip_len;
+
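+ /*
+ * Verify the TCP checksum, reusing any work already done by the
+ * hardware; in every branch a valid segment leaves th_sum equal
+ * to zero for the check below.
+ */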
+ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+ th->th_sum = m->m_pkthdr.csum_data;
+ else
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr,
+ htonl(m->m_pkthdr.csum_data +
+ ip->ip_len +
+ IPPROTO_TCP));
+ th->th_sum ^= 0xffff;
+#ifdef TCPDEBUG
+ ipov->ih_len = (u_short)tlen;
+ ipov->ih_len = htons(ipov->ih_len);
+#endif
+ } else {
+ /*
+ * Checksum extended TCP header and data.
+ */
+ len = sizeof (struct ip) + tlen;
+ bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
+ ipov->ih_len = (u_short)tlen;
+ ipov->ih_len = htons(ipov->ih_len);
+ th->th_sum = in_cksum(m, len);
+ }
+ if (th->th_sum) {
+ TCPSTAT_INC(tcps_rcvbadsum);
+ goto drop;
+ }
+ /* Re-initialization for later version check */
+ ip->ip_v = IPVERSION;
+ }
+
+#ifdef INET6
+ if (isipv6)
+ iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ else
+#endif
+ iptos = ip->ip_tos;
+
+ /*
+ * Check that TCP offset makes sense,
+ * pull out TCP options and adjust length. XXX
+ */
+ off = th->th_off << 2;
+ if (off < sizeof (struct tcphdr) || off > tlen) {
+ TCPSTAT_INC(tcps_rcvbadoff);
+ goto drop;
+ }
+ tlen -= off; /* tlen is used instead of ti->ti_len */
+ if (off > sizeof (struct tcphdr)) {
+ if (isipv6) {
+#ifdef INET6
+ IP6_EXTHDR_CHECK(m, off0, off, );
+ ip6 = mtod(m, struct ip6_hdr *);
+ th = (struct tcphdr *)((caddr_t)ip6 + off0);
+#endif
+ } else {
+ if (m->m_len < sizeof(struct ip) + off) {
+ if ((m = m_pullup(m, sizeof (struct ip) + off))
+ == NULL) {
+ TCPSTAT_INC(tcps_rcvshort);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ ipov = (struct ipovly *)ip;
+ th = (struct tcphdr *)((caddr_t)ip + off0);
+ }
+ }
+ optlen = off - sizeof (struct tcphdr);
+ optp = (u_char *)(th + 1);
+ }
+ thflags = th->th_flags;
+
+ /*
+ * Convert TCP protocol specific fields to host format.
+ */
+ th->th_seq = ntohl(th->th_seq);
+ th->th_ack = ntohl(th->th_ack);
+ th->th_win = ntohs(th->th_win);
+ th->th_urp = ntohs(th->th_urp);
+
+ /*
+ * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options.
+ */
+ drop_hdrlen = off0 + off;
+
+ /*
+ * Locate pcb for segment, which requires a lock on tcbinfo.
+ * Optimistically acquire a global read lock rather than a write lock
+ * unless header flags necessarily imply a state change. There are
+ * two cases where we might discover later we need a write lock
+ * despite the flags: ACKs moving a connection out of the syncache,
+ * and ACKs for a connection in TIMEWAIT.
+ */
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
+ tcp_read_locking == 0) {
+ INP_INFO_WLOCK(&V_tcbinfo);
+ ti_locked = TI_WLOCKED;
+ } else {
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
+ }
+
+findpcb:
+#ifdef INVARIANTS
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ else
+ panic("%s: findpcb ti_locked %d\n", __func__, ti_locked);
+#endif
+
+#ifdef IPFIREWALL_FORWARD
+ /*
+ * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
+ */
+ fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+
+ if (fwd_tag != NULL && isipv6 == 0) { /* IPv6 support is not yet */
+ struct sockaddr_in *next_hop;
+
+ next_hop = (struct sockaddr_in *)(fwd_tag+1);
+ /*
+ * Transparently forwarded. Pretend to be the destination.
+ * Have we already got one like this?
+ */
+ inp = in_pcblookup_hash(&V_tcbinfo,
+ ip->ip_src, th->th_sport,
+ ip->ip_dst, th->th_dport,
+ 0, m->m_pkthdr.rcvif);
+ if (!inp) {
+ /* It's new. Try to find the ambushing socket. */
+ inp = in_pcblookup_hash(&V_tcbinfo,
+ ip->ip_src, th->th_sport,
+ next_hop->sin_addr,
+ next_hop->sin_port ?
+ ntohs(next_hop->sin_port) :
+ th->th_dport,
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
+ }
+ /* Remove the tag from the packet. We don't need it anymore. */
+ m_tag_delete(m, fwd_tag);
+ } else
+#endif /* IPFIREWALL_FORWARD */
+ {
+ if (isipv6) {
+#ifdef INET6
+ inp = in6_pcblookup_hash(&V_tcbinfo,
+ &ip6->ip6_src, th->th_sport,
+ &ip6->ip6_dst, th->th_dport,
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
+#endif
+ } else
+ inp = in_pcblookup_hash(&V_tcbinfo,
+ ip->ip_src, th->th_sport,
+ ip->ip_dst, th->th_dport,
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
+ }
+
+ /*
+ * If the INPCB does not exist then all data in the incoming
+ * segment is discarded and an appropriate RST is sent back.
+ * XXX MRT Send RST using which routing table?
+ */
+ if (inp == NULL) {
+ /*
+ * Log communication attempts to ports that are not
+ * in use.
+ */
+ if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
+ tcp_log_in_vain == 2) {
+ if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6)))
+ log(LOG_INFO, "%s; %s: Connection attempt "
+ "to closed port\n", s, __func__);
+ }
+ /*
+ * When blackholing do not respond with a RST but
+ * completely ignore the segment and drop it.
+ */
+ if ((V_blackhole == 1 && (thflags & TH_SYN)) ||
+ V_blackhole == 2)
+ goto dropunlock;
+
+ rstreason = BANDLIM_RST_CLOSEDPORT;
+ goto dropwithreset;
+ }
+ INP_WLOCK(inp);
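+ /*
+ * Remember the hardware-assigned flow ID from the received packet
+ * on this connection, unless the socket is a listening socket.
+ */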
+ if (!(inp->inp_flags & INP_HW_FLOWID)
+ && (m->m_flags & M_FLOWID)
+ && ((inp->inp_socket == NULL)
+ || !(inp->inp_socket->so_options & SO_ACCEPTCONN))) {
+ inp->inp_flags |= INP_HW_FLOWID;
+ inp->inp_flags &= ~INP_SW_FLOWID;
+ inp->inp_flowid = m->m_pkthdr.flowid;
+ }
+#ifdef IPSEC
+#ifdef INET6
+ if (isipv6 && ipsec6_in_reject(m, inp)) {
+ V_ipsec6stat.in_polvio++;
+ goto dropunlock;
+ } else
+#endif /* INET6 */
+ if (ipsec4_in_reject(m, inp) != 0) {
+ V_ipsec4stat.in_polvio++;
+ goto dropunlock;
+ }
+#endif /* IPSEC */
+
+ /*
+ * Check the minimum TTL for socket.
+ */
+ if (inp->inp_ip_minttl != 0) {
+#ifdef INET6
+ if (isipv6 && inp->inp_ip_minttl > ip6->ip6_hlim)
+ goto dropunlock;
+ else
+#endif
+ if (inp->inp_ip_minttl > ip->ip_ttl)
+ goto dropunlock;
+ }
+
+ /*
+ * A previous connection in TIMEWAIT state is supposed to catch stray
+ * or duplicate segments arriving late. If this segment was a
+ * legitimate new connection attempt the old INPCB gets removed and
+ * we can try again to find a listening socket.
+ *
+ * At this point, due to earlier optimism, we may hold a read lock on
+ * the inpcbinfo, rather than a write lock. If so, we need to
+ * upgrade, or if that fails, acquire a reference on the inpcb, drop
+ * all locks, acquire a global write lock, and then re-acquire the
+ * inpcb lock. We may at that point discover that another thread has
+ * tried to free the inpcb, in which case we need to loop back and
+ * try to find a new inpcb to deliver to.
+ */
+relocked:
+ if (inp->inp_flags & INP_TIMEWAIT) {
+ KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+ ("%s: INP_TIMEWAIT ti_locked %d", __func__, ti_locked));
+
+ if (ti_locked == TI_RLOCKED) {
+ if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) {
+ in_pcbref(inp);
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ ti_locked = TI_WLOCKED;
+ INP_WLOCK(inp);
+ if (in_pcbrele(inp)) {
+ inp = NULL;
+ goto findpcb;
+ }
+ } else
+ ti_locked = TI_WLOCKED;
+ }
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+ if (thflags & TH_SYN)
+ tcp_dooptions(&to, optp, optlen, TO_SYN);
+ /*
+ * NB: tcp_twcheck unlocks the INP and frees the mbuf.
+ */
+ if (tcp_twcheck(inp, &to, th, m, tlen))
+ goto findpcb;
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return;
+ }
+ /*
+ * The TCPCB may no longer exist if the connection is winding
+ * down or it is in the CLOSED state. Either way we drop the
+ * segment and send an appropriate response.
+ */
+ tp = intotcpcb(inp);
+ if (tp == NULL || tp->t_state == TCPS_CLOSED) {
+ rstreason = BANDLIM_RST_CLOSEDPORT;
+ goto dropwithreset;
+ }
+
+ /*
+ * We've identified a valid inpcb, but it could be that we need an
+ * inpcbinfo write lock and have only a read lock. In this case,
+ * attempt to upgrade/relock using the same strategy as the TIMEWAIT
+ * case above. If we relock, we have to jump back to 'relocked' as
+ * the connection might now be in TIMEWAIT.
+ */
+ if (tp->t_state != TCPS_ESTABLISHED ||
+ (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
+ tcp_read_locking == 0) {
+ KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+ ("%s: upgrade check ti_locked %d", __func__, ti_locked));
+
+ if (ti_locked == TI_RLOCKED) {
+ if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) {
+ in_pcbref(inp);
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ ti_locked = TI_WLOCKED;
+ INP_WLOCK(inp);
+ if (in_pcbrele(inp)) {
+ inp = NULL;
+ goto findpcb;
+ }
+ goto relocked;
+ } else
+ ti_locked = TI_WLOCKED;
+ }
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ }
+
+#ifdef MAC
+ INP_WLOCK_ASSERT(inp);
+ if (mac_inpcb_check_deliver(inp, m))
+ goto dropunlock;
+#endif
+ so = inp->inp_socket;
+ KASSERT(so != NULL, ("%s: so == NULL", __func__));
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG) {
+ ostate = tp->t_state;
+ if (isipv6) {
+#ifdef INET6
+ bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
+#endif
+ } else
+ bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
+ tcp_savetcp = *th;
+ }
+#endif
+ /*
+ * When the socket is accepting connections (the INPCB is in LISTEN
+ * state) we look into the SYN cache if this is a new connection
+ * attempt or the completion of a previous one.
+ */
+ if (so->so_options & SO_ACCEPTCONN) {
+ struct in_conninfo inc;
+
+ KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
+ "tp not listening", __func__));
+
+ bzero(&inc, sizeof(inc));
+#ifdef INET6
+ if (isipv6) {
+ inc.inc_flags |= INC_ISIPV6;
+ inc.inc6_faddr = ip6->ip6_src;
+ inc.inc6_laddr = ip6->ip6_dst;
+ } else
+#endif
+ {
+ inc.inc_faddr = ip->ip_src;
+ inc.inc_laddr = ip->ip_dst;
+ }
+ inc.inc_fport = th->th_sport;
+ inc.inc_lport = th->th_dport;
+ inc.inc_fibnum = so->so_fibnum;
+
+ /*
+ * Check for an existing connection attempt in syncache if
+ * the flag is only ACK. A successful lookup creates a new
+ * socket appended to the listen queue in SYN_RECEIVED state.
+ */
+ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+ /*
+ * Parse the TCP options here because
+ * syncookies need access to the reflected
+ * timestamp.
+ */
+ tcp_dooptions(&to, optp, optlen, 0);
+ /*
+ * NB: syncache_expand() doesn't unlock
+ * inp and tcpinfo locks.
+ */
+ if (!syncache_expand(&inc, &to, th, &so, m)) {
+ /*
+ * No syncache entry or ACK was not
+ * for our SYN/ACK. Send a RST.
+ * NB: syncache did its own logging
+ * of the failure cause.
+ */
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+ if (so == NULL) {
+ /*
+ * We completed the 3-way handshake
+ * but could not allocate a socket
+ * either due to memory shortage,
+ * listen queue length limits or
+ * global socket limits. Send RST
+ * or wait and have the remote end
+ * retransmit the ACK for another
+ * try.
+ */
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Socket allocation failed due to "
+ "limits or memory shortage, %s\n",
+ s, __func__,
+ V_tcp_sc_rst_sock_fail ?
+ "sending RST" : "try again");
+ if (V_tcp_sc_rst_sock_fail) {
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
+ } else
+ goto dropunlock;
+ }
+ /*
+ * Socket is created in state SYN_RECEIVED.
+ * Unlock the listen socket, lock the newly
+ * created socket and update the tp variable.
+ */
+ INP_WUNLOCK(inp); /* listen socket */
+ inp = sotoinpcb(so);
+ INP_WLOCK(inp); /* new connection */
+ tp = intotcpcb(inp);
+ KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
+			    ("%s: new socket not in SYN_RECEIVED", __func__));
+ /*
+ * Process the segment and the data it
+ * contains. tcp_do_segment() consumes
+ * the mbuf chain and unlocks the inpcb.
+ */
+ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
+ iptos, ti_locked);
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ return;
+ }
+ /*
+ * Segment flag validation for new connection attempts:
+ *
+ * Our (SYN|ACK) response was rejected.
+ * Check with syncache and remove entry to prevent
+ * retransmits.
+ *
+ * NB: syncache_chkrst does its own logging of failure
+ * causes.
+ */
+ if (thflags & TH_RST) {
+ syncache_chkrst(&inc, th);
+ goto dropunlock;
+ }
+ /*
+ * We can't do anything without SYN.
+ */
+ if ((thflags & TH_SYN) == 0) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN is missing, segment ignored\n",
+ s, __func__);
+ TCPSTAT_INC(tcps_badsyn);
+ goto dropunlock;
+ }
+ /*
+ * (SYN|ACK) is bogus on a listen socket.
+ */
+ if (thflags & TH_ACK) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN|ACK invalid, segment rejected\n",
+ s, __func__);
+ syncache_badack(&inc); /* XXX: Not needed! */
+ TCPSTAT_INC(tcps_badsyn);
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+ /*
+ * If the drop_synfin option is enabled, drop all
+ * segments with both the SYN and FIN bits set.
+ * This prevents e.g. nmap from identifying the
+ * TCP/IP stack.
+ * XXX: Poor reasoning. nmap has other methods
+ * and is constantly refining its stack detection
+ * strategies.
+ * XXX: This is a violation of the TCP specification
+ * and was used by RFC1644.
+ */
+ if ((thflags & TH_FIN) && V_drop_synfin) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN|FIN segment ignored (based on "
+ "sysctl setting)\n", s, __func__);
+ TCPSTAT_INC(tcps_badsyn);
+ goto dropunlock;
+ }
+ /*
+ * Segment's flags are (SYN) or (SYN|FIN).
+ *
+ * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored
+ * as they do not affect the state of the TCP FSM.
+ * The data pointed to by TH_URG and th_urp is ignored.
+ */
+ KASSERT((thflags & (TH_RST|TH_ACK)) == 0,
+ ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
+ KASSERT(thflags & (TH_SYN),
+ ("%s: Listen socket: TH_SYN not set", __func__));
+#ifdef INET6
+ /*
+ * If deprecated address is forbidden,
+ * we do not accept SYN to deprecated interface
+ * address to prevent any new inbound connection from
+ * getting established.
+		 * When we do not accept the SYN, we send a TCP RST
+		 * from the deprecated source address instead of silently
+		 * dropping the segment.  This is a compromise: it is much
+		 * better for the peer to receive a RST, as the RST will
+		 * be the final packet of the exchange.
+ *
+ * If we do not forbid deprecated addresses, we accept
+ * the SYN packet. RFC2462 does not suggest dropping
+ * SYN in this case.
+		 * Reading RFC 2462 section 5.5.4, it says roughly this:
+ * 1. use of deprecated addr with existing
+ * communication is okay - "SHOULD continue to be
+ * used"
+ * 2. use of it with new communication:
+ * (2a) "SHOULD NOT be used if alternate address
+ * with sufficient scope is available"
+ * (2b) nothing mentioned otherwise.
+ * Here we fall into (2b) case as we have no choice in
+ * our source address selection - we must obey the peer.
+ *
+		 * The wording in RFC 2462 is confusing, and there are
+		 * multiple descriptions of deprecated address handling -
+		 * worse, they do not say exactly the same thing.
+		 * Section 5.5.4 appears to be the clearest, so we follow it.
+ */
+ if (isipv6 && !V_ip6_use_deprecated) {
+ struct in6_ifaddr *ia6;
+
+ ia6 = ip6_getdstifaddr(m);
+ if (ia6 != NULL &&
+ (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
+ ifa_free(&ia6->ia_ifa);
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt to deprecated "
+ "IPv6 address rejected\n",
+ s, __func__);
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+ ifa_free(&ia6->ia_ifa);
+ }
+#endif
+ /*
+ * Basic sanity checks on incoming SYN requests:
+ * Don't respond if the destination is a link layer
+ * broadcast according to RFC1122 4.2.3.10, p. 104.
+ * If it is from this socket it must be forged.
+ * Don't respond if the source or destination is a
+ * global or subnet broad- or multicast address.
+ * Note that it is quite possible to receive unicast
+ * link-layer packets with a broadcast IP address. Use
+ * in_broadcast() to find them.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from broad- or multicast "
+ "link layer address ignored\n", s, __func__);
+ goto dropunlock;
+ }
+ if (isipv6) {
+#ifdef INET6
+ if (th->th_dport == th->th_sport &&
+ IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt to/from self "
+ "ignored\n", s, __func__);
+ goto dropunlock;
+ }
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+ IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to multicast "
+ "address ignored\n", s, __func__);
+ goto dropunlock;
+ }
+#endif
+ } else {
+ if (th->th_dport == th->th_sport &&
+ ip->ip_dst.s_addr == ip->ip_src.s_addr) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to self "
+ "ignored\n", s, __func__);
+ goto dropunlock;
+ }
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+ ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to broad- "
+ "or multicast address ignored\n",
+ s, __func__);
+ goto dropunlock;
+ }
+ }
+ /*
+ * SYN appears to be valid. Create compressed TCP state
+ * for syncache.
+ */
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp,
+ (void *)tcp_saveipgen, &tcp_savetcp, 0);
+#endif
+ tcp_dooptions(&to, optp, optlen, TO_SYN);
+ syncache_add(&inc, &to, th, inp, &so, m);
+ /*
+ * Entry added to syncache and mbuf consumed.
+ * Everything already unlocked by syncache_add().
+ */
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ return;
+ }
+
+ /*
+ * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
+ * state. tcp_do_segment() always consumes the mbuf chain, unlocks
+ * the inpcb, and unlocks pcbinfo.
+ */
+ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ return;
+
+dropwithreset:
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ else
+ panic("%s: dropwithreset ti_locked %d", __func__, ti_locked);
+ ti_locked = TI_UNLOCKED;
+
+ if (inp != NULL) {
+ tcp_dropwithreset(m, th, tp, tlen, rstreason);
+ INP_WUNLOCK(inp);
+ } else
+ tcp_dropwithreset(m, th, NULL, tlen, rstreason);
+ m = NULL; /* mbuf chain got consumed. */
+ goto drop;
+
+dropunlock:
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ else
+ panic("%s: dropunlock ti_locked %d", __func__, ti_locked);
+ ti_locked = TI_UNLOCKED;
+
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
+
+drop:
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ if (m != NULL)
+ m_freem(m);
+}
+
+static void
+tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
+ int ti_locked)
+{
+ int thflags, acked, ourfinisacked, needoutput = 0;
+ int rstreason, todrop, win;
+ u_long tiwin;
+ struct tcpopt to;
+
+#ifdef TCPDEBUG
+ /*
+ * The size of tcp_saveipgen must be the size of the max ip header,
+ * now IPv6.
+ */
+ u_char tcp_saveipgen[IP6_HDR_LEN];
+ struct tcphdr tcp_savetcp;
+ short ostate = 0;
+#endif
+ thflags = th->th_flags;
+
+	/*
+	 * Require a global write lock on tcbinfo for state-changing packets
+	 * (SYN/FIN/RST) and for connections that are not yet established;
+	 * otherwise accept either a read or a write lock, as we may have
+	 * conservatively acquired a write lock in certain cases in
+	 * tcp_input() (is this still true?).  Currently we will never enter
+	 * with no lock, so we try to drop it quickly in the common pure
+	 * ack/pure data cases.
+	 */
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
+ tp->t_state != TCPS_ESTABLISHED) {
+ KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for "
+ "SYN/FIN/RST/!EST", __func__, ti_locked));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ } else {
+#ifdef INVARIANTS
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ else
+ panic("%s: ti_locked %d for EST", __func__,
+ ti_locked);
+#endif
+ }
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
+ __func__));
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
+ __func__));
+
+ /*
+ * Segment received on connection.
+ * Reset idle time and keep-alive timer.
+ * XXX: This should be done after segment
+ * validation to ignore broken/spoofed segs.
+ */
+ tp->t_rcvtime = ticks;
+ if (TCPS_HAVEESTABLISHED(tp->t_state))
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+
+ /*
+ * Unscale the window into a 32-bit value.
+ * For the SYN_SENT state the scale is zero.
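+	 * For example, with snd_scale 7 a raw 16-bit window of 512
+	 * advertises 512 << 7 = 65536 bytes.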
+ */
+ tiwin = th->th_win << tp->snd_scale;
+
+ /*
+ * TCP ECN processing.
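+	 * A CE mark in the IP TOS byte makes us echo ECE on subsequent
+	 * segments until the peer acknowledges it with CWR, so an incoming
+	 * CWR clears our pending ECE.  An incoming ECE is the peer's
+	 * congestion signal and may trigger a cwnd reduction via
+	 * tcp_congestion_exp().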
+ */
+ if (tp->t_flags & TF_ECN_PERMIT) {
+ if (thflags & TH_CWR)
+ tp->t_flags &= ~TF_ECN_SND_ECE;
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ tp->t_flags |= TF_ECN_SND_ECE;
+ TCPSTAT_INC(tcps_ecn_ce);
+ break;
+ case IPTOS_ECN_ECT0:
+ TCPSTAT_INC(tcps_ecn_ect0);
+ break;
+ case IPTOS_ECN_ECT1:
+ TCPSTAT_INC(tcps_ecn_ect1);
+ break;
+ }
+ /*
+ * Congestion experienced.
+ * Ignore if we are already trying to recover.
+ */
+ if ((thflags & TH_ECE) &&
+ SEQ_LEQ(th->th_ack, tp->snd_recover)) {
+ TCPSTAT_INC(tcps_ecn_rcwnd);
+ tcp_congestion_exp(tp);
+ }
+ }
+
+ /*
+ * Parse options on any incoming segment.
+ */
+ tcp_dooptions(&to, (u_char *)(th + 1),
+ (th->th_off << 2) - sizeof(struct tcphdr),
+ (thflags & TH_SYN) ? TO_SYN : 0);
+
+ /*
+ * If echoed timestamp is later than the current time,
+ * fall back to non RFC1323 RTT calculation. Normalize
+ * timestamp if syncookies were used when this connection
+ * was established.
+ */
+ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
+ to.to_tsecr -= tp->ts_offset;
+ if (TSTMP_GT(to.to_tsecr, ticks))
+ to.to_tsecr = 0;
+ }
+
+ /*
+ * Process options only when we get SYN/ACK back. The SYN case
+ * for incoming connections is handled in tcp_syncache.
+ * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
+ * XXX this is traditional behavior, may need to be cleaned up.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
+ if ((to.to_flags & TOF_SCALE) &&
+ (tp->t_flags & TF_REQ_SCALE)) {
+ tp->t_flags |= TF_RCVD_SCALE;
+ tp->snd_scale = to.to_wscale;
+ }
+ /*
+ * Initial send window. It will be updated with
+ * the next incoming segment to the scaled value.
+ */
+ tp->snd_wnd = th->th_win;
+ if (to.to_flags & TOF_TS) {
+ tp->t_flags |= TF_RCVD_TSTMP;
+ tp->ts_recent = to.to_tsval;
+ tp->ts_recent_age = ticks;
+ }
+ if (to.to_flags & TOF_MSS)
+ tcp_mss(tp, to.to_mss);
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ (to.to_flags & TOF_SACKPERM) == 0)
+ tp->t_flags &= ~TF_SACK_PERMIT;
+ }
+
+ /*
+ * Header prediction: check for the two common cases
+ * of a uni-directional data xfer. If the packet has
+ * no control flags, is in-sequence, the window didn't
+ * change and we're not retransmitting, it's a
+ * candidate. If the length is zero and the ack moved
+ * forward, we're the sender side of the xfer. Just
+ * free the data acked & wake any higher level process
+ * that was blocked waiting for space. If the length
+ * is non-zero and the ack didn't move, we're the
+ * receiver side. If we're getting packets in-order
+ * (the reassembly queue is empty), add the data to
+ * the socket buffer and note that we need a delayed ack.
+ * Make sure that the hidden state-flags are also off.
+ * Since we check for TCPS_ESTABLISHED first, it can only
+ * be TH_NEEDSYN.
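+	 * Also note that the timestamp test (TSTMP_GEQ against ts_recent)
+	 * keeps segments that would fail PAWS out of the fast path, so
+	 * they receive the full PAWS treatment below.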
+ */
+ if (tp->t_state == TCPS_ESTABLISHED &&
+ th->th_seq == tp->rcv_nxt &&
+ (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+ tp->snd_nxt == tp->snd_max &&
+ tiwin && tiwin == tp->snd_wnd &&
+ ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
+ LIST_EMPTY(&tp->t_segq) &&
+ ((to.to_flags & TOF_TS) == 0 ||
+ TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record the timestamp.
+ * NOTE that the test is modified according to the latest
+ * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+ */
+ if ((to.to_flags & TOF_TS) != 0 &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
+ tp->ts_recent_age = ticks;
+ tp->ts_recent = to.to_tsval;
+ }
+
+ if (tlen == 0) {
+ if (SEQ_GT(th->th_ack, tp->snd_una) &&
+ SEQ_LEQ(th->th_ack, tp->snd_max) &&
+ tp->snd_cwnd >= tp->snd_wnd &&
+ ((!V_tcp_do_newreno &&
+ !(tp->t_flags & TF_SACK_PERMIT) &&
+ tp->t_dupacks < tcprexmtthresh) ||
+ ((V_tcp_do_newreno ||
+ (tp->t_flags & TF_SACK_PERMIT)) &&
+ !IN_FASTRECOVERY(tp) &&
+ (to.to_flags & TOF_SACK) == 0 &&
+ TAILQ_EMPTY(&tp->snd_holes)))) {
+ /*
+ * This is a pure ack for outstanding data.
+ */
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ else
+ panic("%s: ti_locked %d on pure ACK",
+ __func__, ti_locked);
+ ti_locked = TI_UNLOCKED;
+
+ TCPSTAT_INC(tcps_predack);
+
+ /*
+ * "bad retransmit" recovery.
+ */
+ if (tp->t_rxtshift == 1 &&
+ (int)(ticks - tp->t_badrxtwin) < 0) {
+ TCPSTAT_INC(tcps_sndrexmitbad);
+ tp->snd_cwnd = tp->snd_cwnd_prev;
+ tp->snd_ssthresh =
+ tp->snd_ssthresh_prev;
+ tp->snd_recover = tp->snd_recover_prev;
+ if (tp->t_flags & TF_WASFRECOVERY)
+ ENTER_FASTRECOVERY(tp);
+ tp->snd_nxt = tp->snd_max;
+ tp->t_badrxtwin = 0;
+ }
+
+ /*
+ * Recalculate the transmit timer / rtt.
+ *
+ * Some boxes send broken timestamp replies
+ * during the SYN+ACK phase, ignore
+ * timestamps of 0 or we could calculate a
+ * huge RTT and blow up the retransmit timer.
+ */
+ if ((to.to_flags & TOF_TS) != 0 &&
+ to.to_tsecr) {
+ if (!tp->t_rttlow ||
+ tp->t_rttlow > ticks - to.to_tsecr)
+ tp->t_rttlow = ticks - to.to_tsecr;
+ tcp_xmit_timer(tp,
+ ticks - to.to_tsecr + 1);
+ } else if (tp->t_rtttime &&
+ SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ if (!tp->t_rttlow ||
+ tp->t_rttlow > ticks - tp->t_rtttime)
+ tp->t_rttlow = ticks - tp->t_rtttime;
+ tcp_xmit_timer(tp,
+ ticks - tp->t_rtttime);
+ }
+ tcp_xmit_bandwidth_limit(tp, th->th_ack);
+ acked = th->th_ack - tp->snd_una;
+ TCPSTAT_INC(tcps_rcvackpack);
+ TCPSTAT_ADD(tcps_rcvackbyte, acked);
+ sbdrop(&so->so_snd, acked);
+ if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
+ SEQ_LEQ(th->th_ack, tp->snd_recover))
+ tp->snd_recover = th->th_ack - 1;
+ tp->snd_una = th->th_ack;
+ /*
+ * Pull snd_wl2 up to prevent seq wrap relative
+ * to th_ack.
+ */
+ tp->snd_wl2 = th->th_ack;
+ tp->t_dupacks = 0;
+ m_freem(m);
+ ND6_HINT(tp); /* Some progress has been made. */
+
+ /*
+ * If all outstanding data are acked, stop
+ * retransmit timer, otherwise restart timer
+ * using current (possibly backed-off) value.
+ * If process is waiting for space,
+ * wakeup/selwakeup/signal. If data
+ * are ready to send, let tcp_output
+ * decide between more output or persist.
+ */
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp,
+ (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ if (tp->snd_una == tp->snd_max)
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT,
+ tp->t_rxtcur);
+ sowwakeup(so);
+ if (so->so_snd.sb_cc)
+ (void) tcp_output(tp);
+ goto check_delack;
+ }
+ } else if (th->th_ack == tp->snd_una &&
+ tlen <= sbspace(&so->so_rcv)) {
+ int newsize = 0; /* automatic sockbuf scaling */
+
+ /*
+ * This is a pure, in-sequence data packet with
+ * nothing on the reassembly queue and we have enough
+ * buffer space to take it.
+ */
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ else
+ panic("%s: ti_locked %d on pure data "
+ "segment", __func__, ti_locked);
+ ti_locked = TI_UNLOCKED;
+
+ /* Clean receiver SACK report if present */
+ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
+ tcp_clean_sackreport(tp);
+ TCPSTAT_INC(tcps_preddat);
+ tp->rcv_nxt += tlen;
+ /*
+ * Pull snd_wl1 up to prevent seq wrap relative to
+ * th_seq.
+ */
+ tp->snd_wl1 = th->th_seq;
+ /*
+ * Pull rcv_up up to prevent seq wrap relative to
+ * rcv_nxt.
+ */
+ tp->rcv_up = tp->rcv_nxt;
+ TCPSTAT_INC(tcps_rcvpack);
+ TCPSTAT_ADD(tcps_rcvbyte, tlen);
+ ND6_HINT(tp); /* Some progress has been made */
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp,
+ (void *)tcp_saveipgen, &tcp_savetcp, 0);
+#endif
+ /*
+ * Automatic sizing of receive socket buffer. Often the send
+ * buffer size is not optimally adjusted to the actual network
+ * conditions at hand (delay bandwidth product). Setting the
+ * buffer size too small limits throughput on links with high
+			 * bandwidth and high delay (e.g. trans-continental/oceanic links).
+ *
+ * On the receive side the socket buffer memory is only rarely
+ * used to any significant extent. This allows us to be much
+ * more aggressive in scaling the receive socket buffer. For
+ * the case that the buffer space is actually used to a large
+ * extent and we run out of kernel memory we can simply drop
+			 * the new segments; TCP on the sender will just retransmit them
+ * later. Setting the buffer size too big may only consume too
+ * much kernel memory if the application doesn't read() from
+ * the socket or packet loss or reordering makes use of the
+ * reassembly queue.
+ *
+ * The criteria to step up the receive buffer one notch are:
+ * 1. the number of bytes received during the time it takes
+ * one timestamp to be reflected back to us (the RTT);
+			 * 2. received bytes per RTT are within seven eighths of the
+ * current socket buffer size;
+ * 3. receive buffer size has not hit maximal automatic size;
+ *
+ * This algorithm does one step per RTT at most and only if
+ * we receive a bulk stream w/o packet losses or reorderings.
+ * Shrinking the buffer during idle times is not necessary as
+ * it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
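+			 * As a worked example: with sb_hiwat at 64 kB, more
+			 * than 56 kB (7/8 of 64 kB) received within one RTT
+			 * steps the buffer up by V_tcp_autorcvbuf_inc, capped
+			 * at V_tcp_autorcvbuf_max.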
+ */
+ if (V_tcp_do_autorcvbuf &&
+ to.to_tsecr &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
+ if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
+ to.to_tsecr - tp->rfbuf_ts < hz) {
+ if (tp->rfbuf_cnt >
+ (so->so_rcv.sb_hiwat / 8 * 7) &&
+ so->so_rcv.sb_hiwat <
+ V_tcp_autorcvbuf_max) {
+ newsize =
+ min(so->so_rcv.sb_hiwat +
+ V_tcp_autorcvbuf_inc,
+ V_tcp_autorcvbuf_max);
+ }
+ /* Start over with next RTT. */
+ tp->rfbuf_ts = 0;
+ tp->rfbuf_cnt = 0;
+ } else
+ tp->rfbuf_cnt += tlen; /* add up */
+ }
+
+ /* Add data to socket buffer. */
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ m_freem(m);
+ } else {
+ /*
+ * Set new socket buffer size.
+ * Give up when limit is reached.
+ */
+ if (newsize)
+ if (!sbreserve_locked(&so->so_rcv,
+ newsize, so, NULL))
+ so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
+ m_adj(m, drop_hdrlen); /* delayed header drop */
+ sbappendstream_locked(&so->so_rcv, m);
+ }
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
+ if (DELAY_ACK(tp)) {
+ tp->t_flags |= TF_DELACK;
+ } else {
+ tp->t_flags |= TF_ACKNOW;
+ tcp_output(tp);
+ }
+ goto check_delack;
+ }
+ }
+
+ /*
+ * Calculate amount of space in receive window,
+ * and then do TCP input processing.
+ * Receive window is amount of space in rcv queue,
+ * but not less than advertised window.
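+	 * Taking the maximum with (rcv_adv - rcv_nxt) means we never offer
+	 * less than we have already advertised, i.e. the right edge of the
+	 * receive window never moves to the left, which RFC 793 strongly
+	 * discourages.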
+ */
+ win = sbspace(&so->so_rcv);
+ if (win < 0)
+ win = 0;
+ tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+
+ /* Reset receive buffer auto scaling when not in bulk receive mode. */
+ tp->rfbuf_ts = 0;
+ tp->rfbuf_cnt = 0;
+
+ switch (tp->t_state) {
+
+ /*
+ * If the state is SYN_RECEIVED:
+ * if seg contains an ACK, but not for our SYN/ACK, send a RST.
+ */
+ case TCPS_SYN_RECEIVED:
+ if ((thflags & TH_ACK) &&
+ (SEQ_LEQ(th->th_ack, tp->snd_una) ||
+ SEQ_GT(th->th_ack, tp->snd_max))) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+ break;
+
+ /*
+ * If the state is SYN_SENT:
+ * if seg contains an ACK, but not for our SYN, drop the input.
+ * if seg contains a RST, then drop the connection.
+ * if seg does not contain SYN, then drop it.
+ * Otherwise this is an acceptable SYN segment
+ * initialize tp->rcv_nxt and tp->irs
+ * if seg contains ack then advance tp->snd_una
+ * if seg contains an ECE and ECN support is enabled, the stream
+ * is ECN capable.
+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+ * arrange for segment to be acked (eventually)
+ * continue processing rest of data/controls, beginning with URG
+ */
+ case TCPS_SYN_SENT:
+ if ((thflags & TH_ACK) &&
+ (SEQ_LEQ(th->th_ack, tp->iss) ||
+ SEQ_GT(th->th_ack, tp->snd_max))) {
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
+ }
+ if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST))
+ tp = tcp_drop(tp, ECONNREFUSED);
+ if (thflags & TH_RST)
+ goto drop;
+ if (!(thflags & TH_SYN))
+ goto drop;
+
+ tp->irs = th->th_seq;
+ tcp_rcvseqinit(tp);
+ if (thflags & TH_ACK) {
+ TCPSTAT_INC(tcps_connects);
+ soisconnected(so);
+#ifdef MAC
+ mac_socketpeer_set_from_mbuf(m, so);
+#endif
+ /* Do window scaling on this connection? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ tp->rcv_adv += tp->rcv_wnd;
+ tp->snd_una++; /* SYN is acked */
+ /*
+ * If there's data, delay ACK; if there's also a FIN
+ * ACKNOW will be turned on later.
+ */
+ if (DELAY_ACK(tp) && tlen != 0)
+ tcp_timer_activate(tp, TT_DELACK,
+ tcp_delacktime);
+ else
+ tp->t_flags |= TF_ACKNOW;
+
+ if ((thflags & TH_ECE) && V_tcp_do_ecn) {
+ tp->t_flags |= TF_ECN_PERMIT;
+ TCPSTAT_INC(tcps_ecn_shs);
+ }
+
+ /*
+ * Received <SYN,ACK> in SYN_SENT[*] state.
+ * Transitions:
+ * SYN_SENT --> ESTABLISHED
+ * SYN_SENT* --> FIN_WAIT_1
+ */
+ tp->t_starttime = ticks;
+ if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_state = TCPS_FIN_WAIT_1;
+ tp->t_flags &= ~TF_NEEDFIN;
+ thflags &= ~TH_SYN;
+ } else {
+ tp->t_state = TCPS_ESTABLISHED;
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+ }
+ } else {
+ /*
+ * Received initial SYN in SYN-SENT[*] state =>
+ * simultaneous open. If segment contains CC option
+ * and there is a cached CC, apply TAO test.
+			 * If it succeeds, the connection is half-synchronized.
+ * Otherwise, do 3-way handshake:
+ * SYN-SENT -> SYN-RECEIVED
+ * SYN-SENT* -> SYN-RECEIVED*
+ * If there was no CC option, clear cached CC value.
+ */
+ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tp->t_state = TCPS_SYN_RECEIVED;
+ }
+
+ KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: "
+ "ti_locked %d", __func__, ti_locked));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * Advance th->th_seq to correspond to first data byte.
+ * If data, trim to stay within window,
+ * dropping FIN if necessary.
+ */
+ th->th_seq++;
+ if (tlen > tp->rcv_wnd) {
+ todrop = tlen - tp->rcv_wnd;
+ m_adj(m, -todrop);
+ tlen = tp->rcv_wnd;
+ thflags &= ~TH_FIN;
+ TCPSTAT_INC(tcps_rcvpackafterwin);
+ TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
+ }
+ tp->snd_wl1 = th->th_seq - 1;
+ tp->rcv_up = th->th_seq;
+ /*
+ * Client side of transaction: already sent SYN and data.
+ * If the remote host used T/TCP to validate the SYN,
+ * our data will be ACK'd; if so, enter normal data segment
+ * processing in the middle of step 5, ack processing.
+ * Otherwise, goto step 6.
+ */
+ if (thflags & TH_ACK)
+ goto process_ACK;
+
+ goto step6;
+
+ /*
+ * If the state is LAST_ACK or CLOSING or TIME_WAIT:
+ * do normal processing.
+ *
+ * NB: Leftover from RFC1644 T/TCP. Cases to be reused later.
+ */
+ case TCPS_LAST_ACK:
+ case TCPS_CLOSING:
+ break; /* continue normal processing */
+ }
+
+ /*
+ * States other than LISTEN or SYN_SENT.
+ * First check the RST flag and sequence number since reset segments
+ * are exempt from the timestamp and connection count tests. This
+ * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
+ * below which allowed reset segments in half the sequence space
+	 * to fall through and be processed (which gives forged reset
+ * segments with a random sequence number a 50 percent chance of
+ * killing a connection).
+ * Then check timestamp, if present.
+ * Then check the connection count, if present.
+ * Then check that at least some bytes of segment are within
+ * receive window. If segment begins before rcv_nxt,
+ * drop leading data (and SYN); if nothing left, just ack.
+ *
+ *
+ * If the RST bit is set, check the sequence number to see
+ * if this is a valid reset segment.
+ * RFC 793 page 37:
+ * In all states except SYN-SENT, all reset (RST) segments
+ * are validated by checking their SEQ-fields. A reset is
+ * valid if its sequence number is in the window.
+ * Note: this does not take into account delayed ACKs, so
+ * we should test against last_ack_sent instead of rcv_nxt.
+ * The sequence number in the reset segment is normally an
+	 * echo of our outgoing acknowledgement numbers, but some hosts
+ * send a reset with the sequence number at the rightmost edge
+ * of our receive window, and we have to handle this case.
+ * Note 2: Paul Watson's paper "Slipping in the Window" has shown
+ * that brute force RST attacks are possible. To combat this,
+ * we use a much stricter check while in the ESTABLISHED state,
+ * only accepting RSTs where the sequence number is equal to
+ * last_ack_sent. In all other states (the states in which a
+ * RST is more likely), the more permissive check is used.
+ * If we have multiple segments in flight, the initial reset
+ * segment sequence numbers will be to the left of last_ack_sent,
+ * but they will eventually catch up.
+ * In any case, it never made sense to trim reset segments to
+ * fit the receive window since RFC 1122 says:
+ * 4.2.2.12 RST Segment: RFC-793 Section 3.4
+ *
+ * A TCP SHOULD allow a received RST segment to include data.
+ *
+ * DISCUSSION
+ * It has been suggested that a RST segment could contain
+ * ASCII text that encoded and explained the cause of the
+ * RST. No standard has yet been established for such
+ * data.
+ *
+ * If the reset segment passes the sequence number test examine
+ * the state:
+ * SYN_RECEIVED STATE:
+ * If passive open, return to LISTEN state.
+ * If active open, inform user that connection was refused.
+ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES:
+ * Inform user that connection was reset, and close tcb.
+ * CLOSING, LAST_ACK STATES:
+ * Close the tcb.
+ * TIME_WAIT STATE:
+ * Drop the segment - see Stevens, vol. 2, p. 964 and
+ * RFC 1337.
+ */
+ if (thflags & TH_RST) {
+ if (SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+ switch (tp->t_state) {
+
+ case TCPS_SYN_RECEIVED:
+ so->so_error = ECONNREFUSED;
+ goto close;
+
+ case TCPS_ESTABLISHED:
+ if (V_tcp_insecure_rst == 0 &&
+ !(SEQ_GEQ(th->th_seq, tp->rcv_nxt - 1) &&
+ SEQ_LEQ(th->th_seq, tp->rcv_nxt + 1)) &&
+ !(SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent + 1))) {
+ TCPSTAT_INC(tcps_badrst);
+ goto drop;
+ }
+ /* FALLTHROUGH */
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ so->so_error = ECONNRESET;
+ close:
+ KASSERT(ti_locked == TI_WLOCKED,
+ ("tcp_do_segment: TH_RST 1 ti_locked %d",
+ ti_locked));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+ tp->t_state = TCPS_CLOSED;
+ TCPSTAT_INC(tcps_drops);
+ tp = tcp_close(tp);
+ break;
+
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ KASSERT(ti_locked == TI_WLOCKED,
+ ("tcp_do_segment: TH_RST 2 ti_locked %d",
+ ti_locked));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+ tp = tcp_close(tp);
+ break;
+ }
+ }
+ goto drop;
+ }
+
+ /*
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
+ TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+
+ /* Check to see if ts_recent is over 24 days old. */
+ if (ticks - tp->ts_recent_age > TCP_PAWS_IDLE) {
+ /*
+ * Invalidate ts_recent. If this segment updates
+ * ts_recent, the age will be reset later and ts_recent
+ * will get a valid value. If it does not, setting
+ * ts_recent to zero will at least satisfy the
+ * requirement that zero be placed in the timestamp
+ * echo reply when ts_recent isn't valid. The
+ * age isn't reset until we get a valid ts_recent
+ * because we don't want out-of-order segments to be
+ * dropped when ts_recent is old.
+ */
+ tp->ts_recent = 0;
+ } else {
+ TCPSTAT_INC(tcps_rcvduppack);
+ TCPSTAT_ADD(tcps_rcvdupbyte, tlen);
+ TCPSTAT_INC(tcps_pawsdrop);
+ if (tlen)
+ goto dropafterack;
+ goto drop;
+ }
+ }
+
+ /*
+ * In the SYN-RECEIVED state, validate that the packet belongs to
+ * this connection before trimming the data to fit the receive
+ * window. Check the sequence number versus IRS since we know
+ * the sequence numbers haven't wrapped. This is a partial fix
+ * for the "LAND" DoS attack.
+ */
+ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+
+ todrop = tp->rcv_nxt - th->th_seq;
+ if (todrop > 0) {
+ /*
+ * If this is a duplicate SYN for our current connection,
+		 * advance over it and pretend it's not a SYN.
+ */
+ if (thflags & TH_SYN && th->th_seq == tp->irs) {
+ thflags &= ~TH_SYN;
+ th->th_seq++;
+ if (th->th_urp > 1)
+ th->th_urp--;
+ else
+ thflags &= ~TH_URG;
+ todrop--;
+ }
+ /*
+ * Following if statement from Stevens, vol. 2, p. 960.
+ */
+ if (todrop > tlen
+ || (todrop == tlen && (thflags & TH_FIN) == 0)) {
+ /*
+ * Any valid FIN must be to the left of the window.
+ * At this point the FIN must be a duplicate or out
+ * of sequence; drop it.
+ */
+ thflags &= ~TH_FIN;
+
+ /*
+ * Send an ACK to resynchronize and drop any data.
+ * But keep on processing for RST or ACK.
+ */
+ tp->t_flags |= TF_ACKNOW;
+ todrop = tlen;
+ TCPSTAT_INC(tcps_rcvduppack);
+ TCPSTAT_ADD(tcps_rcvdupbyte, todrop);
+ } else {
+ TCPSTAT_INC(tcps_rcvpartduppack);
+ TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
+ }
+ drop_hdrlen += todrop; /* drop from the top afterwards */
+ th->th_seq += todrop;
+ tlen -= todrop;
+ if (th->th_urp > todrop)
+ th->th_urp -= todrop;
+ else {
+ thflags &= ~TH_URG;
+ th->th_urp = 0;
+ }
+ }
+
+ /*
+ * If new data are received on a connection after the
+ * user processes are gone, then RST the other end.
+ */
+ if ((so->so_state & SS_NOFDREF) &&
+ tp->t_state > TCPS_CLOSE_WAIT && tlen) {
+ char *s;
+
+		KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDREF && "
+ "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket "
+ "was closed, sending RST and removing tcpcb\n",
+ s, __func__, tcpstates[tp->t_state], tlen);
+ free(s, M_TCPLOG);
+ }
+ tp = tcp_close(tp);
+ TCPSTAT_INC(tcps_rcvafterclose);
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
+ }
+
+ /*
+ * If segment ends after window, drop trailing data
+ * (and PUSH and FIN); if nothing left, just ACK.
+ */
+ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
+ if (todrop > 0) {
+ TCPSTAT_INC(tcps_rcvpackafterwin);
+ if (todrop >= tlen) {
+ TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen);
+ /*
+ * If window is closed can only take segments at
+ * window edge, and have to drop data and PUSH from
+ * incoming segments. Continue processing, but
+ * remember to ack. Otherwise, drop segment
+ * and ack.
+ */
+ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
+ tp->t_flags |= TF_ACKNOW;
+ TCPSTAT_INC(tcps_rcvwinprobe);
+ } else
+ goto dropafterack;
+ } else
+ TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
+ m_adj(m, -todrop);
+ tlen -= todrop;
+ thflags &= ~(TH_PUSH|TH_FIN);
+ }
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record its timestamp.
+ * NOTE:
+ * 1) That the test incorporates suggestions from the latest
+ * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+ * 2) That updating only on newer timestamps interferes with
+ * our earlier PAWS tests, so this check should be solely
+ * predicated on the sequence space of this segment.
+ * 3) That we modify the segment boundary check to be
+ * Last.ACK.Sent <= SEG.SEQ + SEG.Len
+ * instead of RFC1323's
+ * Last.ACK.Sent < SEG.SEQ + SEG.Len,
+ * This modified check allows us to overcome RFC1323's
+ * limitations as described in Stevens TCP/IP Illustrated
+ * Vol. 2 p.869. In such cases, we can still calculate the
+ * RTT correctly when RCV.NXT == Last.ACK.Sent.
+ */
+ if ((to.to_flags & TOF_TS) != 0 &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
+ SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
+ ((thflags & (TH_SYN|TH_FIN)) != 0))) {
+ tp->ts_recent_age = ticks;
+ tp->ts_recent = to.to_tsval;
+ }
+
+ /*
+ * If a SYN is in the window, then this is an
+ * error and we send an RST and drop the connection.
+ */
+ if (thflags & TH_SYN) {
+ KASSERT(ti_locked == TI_WLOCKED,
+ ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+ tp = tcp_drop(tp, ECONNRESET);
+ rstreason = BANDLIM_UNLIMITED;
+ goto drop;
+ }
+
+ /*
+ * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN
+ * flag is on (half-synchronized state), then queue data for
+ * later processing; else drop segment and return.
+ */
+ if ((thflags & TH_ACK) == 0) {
+ if (tp->t_state == TCPS_SYN_RECEIVED ||
+ (tp->t_flags & TF_NEEDSYN))
+ goto step6;
+ else if (tp->t_flags & TF_ACKNOW)
+ goto dropafterack;
+ else
+ goto drop;
+ }
+
+ /*
+ * Ack processing.
+ */
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
+ * ESTABLISHED state and continue processing.
+ * The ACK was checked above.
+ */
+ case TCPS_SYN_RECEIVED:
+
+ TCPSTAT_INC(tcps_connects);
+ soisconnected(so);
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->rcv_scale = tp->request_r_scale;
+ tp->snd_wnd = tiwin;
+ }
+ /*
+ * Make transitions:
+ * SYN-RECEIVED -> ESTABLISHED
+ * SYN-RECEIVED* -> FIN-WAIT-1
+ */
+ tp->t_starttime = ticks;
+ if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_state = TCPS_FIN_WAIT_1;
+ tp->t_flags &= ~TF_NEEDFIN;
+ } else {
+ tp->t_state = TCPS_ESTABLISHED;
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+ }
+ /*
+ * If segment contains data or ACK, will call tcp_reass()
+ * later; if not, do so now to pass queued data to user.
+ */
+ if (tlen == 0 && (thflags & TH_FIN) == 0)
+ (void) tcp_reass(tp, (struct tcphdr *)0, 0,
+ (struct mbuf *)0);
+ tp->snd_wl1 = th->th_seq - 1;
+ /* FALLTHROUGH */
+
+ /*
+ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+ * ACKs. If the ack is in the range
+ * tp->snd_una < th->th_ack <= tp->snd_max
+ * then advance tp->snd_una to th->th_ack and drop
+ * data from the retransmission queue. If this ACK reflects
+ * more up to date window information we update our window information.
+ */
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ if (SEQ_GT(th->th_ack, tp->snd_max)) {
+ TCPSTAT_INC(tcps_rcvacktoomuch);
+ goto dropafterack;
+ }
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ ((to.to_flags & TOF_SACK) ||
+ !TAILQ_EMPTY(&tp->snd_holes)))
+ tcp_sack_doack(tp, &to, th->th_ack);
+ if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
+ if (tlen == 0 && tiwin == tp->snd_wnd) {
+ TCPSTAT_INC(tcps_rcvdupack);
+ /*
+ * If we have outstanding data (other than
+ * a window probe), this is a completely
+ * duplicate ack (ie, window info didn't
+ * change), the ack is the biggest we've
+ * seen and we've seen exactly our rexmt
+				 * threshold of them, assume a packet
+ * has been dropped and retransmit it.
+ * Kludge snd_nxt & the congestion
+ * window so we send only this one
+ * packet.
+ *
+ * We know we're losing at the current
+ * window size so do congestion avoidance
+ * (set ssthresh to half the current window
+ * and pull our congestion window back to
+ * the new ssthresh).
+ *
+ * Dup acks mean that packets have left the
+ * network (they're now cached at the receiver)
+ * so bump cwnd by the amount in the receiver
+ * to keep a constant cwnd packets in the
+ * network.
+ *
+ * When using TCP ECN, notify the peer that
+ * we reduced the cwnd.
+ */
+ if (!tcp_timer_active(tp, TT_REXMT) ||
+ th->th_ack != tp->snd_una)
+ tp->t_dupacks = 0;
+ else if (++tp->t_dupacks > tcprexmtthresh ||
+ ((V_tcp_do_newreno ||
+ (tp->t_flags & TF_SACK_PERMIT)) &&
+ IN_FASTRECOVERY(tp))) {
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ IN_FASTRECOVERY(tp)) {
+ int awnd;
+
+ /*
+ * Compute the amount of data in flight first.
+ * We can inject new data into the pipe iff
+ * we have less than 1/2 the original window's
+ * worth of data in flight.
+ */
+ awnd = (tp->snd_nxt - tp->snd_fack) +
+ tp->sackhint.sack_bytes_rexmit;
+ if (awnd < tp->snd_ssthresh) {
+ tp->snd_cwnd += tp->t_maxseg;
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ }
+ } else
+ tp->snd_cwnd += tp->t_maxseg;
+ (void) tcp_output(tp);
+ goto drop;
+ } else if (tp->t_dupacks == tcprexmtthresh) {
+ tcp_seq onxt = tp->snd_nxt;
+
+ /*
+ * If we're doing sack, check to
+ * see if we're already in sack
+ * recovery. If we're not doing sack,
+ * check to see if we're in newreno
+ * recovery.
+ */
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ if (IN_FASTRECOVERY(tp)) {
+ tp->t_dupacks = 0;
+ break;
+ }
+ } else if (V_tcp_do_newreno ||
+ V_tcp_do_ecn) {
+ if (SEQ_LEQ(th->th_ack,
+ tp->snd_recover)) {
+ tp->t_dupacks = 0;
+ break;
+ }
+ }
+ tcp_congestion_exp(tp);
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tp->t_rtttime = 0;
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ TCPSTAT_INC(
+ tcps_sack_recovery_episode);
+ tp->sack_newdata = tp->snd_nxt;
+ tp->snd_cwnd = tp->t_maxseg;
+ (void) tcp_output(tp);
+ goto drop;
+ }
+ tp->snd_nxt = th->th_ack;
+ tp->snd_cwnd = tp->t_maxseg;
+ (void) tcp_output(tp);
+ KASSERT(tp->snd_limited <= 2,
+ ("%s: tp->snd_limited too big",
+ __func__));
+ tp->snd_cwnd = tp->snd_ssthresh +
+ tp->t_maxseg *
+ (tp->t_dupacks - tp->snd_limited);
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ goto drop;
+ } else if (V_tcp_do_rfc3042) {
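+					/*
+					 * RFC 3042 limited transmit: on the
+					 * first and second duplicate ACKs,
+					 * temporarily open cwnd just enough
+					 * to send one additional previously
+					 * unsent segment, then restore the
+					 * old cwnd; snd_limited tracks how
+					 * many such segments have been sent.
+					 */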
+ u_long oldcwnd = tp->snd_cwnd;
+ tcp_seq oldsndmax = tp->snd_max;
+ u_int sent;
+
+ KASSERT(tp->t_dupacks == 1 ||
+ tp->t_dupacks == 2,
+ ("%s: dupacks not 1 or 2",
+ __func__));
+ if (tp->t_dupacks == 1)
+ tp->snd_limited = 0;
+ tp->snd_cwnd =
+ (tp->snd_nxt - tp->snd_una) +
+ (tp->t_dupacks - tp->snd_limited) *
+ tp->t_maxseg;
+ (void) tcp_output(tp);
+ sent = tp->snd_max - oldsndmax;
+ if (sent > tp->t_maxseg) {
+ KASSERT((tp->t_dupacks == 2 &&
+ tp->snd_limited == 0) ||
+ (sent == tp->t_maxseg + 1 &&
+ tp->t_flags & TF_SENTFIN),
+ ("%s: sent too much",
+ __func__));
+ tp->snd_limited = 2;
+ } else if (sent > 0)
+ ++tp->snd_limited;
+ tp->snd_cwnd = oldcwnd;
+ goto drop;
+ }
+ } else
+ tp->t_dupacks = 0;
+ break;
+ }
+
+ KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
+ ("%s: th_ack <= snd_una", __func__));
+
+ /*
+ * If the congestion window was inflated to account
+ * for the other side's cached packets, retract it.
+ */
+ if (V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) {
+ if (IN_FASTRECOVERY(tp)) {
+ if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+ if (tp->t_flags & TF_SACK_PERMIT)
+ tcp_sack_partialack(tp, th);
+ else
+ tcp_newreno_partial_ack(tp, th);
+ } else {
+ /*
+ * Out of fast recovery.
+ * Window inflation should have left us
+ * with approximately snd_ssthresh
+ * outstanding data.
+ * But in case we would be inclined to
+ * send a burst, better to do it via
+ * the slow start mechanism.
+ */
+ if (SEQ_GT(th->th_ack +
+ tp->snd_ssthresh,
+ tp->snd_max))
+ tp->snd_cwnd = tp->snd_max -
+ th->th_ack +
+ tp->t_maxseg;
+ else
+ tp->snd_cwnd = tp->snd_ssthresh;
+ }
+ }
+ } else {
+ if (tp->t_dupacks >= tcprexmtthresh &&
+ tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ }
+ tp->t_dupacks = 0;
+ /*
+ * If we reach this point, ACK is not a duplicate,
+ * i.e., it ACKs something we sent.
+ */
+ if (tp->t_flags & TF_NEEDSYN) {
+ /*
+ * T/TCP: Connection was half-synchronized, and our
+ * SYN has been ACK'd (so connection is now fully
+ * synchronized). Go to non-starred state,
+ * increment snd_una for ACK of SYN, and check if
+ * we can do window scaling.
+ */
+ tp->t_flags &= ~TF_NEEDSYN;
+ tp->snd_una++;
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->rcv_scale = tp->request_r_scale;
+ /* Send window already scaled. */
+ }
+ }
+
+process_ACK:
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+ ("tcp_input: process_ACK ti_locked %d", ti_locked));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ acked = th->th_ack - tp->snd_una;
+ TCPSTAT_INC(tcps_rcvackpack);
+ TCPSTAT_ADD(tcps_rcvackbyte, acked);
+
+ /*
+ * If we just performed our first retransmit, and the ACK
+ * arrives within our recovery window, then it was a mistake
+ * to do the retransmit in the first place. Recover our
+ * original cwnd and ssthresh, and proceed to transmit where
+ * we left off.
+ */
+ if (tp->t_rxtshift == 1 && (int)(ticks - tp->t_badrxtwin) < 0) {
+ TCPSTAT_INC(tcps_sndrexmitbad);
+ tp->snd_cwnd = tp->snd_cwnd_prev;
+ tp->snd_ssthresh = tp->snd_ssthresh_prev;
+ tp->snd_recover = tp->snd_recover_prev;
+ if (tp->t_flags & TF_WASFRECOVERY)
+ ENTER_FASTRECOVERY(tp);
+ tp->snd_nxt = tp->snd_max;
+ tp->t_badrxtwin = 0; /* XXX probably not required */
+ }
+
+ /*
+ * If we have a timestamp reply, update smoothed
+ * round trip time. If no timestamp is present but
+ * transmit timer is running and timed sequence
+ * number was acked, update smoothed round trip time.
+ * Since we now have an rtt measurement, cancel the
+ * timer backoff (cf., Phil Karn's retransmit alg.).
+ * Recompute the initial retransmit timer.
+ *
+ * Some boxes send broken timestamp replies
+ * during the SYN+ACK phase, ignore
+ * timestamps of 0 or we could calculate a
+ * huge RTT and blow up the retransmit timer.
+ */
+ if ((to.to_flags & TOF_TS) != 0 &&
+ to.to_tsecr) {
+ if (!tp->t_rttlow || tp->t_rttlow > ticks - to.to_tsecr)
+ tp->t_rttlow = ticks - to.to_tsecr;
+ tcp_xmit_timer(tp, ticks - to.to_tsecr + 1);
+ } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
+ tp->t_rttlow = ticks - tp->t_rtttime;
+ tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+ }
+ tcp_xmit_bandwidth_limit(tp, th->th_ack);
+
+ /*
+ * If all outstanding data is acked, stop retransmit
+ * timer and remember to restart (more output or persist).
+ * If there is more data to be acked, restart retransmit
+ * timer, using current (possibly backed-off) value.
+ */
+ if (th->th_ack == tp->snd_max) {
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ needoutput = 1;
+ } else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+
+ /*
+ * If no data (only SYN) was ACK'd,
+ * skip rest of ACK processing.
+ */
+ if (acked == 0)
+ goto step6;
+
+ /*
+ * When new data is acked, open the congestion window.
+ * Method depends on which congestion control state we're
+ * in (slow start or cong avoid) and if ABC (RFC 3465) is
+ * enabled.
+ *
+ * slow start: cwnd <= ssthresh
+ * cong avoid: cwnd > ssthresh
+ *
+ * slow start and ABC (RFC 3465):
+ * Grow cwnd exponentially by the amount of data
+ * ACKed capping the max increment per ACK to
+ * (abc_l_var * maxseg) bytes.
+ *
+ * slow start without ABC (RFC 2581):
+ * Grow cwnd exponentially by maxseg per ACK.
+ *
+ * cong avoid and ABC (RFC 3465):
+ * Grow cwnd linearly by maxseg per RTT for each
+ * cwnd worth of ACKed data.
+ *
+ * cong avoid without ABC (RFC 2581):
+ * Grow cwnd linearly by approximately maxseg per RTT using
+ * maxseg^2 / cwnd per ACK as the increment.
+ * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
+ * avoid capping cwnd.
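+		 * Example (cong avoid, no ABC): with maxseg 1460 and cwnd
+		 * 14600 the increment is 1460*1460/14600 ~= 146 bytes per
+		 * ACK; the roughly ten ACKs per window (one per segment)
+		 * then grow cwnd by about one maxseg per RTT.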
+ */
+ if ((!V_tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
+ !IN_FASTRECOVERY(tp)) {
+ u_int cw = tp->snd_cwnd;
+ u_int incr = tp->t_maxseg;
+ /* In congestion avoidance? */
+ if (cw > tp->snd_ssthresh) {
+ if (V_tcp_do_rfc3465) {
+ tp->t_bytes_acked += acked;
+ if (tp->t_bytes_acked >= tp->snd_cwnd)
+ tp->t_bytes_acked -= cw;
+ else
+ incr = 0;
+ }
+ else
+ incr = max((incr * incr / cw), 1);
+ /*
+ * In slow-start with ABC enabled and no RTO in sight?
+ * (Must not use abc_l_var > 1 if slow starting after an
+ * RTO. On RTO, snd_nxt = snd_una, so the snd_nxt ==
+ * snd_max check is sufficient to handle this).
+ */
+ } else if (V_tcp_do_rfc3465 &&
+ tp->snd_nxt == tp->snd_max)
+ incr = min(acked,
+ V_tcp_abc_l_var * tp->t_maxseg);
+ /* ABC is on by default, so (incr == 0) frequently. */
+ if (incr > 0)
+ tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
+ }
+ SOCKBUF_LOCK(&so->so_snd);
+ if (acked > so->so_snd.sb_cc) {
+ tp->snd_wnd -= so->so_snd.sb_cc;
+ sbdrop_locked(&so->so_snd, (int)so->so_snd.sb_cc);
+ ourfinisacked = 1;
+ } else {
+ sbdrop_locked(&so->so_snd, acked);
+ tp->snd_wnd -= acked;
+ ourfinisacked = 0;
+ }
+ /* NB: sowwakeup_locked() does an implicit unlock. */
+ sowwakeup_locked(so);
+ /* Detect una wraparound. */
+ if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
+ !IN_FASTRECOVERY(tp) &&
+ SEQ_GT(tp->snd_una, tp->snd_recover) &&
+ SEQ_LEQ(th->th_ack, tp->snd_recover))
+ tp->snd_recover = th->th_ack - 1;
+ if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
+ IN_FASTRECOVERY(tp) &&
+ SEQ_GEQ(th->th_ack, tp->snd_recover)) {
+ EXIT_FASTRECOVERY(tp);
+ tp->t_bytes_acked = 0;
+ }
+ tp->snd_una = th->th_ack;
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ if (SEQ_GT(tp->snd_una, tp->snd_recover))
+ tp->snd_recover = tp->snd_una;
+ }
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+
+ switch (tp->t_state) {
+
+ /*
+ * In FIN_WAIT_1 STATE in addition to the processing
+ * for the ESTABLISHED state if our FIN is now acknowledged
+ * then enter FIN_WAIT_2.
+ */
+ case TCPS_FIN_WAIT_1:
+ if (ourfinisacked) {
+ /*
+ * If we can't receive any more
+ * data, then closing user can proceed.
+ * Starting the timer is contrary to the
+ * specification, but if we don't get a FIN
+ * we'll hang forever.
+ *
+ * XXXjl:
+ * we should release the tp also, and use a
+ * compressed state.
+ */
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ int timeout;
+
+ soisdisconnected(so);
+ timeout = (tcp_fast_finwait2_recycle) ?
+ tcp_finwait2_timeout : tcp_maxidle;
+ tcp_timer_activate(tp, TT_2MSL, timeout);
+ }
+ tp->t_state = TCPS_FIN_WAIT_2;
+ }
+ break;
+
+ /*
+ * In CLOSING STATE in addition to the processing for
+ * the ESTABLISHED state if the ACK acknowledges our FIN
+ * then enter the TIME-WAIT state, otherwise ignore
+ * the segment.
+ */
+ case TCPS_CLOSING:
+ if (ourfinisacked) {
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ tcp_twstart(tp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ m_freem(m);
+ return;
+ }
+ break;
+
+ /*
+ * In LAST_ACK, we may still be waiting for data to drain
+ * and/or to be acked, as well as for the ack of our FIN.
+ * If our FIN is now acknowledged, delete the TCB,
+ * enter the closed state and return.
+ */
+ case TCPS_LAST_ACK:
+ if (ourfinisacked) {
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ tp = tcp_close(tp);
+ goto drop;
+ }
+ break;
+ }
+ }
+
+step6:
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+ ("tcp_do_segment: step6 ti_locked %d", ti_locked));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * Update window information.
+	 * Don't look at window if no ACK: TACs send garbage on first SYN.
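+	 * snd_wl1/snd_wl2 remember the seq/ack of the segment that last
+	 * updated the window; per the RFC 793 SND.WL1/SND.WL2 test we only
+	 * take a window update from a segment that is at least as recent.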
+ */
+ if ((thflags & TH_ACK) &&
+ (SEQ_LT(tp->snd_wl1, th->th_seq) ||
+ (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
+ (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
+ /* keep track of pure window updates */
+ if (tlen == 0 &&
+ tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
+ TCPSTAT_INC(tcps_rcvwinupd);
+ tp->snd_wnd = tiwin;
+ tp->snd_wl1 = th->th_seq;
+ tp->snd_wl2 = th->th_ack;
+ if (tp->snd_wnd > tp->max_sndwnd)
+ tp->max_sndwnd = tp->snd_wnd;
+ needoutput = 1;
+ }
+
+ /*
+ * Process segments with URG.
+ */
+ if ((thflags & TH_URG) && th->th_urp &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ /*
+ * This is a kludge, but if we receive and accept
+ * random urgent pointers, we'll crash in
+ * soreceive. It's hard to imagine someone
+ * actually wanting to send this much urgent data.
+ */
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
+ th->th_urp = 0; /* XXX */
+ thflags &= ~TH_URG; /* XXX */
+ SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */
+ goto dodata; /* XXX */
+ }
+ /*
+ * If this segment advances the known urgent pointer,
+ * then mark the data stream. This should not happen
+ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+ * a FIN has been received from the remote side.
+ * In these states we ignore the URG.
+ *
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section as the original
+ * spec states (in one of two places).
+ */
+ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
+ tp->rcv_up = th->th_seq + th->th_urp;
+ so->so_oobmark = so->so_rcv.sb_cc +
+ (tp->rcv_up - tp->rcv_nxt) - 1;
+ if (so->so_oobmark == 0)
+ so->so_rcv.sb_state |= SBS_RCVATMARK;
+ sohasoutofband(so);
+ tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+ }
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ /*
+ * Remove out of band data so doesn't get presented to user.
+ * This can happen independent of advancing the URG pointer,
+ * but if two URG's are pending at once, some out-of-band
+ * data may creep in... ick.
+ */
+ if (th->th_urp <= (u_long)tlen &&
+ !(so->so_options & SO_OOBINLINE)) {
+ /* hdr drop is delayed */
+ tcp_pulloutofband(so, th, m, drop_hdrlen);
+ }
+ } else {
+ /*
+ * If no out of band data is expected,
+ * pull receive urgent pointer along
+ * with the receive window.
+ */
+ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+ tp->rcv_up = tp->rcv_nxt;
+ }
+dodata: /* XXX */
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+ ("tcp_do_segment: dodata ti_locked %d", ti_locked));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * Process the segment text, merging it into the TCP sequencing queue,
+ * and arranging for acknowledgment of receipt if necessary.
+ * This process logically involves adjusting tp->rcv_wnd as data
+ * is presented to the user (this happens in tcp_usrreq.c,
+ * case PRU_RCVD). If a FIN has already been received on this
+ * connection then we just ignore the text.
+ */
+ if ((tlen || (thflags & TH_FIN)) &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ tcp_seq save_start = th->th_seq;
+ m_adj(m, drop_hdrlen); /* delayed header drop */
+ /*
+ * Insert segment which includes th into TCP reassembly queue
+ * with control block tp. Set thflags to whether reassembly now
+ * includes a segment with FIN. This handles the common case
+ * inline (segment is the next to be received on an established
+ * connection, and the queue is empty), avoiding linkage into
+ * and removal from the queue and repetition of various
+ * conversions.
+ * Set DELACK for segments received in order, but ack
+ * immediately when segments are out of order (so
+ * fast retransmit can work).
+ */
+ if (th->th_seq == tp->rcv_nxt &&
+ LIST_EMPTY(&tp->t_segq) &&
+ TCPS_HAVEESTABLISHED(tp->t_state)) {
+ if (DELAY_ACK(tp))
+ tp->t_flags |= TF_DELACK;
+ else
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt += tlen;
+ thflags = th->th_flags & TH_FIN;
+ TCPSTAT_INC(tcps_rcvpack);
+ TCPSTAT_ADD(tcps_rcvbyte, tlen);
+ ND6_HINT(tp);
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ m_freem(m);
+ else
+ sbappendstream_locked(&so->so_rcv, m);
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
+ } else {
+ /*
+			 * XXX: Due to the header drop above, "th" is
+			 * theoretically invalid by now.  Fortunately
+			 * m_adj() doesn't actually free any mbufs
+ * when trimming from the head.
+ */
+ thflags = tcp_reass(tp, th, &tlen, m);
+ tp->t_flags |= TF_ACKNOW;
+ }
+ if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
+ tcp_update_sack_list(tp, save_start, save_start + tlen);
+#if 0
+ /*
+ * Note the amount of data that peer has sent into
+ * our window, in order to estimate the sender's
+ * buffer size.
+ * XXX: Unused.
+ */
+ len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+#endif
+ } else {
+ m_freem(m);
+ thflags &= ~TH_FIN;
+ }
+
+ /*
+ * If FIN is received ACK the FIN and let the user know
+ * that the connection is closing.
+ */
+ if (thflags & TH_FIN) {
+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ socantrcvmore(so);
+ /*
+ * If connection is half-synchronized
+ * (ie NEEDSYN flag on) then delay ACK,
+ * so it may be piggybacked when SYN is sent.
+ * Otherwise, since we received a FIN then no
+ * more input can be expected, send ACK now.
+ */
+ if (tp->t_flags & TF_NEEDSYN)
+ tp->t_flags |= TF_DELACK;
+ else
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt++;
+ }
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED and ESTABLISHED STATES
+ * enter the CLOSE_WAIT state.
+ */
+ case TCPS_SYN_RECEIVED:
+ tp->t_starttime = ticks;
+ /* FALLTHROUGH */
+ case TCPS_ESTABLISHED:
+ tp->t_state = TCPS_CLOSE_WAIT;
+ break;
+
+ /*
+		 * If still in FIN_WAIT_1 STATE, our FIN has not been acked, so
+ * enter the CLOSING state.
+ */
+ case TCPS_FIN_WAIT_1:
+ tp->t_state = TCPS_CLOSING;
+ break;
+
+ /*
+ * In FIN_WAIT_2 state enter the TIME_WAIT state,
+ * starting the time-wait timer, turning off the other
+ * standard timers.
+ */
+ case TCPS_FIN_WAIT_2:
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata "
+ "TCP_FIN_WAIT_2 ti_locked: %d", __func__,
+ ti_locked));
+
+ tcp_twstart(tp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return;
+ }
+ }
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ else
+ panic("%s: dodata epilogue ti_locked %d", __func__,
+ ti_locked);
+ ti_locked = TI_UNLOCKED;
+
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+
+ /*
+ * Return any desired output.
+ */
+ if (needoutput || (tp->t_flags & TF_ACKNOW))
+ (void) tcp_output(tp);
+
+check_delack:
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
+ __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ if (tp->t_flags & TF_DELACK) {
+ tp->t_flags &= ~TF_DELACK;
+ tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
+ }
+ INP_WUNLOCK(tp->t_inpcb);
+ return;
+
+dropafterack:
+ KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+ ("tcp_do_segment: dropafterack ti_locked %d", ti_locked));
+
+ /*
+	 * Generate an ACK, dropping the incoming segment if it occupies
+	 * sequence space; the ACK reflects our state.
+ *
+ * We can now skip the test for the RST flag since all
+ * paths to this code happen after packets containing
+ * RST have been dropped.
+ *
+ * In the SYN-RECEIVED state, don't send an ACK unless the
+ * segment we received passes the SYN-RECEIVED ACK test.
+ * If it fails send a RST. This breaks the loop in the
+ * "LAND" DoS attack, and also prevents an ACK storm
+ * between two listening ports that have been sent forged
+ * SYN segments, each with the source address of the other.
+ */
+ if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
+ (SEQ_GT(tp->snd_una, th->th_ack) ||
+ SEQ_GT(th->th_ack, tp->snd_max)) ) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ else
+ panic("%s: dropafterack epilogue ti_locked %d", __func__,
+ ti_locked);
+ ti_locked = TI_UNLOCKED;
+
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ INP_WUNLOCK(tp->t_inpcb);
+ m_freem(m);
+ return;
+
+dropwithreset:
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ else
+ panic("%s: dropwithreset ti_locked %d", __func__, ti_locked);
+ ti_locked = TI_UNLOCKED;
+
+ if (tp != NULL) {
+ tcp_dropwithreset(m, th, tp, tlen, rstreason);
+ INP_WUNLOCK(tp->t_inpcb);
+ } else
+ tcp_dropwithreset(m, th, NULL, tlen, rstreason);
+ return;
+
+drop:
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ else if (ti_locked == TI_WLOCKED)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+#ifdef INVARIANTS
+ else
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+#endif
+ ti_locked = TI_UNLOCKED;
+
+ /*
+ * Drop space held by incoming segment and return.
+ */
+#ifdef TCPDEBUG
+ if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ if (tp != NULL)
+ INP_WUNLOCK(tp->t_inpcb);
+ m_freem(m);
+}
+
+/*
+ * Issue RST and make ACK acceptable to originator of segment.
+ * The mbuf must still include the original packet header.
+ * tp may be NULL.
+ */
+static void
+tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
+ int tlen, int rstreason)
+{
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+
+ if (tp != NULL) {
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ }
+
+ /* Don't bother if destination was broadcast/multicast. */
+ if ((th->th_flags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
+ goto drop;
+#ifdef INET6
+ if (mtod(m, struct ip *)->ip_v == 6) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+ IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+ goto drop;
+ /* IPv6 anycast check is done at tcp6_input() */
+ } else
+#endif
+ {
+ ip = mtod(m, struct ip *);
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+ ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
+ goto drop;
+ }
+
+ /* Perform bandwidth limiting. */
+ if (badport_bandlim(rstreason) < 0)
+ goto drop;
+
+ /* tcp_respond consumes the mbuf chain. */
+ if (th->th_flags & TH_ACK) {
+ tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0,
+ th->th_ack, TH_RST);
+ } else {
+ if (th->th_flags & TH_SYN)
+ tlen++;
+ tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
+ (tcp_seq)0, TH_RST|TH_ACK);
+ }
+ return;
+drop:
+ m_freem(m);
+}
+
+/*
+ * Parse TCP options and place in tcpopt.
+ */
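+/*
+ * Each option other than EOL and NOP is encoded as <kind, length, data>;
+ * for example, an MSS of 1460 appears on the wire as the four bytes
+ * 0x02 0x04 0x05 0xb4.
+ */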
+static void
+tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
+{
+ int opt, optlen;
+
+ to->to_flags = 0;
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[0];
+ if (opt == TCPOPT_EOL)
+ break;
+ if (opt == TCPOPT_NOP)
+ optlen = 1;
+ else {
+ if (cnt < 2)
+ break;
+ optlen = cp[1];
+ if (optlen < 2 || optlen > cnt)
+ break;
+ }
+ switch (opt) {
+ case TCPOPT_MAXSEG:
+ if (optlen != TCPOLEN_MAXSEG)
+ continue;
+ if (!(flags & TO_SYN))
+ continue;
+ to->to_flags |= TOF_MSS;
+ bcopy((char *)cp + 2,
+ (char *)&to->to_mss, sizeof(to->to_mss));
+ to->to_mss = ntohs(to->to_mss);
+ break;
+ case TCPOPT_WINDOW:
+ if (optlen != TCPOLEN_WINDOW)
+ continue;
+ if (!(flags & TO_SYN))
+ continue;
+ to->to_flags |= TOF_SCALE;
+ to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
+ break;
+ case TCPOPT_TIMESTAMP:
+ if (optlen != TCPOLEN_TIMESTAMP)
+ continue;
+ to->to_flags |= TOF_TS;
+ bcopy((char *)cp + 2,
+ (char *)&to->to_tsval, sizeof(to->to_tsval));
+ to->to_tsval = ntohl(to->to_tsval);
+ bcopy((char *)cp + 6,
+ (char *)&to->to_tsecr, sizeof(to->to_tsecr));
+ to->to_tsecr = ntohl(to->to_tsecr);
+ break;
+#ifdef TCP_SIGNATURE
+ /*
+ * XXX In order to reply to a host which has set the
+ * TCP_SIGNATURE option in its initial SYN, we have to
+ * record the fact that the option was observed here
+ * for the syncache code to perform the correct response.
+ */
+ case TCPOPT_SIGNATURE:
+ if (optlen != TCPOLEN_SIGNATURE)
+ continue;
+ to->to_flags |= TOF_SIGNATURE;
+ to->to_signature = cp + 2;
+ break;
+#endif
+ case TCPOPT_SACK_PERMITTED:
+ if (optlen != TCPOLEN_SACK_PERMITTED)
+ continue;
+ if (!(flags & TO_SYN))
+ continue;
+ if (!V_tcp_do_sack)
+ continue;
+ to->to_flags |= TOF_SACKPERM;
+ break;
+ case TCPOPT_SACK:
+ if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
+ continue;
+ if (flags & TO_SYN)
+ continue;
+ to->to_flags |= TOF_SACK;
+ to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
+ to->to_sacks = cp + 2;
+ TCPSTAT_INC(tcps_sack_rcv_blocks);
+ break;
+ default:
+ continue;
+ }
+ }
+}
+
+/*
+ * Pull out of band byte out of a segment so
+ * it doesn't appear in the user's data queue.
+ * It is still reflected in the segment length for
+ * sequencing purposes.
+ */
+static void
+tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
+ int off)
+{
+ int cnt = off + th->th_urp - 1;
+
+ while (cnt >= 0) {
+ if (m->m_len > cnt) {
+ char *cp = mtod(m, caddr_t) + cnt;
+ struct tcpcb *tp = sototcpcb(so);
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tp->t_iobc = *cp;
+ tp->t_oobflags |= TCPOOB_HAVEDATA;
+ bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
+ m->m_len--;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len--;
+ return;
+ }
+ cnt -= m->m_len;
+ m = m->m_next;
+ if (m == NULL)
+ break;
+ }
+ panic("tcp_pulloutofband");
+}
+
+/*
+ * Collect new round-trip time estimate
+ * and update averages and current timeout.
+ */
+static void
+tcp_xmit_timer(struct tcpcb *tp, int rtt)
+{
+ int delta;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ TCPSTAT_INC(tcps_rttupdated);
+ tp->t_rttupdated++;
+ if (tp->t_srtt != 0) {
+ /*
+ * srtt is stored as fixed point with 5 bits after the
+		 * binary point (i.e., scaled by 32). The following magic
+ * is equivalent to the smoothing algorithm in rfc793 with
+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+ * point). Adjust rtt to origin 0.
+ */
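+		/*
+		 * For example, with TCP_RTT_SHIFT == 5 and TCP_DELTA_SHIFT
+		 * == 2: if t_srtt is 256 (8 ticks scaled by 32) and a
+		 * 16-tick rtt sample arrives, delta = (15 << 2) - (256 >> 3)
+		 * = 28 and t_srtt becomes 284, i.e. the estimate moves 1/8
+		 * of the way toward the new sample.
+		 */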
+ delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+ - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
+
+ if ((tp->t_srtt += delta) <= 0)
+ tp->t_srtt = 1;
+
+ /*
+ * We accumulate a smoothed rtt variance (actually, a
+ * smoothed mean difference), then set the retransmit
+ * timer to smoothed rtt + 4 times the smoothed variance.
+ * rttvar is stored as fixed point with 4 bits after the
+ * binary point (scaled by 16). The following is
+ * equivalent to rfc793 smoothing with an alpha of .75
+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
+ * rfc793's wired-in beta.
+ */
+ if (delta < 0)
+ delta = -delta;
+ delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
+ if ((tp->t_rttvar += delta) <= 0)
+ tp->t_rttvar = 1;
+ if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
+ tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
+ } else {
+ /*
+ * No rtt measurement yet - use the unsmoothed rtt.
+ * Set the variance to half the rtt (so our first
+ * retransmit happens at 3*rtt).
+ */
+ tp->t_srtt = rtt << TCP_RTT_SHIFT;
+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+ tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
+ }
+ tp->t_rtttime = 0;
+ tp->t_rxtshift = 0;
+
+ /*
+ * the retransmit should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ */
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+
+ /*
+ * We received an ack for a packet that wasn't retransmitted;
+ * it is probably safe to discard any error indications we've
+ * received recently. This isn't quite right, but close enough
+ * for now (a route might have failed after we sent a segment,
+ * and the return path might not be symmetrical).
+ */
+ tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs. If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks. We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ * Also take into account the space needed for options that we
+ * send regularly. Make maxseg shorter by that amount to assure
+ * that we can send maxseg amount of data even when the options
+ * are present. Store the upper limit of the length of options plus
+ * data in maxopd.
+ *
+ * In case of T/TCP, we call this routine during implicit connection
+ * setup as well (offer = -1), to initialize maxseg from the cached
+ * MSS of our peer.
+ *
+ * NOTE that this routine is only called when we process an incoming
+ * segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt().
+ */
+void
+tcp_mss_update(struct tcpcb *tp, int offer,
+ struct hc_metrics_lite *metricptr, int *mtuflags)
+{
+ int mss;
+ u_long maxmtu;
+ struct inpcb *inp = tp->t_inpcb;
+ struct hc_metrics_lite metrics;
+ int origoffer = offer;
+#ifdef INET6
+ int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
+ size_t min_protoh = isipv6 ?
+ sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
+ sizeof (struct tcpiphdr);
+#else
+ const size_t min_protoh = sizeof(struct tcpiphdr);
+#endif
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /* Initialize. */
+#ifdef INET6
+ if (isipv6) {
+ maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags);
+ tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+ } else
+#endif
+ {
+ maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags);
+ tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+ }
+
+ /*
+ * No route to sender, stay with default mss and return.
+ */
+ if (maxmtu == 0) {
+ /*
+ * In case we return early we need to initialize metrics
+ * to a defined state as tcp_hc_get() would do for us
+ * if there was no cache hit.
+ */
+ if (metricptr != NULL)
+ bzero(metricptr, sizeof(struct hc_metrics_lite));
+ return;
+ }
+
+ /* What have we got? */
+ switch (offer) {
+ case 0:
+ /*
+ * Offer == 0 means that there was no MSS on the SYN
+ * segment, in this case we use tcp_mssdflt as
+ * already assigned to t_maxopd above.
+ */
+ offer = tp->t_maxopd;
+ break;
+
+ case -1:
+ /*
+ * Offer == -1 means that we didn't receive SYN yet.
+ */
+ /* FALLTHROUGH */
+
+ default:
+ /*
+ * Prevent DoS attack with too small MSS. Round up
+ * to at least minmss.
+ */
+ offer = max(offer, V_tcp_minmss);
+ }
+
+ /*
+ * rmx information is now retrieved from tcp_hostcache.
+ */
+ tcp_hc_get(&inp->inp_inc, &metrics);
+ if (metricptr != NULL)
+ bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite));
+
+ /*
+	 * If there's a discovered MTU in the TCP hostcache, use it;
+	 * otherwise, use the link MTU.
+ */
+ if (metrics.rmx_mtu)
+ mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
+ else {
+#ifdef INET6
+ if (isipv6) {
+ mss = maxmtu - min_protoh;
+ if (!V_path_mtu_discovery &&
+ !in6_localaddr(&inp->in6p_faddr))
+ mss = min(mss, V_tcp_v6mssdflt);
+ } else
+#endif
+ {
+ mss = maxmtu - min_protoh;
+ if (!V_path_mtu_discovery &&
+ !in_localaddr(inp->inp_faddr))
+ mss = min(mss, V_tcp_mssdflt);
+ }
+ /*
+ * XXX - The above conditional (mss = maxmtu - min_protoh)
+ * probably violates the TCP spec.
+ * The problem is that, since we don't know the
+ * other end's MSS, we are supposed to use a conservative
+ * default. But, if we do that, then MTU discovery will
+ * never actually take place, because the conservative
+ * default is much less than the MTUs typically seen
+ * on the Internet today. For the moment, we'll sweep
+ * this under the carpet.
+ *
+ * The conservative default might not actually be a problem
+ * if the only case this occurs is when sending an initial
+ * SYN with options and data to a host we've never talked
+ * to before. Then, they will reply with an MSS value which
+ * will get recorded and the new parameters should get
+ * recomputed. For Further Study.
+ */
+ }
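+	/*
+	 * For example, with a 1500-byte Ethernet MTU and no hostcache MTU,
+	 * mss starts at 1500 - 40 = 1460 for IPv4 (1500 - 60 = 1440 for
+	 * IPv6) and is then limited to whatever the peer offered.
+	 */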
+ mss = min(mss, offer);
+
+ /*
+ * Sanity check: make sure that maxopd will be large
+	 * enough to allow some data on segments even if
+	 * all the option space is used (40 bytes).  Otherwise
+ * funny things may happen in tcp_output.
+ */
+ mss = max(mss, 64);
+
+ /*
+ * maxopd stores the maximum length of data AND options
+ * in a segment; maxseg is the amount of data in a normal
+ * segment. We need to store this value (maxopd) apart
+ * from maxseg, because now every segment carries options
+ * and thus we normally have somewhat less data in segments.
+ */
+ tp->t_maxopd = mss;
+
+ /*
+ * origoffer==-1 indicates that no segments were received yet.
+ * In this case we just guess.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ (origoffer == -1 ||
+ (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
+ mss -= TCPOLEN_TSTAMP_APPA;
+
+#if (MCLBYTES & (MCLBYTES - 1)) == 0
+ if (mss > MCLBYTES)
+ mss &= ~(MCLBYTES-1);
+#else
+ if (mss > MCLBYTES)
+ mss = mss / MCLBYTES * MCLBYTES;
+#endif
+ tp->t_maxseg = mss;
+}
+
+void
+tcp_mss(struct tcpcb *tp, int offer)
+{
+ int rtt, mss;
+ u_long bufsize;
+ struct inpcb *inp;
+ struct socket *so;
+ struct hc_metrics_lite metrics;
+ int mtuflags = 0;
+#ifdef INET6
+ int isipv6;
+#endif
+ KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
+
+ tcp_mss_update(tp, offer, &metrics, &mtuflags);
+
+ mss = tp->t_maxseg;
+ inp = tp->t_inpcb;
+#ifdef INET6
+ isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
+#endif
+
+ /*
+	 * If there's a pipesize, change the socket buffer to that size;
+	 * don't change it if sb_hiwat differs from the default (then it
+	 * has been changed on purpose with setsockopt()).
+ * Make the socket buffers an integral number of mss units;
+ * if the mss is larger than the socket buffer, decrease the mss.
+ */
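+	/*
+	 * For example, a 32768-byte send buffer with an mss of 1460 is
+	 * rounded up to 23 * 1460 = 33580 bytes (subject to sb_max).
+	 */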
+ so = inp->inp_socket;
+ SOCKBUF_LOCK(&so->so_snd);
+ if ((so->so_snd.sb_hiwat == tcp_sendspace) && metrics.rmx_sendpipe)
+ bufsize = metrics.rmx_sendpipe;
+ else
+ bufsize = so->so_snd.sb_hiwat;
+ if (bufsize < mss)
+ mss = bufsize;
+ else {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ if (bufsize > so->so_snd.sb_hiwat)
+ (void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ tp->t_maxseg = mss;
+
+ SOCKBUF_LOCK(&so->so_rcv);
+ if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe)
+ bufsize = metrics.rmx_recvpipe;
+ else
+ bufsize = so->so_rcv.sb_hiwat;
+ if (bufsize > mss) {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ if (bufsize > so->so_rcv.sb_hiwat)
+ (void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
+ }
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ /*
+ * While we're here, check the others too.
+ */
+ if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
+ tp->t_srtt = rtt;
+ tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
+ TCPSTAT_INC(tcps_usedrtt);
+ if (metrics.rmx_rttvar) {
+ tp->t_rttvar = metrics.rmx_rttvar;
+ TCPSTAT_INC(tcps_usedrttvar);
+ } else {
+ /* default variation is +- 1 rtt */
+ tp->t_rttvar =
+ tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+ }
+ TCPT_RANGESET(tp->t_rxtcur,
+ ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+ tp->t_rttmin, TCPTV_REXMTMAX);
+ }
+ if (metrics.rmx_ssthresh) {
+ /*
+ * There's some sort of gateway or interface
+ * buffer limit on the path. Use this to set
+		 * the slow start threshold, but set the
+ * threshold to no less than 2*mss.
+ */
+ tp->snd_ssthresh = max(2 * mss, metrics.rmx_ssthresh);
+ TCPSTAT_INC(tcps_usedssthresh);
+ }
+ if (metrics.rmx_bandwidth)
+ tp->snd_bandwidth = metrics.rmx_bandwidth;
+
+ /*
+ * Set the slow-start flight size depending on whether this
+ * is a local network or not.
+ *
+ * Extend this so we cache the cwnd too and retrieve it here.
+ * Make cwnd even bigger than RFC3390 suggests but only if we
+ * have previous experience with the remote host. Be careful
+	 * not to make cwnd bigger than the remote receive window or our
+	 * own send socket buffer.  Maybe put some additional upper bound
+	 * on the retrieved cwnd.  Should do incremental updates to
+	 * hostcache when cwnd collapses so the next connection doesn't
+	 * overload the path again.
+ *
+ * XXXAO: Initializing the CWND from the hostcache is broken
+	 * and in its current form is not RFC conformant.  It is disabled
+ * until fixed or removed entirely.
+ *
+	 * RFC3390 says only do this if the SYN or SYN/ACK didn't get lost.
+ * We currently check only in syncache_socket for that.
+ */
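+	/*
+	 * The RFC 3390 initial window below is min(4*mss, max(2*mss, 4380));
+	 * e.g. a 1460-byte mss gives 4380 bytes (three segments), while a
+	 * 500-byte mss gives 2000 bytes (four segments).
+	 */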
+/* #define TCP_METRICS_CWND */
+#ifdef TCP_METRICS_CWND
+ if (metrics.rmx_cwnd)
+ tp->snd_cwnd = max(mss,
+ min(metrics.rmx_cwnd / 2,
+ min(tp->snd_wnd, so->so_snd.sb_hiwat)));
+ else
+#endif
+ if (V_tcp_do_rfc3390)
+ tp->snd_cwnd = min(4 * mss, max(2 * mss, 4380));
+#ifdef INET6
+ else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
+ (!isipv6 && in_localaddr(inp->inp_faddr)))
+#else
+ else if (in_localaddr(inp->inp_faddr))
+#endif
+ tp->snd_cwnd = mss * V_ss_fltsz_local;
+ else
+ tp->snd_cwnd = mss * V_ss_fltsz;
+
+ /* Check the interface for TSO capabilities. */
+ if (mtuflags & CSUM_TSO)
+ tp->t_flags |= TF_TSO;
+}
+
+/*
+ * Determine the MSS option to send on an outgoing SYN.
+ */
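+/*
+ * For example, with a 1500-byte interface MTU and a 1400-byte hostcache
+ * MTU, the IPv4 result is min(1500, 1400) - 40 = 1360.
+ */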
+int
+tcp_mssopt(struct in_conninfo *inc)
+{
+ int mss = 0;
+ u_long maxmtu = 0;
+ u_long thcmtu = 0;
+ size_t min_protoh;
+
+ KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));
+
+#ifdef INET6
+ if (inc->inc_flags & INC_ISIPV6) {
+ mss = V_tcp_v6mssdflt;
+ maxmtu = tcp_maxmtu6(inc, NULL);
+ thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
+ min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+ } else
+#endif
+ {
+ mss = V_tcp_mssdflt;
+ maxmtu = tcp_maxmtu(inc, NULL);
+ thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
+ min_protoh = sizeof(struct tcpiphdr);
+ }
+ if (maxmtu && thcmtu)
+ mss = min(maxmtu, thcmtu) - min_protoh;
+ else if (maxmtu || thcmtu)
+ mss = max(maxmtu, thcmtu) - min_protoh;
+
+ return (mss);
+}
+
+
+/*
+ * When a partial ack arrives, force the retransmission of the
+ * next unacknowledged segment.  Do not clear tp->t_dupacks.
+ * By setting snd_nxt to th_ack, this forces the retransmission timer to
+ * be started again.
+ */
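+/*
+ * Illustrative numbers: with t_maxseg 500, snd_una 1000 and th_ack 1500,
+ * snd_cwnd is temporarily set to 1000 so that tcp_output() emits exactly
+ * one segment starting at 1500; the saved cwnd is then deflated by the
+ * 500 newly acked bytes and grown by one maxseg.
+ */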
+static void
+tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
+{
+ tcp_seq onxt = tp->snd_nxt;
+ u_long ocwnd = tp->snd_cwnd;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tp->t_rtttime = 0;
+ tp->snd_nxt = th->th_ack;
+ /*
+ * Set snd_cwnd to one segment beyond acknowledged offset.
+ * (tp->snd_una has not yet been updated when this function is called.)
+ */
+ tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ tp->snd_cwnd = ocwnd;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ /*
+ * Partial window deflation. Relies on fact that tp->snd_una
+ * not updated yet.
+ */
+ if (tp->snd_cwnd > th->th_ack - tp->snd_una)
+ tp->snd_cwnd -= th->th_ack - tp->snd_una;
+ else
+ tp->snd_cwnd = 0;
+ tp->snd_cwnd += tp->t_maxseg;
+}
diff --git a/freebsd/sys/netinet/tcp_lro.c b/freebsd/sys/netinet/tcp_lro.c
new file mode 100644
index 00000000..6aaff4a5
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_lro.c
@@ -0,0 +1,389 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/******************************************************************************
+
+Copyright (c) 2007, Myricom Inc.
+Copyright (c) 2008, Intel Corporation.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Myricom Inc, nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ 3. Neither the name of the Intel Corporation, nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+***************************************************************************/
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/endian.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/socket.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/ethernet.h>
+#include <freebsd/net/if_media.h>
+
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_lro.h>
+
+#include <freebsd/machine/bus.h>
+#include <freebsd/machine/in_cksum.h>
+
+
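+/*
+ * Fold a 16-bit ones'-complement sum of the buffer; the result is not
+ * complemented here, callers xor it with 0xffff where a final Internet
+ * checksum is needed.  The loop consumes four bytes per iteration, so
+ * callers pass lengths that are multiples of four (the 20-byte IP header
+ * and the 4-byte-aligned TCP header).
+ */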
+static uint16_t do_csum_data(uint16_t *raw, int len)
+{
+ uint32_t csum;
+ csum = 0;
+ while (len > 0) {
+ csum += *raw;
+ raw++;
+ csum += *raw;
+ raw++;
+ len -= 4;
+ }
+ csum = (csum >> 16) + (csum & 0xffff);
+ csum = (csum >> 16) + (csum & 0xffff);
+ return (uint16_t)csum;
+}
+
+/*
+ * Allocate and init the LRO data structures
+ */
+int
+tcp_lro_init(struct lro_ctrl *cntl)
+{
+ struct lro_entry *lro;
+ int i, error = 0;
+
+ SLIST_INIT(&cntl->lro_free);
+ SLIST_INIT(&cntl->lro_active);
+
+ cntl->lro_bad_csum = 0;
+ cntl->lro_queued = 0;
+ cntl->lro_flushed = 0;
+
+ for (i = 0; i < LRO_ENTRIES; i++) {
+ lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (lro == NULL) {
+ if (i == 0)
+ error = ENOMEM;
+ break;
+ }
+ cntl->lro_cnt = i;
+ SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
+ }
+
+ return (error);
+}
+
+void
+tcp_lro_free(struct lro_ctrl *cntl)
+{
+ struct lro_entry *entry;
+
+ while (!SLIST_EMPTY(&cntl->lro_free)) {
+ entry = SLIST_FIRST(&cntl->lro_free);
+ SLIST_REMOVE_HEAD(&cntl->lro_free, next);
+ free(entry, M_DEVBUF);
+ }
+}
+
+void
+tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
+{
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct tcphdr *tcp;
+ uint32_t *ts_ptr;
+ uint32_t tcplen, tcp_csum;
+
+
+ if (lro->append_cnt) {
+ /* incorporate the new len into the ip header and
+ * re-calculate the checksum */
+ ip = lro->ip;
+ ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
+ ip->ip_sum = 0;
+ ip->ip_sum = 0xffff ^
+ do_csum_data((uint16_t*)ip,
+ sizeof (*ip));
+
+ lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
+ CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ lro->m_head->m_pkthdr.csum_data = 0xffff;
+ lro->m_head->m_pkthdr.len = lro->len;
+
+ /* incorporate the latest ack into the tcp header */
+ tcp = (struct tcphdr *) (ip + 1);
+ tcp->th_ack = lro->ack_seq;
+ tcp->th_win = lro->window;
+ /* incorporate latest timestamp into the tcp header */
+ if (lro->timestamp) {
+ ts_ptr = (uint32_t *)(tcp + 1);
+ ts_ptr[1] = htonl(lro->tsval);
+ ts_ptr[2] = lro->tsecr;
+ }
+ /*
+ * update checksum in tcp header by re-calculating the
+ * tcp pseudoheader checksum, and adding it to the checksum
+ * of the tcp payload data
+ */
+ tcp->th_sum = 0;
+ tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
+ tcp_csum = lro->data_csum;
+ tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(tcplen + IPPROTO_TCP));
+ tcp_csum += do_csum_data((uint16_t*)tcp,
+ tcp->th_off << 2);
+ tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
+ tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
+ tcp->th_sum = 0xffff ^ tcp_csum;
+ }
+ ifp = cntl->ifp;
+ (*ifp->if_input)(cntl->ifp, lro->m_head);
+ cntl->lro_queued += lro->append_cnt + 1;
+ cntl->lro_flushed++;
+ lro->m_head = NULL;
+ lro->timestamp = 0;
+ lro->append_cnt = 0;
+ SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
+}
+
+int
+tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
+{
+ struct ether_header *eh;
+ struct ip *ip;
+ struct tcphdr *tcp;
+ uint32_t *ts_ptr;
+ struct mbuf *m_nxt, *m_tail;
+ struct lro_entry *lro;
+ int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
+ int opt_bytes, trim, csum_flags;
+ uint32_t seq, tmp_csum, device_mtu;
+
+
+ eh = mtod(m_head, struct ether_header *);
+ if (eh->ether_type != htons(ETHERTYPE_IP))
+ return 1;
+ ip = (struct ip *) (eh + 1);
+ if (ip->ip_p != IPPROTO_TCP)
+ return 1;
+
+ /* ensure there are no options */
+ if ((ip->ip_hl << 2) != sizeof (*ip))
+ return -1;
+
+ /* .. and the packet is not fragmented */
+ if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
+ return -1;
+
+ /* verify that the IP header checksum is correct */
+ csum_flags = m_head->m_pkthdr.csum_flags;
+ if (csum_flags & CSUM_IP_CHECKED) {
+ if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
+ cntl->lro_bad_csum++;
+ return -1;
+ }
+ } else {
+ tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
+ if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
+ cntl->lro_bad_csum++;
+ return -1;
+ }
+ }
+
+ /* find the TCP header */
+ tcp = (struct tcphdr *) (ip + 1);
+
+	/* Get the TCP checksum if we don't have it */
+ if (!csum)
+ csum = tcp->th_sum;
+
+ /* ensure no bits set besides ack or psh */
+ if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
+ return -1;
+
+	/* Check for timestamps.  Since the only option we handle is
+	   timestamps, we only have to handle the simple case of
+	   aligned timestamps. */
+
+ opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
+ tcp_hdr_len = sizeof (*tcp) + opt_bytes;
+ ts_ptr = (uint32_t *)(tcp + 1);
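+	/*
+	 * The only layout accepted below is NOP, NOP, kind 8 (timestamp),
+	 * length 10, i.e. the leading option bytes 0x01 0x01 0x08 0x0a.
+	 */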
+ if (opt_bytes != 0) {
+ if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
+ (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
+ return -1;
+ }
+
+ ip_len = ntohs(ip->ip_len);
+ tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
+
+
+ /*
+ * If frame is padded beyond the end of the IP packet,
+ * then we must trim the extra bytes off the end.
+ */
+ tot_len = m_head->m_pkthdr.len;
+ trim = tot_len - (ip_len + ETHER_HDR_LEN);
+ if (trim != 0) {
+ if (trim < 0) {
+ /* truncated packet */
+ return -1;
+ }
+ m_adj(m_head, -trim);
+ tot_len = m_head->m_pkthdr.len;
+ }
+
+ m_nxt = m_head;
+ m_tail = NULL; /* -Wuninitialized */
+ while (m_nxt != NULL) {
+ m_tail = m_nxt;
+ m_nxt = m_tail->m_next;
+ }
+
+ hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
+ seq = ntohl(tcp->th_seq);
+
+ SLIST_FOREACH(lro, &cntl->lro_active, next) {
+ if (lro->source_port == tcp->th_sport &&
+ lro->dest_port == tcp->th_dport &&
+ lro->source_ip == ip->ip_src.s_addr &&
+ lro->dest_ip == ip->ip_dst.s_addr) {
+ /* Try to append it */
+
+ if (__predict_false(seq != lro->next_seq)) {
+ /* out of order packet */
+ SLIST_REMOVE(&cntl->lro_active, lro,
+ lro_entry, next);
+ tcp_lro_flush(cntl, lro);
+ return -1;
+ }
+
+ if (opt_bytes) {
+ uint32_t tsval = ntohl(*(ts_ptr + 1));
+ /* make sure timestamp values are increasing */
+ if (__predict_false(lro->tsval > tsval ||
+ *(ts_ptr + 2) == 0)) {
+ return -1;
+ }
+ lro->tsval = tsval;
+ lro->tsecr = *(ts_ptr + 2);
+ }
+
+ lro->next_seq += tcp_data_len;
+ lro->ack_seq = tcp->th_ack;
+ lro->window = tcp->th_win;
+ lro->append_cnt++;
+ if (tcp_data_len == 0) {
+ m_freem(m_head);
+ return 0;
+ }
+ /* subtract off the checksum of the tcp header
+ * from the hardware checksum, and add it to the
+ * stored tcp data checksum. Byteswap the checksum
+ * if the total length so far is odd
+ */
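+			/*
+			 * Adding (x ^ 0xffff) is ones'-complement
+			 * subtraction of x, as used for incremental
+			 * checksum updates (cf. RFC 1071).
+			 */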
+ tmp_csum = do_csum_data((uint16_t*)tcp,
+ tcp_hdr_len);
+ csum = csum + (tmp_csum ^ 0xffff);
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ if (lro->len & 0x1) {
+ /* Odd number of bytes so far, flip bytes */
+ csum = ((csum << 8) | (csum >> 8)) & 0xffff;
+ }
+ csum = csum + lro->data_csum;
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ lro->data_csum = csum;
+
+ lro->len += tcp_data_len;
+
+ /* adjust mbuf so that m->m_data points to
+ the first byte of the payload */
+ m_adj(m_head, hlen);
+ /* append mbuf chain */
+ lro->m_tail->m_next = m_head;
+ /* advance the last pointer */
+ lro->m_tail = m_tail;
+ /* flush packet if required */
+ device_mtu = cntl->ifp->if_mtu;
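+			/*
+			 * lro->len is later stored in the 16-bit ip_len
+			 * field, so flush while there is still room for
+			 * one more full-MTU append.
+			 */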
+ if (lro->len > (65535 - device_mtu)) {
+ SLIST_REMOVE(&cntl->lro_active, lro,
+ lro_entry, next);
+ tcp_lro_flush(cntl, lro);
+ }
+ return 0;
+ }
+ }
+
+ if (SLIST_EMPTY(&cntl->lro_free))
+ return -1;
+
+ /* start a new chain */
+ lro = SLIST_FIRST(&cntl->lro_free);
+ SLIST_REMOVE_HEAD(&cntl->lro_free, next);
+ SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
+ lro->source_port = tcp->th_sport;
+ lro->dest_port = tcp->th_dport;
+ lro->source_ip = ip->ip_src.s_addr;
+ lro->dest_ip = ip->ip_dst.s_addr;
+ lro->next_seq = seq + tcp_data_len;
+ lro->mss = tcp_data_len;
+ lro->ack_seq = tcp->th_ack;
+ lro->window = tcp->th_win;
+
+ /* save the checksum of just the TCP payload by
+ * subtracting off the checksum of the TCP header from
+	 * the entire hardware checksum.
+	 * Since the IP header checksum is correct, the checksum over
+	 * the IP header is -0.  Subtracting -0 is unnecessary.
+ */
+ tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
+ csum = csum + (tmp_csum ^ 0xffff);
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ lro->data_csum = csum;
+
+ lro->ip = ip;
+ /* record timestamp if it is present */
+ if (opt_bytes) {
+ lro->timestamp = 1;
+ lro->tsval = ntohl(*(ts_ptr + 1));
+ lro->tsecr = *(ts_ptr + 2);
+ }
+ lro->len = tot_len;
+ lro->m_head = m_head;
+ lro->m_tail = m_tail;
+ return 0;
+}
diff --git a/freebsd/sys/netinet/tcp_lro.h b/freebsd/sys/netinet/tcp_lro.h
new file mode 100644
index 00000000..20cfb7cf
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_lro.h
@@ -0,0 +1,85 @@
+/*******************************************************************************
+
+Copyright (c) 2006, Myricom Inc.
+Copyright (c) 2008, Intel Corporation.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Myricom Inc, nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ 2. Neither the name of the Intel Corporation, nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef _TCP_LRO_HH_
+#define _TCP_LRO_HH_
+
+struct lro_entry;
+struct lro_entry
+{
+ SLIST_ENTRY(lro_entry) next;
+ struct mbuf *m_head;
+ struct mbuf *m_tail;
+ int timestamp;
+ struct ip *ip;
+ uint32_t tsval;
+ uint32_t tsecr;
+ uint32_t source_ip;
+ uint32_t dest_ip;
+ uint32_t next_seq;
+ uint32_t ack_seq;
+ uint32_t len;
+ uint32_t data_csum;
+ uint16_t window;
+ uint16_t source_port;
+ uint16_t dest_port;
+ uint16_t append_cnt;
+ uint16_t mss;
+
+};
+SLIST_HEAD(lro_head, lro_entry);
+
+struct lro_ctrl {
+ struct ifnet *ifp;
+ int lro_queued;
+ int lro_flushed;
+ int lro_bad_csum;
+ int lro_cnt;
+
+ struct lro_head lro_active;
+ struct lro_head lro_free;
+};
+
+
+int tcp_lro_init(struct lro_ctrl *);
+void tcp_lro_free(struct lro_ctrl *);
+void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
+int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
+
+/* Number of LRO entries - these are per rx queue */
+#define LRO_ENTRIES 8
+
+#endif /* _TCP_LRO_HH_ */
diff --git a/freebsd/sys/netinet/tcp_offload.c b/freebsd/sys/netinet/tcp_offload.c
new file mode 100644
index 00000000..9c73992b
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_offload.c
@@ -0,0 +1,147 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2007, Chelsio Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/if_types.h>
+#include <freebsd/net/if_var.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcp_offload.h>
+#include <freebsd/netinet/toedev.h>
+
+uint32_t toedev_registration_count;
+
+int
+tcp_offload_connect(struct socket *so, struct sockaddr *nam)
+{
+ struct ifnet *ifp;
+ struct toedev *tdev;
+ struct rtentry *rt;
+ int error;
+
+ if (toedev_registration_count == 0)
+ return (EINVAL);
+
+ /*
+ * Look up the route used for the connection to
+ * determine if it uses an interface capable of
+ * offloading the connection.
+ */
+ rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
+ if (rt)
+ RT_UNLOCK(rt);
+ else
+ return (EHOSTUNREACH);
+
+ ifp = rt->rt_ifp;
+ if ((ifp->if_capenable & IFCAP_TOE) == 0) {
+ error = EINVAL;
+ goto fail;
+ }
+
+ tdev = TOEDEV(ifp);
+ if (tdev == NULL) {
+ error = EPERM;
+ goto fail;
+ }
+
+ if (tdev->tod_can_offload(tdev, so) == 0) {
+ error = EPERM;
+ goto fail;
+ }
+
+ return (tdev->tod_connect(tdev, so, rt, nam));
+fail:
+ RTFREE(rt);
+ return (error);
+}
+
+
+/*
+ * This file contains code as a short-term staging area before it is moved in
+ * to sys/netinet/tcp_offload.c
+ */
+
+void
+tcp_offload_twstart(struct tcpcb *tp)
+{
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(tp->t_inpcb);
+ tcp_twstart(tp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+}
+
+struct tcpcb *
+tcp_offload_close(struct tcpcb *tp)
+{
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(tp->t_inpcb);
+ tp = tcp_close(tp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (tp)
+ INP_WUNLOCK(tp->t_inpcb);
+
+ return (tp);
+}
+
+struct tcpcb *
+tcp_offload_drop(struct tcpcb *tp, int error)
+{
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(tp->t_inpcb);
+ tp = tcp_drop(tp, error);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (tp)
+ INP_WUNLOCK(tp->t_inpcb);
+
+ return (tp);
+}
+
diff --git a/freebsd/sys/netinet/tcp_offload.h b/freebsd/sys/netinet/tcp_offload.h
new file mode 100644
index 00000000..f2a35a58
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_offload.h
@@ -0,0 +1,354 @@
+/*-
+ * Copyright (c) 2007, Chelsio Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_OFFLOAD_HH_
+#define _NETINET_TCP_OFFLOAD_HH_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+/*
+ * A driver publishes that it provides offload services
+ * by setting IFCAP_TOE in the ifnet. The offload connect
+ * will bypass any further work if the interface that a
+ * connection would use does not support TCP offload.
+ *
+ * The TOE API assumes that the TCP offload engine can offload the
+ * entire connection from setup to teardown, with some provision
+ * being made to allow the software stack to handle time wait.  If
+ * the device does not meet these criteria, it is the driver's responsibility
+ * to overload the functions that it needs to in tcp_usrreqs and make
+ * its own calls to tcp_output if it needs to do so.
+ *
+ * There is currently no provision for the device advertising the congestion
+ * control algorithms it supports as there is currently no API for querying
+ * an operating system for the protocols that it has loaded. This is a desirable
+ * future extension.
+ *
+ *
+ *
+ * It is assumed that individuals deploying TOE will want connections
+ * to be offloaded without software changes so all connections on an
+ * interface providing TOE are offloaded unless the SO_NO_OFFLOAD
+ * flag is set on the socket.
+ *
+ *
+ * The toe_usrreqs structure constitutes the TOE driver's
+ * interface to the TCP stack for functionality that doesn't
+ * interact directly with userspace. If one wants to provide
+ * (optional) functionality to do zero-copy to/from
+ * userspace one still needs to override soreceive/sosend
+ * with functions that fault in and pin the user buffers.
+ *
+ * + tu_send
+ * - tells the driver that new data may have been added to the
+ * socket's send buffer - the driver should not fail if the
+ * buffer is in fact unchanged
+ * - the driver is responsible for providing credits (bytes in the send window)
+ * back to the socket by calling sbdrop() as segments are acknowledged.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_rcvd
+ * - returns credits to the driver and triggers window updates
+ * to the peer (a credit as used here is a byte in the peer's receive window)
+ * - the driver is expected to determine how many bytes have been
+ * consumed and credit that back to the card so that it can grow
+ * the window again by maintaining its own state between invocations.
+ * - In principle this could be used to shrink the window as well as
+ * grow the window, although it is not used for that now.
+ * - this function needs to correctly handle being called any number of
+ * times without any bytes being consumed from the receive buffer.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_disconnect
+ * - tells the driver to send FIN to peer
+ * - driver is expected to send the remaining data and then do a clean half close
+ * - disconnect implies at least half-close so only send, reset, and detach
+ * are legal
+ * - the driver is expected to handle transition through the shutdown
+ * state machine and allow the stack to support SO_LINGER.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_reset
+ * - closes the connection and sends a RST to peer
+ *   - driver is expected to trigger an RST and detach the toepcb
+ * - no further calls are legal after reset
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * The following fields in the tcpcb are expected to be referenced by the driver:
+ * + iss
+ * + rcv_nxt
+ * + rcv_wnd
+ * + snd_isn
+ * + snd_max
+ * + snd_nxt
+ * + snd_una
+ * + t_flags
+ * + t_inpcb
+ * + t_maxseg
+ * + t_toe
+ *
+ * The following fields in the inpcb are expected to be referenced by the driver:
+ * + inp_lport
+ * + inp_fport
+ * + inp_laddr
+ * + inp_fport
+ * + inp_socket
+ * + inp_ip_tos
+ *
+ * The following fields in the socket are expected to be referenced by the
+ * driver:
+ * + so_comp
+ * + so_error
+ * + so_linger
+ * + so_options
+ * + so_rcv
+ * + so_snd
+ * + so_state
+ * + so_timeo
+ *
+ * These functions all return 0 on success and can return the following errors
+ * as appropriate:
+ * + EPERM:
+ * + ENOBUFS: memory allocation failed
+ * + EMSGSIZE: MTU changed during the call
+ * + EHOSTDOWN:
+ * + EHOSTUNREACH:
+ * + ENETDOWN:
+ * * ENETUNREACH: the peer is no longer reachable
+ *
+ * + tu_detach
+ * - tells driver that the socket is going away so disconnect
+ * the toepcb and free appropriate resources
+ * - allows the driver to cleanly handle the case of connection state
+ * outliving the socket
+ * - no further calls are legal after detach
+ * - the driver is expected to provide its own synchronization between
+ * detach and receiving new data.
+ *
+ * + tu_syncache_event
+ * - even if it is not actually needed, the driver is expected to
+ * call syncache_add for the initial SYN and then syncache_expand
+ * for the SYN,ACK
+ * - tells driver that a connection either has not been added or has
+ * been dropped from the syncache
+ * - the driver is expected to maintain state that lives outside the
+ * software stack so the syncache needs to be able to notify the
+ * toe driver that the software stack is not going to create a connection
+ * for a received SYN
+ * - The driver is responsible for any synchronization required between
+ * the syncache dropping an entry and the driver processing the SYN,ACK.
+ *
+ */
+struct toe_usrreqs {
+ int (*tu_send)(struct tcpcb *tp);
+ int (*tu_rcvd)(struct tcpcb *tp);
+ int (*tu_disconnect)(struct tcpcb *tp);
+ int (*tu_reset)(struct tcpcb *tp);
+ void (*tu_detach)(struct tcpcb *tp);
+ void (*tu_syncache_event)(int event, void *toep);
+};
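+
+/*
+ * A minimal sketch (illustrative only, with hypothetical xyz_* driver
+ * functions) of how an offload driver might publish these hooks:
+ *
+ *	static struct toe_usrreqs xyz_usrreqs = {
+ *		.tu_send		= xyz_send,
+ *		.tu_rcvd		= xyz_rcvd,
+ *		.tu_disconnect		= xyz_disconnect,
+ *		.tu_reset		= xyz_reset,
+ *		.tu_detach		= xyz_detach,
+ *		.tu_syncache_event	= xyz_syncache_event,
+ *	};
+ */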
+
+/*
+ * Proxy for struct tcpopt between TOE drivers and TCP functions.
+ */
+struct toeopt {
+ u_int64_t to_flags; /* see tcpopt in tcp_var.h */
+ u_int16_t to_mss; /* maximum segment size */
+ u_int8_t to_wscale; /* window scaling */
+
+ u_int8_t _pad1; /* explicit pad for 64bit alignment */
+ u_int32_t _pad2; /* explicit pad for 64bit alignment */
+ u_int64_t _pad3[4]; /* TBD */
+};
+
+#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */
+#define TOE_SC_DROP 2 /* connection was timed out */
+
+/*
+ * Because listen is a one-to-many relationship (a socket can be listening
+ * on all interfaces on a machine, some of which may be using different TCP
+ * offload devices), listen uses a publish/subscribe mechanism. The TCP
+ * offload driver registers a listen notification function with the stack.
+ * When a listen socket is created all TCP offload devices are notified
+ * so that they can do the appropriate set up to offload connections on the
+ * port to which the socket is bound. When the listen socket is closed,
+ * the offload devices are notified so that they will stop listening on that
+ * port and free any associated resources as well as sending RSTs on any
+ * connections in the SYN_RCVD state.
+ *
+ */
+
+typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
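+
+/*
+ * A TOE driver typically subscribes with something like (hypothetical
+ * xyz_listen_start handler and softc argument):
+ *
+ *	EVENTHANDLER_REGISTER(tcp_offload_listen_start,
+ *	    xyz_listen_start, sc, EVENTHANDLER_PRI_ANY);
+ */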
+
+/*
+ * Check if the socket can be offloaded by the following steps:
+ * - determine the egress interface
+ * - check the interface for TOE capability and TOE is enabled
+ * - check if the device has resources to offload the connection
+ */
+int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
+
+/*
+ * The tcp_output_* routines are wrappers around the toe_usrreqs calls
+ * which trigger packet transmission. In the non-offloaded case they
+ * translate to tcp_output. The tcp_offload_* routines notify TOE
+ * of specific events.  In the non-offloaded case they are no-ops.
+ *
+ * Listen is a special case because it is a 1 to many relationship
+ * and there can be more than one offload driver in the system.
+ */
+
+/*
+ * Connection is offloaded
+ */
+#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
+
+/*
+ * hackish way of allowing this file to also be included by TOE
+ * which needs to be kept ignorant of socket implementation details
+ */
+#ifdef _SYS_SOCKETVAR_HH_
+/*
+ * The socket has not been marked as "do not offload"
+ */
+#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0)
+
+static __inline int
+tcp_output_connect(struct socket *so, struct sockaddr *nam)
+{
+ struct tcpcb *tp = sototcpcb(so);
+ int error;
+
+ /*
+ * If offload has been disabled for this socket or the
+ * connection cannot be offloaded just call tcp_output
+ * to start the TCP state machine.
+ */
+#ifndef TCP_OFFLOAD_DISABLE
+ if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
+#endif
+ error = tcp_output(tp);
+ return (error);
+}
+
+static __inline int
+tcp_output_send(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ return (tp->t_tu->tu_send(tp));
+#endif
+ return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_rcvd(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ return (tp->t_tu->tu_rcvd(tp));
+#endif
+ return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_disconnect(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ return (tp->t_tu->tu_disconnect(tp));
+#endif
+ return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_reset(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ return (tp->t_tu->tu_reset(tp));
+#endif
+ return (tcp_output(tp));
+}
+
+static __inline void
+tcp_offload_detach(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ tp->t_tu->tu_detach(tp);
+#endif
+}
+
+static __inline void
+tcp_offload_listen_open(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
+ EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+#endif
+}
+
+static __inline void
+tcp_offload_listen_close(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
+#endif
+}
+#undef SO_OFFLOADABLE
+#endif /* _SYS_SOCKETVAR_HH_ */
+#undef tp_offload
+
+void tcp_offload_twstart(struct tcpcb *tp);
+struct tcpcb *tcp_offload_close(struct tcpcb *tp);
+struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
+
+#endif /* _NETINET_TCP_OFFLOAD_HH_ */
diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c
new file mode 100644
index 00000000..bebab1f1
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_output.c
@@ -0,0 +1,1485 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#ifdef INET6
+#include <freebsd/netinet6/in6_pcb.h>
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#endif
+#include <freebsd/netinet/tcp.h>
+#define TCPOUTFLAGS
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <freebsd/netinet/tcp_debug.h>
+#endif
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#endif /*IPSEC*/
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+#ifdef notyet
+extern struct mbuf *m_copypack();
+#endif
+
+VNET_DEFINE(int, path_mtu_discovery) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW,
+ &VNET_NAME(path_mtu_discovery), 1,
+ "Enable Path MTU Discovery");
+
+VNET_DEFINE(int, ss_fltsz) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW,
+ &VNET_NAME(ss_fltsz), 1,
+ "Slow start flight size");
+
+VNET_DEFINE(int, ss_fltsz_local) = 4;
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize,
+ CTLFLAG_RW, &VNET_NAME(ss_fltsz_local), 1,
+ "Slow start flight size for local networks");
+
+VNET_DEFINE(int, tcp_do_newreno) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_newreno), 0,
+ "Enable NewReno Algorithms");
+
+VNET_DEFINE(int, tcp_do_tso) = 1;
+#define V_tcp_do_tso VNET(tcp_do_tso)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_tso), 0,
+ "Enable TCP Segmentation Offload");
+
+VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
+#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_autosndbuf), 0,
+ "Enable automatic send buffer sizing");
+
+VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024;
+#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_RW,
+ &VNET_NAME(tcp_autosndbuf_inc), 0,
+ "Incrementor step size of automatic send buffer");
+
+VNET_DEFINE(int, tcp_autosndbuf_max) = 256*1024;
+#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW,
+ &VNET_NAME(tcp_autosndbuf_max), 0,
+ "Max size of automatic send buffer");
+
+
+/*
+ * TCP output routine: figure out what should be sent and send it.
+ */
+int
+tcp_output(struct tcpcb *tp)
+{
+ struct socket *so = tp->t_inpcb->inp_socket;
+ long len, recwin, sendwin;
+ int off, flags, error, rw;
+ struct mbuf *m;
+ struct ip *ip = NULL;
+ struct ipovly *ipov = NULL;
+ struct tcphdr *th;
+ u_char opt[TCP_MAXOLEN];
+ unsigned ipoptlen, optlen, hdrlen;
+#ifdef IPSEC
+ unsigned ipsec_optlen = 0;
+#endif
+ int idle, sendalot;
+ int sack_rxmit, sack_bytes_rxmt;
+ struct sackhole *p;
+ int tso;
+ struct tcpopt to;
+#if 0
+ int maxburst = TCP_MAXBURST;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
+ int isipv6;
+
+ isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
+#endif
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * Determine length of data that should be transmitted,
+ * and flags that will be used.
+ * If there is some data or critical controls (SYN, RST)
+ * to send, then transmit; otherwise, investigate further.
+ */
+ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
+ if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) {
+ /*
+ * If we've been idle for more than one retransmit
+ * timeout the old congestion window is no longer
+ * current and we have to reduce it to the restart
+ * window before we can transmit again.
+ *
+ * The restart window is the initial window or the last
+ * CWND, whichever is smaller.
+ *
+ * This is done to prevent us from flooding the path with
+ * a full CWND at wirespeed, overloading router and switch
+ * buffers along the way.
+ *
+ * See RFC5681 Section 4.1. "Restarting Idle Connections".
+ */
+ if (V_tcp_do_rfc3390)
+ rw = min(4 * tp->t_maxseg,
+ max(2 * tp->t_maxseg, 4380));
+#ifdef INET6
+ else if ((isipv6 ? in6_localaddr(&tp->t_inpcb->in6p_faddr) :
+ in_localaddr(tp->t_inpcb->inp_faddr)))
+#else
+ else if (in_localaddr(tp->t_inpcb->inp_faddr))
+#endif
+ rw = V_ss_fltsz_local * tp->t_maxseg;
+ else
+ rw = V_ss_fltsz * tp->t_maxseg;
+
+ tp->snd_cwnd = min(rw, tp->snd_cwnd);
+ }
+ tp->t_flags &= ~TF_LASTIDLE;
+ if (idle) {
+ if (tp->t_flags & TF_MORETOCOME) {
+ tp->t_flags |= TF_LASTIDLE;
+ idle = 0;
+ }
+ }
+again:
+ /*
+ * If we've recently taken a timeout, snd_max will be greater than
+ * snd_nxt. There may be SACK information that allows us to avoid
+ * resending already delivered data. Adjust snd_nxt accordingly.
+ */
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ SEQ_LT(tp->snd_nxt, tp->snd_max))
+ tcp_sack_adjust(tp);
+ sendalot = 0;
+ tso = 0;
+ off = tp->snd_nxt - tp->snd_una;
+ sendwin = min(tp->snd_wnd, tp->snd_cwnd);
+ sendwin = min(sendwin, tp->snd_bwnd);
+
+ flags = tcp_outflags[tp->t_state];
+ /*
+ * Send any SACK-generated retransmissions. If we're explicitly trying
+ * to send out new data (when sendalot is 1), bypass this function.
+ * If we retransmit in fast recovery mode, decrement snd_cwnd, since
+ * we're replacing a (future) new transmission with a retransmission
+ * now, and we previously incremented snd_cwnd in tcp_input().
+ */
+ /*
+ * Still in SACK recovery, reset the rxmit flag to zero.
+ */
+ sack_rxmit = 0;
+ sack_bytes_rxmt = 0;
+ len = 0;
+ p = NULL;
+ if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp) &&
+ (p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
+ long cwin;
+
+ cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt;
+ if (cwin < 0)
+ cwin = 0;
+ /* Do not retransmit SACK segments beyond snd_recover */
+ if (SEQ_GT(p->end, tp->snd_recover)) {
+ /*
+ * (At least) part of sack hole extends beyond
+ * snd_recover. Check to see if we can rexmit data
+ * for this hole.
+ */
+ if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
+ /*
+ * Can't rexmit any more data for this hole.
+ * That data will be rexmitted in the next
+ * sack recovery episode, when snd_recover
+ * moves past p->rxmit.
+ */
+ p = NULL;
+ goto after_sack_rexmit;
+ } else
+ /* Can rexmit part of the current hole */
+ len = ((long)ulmin(cwin,
+ tp->snd_recover - p->rxmit));
+ } else
+ len = ((long)ulmin(cwin, p->end - p->rxmit));
+ off = p->rxmit - tp->snd_una;
+ KASSERT(off >= 0,("%s: sack block to the left of una : %d",
+ __func__, off));
+ if (len > 0) {
+ sack_rxmit = 1;
+ sendalot = 1;
+ TCPSTAT_INC(tcps_sack_rexmits);
+ TCPSTAT_ADD(tcps_sack_rexmit_bytes,
+ min(len, tp->t_maxseg));
+ }
+ }
+after_sack_rexmit:
+ /*
+ * Get standard flags, and add SYN or FIN if requested by 'hidden'
+ * state flags.
+ */
+ if (tp->t_flags & TF_NEEDFIN)
+ flags |= TH_FIN;
+ if (tp->t_flags & TF_NEEDSYN)
+ flags |= TH_SYN;
+
+ SOCKBUF_LOCK(&so->so_snd);
+ /*
+ * If in persist timeout with window of 0, send 1 byte.
+ * Otherwise, if window is small but nonzero
+ * and timer expired, we will send what we can
+ * and go to transmit state.
+ */
+ if (tp->t_flags & TF_FORCEDATA) {
+ if (sendwin == 0) {
+ /*
+ * If we still have some data to send, then
+ * clear the FIN bit. Usually this would
+ * happen below when it realizes that we
+ * aren't sending all the data. However,
+ * if we have exactly 1 byte of unsent data,
+ * then it won't clear the FIN bit below,
+ * and if we are in persist state, we wind
+ * up sending the packet without recording
+ * that we sent the FIN bit.
+ *
+ * We can't just blindly clear the FIN bit,
+ * because if we don't have any more data
+ * to send then the probe will be the FIN
+ * itself.
+ */
+ if (off < so->so_snd.sb_cc)
+ flags &= ~TH_FIN;
+ sendwin = 1;
+ } else {
+ tcp_timer_activate(tp, TT_PERSIST, 0);
+ tp->t_rxtshift = 0;
+ }
+ }
+
+ /*
+ * If snd_nxt == snd_max and we have transmitted a FIN, the
+ * offset will be > 0 even if so_snd.sb_cc is 0, resulting in
+ * a negative length. This can also occur when TCP opens up
+ * its congestion window while receiving additional duplicate
+ * acks after fast-retransmit because TCP will reset snd_nxt
+ * to snd_max after the fast-retransmit.
+ *
+ * In the normal retransmit-FIN-only case, however, snd_nxt will
+ * be set to snd_una, the offset will be 0, and the length may
+ * wind up 0.
+ *
+ * If sack_rxmit is true we are retransmitting from the scoreboard
+ * in which case len is already set.
+ */
+ if (sack_rxmit == 0) {
+ if (sack_bytes_rxmt == 0)
+ len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off);
+ else {
+ long cwin;
+
+ /*
+ * We are inside of a SACK recovery episode and are
+ * sending new data, having retransmitted all the
+ * data possible in the scoreboard.
+ */
+ len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd)
+ - off);
+ /*
+ * Don't remove this (len > 0) check !
+ * We explicitly check for len > 0 here (although it
+ * isn't really necessary), to work around a gcc
+ * optimization issue - to force gcc to compute
+ * len above. Without this check, the computation
+ * of len is bungled by the optimizer.
+ */
+ if (len > 0) {
+ cwin = tp->snd_cwnd -
+ (tp->snd_nxt - tp->sack_newdata) -
+ sack_bytes_rxmt;
+ if (cwin < 0)
+ cwin = 0;
+ len = lmin(len, cwin);
+ }
+ }
+ }
+
+ /*
+ * Lop off SYN bit if it has already been sent. However, if this
+ * is SYN-SENT state and if segment contains data and if we don't
+ * know that foreign host supports TAO, suppress sending segment.
+ */
+ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
+ if (tp->t_state != TCPS_SYN_RECEIVED)
+ flags &= ~TH_SYN;
+ off--, len++;
+ }
+
+ /*
+ * Be careful not to send data and/or FIN on SYN segments.
+ * This measure is needed to prevent interoperability problems
+ * with not fully conformant TCP implementations.
+ */
+ if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
+ len = 0;
+ flags &= ~TH_FIN;
+ }
+
+ if (len < 0) {
+ /*
+ * If FIN has been sent but not acked,
+ * but we haven't been called to retransmit,
+ * len will be < 0. Otherwise, window shrank
+ * after we sent into it. If window shrank to 0,
+ * cancel pending retransmit, pull snd_nxt back
+ * to (closed) window, and set the persist timer
+ * if it isn't already going. If the window didn't
+ * close completely, just wait for an ACK.
+ */
+ len = 0;
+ if (sendwin == 0) {
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tp->t_rxtshift = 0;
+ tp->snd_nxt = tp->snd_una;
+ if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_setpersist(tp);
+ }
+ }
+
+ /* len will be >= 0 after this point. */
+ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
+
+ /*
+ * Automatic sizing of send socket buffer. Often the send buffer
+ * size is not optimally adjusted to the actual network conditions
+ * at hand (delay bandwidth product). Setting the buffer size too
+ * small limits throughput on links with high bandwidth and high
+ * delay (eg. trans-continental/oceanic links). Setting the
+ * buffer size too big consumes too much real kernel memory,
+ * especially with many connections on busy servers.
+ *
+ * The criteria to step up the send buffer one notch are:
+ * 1. receive window of remote host is larger than send buffer
+ * (with a fudge factor of 5/4th);
+ * 2. send buffer is filled to 7/8th with data (so we actually
+ * have data to make use of it);
+ * 3. send buffer fill has not hit maximal automatic size;
+ * 4. our send window (slow start and congestion controlled) is
+ * larger than sent but unacknowledged data in send buffer.
+ *
+ * The remote host receive window scaling factor may limit the
+ * growing of the send buffer before it reaches its allowed
+ * maximum.
+ *
+ * It scales directly with slow start or congestion window
+ * and does at most one step per received ACK. This fast
+ * scaling has the drawback of growing the send buffer beyond
+ * what is strictly necessary to make full use of a given
+ * delay*bandwidth product. However, testing has shown this not
+ * to be much of a problem. At worst we are trading wasting
+ * of available bandwidth (the non-use of it) for wasting some
+ * socket buffer memory.
+ *
+ * TODO: Shrink send buffer during idle periods together
+ * with congestion window. Requires another timer. Has to
+ * wait for upcoming tcp timer rewrite.
+ */
+ if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
+ if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
+ so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
+ so->so_snd.sb_cc < V_tcp_autosndbuf_max &&
+ sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) {
+ if (!sbreserve_locked(&so->so_snd,
+ min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
+ V_tcp_autosndbuf_max), so, curthread))
+ so->so_snd.sb_flags &= ~SB_AUTOSIZE;
+ }
+ }
+
+ /*
+ * Truncate to the maximum segment length or enable TCP Segmentation
+ * Offloading (if supported by hardware) and ensure that FIN is removed
+ * if the length no longer contains the last data byte.
+ *
+ * TSO may only be used if we are in a pure bulk sending state. The
+ * presence of TCP-MD5, SACK retransmits, SACK advertisements and
+ * IP options prevent using TSO. With TSO the TCP header is the same
+ * (except for the sequence number) for all generated packets. This
+ * makes it impossible to transmit any options which vary per generated
+ * segment or packet.
+ *
+ * The length of TSO bursts is limited to TCP_MAXWIN. That limit and
+ * removal of FIN (if not already caught here) are handled later after
+ * the exact length of the TCP options are known.
+ */
+#ifdef IPSEC
+ /*
+ * Pre-calculate here as we save another lookup into the darknesses
+ * of IPsec that way and can actually decide if TSO is ok.
+ */
+ ipsec_optlen = ipsec_hdrsiz_tcp(tp);
+#endif
+ if (len > tp->t_maxseg) {
+ if ((tp->t_flags & TF_TSO) && V_tcp_do_tso &&
+ ((tp->t_flags & TF_SIGNATURE) == 0) &&
+ tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
+ tp->t_inpcb->inp_options == NULL &&
+ tp->t_inpcb->in6p_options == NULL
+#ifdef IPSEC
+ && ipsec_optlen == 0
+#endif
+ ) {
+ tso = 1;
+ } else {
+ len = tp->t_maxseg;
+ sendalot = 1;
+ }
+ }
+
+ if (sack_rxmit) {
+ if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
+ flags &= ~TH_FIN;
+ } else {
+ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
+ flags &= ~TH_FIN;
+ }
+
+ recwin = sbspace(&so->so_rcv);
+
+ /*
+ * Sender silly window avoidance. We transmit under the following
+ * conditions when len is non-zero:
+ *
+ * - We have a full segment (or more with TSO)
+ * - This is the last buffer in a write()/send() and we are
+ * either idle or running NODELAY
+ * - we've timed out (e.g. persist timer)
+ * - we have more than 1/2 the maximum send window's worth of
+ * data (the receiver may have limited the window size)
+ * - we need to retransmit
+ */
+ if (len) {
+ if (len >= tp->t_maxseg)
+ goto send;
+ /*
+ * NOTE! on localhost connections an 'ack' from the remote
+ * end may occur synchronously with the output and cause
+ * us to flush a buffer queued with moretocome. XXX
+ *
+ * note: the len + off check is almost certainly unnecessary.
+ */
+ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */
+ (idle || (tp->t_flags & TF_NODELAY)) &&
+ len + off >= so->so_snd.sb_cc &&
+ (tp->t_flags & TF_NOPUSH) == 0) {
+ goto send;
+ }
+ if (tp->t_flags & TF_FORCEDATA) /* typ. timeout case */
+ goto send;
+ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
+ goto send;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */
+ goto send;
+ if (sack_rxmit)
+ goto send;
+ }
+
+ /*
+ * Compare available window to amount of window
+ * known to peer (as advertised window less
+ * next expected input). If the difference is at least two
+ * max size segments, or at least 50% of the maximum possible
+ * window, then we want to send a window update to the peer.
+ * Skip this if the connection is in T/TCP half-open state.
+ * Don't send pure window updates when the peer has closed
+ * the connection and won't ever send more data.
+ */
+ if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
+ !TCPS_HAVERCVDFIN(tp->t_state)) {
+ /*
+ * "adv" is the amount we can increase the window,
+ * taking into account that we are limited by
+ * TCP_MAXWIN << tp->rcv_scale.
+ */
+ long adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale) -
+ (tp->rcv_adv - tp->rcv_nxt);
+
+ if (adv >= (long) (2 * tp->t_maxseg))
+ goto send;
+ if (2 * adv >= (long) so->so_rcv.sb_hiwat)
+ goto send;
+ }
+
+ /*
+ * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW
+ * is also a catch-all for the retransmit timer timeout case.
+ */
+ if (tp->t_flags & TF_ACKNOW)
+ goto send;
+ if ((flags & TH_RST) ||
+ ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
+ goto send;
+ if (SEQ_GT(tp->snd_up, tp->snd_una))
+ goto send;
+ /*
+ * If our state indicates that FIN should be sent
+ * and we have not yet done so, then we need to send.
+ */
+ if (flags & TH_FIN &&
+ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+ goto send;
+ /*
+ * In SACK, it is possible for tcp_output to fail to send a segment
+ * after the retransmission timer has been turned off. Make sure
+ * that the retransmission timer is set.
+ */
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) &&
+ !tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ goto just_return;
+ }
+ /*
+ * TCP window updates are not reliable, rather a polling protocol
+ * using ``persist'' packets is used to ensure receipt of window
+ * updates. The three ``states'' for the output side are:
+ * idle not doing retransmits or persists
+ * persisting to move a small or zero window
+ * (re)transmitting and thereby not persisting
+ *
+ * tcp_timer_active(tp, TT_PERSIST)
+ * is true when we are in persist state.
+ * (tp->t_flags & TF_FORCEDATA)
+ * is set when we are called to send a persist packet.
+ * tcp_timer_active(tp, TT_REXMT)
+ * is set when we are retransmitting
+ * The output side is idle when both timers are zero.
+ *
+ * If send window is too small, there is data to transmit, and no
+ * retransmit or persist is pending, then go to persist state.
+ * If nothing happens soon, send when timer expires:
+ * if window is nonzero, transmit what we can,
+ * otherwise force out a byte.
+ */
+ if (so->so_snd.sb_cc && !tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ tp->t_rxtshift = 0;
+ tcp_setpersist(tp);
+ }
+
+ /*
+ * No reason to send a segment, just return.
+ */
+just_return:
+ SOCKBUF_UNLOCK(&so->so_snd);
+ return (0);
+
+send:
+ SOCKBUF_LOCK_ASSERT(&so->so_snd);
+ /*
+ * Before ESTABLISHED, force sending of initial options
+ * unless TCP set not to do any options.
+ * NOTE: we assume that the IP/TCP header plus TCP options
+ * always fit in a single mbuf, leaving room for a maximum
+ * link header, i.e.
+ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
+ */
+ optlen = 0;
+#ifdef INET6
+ if (isipv6)
+ hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+ else
+#endif
+ hdrlen = sizeof (struct tcpiphdr);
+
+ /*
+ * Compute options for segment.
+ * We only have to care about SYN and established connection
+ * segments. Options for SYN-ACK segments are handled in TCP
+ * syncache.
+ */
+ if ((tp->t_flags & TF_NOOPT) == 0) {
+ to.to_flags = 0;
+ /* Maximum segment size. */
+ if (flags & TH_SYN) {
+ tp->snd_nxt = tp->iss;
+ to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
+ to.to_flags |= TOF_MSS;
+ }
+ /* Window scaling. */
+ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
+ to.to_wscale = tp->request_r_scale;
+ to.to_flags |= TOF_SCALE;
+ }
+ /* Timestamps. */
+ if ((tp->t_flags & TF_RCVD_TSTMP) ||
+ ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
+ to.to_tsval = ticks + tp->ts_offset;
+ to.to_tsecr = tp->ts_recent;
+ to.to_flags |= TOF_TS;
+ /* Set receive buffer autosizing timestamp. */
+ if (tp->rfbuf_ts == 0 &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE))
+ tp->rfbuf_ts = ticks;
+ }
+ /* Selective ACK's. */
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ if (flags & TH_SYN)
+ to.to_flags |= TOF_SACKPERM;
+ else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags & TF_SACK_PERMIT) &&
+ tp->rcv_numsacks > 0) {
+ to.to_flags |= TOF_SACK;
+ to.to_nsacks = tp->rcv_numsacks;
+ to.to_sacks = (u_char *)tp->sackblks;
+ }
+ }
+#ifdef TCP_SIGNATURE
+ /* TCP-MD5 (RFC2385). */
+ if (tp->t_flags & TF_SIGNATURE)
+ to.to_flags |= TOF_SIGNATURE;
+#endif /* TCP_SIGNATURE */
+
+ /* Processing the options. */
+ hdrlen += optlen = tcp_addoptions(&to, opt);
+ }
+
+#ifdef INET6
+ if (isipv6)
+ ipoptlen = ip6_optlen(tp->t_inpcb);
+ else
+#endif
+ if (tp->t_inpcb->inp_options)
+ ipoptlen = tp->t_inpcb->inp_options->m_len -
+ offsetof(struct ipoption, ipopt_list);
+ else
+ ipoptlen = 0;
+#ifdef IPSEC
+ ipoptlen += ipsec_optlen;
+#endif
+
+ /*
+ * Adjust data length if insertion of options will
+ * bump the packet length beyond the t_maxopd length.
+ * Clear the FIN bit because we cut off the tail of
+ * the segment.
+ *
+ * When doing TSO limit a burst to TCP_MAXWIN minus the
+ * IP, TCP and Options length to keep ip->ip_len from
+ * overflowing. Prevent the last segment from being
+ * fractional, thus making them all equal sized, and set
+ * the flag to continue sending. TSO is disabled when
+ * IP options or IPSEC are present.
+ */
+ if (len + optlen + ipoptlen > tp->t_maxopd) {
+ flags &= ~TH_FIN;
+ if (tso) {
+ if (len > TCP_MAXWIN - hdrlen - optlen) {
+ len = TCP_MAXWIN - hdrlen - optlen;
+ len = len - (len % (tp->t_maxopd - optlen));
+ sendalot = 1;
+ } else if (tp->t_flags & TF_NEEDFIN)
+ sendalot = 1;
+ } else {
+ len = tp->t_maxopd - optlen - ipoptlen;
+ sendalot = 1;
+ }
+ }
+
+/*#ifdef DIAGNOSTIC*/
+#ifdef INET6
+ if (max_linkhdr + hdrlen > MCLBYTES)
+#else
+ if (max_linkhdr + hdrlen > MHLEN)
+#endif
+ panic("tcphdr too big");
+/*#endif*/
+
+ /*
+ * This KASSERT is here to catch edge cases at a well defined place.
+ * Before, those had triggered (random) panic conditions further down.
+ */
+ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
+
+ /*
+ * Grab a header mbuf, attaching a copy of data to
+ * be transmitted, and initialize the header from
+ * the template for sends on this connection.
+ */
+ if (len) {
+ struct mbuf *mb;
+ u_int moff;
+
+ if ((tp->t_flags & TF_FORCEDATA) && len == 1)
+ TCPSTAT_INC(tcps_sndprobe);
+ else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
+ TCPSTAT_INC(tcps_sndrexmitpack);
+ TCPSTAT_ADD(tcps_sndrexmitbyte, len);
+ } else {
+ TCPSTAT_INC(tcps_sndpack);
+ TCPSTAT_ADD(tcps_sndbyte, len);
+ }
+#ifdef notyet
+ if ((m = m_copypack(so->so_snd.sb_mb, off,
+ (int)len, max_linkhdr + hdrlen)) == 0) {
+ SOCKBUF_UNLOCK(&so->so_snd);
+ error = ENOBUFS;
+ goto out;
+ }
+ /*
+ * m_copypack left space for our hdr; use it.
+ */
+ m->m_len += hdrlen;
+ m->m_data -= hdrlen;
+#else
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ SOCKBUF_UNLOCK(&so->so_snd);
+ error = ENOBUFS;
+ goto out;
+ }
+#ifdef INET6
+ if (MHLEN < hdrlen + max_linkhdr) {
+ MCLGET(m, M_DONTWAIT);
+ if ((m->m_flags & M_EXT) == 0) {
+ SOCKBUF_UNLOCK(&so->so_snd);
+ m_freem(m);
+ error = ENOBUFS;
+ goto out;
+ }
+ }
+#endif
+ m->m_data += max_linkhdr;
+ m->m_len = hdrlen;
+
+ /*
+ * Start the m_copy functions from the closest mbuf
+ * to the offset in the socket buffer chain.
+ */
+ mb = sbsndptr(&so->so_snd, off, len, &moff);
+
+ if (len <= MHLEN - hdrlen - max_linkhdr) {
+ m_copydata(mb, moff, (int)len,
+ mtod(m, caddr_t) + hdrlen);
+ m->m_len += len;
+ } else {
+ m->m_next = m_copy(mb, moff, (int)len);
+ if (m->m_next == NULL) {
+ SOCKBUF_UNLOCK(&so->so_snd);
+ (void) m_free(m);
+ error = ENOBUFS;
+ goto out;
+ }
+ }
+#endif
+ /*
+ * If we're sending everything we've got, set PUSH.
+ * (This will keep happy those implementations which only
+ * give data to the user when a buffer fills or
+ * a PUSH comes in.)
+ */
+ if (off + len == so->so_snd.sb_cc)
+ flags |= TH_PUSH;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ } else {
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (tp->t_flags & TF_ACKNOW)
+ TCPSTAT_INC(tcps_sndacks);
+ else if (flags & (TH_SYN|TH_FIN|TH_RST))
+ TCPSTAT_INC(tcps_sndctrl);
+ else if (SEQ_GT(tp->snd_up, tp->snd_una))
+ TCPSTAT_INC(tcps_sndurg);
+ else
+ TCPSTAT_INC(tcps_sndwinup);
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto out;
+ }
+#ifdef INET6
+ if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
+ MHLEN >= hdrlen) {
+ MH_ALIGN(m, hdrlen);
+ } else
+#endif
+ m->m_data += max_linkhdr;
+ m->m_len = hdrlen;
+ }
+ SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+#ifdef MAC
+ mac_inpcb_create_mbuf(tp->t_inpcb, m);
+#endif
+#ifdef INET6
+ if (isipv6) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ th = (struct tcphdr *)(ip6 + 1);
+ tcpip_fillheaders(tp->t_inpcb, ip6, th);
+ } else
+#endif /* INET6 */
+ {
+ ip = mtod(m, struct ip *);
+ ipov = (struct ipovly *)ip;
+ th = (struct tcphdr *)(ip + 1);
+ tcpip_fillheaders(tp->t_inpcb, ip, th);
+ }
+
+ /*
+ * Fill in fields, remembering maximum advertised
+ * window for use in delaying messages about window sizes.
+ * If resending a FIN, be sure not to use a new sequence number.
+ */
+ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
+ tp->snd_nxt == tp->snd_max)
+ tp->snd_nxt--;
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ if (tp->t_rxtshift >= 1) {
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+ flags |= TH_ECE|TH_CWR;
+ } else
+ flags |= TH_ECE|TH_CWR;
+ }
+
+ if (tp->t_state == TCPS_ESTABLISHED &&
+ (tp->t_flags & TF_ECN_PERMIT)) {
+ /*
+ * If the peer has ECN, mark data packets with
+ * ECN capable transmission (ECT).
+ * Ignore pure ack packets, retransmissions and window probes.
+ */
+ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+ !((tp->t_flags & TF_FORCEDATA) && len == 1)) {
+#ifdef INET6
+ if (isipv6)
+ ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+ else
+#endif
+ ip->ip_tos |= IPTOS_ECN_ECT0;
+ TCPSTAT_INC(tcps_ecn_ect0);
+ }
+
+ /*
+ * Reply with proper ECN notifications.
+ */
+ if (tp->t_flags & TF_ECN_SND_CWR) {
+ flags |= TH_CWR;
+ tp->t_flags &= ~TF_ECN_SND_CWR;
+ }
+ if (tp->t_flags & TF_ECN_SND_ECE)
+ flags |= TH_ECE;
+ }
+
+ /*
+ * If we are doing retransmissions, then snd_nxt will
+ * not reflect the first unsent octet. For ACK only
+ * packets, we do not want the sequence number of the
+ * retransmitted packet, we want the sequence number
+ * of the next unsent octet. So, if there is no data
+ * (and no SYN or FIN), use snd_max instead of snd_nxt
+ * when filling in ti_seq. But if we are in persist
+ * state, snd_max might reflect one byte beyond the
+ * right edge of the window, so use snd_nxt in that
+ * case, since we know we aren't doing a retransmission.
+ * (retransmit and persist are mutually exclusive...)
+ */
+ if (sack_rxmit == 0) {
+ if (len || (flags & (TH_SYN|TH_FIN)) ||
+ tcp_timer_active(tp, TT_PERSIST))
+ th->th_seq = htonl(tp->snd_nxt);
+ else
+ th->th_seq = htonl(tp->snd_max);
+ } else {
+ th->th_seq = htonl(p->rxmit);
+ p->rxmit += len;
+ tp->sackhint.sack_bytes_rexmit += len;
+ }
+ th->th_ack = htonl(tp->rcv_nxt);
+ if (optlen) {
+ bcopy(opt, th + 1, optlen);
+ th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
+ }
+ th->th_flags = flags;
+ /*
+ * Calculate receive window. Don't shrink window,
+ * but avoid silly window syndrome.
+ */
+ if (recwin < (long)(so->so_rcv.sb_hiwat / 4) &&
+ recwin < (long)tp->t_maxseg)
+ recwin = 0;
+ if (recwin < (long)(tp->rcv_adv - tp->rcv_nxt))
+ recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
+ if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
+ recwin = (long)TCP_MAXWIN << tp->rcv_scale;
+
+ /*
+ * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled. The <SYN,ACK>
+ * case is handled in syncache.
+ */
+ if (flags & TH_SYN)
+ th->th_win = htons((u_short)
+ (min(sbspace(&so->so_rcv), TCP_MAXWIN)));
+ else
+ th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
+
+ /*
+ * Adjust the RXWIN0SENT flag - indicate that we have advertised
+ * a 0 window. This may cause the remote transmitter to stall. This
+ * flag tells soreceive() to disable delayed acknowledgements when
+ * draining the buffer. This can occur if the receiver is attempting
+ * to read more data than can be buffered prior to transmitting on
+ * the connection.
+ */
+ if (th->th_win == 0)
+ tp->t_flags |= TF_RXWIN0SENT;
+ else
+ tp->t_flags &= ~TF_RXWIN0SENT;
+ if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
+ th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
+ th->th_flags |= TH_URG;
+ } else
+ /*
+ * If no urgent pointer to send, then we pull
+ * the urgent pointer to the left edge of the send window
+ * so that it doesn't drift into the send window on sequence
+ * number wraparound.
+ */
+ tp->snd_up = tp->snd_una; /* drag it along */
+
+#ifdef TCP_SIGNATURE
+ if (tp->t_flags & TF_SIGNATURE) {
+ int sigoff = to.to_signature - opt;
+ tcp_signature_compute(m, 0, len, optlen,
+ (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND);
+ }
+#endif
+
+ /*
+ * Put TCP length in extended header, and then
+ * checksum extended header and data.
+ */
+ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() needs this */
+#ifdef INET6
+ if (isipv6)
+ /*
+ * ip6_plen need not be filled in now; it will be filled
+ * in by ip6_output.
+ */
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
+ sizeof(struct tcphdr) + optlen + len);
+ else
+#endif /* INET6 */
+ {
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));
+
+ /* IP version must be set here for ipv4/ipv6 checking later */
+ KASSERT(ip->ip_v == IPVERSION,
+ ("%s: IP version incorrect: %d", __func__, ip->ip_v));
+ }
+
+ /*
+ * Enable TSO and specify the size of the segments.
+ * The TCP pseudo header checksum is always provided.
+ * XXX: Fixme: This is currently not the case for IPv6.
+ */
+ if (tso) {
+ KASSERT(len > tp->t_maxopd - optlen,
+ ("%s: len <= tso_segsz", __func__));
+ m->m_pkthdr.csum_flags |= CSUM_TSO;
+ m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+ }
+
+ /*
+ * In transmit state, time the transmission and arrange for
+ * the retransmit. In persist state, just set snd_max.
+ */
+ if ((tp->t_flags & TF_FORCEDATA) == 0 ||
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_seq startseq = tp->snd_nxt;
+
+ /*
+ * Advance snd_nxt over sequence space of this segment.
+ */
+ if (flags & (TH_SYN|TH_FIN)) {
+ if (flags & TH_SYN)
+ tp->snd_nxt++;
+ if (flags & TH_FIN) {
+ tp->snd_nxt++;
+ tp->t_flags |= TF_SENTFIN;
+ }
+ }
+ if (sack_rxmit)
+ goto timer;
+ tp->snd_nxt += len;
+ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_max = tp->snd_nxt;
+ /*
+ * Time this transmission if not a retransmission and
+ * not currently timing anything.
+ */
+ if (tp->t_rtttime == 0) {
+ tp->t_rtttime = ticks;
+ tp->t_rtseq = startseq;
+ TCPSTAT_INC(tcps_segstimed);
+ }
+ }
+
+ /*
+ * Set retransmit timer if not currently set,
+ * and not doing a pure ack or a keep-alive probe.
+ * Initial value for retransmit timer is smoothed
+ * round-trip time + 2 * round-trip time variance.
+ * Initialize shift counter which is used for backoff
+ * of retransmit time.
+ */
+timer:
+ if (!tcp_timer_active(tp, TT_REXMT) &&
+ ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
+ (tp->snd_nxt != tp->snd_una))) {
+ if (tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_timer_activate(tp, TT_PERSIST, 0);
+ tp->t_rxtshift = 0;
+ }
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ }
+ } else {
+ /*
+ * Persist case, update snd_max but since we are in
+ * persist mode (no window) we do not update snd_nxt.
+ */
+ int xlen = len;
+ if (flags & TH_SYN)
+ ++xlen;
+ if (flags & TH_FIN) {
+ ++xlen;
+ tp->t_flags |= TF_SENTFIN;
+ }
+ if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
+ tp->snd_max = tp->snd_nxt + len;
+ }
+
+#ifdef TCPDEBUG
+ /*
+ * Trace.
+ */
+ if (so->so_options & SO_DEBUG) {
+ u_short save = 0;
+#ifdef INET6
+ if (!isipv6)
+#endif
+ {
+ save = ipov->ih_len;
+ ipov->ih_len = htons(m->m_pkthdr.len /* - hdrlen + (th->th_off << 2) */);
+ }
+ tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0);
+#ifdef INET6
+ if (!isipv6)
+#endif
+ ipov->ih_len = save;
+ }
+#endif
+
+ /*
+ * Fill in IP length and desired time to live and
+ * send to IP level. There should be a better way
+ * to handle ttl and tos; we could keep them in
+ * the template, but need a way to checksum without them.
+ */
+ /*
+ * m->m_pkthdr.len should have been set before cksum calculation,
+ * because in6_cksum() needs it.
+ */
+#ifdef INET6
+ if (isipv6) {
+ /*
+ * we separately set hoplimit for every segment, since the
+ * user might want to change the value via setsockopt.
+ * Also, desired default hop limit might be changed via
+ * Neighbor Discovery.
+ */
+ ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
+
+ /* TODO: IPv6 IP6TOS_ECT bit on */
+ error = ip6_output(m,
+ tp->t_inpcb->in6p_outputopts, NULL,
+ ((so->so_options & SO_DONTROUTE) ?
+ IP_ROUTETOIF : 0), NULL, NULL, tp->t_inpcb);
+ } else
+#endif /* INET6 */
+ {
+ ip->ip_len = m->m_pkthdr.len;
+#ifdef INET6
+ if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
+ ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL);
+#endif /* INET6 */
+ /*
+ * If we do path MTU discovery, then we set DF on every packet.
+ * This might not be the best thing to do according to RFC3390
+ * Section 2. However, the tcp hostcache mitigates the problem
+ * so it affects only the first tcp connection with a host.
+ *
+ * NB: Don't set DF on small MTU/MSS to have a safe fallback.
+ */
+ if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+ ip->ip_off |= IP_DF;
+
+ error = ip_output(m, tp->t_inpcb->inp_options, NULL,
+ ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
+ tp->t_inpcb);
+ }
+ if (error) {
+
+ /*
+ * We know that the packet was lost, so back out the
+ * sequence number advance, if any.
+ *
+ * If the error is EPERM the packet got blocked by the
+ * local firewall. Normally we should terminate the
+ * connection but the blocking may have been spurious
+ * due to a firewall reconfiguration cycle. So we treat
+ * it like a packet loss and let the retransmit timer and
+ * timeouts do their work over time.
+ * XXX: It is a POLA question whether calling tcp_drop right
+ * away would be the really correct behavior instead.
+ */
+ if (((tp->t_flags & TF_FORCEDATA) == 0 ||
+ !tcp_timer_active(tp, TT_PERSIST)) &&
+ ((flags & TH_SYN) == 0) &&
+ (error != EPERM)) {
+ if (sack_rxmit) {
+ p->rxmit -= len;
+ tp->sackhint.sack_bytes_rexmit -= len;
+ KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
+ ("sackhint bytes rtx >= 0"));
+ } else
+ tp->snd_nxt -= len;
+ }
+out:
+ SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */
+ switch (error) {
+ case EPERM:
+ tp->t_softerror = error;
+ return (error);
+ case ENOBUFS:
+ if (!tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ tp->snd_cwnd = tp->t_maxseg;
+ return (0);
+ case EMSGSIZE:
+ /*
+ * For some reason the interface we used initially
+ * to send segments changed to another or lowered
+ * its MTU.
+ *
+ * tcp_mtudisc() will find out the new MTU and as
+ * its last action, initiate retransmission, so it
+ * is important to not do so here.
+ *
+ * If TSO was active we either got an interface
+ * without TSO capabilities or TSO was turned off.
+ * Disable it for this connection too and
+ * immediately retry with MSS sized segments generated
+ * by this function.
+ */
+ if (tso)
+ tp->t_flags &= ~TF_TSO;
+ tcp_mtudisc(tp->t_inpcb, 0);
+ return (0);
+ case EHOSTDOWN:
+ case EHOSTUNREACH:
+ case ENETDOWN:
+ case ENETUNREACH:
+ if (TCPS_HAVERCVDSYN(tp->t_state)) {
+ tp->t_softerror = error;
+ return (0);
+ }
+ /* FALLTHROUGH */
+ default:
+ return (error);
+ }
+ }
+ TCPSTAT_INC(tcps_sndtotal);
+
+ /*
+ * Data sent (as far as we can tell).
+ * If this advertises a larger window than any other segment,
+ * then remember the size of the advertised window.
+ * Any pending ACK has now been sent.
+ */
+ if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
+ tp->rcv_adv = tp->rcv_nxt + recwin;
+ tp->last_ack_sent = tp->rcv_nxt;
+ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
+ if (tcp_timer_active(tp, TT_DELACK))
+ tcp_timer_activate(tp, TT_DELACK, 0);
+#if 0
+ /*
+ * This completely breaks TCP if newreno is turned on. What happens
+ * is that if delayed-acks are turned on on the receiver, this code
+ * on the transmitter effectively destroys the TCP window, forcing
+ * it to four packets (1.5Kx4 = 6K window).
+ */
+ if (sendalot && (!V_tcp_do_newreno || --maxburst))
+ goto again;
+#endif
+ if (sendalot)
+ goto again;
+ return (0);
+}
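Editorial note on the idle-restart logic near the top of tcp_output() above: after more than one retransmit timeout of idleness, the congestion window is clamped to a restart window per RFC 5681 Section 4.1. The standalone sketch below is not part of the FreeBSD sources and reproduces only that arithmetic; the constant 4380 and the flight sizes of 1 and 4 segments match the sysctl defaults declared earlier in this file, everything else is illustrative.

#include <stdio.h>

static long lmin(long a, long b) { return (a < b ? a : b); }
static long lmax(long a, long b) { return (a > b ? a : b); }

/* Restart window: RFC 3390 style if enabled, otherwise flight-size based. */
static long restart_window(long maxseg, int do_rfc3390, int is_local)
{
	if (do_rfc3390)
		return lmin(4 * maxseg, lmax(2 * maxseg, 4380));
	return (is_local ? 4 : 1) * maxseg;  /* ss_fltsz_local / ss_fltsz defaults */
}

int main(void)
{
	long maxseg = 1460, cwnd = 20 * maxseg;
	long rw = restart_window(maxseg, 1, 0);

	/* After an idle period, sending resumes with cwnd clamped to rw. */
	printf("restart window %ld, clamped cwnd %ld\n", rw, lmin(rw, cwnd));
	return 0;
}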
+
+void
+tcp_setpersist(struct tcpcb *tp)
+{
+ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+ int tt;
+
+ if (tcp_timer_active(tp, TT_REXMT))
+ panic("tcp_setpersist: retransmit pending");
+ /*
+ * Start/restart persistence timer.
+ */
+ TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
+ TCPTV_PERSMIN, TCPTV_PERSMAX);
+ tcp_timer_activate(tp, TT_PERSIST, tt);
+ if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+ tp->t_rxtshift++;
+}
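Editorial note: tcp_setpersist() derives its base interval from the smoothed RTT estimate and then applies an exponential backoff clamped to a fixed range. The standalone sketch below is not part of the FreeBSD sources; the backoff table and the 5/60 tick bounds are illustrative stand-ins for tcp_backoff[] and the TCPTV_PERSMIN/TCPTV_PERSMAX limits, whose real values live in tcp_timer.h.

#include <stdio.h>

static const int backoff[] = { 1, 2, 4, 8, 16, 32, 64 };
#define PERSMIN		 5	/* illustrative lower bound, in ticks */
#define PERSMAX		60	/* illustrative upper bound, in ticks */

static int persist_timeout(int srtt, int rttvar, int shift)
{
	int t = ((srtt >> 2) + rttvar) >> 1;	/* same base as above */
	int tt = t * backoff[shift];

	if (tt < PERSMIN)
		tt = PERSMIN;
	else if (tt > PERSMAX)
		tt = PERSMAX;
	return (tt);
}

int main(void)
{
	int shift;

	for (shift = 0; shift < 7; shift++)
		printf("shift %d -> persist timeout %d\n",
		    shift, persist_timeout(8, 4, shift));
	return 0;
}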
+
+/*
+ * Insert TCP options according to the supplied parameters at the location
+ * optp in a consistent way. Can handle unaligned destinations.
+ *
+ * The order of the option processing is crucial for optimal packing and
+ * alignment for the scarce option space.
+ *
+ * The optimal order for a SYN/SYN-ACK segment is:
+ * MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
+ * Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40.
+ *
+ * The SACK options should be last. SACK blocks consume 8*n+2 bytes.
+ * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
+ * At minimum we need 10 bytes (to generate 1 SACK block). If both
+ * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present,
+ * we only have 10 bytes for SACK options (40 - (12 + 18)).
+ */
+int
+tcp_addoptions(struct tcpopt *to, u_char *optp)
+{
+ u_int mask, optlen = 0;
+
+ for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
+ if ((to->to_flags & mask) != mask)
+ continue;
+ if (optlen == TCP_MAXOLEN)
+ break;
+ switch (to->to_flags & mask) {
+ case TOF_MSS:
+ while (optlen % 4) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG)
+ continue;
+ optlen += TCPOLEN_MAXSEG;
+ *optp++ = TCPOPT_MAXSEG;
+ *optp++ = TCPOLEN_MAXSEG;
+ to->to_mss = htons(to->to_mss);
+ bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
+ optp += sizeof(to->to_mss);
+ break;
+ case TOF_SCALE:
+ while (!optlen || optlen % 2 != 1) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW)
+ continue;
+ optlen += TCPOLEN_WINDOW;
+ *optp++ = TCPOPT_WINDOW;
+ *optp++ = TCPOLEN_WINDOW;
+ *optp++ = to->to_wscale;
+ break;
+ case TOF_SACKPERM:
+ while (optlen % 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED)
+ continue;
+ optlen += TCPOLEN_SACK_PERMITTED;
+ *optp++ = TCPOPT_SACK_PERMITTED;
+ *optp++ = TCPOLEN_SACK_PERMITTED;
+ break;
+ case TOF_TS:
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP)
+ continue;
+ optlen += TCPOLEN_TIMESTAMP;
+ *optp++ = TCPOPT_TIMESTAMP;
+ *optp++ = TCPOLEN_TIMESTAMP;
+ to->to_tsval = htonl(to->to_tsval);
+ to->to_tsecr = htonl(to->to_tsecr);
+ bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
+ optp += sizeof(to->to_tsval);
+ bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
+ optp += sizeof(to->to_tsecr);
+ break;
+ case TOF_SIGNATURE:
+ {
+ int siglen = TCPOLEN_SIGNATURE - 2;
+
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE)
+ continue;
+ optlen += TCPOLEN_SIGNATURE;
+ *optp++ = TCPOPT_SIGNATURE;
+ *optp++ = TCPOLEN_SIGNATURE;
+ to->to_signature = optp;
+ while (siglen--)
+ *optp++ = 0;
+ break;
+ }
+ case TOF_SACK:
+ {
+ int sackblks = 0;
+ struct sackblk *sack = (struct sackblk *)to->to_sacks;
+ tcp_seq sack_seq;
+
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK)
+ continue;
+ optlen += TCPOLEN_SACKHDR;
+ *optp++ = TCPOPT_SACK;
+ sackblks = min(to->to_nsacks,
+ (TCP_MAXOLEN - optlen) / TCPOLEN_SACK);
+ *optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
+ while (sackblks--) {
+ sack_seq = htonl(sack->start);
+ bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+ optp += sizeof(sack_seq);
+ sack_seq = htonl(sack->end);
+ bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+ optp += sizeof(sack_seq);
+ optlen += TCPOLEN_SACK;
+ sack++;
+ }
+ TCPSTAT_INC(tcps_sack_send_blocks);
+ break;
+ }
+ default:
+ panic("%s: unknown TCP option type", __func__);
+ break;
+ }
+ }
+
+ /* Terminate and pad TCP options to a 4 byte boundary. */
+ if (optlen % 4) {
+ optlen += TCPOLEN_EOL;
+ *optp++ = TCPOPT_EOL;
+ }
+ /*
+ * According to RFC 793 (STD0007):
+ * "The content of the header beyond the End-of-Option option
+ * must be header padding (i.e., zero)."
+ * and later: "The padding is composed of zeros."
+ */
+ while (optlen % 4) {
+ optlen += TCPOLEN_PAD;
+ *optp++ = TCPOPT_PAD;
+ }
+
+ KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
+ return (optlen);
+}
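Editorial note: the packing rules documented above bound how many SACK blocks fit into the 40-byte TCP option space. A SACK option costs 2 + 8*n bytes, and with padded timestamps (12 bytes) plus an MD5 signature (18 bytes) only 10 bytes remain, i.e. exactly one block. The standalone sketch below is not part of the FreeBSD sources and simply restates that arithmetic.

#include <stdio.h>

#define MAX_OPT_SPACE	40	/* TCP_MAXOLEN */
#define TS_PADDED	12	/* 10-byte timestamp option rounded up to 4 bytes */
#define MD5_SIGNATURE	18
#define SACK_HDR	 2
#define SACK_BLOCK	 8

int main(void)
{
	int room = MAX_OPT_SPACE - TS_PADDED - MD5_SIGNATURE;
	int blocks = (room - SACK_HDR) / SACK_BLOCK;

	printf("%d bytes left for SACK -> %d block(s)\n", room, blocks);
	return 0;
}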
diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c
new file mode 100644
index 00000000..aea58740
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_reass.c
@@ -0,0 +1,335 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet6/in6_pcb.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/nd6.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet6/tcp6_var.h>
+#include <freebsd/netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <freebsd/netinet/tcp_debug.h>
+#endif /* TCPDEBUG */
+
+static int tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS);
+static int tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly Queue");
+
+static VNET_DEFINE(int, tcp_reass_maxseg) = 0;
+#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg)
+SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &VNET_NAME(tcp_reass_maxseg), 0, &tcp_reass_sysctl_maxseg, "I",
+ "Global maximum number of TCP Segments in Reassembly Queue");
+
+static VNET_DEFINE(int, tcp_reass_qsize) = 0;
+#define V_tcp_reass_qsize VNET(tcp_reass_qsize)
+SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+ &VNET_NAME(tcp_reass_qsize), 0, &tcp_reass_sysctl_qsize, "I",
+ "Global number of TCP Segments currently in Reassembly Queue");
+
+static VNET_DEFINE(int, tcp_reass_overflows) = 0;
+#define V_tcp_reass_overflows VNET(tcp_reass_overflows)
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+ &VNET_NAME(tcp_reass_overflows), 0,
+ "Global number of TCP Segment Reassembly Queue Overflows");
+
+static VNET_DEFINE(uma_zone_t, tcp_reass_zone);
+#define V_tcp_reass_zone VNET(tcp_reass_zone)
+
+/* Initialize TCP reassembly queue */
+static void
+tcp_reass_zone_change(void *tag)
+{
+
+ V_tcp_reass_maxseg = nmbclusters / 16;
+ uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+}
+
+void
+tcp_reass_init(void)
+{
+
+ V_tcp_reass_maxseg = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+ &V_tcp_reass_maxseg);
+ V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+ EVENTHANDLER_REGISTER(nmbclusters_change,
+ tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+}
+
+#ifdef VIMAGE
+void
+tcp_reass_destroy(void)
+{
+
+ uma_zdestroy(V_tcp_reass_zone);
+}
+#endif
+
+void
+tcp_reass_flush(struct tcpcb *tp)
+{
+ struct tseg_qent *qe;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
+ LIST_REMOVE(qe, tqe_q);
+ m_freem(qe->tqe_m);
+ uma_zfree(V_tcp_reass_zone, qe);
+ tp->t_segqlen--;
+ }
+
+ KASSERT((tp->t_segqlen == 0),
+ ("TCP reass queue %p segment count is %d instead of 0 after flush.",
+ tp, tp->t_segqlen));
+}
+
+static int
+tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS)
+{
+ V_tcp_reass_maxseg = uma_zone_get_max(V_tcp_reass_zone);
+ return (sysctl_handle_int(oidp, arg1, arg2, req));
+}
+
+static int
+tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
+{
+ V_tcp_reass_qsize = uma_zone_get_cur(V_tcp_reass_zone);
+ return (sysctl_handle_int(oidp, arg1, arg2, req));
+}
+
+int
+tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
+{
+ struct tseg_qent *q;
+ struct tseg_qent *p = NULL;
+ struct tseg_qent *nq;
+ struct tseg_qent *te = NULL;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ int flags;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * XXX: tcp_reass() is rather inefficient with its data structures
+ * and should be rewritten (see NetBSD for optimizations).
+ */
+
+ /*
+ * Call with th==NULL after becoming established to
+ * force pre-ESTABLISHED data up to user socket.
+ */
+ if (th == NULL)
+ goto present;
+
+ /*
+ * Limit the number of segments that can be queued to reduce the
+ * potential for mbuf exhaustion. For best performance, we want to be
+ * able to queue a full window's worth of segments. The size of the
+ * socket receive buffer determines our advertised window and grows
+ * automatically when socket buffer autotuning is enabled. Use it as the
+ * basis for our queue limit.
+ * Always let through the missing segment that caused this queueing.
+ * NB: Access to the socket buffer is left intentionally unlocked as we
+ * can tolerate stale information here.
+ *
+ * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
+ * should work but causes packets to be dropped when they shouldn't.
+ * Investigate why and re-evaluate the below limit after the behaviour
+ * is understood.
+ */
+ if (th->th_seq != tp->rcv_nxt &&
+ tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
+ V_tcp_reass_overflows++;
+ TCPSTAT_INC(tcps_rcvmemdrop);
+ m_freem(m);
+ *tlenp = 0;
+ return (0);
+ }
+
+ /*
+ * Allocate a new queue entry. If we can't, or hit the zone limit,
+ * just drop the pkt.
+ */
+ te = uma_zalloc(V_tcp_reass_zone, M_NOWAIT);
+ if (te == NULL) {
+ TCPSTAT_INC(tcps_rcvmemdrop);
+ m_freem(m);
+ *tlenp = 0;
+ return (0);
+ }
+ tp->t_segqlen++;
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ LIST_FOREACH(q, &tp->t_segq, tqe_q) {
+ if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
+ break;
+ p = q;
+ }
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us.
+ */
+ if (p != NULL) {
+ int i;
+ /* conversion to int (in i) handles seq wraparound */
+ i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
+ if (i > 0) {
+ if (i >= *tlenp) {
+ TCPSTAT_INC(tcps_rcvduppack);
+ TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
+ m_freem(m);
+ uma_zfree(V_tcp_reass_zone, te);
+ tp->t_segqlen--;
+ /*
+ * Try to present any queued data
+ * at the left window edge to the user.
+ * This is needed after the 3-WHS
+ * completes.
+ */
+ goto present; /* ??? */
+ }
+ m_adj(m, i);
+ *tlenp -= i;
+ th->th_seq += i;
+ }
+ }
+ TCPSTAT_INC(tcps_rcvoopack);
+ TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ while (q) {
+ int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
+ if (i <= 0)
+ break;
+ if (i < q->tqe_len) {
+ q->tqe_th->th_seq += i;
+ q->tqe_len -= i;
+ m_adj(q->tqe_m, i);
+ break;
+ }
+
+ nq = LIST_NEXT(q, tqe_q);
+ LIST_REMOVE(q, tqe_q);
+ m_freem(q->tqe_m);
+ uma_zfree(V_tcp_reass_zone, q);
+ tp->t_segqlen--;
+ q = nq;
+ }
+
+ /* Insert the new segment queue entry into place. */
+ te->tqe_m = m;
+ te->tqe_th = th;
+ te->tqe_len = *tlenp;
+
+ if (p == NULL) {
+ LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
+ } else {
+ LIST_INSERT_AFTER(p, te, tqe_q);
+ }
+
+present:
+ /*
+ * Present data to user, advancing rcv_nxt through
+ * completed sequence space.
+ */
+ if (!TCPS_HAVEESTABLISHED(tp->t_state))
+ return (0);
+ q = LIST_FIRST(&tp->t_segq);
+ if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
+ return (0);
+ SOCKBUF_LOCK(&so->so_rcv);
+ do {
+ tp->rcv_nxt += q->tqe_len;
+ flags = q->tqe_th->th_flags & TH_FIN;
+ nq = LIST_NEXT(q, tqe_q);
+ LIST_REMOVE(q, tqe_q);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ m_freem(q->tqe_m);
+ else
+ sbappendstream_locked(&so->so_rcv, q->tqe_m);
+ uma_zfree(V_tcp_reass_zone, q);
+ tp->t_segqlen--;
+ q = nq;
+ } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
+ ND6_HINT(tp);
+ sorwakeup_locked(so);
+ return (flags);
+}
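Editorial note: the heart of the reassembly loop above is the signed overlap computation between an already queued segment and the incoming one; a positive overlap means the front of the incoming segment duplicates queued data and is trimmed off. The standalone sketch below is not part of the FreeBSD sources and shows that trim step on plain integers, ignoring sequence-number wraparound for simplicity.

#include <stdio.h>

struct seg { unsigned int seq; int len; };

/* Trim the front of "in" where it overlaps the queued segment "prev". */
static void trim_overlap(const struct seg *prev, struct seg *in)
{
	int i = (int)(prev->seq + prev->len - in->seq);

	if (i > 0 && i < in->len) {
		in->seq += i;		/* drop the duplicated prefix */
		in->len -= i;
	}
	/* i >= in->len would mean the incoming segment is a full duplicate. */
}

int main(void)
{
	struct seg prev = { 1000, 500 };	/* covers [1000, 1500) */
	struct seg in   = { 1400, 300 };	/* covers [1400, 1700) */

	trim_overlap(&prev, &in);
	printf("incoming segment now starts at %u, length %d\n", in.seq, in.len);
	return 0;
}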
diff --git a/freebsd/sys/netinet/tcp_sack.c b/freebsd/sys/netinet/tcp_sack.c
new file mode 100644
index 00000000..94bae57b
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_sack.c
@@ -0,0 +1,687 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
+ */
+
+/*-
+ * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
+ *
+ * NRL grants permission for redistribution and use in source and binary
+ * forms, with or without modification, of the software and documentation
+ * created at NRL provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgements:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * This product includes software developed at the Information
+ * Technology Division, US Naval Research Laboratory.
+ * 4. Neither the name of the NRL nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation
+ * are those of the authors and should not be interpreted as representing
+ * official policies, either expressed or implied, of the US Naval
+ * Research Laboratory (NRL).
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/proc.h> /* for proc0 declaration */
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet/icmp6.h>
+#include <freebsd/netinet6/nd6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/in6_pcb.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet6/tcp6_var.h>
+#include <freebsd/netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <freebsd/netinet/tcp_debug.h>
+#endif /* TCPDEBUG */
+
+#include <freebsd/machine/in_cksum.h>
+
+VNET_DECLARE(struct uma_zone *, sack_hole_zone);
+#define V_sack_hole_zone VNET(sack_hole_zone)
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
+VNET_DEFINE(int, tcp_do_sack) = 1;
+#define V_tcp_do_sack VNET(tcp_do_sack)
+SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_sack), 0, "Enable/Disable TCP SACK support");
+
+VNET_DEFINE(int, tcp_sack_maxholes) = 128;
+#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes)
+SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW,
+ &VNET_NAME(tcp_sack_maxholes), 0,
+ "Maximum number of TCP SACK holes allowed per connection");
+
+VNET_DEFINE(int, tcp_sack_globalmaxholes) = 65536;
+#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes)
+SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW,
+ &VNET_NAME(tcp_sack_globalmaxholes), 0,
+ "Global maximum number of TCP SACK holes");
+
+VNET_DEFINE(int, tcp_sack_globalholes) = 0;
+#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes)
+SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_RD,
+ &VNET_NAME(tcp_sack_globalholes), 0,
+ "Global number of TCP SACK holes currently allocated");
+
+/*
+ * This function is called upon receipt of new valid data (while not in
+ * header prediction mode), and it updates the ordered list of sacks.
+ */
+void
+tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
+{
+ /*
+ * First reported block MUST be the most recent one. Subsequent
+ * blocks SHOULD be in the order in which they arrived at the
+ * receiver. These two conditions make the implementation fully
+ * compliant with RFC 2018.
+ */
+ struct sackblk head_blk, saved_blks[MAX_SACK_BLKS];
+ int num_head, num_saved, i;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /* Check arguments. */
+ KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
+
+ /* SACK block for the received segment. */
+ head_blk.start = rcv_start;
+ head_blk.end = rcv_end;
+
+ /*
+ * Merge updated SACK blocks into head_blk, and save unchanged SACK
+ * blocks into saved_blks[]. num_saved will have the number of the
+ * saved SACK blocks.
+ */
+ num_saved = 0;
+ for (i = 0; i < tp->rcv_numsacks; i++) {
+ tcp_seq start = tp->sackblks[i].start;
+ tcp_seq end = tp->sackblks[i].end;
+ if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
+ /*
+ * Discard this SACK block.
+ */
+ } else if (SEQ_LEQ(head_blk.start, end) &&
+ SEQ_GEQ(head_blk.end, start)) {
+ /*
+ * Merge this SACK block into head_blk. This SACK
+ * block itself will be discarded.
+ */
+ if (SEQ_GT(head_blk.start, start))
+ head_blk.start = start;
+ if (SEQ_LT(head_blk.end, end))
+ head_blk.end = end;
+ } else {
+ /*
+ * Save this SACK block.
+ */
+ saved_blks[num_saved].start = start;
+ saved_blks[num_saved].end = end;
+ num_saved++;
+ }
+ }
+
+ /*
+ * Update SACK list in tp->sackblks[].
+ */
+ num_head = 0;
+ if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
+ /*
+ * The received data segment is an out-of-order segment. Put
+ * head_blk at the top of SACK list.
+ */
+ tp->sackblks[0] = head_blk;
+ num_head = 1;
+ /*
+ * If the number of saved SACK blocks exceeds its limit,
+ * discard the last SACK block.
+ */
+ if (num_saved >= MAX_SACK_BLKS)
+ num_saved--;
+ }
+ if (num_saved > 0) {
+ /*
+ * Copy the saved SACK blocks back.
+ */
+ bcopy(saved_blks, &tp->sackblks[num_head],
+ sizeof(struct sackblk) * num_saved);
+ }
+
+ /* Save the number of SACK blocks. */
+ tp->rcv_numsacks = num_head + num_saved;
+}
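+
+/*
+ * Illustrative example of the update above (hypothetical sequence numbers,
+ * not part of the original source): with rcv_nxt = 100 and an existing
+ * block [200,300), an out-of-order segment [300,400) makes head_blk start
+ * as [300,400), merge with the adjacent [200,300) into [200,400), and end
+ * up as sackblks[0] with rcv_numsacks = 1, since it starts above rcv_nxt.
+ */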
+
+/*
+ * Delete all receiver-side SACK information.
+ */
+void
+tcp_clean_sackreport(struct tcpcb *tp)
+{
+ int i;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ tp->rcv_numsacks = 0;
+ for (i = 0; i < MAX_SACK_BLKS; i++)
+		tp->sackblks[i].start = tp->sackblks[i].end = 0;
+}
+
+/*
+ * Allocate struct sackhole.
+ */
+static struct sackhole *
+tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end)
+{
+ struct sackhole *hole;
+
+ if (tp->snd_numholes >= V_tcp_sack_maxholes ||
+ V_tcp_sack_globalholes >= V_tcp_sack_globalmaxholes) {
+ TCPSTAT_INC(tcps_sack_sboverflow);
+ return NULL;
+ }
+
+ hole = (struct sackhole *)uma_zalloc(V_sack_hole_zone, M_NOWAIT);
+ if (hole == NULL)
+ return NULL;
+
+ hole->start = start;
+ hole->end = end;
+ hole->rxmit = start;
+
+ tp->snd_numholes++;
+ atomic_add_int(&V_tcp_sack_globalholes, 1);
+
+ return hole;
+}
+
+/*
+ * Free struct sackhole.
+ */
+static void
+tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
+{
+
+ uma_zfree(V_sack_hole_zone, hole);
+
+ tp->snd_numholes--;
+ atomic_subtract_int(&V_tcp_sack_globalholes, 1);
+
+ KASSERT(tp->snd_numholes >= 0, ("tp->snd_numholes >= 0"));
+ KASSERT(V_tcp_sack_globalholes >= 0, ("tcp_sack_globalholes >= 0"));
+}
+
+/*
+ * Insert new SACK hole into scoreboard.
+ */
+static struct sackhole *
+tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end,
+ struct sackhole *after)
+{
+ struct sackhole *hole;
+
+ /* Allocate a new SACK hole. */
+ hole = tcp_sackhole_alloc(tp, start, end);
+ if (hole == NULL)
+ return NULL;
+
+ /* Insert the new SACK hole into scoreboard. */
+ if (after != NULL)
+ TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink);
+ else
+ TAILQ_INSERT_TAIL(&tp->snd_holes, hole, scblink);
+
+ /* Update SACK hint. */
+ if (tp->sackhint.nexthole == NULL)
+ tp->sackhint.nexthole = hole;
+
+ return hole;
+}
+
+/*
+ * Remove SACK hole from scoreboard.
+ */
+static void
+tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
+{
+
+ /* Update SACK hint. */
+ if (tp->sackhint.nexthole == hole)
+ tp->sackhint.nexthole = TAILQ_NEXT(hole, scblink);
+
+ /* Remove this SACK hole. */
+ TAILQ_REMOVE(&tp->snd_holes, hole, scblink);
+
+ /* Free this SACK hole. */
+ tcp_sackhole_free(tp, hole);
+}
+
+/*
+ * Process cumulative ACK and the TCP SACK option to update the scoreboard.
+ * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
+ * the sequence space).
+ */
+void
+tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
+{
+ struct sackhole *cur, *temp;
+ struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
+ int i, j, num_sack_blks;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ num_sack_blks = 0;
+ /*
+ * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
+ * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
+ */
+ if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
+ sack_blocks[num_sack_blks].start = tp->snd_una;
+ sack_blocks[num_sack_blks++].end = th_ack;
+ }
+ /*
+ * Append received valid SACK blocks to sack_blocks[], but only if we
+ * received new blocks from the other side.
+ */
+ if (to->to_flags & TOF_SACK) {
+ for (i = 0; i < to->to_nsacks; i++) {
+ bcopy((to->to_sacks + i * TCPOLEN_SACK),
+ &sack, sizeof(sack));
+ sack.start = ntohl(sack.start);
+ sack.end = ntohl(sack.end);
+ if (SEQ_GT(sack.end, sack.start) &&
+ SEQ_GT(sack.start, tp->snd_una) &&
+ SEQ_GT(sack.start, th_ack) &&
+ SEQ_LT(sack.start, tp->snd_max) &&
+ SEQ_GT(sack.end, tp->snd_una) &&
+ SEQ_LEQ(sack.end, tp->snd_max))
+ sack_blocks[num_sack_blks++] = sack;
+ }
+ }
+ /*
+ * Return if SND.UNA is not advanced and no valid SACK block is
+ * received.
+ */
+ if (num_sack_blks == 0)
+ return;
+
+ /*
+ * Sort the SACK blocks so we can update the scoreboard with just one
+	 * pass. The overhead of sorting up to 4+1 elements is less than
+	 * making up to 4+1 passes over the scoreboard.
+ */
+ for (i = 0; i < num_sack_blks; i++) {
+ for (j = i + 1; j < num_sack_blks; j++) {
+ if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) {
+ sack = sack_blocks[i];
+ sack_blocks[i] = sack_blocks[j];
+ sack_blocks[j] = sack;
+ }
+ }
+ }
+ if (TAILQ_EMPTY(&tp->snd_holes))
+ /*
+ * Empty scoreboard. Need to initialize snd_fack (it may be
+ * uninitialized or have a bogus value). Scoreboard holes
+ * (from the sack blocks received) are created later below
+ * (in the logic that adds holes to the tail of the
+ * scoreboard).
+ */
+ tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
+ /*
+ * In the while-loop below, incoming SACK blocks (sack_blocks[]) and
+ * SACK holes (snd_holes) are traversed from their tails with just
+ * one pass in order to reduce the number of compares especially when
+ * the bandwidth-delay product is large.
+ *
+ * Note: Typically, in the first RTT of SACK recovery, the highest
+ * three or four SACK blocks with the same ack number are received.
+ * In the second RTT, if retransmitted data segments are not lost,
+ * the highest three or four SACK blocks with ack number advancing
+ * are received.
+ */
+ sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */
+ if (SEQ_LT(tp->snd_fack, sblkp->start)) {
+ /*
+ * The highest SACK block is beyond fack. Append new SACK
+ * hole at the tail. If the second or later highest SACK
+ * blocks are also beyond the current fack, they will be
+ * inserted by way of hole splitting in the while-loop below.
+ */
+ temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
+ if (temp != NULL) {
+ tp->snd_fack = sblkp->end;
+ /* Go to the previous sack block. */
+ sblkp--;
+ } else {
+ /*
+ * We failed to add a new hole based on the current
+ * sack block. Skip over all the sack blocks that
+ * fall completely to the right of snd_fack and
+ * proceed to trim the scoreboard based on the
+ * remaining sack blocks. This also trims the
+ * scoreboard for th_ack (which is sack_blocks[0]).
+ */
+ while (sblkp >= sack_blocks &&
+ SEQ_LT(tp->snd_fack, sblkp->start))
+ sblkp--;
+ if (sblkp >= sack_blocks &&
+ SEQ_LT(tp->snd_fack, sblkp->end))
+ tp->snd_fack = sblkp->end;
+ }
+ } else if (SEQ_LT(tp->snd_fack, sblkp->end))
+ /* fack is advanced. */
+ tp->snd_fack = sblkp->end;
+ /* We must have at least one SACK hole in scoreboard. */
+ KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
+ ("SACK scoreboard must not be empty"));
+ cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
+ /*
+ * Since the incoming sack blocks are sorted, we can process them
+ * making one sweep of the scoreboard.
+ */
+ while (sblkp >= sack_blocks && cur != NULL) {
+ if (SEQ_GEQ(sblkp->start, cur->end)) {
+ /*
+ * SACKs data beyond the current hole. Go to the
+ * previous sack block.
+ */
+ sblkp--;
+ continue;
+ }
+ if (SEQ_LEQ(sblkp->end, cur->start)) {
+ /*
+ * SACKs data before the current hole. Go to the
+ * previous hole.
+ */
+ cur = TAILQ_PREV(cur, sackhole_head, scblink);
+ continue;
+ }
+ tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
+ KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
+ ("sackhint bytes rtx >= 0"));
+ if (SEQ_LEQ(sblkp->start, cur->start)) {
+ /* Data acks at least the beginning of hole. */
+ if (SEQ_GEQ(sblkp->end, cur->end)) {
+ /* Acks entire hole, so delete hole. */
+ temp = cur;
+ cur = TAILQ_PREV(cur, sackhole_head, scblink);
+ tcp_sackhole_remove(tp, temp);
+ /*
+ * The sack block may ack all or part of the
+ * next hole too, so continue onto the next
+ * hole.
+ */
+ continue;
+ } else {
+ /* Move start of hole forward. */
+ cur->start = sblkp->end;
+ cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
+ }
+ } else {
+ /* Data acks at least the end of hole. */
+ if (SEQ_GEQ(sblkp->end, cur->end)) {
+ /* Move end of hole backward. */
+ cur->end = sblkp->start;
+ cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
+ } else {
+ /*
+ * ACKs some data in middle of a hole; need
+ * to split current hole
+ */
+ temp = tcp_sackhole_insert(tp, sblkp->end,
+ cur->end, cur);
+ if (temp != NULL) {
+ if (SEQ_GT(cur->rxmit, temp->rxmit)) {
+ temp->rxmit = cur->rxmit;
+ tp->sackhint.sack_bytes_rexmit
+ += (temp->rxmit
+ - temp->start);
+ }
+ cur->end = sblkp->start;
+ cur->rxmit = SEQ_MIN(cur->rxmit,
+ cur->end);
+ }
+ }
+ }
+ tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start);
+ /*
+ * Testing sblkp->start against cur->start tells us whether
+ * we're done with the sack block or the sack hole.
+ * Accordingly, we advance one or the other.
+ */
+ if (SEQ_LEQ(sblkp->start, cur->start))
+ cur = TAILQ_PREV(cur, sackhole_head, scblink);
+ else
+ sblkp--;
+ }
+}
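+
+/*
+ * Illustrative walk-through (hypothetical sequence numbers): with
+ * snd_una = 1000, snd_max = 5000 and an empty scoreboard, an ACK for 1000
+ * carrying the SACK block [3000,4000) appends the hole [1000,3000) at the
+ * tail and advances snd_fack to 4000.  A later ACK carrying [2000,2500)
+ * touches neither end of that hole, so it is split into [1000,2000) and
+ * [2500,3000) by the hole-splitting branch above.
+ */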
+
+/*
+ * Free all SACK holes to clear the scoreboard.
+ */
+void
+tcp_free_sackholes(struct tcpcb *tp)
+{
+ struct sackhole *q;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL)
+ tcp_sackhole_remove(tp, q);
+ tp->sackhint.sack_bytes_rexmit = 0;
+
+ KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes == 0"));
+ KASSERT(tp->sackhint.nexthole == NULL,
+ ("tp->sackhint.nexthole == NULL"));
+}
+
+/*
+ * Partial ack handling within a sack recovery episode. Keeping this very
+ * simple for now. When a partial ack is received, force snd_cwnd to a value
+ * that will allow the sender to transmit no more than 2 segments. If
+ * necessary, a better scheme can be adopted at a later point, but for now,
+ * the goal is to prevent the sender from bursting a large amount of data in
+ * the midst of sack recovery.
+ */
+void
+tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
+{
+ int num_segs = 1;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tp->t_rtttime = 0;
+ /* Send one or 2 segments based on how much new data was acked. */
+ if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2)
+ num_segs = 2;
+ tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
+ (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg);
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+}
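+
+/*
+ * Worked example (assumed values): with t_maxseg = 1460 and a partial ack
+ * that advances snd_una by less than three segments, num_segs stays 1, so
+ * snd_cwnd becomes the bytes already retransmitted from the scoreboard,
+ * plus what was sent beyond sack_newdata, plus one MSS; the sender can
+ * therefore emit roughly one additional segment before the window closes.
+ */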
+
+#if 0
+/*
+ * Debug version of tcp_sack_output() that walks the scoreboard. Used for
+ * now to sanity check the hint.
+ */
+static struct sackhole *
+tcp_sack_output_debug(struct tcpcb *tp, int *sack_bytes_rexmt)
+{
+ struct sackhole *p;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ *sack_bytes_rexmt = 0;
+ TAILQ_FOREACH(p, &tp->snd_holes, scblink) {
+ if (SEQ_LT(p->rxmit, p->end)) {
+ if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */
+ continue;
+ }
+ *sack_bytes_rexmt += (p->rxmit - p->start);
+ break;
+ }
+ *sack_bytes_rexmt += (p->rxmit - p->start);
+ }
+ return (p);
+}
+#endif
+
+/*
+ * Returns the next hole to retransmit and the number of retransmitted bytes
+ * from the scoreboard. We store both the next hole and the number of
+ * retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK
+ * reception). This avoids scoreboard traversals completely.
+ *
+ * The loop here will traverse *at most* one link. Here's the argument. For
+ * the loop to traverse more than 1 link before finding the next hole to
+ * retransmit, we would need to have at least 1 node following the current
+ * hint with (rxmit == end). But, for all holes following the current hint,
+ * (start == rxmit), since we have not yet retransmitted from them.
+ * Therefore, in order to traverse more than 1 link in the loop below, we need to
+ * have at least one node following the current hint with (start == rxmit ==
+ * end). But that can't happen, because (start == end) means that all the data in
+ * that hole has been sacked, in which case, the hole would have been removed
+ * from the scoreboard.
+ */
+struct sackhole *
+tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
+{
+ struct sackhole *hole = NULL;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ *sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit;
+ hole = tp->sackhint.nexthole;
+ if (hole == NULL || SEQ_LT(hole->rxmit, hole->end))
+ goto out;
+ while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) {
+ if (SEQ_LT(hole->rxmit, hole->end)) {
+ tp->sackhint.nexthole = hole;
+ break;
+ }
+ }
+out:
+ return (hole);
+}
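+
+/*
+ * Illustrative case (hypothetical holes): if the hint points at a hole
+ * [100,200) whose rxmit has already reached 200, the loop above advances
+ * exactly one link to the next hole, say [300,400) with rxmit = 300,
+ * records it as the new hint and returns it; as argued above it can never
+ * need to advance further than that.
+ */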
+
+/*
+ * After a timeout, the SACK list may be rebuilt. This SACK information
+ * should be used to avoid retransmitting SACKed data. This function
+ * traverses the SACK list to see if snd_nxt should be moved forward.
+ */
+void
+tcp_sack_adjust(struct tcpcb *tp)
+{
+ struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes);
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ if (cur == NULL)
+ return; /* No holes */
+ if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack))
+ return; /* We're already beyond any SACKed blocks */
+ /*-
+ * Two cases for which we want to advance snd_nxt:
+ * i) snd_nxt lies between end of one hole and beginning of another
+ * ii) snd_nxt lies between end of last hole and snd_fack
+ */
+ while ((p = TAILQ_NEXT(cur, scblink)) != NULL) {
+ if (SEQ_LT(tp->snd_nxt, cur->end))
+ return;
+ if (SEQ_GEQ(tp->snd_nxt, p->start))
+ cur = p;
+ else {
+ tp->snd_nxt = p->start;
+ return;
+ }
+ }
+ if (SEQ_LT(tp->snd_nxt, cur->end))
+ return;
+ tp->snd_nxt = tp->snd_fack;
+}
diff --git a/freebsd/sys/netinet/tcp_seq.h b/freebsd/sys/netinet/tcp_seq.h
new file mode 100644
index 00000000..8af7b0ab
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_seq.h
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_seq.h 8.3 (Berkeley) 6/21/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_SEQ_HH_
+#define _NETINET_TCP_SEQ_HH_
+/*
+ * TCP sequence numbers are 32 bit integers operated
+ * on with modular arithmetic. These macros can be
+ * used to compare such integers.
+ */
+#define SEQ_LT(a,b) ((int)((a)-(b)) < 0)
+#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
+#define SEQ_GT(a,b) ((int)((a)-(b)) > 0)
+#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0)
+
+#define SEQ_MIN(a, b) ((SEQ_LT(a, b)) ? (a) : (b))
+#define SEQ_MAX(a, b) ((SEQ_GT(a, b)) ? (a) : (b))
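+
+/*
+ * Worked example (illustrative): with 32-bit wraparound, SEQ_LT(0xfffffff0,
+ * 0x00000010) evaluates (int)(0xfffffff0 - 0x00000010) = (int)0xffffffe0,
+ * which is -32, so a sequence number just below the wrap point still
+ * compares as less than one just above it.
+ */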
+
+/* for modulo comparisons of timestamps */
+#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
+#define TSTMP_GT(a,b) ((int)((a)-(b)) > 0)
+#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
+
+/*
+ * Macros to initialize tcp sequence numbers for
+ * send and receive from initial send and receive
+ * sequence numbers.
+ */
+#define tcp_rcvseqinit(tp) \
+ (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1
+
+#define tcp_sendseqinit(tp) \
+ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
+ (tp)->snd_recover = (tp)->iss
+
+#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * hz)
+ /* timestamp wrap-around time */
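+
+/*
+ * For reference (illustrative arithmetic): 24 * 24 * 60 * 60 seconds is
+ * 2,073,600 seconds, i.e. 24 days, so TCP_PAWS_IDLE expresses 24 idle days
+ * once scaled to ticks by hz.
+ */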
+
+#endif /* _NETINET_TCP_SEQ_HH_ */
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c
new file mode 100644
index 00000000..83777450
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_subr.c
@@ -0,0 +1,2315 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/callout.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#ifdef INET6
+#include <freebsd/sys/domain.h>
+#endif
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/random.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/route.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+#include <freebsd/netinet/in_pcb.h>
+#ifdef INET6
+#include <freebsd/netinet6/in6_pcb.h>
+#endif
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_var.h>
+#ifdef INET6
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/scope6_var.h>
+#include <freebsd/netinet6/nd6.h>
+#endif
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcp_syncache.h>
+#include <freebsd/netinet/tcp_offload.h>
+#ifdef INET6
+#include <freebsd/netinet6/tcp6_var.h>
+#endif
+#include <freebsd/netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <freebsd/netinet/tcp_debug.h>
+#endif
+#include <freebsd/netinet6/ip6protosw.h>
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#include <freebsd/netipsec/xform.h>
+#ifdef INET6
+#include <freebsd/netipsec/ipsec6.h>
+#endif
+#include <freebsd/netipsec/key.h>
+#include <freebsd/sys/syslog.h>
+#endif /*IPSEC*/
+
+#include <freebsd/machine/in_cksum.h>
+#include <freebsd/sys/md5.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
+#ifdef INET6
+VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
+#endif
+
+static int
+sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
+{
+ int error, new;
+
+ new = V_tcp_mssdflt;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (new < TCP_MINMSS)
+ error = EINVAL;
+ else
+ V_tcp_mssdflt = new;
+ }
+ return (error);
+}
+
+SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0,
+ &sysctl_net_inet_tcp_mss_check, "I",
+ "Default TCP Maximum Segment Size");
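+
+/*
+ * From userland this surfaces as the net.inet.tcp.mssdflt sysctl (for
+ * example "sysctl net.inet.tcp.mssdflt=1460"); values below TCP_MINMSS are
+ * rejected with EINVAL by the handler above.
+ */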
+
+#ifdef INET6
+static int
+sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS)
+{
+ int error, new;
+
+ new = V_tcp_v6mssdflt;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (new < TCP_MINMSS)
+ error = EINVAL;
+ else
+ V_tcp_v6mssdflt = new;
+ }
+ return (error);
+}
+
+SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0,
+ &sysctl_net_inet_tcp_mss_v6_check, "I",
+ "Default TCP Maximum Segment Size for IPv6");
+#endif
+
+static int
+vnet_sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
+{
+
+ VNET_SYSCTL_ARG(req, arg1);
+ return (sysctl_msec_to_ticks(oidp, arg1, arg2, req));
+}
+
+/*
+ * Minimum MSS we accept and use. This prevents DoS attacks where
+ * we are forced to a ridiculously low MSS like 20 and send hundreds
+ * of packets instead of one. The effect scales with the available
+ * bandwidth and quickly saturates the CPU and network interface
+ * with packet generation and sending. Set to zero to disable MINMSS
+ * checking. This setting prevents us from sending too small packets.
+ */
+VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
+ &VNET_NAME(tcp_minmss), 0,
+    "Minimum TCP Maximum Segment Size");
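+
+/*
+ * Rough illustration of the effect described above (assumed numbers): at
+ * 1 Mbit/s, roughly 125,000 bytes/s, a forced MSS of 20 bytes means about
+ * 6,250 packets per second, while an MSS of 1460 needs only about 86, so a
+ * tiny MSS inflates per-packet overhead by nearly two orders of magnitude.
+ */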
+
+VNET_DEFINE(int, tcp_do_rfc1323) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_rfc1323), 0,
+ "Enable rfc1323 (high performance TCP) extensions");
+
+static int tcp_log_debug = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
+ &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
+
+static int tcp_tcbhashsize = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN,
+ &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
+
+static int do_tcpdrain = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
+ "Enable tcp_drain routine for extra help when low on mbufs");
+
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD,
+ &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs");
+
+static VNET_DEFINE(int, icmp_may_rst) = 1;
+#define V_icmp_may_rst VNET(icmp_may_rst)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW,
+ &VNET_NAME(icmp_may_rst), 0,
+ "Certain ICMP unreachable messages may abort connections in SYN_SENT");
+
+static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0;
+#define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW,
+ &VNET_NAME(tcp_isn_reseed_interval), 0,
+ "Seconds between reseeding of ISN secret");
+
+/*
+ * TCP bandwidth limiting sysctls. Note that the default lower bound of
+ * 1024 exists only for debugging. A good production default would be
+ * something like 6100.
+ */
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, inflight, CTLFLAG_RW, 0,
+ "TCP inflight data limiting");
+
+static VNET_DEFINE(int, tcp_inflight_enable) = 0;
+#define V_tcp_inflight_enable VNET(tcp_inflight_enable)
+SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, enable, CTLFLAG_RW,
+ &VNET_NAME(tcp_inflight_enable), 0,
+ "Enable automatic TCP inflight data limiting");
+
+static int tcp_inflight_debug = 0;
+SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, debug, CTLFLAG_RW,
+ &tcp_inflight_debug, 0,
+ "Debug TCP inflight calculations");
+
+static VNET_DEFINE(int, tcp_inflight_rttthresh);
+#define V_tcp_inflight_rttthresh VNET(tcp_inflight_rttthresh)
+SYSCTL_VNET_PROC(_net_inet_tcp_inflight, OID_AUTO, rttthresh,
+ CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_inflight_rttthresh), 0,
+ vnet_sysctl_msec_to_ticks, "I",
+ "RTT threshold below which inflight will deactivate itself");
+
+static VNET_DEFINE(int, tcp_inflight_min) = 6144;
+#define V_tcp_inflight_min VNET(tcp_inflight_min)
+SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, min, CTLFLAG_RW,
+ &VNET_NAME(tcp_inflight_min), 0,
+ "Lower-bound for TCP inflight window");
+
+static VNET_DEFINE(int, tcp_inflight_max) = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+#define V_tcp_inflight_max VNET(tcp_inflight_max)
+SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, max, CTLFLAG_RW,
+ &VNET_NAME(tcp_inflight_max), 0,
+ "Upper-bound for TCP inflight window");
+
+static VNET_DEFINE(int, tcp_inflight_stab) = 20;
+#define V_tcp_inflight_stab VNET(tcp_inflight_stab)
+SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
+ &VNET_NAME(tcp_inflight_stab), 0,
+ "Inflight Algorithm Stabilization 20 = 2 packets");
+
+VNET_DEFINE(uma_zone_t, sack_hole_zone);
+#define V_sack_hole_zone VNET(sack_hole_zone)
+
+static struct inpcb *tcp_notify(struct inpcb *, int);
+static void tcp_isn_tick(void *);
+static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
+ void *ip4hdr, const void *ip6hdr);
+
+/*
+ * Target size of TCP PCB hash tables. Must be a power of two.
+ *
+ * Note that this can be overridden by the kernel environment
+ * variable net.inet.tcp.tcbhashsize
+ */
+#ifndef TCBHASHSIZE
+#define TCBHASHSIZE 512
+#endif
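+
+/*
+ * Illustrative override (assumed usage): setting the loader tunable, e.g.
+ * net.inet.tcp.tcbhashsize="1024" in loader.conf, replaces this default;
+ * tcp_init() below falls back to 512 if the value is not a power of two.
+ */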
+
+/*
+ * XXX
+ * Callouts should be moved into struct tcp directly. They are currently
+ * separate because the tcpcb structure is exported to userland for sysctl
+ * parsing purposes, which do not know about callouts.
+ */
+struct tcpcb_mem {
+ struct tcpcb tcb;
+ struct tcp_timer tt;
+};
+
+static VNET_DEFINE(uma_zone_t, tcpcb_zone);
+#define V_tcpcb_zone VNET(tcpcb_zone)
+
+MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
+struct callout isn_callout;
+static struct mtx isn_mtx;
+
+#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
+#define ISN_LOCK() mtx_lock(&isn_mtx)
+#define ISN_UNLOCK() mtx_unlock(&isn_mtx)
+
+/*
+ * TCP initialization.
+ */
+static void
+tcp_zone_change(void *tag)
+{
+
+ uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
+ uma_zone_set_max(V_tcpcb_zone, maxsockets);
+ tcp_tw_zone_change();
+}
+
+static int
+tcp_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "tcpinp");
+ return (0);
+}
+
+void
+tcp_init(void)
+{
+ int hashsize;
+
+ INP_INFO_LOCK_INIT(&V_tcbinfo, "tcp");
+ LIST_INIT(&V_tcb);
+#ifdef VIMAGE
+ V_tcbinfo.ipi_vnet = curvnet;
+#endif
+ V_tcbinfo.ipi_listhead = &V_tcb;
+ hashsize = TCBHASHSIZE;
+ TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
+ if (!powerof2(hashsize)) {
+ printf("WARNING: TCB hash size not a power of 2\n");
+ hashsize = 512; /* safe default */
+ }
+ V_tcbinfo.ipi_hashbase = hashinit(hashsize, M_PCB,
+ &V_tcbinfo.ipi_hashmask);
+ V_tcbinfo.ipi_porthashbase = hashinit(hashsize, M_PCB,
+ &V_tcbinfo.ipi_porthashmask);
+ V_tcbinfo.ipi_zone = uma_zcreate("tcp_inpcb", sizeof(struct inpcb),
+ NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
+ V_tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH;
+
+ /*
+ * These have to be type stable for the benefit of the timers.
+ */
+ V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(V_tcpcb_zone, maxsockets);
+
+ tcp_tw_init();
+ syncache_init();
+ tcp_hc_init();
+ tcp_reass_init();
+
+ TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
+ V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+
+ /* Skip initialization of globals for non-default instances. */
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+
+	/* XXX virtualize those below? */
+ tcp_delacktime = TCPTV_DELACK;
+ tcp_keepinit = TCPTV_KEEP_INIT;
+ tcp_keepidle = TCPTV_KEEP_IDLE;
+ tcp_keepintvl = TCPTV_KEEPINTVL;
+ tcp_maxpersistidle = TCPTV_KEEP_IDLE;
+ tcp_msl = TCPTV_MSL;
+ tcp_rexmit_min = TCPTV_MIN;
+ if (tcp_rexmit_min < 1)
+ tcp_rexmit_min = 1;
+ tcp_rexmit_slop = TCPTV_CPU_VAR;
+ tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
+ tcp_tcbhashsize = hashsize;
+
+#ifdef INET6
+#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
+#else /* INET6 */
+#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
+#endif /* INET6 */
+ if (max_protohdr < TCP_MINPROTOHDR)
+ max_protohdr = TCP_MINPROTOHDR;
+ if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
+ panic("tcp_init");
+#undef TCP_MINPROTOHDR
+
+ ISN_LOCK_INIT();
+ callout_init(&isn_callout, CALLOUT_MPSAFE);
+ callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
+ EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
+ SHUTDOWN_PRI_DEFAULT);
+ EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
+ EVENTHANDLER_PRI_ANY);
+}
+
+#ifdef VIMAGE
+void
+tcp_destroy(void)
+{
+
+ tcp_reass_destroy();
+ tcp_hc_destroy();
+ syncache_destroy();
+ tcp_tw_destroy();
+
+ /* XXX check that hashes are empty! */
+ hashdestroy(V_tcbinfo.ipi_hashbase, M_PCB,
+ V_tcbinfo.ipi_hashmask);
+ hashdestroy(V_tcbinfo.ipi_porthashbase, M_PCB,
+ V_tcbinfo.ipi_porthashmask);
+
+ uma_zdestroy(V_sack_hole_zone);
+ uma_zdestroy(V_tcpcb_zone);
+ uma_zdestroy(V_tcbinfo.ipi_zone);
+
+ INP_INFO_LOCK_DESTROY(&V_tcbinfo);
+}
+#endif
+
+void
+tcp_fini(void *xtp)
+{
+
+ callout_stop(&isn_callout);
+}
+
+/*
+ * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb.
+ * tcp_template used to store this data in mbufs, but we now recopy it out
+ * of the tcpcb each time to conserve mbufs.
+ */
+void
+tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
+{
+ struct tcphdr *th = (struct tcphdr *)tcp_ptr;
+
+ INP_WLOCK_ASSERT(inp);
+
+#ifdef INET6
+ if ((inp->inp_vflag & INP_IPV6) != 0) {
+ struct ip6_hdr *ip6;
+
+ ip6 = (struct ip6_hdr *)ip_ptr;
+ ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
+ (inp->inp_flow & IPV6_FLOWINFO_MASK);
+ ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
+ (IPV6_VERSION & IPV6_VERSION_MASK);
+ ip6->ip6_nxt = IPPROTO_TCP;
+ ip6->ip6_plen = htons(sizeof(struct tcphdr));
+ ip6->ip6_src = inp->in6p_laddr;
+ ip6->ip6_dst = inp->in6p_faddr;
+ } else
+#endif
+ {
+ struct ip *ip;
+
+ ip = (struct ip *)ip_ptr;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = 5;
+ ip->ip_tos = inp->inp_ip_tos;
+ ip->ip_len = 0;
+ ip->ip_id = 0;
+ ip->ip_off = 0;
+ ip->ip_ttl = inp->inp_ip_ttl;
+ ip->ip_sum = 0;
+ ip->ip_p = IPPROTO_TCP;
+ ip->ip_src = inp->inp_laddr;
+ ip->ip_dst = inp->inp_faddr;
+ }
+ th->th_sport = inp->inp_lport;
+ th->th_dport = inp->inp_fport;
+ th->th_seq = 0;
+ th->th_ack = 0;
+ th->th_x2 = 0;
+ th->th_off = 5;
+ th->th_flags = 0;
+ th->th_win = 0;
+ th->th_urp = 0;
+ th->th_sum = 0; /* in_pseudo() is called later for ipv4 */
+}
+
+/*
+ * Create template to be used to send tcp packets on a connection.
+ * Allocates a tcptemp buffer and fills in a skeletal tcp/ip header. The only
+ * use for this function is in keepalives, which use tcp_respond.
+ */
+struct tcptemp *
+tcpip_maketemplate(struct inpcb *inp)
+{
+ struct tcptemp *t;
+
+ t = malloc(sizeof(*t), M_TEMP, M_NOWAIT);
+ if (t == NULL)
+ return (NULL);
+ tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t);
+ return (t);
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header. If m == NULL, then we make a copy
+ * of the tcpiphdr at ti and send directly to the addressed host.
+ * This is used to force keep alive messages out using the TCP
+ * template for a connection. If flags are given then we send
+ * a message back to the TCP which originated the segment ti,
+ * and discard the mbuf containing it and any other attached mbufs.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ *
+ * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
+ */
+void
+tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
+ tcp_seq ack, tcp_seq seq, int flags)
+{
+ int tlen;
+ int win = 0;
+ struct ip *ip;
+ struct tcphdr *nth;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+ int isipv6;
+#endif /* INET6 */
+ int ipflags = 0;
+ struct inpcb *inp;
+
+ KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
+
+#ifdef INET6
+ isipv6 = ((struct ip *)ipgen)->ip_v == 6;
+ ip6 = ipgen;
+#endif /* INET6 */
+ ip = ipgen;
+
+ if (tp != NULL) {
+ inp = tp->t_inpcb;
+ KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
+ INP_WLOCK_ASSERT(inp);
+ } else
+ inp = NULL;
+
+ if (tp != NULL) {
+ if (!(flags & TH_RST)) {
+ win = sbspace(&inp->inp_socket->so_rcv);
+ if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+ win = (long)TCP_MAXWIN << tp->rcv_scale;
+ }
+ }
+ if (m == NULL) {
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return;
+ tlen = 0;
+ m->m_data += max_linkhdr;
+#ifdef INET6
+ if (isipv6) {
+ bcopy((caddr_t)ip6, mtod(m, caddr_t),
+ sizeof(struct ip6_hdr));
+ ip6 = mtod(m, struct ip6_hdr *);
+ nth = (struct tcphdr *)(ip6 + 1);
+ } else
+#endif /* INET6 */
+ {
+ bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+ ip = mtod(m, struct ip *);
+ nth = (struct tcphdr *)(ip + 1);
+ }
+ bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
+ flags = TH_ACK;
+ } else {
+ /*
+ * reuse the mbuf.
+		 * XXX MRT We inherit the FIB, which is lucky.
+ */
+ m_freem(m->m_next);
+ m->m_next = NULL;
+ m->m_data = (caddr_t)ipgen;
+ /* m_len is set later */
+ tlen = 0;
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+#ifdef INET6
+ if (isipv6) {
+ xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
+ nth = (struct tcphdr *)(ip6 + 1);
+ } else
+#endif /* INET6 */
+ {
+ xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
+ nth = (struct tcphdr *)(ip + 1);
+ }
+ if (th != nth) {
+ /*
+ * this is usually a case when an extension header
+ * exists between the IPv6 header and the
+ * TCP header.
+ */
+ nth->th_sport = th->th_sport;
+ nth->th_dport = th->th_dport;
+ }
+ xchg(nth->th_dport, nth->th_sport, uint16_t);
+#undef xchg
+ }
+#ifdef INET6
+ if (isipv6) {
+ ip6->ip6_flow = 0;
+ ip6->ip6_vfc = IPV6_VERSION;
+ ip6->ip6_nxt = IPPROTO_TCP;
+ ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) +
+ tlen));
+ tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+ } else
+#endif
+ {
+ tlen += sizeof (struct tcpiphdr);
+ ip->ip_len = tlen;
+ ip->ip_ttl = V_ip_defttl;
+ if (V_path_mtu_discovery)
+ ip->ip_off |= IP_DF;
+ }
+ m->m_len = tlen;
+ m->m_pkthdr.len = tlen;
+ m->m_pkthdr.rcvif = NULL;
+#ifdef MAC
+ if (inp != NULL) {
+ /*
+ * Packet is associated with a socket, so allow the
+ * label of the response to reflect the socket label.
+ */
+ INP_WLOCK_ASSERT(inp);
+ mac_inpcb_create_mbuf(inp, m);
+ } else {
+ /*
+ * Packet is not associated with a socket, so possibly
+ * update the label in place.
+ */
+ mac_netinet_tcp_reply(m);
+ }
+#endif
+ nth->th_seq = htonl(seq);
+ nth->th_ack = htonl(ack);
+ nth->th_x2 = 0;
+ nth->th_off = sizeof (struct tcphdr) >> 2;
+ nth->th_flags = flags;
+ if (tp != NULL)
+ nth->th_win = htons((u_short) (win >> tp->rcv_scale));
+ else
+ nth->th_win = htons((u_short)win);
+ nth->th_urp = 0;
+#ifdef INET6
+ if (isipv6) {
+ nth->th_sum = 0;
+ nth->th_sum = in6_cksum(m, IPPROTO_TCP,
+ sizeof(struct ip6_hdr),
+ tlen - sizeof(struct ip6_hdr));
+ ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
+ NULL, NULL);
+ } else
+#endif /* INET6 */
+ {
+ nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ }
+#ifdef TCPDEBUG
+ if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
+#endif
+#ifdef INET6
+ if (isipv6)
+ (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
+ else
+#endif /* INET6 */
+ (void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
+}
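+
+/*
+ * Sketch of a typical caller (illustrative only; locking and error handling
+ * omitted): a keepalive probe builds a throwaway template with
+ * tcpip_maketemplate() and hands it to tcp_respond() with an old sequence
+ * number to provoke an ACK from the peer:
+ *
+ *	struct tcptemp *t = tcpip_maketemplate(inp);
+ *	if (t != NULL) {
+ *		tcp_respond(tp, t->tt_ipgen, &t->tt_t, NULL,
+ *		    tp->rcv_nxt, tp->snd_una - 1, 0);
+ *		free(t, M_TEMP);
+ *	}
+ */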
+
+/*
+ * Create a new TCP control block, making an
+ * empty reassembly queue and hooking it to the argument
+ * protocol control block. The `inp' parameter must have
+ * come from the zone allocator set up in tcp_init().
+ */
+struct tcpcb *
+tcp_newtcpcb(struct inpcb *inp)
+{
+ struct tcpcb_mem *tm;
+ struct tcpcb *tp;
+#ifdef INET6
+ int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif /* INET6 */
+
+ tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO);
+ if (tm == NULL)
+ return (NULL);
+ tp = &tm->tcb;
+#ifdef VIMAGE
+ tp->t_vnet = inp->inp_vnet;
+#endif
+ tp->t_timers = &tm->tt;
+ /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
+ tp->t_maxseg = tp->t_maxopd =
+#ifdef INET6
+ isipv6 ? V_tcp_v6mssdflt :
+#endif /* INET6 */
+ V_tcp_mssdflt;
+
+ /* Set up our timeouts. */
+ callout_init(&tp->t_timers->tt_rexmt, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_persist, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_keep, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_2msl, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_delack, CALLOUT_MPSAFE);
+
+ if (V_tcp_do_rfc1323)
+ tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
+ if (V_tcp_do_sack)
+ tp->t_flags |= TF_SACK_PERMIT;
+ TAILQ_INIT(&tp->snd_holes);
+ tp->t_inpcb = inp; /* XXX */
+ /*
+ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
+ * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
+ * reasonable initial retransmit time.
+ */
+ tp->t_srtt = TCPTV_SRTTBASE;
+ tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
+ tp->t_rttmin = tcp_rexmit_min;
+ tp->t_rxtcur = TCPTV_RTOBASE;
+ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+ tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+ tp->t_rcvtime = ticks;
+ tp->t_bw_rtttime = ticks;
+ /*
+ * IPv4 TTL initialization is necessary for an IPv6 socket as well,
+ * because the socket may be bound to an IPv6 wildcard address,
+ * which may match an IPv4-mapped IPv6 address.
+ */
+ inp->inp_ip_ttl = V_ip_defttl;
+ inp->inp_ppcb = tp;
+ return (tp); /* XXX */
+}
+
+/*
+ * Drop a TCP connection, reporting
+ * the specified error. If connection is synchronized,
+ * then send a RST to peer.
+ */
+struct tcpcb *
+tcp_drop(struct tcpcb *tp, int errno)
+{
+ struct socket *so = tp->t_inpcb->inp_socket;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ if (TCPS_HAVERCVDSYN(tp->t_state)) {
+ tp->t_state = TCPS_CLOSED;
+ (void) tcp_output_reset(tp);
+ TCPSTAT_INC(tcps_drops);
+ } else
+ TCPSTAT_INC(tcps_conndrops);
+ if (errno == ETIMEDOUT && tp->t_softerror)
+ errno = tp->t_softerror;
+ so->so_error = errno;
+ return (tcp_close(tp));
+}
+
+void
+tcp_discardcb(struct tcpcb *tp)
+{
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so = inp->inp_socket;
+#ifdef INET6
+ int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif /* INET6 */
+
+ INP_WLOCK_ASSERT(inp);
+
+ /*
+ * Make sure that all of our timers are stopped before we
+ * delete the PCB.
+ */
+ callout_stop(&tp->t_timers->tt_rexmt);
+ callout_stop(&tp->t_timers->tt_persist);
+ callout_stop(&tp->t_timers->tt_keep);
+ callout_stop(&tp->t_timers->tt_2msl);
+ callout_stop(&tp->t_timers->tt_delack);
+
+ /*
+ * If we got enough samples through the srtt filter,
+ * save the rtt and rttvar in the routing entry.
+ * 'Enough' is arbitrarily defined as 4 rtt samples.
+ * 4 samples is enough for the srtt filter to converge
+ * to within enough % of the correct value; fewer samples
+ * and we could save a bogus rtt. The danger is not high
+ * as tcp quickly recovers from everything.
+ * XXX: Works very well but needs some more statistics!
+ */
+ if (tp->t_rttupdated >= 4) {
+ struct hc_metrics_lite metrics;
+ u_long ssthresh;
+
+ bzero(&metrics, sizeof(metrics));
+ /*
+ * Update the ssthresh always when the conditions below
+ * are satisfied. This gives us better new start value
+ * for the congestion avoidance for new connections.
+		 * ssthresh is only set if packet loss occurred on a session.
+ *
+ * XXXRW: 'so' may be NULL here, and/or socket buffer may be
+ * being torn down. Ideally this code would not use 'so'.
+ */
+ ssthresh = tp->snd_ssthresh;
+ if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
+ /*
+ * convert the limit from user data bytes to
+ * packets then to packet data bytes.
+ */
+ ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
+ if (ssthresh < 2)
+ ssthresh = 2;
+ ssthresh *= (u_long)(tp->t_maxseg +
+#ifdef INET6
+ (isipv6 ? sizeof (struct ip6_hdr) +
+ sizeof (struct tcphdr) :
+#endif
+ sizeof (struct tcpiphdr)
+#ifdef INET6
+ )
+#endif
+ );
+ } else
+ ssthresh = 0;
+ metrics.rmx_ssthresh = ssthresh;
+
+ metrics.rmx_rtt = tp->t_srtt;
+ metrics.rmx_rttvar = tp->t_rttvar;
+ /* XXX: This wraps if the pipe is more than 4 Gbit per second */
+ metrics.rmx_bandwidth = tp->snd_bandwidth;
+ metrics.rmx_cwnd = tp->snd_cwnd;
+ metrics.rmx_sendpipe = 0;
+ metrics.rmx_recvpipe = 0;
+
+ tcp_hc_update(&inp->inp_inc, &metrics);
+ }
+
+ /* free the reassembly queue, if any */
+ tcp_reass_flush(tp);
+ /* Disconnect offload device, if any. */
+ tcp_offload_detach(tp);
+
+ tcp_free_sackholes(tp);
+ inp->inp_ppcb = NULL;
+ tp->t_inpcb = NULL;
+ uma_zfree(V_tcpcb_zone, tp);
+}
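+
+/*
+ * Worked example for the ssthresh conversion above (hypothetical values):
+ * with ssthresh = 32768 bytes and t_maxseg = 1460, (32768 + 730) / 1460
+ * yields 22 packets, and 22 * (1460 + 40) gives 33000 bytes of packet data
+ * recorded in the host cache for an IPv4 connection.
+ */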
+
+/*
+ * Attempt to close a TCP control block, marking it as dropped, and freeing
+ * the socket if we hold the only reference.
+ */
+struct tcpcb *
+tcp_close(struct tcpcb *tp)
+{
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ /* Notify any offload devices of listener close */
+ if (tp->t_state == TCPS_LISTEN)
+ tcp_offload_listen_close(tp);
+ in_pcbdrop(inp);
+ TCPSTAT_INC(tcps_closed);
+ KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
+ so = inp->inp_socket;
+ soisdisconnected(so);
+ if (inp->inp_flags & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_close: !SS_PROTOREF"));
+ inp->inp_flags &= ~INP_SOCKREF;
+ INP_WUNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ return (NULL);
+ }
+ return (tp);
+}
+
+void
+tcp_drain(void)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ if (!do_tcpdrain)
+ return;
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ struct inpcb *inpb;
+ struct tcpcb *tcpb;
+
+ /*
+ * Walk the tcpbs, if existing, and flush the reassembly queue,
+ * if there is one...
+ * XXX: The "Net/3" implementation doesn't imply that the TCP
+ * reassembly queue should be flushed, but in a situation
+ * where we're really low on mbufs, this is potentially
+		 * useful.
+ */
+ INP_INFO_RLOCK(&V_tcbinfo);
+ LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
+ if (inpb->inp_flags & INP_TIMEWAIT)
+ continue;
+ INP_WLOCK(inpb);
+ if ((tcpb = intotcpcb(inpb)) != NULL) {
+ tcp_reass_flush(tcpb);
+ tcp_clean_sackreport(tcpb);
+ }
+ INP_WUNLOCK(inpb);
+ }
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
+
+/*
+ * Notify a tcp user of an asynchronous error;
+ * store error as soft error, but wake up user
+ * (for now, won't do anything until can select for soft error).
+ *
+ * Do not wake up user since there currently is no mechanism for
+ * reporting soft errors (yet - a kqueue filter may be added).
+ */
+static struct inpcb *
+tcp_notify(struct inpcb *inp, int error)
+{
+ struct tcpcb *tp;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ if ((inp->inp_flags & INP_TIMEWAIT) ||
+ (inp->inp_flags & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
+
+ /*
+ * Ignore some errors if we are hooked up.
+ * If connection hasn't completed, has retransmitted several times,
+ * and receives a second error, give up now. This is better
+ * than waiting a long time to establish a connection that
+ * can never complete.
+ */
+ if (tp->t_state == TCPS_ESTABLISHED &&
+ (error == EHOSTUNREACH || error == ENETUNREACH ||
+ error == EHOSTDOWN)) {
+ return (inp);
+ } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
+ tp->t_softerror) {
+ tp = tcp_drop(tp, error);
+ if (tp != NULL)
+ return (inp);
+ else
+ return (NULL);
+ } else {
+ tp->t_softerror = error;
+ return (inp);
+ }
+#if 0
+ wakeup( &so->so_timeo);
+ sorwakeup(so);
+ sowwakeup(so);
+#endif
+}
+
+static int
+tcp_pcblist(SYSCTL_HANDLER_ARGS)
+{
+ int error, i, m, n, pcb_count;
+ struct inpcb *inp, **inp_list;
+ inp_gen_t gencnt;
+ struct xinpgen xig;
+
+ /*
+ * The process of preparing the TCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == NULL) {
+ n = V_tcbinfo.ipi_count + syncache_pcbcount();
+ n += imax(n / 8, 10);
+ req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
+ return (0);
+ }
+
+ if (req->newptr != NULL)
+ return (EPERM);
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ INP_INFO_RLOCK(&V_tcbinfo);
+ gencnt = V_tcbinfo.ipi_gencnt;
+ n = V_tcbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+
+ m = syncache_pcbcount();
+
+ error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ + (n + m) * sizeof(struct xtcpcb));
+ if (error != 0)
+ return (error);
+
+ xig.xig_len = sizeof xig;
+ xig.xig_count = n + m;
+ xig.xig_gen = gencnt;
+ xig.xig_sogen = so_gencnt;
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ if (error)
+ return (error);
+
+ error = syncache_pcblist(req, m, &pcb_count);
+ if (error)
+ return (error);
+
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == NULL)
+ return (ENOMEM);
+
+ INP_INFO_RLOCK(&V_tcbinfo);
+ for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
+ inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
+ INP_WLOCK(inp);
+ if (inp->inp_gencnt <= gencnt) {
+ /*
+ * XXX: This use of cr_cansee(), introduced with
+ * TCP state changes, is not quite right, but for
+ * now, better than nothing.
+ */
+ if (inp->inp_flags & INP_TIMEWAIT) {
+ if (intotw(inp) != NULL)
+ error = cr_cansee(req->td->td_ucred,
+ intotw(inp)->tw_cred);
+ else
+ error = EINVAL; /* Skip this inp. */
+ } else
+ error = cr_canseeinpcb(req->td->td_ucred, inp);
+ if (error == 0) {
+ in_pcbref(inp);
+ inp_list[i++] = inp;
+ }
+ }
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ n = i;
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (inp->inp_gencnt <= gencnt) {
+ struct xtcpcb xt;
+ void *inp_ppcb;
+
+ bzero(&xt, sizeof(xt));
+ xt.xt_len = sizeof xt;
+ /* XXX should avoid extra copy */
+ bcopy(inp, &xt.xt_inp, sizeof *inp);
+ inp_ppcb = inp->inp_ppcb;
+ if (inp_ppcb == NULL)
+ bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
+ else if (inp->inp_flags & INP_TIMEWAIT) {
+ bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
+ xt.xt_tp.t_state = TCPS_TIME_WAIT;
+ } else
+ bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
+ if (inp->inp_socket != NULL)
+ sotoxsocket(inp->inp_socket, &xt.xt_socket);
+ else {
+ bzero(&xt.xt_socket, sizeof xt.xt_socket);
+ xt.xt_socket.xso_protocol = IPPROTO_TCP;
+ }
+ xt.xt_inp.inp_gencnt = inp->inp_gencnt;
+ INP_RUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ } else
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WLOCK(&V_tcbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_WLOCK(inp);
+ if (!in_pcbrele(inp))
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state.
+ * If the generation differs from what we told
+ * her before, she knows that something happened
+ * while we were processing this request, and it
+ * might be necessary to retry.
+ */
+ INP_INFO_RLOCK(&V_tcbinfo);
+ xig.xig_gen = V_tcbinfo.ipi_gencnt;
+ xig.xig_sogen = so_gencnt;
+ xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ }
+ free(inp_list, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+ tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+
+static int
+tcp_getcred(SYSCTL_HANDLER_ARGS)
+{
+ struct xucred xuc;
+ struct sockaddr_in addrs[2];
+ struct inpcb *inp;
+ int error;
+
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
+ if (error)
+ return (error);
+ error = SYSCTL_IN(req, addrs, sizeof(addrs));
+ if (error)
+ return (error);
+ INP_INFO_RLOCK(&V_tcbinfo);
+ inp = in_pcblookup_hash(&V_tcbinfo, addrs[1].sin_addr,
+ addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
+ if (inp != NULL) {
+ INP_RLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ if (inp->inp_socket == NULL)
+ error = ENOENT;
+ if (error == 0)
+ error = cr_canseeinpcb(req->td->td_ucred, inp);
+ if (error == 0)
+ cru2x(inp->inp_cred, &xuc);
+ INP_RUNLOCK(inp);
+ } else {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ error = ENOENT;
+ }
+ if (error == 0)
+ error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred,
+ CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
+ tcp_getcred, "S,xucred", "Get the xucred of a TCP connection");
+
+#ifdef INET6
+static int
+tcp6_getcred(SYSCTL_HANDLER_ARGS)
+{
+ struct xucred xuc;
+ struct sockaddr_in6 addrs[2];
+ struct inpcb *inp;
+ int error, mapped = 0;
+
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
+ if (error)
+ return (error);
+ error = SYSCTL_IN(req, addrs, sizeof(addrs));
+ if (error)
+ return (error);
+ if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
+ (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
+ return (error);
+ }
+ if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
+ if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
+ mapped = 1;
+ else
+ return (EINVAL);
+ }
+
+ INP_INFO_RLOCK(&V_tcbinfo);
+ if (mapped == 1)
+ inp = in_pcblookup_hash(&V_tcbinfo,
+ *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
+ addrs[1].sin6_port,
+ *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
+ addrs[0].sin6_port,
+ 0, NULL);
+ else
+ inp = in6_pcblookup_hash(&V_tcbinfo,
+ &addrs[1].sin6_addr, addrs[1].sin6_port,
+ &addrs[0].sin6_addr, addrs[0].sin6_port, 0, NULL);
+ if (inp != NULL) {
+ INP_RLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ if (inp->inp_socket == NULL)
+ error = ENOENT;
+ if (error == 0)
+ error = cr_canseeinpcb(req->td->td_ucred, inp);
+ if (error == 0)
+ cru2x(inp->inp_cred, &xuc);
+ INP_RUNLOCK(inp);
+ } else {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ error = ENOENT;
+ }
+ if (error == 0)
+ error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
+ CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
+ tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection");
+#endif
+
+
+void
+tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+ struct ip *ip = vip;
+ struct tcphdr *th;
+ struct in_addr faddr;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
+ struct icmp *icp;
+ struct in_conninfo inc;
+ tcp_seq icmp_tcp_seq;
+ int mtu;
+
+ faddr = ((struct sockaddr_in *)sa)->sin_addr;
+ if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
+ return;
+
+ if (cmd == PRC_MSGSIZE)
+ notify = tcp_mtudisc;
+ else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
+ cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip)
+ notify = tcp_drop_syn_sent;
+ /*
+ * Redirects don't need to be handled up here.
+ */
+ else if (PRC_IS_REDIRECT(cmd))
+ return;
+ /*
+	 * Source quench is deprecated.
+ */
+ else if (cmd == PRC_QUENCH)
+ return;
+ /*
+ * Hostdead is ugly because it goes linearly through all PCBs.
+ * XXX: We never get this from ICMP, otherwise it makes an
+ * excellent DoS attack on machines with many connections.
+ */
+ else if (cmd == PRC_HOSTDEAD)
+ ip = NULL;
+ else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
+ return;
+ if (ip != NULL) {
+ icp = (struct icmp *)((caddr_t)ip
+ - offsetof(struct icmp, icmp_ip));
+ th = (struct tcphdr *)((caddr_t)ip
+ + (ip->ip_hl << 2));
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = in_pcblookup_hash(&V_tcbinfo, faddr, th->th_dport,
+ ip->ip_src, th->th_sport, 0, NULL);
+ if (inp != NULL) {
+ INP_WLOCK(inp);
+ if (!(inp->inp_flags & INP_TIMEWAIT) &&
+ !(inp->inp_flags & INP_DROPPED) &&
+ !(inp->inp_socket == NULL)) {
+ icmp_tcp_seq = htonl(th->th_seq);
+ tp = intotcpcb(inp);
+ if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
+ SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
+ if (cmd == PRC_MSGSIZE) {
+ /*
+ * MTU discovery:
+ * If we got a needfrag set the MTU
+ * in the route to the suggested new
+ * value (if given) and then notify.
+ */
+ bzero(&inc, sizeof(inc));
+ inc.inc_faddr = faddr;
+ inc.inc_fibnum =
+ inp->inp_inc.inc_fibnum;
+
+ mtu = ntohs(icp->icmp_nextmtu);
+ /*
+ * If no alternative MTU was
+ * proposed, try the next smaller
+ * one. ip->ip_len has already
+ * been swapped in icmp_input().
+ */
+ if (!mtu)
+ mtu = ip_next_mtu(ip->ip_len,
+ 1);
+ if (mtu < V_tcp_minmss
+ + sizeof(struct tcpiphdr))
+ mtu = V_tcp_minmss
+ + sizeof(struct tcpiphdr);
+ /*
+					 * Only cache the MTU if it
+					 * is smaller than the interface
+					 * or route MTU. tcp_mtudisc()
+					 * will do the right thing by itself.
+ */
+ if (mtu <= tcp_maxmtu(&inc, NULL))
+ tcp_hc_updatemtu(&inc, mtu);
+ }
+
+ inp = (*notify)(inp, inetctlerrmap[cmd]);
+ }
+ }
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
+ } else {
+ bzero(&inc, sizeof(inc));
+ inc.inc_fport = th->th_dport;
+ inc.inc_lport = th->th_sport;
+ inc.inc_faddr = faddr;
+ inc.inc_laddr = ip->ip_src;
+ syncache_unreach(&inc, th);
+ }
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ } else
+ in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
+}
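
A minimal userland sketch of the MTU selection policy described in the comments of
tcp_ctlinput() above: prefer the ICMP-suggested next-hop MTU, otherwise step down to
the next smaller plateau, clamp to a floor, and only remember values that actually
shrink the path. The plateau table and helper names here are illustrative stand-ins,
not the kernel's ip_next_mtu() or hostcache code.

    #include <stddef.h>

    /* Illustrative only: a few common MTU plateaus in RFC 1191 style. */
    static const int mtu_plateaus[] =
        { 65535, 32000, 17914, 8166, 4352, 1492, 1006, 508, 296, 68 };

    /* Pick the next plateau strictly below the packet size that triggered the ICMP. */
    static int
    next_smaller_mtu(int pktlen)
    {
    	size_t i;

    	for (i = 0; i < sizeof(mtu_plateaus) / sizeof(mtu_plateaus[0]); i++)
    		if (mtu_plateaus[i] < pktlen)
    			return (mtu_plateaus[i]);
    	return (68);	/* IPv4 minimum */
    }

    /*
     * Mirror of the policy above: use the advertised MTU if one was given,
     * otherwise step down one plateau; never go below the floor (minimum MSS
     * plus headers); only cache values smaller than what the path already allows.
     */
    static int
    choose_mtu(int icmp_nextmtu, int pktlen, int floor_mtu, int path_mtu, int *cache_it)
    {
    	int mtu = icmp_nextmtu ? icmp_nextmtu : next_smaller_mtu(pktlen);

    	if (mtu < floor_mtu)
    		mtu = floor_mtu;
    	*cache_it = (mtu <= path_mtu);
    	return (mtu);
    }
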
+
+#ifdef INET6
+void
+tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+ struct tcphdr th;
+ struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
+ struct ip6_hdr *ip6;
+ struct mbuf *m;
+ struct ip6ctlparam *ip6cp = NULL;
+ const struct sockaddr_in6 *sa6_src = NULL;
+ int off;
+ struct tcp_portonly {
+ u_int16_t th_sport;
+ u_int16_t th_dport;
+ } *thp;
+
+ if (sa->sa_family != AF_INET6 ||
+ sa->sa_len != sizeof(struct sockaddr_in6))
+ return;
+
+ if (cmd == PRC_MSGSIZE)
+ notify = tcp_mtudisc;
+ else if (!PRC_IS_REDIRECT(cmd) &&
+ ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
+ return;
+	/* Source quench is deprecated. */
+ else if (cmd == PRC_QUENCH)
+ return;
+
+ /* if the parameter is from icmp6, decode it. */
+ if (d != NULL) {
+ ip6cp = (struct ip6ctlparam *)d;
+ m = ip6cp->ip6c_m;
+ ip6 = ip6cp->ip6c_ip6;
+ off = ip6cp->ip6c_off;
+ sa6_src = ip6cp->ip6c_src;
+ } else {
+ m = NULL;
+ ip6 = NULL;
+ off = 0; /* fool gcc */
+ sa6_src = &sa6_any;
+ }
+
+ if (ip6 != NULL) {
+ struct in_conninfo inc;
+ /*
+		 * XXX: We assume that when IPV6 is non-NULL,
+ * M and OFF are valid.
+ */
+
+ /* check if we can safely examine src and dst ports */
+ if (m->m_pkthdr.len < off + sizeof(*thp))
+ return;
+
+ bzero(&th, sizeof(th));
+ m_copydata(m, off, sizeof(*thp), (caddr_t)&th);
+
+ in6_pcbnotify(&V_tcbinfo, sa, th.th_dport,
+ (struct sockaddr *)ip6cp->ip6c_src,
+ th.th_sport, cmd, NULL, notify);
+
+ bzero(&inc, sizeof(inc));
+ inc.inc_fport = th.th_dport;
+ inc.inc_lport = th.th_sport;
+ inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
+ inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
+ inc.inc_flags |= INC_ISIPV6;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ syncache_unreach(&inc, &th);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ } else
+ in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
+ 0, cmd, NULL, notify);
+}
+#endif /* INET6 */
+
+
+/*
+ * Following is where TCP initial sequence number generation occurs.
+ *
+ * There are two places where we must use initial sequence numbers:
+ * 1. In SYN-ACK packets.
+ * 2. In SYN packets.
+ *
+ * All ISNs for SYN-ACK packets are generated by the syncache. See
+ * tcp_syncache.c for details.
+ *
+ * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling
+ * depends on this property. In addition, these ISNs should be
+ * unguessable so as to prevent connection hijacking. To satisfy
+ * the requirements of this situation, the algorithm outlined in
+ * RFC 1948 is used, with only small modifications.
+ *
+ * Implementation details:
+ *
+ * Time is based off the system timer, and is corrected so that it
+ * increases by one megabyte per second. This allows for proper
+ * recycling on high speed LANs while still leaving over an hour
+ * before rollover.
+ *
+ * As reading the *exact* system time is too expensive to be done
+ * whenever setting up a TCP connection, we increment the time
+ * offset in two ways. First, a small random positive increment
+ * is added to isn_offset for each connection that is set up.
+ * Second, the function tcp_isn_tick fires once per clock tick
+ * and increments isn_offset as necessary so that sequence numbers
+ * are incremented at approximately ISN_BYTES_PER_SECOND. The
+ * random positive increments serve only to ensure that the same
+ * exact sequence number is never sent out twice (as could otherwise
+ * happen when a port is recycled in less than the system tick
+ * interval.)
+ *
+ * net.inet.tcp.isn_reseed_interval controls the number of seconds
+ * between seeding of isn_secret. This is normally set to zero,
+ * as reseeding should not be necessary.
+ *
+ * Locking of the global variables isn_secret, isn_last_reseed, isn_offset,
+ * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In
+ * general, this means holding an exclusive (write) lock.
+ */
+
+#define ISN_BYTES_PER_SECOND 1048576
+#define ISN_STATIC_INCREMENT 4096
+#define ISN_RANDOM_INCREMENT (4096 - 1)
+
+static VNET_DEFINE(u_char, isn_secret[32]);
+static VNET_DEFINE(int, isn_last_reseed);
+static VNET_DEFINE(u_int32_t, isn_offset);
+static VNET_DEFINE(u_int32_t, isn_offset_old);
+
+#define V_isn_secret VNET(isn_secret)
+#define V_isn_last_reseed VNET(isn_last_reseed)
+#define V_isn_offset VNET(isn_offset)
+#define V_isn_offset_old VNET(isn_offset_old)
+
+tcp_seq
+tcp_new_isn(struct tcpcb *tp)
+{
+ MD5_CTX isn_ctx;
+ u_int32_t md5_buffer[4];
+ tcp_seq new_isn;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ ISN_LOCK();
+ /* Seed if this is the first use, reseed if requested. */
+ if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) &&
+ (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz)
+ < (u_int)ticks))) {
+ read_random(&V_isn_secret, sizeof(V_isn_secret));
+ V_isn_last_reseed = ticks;
+ }
+
+ /* Compute the md5 hash and return the ISN. */
+ MD5Init(&isn_ctx);
+ MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short));
+ MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short));
+#ifdef INET6
+ if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {
+ MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr,
+ sizeof(struct in6_addr));
+ MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr,
+ sizeof(struct in6_addr));
+ } else
+#endif
+ {
+ MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr,
+ sizeof(struct in_addr));
+ MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr,
+ sizeof(struct in_addr));
+ }
+ MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret));
+ MD5Final((u_char *) &md5_buffer, &isn_ctx);
+ new_isn = (tcp_seq) md5_buffer[0];
+ V_isn_offset += ISN_STATIC_INCREMENT +
+ (arc4random() & ISN_RANDOM_INCREMENT);
+ new_isn += V_isn_offset;
+ ISN_UNLOCK();
+ return (new_isn);
+}
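
The RFC 1948 scheme used by tcp_new_isn() above boils down to hashing the connection
four-tuple together with a periodically reseeded secret, then adding a monotonically
advancing offset. The following standalone sketch shows that structure; the md5()
helper is a hypothetical stand-in for the kernel's MD5Init/MD5Update/MD5Final, and the
field packing is illustrative only.

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical helper assumed for this sketch: MD5 over a flat buffer. */
    extern void md5(const void *data, size_t len, uint8_t digest[16]);

    struct isn_state {
    	uint8_t  secret[32];	/* periodically reseeded */
    	uint32_t offset;	/* advanced ~1 MB/sec plus small random steps */
    };

    /* ISN = hash(local addr/port, foreign addr/port, secret) + offset. */
    static uint32_t
    new_isn(struct isn_state *st, uint32_t laddr, uint16_t lport,
        uint32_t faddr, uint16_t fport, uint32_t random_step)
    {
    	uint8_t buf[4 + 2 + 4 + 2 + sizeof(st->secret)], digest[16];
    	uint32_t isn;

    	memcpy(buf, &laddr, 4);
    	memcpy(buf + 4, &lport, 2);
    	memcpy(buf + 6, &faddr, 4);
    	memcpy(buf + 10, &fport, 2);
    	memcpy(buf + 12, st->secret, sizeof(st->secret));
    	md5(buf, sizeof(buf), digest);
    	memcpy(&isn, digest, 4);

    	/* A static plus bounded-random increment keeps the sequence monotonic. */
    	st->offset += 4096 + (random_step & 4095);
    	return (isn + st->offset);
    }
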
+
+/*
+ * Increment the offset to the next ISN_BYTES_PER_SECOND / 100 boundary
+ * to keep time flowing at a relatively constant rate. If the random
+ * increments have already pushed us past the projected offset, do nothing.
+ */
+static void
+tcp_isn_tick(void *xtp)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ u_int32_t projected_offset;
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ ISN_LOCK();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS */
+ projected_offset =
+ V_isn_offset_old + ISN_BYTES_PER_SECOND / 100;
+
+ if (SEQ_GT(projected_offset, V_isn_offset))
+ V_isn_offset = projected_offset;
+
+ V_isn_offset_old = V_isn_offset;
+ CURVNET_RESTORE();
+ }
+ ISN_UNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
+ callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
+}
+
+/*
+ * When a specific ICMP unreachable message is received and the
+ * connection state is SYN-SENT, drop the connection. This behavior
+ * is controlled by the icmp_may_rst sysctl.
+ */
+struct inpcb *
+tcp_drop_syn_sent(struct inpcb *inp, int errno)
+{
+ struct tcpcb *tp;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ if ((inp->inp_flags & INP_TIMEWAIT) ||
+ (inp->inp_flags & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ if (tp->t_state != TCPS_SYN_SENT)
+ return (inp);
+
+ tp = tcp_drop(tp, errno);
+ if (tp != NULL)
+ return (inp);
+ else
+ return (NULL);
+}
+
+/*
+ * When `need fragmentation' ICMP is received, update our idea of the MSS
+ * based on the new value in the route. Also nudge TCP to send something,
+ * since we know the packet we just sent was dropped.
+ * This duplicates some code in the tcp_mss() function in tcp_input.c.
+ */
+struct inpcb *
+tcp_mtudisc(struct inpcb *inp, int errno)
+{
+ struct tcpcb *tp;
+ struct socket *so;
+
+ INP_WLOCK_ASSERT(inp);
+ if ((inp->inp_flags & INP_TIMEWAIT) ||
+ (inp->inp_flags & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
+
+ tcp_mss_update(tp, -1, NULL, NULL);
+
+ so = inp->inp_socket;
+ SOCKBUF_LOCK(&so->so_snd);
+ /* If the mss is larger than the socket buffer, decrease the mss. */
+ if (so->so_snd.sb_hiwat < tp->t_maxseg)
+ tp->t_maxseg = so->so_snd.sb_hiwat;
+ SOCKBUF_UNLOCK(&so->so_snd);
+
+ TCPSTAT_INC(tcps_mturesent);
+ tp->t_rtttime = 0;
+ tp->snd_nxt = tp->snd_una;
+ tcp_free_sackholes(tp);
+ tp->snd_recover = tp->snd_max;
+ if (tp->t_flags & TF_SACK_PERMIT)
+ EXIT_FASTRECOVERY(tp);
+ tcp_output_send(tp);
+ return (inp);
+}
+
+/*
+ * Look-up the routing entry to the peer of this inpcb. If no route
+ * is found and it cannot be allocated, then return 0. This routine
+ * is called by TCP routines that access the rmx structure and by
+ * tcp_mss_update to get the peer/interface MTU.
+ */
+u_long
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
+{
+ struct route sro;
+ struct sockaddr_in *dst;
+ struct ifnet *ifp;
+ u_long maxmtu = 0;
+
+ KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
+
+ bzero(&sro, sizeof(sro));
+ if (inc->inc_faddr.s_addr != INADDR_ANY) {
+ dst = (struct sockaddr_in *)&sro.ro_dst;
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = inc->inc_faddr;
+ in_rtalloc_ign(&sro, 0, inc->inc_fibnum);
+ }
+ if (sro.ro_rt != NULL) {
+ ifp = sro.ro_rt->rt_ifp;
+ if (sro.ro_rt->rt_rmx.rmx_mtu == 0)
+ maxmtu = ifp->if_mtu;
+ else
+ maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO4 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
+ RTFREE(sro.ro_rt);
+ }
+ return (maxmtu);
+}
+
+#ifdef INET6
+u_long
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+{
+ struct route_in6 sro6;
+ struct ifnet *ifp;
+ u_long maxmtu = 0;
+
+ KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
+
+ bzero(&sro6, sizeof(sro6));
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
+ sro6.ro_dst.sin6_family = AF_INET6;
+ sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
+ sro6.ro_dst.sin6_addr = inc->inc6_faddr;
+ rtalloc_ign((struct route *)&sro6, 0);
+ }
+ if (sro6.ro_rt != NULL) {
+ ifp = sro6.ro_rt->rt_ifp;
+ if (sro6.ro_rt->rt_rmx.rmx_mtu == 0)
+ maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
+ else
+ maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
+ IN6_LINKMTU(sro6.ro_rt->rt_ifp));
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO6 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
+ RTFREE(sro6.ro_rt);
+ }
+
+ return (maxmtu);
+}
+#endif /* INET6 */
+
+#ifdef IPSEC
+/* compute ESP/AH header size for TCP, including outer IP header. */
+size_t
+ipsec_hdrsiz_tcp(struct tcpcb *tp)
+{
+ struct inpcb *inp;
+ struct mbuf *m;
+ size_t hdrsiz;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ struct tcphdr *th;
+
+ if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL))
+ return (0);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (!m)
+ return (0);
+
+#ifdef INET6
+ if ((inp->inp_vflag & INP_IPV6) != 0) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ th = (struct tcphdr *)(ip6 + 1);
+ m->m_pkthdr.len = m->m_len =
+ sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+ tcpip_fillheaders(inp, ip6, th);
+ hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
+ } else
+#endif /* INET6 */
+ {
+ ip = mtod(m, struct ip *);
+ th = (struct tcphdr *)(ip + 1);
+ m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr);
+ tcpip_fillheaders(inp, ip, th);
+ hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
+ }
+
+ m_free(m);
+ return (hdrsiz);
+}
+#endif /* IPSEC */
+
+/*
+ * TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
+ *
+ * This code attempts to calculate the bandwidth-delay product as a
+ * means of determining the optimal window size to maximize bandwidth,
+ * minimize RTT, and avoid the over-allocation of buffers on interfaces and
+ * routers. This code also does a fairly good job keeping RTTs in check
+ * across slow links like modems. We implement an algorithm which is very
+ * similar to (but not meant to be) TCP/Vegas. The code operates on the
+ * transmitter side of a TCP connection and so only affects the transmit
+ * side of the connection.
+ *
+ * BACKGROUND: TCP makes no provision for the management of buffer space
+ * at the end points or at the intermediate routers and switches. A TCP
+ * stream, whether using NewReno or not, will eventually buffer as
+ * many packets as it is able and the only reason this typically works is
+ * due to the fairly small default buffers made available for a connection
+ * (typically 16K or 32K). As machines use larger windows and/or window
+ * scaling it is now fairly easy for even a single TCP connection to blow-out
+ * all available buffer space not only on the local interface, but on
+ * intermediate routers and switches as well. NewReno makes a misguided
+ * attempt to 'solve' this problem by waiting for an actual failure to occur,
+ * then backing off, then steadily increasing the window again until another
+ * failure occurs, ad-infinitum. This results in terrible oscillation that
+ * is only made worse as network loads increase and the idea of intentionally
+ * blowing out network buffers is, frankly, a terrible way to manage network
+ * resources.
+ *
+ * It is far better to limit the transmit window prior to the failure
+ * condition being achieved. There are two general ways to do this: First
+ * you can 'scan' through different transmit window sizes and locate the
+ * point where the RTT stops increasing, indicating that you have filled the
+ * pipe, then scan backwards until you note that RTT stops decreasing, then
+ * repeat ad-infinitum. This method works in principle but has severe
+ * implementation issues due to RTT variances, timer granularity, and
+ * instability in the algorithm which can lead to many false positives and
+ * create oscillations as well as interact badly with other TCP streams
+ * implementing the same algorithm.
+ *
+ * The second method is to limit the window to the bandwidth delay product
+ * of the link. This is the method we implement. RTT variances and our
+ * own manipulation of the congestion window, bwnd, can potentially
+ * destabilize the algorithm. For this reason we have to stabilize the
+ * elements used to calculate the window. We do this by using the minimum
+ * observed RTT, the long term average of the observed bandwidth, and
+ * by adding two segments worth of slop. It isn't perfect but it is able
+ * to react to changing conditions and gives us a very stable basis on
+ * which to extend the algorithm.
+ */
+void
+tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq)
+{
+ u_long bw;
+ u_long bwnd;
+ int save_ticks;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * If inflight_enable is disabled in the middle of a tcp connection,
+ * make sure snd_bwnd is effectively disabled.
+ */
+ if (V_tcp_inflight_enable == 0 ||
+ tp->t_rttlow < V_tcp_inflight_rttthresh) {
+ tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+ tp->snd_bandwidth = 0;
+ return;
+ }
+
+ /*
+ * Figure out the bandwidth. Due to the tick granularity this
+ * is a very rough number and it MUST be averaged over a fairly
+ * long period of time. XXX we need to take into account a link
+ * that is not using all available bandwidth, but for now our
+ * slop will ramp us up if this case occurs and the bandwidth later
+ * increases.
+ *
+	 * Note: if ticks rolls over, 'bw' may wind up negative. We must
+ * effectively reset t_bw_rtttime for this case.
+ */
+ save_ticks = ticks;
+ if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1)
+ return;
+
+ bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz /
+ (save_ticks - tp->t_bw_rtttime);
+ tp->t_bw_rtttime = save_ticks;
+ tp->t_bw_rtseq = ack_seq;
+ if (tp->t_bw_rtttime == 0 || (int)bw < 0)
+ return;
+ bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
+
+ tp->snd_bandwidth = bw;
+
+ /*
+ * Calculate the semi-static bandwidth delay product, plus two maximal
+ * segments. The additional slop puts us squarely in the sweet
+ * spot and also handles the bandwidth run-up case and stabilization.
+ * Without the slop we could be locking ourselves into a lower
+ * bandwidth.
+ *
+ * Situations Handled:
+ * (1) Prevents over-queueing of packets on LANs, especially on
+ * high speed LANs, allowing larger TCP buffers to be
+ * specified, and also does a good job preventing
+ * over-queueing of packets over choke points like modems
+ * (at least for the transmit side).
+ *
+ * (2) Is able to handle changing network loads (bandwidth
+ * drops so bwnd drops, bandwidth increases so bwnd
+ * increases).
+ *
+ * (3) Theoretically should stabilize in the face of multiple
+ * connections implementing the same algorithm (this may need
+ * a little work).
+ *
+ * (4) Stability value (defaults to 20 = 2 maximal packets) can
+	 *	be adjusted with a sysctl but typically only needs to be
+	 *	changed on very slow connections. A value no smaller than 5
+ * should be used, but only reduce this default if you have
+ * no other choice.
+ */
+#define USERTT ((tp->t_srtt + tp->t_rttbest) / 2)
+ bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + V_tcp_inflight_stab * tp->t_maxseg / 10;
+#undef USERTT
+
+ if (tcp_inflight_debug > 0) {
+ static int ltime;
+ if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) {
+ ltime = ticks;
+ printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n",
+ tp,
+ bw,
+ tp->t_rttbest,
+ tp->t_srtt,
+ bwnd
+ );
+ }
+ }
+ if ((long)bwnd < V_tcp_inflight_min)
+ bwnd = V_tcp_inflight_min;
+ if (bwnd > V_tcp_inflight_max)
+ bwnd = V_tcp_inflight_max;
+ if ((long)bwnd < tp->t_maxseg * 2)
+ bwnd = tp->t_maxseg * 2;
+ tp->snd_bwnd = bwnd;
+}
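
The arithmetic behind the inflight limiter above is compact: estimate bandwidth from
bytes acknowledged per elapsed tick, smooth it with a 15/16 exponentially weighted
average, and set the window to bandwidth times RTT plus a couple of segments of slop.
A simplified sketch follows; all names are illustrative, and srtt is taken directly in
ticks rather than in the kernel's ticks << TCP_RTT_SHIFT units.

    #include <stdint.h>

    /* Returns the bandwidth-delay-product window in bytes, or 0 if it is too
     * soon to form an estimate. stab_tenths defaults to 20 (= 2 segments). */
    static unsigned long
    inflight_window(uint64_t *avg_bw, uint32_t acked_bytes, int elapsed_ticks,
        int hz, int srtt_ticks, unsigned int maxseg, unsigned int stab_tenths)
    {
    	uint64_t bw, bwnd;

    	if (elapsed_ticks < 1)
    		return (0);

    	bw = (uint64_t)acked_bytes * hz / elapsed_ticks;	/* bytes/sec */
    	*avg_bw = (*avg_bw * 15 + bw) >> 4;			/* long-term average */

    	/* Bandwidth-delay product plus slop, floored at two segments. */
    	bwnd = *avg_bw * srtt_ticks / hz + (uint64_t)stab_tenths * maxseg / 10;
    	if (bwnd < 2 * maxseg)
    		bwnd = 2 * maxseg;
    	return ((unsigned long)bwnd);
    }
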
+
+#ifdef TCP_SIGNATURE
+/*
+ * Callback function invoked by m_apply() to digest TCP segment data
+ * contained within an mbuf chain.
+ */
+static int
+tcp_signature_apply(void *fstate, void *data, u_int len)
+{
+
+ MD5Update(fstate, (u_char *)data, len);
+ return (0);
+}
+
+/*
+ * Compute TCP-MD5 hash of a TCP segment. (RFC2385)
+ *
+ * Parameters:
+ * m pointer to head of mbuf chain
+ * _unused
+ * len length of TCP segment data, excluding options
+ * optlen length of TCP segment options
+ * buf pointer to storage for computed MD5 digest
+ * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND)
+ *
+ * We do this over ip, tcphdr, segment data, and the key in the SADB.
+ * When called from tcp_input(), we can be sure that th_sum has been
+ * zeroed out and verified already.
+ *
+ * Return 0 if successful, otherwise return -1.
+ *
+ * XXX The key is retrieved from the system's PF_KEY SADB, by keying a
+ * search with the destination IP address, and a 'magic SPI' to be
+ * determined by the application. This is hardcoded elsewhere to 1179
+ * right now. Another branch of this code exists which uses the SPD to
+ * specify per-application flows but it is unstable.
+ */
+int
+tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
+ u_char *buf, u_int direction)
+{
+ union sockaddr_union dst;
+ struct ippseudo ippseudo;
+ MD5_CTX ctx;
+ int doff;
+ struct ip *ip;
+ struct ipovly *ipovly;
+ struct secasvar *sav;
+ struct tcphdr *th;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+ struct in6_addr in6;
+ char ip6buf[INET6_ADDRSTRLEN];
+ uint32_t plen;
+ uint16_t nhdr;
+#endif
+ u_short savecsum;
+
+ KASSERT(m != NULL, ("NULL mbuf chain"));
+ KASSERT(buf != NULL, ("NULL signature pointer"));
+
+ /* Extract the destination from the IP header in the mbuf. */
+ bzero(&dst, sizeof(union sockaddr_union));
+ ip = mtod(m, struct ip *);
+#ifdef INET6
+ ip6 = NULL; /* Make the compiler happy. */
+#endif
+ switch (ip->ip_v) {
+ case IPVERSION:
+ dst.sa.sa_len = sizeof(struct sockaddr_in);
+ dst.sa.sa_family = AF_INET;
+ dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ?
+ ip->ip_src : ip->ip_dst;
+ break;
+#ifdef INET6
+ case (IPV6_VERSION >> 4):
+ ip6 = mtod(m, struct ip6_hdr *);
+ dst.sa.sa_len = sizeof(struct sockaddr_in6);
+ dst.sa.sa_family = AF_INET6;
+ dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ?
+ ip6->ip6_src : ip6->ip6_dst;
+ break;
+#endif
+ default:
+ return (EINVAL);
+ /* NOTREACHED */
+ break;
+ }
+
+ /* Look up an SADB entry which matches the address of the peer. */
+ sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI));
+ if (sav == NULL) {
+ ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__,
+ (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) :
+#ifdef INET6
+ (ip->ip_v == (IPV6_VERSION >> 4)) ?
+ ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) :
+#endif
+ "(unsupported)"));
+ return (EINVAL);
+ }
+
+ MD5Init(&ctx);
+ /*
+ * Step 1: Update MD5 hash with IP(v6) pseudo-header.
+ *
+ * XXX The ippseudo header MUST be digested in network byte order,
+ * or else we'll fail the regression test. Assume all fields we've
+ * been doing arithmetic on have been in host byte order.
+ * XXX One cannot depend on ipovly->ih_len here. When called from
+ * tcp_output(), the underlying ip_len member has not yet been set.
+ */
+ switch (ip->ip_v) {
+ case IPVERSION:
+ ipovly = (struct ipovly *)ip;
+ ippseudo.ippseudo_src = ipovly->ih_src;
+ ippseudo.ippseudo_dst = ipovly->ih_dst;
+ ippseudo.ippseudo_pad = 0;
+ ippseudo.ippseudo_p = IPPROTO_TCP;
+ ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) +
+ optlen);
+ MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo));
+
+ th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip));
+ doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen;
+ break;
+#ifdef INET6
+ /*
+ * RFC 2385, 2.0 Proposal
+ * For IPv6, the pseudo-header is as described in RFC 2460, namely the
+ * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero-
+ * extended next header value (to form 32 bits), and 32-bit segment
+ * length.
+ * Note: Upper-Layer Packet Length comes before Next Header.
+ */
+ case (IPV6_VERSION >> 4):
+ in6 = ip6->ip6_src;
+ in6_clearscope(&in6);
+ MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr));
+ in6 = ip6->ip6_dst;
+ in6_clearscope(&in6);
+ MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr));
+ plen = htonl(len + sizeof(struct tcphdr) + optlen);
+ MD5Update(&ctx, (char *)&plen, sizeof(uint32_t));
+ nhdr = 0;
+ MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t));
+ MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t));
+ MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t));
+ nhdr = IPPROTO_TCP;
+ MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t));
+
+ th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr));
+ doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen;
+ break;
+#endif
+ default:
+ return (EINVAL);
+ /* NOTREACHED */
+ break;
+ }
+
+
+ /*
+ * Step 2: Update MD5 hash with TCP header, excluding options.
+ * The TCP checksum must be set to zero.
+ */
+ savecsum = th->th_sum;
+ th->th_sum = 0;
+ MD5Update(&ctx, (char *)th, sizeof(struct tcphdr));
+ th->th_sum = savecsum;
+
+ /*
+ * Step 3: Update MD5 hash with TCP segment data.
+ * Use m_apply() to avoid an early m_pullup().
+ */
+ if (len > 0)
+ m_apply(m, doff, len, tcp_signature_apply, &ctx);
+
+ /*
+ * Step 4: Update MD5 hash with shared secret.
+ */
+ MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth));
+ MD5Final(buf, &ctx);
+
+ key_sa_recordxfer(sav, m);
+ KEY_FREESAV(&sav);
+ return (0);
+}
+#endif /* TCP_SIGNATURE */
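
The RFC 2385 digest computed above covers, in order: the IP pseudo-header, the fixed
TCP header with its checksum zeroed (options are excluded), the segment payload, and
finally the shared key from the SADB. The sketch below lays out that order for the
IPv4 case; struct md5_stream and md5_stream_update() are hypothetical stand-ins for
the kernel's streaming MD5 interface.

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical streaming hash API standing in for the kernel MD5 routines. */
    struct md5_stream;
    extern void md5_stream_update(struct md5_stream *, const void *, size_t);

    /* IPv4 pseudo-header as digested by RFC 2385, network byte order throughout. */
    struct tcpmd5_pseudo {
    	uint32_t src;
    	uint32_t dst;
    	uint8_t  pad;		/* always 0 */
    	uint8_t  proto;		/* IPPROTO_TCP == 6 */
    	uint16_t tcplen;	/* TCP header + options + payload, htons() */
    };

    static void
    tcpmd5_digest(struct md5_stream *md, const struct tcpmd5_pseudo *ph,
        const uint8_t tcphdr[20], const uint8_t *payload, size_t plen,
        const uint8_t *key, size_t keylen)
    {
    	uint8_t th[20];

    	memcpy(th, tcphdr, 20);	/* fixed header only; options are not digested */
    	th[16] = th[17] = 0;	/* checksum field must be zero while hashing */

    	md5_stream_update(md, ph, sizeof(*ph));
    	md5_stream_update(md, th, sizeof(th));
    	md5_stream_update(md, payload, plen);
    	md5_stream_update(md, key, keylen);
    }
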
+
+static int
+sysctl_drop(SYSCTL_HANDLER_ARGS)
+{
+ /* addrs[0] is a foreign socket, addrs[1] is a local one. */
+ struct sockaddr_storage addrs[2];
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ struct tcptw *tw;
+ struct sockaddr_in *fin, *lin;
+#ifdef INET6
+ struct sockaddr_in6 *fin6, *lin6;
+#endif
+ int error;
+
+ inp = NULL;
+ fin = lin = NULL;
+#ifdef INET6
+ fin6 = lin6 = NULL;
+#endif
+ error = 0;
+
+ if (req->oldptr != NULL || req->oldlen != 0)
+ return (EINVAL);
+ if (req->newptr == NULL)
+ return (EPERM);
+ if (req->newlen < sizeof(addrs))
+ return (ENOMEM);
+ error = SYSCTL_IN(req, &addrs, sizeof(addrs));
+ if (error)
+ return (error);
+
+ switch (addrs[0].ss_family) {
+#ifdef INET6
+ case AF_INET6:
+ fin6 = (struct sockaddr_in6 *)&addrs[0];
+ lin6 = (struct sockaddr_in6 *)&addrs[1];
+ if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
+ lin6->sin6_len != sizeof(struct sockaddr_in6))
+ return (EINVAL);
+ if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
+ if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
+ return (EINVAL);
+ in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
+ in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
+ fin = (struct sockaddr_in *)&addrs[0];
+ lin = (struct sockaddr_in *)&addrs[1];
+ break;
+ }
+ error = sa6_embedscope(fin6, V_ip6_use_defzone);
+ if (error)
+ return (error);
+ error = sa6_embedscope(lin6, V_ip6_use_defzone);
+ if (error)
+ return (error);
+ break;
+#endif
+ case AF_INET:
+ fin = (struct sockaddr_in *)&addrs[0];
+ lin = (struct sockaddr_in *)&addrs[1];
+ if (fin->sin_len != sizeof(struct sockaddr_in) ||
+ lin->sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ break;
+ default:
+ return (EINVAL);
+ }
+ INP_INFO_WLOCK(&V_tcbinfo);
+ switch (addrs[0].ss_family) {
+#ifdef INET6
+ case AF_INET6:
+ inp = in6_pcblookup_hash(&V_tcbinfo, &fin6->sin6_addr,
+ fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, 0,
+ NULL);
+ break;
+#endif
+ case AF_INET:
+ inp = in_pcblookup_hash(&V_tcbinfo, fin->sin_addr,
+ fin->sin_port, lin->sin_addr, lin->sin_port, 0, NULL);
+ break;
+ }
+ if (inp != NULL) {
+ INP_WLOCK(inp);
+ if (inp->inp_flags & INP_TIMEWAIT) {
+ /*
+ * XXXRW: There currently exists a state where an
+ * inpcb is present, but its timewait state has been
+ * discarded. For now, don't allow dropping of this
+ * type of inpcb.
+ */
+ tw = intotw(inp);
+ if (tw != NULL)
+ tcp_twclose(tw, 0);
+ else
+ INP_WUNLOCK(inp);
+ } else if (!(inp->inp_flags & INP_DROPPED) &&
+ !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
+ tp = intotcpcb(inp);
+ tp = tcp_drop(tp, ECONNABORTED);
+ if (tp != NULL)
+ INP_WUNLOCK(inp);
+ } else
+ INP_WUNLOCK(inp);
+ } else
+ error = ESRCH;
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
+ CTLTYPE_STRUCT|CTLFLAG_WR|CTLFLAG_SKIP, NULL,
+ 0, sysctl_drop, "", "Drop TCP connection");
+
+/*
+ * Generate a standardized TCP log line for use throughout the
+ * tcp subsystem. Memory allocation is done with M_NOWAIT to
+ * allow use in the interrupt context.
+ *
+ * NB: The caller MUST free(s, M_TCPLOG) the returned string.
+ * NB: The function may return NULL if memory allocation failed.
+ *
+ * Due to header inclusion and ordering limitations the struct ip
+ * and ip6_hdr pointers have to be passed as void pointers.
+ */
+char *
+tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
+ const void *ip6hdr)
+{
+
+ /* Is logging enabled? */
+ if (tcp_log_in_vain == 0)
+ return (NULL);
+
+ return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
+}
+
+char *
+tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
+ const void *ip6hdr)
+{
+
+ /* Is logging enabled? */
+ if (tcp_log_debug == 0)
+ return (NULL);
+
+ return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
+}
+
+static char *
+tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
+ const void *ip6hdr)
+{
+ char *s, *sp;
+ size_t size;
+ struct ip *ip;
+#ifdef INET6
+ const struct ip6_hdr *ip6;
+
+ ip6 = (const struct ip6_hdr *)ip6hdr;
+#endif /* INET6 */
+ ip = (struct ip *)ip4hdr;
+
+ /*
+ * The log line looks like this:
+ * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2<SYN>"
+ */
+ size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") +
+ sizeof(PRINT_TH_FLAGS) + 1 +
+#ifdef INET6
+ 2 * INET6_ADDRSTRLEN;
+#else
+ 2 * INET_ADDRSTRLEN;
+#endif /* INET6 */
+
+ s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
+ if (s == NULL)
+ return (NULL);
+
+ strcat(s, "TCP: [");
+ sp = s + strlen(s);
+
+ if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) {
+ inet_ntoa_r(inc->inc_faddr, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
+ sp = s + strlen(s);
+ inet_ntoa_r(inc->inc_laddr, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(inc->inc_lport));
+#ifdef INET6
+ } else if (inc) {
+ ip6_sprintf(sp, &inc->inc6_faddr);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
+ sp = s + strlen(s);
+ ip6_sprintf(sp, &inc->inc6_laddr);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(inc->inc_lport));
+ } else if (ip6 && th) {
+ ip6_sprintf(sp, &ip6->ip6_src);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(th->th_sport));
+ sp = s + strlen(s);
+ ip6_sprintf(sp, &ip6->ip6_dst);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(th->th_dport));
+#endif /* INET6 */
+ } else if (ip && th) {
+ inet_ntoa_r(ip->ip_src, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(th->th_sport));
+ sp = s + strlen(s);
+ inet_ntoa_r(ip->ip_dst, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(th->th_dport));
+ } else {
+ free(s, M_TCPLOG);
+ return (NULL);
+ }
+ sp = s + strlen(s);
+ if (th)
+ sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
+ if (*(s + size - 1) != '\0')
+ panic("%s: string too long", __func__);
+ return (s);
+}
diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c
new file mode 100644
index 00000000..78790cc8
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_syncache.c
@@ -0,0 +1,1823 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 McAfee, Inc.
+ * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Jonathan Lemon
+ * and McAfee Research, the Security Research Division of McAfee, Inc. under
+ * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/md5.h>
+#include <freebsd/sys/proc.h> /* for proc0 declaration */
+#include <freebsd/sys/random.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/ucred.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#include <freebsd/netinet/icmp6.h>
+#include <freebsd/netinet6/nd6.h>
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/in6_pcb.h>
+#endif
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcp_syncache.h>
+#include <freebsd/netinet/tcp_offload.h>
+#ifdef INET6
+#include <freebsd/netinet6/tcp6_var.h>
+#endif
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#ifdef INET6
+#include <freebsd/netipsec/ipsec6.h>
+#endif
+#include <freebsd/netipsec/key.h>
+#endif /*IPSEC*/
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+static VNET_DEFINE(int, tcp_syncookies) = 1;
+#define V_tcp_syncookies VNET(tcp_syncookies)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW,
+ &VNET_NAME(tcp_syncookies), 0,
+ "Use TCP SYN cookies if the syncache overflows");
+
+static VNET_DEFINE(int, tcp_syncookiesonly) = 0;
+#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
+ &VNET_NAME(tcp_syncookiesonly), 0,
+ "Use only TCP SYN cookies");
+
+#ifdef TCP_OFFLOAD_DISABLE
+#define TOEPCB_ISSET(sc) (0)
+#else
+#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
+#endif
+
+static void syncache_drop(struct syncache *, struct syncache_head *);
+static void syncache_free(struct syncache *);
+static void syncache_insert(struct syncache *, struct syncache_head *);
+struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
+static int syncache_respond(struct syncache *);
+static struct socket *syncache_socket(struct syncache *, struct socket *,
+ struct mbuf *m);
+static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
+ int docallout);
+static void syncache_timer(void *);
+static void syncookie_generate(struct syncache_head *, struct syncache *,
+ u_int32_t *);
+static struct syncache
+ *syncookie_lookup(struct in_conninfo *, struct syncache_head *,
+ struct syncache *, struct tcpopt *, struct tcphdr *,
+ struct socket *);
+
+/*
+ * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
+ * 3 retransmits corresponds to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds,
+ * the odds are that the user has given up attempting to connect by then.
+ */
+#define SYNCACHE_MAXREXMTS 3
+
+/* Arbitrary values */
+#define TCP_SYNCACHE_HASHSIZE 512
+#define TCP_SYNCACHE_BUCKETLIMIT 30
+
+static VNET_DEFINE(struct tcp_syncache, tcp_syncache);
+#define V_tcp_syncache VNET(tcp_syncache)
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache");
+
+SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
+ &VNET_NAME(tcp_syncache.bucket_limit), 0,
+ "Per-bucket hash limit for syncache");
+
+SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
+ &VNET_NAME(tcp_syncache.cache_limit), 0,
+ "Overall entry limit for syncache");
+
+SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
+ &VNET_NAME(tcp_syncache.cache_count), 0,
+ "Current number of entries in syncache");
+
+SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
+ &VNET_NAME(tcp_syncache.hashsize), 0,
+ "Size of TCP syncache hashtable");
+
+SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
+ &VNET_NAME(tcp_syncache.rexmt_limit), 0,
+ "Limit on SYN/ACK retransmissions");
+
+VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail,
+ CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0,
+ "Send reset on socket allocation failure");
+
+static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
+
+#define SYNCACHE_HASH(inc, mask) \
+ ((V_tcp_syncache.hash_secret ^ \
+ (inc)->inc_faddr.s_addr ^ \
+ ((inc)->inc_faddr.s_addr >> 16) ^ \
+ (inc)->inc_fport ^ (inc)->inc_lport) & mask)
+
+#define SYNCACHE_HASH6(inc, mask) \
+ ((V_tcp_syncache.hash_secret ^ \
+ (inc)->inc6_faddr.s6_addr32[0] ^ \
+ (inc)->inc6_faddr.s6_addr32[3] ^ \
+ (inc)->inc_fport ^ (inc)->inc_lport) & mask)
+
+#define ENDPTS_EQ(a, b) ( \
+ (a)->ie_fport == (b)->ie_fport && \
+ (a)->ie_lport == (b)->ie_lport && \
+ (a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr && \
+ (a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr \
+)
+
+#define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0)
+
+#define SCH_LOCK(sch) mtx_lock(&(sch)->sch_mtx)
+#define SCH_UNLOCK(sch) mtx_unlock(&(sch)->sch_mtx)
+#define SCH_LOCK_ASSERT(sch) mtx_assert(&(sch)->sch_mtx, MA_OWNED)
+
+/*
+ * Requires the syncache entry to be already removed from the bucket list.
+ */
+static void
+syncache_free(struct syncache *sc)
+{
+
+ if (sc->sc_ipopts)
+ (void) m_free(sc->sc_ipopts);
+ if (sc->sc_cred)
+ crfree(sc->sc_cred);
+#ifdef MAC
+ mac_syncache_destroy(&sc->sc_label);
+#endif
+
+ uma_zfree(V_tcp_syncache.zone, sc);
+}
+
+void
+syncache_init(void)
+{
+ int i;
+
+ V_tcp_syncache.cache_count = 0;
+ V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
+ V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
+ V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
+ V_tcp_syncache.hash_secret = arc4random();
+
+ TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
+ &V_tcp_syncache.hashsize);
+ TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
+ &V_tcp_syncache.bucket_limit);
+ if (!powerof2(V_tcp_syncache.hashsize) ||
+ V_tcp_syncache.hashsize == 0) {
+ printf("WARNING: syncache hash size is not a power of 2.\n");
+ V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
+ }
+ V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1;
+
+ /* Set limits. */
+ V_tcp_syncache.cache_limit =
+ V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit;
+ TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
+ &V_tcp_syncache.cache_limit);
+
+ /* Allocate the hash table. */
+ V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize *
+ sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO);
+
+ /* Initialize the hash buckets. */
+ for (i = 0; i < V_tcp_syncache.hashsize; i++) {
+#ifdef VIMAGE
+ V_tcp_syncache.hashbase[i].sch_vnet = curvnet;
+#endif
+ TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
+ mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
+ NULL, MTX_DEF);
+ callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
+ &V_tcp_syncache.hashbase[i].sch_mtx, 0);
+ V_tcp_syncache.hashbase[i].sch_length = 0;
+ }
+
+ /* Create the syncache entry zone. */
+ V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit);
+}
+
+#ifdef VIMAGE
+void
+syncache_destroy(void)
+{
+ struct syncache_head *sch;
+ struct syncache *sc, *nsc;
+ int i;
+
+ /* Cleanup hash buckets: stop timers, free entries, destroy locks. */
+ for (i = 0; i < V_tcp_syncache.hashsize; i++) {
+
+ sch = &V_tcp_syncache.hashbase[i];
+ callout_drain(&sch->sch_timer);
+
+ SCH_LOCK(sch);
+ TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc)
+ syncache_drop(sc, sch);
+ SCH_UNLOCK(sch);
+ KASSERT(TAILQ_EMPTY(&sch->sch_bucket),
+ ("%s: sch->sch_bucket not empty", __func__));
+ KASSERT(sch->sch_length == 0, ("%s: sch->sch_length %d not 0",
+ __func__, sch->sch_length));
+ mtx_destroy(&sch->sch_mtx);
+ }
+
+ KASSERT(V_tcp_syncache.cache_count == 0, ("%s: cache_count %d not 0",
+ __func__, V_tcp_syncache.cache_count));
+
+ /* Free the allocated global resources. */
+ uma_zdestroy(V_tcp_syncache.zone);
+ free(V_tcp_syncache.hashbase, M_SYNCACHE);
+}
+#endif
+
+/*
+ * Inserts a syncache entry into the specified bucket row.
+ * Locks and unlocks the syncache_head autonomously.
+ */
+static void
+syncache_insert(struct syncache *sc, struct syncache_head *sch)
+{
+ struct syncache *sc2;
+
+ SCH_LOCK(sch);
+
+ /*
+ * Make sure that we don't overflow the per-bucket limit.
+ * If the bucket is full, toss the oldest element.
+ */
+ if (sch->sch_length >= V_tcp_syncache.bucket_limit) {
+ KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
+ ("sch->sch_length incorrect"));
+ sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
+ syncache_drop(sc2, sch);
+ TCPSTAT_INC(tcps_sc_bucketoverflow);
+ }
+
+ /* Put it into the bucket. */
+ TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
+ sch->sch_length++;
+
+ /* Reinitialize the bucket row's timer. */
+ if (sch->sch_length == 1)
+ sch->sch_nextc = ticks + INT_MAX;
+ syncache_timeout(sc, sch, 1);
+
+ SCH_UNLOCK(sch);
+
+ V_tcp_syncache.cache_count++;
+ TCPSTAT_INC(tcps_sc_added);
+}
+
+/*
+ * Remove and free entry from syncache bucket row.
+ * Expects locked syncache head.
+ */
+static void
+syncache_drop(struct syncache *sc, struct syncache_head *sch)
+{
+
+ SCH_LOCK_ASSERT(sch);
+
+ TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
+ sch->sch_length--;
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (sc->sc_tu)
+ sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
+#endif
+ syncache_free(sc);
+ V_tcp_syncache.cache_count--;
+}
+
+/*
+ * Engage/reengage time on bucket row.
+ */
+static void
+syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
+{
+ sc->sc_rxttime = ticks +
+ TCPTV_RTOBASE * (tcp_backoff[sc->sc_rxmits]);
+ sc->sc_rxmits++;
+ if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
+ sch->sch_nextc = sc->sc_rxttime;
+ if (docallout)
+ callout_reset(&sch->sch_timer, sch->sch_nextc - ticks,
+ syncache_timer, (void *)sch);
+ }
+}
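
syncache_timeout() above schedules each retransmit at TCPTV_RTOBASE times an entry
from the doubling backoff table. A small worked example, assuming the customary
3-second RTO base, shows how SYNCACHE_MAXREXMTS == 3 yields the 45-second give-up
figure quoted earlier in this file:

    #include <stdio.h>

    int
    main(void)
    {
    	const int rto_base_sec = 3;	/* TCPTV_RTOBASE in seconds (assumed) */
    	const int backoff[] = { 1, 2, 4, 8 };
    	int i, total = 0;

    	for (i = 0; i < 4; i++) {
    		total += rto_base_sec * backoff[i];
    		printf("transmit %d: wait %d s (cumulative %d s)\n",
    		    i, rto_base_sec * backoff[i], total);
    	}
    	/* cumulative total is 3 * (1 + 2 + 4 + 8) == 45 seconds */
    	return (0);
    }
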
+
+/*
+ * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
+ * If we have retransmitted an entry the maximum number of times, expire it.
+ * One separate timer for each bucket row.
+ */
+static void
+syncache_timer(void *xsch)
+{
+ struct syncache_head *sch = (struct syncache_head *)xsch;
+ struct syncache *sc, *nsc;
+ int tick = ticks;
+ char *s;
+
+ CURVNET_SET(sch->sch_vnet);
+
+ /* NB: syncache_head has already been locked by the callout. */
+ SCH_LOCK_ASSERT(sch);
+
+ /*
+ * In the following cycle we may remove some entries and/or
+ * advance some timeouts, so re-initialize the bucket timer.
+ */
+ sch->sch_nextc = tick + INT_MAX;
+
+ TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
+ /*
+ * We do not check if the listen socket still exists
+ * and accept the case where the listen socket may be
+ * gone by the time we resend the SYN/ACK. We do
+		 * not expect this to happen often. If it does,
+ * then the RST will be sent by the time the remote
+ * host does the SYN/ACK->ACK.
+ */
+ if (TSTMP_GT(sc->sc_rxttime, tick)) {
+ if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
+ sch->sch_nextc = sc->sc_rxttime;
+ continue;
+ }
+ if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) {
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Retransmits exhausted, "
+ "giving up and removing syncache entry\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
+ syncache_drop(sc, sch);
+ TCPSTAT_INC(tcps_sc_stale);
+ continue;
+ }
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Response timeout, "
+ "retransmitting (%u) SYN|ACK\n",
+ s, __func__, sc->sc_rxmits);
+ free(s, M_TCPLOG);
+ }
+
+ (void) syncache_respond(sc);
+ TCPSTAT_INC(tcps_sc_retransmitted);
+ syncache_timeout(sc, sch, 0);
+ }
+ if (!TAILQ_EMPTY(&(sch)->sch_bucket))
+ callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
+ syncache_timer, (void *)(sch));
+ CURVNET_RESTORE();
+}
+
+/*
+ * Find an entry in the syncache.
+ * Always returns with a locked syncache_head plus a matching entry or NULL.
+ */
+struct syncache *
+syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
+{
+ struct syncache *sc;
+ struct syncache_head *sch;
+
+#ifdef INET6
+ if (inc->inc_flags & INC_ISIPV6) {
+ sch = &V_tcp_syncache.hashbase[
+ SYNCACHE_HASH6(inc, V_tcp_syncache.hashmask)];
+ *schp = sch;
+
+ SCH_LOCK(sch);
+
+ /* Circle through bucket row to find matching entry. */
+ TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
+ if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
+ return (sc);
+ }
+ } else
+#endif
+ {
+ sch = &V_tcp_syncache.hashbase[
+ SYNCACHE_HASH(inc, V_tcp_syncache.hashmask)];
+ *schp = sch;
+
+ SCH_LOCK(sch);
+
+ /* Circle through bucket row to find matching entry. */
+ TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6)
+ continue;
+#endif
+ if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
+ return (sc);
+ }
+ }
+ SCH_LOCK_ASSERT(*schp);
+ return (NULL); /* always returns with locked sch */
+}
+
+/*
+ * This function is called when we get a RST for a
+ * non-existent connection, so that we can see if the
+ * connection is in the syn cache. If it is, zap it.
+ */
+void
+syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th)
+{
+ struct syncache *sc;
+ struct syncache_head *sch;
+ char *s = NULL;
+
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
+
+ /*
+ * Any RST to our SYN|ACK must not carry ACK, SYN or FIN flags.
+ * See RFC 793 page 65, section SEGMENT ARRIVES.
+ */
+ if (th->th_flags & (TH_ACK|TH_SYN|TH_FIN)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious RST with ACK, SYN or "
+ "FIN flag set, segment ignored\n", s, __func__);
+ TCPSTAT_INC(tcps_badrst);
+ goto done;
+ }
+
+ /*
+ * No corresponding connection was found in syncache.
+ * If syncookies are enabled and possibly exclusively
+ * used, or we are under memory pressure, a valid RST
+ * may not find a syncache entry. In that case we're
+ * done and no SYN|ACK retransmissions will happen.
+	 * Otherwise the RST was misdirected or spoofed.
+ */
+ if (sc == NULL) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious RST without matching "
+ "syncache entry (possibly syncookie only), "
+ "segment ignored\n", s, __func__);
+ TCPSTAT_INC(tcps_badrst);
+ goto done;
+ }
+
+ /*
+ * If the RST bit is set, check the sequence number to see
+ * if this is a valid reset segment.
+ * RFC 793 page 37:
+ * In all states except SYN-SENT, all reset (RST) segments
+ * are validated by checking their SEQ-fields. A reset is
+ * valid if its sequence number is in the window.
+ *
+ * The sequence number in the reset segment is normally an
+	 * echo of our outgoing acknowledgement numbers, but some hosts
+ * send a reset with the sequence number at the rightmost edge
+ * of our receive window, and we have to handle this case.
+ */
+ if (SEQ_GEQ(th->th_seq, sc->sc_irs) &&
+ SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
+ syncache_drop(sc, sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, "
+ "connection attempt aborted by remote endpoint\n",
+ s, __func__);
+ TCPSTAT_INC(tcps_sc_reset);
+ } else {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != "
+ "IRS %u (+WND %u), segment ignored\n",
+ s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd);
+ TCPSTAT_INC(tcps_badrst);
+ }
+
+done:
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ SCH_UNLOCK(sch);
+}
+
+void
+syncache_badack(struct in_conninfo *inc)
+{
+ struct syncache *sc;
+ struct syncache_head *sch;
+
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
+ if (sc != NULL) {
+ syncache_drop(sc, sch);
+ TCPSTAT_INC(tcps_sc_badack);
+ }
+ SCH_UNLOCK(sch);
+}
+
+void
+syncache_unreach(struct in_conninfo *inc, struct tcphdr *th)
+{
+ struct syncache *sc;
+ struct syncache_head *sch;
+
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
+ if (sc == NULL)
+ goto done;
+
+ /* If the sequence number != sc_iss, then it's a bogus ICMP msg */
+ if (ntohl(th->th_seq) != sc->sc_iss)
+ goto done;
+
+ /*
+	 * If we've retransmitted 3 times and this is our second error,
+ * we remove the entry. Otherwise, we allow it to continue on.
+ * This prevents us from incorrectly nuking an entry during a
+ * spurious network outage.
+ *
+ * See tcp_notify().
+ */
+ if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxmits < 3 + 1) {
+ sc->sc_flags |= SCF_UNREACH;
+ goto done;
+ }
+ syncache_drop(sc, sch);
+ TCPSTAT_INC(tcps_sc_unreach);
+done:
+ SCH_UNLOCK(sch);
+}
+
+/*
+ * Build a new TCP socket structure from a syncache entry.
+ */
+static struct socket *
+syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
+{
+ struct inpcb *inp = NULL;
+ struct socket *so;
+ struct tcpcb *tp;
+ int error = 0;
+ char *s;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+ /*
+ * Ok, create the full blown connection, and set things up
+ * as they would have been set up if we had created the
+ * connection when the SYN arrived. If we can't create
+ * the connection, abort it.
+ */
+ so = sonewconn(lso, SS_ISCONNECTED);
+ if (so == NULL) {
+ /*
+ * Drop the connection; we will either send a RST or
+ * have the peer retransmit its SYN again after its
+ * RTO and try again.
+ */
+ TCPSTAT_INC(tcps_listendrop);
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Socket create failed "
+ "due to limits or memory shortage\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
+ goto abort2;
+ }
+#ifdef MAC
+ mac_socketpeer_set_from_mbuf(m, so);
+#endif
+
+ inp = sotoinpcb(so);
+ inp->inp_inc.inc_fibnum = so->so_fibnum;
+ INP_WLOCK(inp);
+
+ /* Insert new socket into PCB hash list. */
+ inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6) {
+ inp->in6p_laddr = sc->sc_inc.inc6_laddr;
+ } else {
+ inp->inp_vflag &= ~INP_IPV6;
+ inp->inp_vflag |= INP_IPV4;
+#endif
+ inp->inp_laddr = sc->sc_inc.inc_laddr;
+#ifdef INET6
+ }
+#endif
+ inp->inp_lport = sc->sc_inc.inc_lport;
+ if ((error = in_pcbinshash(inp)) != 0) {
+ /*
+ * Undo the assignments above if we failed to
+ * put the PCB on the hash lists.
+ */
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6)
+ inp->in6p_laddr = in6addr_any;
+ else
+#endif
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ inp->inp_lport = 0;
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: in_pcbinshash failed "
+ "with error %i\n",
+ s, __func__, error);
+ free(s, M_TCPLOG);
+ }
+ goto abort;
+ }
+#ifdef IPSEC
+ /* Copy old policy into new socket's. */
+ if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
+ printf("syncache_socket: could not copy policy\n");
+#endif
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6) {
+ struct inpcb *oinp = sotoinpcb(lso);
+ struct in6_addr laddr6;
+ struct sockaddr_in6 sin6;
+ /*
+ * Inherit socket options from the listening socket.
+ * Note that in6p_inputopts are not (and should not be)
+ * copied, since it stores previously received options and is
+ * used to detect if each new option is different than the
+ * previous one and hence should be passed to a user.
+ * If we copied in6p_inputopts, a user would not be able to
+ * receive options just after calling the accept system call.
+ */
+ inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS;
+ if (oinp->in6p_outputopts)
+ inp->in6p_outputopts =
+ ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_addr = sc->sc_inc.inc6_faddr;
+ sin6.sin6_port = sc->sc_inc.inc_fport;
+ sin6.sin6_flowinfo = sin6.sin6_scope_id = 0;
+ laddr6 = inp->in6p_laddr;
+ if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+ inp->in6p_laddr = sc->sc_inc.inc6_laddr;
+#ifndef __rtems__
+ if ((error = in6_pcbconnect(inp, (struct sockaddr *)&sin6,
+ thread0.td_ucred)) != 0) {
+#else /* __rtems__ */
+ if ((error = in6_pcbconnect(inp, (struct sockaddr *)&sin6,
+ rtems_bsd_thread0_ucred)) != 0) {
+#endif /* __rtems__ */
+ inp->in6p_laddr = laddr6;
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
+ "with error %i\n",
+ s, __func__, error);
+ free(s, M_TCPLOG);
+ }
+ goto abort;
+ }
+ /* Override flowlabel from in6_pcbconnect. */
+ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
+ inp->inp_flow |= sc->sc_flowlabel;
+ } else
+#endif
+ {
+ struct in_addr laddr;
+ struct sockaddr_in sin;
+
+ inp->inp_options = (m) ? ip_srcroute(m) : NULL;
+
+ if (inp->inp_options == NULL) {
+ inp->inp_options = sc->sc_ipopts;
+ sc->sc_ipopts = NULL;
+ }
+
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ sin.sin_addr = sc->sc_inc.inc_faddr;
+ sin.sin_port = sc->sc_inc.inc_fport;
+ bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero));
+ laddr = inp->inp_laddr;
+ if (inp->inp_laddr.s_addr == INADDR_ANY)
+ inp->inp_laddr = sc->sc_inc.inc_laddr;
+#ifndef __rtems__
+ if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin,
+ thread0.td_ucred)) != 0) {
+#else /* __rtems__ */
+ if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin,
+ rtems_bsd_thread0_ucred)) != 0) {
+#endif /* __rtems__ */
+
+ inp->inp_laddr = laddr;
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
+ "with error %i\n",
+ s, __func__, error);
+ free(s, M_TCPLOG);
+ }
+ goto abort;
+ }
+ }
+ tp = intotcpcb(inp);
+ tp->t_state = TCPS_SYN_RECEIVED;
+ tp->iss = sc->sc_iss;
+ tp->irs = sc->sc_irs;
+ tcp_rcvseqinit(tp);
+ tcp_sendseqinit(tp);
+ tp->snd_wl1 = sc->sc_irs;
+ tp->snd_max = tp->iss + 1;
+ tp->snd_nxt = tp->iss + 1;
+ tp->rcv_up = sc->sc_irs + 1;
+ tp->rcv_wnd = sc->sc_wnd;
+ tp->rcv_adv += tp->rcv_wnd;
+ tp->last_ack_sent = tp->rcv_nxt;
+
+ tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
+ if (sc->sc_flags & SCF_NOOPT)
+ tp->t_flags |= TF_NOOPT;
+ else {
+ if (sc->sc_flags & SCF_WINSCALE) {
+ tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
+ tp->snd_scale = sc->sc_requested_s_scale;
+ tp->request_r_scale = sc->sc_requested_r_scale;
+ }
+ if (sc->sc_flags & SCF_TIMESTAMP) {
+ tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
+ tp->ts_recent = sc->sc_tsreflect;
+ tp->ts_recent_age = ticks;
+ tp->ts_offset = sc->sc_tsoff;
+ }
+#ifdef TCP_SIGNATURE
+ if (sc->sc_flags & SCF_SIGNATURE)
+ tp->t_flags |= TF_SIGNATURE;
+#endif
+ if (sc->sc_flags & SCF_SACK)
+ tp->t_flags |= TF_SACK_PERMIT;
+ }
+
+ if (sc->sc_flags & SCF_ECN)
+ tp->t_flags |= TF_ECN_PERMIT;
+
+ /*
+ * Set up MSS and get cached values from tcp_hostcache.
+ * This might overwrite some of the defaults we just set.
+ */
+ tcp_mss(tp, sc->sc_peer_mss);
+
+ /*
+ * If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
+ * NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits.
+ */
+ if (sc->sc_rxmits > 1)
+ tp->snd_cwnd = tp->t_maxseg;
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+
+ INP_WUNLOCK(inp);
+
+ TCPSTAT_INC(tcps_accepts);
+ return (so);
+
+abort:
+ INP_WUNLOCK(inp);
+abort2:
+ if (so != NULL)
+ soabort(so);
+ return (NULL);
+}
+
+/*
+ * This function gets called when we receive an ACK for a
+ * socket in the LISTEN state. We look up the connection
+ * in the syncache, and if it's there, we pull it out of
+ * the cache and turn it into a full-blown connection in
+ * the SYN-RECEIVED state.
+ */
+int
+syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct socket **lsop, struct mbuf *m)
+{
+ struct syncache *sc;
+ struct syncache_head *sch;
+ struct syncache scs;
+ char *s;
+
+ /*
+ * Global TCP locks are held because we manipulate the PCB lists
+ * and create a new socket.
+ */
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
+ ("%s: can handle only ACK", __func__));
+
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
+ if (sc == NULL) {
+ /*
+ * There is no syncache entry, so see if this ACK is
+ * a returning syncookie. To do this, first:
+ * A. See if this socket has had a syncache entry dropped in
+ * the past. We don't want to accept a bogus syncookie
+ * if we've never received a SYN.
+ * B. Check that the syncookie is valid. If it is, then
+ * cobble up a fake syncache entry, and return.
+ */
+ if (!V_tcp_syncookies) {
+ SCH_UNLOCK(sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected (syncookies disabled)\n",
+ s, __func__);
+ goto failed;
+ }
+ bzero(&scs, sizeof(scs));
+ sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
+ SCH_UNLOCK(sch);
+ if (sc == NULL) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Segment failed "
+ "SYNCOOKIE authentication, segment rejected "
+ "(probably spoofed)\n", s, __func__);
+ goto failed;
+ }
+ } else {
+ /* Pull out the entry to unlock the bucket row. */
+ TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
+ sch->sch_length--;
+ V_tcp_syncache.cache_count--;
+ SCH_UNLOCK(sch);
+ }
+
+ /*
+ * Segment validation:
+ * ACK must match our initial sequence number + 1 (the SYN|ACK).
+ */
+ if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
+ "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
+ goto failed;
+ }
+
+ /*
+ * The SEQ must fall in the window starting at the received
+ * initial receive sequence number + 1 (the SYN).
+ */
+ if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+ SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
+ !TOEPCB_ISSET(sc)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
+ "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
+ goto failed;
+ }
+
+ if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
+ "segment rejected\n", s, __func__);
+ goto failed;
+ }
+ /*
+ * If timestamps were negotiated the reflected timestamp
+ * must be equal to what we actually sent in the SYN|ACK.
+ */
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
+ !TOEPCB_ISSET(sc)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
+ "segment rejected\n",
+ s, __func__, to->to_tsecr, sc->sc_ts);
+ goto failed;
+ }
+
+ *lsop = syncache_socket(sc, *lsop, m);
+
+ if (*lsop == NULL)
+ TCPSTAT_INC(tcps_sc_aborted);
+ else
+ TCPSTAT_INC(tcps_sc_completed);
+
+/* how do we find the inp for the new socket? */
+ if (sc != &scs)
+ syncache_free(sc);
+ return (1);
+failed:
+ if (sc != NULL && sc != &scs)
+ syncache_free(sc);
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ *lsop = NULL;
+ return (0);
+}
+
+int
+tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
+ struct tcphdr *th, struct socket **lsop, struct mbuf *m)
+{
+ struct tcpopt to;
+ int rc;
+
+ bzero(&to, sizeof(struct tcpopt));
+ to.to_mss = toeo->to_mss;
+ to.to_wscale = toeo->to_wscale;
+ to.to_flags = toeo->to_flags;
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ rc = syncache_expand(inc, &to, th, lsop, m);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+
+ return (rc);
+}
+
+/*
+ * Given a LISTEN socket and an inbound SYN request, add
+ * this to the syn cache, and send back a segment:
+ * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+ * to the source.
+ *
+ * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
+ * Doing so would require that we hold onto the data and deliver it
+ * to the application. However, if we are the target of a SYN-flood
+ * DoS attack, an attacker could send data which would eventually
+ * consume all available buffer space if it were ACKed. By not ACKing
+ * the data, we avoid this DoS scenario.
+ */
+static void
+_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m,
+ struct toe_usrreqs *tu, void *toepcb)
+{
+ struct tcpcb *tp;
+ struct socket *so;
+ struct syncache *sc = NULL;
+ struct syncache_head *sch;
+ struct mbuf *ipopts = NULL;
+ u_int32_t flowtmp;
+ int win, sb_hiwat, ip_ttl, ip_tos, noopt;
+ char *s;
+#ifdef INET6
+ int autoflowlabel = 0;
+#endif
+#ifdef MAC
+ struct label *maclabel;
+#endif
+ struct syncache scs;
+ struct ucred *cred;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp); /* listen socket */
+ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
+ ("%s: unexpected tcp flags", __func__));
+
+ /*
+ * Combine all so/tp operations very early to drop the INP lock as
+ * soon as possible.
+ */
+ so = *lsop;
+ tp = sototcpcb(so);
+ cred = crhold(so->so_cred);
+
+#ifdef INET6
+ if ((inc->inc_flags & INC_ISIPV6) &&
+ (inp->inp_flags & IN6P_AUTOFLOWLABEL))
+ autoflowlabel = 1;
+#endif
+ ip_ttl = inp->inp_ip_ttl;
+ ip_tos = inp->inp_ip_tos;
+ win = sbspace(&so->so_rcv);
+ sb_hiwat = so->so_rcv.sb_hiwat;
+ noopt = (tp->t_flags & TF_NOOPT);
+
+ /* By the time we drop the lock these should no longer be used. */
+ so = NULL;
+ tp = NULL;
+
+#ifdef MAC
+ if (mac_syncache_init(&maclabel) != 0) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ goto done;
+ } else
+ mac_syncache_create(maclabel, inp);
+#endif
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+
+ /*
+ * Remember the IP options, if any.
+ */
+#ifdef INET6
+ if (!(inc->inc_flags & INC_ISIPV6))
+#endif
+ ipopts = (m) ? ip_srcroute(m) : NULL;
+
+ /*
+ * See if we already have an entry for this connection.
+ * If we do, resend the SYN,ACK, and reset the retransmit timer.
+ *
+ * XXX: should the syncache be re-initialized with the contents
+ * of the new SYN here (which may have different options?)
+ *
+ * XXX: We do not check the sequence number to see if this is a
+ * real retransmit or a new connection attempt. The question is
+ * how to handle such a case; either ignore it as spoofed, or
+ * drop the current entry and create a new one?
+ */
+ sc = syncache_lookup(inc, &sch); /* returns locked entry */
+ SCH_LOCK_ASSERT(sch);
+ if (sc != NULL) {
+#ifndef TCP_OFFLOAD_DISABLE
+ if (sc->sc_tu)
+ sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
+ sc->sc_toepcb);
+#endif
+ TCPSTAT_INC(tcps_sc_dupsyn);
+ if (ipopts) {
+ /*
+ * If we were remembering a previous source route,
+ * forget it and use the new one we've been given.
+ */
+ if (sc->sc_ipopts)
+ (void) m_free(sc->sc_ipopts);
+ sc->sc_ipopts = ipopts;
+ }
+ /*
+ * Update timestamp if present.
+ */
+ if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS))
+ sc->sc_tsreflect = to->to_tsval;
+ else
+ sc->sc_flags &= ~SCF_TIMESTAMP;
+#ifdef MAC
+ /*
+ * Since we have already unconditionally allocated label
+ * storage, free it up. The syncache entry will already
+ * have an initialized label we can use.
+ */
+ mac_syncache_destroy(&maclabel);
+#endif
+ /* Retransmit SYN|ACK and reset retransmit count. */
+ if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
+ "resetting timer and retransmitting SYN|ACK\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
+ if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
+ sc->sc_rxmits = 0;
+ syncache_timeout(sc, sch, 1);
+ TCPSTAT_INC(tcps_sndacks);
+ TCPSTAT_INC(tcps_sndtotal);
+ }
+ SCH_UNLOCK(sch);
+ goto done;
+ }
+
+ sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
+ if (sc == NULL) {
+ /*
+ * The zone allocator couldn't provide more entries.
+ * Treat this as if the cache was full; drop the oldest
+ * entry and insert the new one.
+ */
+ TCPSTAT_INC(tcps_sc_zonefail);
+ if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
+ syncache_drop(sc, sch);
+ sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
+ if (sc == NULL) {
+ if (V_tcp_syncookies) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ } else {
+ SCH_UNLOCK(sch);
+ if (ipopts)
+ (void) m_free(ipopts);
+ goto done;
+ }
+ }
+ }
+
+ /*
+ * Fill in the syncache values.
+ */
+#ifdef MAC
+ sc->sc_label = maclabel;
+#endif
+ sc->sc_cred = cred;
+ cred = NULL;
+ sc->sc_ipopts = ipopts;
+ bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
+#ifdef INET6
+ if (!(inc->inc_flags & INC_ISIPV6))
+#endif
+ {
+ sc->sc_ip_tos = ip_tos;
+ sc->sc_ip_ttl = ip_ttl;
+ }
+#ifndef TCP_OFFLOAD_DISABLE
+ sc->sc_tu = tu;
+ sc->sc_toepcb = toepcb;
+#endif
+ sc->sc_irs = th->th_seq;
+ sc->sc_iss = arc4random();
+ sc->sc_flags = 0;
+ sc->sc_flowlabel = 0;
+
+ /*
+ * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN].
+ * win was derived from socket earlier in the function.
+ */
+ win = imax(win, 0);
+ win = imin(win, TCP_MAXWIN);
+ sc->sc_wnd = win;
+
+ if (V_tcp_do_rfc1323) {
+ /*
+ * A timestamp received in a SYN makes
+ * it ok to send timestamp requests and replies.
+ */
+ if (to->to_flags & TOF_TS) {
+ sc->sc_tsreflect = to->to_tsval;
+ sc->sc_ts = ticks;
+ sc->sc_flags |= SCF_TIMESTAMP;
+ }
+ if (to->to_flags & TOF_SCALE) {
+ int wscale = 0;
+
+ /*
+ * Pick the smallest possible scaling factor that
+ * will still allow us to scale up to sb_max, aka
+ * kern.ipc.maxsockbuf.
+ *
+ * We do this because there are broken firewalls that
+ * will corrupt the window scale option, leading to
+ * the other endpoint believing that our advertised
+ * window is unscaled. At scale factors larger than
+ * 5 the unscaled window will drop below 1500 bytes,
+ * leading to serious problems when traversing these
+ * broken firewalls.
+ *
+ * With the default maxsockbuf of 256K, a scale factor
+ * of 3 will be chosen by this algorithm. Those who
+ * choose a larger maxsockbuf should watch out
+ * for the compatibility problems mentioned above.
+ *
+ * RFC1323: The Window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
+ */
+ while (wscale < TCP_MAX_WINSHIFT &&
+ (TCP_MAXWIN << wscale) < sb_max)
+ wscale++;
+ sc->sc_requested_r_scale = wscale;
+ sc->sc_requested_s_scale = to->to_wscale;
+ sc->sc_flags |= SCF_WINSCALE;
+ }
+ }
+#ifdef TCP_SIGNATURE
+ /*
+ * If listening socket requested TCP digests, and received SYN
+ * contains the option, flag this in the syncache so that
+ * syncache_respond() will do the right thing with the SYN+ACK.
+ * XXX: Currently we always record the option by default and will
+ * attempt to use it in syncache_respond().
+ */
+ if (to->to_flags & TOF_SIGNATURE)
+ sc->sc_flags |= SCF_SIGNATURE;
+#endif
+ if (to->to_flags & TOF_SACKPERM)
+ sc->sc_flags |= SCF_SACK;
+ if (to->to_flags & TOF_MSS)
+ sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */
+ if (noopt)
+ sc->sc_flags |= SCF_NOOPT;
+ if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
+ sc->sc_flags |= SCF_ECN;
+
+ if (V_tcp_syncookies) {
+ syncookie_generate(sch, sc, &flowtmp);
+#ifdef INET6
+ if (autoflowlabel)
+ sc->sc_flowlabel = flowtmp;
+#endif
+ } else {
+#ifdef INET6
+ if (autoflowlabel)
+ sc->sc_flowlabel =
+ (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+#endif
+ }
+ SCH_UNLOCK(sch);
+
+ /*
+ * Do a standard 3-way handshake.
+ */
+ if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
+ if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
+ syncache_free(sc);
+ else if (sc != &scs)
+ syncache_insert(sc, sch); /* locks and unlocks sch */
+ TCPSTAT_INC(tcps_sndacks);
+ TCPSTAT_INC(tcps_sndtotal);
+ } else {
+ if (sc != &scs)
+ syncache_free(sc);
+ TCPSTAT_INC(tcps_sc_dropped);
+ }
+
+done:
+ if (cred != NULL)
+ crfree(cred);
+#ifdef MAC
+ if (sc == &scs)
+ mac_syncache_destroy(&maclabel);
+#endif
+ if (m) {
+
+ *lsop = NULL;
+ m_freem(m);
+ }
+}
+
+static int
+syncache_respond(struct syncache *sc)
+{
+ struct ip *ip = NULL;
+ struct mbuf *m;
+ struct tcphdr *th;
+ int optlen, error;
+ u_int16_t hlen, tlen, mssopt;
+ struct tcpopt to;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
+#endif
+
+ hlen =
+#ifdef INET6
+ (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ip6_hdr) :
+#endif
+ sizeof(struct ip);
+ tlen = hlen + sizeof(struct tcphdr);
+
+ /* Determine MSS we advertise to the other end of the connection. */
+ mssopt = tcp_mssopt(&sc->sc_inc);
+ if (sc->sc_peer_mss)
+ mssopt = max( min(sc->sc_peer_mss, mssopt), V_tcp_minmss);
+
+ /* XXX: Assume that the entire packet will fit in a header mbuf. */
+ KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
+ ("syncache: mbuf too small"));
+
+ /* Create the IP+TCP header from scratch. */
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (ENOBUFS);
+#ifdef MAC
+ mac_syncache_create_mbuf(sc->sc_label, m);
+#endif
+ m->m_data += max_linkhdr;
+ m->m_len = tlen;
+ m->m_pkthdr.len = tlen;
+ m->m_pkthdr.rcvif = NULL;
+
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_vfc = IPV6_VERSION;
+ ip6->ip6_nxt = IPPROTO_TCP;
+ ip6->ip6_src = sc->sc_inc.inc6_laddr;
+ ip6->ip6_dst = sc->sc_inc.inc6_faddr;
+ ip6->ip6_plen = htons(tlen - hlen);
+ /* ip6_hlim is set after checksum */
+ ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
+ ip6->ip6_flow |= sc->sc_flowlabel;
+
+ th = (struct tcphdr *)(ip6 + 1);
+ } else
+#endif
+ {
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(struct ip) >> 2;
+ ip->ip_len = tlen;
+ ip->ip_id = 0;
+ ip->ip_off = 0;
+ ip->ip_sum = 0;
+ ip->ip_p = IPPROTO_TCP;
+ ip->ip_src = sc->sc_inc.inc_laddr;
+ ip->ip_dst = sc->sc_inc.inc_faddr;
+ ip->ip_ttl = sc->sc_ip_ttl;
+ ip->ip_tos = sc->sc_ip_tos;
+
+ /*
+ * See if we should do MTU discovery. Route lookups are
+ * expensive, so we will only unset the DF bit if:
+ *
+ * 1) path_mtu_discovery is disabled
+ * 2) the SCF_UNREACH flag has been set
+ */
+ if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
+ ip->ip_off |= IP_DF;
+
+ th = (struct tcphdr *)(ip + 1);
+ }
+ th->th_sport = sc->sc_inc.inc_lport;
+ th->th_dport = sc->sc_inc.inc_fport;
+
+ th->th_seq = htonl(sc->sc_iss);
+ th->th_ack = htonl(sc->sc_irs + 1);
+ th->th_off = sizeof(struct tcphdr) >> 2;
+ th->th_x2 = 0;
+ th->th_flags = TH_SYN|TH_ACK;
+ th->th_win = htons(sc->sc_wnd);
+ th->th_urp = 0;
+
+ if (sc->sc_flags & SCF_ECN) {
+ th->th_flags |= TH_ECE;
+ TCPSTAT_INC(tcps_ecn_shs);
+ }
+
+ /* Tack on the TCP options. */
+ if ((sc->sc_flags & SCF_NOOPT) == 0) {
+ to.to_flags = 0;
+
+ to.to_mss = mssopt;
+ to.to_flags = TOF_MSS;
+ if (sc->sc_flags & SCF_WINSCALE) {
+ to.to_wscale = sc->sc_requested_r_scale;
+ to.to_flags |= TOF_SCALE;
+ }
+ if (sc->sc_flags & SCF_TIMESTAMP) {
+ /* Virgin timestamp or TCP cookie enhanced one. */
+ to.to_tsval = sc->sc_ts;
+ to.to_tsecr = sc->sc_tsreflect;
+ to.to_flags |= TOF_TS;
+ }
+ if (sc->sc_flags & SCF_SACK)
+ to.to_flags |= TOF_SACKPERM;
+#ifdef TCP_SIGNATURE
+ if (sc->sc_flags & SCF_SIGNATURE)
+ to.to_flags |= TOF_SIGNATURE;
+#endif
+ optlen = tcp_addoptions(&to, (u_char *)(th + 1));
+
+ /* Adjust headers by option size. */
+ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ m->m_len += optlen;
+ m->m_pkthdr.len += optlen;
+
+#ifdef TCP_SIGNATURE
+ if (sc->sc_flags & SCF_SIGNATURE)
+ tcp_signature_compute(m, 0, 0, optlen,
+ to.to_signature, IPSEC_DIR_OUTBOUND);
+#endif
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6)
+ ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
+ else
+#endif
+ ip->ip_len += optlen;
+ } else
+ optlen = 0;
+
+ M_SETFIB(m, sc->sc_inc.inc_fibnum);
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6) {
+ th->th_sum = 0;
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen,
+ tlen + optlen - hlen);
+ ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
+ error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
+ } else
+#endif
+ {
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(tlen + optlen - hlen + IPPROTO_TCP));
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
+ }
+ return (error);
+}
+
+void
+syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m)
+{
+ _syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
+}
+
+void
+tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
+ struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
+ struct toe_usrreqs *tu, void *toepcb)
+{
+ struct tcpopt to;
+
+ bzero(&to, sizeof(struct tcpopt));
+ to.to_mss = toeo->to_mss;
+ to.to_wscale = toeo->to_wscale;
+ to.to_flags = toeo->to_flags;
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+
+ _syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
+}
+
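+/*
+ * Call-flow sketch (illustrative only; the actual call sites live in the
+ * TCP input path, not in this file): the two public entry points above
+ * are expected to be used roughly as
+ *
+ *	SYN arrives on a LISTEN socket:
+ *		syncache_add(inc, &to, th, inp, &lso, m);
+ *	ACK completing the handshake arrives:
+ *		if (syncache_expand(inc, &to, th, &lso, m))
+ *			lso now points at the new SYN_RECEIVED socket
+ */
+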
+/*
+ * The purpose of SYN cookies is to avoid keeping track of all SYN's we
+ * receive and to be able to handle SYN floods from bogus source addresses
+ * (where we will never receive any reply). SYN floods try to exhaust all
+ * our memory and available slots in the SYN cache table to cause a denial
+ * of service to legitimate users of the local host.
+ *
+ * The idea of SYN cookies is to encode and include all necessary information
+ * about the connection setup state within the SYN-ACK we send back and thus
+ * to get along without keeping any local state until the ACK to the SYN-ACK
+ * arrives (if ever). Everything we need to know should be available from
+ * the information we encoded in the SYN-ACK.
+ *
+ * More information about the theory behind SYN cookies and its first
+ * discussion and specification can be found at:
+ * http://cr.yp.to/syncookies.html (overview)
+ * http://cr.yp.to/syncookies/archive (gory details)
+ *
+ * This implementation extends the original idea and the first FreeBSD
+ * implementation by using not only the initial sequence number field to store
+ * information but also the timestamp field if present. This way we can
+ * keep track of the entire state we need to know to recreate the session in
+ * its original form. Almost all TCP speakers implement RFC1323 timestamps
+ * these days. For those that do not, we still have to live with the known
+ * shortcomings of ISN-only SYN cookies.
+ *
+ * Cookie layers:
+ *
+ * Initial sequence number we send:
+ * 31|................................|0
+ * DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP
+ * D = MD5 Digest (first dword)
+ * M = MSS index
+ * R = Rotation of secret
+ * P = Odd or Even secret
+ *
+ * The MD5 digest is computed over the following parameters:
+ * a) randomly rotated secret
+ * b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6)
+ * c) the received initial sequence number from remote host
+ * d) the rotation offset and odd/even bit
+ *
+ * Timestamp we send:
+ * 31|................................|0
+ * DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5
+ * D = MD5 Digest (third dword) (only as filler)
+ * S = Requested send window scale
+ * R = Requested receive window scale
+ * A = SACK allowed
+ * 5 = TCP-MD5 enabled (not implemented yet)
+ * XORed with MD5 digest (fourth dword)
+ *
+ * The timestamp isn't cryptographically secure and doesn't need to be.
+ * The double use of the MD5 digest dwords ties it to a specific remote/
+ * local host/port, remote initial sequence number and our local time
+ * limited secret. A received timestamp is reverted (XORed) and then
+ * the contained MD5 dword is compared to the computed one to ensure the
+ * timestamp belongs to the SYN-ACK we sent. The other parameters may
+ * have been tampered with but this isn't different from supplying bogus
+ * values in the SYN in the first place.
+ *
+ * Some problems with SYN cookies remain however:
+ * Consider the problem of a recreated (and retransmitted) cookie. If the
+ * original SYN was accepted, the connection is established. The second
+ * SYN is inflight, and if it arrives with an ISN that falls within the
+ * receive window, the connection is killed.
+ *
+ * Notes:
+ * A heuristic to determine when to accept syn cookies is not necessary.
+ * An ACK flood would cause the syncookie verification to be attempted,
+ * but a SYN flood causes syncookies to be generated. Both are of equal
+ * cost, so there's no point in trying to optimize the ACK flood case.
+ * Also, if you don't process certain ACKs for some reason, then all someone
+ * would have to do is launch a SYN and ACK flood at the same time, which
+ * would stop cookie verification and defeat the entire purpose of syncookies.
+ */
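+/*
+ * Illustrative decode sketch (not part of the upstream code): a returning
+ * cookie carries the ISS described above in th_ack - 1, and
+ * syncookie_lookup() below recovers the encoded fields roughly as
+ *
+ *	ack = th->th_ack - 1;		the ISS we sent in the SYN|ACK
+ *	odd = ack & 0x1;		which of the two secrets was used
+ *	off = (ack >> 1) & 0x7;		rotation offset into that secret
+ *	mss = (ack >> 4) & 0x7;		index into tcp_sc_msstab[] below
+ *	dgst = ack & ~0x7f;		must equal md5_buffer[0] << 7
+ */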
+static int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 };
+
+static void
+syncookie_generate(struct syncache_head *sch, struct syncache *sc,
+ u_int32_t *flowlabel)
+{
+ MD5_CTX ctx;
+ u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
+ u_int32_t data;
+ u_int32_t *secbits;
+ u_int off, pmss, mss;
+ int i;
+
+ SCH_LOCK_ASSERT(sch);
+
+ /* Which of the two secrets to use. */
+ secbits = sch->sch_oddeven ?
+ sch->sch_secbits_odd : sch->sch_secbits_even;
+
+ /* Reseed secret if too old. */
+ if (sch->sch_reseed < time_uptime) {
+ sch->sch_oddeven = sch->sch_oddeven ? 0 : 1; /* toggle */
+ secbits = sch->sch_oddeven ?
+ sch->sch_secbits_odd : sch->sch_secbits_even;
+ for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++)
+ secbits[i] = arc4random();
+ sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME;
+ }
+
+ /* Secret rotation offset. */
+ off = sc->sc_iss & 0x7; /* iss was randomized before */
+
+ /* Maximum segment size calculation. */
+ pmss =
+ max( min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)), V_tcp_minmss);
+ for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--)
+ if (tcp_sc_msstab[mss] <= pmss)
+ break;
+
+ /* Fold parameters and MD5 digest into the ISN we will send. */
+ data = sch->sch_oddeven;/* odd or even secret, 1 bit */
+ data |= off << 1; /* secret offset, derived from iss, 3 bits */
+ data |= mss << 4; /* mss, 3 bits */
+
+ MD5Init(&ctx);
+ MD5Update(&ctx, ((u_int8_t *)secbits) + off,
+ SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
+ MD5Update(&ctx, secbits, off);
+ MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc));
+ MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs));
+ MD5Update(&ctx, &data, sizeof(data));
+ MD5Final((u_int8_t *)&md5_buffer, &ctx);
+
+ data |= (md5_buffer[0] << 7);
+ sc->sc_iss = data;
+
+#ifdef INET6
+ *flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
+#endif
+
+ /* Additional parameters are stored in the timestamp if present. */
+ if (sc->sc_flags & SCF_TIMESTAMP) {
+ data = ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */
+ data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */
+ data |= sc->sc_requested_s_scale << 2; /* SWIN scale, 4 bits */
+ data |= sc->sc_requested_r_scale << 6; /* RWIN scale, 4 bits */
+ data |= md5_buffer[2] << 10; /* more digest bits */
+ data ^= md5_buffer[3];
+ sc->sc_ts = data;
+ sc->sc_tsoff = data - ticks; /* after XOR */
+ }
+
+ TCPSTAT_INC(tcps_sc_sendcookie);
+}
+
+static struct syncache *
+syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
+ struct syncache *sc, struct tcpopt *to, struct tcphdr *th,
+ struct socket *so)
+{
+ MD5_CTX ctx;
+ u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
+ u_int32_t data = 0;
+ u_int32_t *secbits;
+ tcp_seq ack, seq;
+ int off, mss, wnd, flags;
+
+ SCH_LOCK_ASSERT(sch);
+
+ /*
+ * Pull information out of SYN-ACK/ACK and
+ * revert sequence number advances.
+ */
+ ack = th->th_ack - 1;
+ seq = th->th_seq - 1;
+ off = (ack >> 1) & 0x7;
+ mss = (ack >> 4) & 0x7;
+ flags = ack & 0x7f;
+
+ /* Which of the two secrets to use. */
+ secbits = (flags & 0x1) ? sch->sch_secbits_odd : sch->sch_secbits_even;
+
+ /*
+ * The secret wasn't updated for the lifetime of a syncookie,
+ * so this SYN-ACK/ACK is either too old (replay) or totally bogus.
+ */
+ if (sch->sch_reseed + SYNCOOKIE_LIFETIME < time_uptime) {
+ return (NULL);
+ }
+
+ /* Recompute the digest so we can compare it. */
+ MD5Init(&ctx);
+ MD5Update(&ctx, ((u_int8_t *)secbits) + off,
+ SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
+ MD5Update(&ctx, secbits, off);
+ MD5Update(&ctx, inc, sizeof(*inc));
+ MD5Update(&ctx, &seq, sizeof(seq));
+ MD5Update(&ctx, &flags, sizeof(flags));
+ MD5Final((u_int8_t *)&md5_buffer, &ctx);
+
+ /* Does the digest part of our ACK'ed ISS match? */
+ if ((ack & (~0x7f)) != (md5_buffer[0] << 7))
+ return (NULL);
+
+ /* Does the digest part of our reflected timestamp match? */
+ if (to->to_flags & TOF_TS) {
+ data = md5_buffer[3] ^ to->to_tsecr;
+ if ((data & (~0x3ff)) != (md5_buffer[2] << 10))
+ return (NULL);
+ }
+
+ /* Fill in the syncache values. */
+ bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
+ sc->sc_ipopts = NULL;
+
+ sc->sc_irs = seq;
+ sc->sc_iss = ack;
+
+#ifdef INET6
+ if (inc->inc_flags & INC_ISIPV6) {
+ if (sotoinpcb(so)->inp_flags & IN6P_AUTOFLOWLABEL)
+ sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
+ } else
+#endif
+ {
+ sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl;
+ sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos;
+ }
+
+ /* Additional parameters that were encoded in the timestamp. */
+ if (data) {
+ sc->sc_flags |= SCF_TIMESTAMP;
+ sc->sc_tsreflect = to->to_tsval;
+ sc->sc_ts = to->to_tsecr;
+ sc->sc_tsoff = to->to_tsecr - ticks;
+ sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0;
+ sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0;
+ sc->sc_requested_s_scale = min((data >> 2) & 0xf,
+ TCP_MAX_WINSHIFT);
+ sc->sc_requested_r_scale = min((data >> 6) & 0xf,
+ TCP_MAX_WINSHIFT);
+ if (sc->sc_requested_s_scale || sc->sc_requested_r_scale)
+ sc->sc_flags |= SCF_WINSCALE;
+ } else
+ sc->sc_flags |= SCF_NOOPT;
+
+ wnd = sbspace(&so->so_rcv);
+ wnd = imax(wnd, 0);
+ wnd = imin(wnd, TCP_MAXWIN);
+ sc->sc_wnd = wnd;
+
+ sc->sc_rxmits = 0;
+ sc->sc_peer_mss = tcp_sc_msstab[mss];
+
+ TCPSTAT_INC(tcps_sc_recvcookie);
+ return (sc);
+}
+
+/*
+ * Returns the current number of syncache entries. This number
+ * will probably change before you get around to calling
+ * syncache_pcblist.
+ */
+
+int
+syncache_pcbcount(void)
+{
+ struct syncache_head *sch;
+ int count, i;
+
+ for (count = 0, i = 0; i < V_tcp_syncache.hashsize; i++) {
+ /* No need to lock for a read. */
+ sch = &V_tcp_syncache.hashbase[i];
+ count += sch->sch_length;
+ }
+ return count;
+}
+
+/*
+ * Exports the syncache entries to userland so that netstat can display
+ * them alongside the other sockets. This function is intended to be
+ * called only from tcp_pcblist.
+ *
+ * Due to concurrency on an active system, the number of pcbs exported
+ * may have no relation to max_pcbs. max_pcbs merely indicates the
+ * amount of space the caller allocated for this function to use.
+ */
+int
+syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
+{
+ struct xtcpcb xt;
+ struct syncache *sc;
+ struct syncache_head *sch;
+ int count, error, i;
+
+ for (count = 0, error = 0, i = 0; i < V_tcp_syncache.hashsize; i++) {
+ sch = &V_tcp_syncache.hashbase[i];
+ SCH_LOCK(sch);
+ TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
+ if (count >= max_pcbs) {
+ SCH_UNLOCK(sch);
+ goto exit;
+ }
+ if (cr_cansee(req->td->td_ucred, sc->sc_cred) != 0)
+ continue;
+ bzero(&xt, sizeof(xt));
+ xt.xt_len = sizeof(xt);
+ if (sc->sc_inc.inc_flags & INC_ISIPV6)
+ xt.xt_inp.inp_vflag = INP_IPV6;
+ else
+ xt.xt_inp.inp_vflag = INP_IPV4;
+ bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo));
+ xt.xt_tp.t_inpcb = &xt.xt_inp;
+ xt.xt_tp.t_state = TCPS_SYN_RECEIVED;
+ xt.xt_socket.xso_protocol = IPPROTO_TCP;
+ xt.xt_socket.xso_len = sizeof (struct xsocket);
+ xt.xt_socket.so_type = SOCK_STREAM;
+ xt.xt_socket.so_state = SS_ISCONNECTING;
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ if (error) {
+ SCH_UNLOCK(sch);
+ goto exit;
+ }
+ count++;
+ }
+ SCH_UNLOCK(sch);
+ }
+exit:
+ *pcbs_exported = count;
+ return error;
+}
diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h
new file mode 100644
index 00000000..96ba1535
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_syncache.h
@@ -0,0 +1,127 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_SYNCACHE_HH_
+#define _NETINET_TCP_SYNCACHE_HH_
+#ifdef _KERNEL
+
+struct toeopt;
+
+void syncache_init(void);
+#ifdef VIMAGE
+void syncache_destroy(void);
+#endif
+void syncache_unreach(struct in_conninfo *, struct tcphdr *);
+int syncache_expand(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct socket **, struct mbuf *);
+int tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
+ struct tcphdr *th, struct socket **lsop, struct mbuf *m);
+void syncache_add(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
+void tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
+ struct tcphdr *, struct inpcb *, struct socket **,
+ struct toe_usrreqs *tu, void *toepcb);
+
+void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
+void syncache_badack(struct in_conninfo *);
+int syncache_pcbcount(void);
+int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
+
+struct syncache {
+ TAILQ_ENTRY(syncache) sc_hash;
+ struct in_conninfo sc_inc; /* addresses */
+ int sc_rxttime; /* retransmit time */
+ u_int16_t sc_rxmits; /* retransmit counter */
+ u_int32_t sc_tsreflect; /* timestamp to reflect */
+ u_int32_t sc_ts; /* our timestamp to send */
+ u_int32_t sc_tsoff; /* ts offset w/ syncookies */
+ u_int32_t sc_flowlabel; /* IPv6 flowlabel */
+ tcp_seq sc_irs; /* seq from peer */
+ tcp_seq sc_iss; /* our ISS */
+ struct mbuf *sc_ipopts; /* source route */
+ u_int16_t sc_peer_mss; /* peer's MSS */
+ u_int16_t sc_wnd; /* advertised window */
+ u_int8_t sc_ip_ttl; /* IPv4 TTL */
+ u_int8_t sc_ip_tos; /* IPv4 TOS */
+ u_int8_t sc_requested_s_scale:4,
+ sc_requested_r_scale:4;
+ u_int16_t sc_flags;
+#ifndef TCP_OFFLOAD_DISABLE
+ struct toe_usrreqs *sc_tu; /* TOE operations */
+ void *sc_toepcb; /* TOE protocol block */
+#endif
+ struct label *sc_label; /* MAC label reference */
+ struct ucred *sc_cred; /* cred cache for jail checks */
+};
+
+/*
+ * Flags for the sc_flags field.
+ */
+#define SCF_NOOPT 0x01 /* no TCP options */
+#define SCF_WINSCALE 0x02 /* negotiated window scaling */
+#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */
+ /* MSS is implicit */
+#define SCF_UNREACH 0x10 /* icmp unreachable received */
+#define SCF_SIGNATURE 0x20 /* send MD5 digests */
+#define SCF_SACK 0x80 /* send SACK option */
+#define SCF_ECN 0x100 /* send ECN setup packet */
+
+#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */
+#define SYNCOOKIE_LIFETIME 16 /* seconds */
+
+struct syncache_head {
+ struct vnet *sch_vnet;
+ struct mtx sch_mtx;
+ TAILQ_HEAD(sch_head, syncache) sch_bucket;
+ struct callout sch_timer;
+ int sch_nextc;
+ u_int sch_length;
+ u_int sch_oddeven;
+ u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
+ u_int32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
+ u_int sch_reseed; /* time_uptime, seconds */
+};
+
+struct tcp_syncache {
+ struct syncache_head *hashbase;
+ uma_zone_t zone;
+ u_int hashsize;
+ u_int hashmask;
+ u_int bucket_limit;
+ u_int cache_count; /* XXX: unprotected */
+ u_int cache_limit;
+ u_int rexmt_limit;
+ u_int hash_secret;
+};
+
+#endif /* _KERNEL */
+#endif /* !_NETINET_TCP_SYNCACHE_HH_ */
diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c
new file mode 100644
index 00000000..36e2bec2
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_timer.c
@@ -0,0 +1,660 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_systm.h>
+#ifdef INET6
+#include <freebsd/netinet6/in6_pcb.h>
+#endif
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <freebsd/netinet/tcp_debug.h>
+#endif
+
+int tcp_keepinit;
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
+
+int tcp_keepidle;
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
+
+int tcp_keepintvl;
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
+
+int tcp_delacktime;
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
+ "Time before a delayed ACK is sent");
+
+int tcp_msl;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
+
+int tcp_rexmit_min;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
+ "Minimum Retransmission Timeout");
+
+int tcp_rexmit_slop;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
+ "Retransmission Timer Slop");
+
+static int always_keepalive = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
+ &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+
+int tcp_fast_finwait2_recycle = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
+ &tcp_fast_finwait2_recycle, 0,
+ "Recycle closed FIN_WAIT_2 connections faster");
+
+int tcp_finwait2_timeout;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
+
+
+static int tcp_keepcnt = TCPTV_KEEPCNT;
+ /* max idle probes */
+int tcp_maxpersistidle;
+ /* max idle time in persist */
+int tcp_maxidle;
+
+/*
+ * TCP protocol timeout routine called every 500 ms.
+ * Updates the timestamps used by TCP and causes finite state machine
+ * actions if timers expire.
+ */
+void
+tcp_slowtimo(void)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ (void) tcp_tw_2msl_scan(0);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
+
+int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
+ { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
+
+int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
+ { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
+
+static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
+
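+/*
+ * Worked example (illustrative): with a smoothed RTO of 1 second,
+ * tcp_backoff[] yields retransmit intervals of roughly 1, 2, 4, 8, 16,
+ * 32, 64, ... seconds; TCPT_RANGESET() clamps each value to at most
+ * TCPTV_REXMTMAX (64 seconds), so later intervals stay capped there until
+ * TCP_MAXRXTSHIFT (12) retransmits have failed and the connection is
+ * dropped in tcp_timer_rexmt() below.
+ */
+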
+static int tcp_timer_race;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
+ 0, "Count of t_inpcb races on tcp_discardcb");
+
+/*
+ * TCP timer processing.
+ */
+
+void
+tcp_timer_delack(void *xtp)
+{
+ struct tcpcb *tp = xtp;
+ struct inpcb *inp;
+ CURVNET_SET(tp->t_vnet);
+
+ inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
+ CURVNET_RESTORE();
+ return;
+ }
+ INP_WLOCK(inp);
+ if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack)
+ || !callout_active(&tp->t_timers->tt_delack)) {
+ INP_WUNLOCK(inp);
+ CURVNET_RESTORE();
+ return;
+ }
+ callout_deactivate(&tp->t_timers->tt_delack);
+
+ tp->t_flags |= TF_ACKNOW;
+ TCPSTAT_INC(tcps_delack);
+ (void) tcp_output(tp);
+ INP_WUNLOCK(inp);
+ CURVNET_RESTORE();
+}
+
+void
+tcp_timer_2msl(void *xtp)
+{
+ struct tcpcb *tp = xtp;
+ struct inpcb *inp;
+ CURVNET_SET(tp->t_vnet);
+#ifdef TCPDEBUG
+ int ostate;
+
+ ostate = tp->t_state;
+#endif
+ /*
+ * XXXRW: Does this actually happen?
+ */
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ INP_WLOCK(inp);
+ tcp_free_sackholes(tp);
+ if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) ||
+ !callout_active(&tp->t_timers->tt_2msl)) {
+ INP_WUNLOCK(tp->t_inpcb);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ callout_deactivate(&tp->t_timers->tt_2msl);
+ /*
+ * 2 MSL timeout in shutdown went off. If we're closed but
+ * still waiting for peer to close and connection has been idle
+ * too long, or if 2MSL time is up from TIME_WAIT, delete connection
+ * control block. Otherwise, check again in a bit.
+ *
+ * If fast recycling of FIN_WAIT_2 connections is enabled, we are in
+ * FIN_WAIT_2, and the receiver has closed, there's no point in hanging
+ * onto the FIN_WAIT_2 socket. Just close it, and ignore the fact that
+ * there were recent incoming segments.
+ */
+ if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
+ tp->t_inpcb && tp->t_inpcb->inp_socket &&
+ (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
+ TCPSTAT_INC(tcps_finwait2_drops);
+ tp = tcp_close(tp);
+ } else {
+ if (tp->t_state != TCPS_TIME_WAIT &&
+ ticks - tp->t_rcvtime <= tcp_maxidle)
+ callout_reset(&tp->t_timers->tt_2msl, tcp_keepintvl,
+ tcp_timer_2msl, tp);
+ else
+ tp = tcp_close(tp);
+ }
+
+#ifdef TCPDEBUG
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+ PRU_SLOWTIMO);
+#endif
+ if (tp != NULL)
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+}
+
+void
+tcp_timer_keep(void *xtp)
+{
+ struct tcpcb *tp = xtp;
+ struct tcptemp *t_template;
+ struct inpcb *inp;
+ CURVNET_SET(tp->t_vnet);
+#ifdef TCPDEBUG
+ int ostate;
+
+ ostate = tp->t_state;
+#endif
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ INP_WLOCK(inp);
+ if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep)
+ || !callout_active(&tp->t_timers->tt_keep)) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ callout_deactivate(&tp->t_timers->tt_keep);
+ /*
+ * Keep-alive timer went off; send something
+ * or drop connection if idle for too long.
+ */
+ TCPSTAT_INC(tcps_keeptimeo);
+ if (tp->t_state < TCPS_ESTABLISHED)
+ goto dropit;
+ if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
+ tp->t_state <= TCPS_CLOSING) {
+ if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle)
+ goto dropit;
+ /*
+ * Send a packet designed to force a response
+ * if the peer is up and reachable:
+ * either an ACK if the connection is still alive,
+ * or an RST if the peer has closed the connection
+ * due to timeout or reboot.
+ * Using sequence number tp->snd_una-1
+ * causes the transmitted zero-length segment
+ * to lie outside the receive window;
+ * by the protocol spec, this requires the
+ * correspondent TCP to respond.
+ */
+ TCPSTAT_INC(tcps_keepprobe);
+ t_template = tcpip_maketemplate(inp);
+ if (t_template) {
+ tcp_respond(tp, t_template->tt_ipgen,
+ &t_template->tt_t, (struct mbuf *)NULL,
+ tp->rcv_nxt, tp->snd_una - 1, 0);
+ free(t_template, M_TEMP);
+ }
+ callout_reset(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
+ } else
+ callout_reset(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+
+#ifdef TCPDEBUG
+ if (inp->inp_socket->so_options & SO_DEBUG)
+ tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+ PRU_SLOWTIMO);
+#endif
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+
+dropit:
+ TCPSTAT_INC(tcps_keepdrops);
+ tp = tcp_drop(tp, ETIMEDOUT);
+
+#ifdef TCPDEBUG
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+ PRU_SLOWTIMO);
+#endif
+ if (tp != NULL)
+ INP_WUNLOCK(tp->t_inpcb);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+}
+
+void
+tcp_timer_persist(void *xtp)
+{
+ struct tcpcb *tp = xtp;
+ struct inpcb *inp;
+ CURVNET_SET(tp->t_vnet);
+#ifdef TCPDEBUG
+ int ostate;
+
+ ostate = tp->t_state;
+#endif
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ INP_WLOCK(inp);
+ if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist)
+ || !callout_active(&tp->t_timers->tt_persist)) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ callout_deactivate(&tp->t_timers->tt_persist);
+ /*
+ * Persistence timer into zero window.
+ * Force a byte to be output, if possible.
+ */
+ TCPSTAT_INC(tcps_persisttimeo);
+ /*
+ * Hack: if the peer is dead/unreachable, we do not
+ * time out if the window is closed. After a full
+ * backoff, drop the connection if the idle time
+ * (no responses to probes) reaches the maximum
+ * backoff that we would use if retransmitting.
+ */
+ if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+ (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
+ ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
+ TCPSTAT_INC(tcps_persistdrop);
+ tp = tcp_drop(tp, ETIMEDOUT);
+ goto out;
+ }
+ tcp_setpersist(tp);
+ tp->t_flags |= TF_FORCEDATA;
+ (void) tcp_output(tp);
+ tp->t_flags &= ~TF_FORCEDATA;
+
+out:
+#ifdef TCPDEBUG
+ if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
+ tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
+#endif
+ if (tp != NULL)
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+}
+
+void
+tcp_timer_rexmt(void * xtp)
+{
+ struct tcpcb *tp = xtp;
+ CURVNET_SET(tp->t_vnet);
+ int rexmt;
+ int headlocked;
+ struct inpcb *inp;
+#ifdef TCPDEBUG
+ int ostate;
+
+ ostate = tp->t_state;
+#endif
+ INP_INFO_WLOCK(&V_tcbinfo);
+ headlocked = 1;
+ inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ INP_WLOCK(inp);
+ if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt)
+ || !callout_active(&tp->t_timers->tt_rexmt)) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ callout_deactivate(&tp->t_timers->tt_rexmt);
+ tcp_free_sackholes(tp);
+ /*
+ * Retransmission timer went off. Message has not
+ * been acked within retransmit interval. Back off
+ * to a longer retransmit interval and retransmit one segment.
+ */
+ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+ tp->t_rxtshift = TCP_MAXRXTSHIFT;
+ TCPSTAT_INC(tcps_timeoutdrop);
+ tp = tcp_drop(tp, tp->t_softerror ?
+ tp->t_softerror : ETIMEDOUT);
+ goto out;
+ }
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ headlocked = 0;
+ if (tp->t_rxtshift == 1) {
+ /*
+ * first retransmit; record ssthresh and cwnd so they can
+ * be recovered if this turns out to be a "bad" retransmit.
+ * A retransmit is considered "bad" if an ACK for this
+ * segment is received within RTT/2 interval; the assumption
+ * here is that the ACK was already in flight. See
+ * "On Estimating End-to-End Network Path Properties" by
+ * Allman and Paxson for more details.
+ */
+ tp->snd_cwnd_prev = tp->snd_cwnd;
+ tp->snd_ssthresh_prev = tp->snd_ssthresh;
+ tp->snd_recover_prev = tp->snd_recover;
+ if (IN_FASTRECOVERY(tp))
+ tp->t_flags |= TF_WASFRECOVERY;
+ else
+ tp->t_flags &= ~TF_WASFRECOVERY;
+ tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+ }
+ TCPSTAT_INC(tcps_rexmttimeo);
+ if (tp->t_state == TCPS_SYN_SENT)
+ rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
+ else
+ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
+ TCPT_RANGESET(tp->t_rxtcur, rexmt,
+ tp->t_rttmin, TCPTV_REXMTMAX);
+ /*
+ * Disable rfc1323 if we haven't got any response to
+ * our third SYN to work around some broken terminal servers
+ * (most of which have hopefully been retired) that have bad VJ
+ * header compression code which trashes TCP segments containing
+ * unknown-to-them TCP options.
+ */
+ if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
+ tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
+ /*
+ * If we backed off this far, our srtt estimate is probably bogus.
+ * Clobber it so we'll take the next rtt measurement as our srtt;
+ * move the current srtt into rttvar to keep the current
+ * retransmit times until then.
+ */
+ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+#ifdef INET6
+ if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
+ in6_losing(tp->t_inpcb);
+ else
+#endif
+ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
+ tp->t_srtt = 0;
+ }
+ tp->snd_nxt = tp->snd_una;
+ tp->snd_recover = tp->snd_max;
+ /*
+ * Force a segment to be sent.
+ */
+ tp->t_flags |= TF_ACKNOW;
+ /*
+ * If timing a segment in this window, stop the timer.
+ */
+ tp->t_rtttime = 0;
+ /*
+ * Close the congestion window down to one segment
+ * (we'll open it by one segment for each ack we get).
+ * Since we probably have a window's worth of unacked
+ * data accumulated, this "slow start" keeps us from
+ * dumping all that data as back-to-back packets (which
+ * might overwhelm an intermediate gateway).
+ *
+ * There are two phases to the opening: Initially we
+ * open by one mss on each ack. This makes the window
+ * size increase exponentially with time. If the
+ * window is larger than the path can handle, this
+ * exponential growth results in dropped packet(s)
+ * almost immediately. To get more time between
+ * drops but still "push" the network to take advantage
+ * of improving conditions, we switch from exponential
+ * to linear window opening at some threshold size.
+ * For a threshold, we use half the current window
+ * size, truncated to a multiple of the mss.
+ *
+ * (the minimum cwnd that will give us exponential
+ * growth is 2 mss. We don't allow the threshold
+ * to go below this.)
+ */
+ {
+ u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+ if (win < 2)
+ win = 2;
+ tp->snd_cwnd = tp->t_maxseg;
+ tp->snd_ssthresh = win * tp->t_maxseg;
+ tp->t_dupacks = 0;
+ }
+ EXIT_FASTRECOVERY(tp);
+ tp->t_bytes_acked = 0;
+ (void) tcp_output(tp);
+
+out:
+#ifdef TCPDEBUG
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+ PRU_SLOWTIMO);
+#endif
+ if (tp != NULL)
+ INP_WUNLOCK(inp);
+ if (headlocked)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+}
+
+void
+tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
+{
+ struct callout *t_callout;
+ void *f_callout;
+
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ f_callout = tcp_timer_delack;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ f_callout = tcp_timer_rexmt;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ f_callout = tcp_timer_persist;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ f_callout = tcp_timer_keep;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ f_callout = tcp_timer_2msl;
+ break;
+ default:
+ panic("bad timer_type");
+ }
+ if (delta == 0) {
+ callout_stop(t_callout);
+ } else {
+ callout_reset(t_callout, delta, f_callout, tp);
+ }
+}
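+
+/*
+ * Usage sketch (illustrative only): callers arm, rearm or stop a timer
+ * through tcp_timer_activate(); for example, the syncache code arms the
+ * connection-establishment keepalive with
+ *
+ *	tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+ *
+ * while a delta of 0 stops the corresponding callout, e.g.
+ *
+ *	tcp_timer_activate(tp, TT_REXMT, 0);
+ */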
+
+int
+tcp_timer_active(struct tcpcb *tp, int timer_type)
+{
+ struct callout *t_callout;
+
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ break;
+ default:
+ panic("bad timer_type");
+ }
+ return callout_active(t_callout);
+}
diff --git a/freebsd/sys/netinet/tcp_timer.h b/freebsd/sys/netinet/tcp_timer.h
new file mode 100644
index 00000000..1514a293
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_timer.h
@@ -0,0 +1,183 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_TIMER_HH_
+#define _NETINET_TCP_TIMER_HH_
+
+/*
+ * The TCPT_REXMT timer is used to force retransmissions.
+ * The TCP has the TCPT_REXMT timer set whenever segments
+ * have been sent for which ACKs are expected but not yet
+ * received. If an ACK is received which advances tp->snd_una,
+ * then the retransmit timer is cleared (if there are no more
+ * outstanding segments) or reset to the base value (if there
+ * are more ACKs expected). Whenever the retransmit timer goes off,
+ * we retransmit one unacknowledged segment, and do a backoff
+ * on the retransmit timer.
+ *
+ * The TCPT_PERSIST timer is used to keep window size information
+ * flowing even if the window goes shut. If all previous transmissions
+ * have been acknowledged (so that there are no retransmissions in progress),
+ * and the window is too small to bother sending anything, then we start
+ * the TCPT_PERSIST timer. When it expires, if the window is nonzero,
+ * we go to transmit state. Otherwise, at intervals send a single byte
+ * into the peer's window to force him to update our window information.
+ * We do this at most as often as TCPT_PERSMIN time intervals,
+ * but no more frequently than the current estimate of round-trip
+ * packet time. The TCPT_PERSIST timer is cleared whenever we receive
+ * a window update from the peer.
+ *
+ * The TCPT_KEEP timer is used to keep connections alive. If a
+ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time,
+ * but not yet established, then we drop the connection. Once the connection
+ * is established, if the connection is idle for TCPTV_KEEP_IDLE time
+ * (and keepalives have been enabled on the socket), we begin to probe
+ * the connection. We force the peer to send us a segment by sending:
+ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
+ * This segment is (deliberately) outside the window, and should elicit
+ * an ack segment in response from the peer. If, despite the TCPT_KEEP
+ * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE
+ * amount of time probing, then we drop the connection.
+ */
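
Editor's note: the retransmit backoff described above can be made concrete with a small
user-space sketch. This is an illustration, not code from this tree: it assumes hz = 1000,
a fixed 3-second base RTO (TCPTV_RTOBASE) instead of the live smoothed RTO, and a backoff
table mirroring the values commonly used for tcp_backoff[]; the clamping follows the
TCPT_RANGESET() bounds defined further down in this header.

/* Illustrative sketch only: prints a simplified retransmit schedule. */
#include <stdio.h>

#define HZ		1000		/* assumed ticks per second */
#define REXMT_MIN	(HZ / 33)	/* cf. TCPTV_MIN */
#define REXMT_SLOP	(HZ / 5)	/* cf. TCPTV_CPU_VAR */
#define REXMT_MAX	(64 * HZ)	/* cf. TCPTV_REXMTMAX */
#define MAXRXTSHIFT	12		/* cf. TCP_MAXRXTSHIFT */

static const int backoff[MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };	/* assumed table */

int
main(void)
{
	int base = 3 * HZ;	/* assumed base RTO, cf. TCPTV_RTOBASE */
	int shift, rexmt;

	for (shift = 0; shift <= MAXRXTSHIFT; shift++) {
		/* back off, add the slop, then clamp to [min, max] */
		rexmt = base * backoff[shift] + REXMT_SLOP;
		if (rexmt < REXMT_MIN)
			rexmt = REXMT_MIN;
		if (rexmt > REXMT_MAX)
			rexmt = REXMT_MAX;
		printf("retransmit %2d: timer = %d ticks (%.1f s)\n",
		    shift, rexmt, (double)rexmt / HZ);
	}
	return (0);
}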
+
+/*
+ * Time constants.
+ */
+#define TCPTV_MSL ( 30*hz) /* max seg lifetime (hah!) */
+#define TCPTV_SRTTBASE 0 /* base roundtrip time;
+ if 0, no idea yet */
+#define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */
+#define TCPTV_SRTTDFLT ( 3*hz) /* assumed RTT if no info */
+
+#define TCPTV_PERSMIN ( 5*hz) /* retransmit persistence */
+#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */
+
+#define TCPTV_KEEP_INIT ( 75*hz) /* initial connect keepalive */
+#define TCPTV_KEEP_IDLE (120*60*hz) /* dflt time before probing */
+#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */
+#define TCPTV_KEEPCNT 8 /* max probes before drop */
+
+#define TCPTV_INFLIGHT_RTTTHRESH (10*hz/1000) /* below which inflight
+ disengages, in msec */
+
+#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */
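
Editor's note: taken together, the keepalive constants above imply a timeline that is easy to
miscount. A minimal sketch, assuming hz = 1000 and that the drop threshold is derived as
keepcnt * keepintvl (how the timer code elsewhere in this tree computes tcp_maxidle); the
variable names are local to the example.

#include <stdio.h>

int
main(void)
{
	const int hz = 1000;			/* assumed */
	const int keep_idle = 120 * 60 * hz;	/* cf. TCPTV_KEEP_IDLE */
	const int keepintvl = 75 * hz;		/* cf. TCPTV_KEEPINTVL */
	const int keepcnt = 8;			/* cf. TCPTV_KEEPCNT */
	const int maxidle = keepcnt * keepintvl;	/* assumed derivation */

	printf("idle before first probe: %d s\n", keep_idle / hz);
	printf("probe interval:          %d s\n", keepintvl / hz);
	printf("drop after probing for:  %d s\n", maxidle / hz);
	printf("worst-case total idle:   %d s\n", (keep_idle + maxidle) / hz);
	return (0);
}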
+
+/*
+ * Minimum retransmit timer is 3 ticks, for algorithmic stability.
+ * TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with
+ * the expected worst-case processing variances by the kernels
+ * representing the end points. Such variances do not always show
+ * up in the srtt because the timestamp is often calculated at
+ * the interface rather than at the TCP layer. This value is
+ * typically 50ms. However, it is also possible that delayed
+ * acks (typically 100ms) could create issues so we set the slop
+ * to 200ms to try to cover it. Note that, properly speaking,
+ * delayed-acks should not create a major issue for interactive
+ * environments which 'P'ush the last segment, at least as
+ * long as implementations do the required 'at least one ack
+ * for every two packets' for the non-interactive streaming case.
+ * (maybe the RTO calculation should use 2*RTT instead of RTT
+ * to handle the ack-every-other-packet case).
+ *
+ * The prior minimum of 1*hz (1 second) badly breaks throughput on any
+ * network faster than a modem that has minor (e.g. 1%) packet loss.
+ */
+#define TCPTV_MIN ( hz/33 ) /* minimum allowable value */
+#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */
+#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */
+
+#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */
+
+#define TCP_LINGERTIME 120 /* linger at most 2 minutes */
+
+#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */
+
+#define TCPTV_DELACK (hz / PR_FASTHZ / 2) /* 100ms timeout */
+
+#ifdef TCPTIMERS
+static const char *tcptimers[] =
+ { "REXMT", "PERSIST", "KEEP", "2MSL" };
+#endif
+
+/*
+ * Force a time value to be in a certain range.
+ */
+#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
+ (tv) = (value) + tcp_rexmit_slop; \
+ if ((u_long)(tv) < (u_long)(tvmin)) \
+ (tv) = (tvmin); \
+ if ((u_long)(tv) > (u_long)(tvmax)) \
+ (tv) = (tvmax); \
+} while(0)
+
+#ifdef _KERNEL
+
+struct tcp_timer {
+ struct callout tt_rexmt; /* retransmit timer */
+ struct callout tt_persist; /* retransmit persistence */
+ struct callout tt_keep; /* keepalive */
+ struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
+ struct callout tt_delack; /* delayed ACK timer */
+};
+#define TT_DELACK 0x01
+#define TT_REXMT 0x02
+#define TT_PERSIST 0x04
+#define TT_KEEP 0x08
+#define TT_2MSL 0x10
+
+extern int tcp_keepinit; /* time to establish connection */
+extern int tcp_keepidle; /* time before keepalive probes begin */
+extern int tcp_keepintvl; /* time between keepalive probes */
+extern int tcp_maxidle; /* time to drop after starting probes */
+extern int tcp_delacktime; /* time before sending a delayed ACK */
+extern int tcp_maxpersistidle;
+extern int tcp_rexmit_min;
+extern int tcp_rexmit_slop;
+extern int tcp_msl;
+extern int tcp_ttl; /* time to live for TCP segs */
+extern int tcp_backoff[];
+
+extern int tcp_finwait2_timeout;
+extern int tcp_fast_finwait2_recycle;
+
+void tcp_timer_init(void);
+void tcp_timer_2msl(void *xtp);
+struct tcptw *
+ tcp_tw_2msl_scan(int _reuse); /* XXX temporary */
+void tcp_timer_keep(void *xtp);
+void tcp_timer_persist(void *xtp);
+void tcp_timer_rexmt(void *xtp);
+void tcp_timer_delack(void *xtp);
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_TCP_TIMER_HH_ */
diff --git a/freebsd/sys/netinet/tcp_timewait.c b/freebsd/sys/netinet/tcp_timewait.c
new file mode 100644
index 00000000..92643d0a
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_timewait.c
@@ -0,0 +1,618 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/callout.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/random.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/route.h>
+#include <freebsd/net/if.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+#include <freebsd/netinet/in_pcb.h>
+#ifdef INET6
+#include <freebsd/netinet6/in6_pcb.h>
+#endif
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_var.h>
+#ifdef INET6
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/scope6_var.h>
+#include <freebsd/netinet6/nd6.h>
+#endif
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#ifdef INET6
+#include <freebsd/netinet6/tcp6_var.h>
+#endif
+#include <freebsd/netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <freebsd/netinet/tcp_debug.h>
+#endif
+#include <freebsd/netinet6/ip6protosw.h>
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+static VNET_DEFINE(uma_zone_t, tcptw_zone);
+#define V_tcptw_zone VNET(tcptw_zone)
+static int maxtcptw;
+
+/*
+ * The timed wait queue contains references to each of the TCP sessions
+ * currently in the TIME_WAIT state. The queue pointers, including the
+ * queue pointers in each tcptw structure, are protected using the global
+ * tcbinfo lock, which must be held over queue iteration and modification.
+ */
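
Editor's note: the locking rule spelled out above (every traversal and every link update of the
queue happens with one write lock held) is a general discipline. A self-contained sketch of the
same shape, with a pthread mutex standing in for the tcbinfo lock and a trivial element type
standing in for struct tcptw; none of these names come from the kernel sources.

#include <sys/queue.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int id;
	TAILQ_ENTRY(entry) link;
};

static TAILQ_HEAD(, entry) head = TAILQ_HEAD_INITIALIZER(head);
static pthread_mutex_t head_lock = PTHREAD_MUTEX_INITIALIZER;

static void
insert_tail(int id)
{
	struct entry *e = malloc(sizeof(*e));

	if (e == NULL)
		return;
	e->id = id;
	pthread_mutex_lock(&head_lock);		/* lock held for modification */
	TAILQ_INSERT_TAIL(&head, e, link);
	pthread_mutex_unlock(&head_lock);
}

int
main(void)
{
	struct entry *e;

	insert_tail(1);
	insert_tail(2);
	pthread_mutex_lock(&head_lock);		/* and held over iteration */
	TAILQ_FOREACH(e, &head, link)
		printf("entry %d\n", e->id);
	pthread_mutex_unlock(&head_lock);
	return (0);
}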
+static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl);
+#define V_twq_2msl VNET(twq_2msl)
+
+static void tcp_tw_2msl_reset(struct tcptw *, int);
+static void tcp_tw_2msl_stop(struct tcptw *);
+
+static int
+tcptw_auto_size(void)
+{
+ int halfrange;
+
+ /*
+ * Max out at half the ephemeral port range so that TIME_WAIT
+ * sockets don't tie up too many ephemeral ports.
+ */
+ if (V_ipport_lastauto > V_ipport_firstauto)
+ halfrange = (V_ipport_lastauto - V_ipport_firstauto) / 2;
+ else
+ halfrange = (V_ipport_firstauto - V_ipport_lastauto) / 2;
+ /* Protect against goofy port ranges smaller than 32. */
+ return (imin(imax(halfrange, 32), maxsockets / 5));
+}
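
Editor's note: for concreteness, the sizing rule of tcptw_auto_size() can be exercised in user
space. The port ranges and maxsockets value below are invented examples for illustration, not
system defaults.

#include <stdio.h>

/* Same rule as tcptw_auto_size(): half the ephemeral port range,
 * at least 32, at most maxsockets / 5. */
static int
auto_size(int first, int last, int maxsockets)
{
	int halfrange;

	halfrange = (last > first ? last - first : first - last) / 2;
	if (halfrange < 32)
		halfrange = 32;
	if (halfrange > maxsockets / 5)
		halfrange = maxsockets / 5;
	return (halfrange);
}

int
main(void)
{
	/* illustrative numbers only */
	printf("%d\n", auto_size(49152, 65535, 25600));	/* -> 5120 */
	printf("%d\n", auto_size(10000, 10020, 25600));	/* -> 32 */
	return (0);
}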
+
+static int
+sysctl_maxtcptw(SYSCTL_HANDLER_ARGS)
+{
+ int error, new;
+
+ if (maxtcptw == 0)
+ new = tcptw_auto_size();
+ else
+ new = maxtcptw;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr)
+ if (new >= 32) {
+ maxtcptw = new;
+ uma_zone_set_max(V_tcptw_zone, maxtcptw);
+ }
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
+ &maxtcptw, 0, sysctl_maxtcptw, "IU",
+ "Maximum number of compressed TCP TIME_WAIT entries");
+
+VNET_DEFINE(int, nolocaltimewait) = 0;
+#define V_nolocaltimewait VNET(nolocaltimewait)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_RW,
+ &VNET_NAME(nolocaltimewait), 0,
+ "Do not create compressed TCP TIME_WAIT entries for local connections");
+
+void
+tcp_tw_zone_change(void)
+{
+
+ if (maxtcptw == 0)
+ uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
+}
+
+void
+tcp_tw_init(void)
+{
+
+ V_tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw);
+ if (maxtcptw == 0)
+ uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
+ else
+ uma_zone_set_max(V_tcptw_zone, maxtcptw);
+ TAILQ_INIT(&V_twq_2msl);
+}
+
+#ifdef VIMAGE
+void
+tcp_tw_destroy(void)
+{
+ struct tcptw *tw;
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ while((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
+ tcp_twclose(tw, 0);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+
+ uma_zdestroy(V_tcptw_zone);
+}
+#endif
+
+/*
+ * Move a TCP connection into TIME_WAIT state.
+ * tcbinfo is locked.
+ * inp is locked, and is unlocked before returning.
+ */
+void
+tcp_twstart(struct tcpcb *tp)
+{
+ struct tcptw *tw;
+ struct inpcb *inp = tp->t_inpcb;
+ int acknow;
+ struct socket *so;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* tcp_tw_2msl_reset(). */
+ INP_WLOCK_ASSERT(inp);
+
+ if (V_nolocaltimewait && in_localip(inp->inp_faddr)) {
+ tp = tcp_close(tp);
+ if (tp != NULL)
+ INP_WUNLOCK(inp);
+ return;
+ }
+
+ tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
+ if (tw == NULL) {
+ tw = tcp_tw_2msl_scan(1);
+ if (tw == NULL) {
+ tp = tcp_close(tp);
+ if (tp != NULL)
+ INP_WUNLOCK(inp);
+ return;
+ }
+ }
+ tw->tw_inpcb = inp;
+
+ /*
+ * Recover last window size sent.
+ */
+ tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
+
+ /*
+ * Set t_recent if timestamps are used on the connection.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
+ (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
+ tw->t_recent = tp->ts_recent;
+ tw->ts_offset = tp->ts_offset;
+ } else {
+ tw->t_recent = 0;
+ tw->ts_offset = 0;
+ }
+
+ tw->snd_nxt = tp->snd_nxt;
+ tw->rcv_nxt = tp->rcv_nxt;
+ tw->iss = tp->iss;
+ tw->irs = tp->irs;
+ tw->t_starttime = tp->t_starttime;
+ tw->tw_time = 0;
+
+/* XXX
+ * If this code will
+ * be used for fin-wait-2 state also, then we may need
+ * a ts_recent from the last segment.
+ */
+ acknow = tp->t_flags & TF_ACKNOW;
+
+ /*
+ * First, discard tcpcb state, which includes stopping its timers and
+ * freeing it. tcp_discardcb() used to also release the inpcb, but
+ * that work is now done in the caller.
+ *
+ * Note: soisdisconnected() call used to be made in tcp_discardcb(),
+ * and might not be needed here any longer.
+ */
+ tcp_discardcb(tp);
+ so = inp->inp_socket;
+ soisdisconnected(so);
+ tw->tw_cred = crhold(so->so_cred);
+ SOCK_LOCK(so);
+ tw->tw_so_options = so->so_options;
+ SOCK_UNLOCK(so);
+ if (acknow)
+ tcp_twrespond(tw, TH_ACK);
+ inp->inp_ppcb = tw;
+ inp->inp_flags |= INP_TIMEWAIT;
+ tcp_tw_2msl_reset(tw, 0);
+
+ /*
+ * If the inpcb owns the sole reference to the socket, then we can
+ * detach and free the socket as it is not needed in time wait.
+ */
+ if (inp->inp_flags & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_twstart: !SS_PROTOREF"));
+ inp->inp_flags &= ~INP_SOCKREF;
+ INP_WUNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ } else
+ INP_WUNLOCK(inp);
+}
+
+#if 0
+/*
+ * The approximate rate of ISN increase of Microsoft TCP stacks;
+ * the actual rate is slightly higher due to the addition of
+ * random positive increments.
+ *
+ * Most other new OSes use semi-randomized ISN values, so we
+ * do not need to worry about them.
+ */
+#define MS_ISN_BYTES_PER_SECOND 250000
+
+/*
+ * Determine if the ISN we will generate has advanced beyond the last
+ * sequence number used by the previous connection. If so, indicate
+ * that it is safe to recycle this tw socket by returning 1.
+ */
+int
+tcp_twrecycleable(struct tcptw *tw)
+{
+ tcp_seq new_iss = tw->iss;
+ tcp_seq new_irs = tw->irs;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
+ new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
+
+ if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt))
+ return (1);
+ else
+ return (0);
+}
+#endif
+
+/*
+ * Returns 1 if the TIME_WAIT state was killed and we should start over,
+ * looking for a pcb in the listen state. Returns 0 otherwise.
+ */
+int
+tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
+ struct mbuf *m, int tlen)
+{
+ struct tcptw *tw;
+ int thflags;
+ tcp_seq seq;
+
+ /* tcbinfo lock required for tcp_twclose(), tcp_tw_2msl_reset(). */
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ /*
+ * XXXRW: Time wait state for inpcb has been recycled, but inpcb is
+ * still present. This is undesirable, but temporarily necessary
+ * until we work out how to handle inpcbs whose timewait state has
+ * been removed.
+ */
+ tw = intotw(inp);
+ if (tw == NULL)
+ goto drop;
+
+ thflags = th->th_flags;
+
+ /*
+ * NOTE: for FIN_WAIT_2 (to be added later),
+ * must validate sequence number before accepting RST
+ */
+
+ /*
+ * If the segment contains RST:
+ * Drop the segment - see Stevens, vol. 2, p. 964 and
+ * RFC 1337.
+ */
+ if (thflags & TH_RST)
+ goto drop;
+
+#if 0
+/* PAWS not needed at the moment */
+ /*
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
+ TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+ goto ack;
+ }
+ /*
+ * ts_recent is never updated because we never accept new segments.
+ */
+#endif
+
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ */
+ if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
+ tcp_twclose(tw, 0);
+ return (1);
+ }
+
+ /*
+ * Drop the segment if it does not contain an ACK.
+ */
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+
+ /*
+ * Reset the 2MSL timer if this is a duplicate FIN.
+ */
+ if (thflags & TH_FIN) {
+ seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
+ if (seq + 1 == tw->rcv_nxt)
+ tcp_tw_2msl_reset(tw, 1);
+ }
+
+ /*
+ * Acknowledge the segment if it has data or is not a duplicate ACK.
+ */
+ if (thflags != TH_ACK || tlen != 0 ||
+ th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
+ tcp_twrespond(tw, TH_ACK);
+drop:
+ INP_WUNLOCK(inp);
+ m_freem(m);
+ return (0);
+}
+
+void
+tcp_twclose(struct tcptw *tw, int reuse)
+{
+ struct socket *so;
+ struct inpcb *inp;
+
+ /*
+ * At this point, we are in one of two situations:
+ *
+ * (1) We have no socket, just an inpcb<->twtcp pair. We can free
+ * all state.
+ *
+ * (2) We have a socket -- if we own a reference, release it and
+ * notify the socket layer.
+ */
+ inp = tw->tw_inpcb;
+ KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
+ KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* tcp_tw_2msl_stop(). */
+ INP_WLOCK_ASSERT(inp);
+
+ tw->tw_inpcb = NULL;
+ tcp_tw_2msl_stop(tw);
+ inp->inp_ppcb = NULL;
+ in_pcbdrop(inp);
+
+ so = inp->inp_socket;
+ if (so != NULL) {
+ /*
+ * If there's a socket, handle two cases: first, we own a
+ * strong reference, which we will now release, or we don't,
+ * in which case another reference exists (XXXRW: think
+ * about this more), and we don't need to take action.
+ */
+ if (inp->inp_flags & INP_SOCKREF) {
+ inp->inp_flags &= ~INP_SOCKREF;
+ INP_WUNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ } else {
+ /*
+ * If we don't own the only reference, the socket and
+ * inpcb need to be left around to be handled by
+ * tcp_usr_detach() later.
+ */
+ INP_WUNLOCK(inp);
+ }
+ } else
+ in_pcbfree(inp);
+ TCPSTAT_INC(tcps_closed);
+ crfree(tw->tw_cred);
+ tw->tw_cred = NULL;
+ if (reuse)
+ return;
+ uma_zfree(V_tcptw_zone, tw);
+}
+
+int
+tcp_twrespond(struct tcptw *tw, int flags)
+{
+ struct inpcb *inp = tw->tw_inpcb;
+ struct tcphdr *th;
+ struct mbuf *m;
+ struct ip *ip = NULL;
+ u_int hdrlen, optlen;
+ int error;
+ struct tcpopt to;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
+ int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
+#endif
+
+ INP_WLOCK_ASSERT(inp);
+
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (ENOBUFS);
+ m->m_data += max_linkhdr;
+
+#ifdef MAC
+ mac_inpcb_create_mbuf(inp, m);
+#endif
+
+#ifdef INET6
+ if (isipv6) {
+ hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+ ip6 = mtod(m, struct ip6_hdr *);
+ th = (struct tcphdr *)(ip6 + 1);
+ tcpip_fillheaders(inp, ip6, th);
+ } else
+#endif
+ {
+ hdrlen = sizeof(struct tcpiphdr);
+ ip = mtod(m, struct ip *);
+ th = (struct tcphdr *)(ip + 1);
+ tcpip_fillheaders(inp, ip, th);
+ }
+ to.to_flags = 0;
+
+ /*
+ * Send a timestamp and echo-reply if both our side and our peer
+ * have sent timestamps in our SYN's and this is not a RST.
+ */
+ if (tw->t_recent && flags == TH_ACK) {
+ to.to_flags |= TOF_TS;
+ to.to_tsval = ticks + tw->ts_offset;
+ to.to_tsecr = tw->t_recent;
+ }
+ optlen = tcp_addoptions(&to, (u_char *)(th + 1));
+
+ m->m_len = hdrlen + optlen;
+ m->m_pkthdr.len = m->m_len;
+
+ KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
+
+ th->th_seq = htonl(tw->snd_nxt);
+ th->th_ack = htonl(tw->rcv_nxt);
+ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ th->th_flags = flags;
+ th->th_win = htons(tw->last_win);
+
+#ifdef INET6
+ if (isipv6) {
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
+ sizeof(struct tcphdr) + optlen);
+ ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ error = ip6_output(m, inp->in6p_outputopts, NULL,
+ (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
+ } else
+#endif
+ {
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ ip->ip_len = m->m_pkthdr.len;
+ if (V_path_mtu_discovery)
+ ip->ip_off |= IP_DF;
+ error = ip_output(m, inp->inp_options, NULL,
+ ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
+ NULL, inp);
+ }
+ if (flags & TH_ACK)
+ TCPSTAT_INC(tcps_sndacks);
+ else
+ TCPSTAT_INC(tcps_sndctrl);
+ TCPSTAT_INC(tcps_sndtotal);
+ return (error);
+}
+
+static void
+tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
+{
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tw->tw_inpcb);
+ if (rearm)
+ TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
+ tw->tw_time = ticks + 2 * tcp_msl;
+ TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl);
+}
+
+static void
+tcp_tw_2msl_stop(struct tcptw *tw)
+{
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
+}
+
+struct tcptw *
+tcp_tw_2msl_scan(int reuse)
+{
+ struct tcptw *tw;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ for (;;) {
+ tw = TAILQ_FIRST(&V_twq_2msl);
+ if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0))
+ break;
+ INP_WLOCK(tw->tw_inpcb);
+ tcp_twclose(tw, reuse);
+ if (reuse)
+ return (tw);
+ }
+ return (NULL);
+}
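
Editor's note: the expiry test in the loop above, (tw->tw_time - ticks) > 0, uses a signed
difference so the comparison stays meaningful as the tick counter advances past (and around)
the stored deadline, which is the usual tick-comparison idiom. A stand-alone sketch of the
same idiom, written against an unsigned counter so the wraparound arithmetic is well defined;
the function name is invented for the example.

#include <stdio.h>

/* "Deadline not yet reached" test in the style of tcp_tw_2msl_scan(). */
static int
deadline_pending(unsigned int deadline, unsigned int now)
{
	/* The difference wraps modulo 2^32; interpreting it as signed
	 * works as long as deadlines are well under 2^31 ticks away. */
	return ((int)(deadline - now) > 0);
}

int
main(void)
{
	printf("%d\n", deadline_pending(1000u, 500u));		/* 1: still pending */
	printf("%d\n", deadline_pending(1000u, 1500u));		/* 0: expired */
	/* deadline scheduled just after the counter wraps past zero */
	printf("%d\n", deadline_pending(100u, 4294967000u));	/* 1: still pending */
	return (0);
}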
diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c
new file mode 100644
index 00000000..fc083e05
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_usrreq.c
@@ -0,0 +1,1886 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 2006-2007 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ddb.h>
+#include <freebsd/local/opt_inet.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_tcpdebug.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/mbuf.h>
+#ifdef INET6
+#include <freebsd/sys/domain.h>
+#endif /* INET6 */
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/jail.h>
+
+#ifdef DDB
+#include <freebsd/ddb/ddb.h>
+#endif
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+#include <freebsd/net/vnet.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_systm.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+#include <freebsd/netinet/in_pcb.h>
+#ifdef INET6
+#include <freebsd/netinet6/in6_pcb.h>
+#endif
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip_var.h>
+#ifdef INET6
+#include <freebsd/netinet6/ip6_var.h>
+#include <freebsd/netinet6/scope6_var.h>
+#endif
+#include <freebsd/netinet/tcp.h>
+#include <freebsd/netinet/tcp_fsm.h>
+#include <freebsd/netinet/tcp_seq.h>
+#include <freebsd/netinet/tcp_timer.h>
+#include <freebsd/netinet/tcp_var.h>
+#include <freebsd/netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <freebsd/netinet/tcp_debug.h>
+#endif
+#include <freebsd/netinet/tcp_offload.h>
+
+/*
+ * TCP protocol interface to socket abstraction.
+ */
+static int tcp_attach(struct socket *);
+static int tcp_connect(struct tcpcb *, struct sockaddr *,
+ struct thread *td);
+#ifdef INET6
+static int tcp6_connect(struct tcpcb *, struct sockaddr *,
+ struct thread *td);
+#endif /* INET6 */
+static void tcp_disconnect(struct tcpcb *);
+static void tcp_usrclosed(struct tcpcb *);
+static void tcp_fill_info(struct tcpcb *, struct tcp_info *);
+
+#ifdef TCPDEBUG
+#define TCPDEBUG0 int ostate = 0
+#define TCPDEBUG1() ostate = tp ? tp->t_state : 0
+#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \
+ tcp_trace(TA_USER, ostate, tp, 0, 0, req)
+#else
+#define TCPDEBUG0
+#define TCPDEBUG1()
+#define TCPDEBUG2(req)
+#endif
+
+/*
+ * TCP attaches to socket via pru_attach(), reserving space,
+ * and an internet control block.
+ */
+static int
+tcp_usr_attach(struct socket *so, int proto, struct thread *td)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ int error;
+ TCPDEBUG0;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
+ TCPDEBUG1();
+
+ error = tcp_attach(so);
+ if (error)
+ goto out;
+
+ if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+ so->so_linger = TCP_LINGERTIME;
+
+ inp = sotoinpcb(so);
+ tp = intotcpcb(inp);
+out:
+ TCPDEBUG2(PRU_ATTACH);
+ return error;
+}
+
+/*
+ * tcp_detach is called when the socket layer loses its final reference
+ * to the socket, be it a file descriptor reference, a reference from TCP,
+ * etc. At this point, there is only one case in which we will keep around
+ * inpcb state: time wait.
+ *
+ * This function can probably be re-absorbed back into tcp_usr_detach() now
+ * that there is a single detach path.
+ */
+static void
+tcp_detach(struct socket *so, struct inpcb *inp)
+{
+ struct tcpcb *tp;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
+ KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
+
+ tp = intotcpcb(inp);
+
+ if (inp->inp_flags & INP_TIMEWAIT) {
+ /*
+ * There are two cases to handle: one in which the time wait
+ * state is being discarded (INP_DROPPED), and one in which
+ * this connection will remain in timewait. In the former,
+ * it is time to discard all state (except tcptw, which has
+ * already been discarded by the timewait close code, which
+ * should be further up the call stack somewhere). In the
+ * latter case, we detach from the socket, but leave the pcb
+ * present until timewait ends.
+ *
+ * XXXRW: Would it be cleaner to free the tcptw here?
+ */
+ if (inp->inp_flags & INP_DROPPED) {
+ KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
+ "INP_DROPPED && tp != NULL"));
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+ } else {
+ in_pcbdetach(inp);
+ INP_WUNLOCK(inp);
+ }
+ } else {
+ /*
+		 * If the connection is not in timewait, we consider two
+		 * conditions: one in which no further processing is
+ * necessary (dropped || embryonic), and one in which TCP is
+ * not yet done, but no longer requires the socket, so the
+ * pcb will persist for the time being.
+ *
+ * XXXRW: Does the second case still occur?
+ */
+ if (inp->inp_flags & INP_DROPPED ||
+ tp->t_state < TCPS_SYN_SENT) {
+ tcp_discardcb(tp);
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+ } else
+ in_pcbdetach(inp);
+ }
+}
+
+/*
+ * pru_detach() detaches the TCP protocol from the socket.
+ * If the protocol state is non-embryonic, then can't
+ * do this directly: have to initiate a pru_disconnect(),
+ * which may finish later; embryonic TCB's can just
+ * be discarded here.
+ */
+static void
+tcp_usr_detach(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_detach: inp_socket == NULL"));
+ tcp_detach(so, inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+}
+
+/*
+ * Give the socket an address.
+ */
+static int
+tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ struct sockaddr_in *sinp;
+
+ sinp = (struct sockaddr_in *)nam;
+ if (nam->sa_len != sizeof (*sinp))
+ return (EINVAL);
+ /*
+ * Must check for multicast addresses and disallow binding
+ * to them.
+ */
+ if (sinp->sin_family == AF_INET &&
+ IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
+ return (EAFNOSUPPORT);
+
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ error = in_pcbbind(inp, nam, td->td_ucred);
+out:
+ TCPDEBUG2(PRU_BIND);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+
+ return (error);
+}
+
+#ifdef INET6
+static int
+tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ struct sockaddr_in6 *sin6p;
+
+ sin6p = (struct sockaddr_in6 *)nam;
+ if (nam->sa_len != sizeof (*sin6p))
+ return (EINVAL);
+ /*
+ * Must check for multicast addresses and disallow binding
+ * to them.
+ */
+ if (sin6p->sin6_family == AF_INET6 &&
+ IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
+ return (EAFNOSUPPORT);
+
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ inp->inp_vflag &= ~INP_IPV4;
+ inp->inp_vflag |= INP_IPV6;
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
+ inp->inp_vflag |= INP_IPV4;
+ else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+ struct sockaddr_in sin;
+
+ in6_sin6_2_sin(&sin, sin6p);
+ inp->inp_vflag |= INP_IPV4;
+ inp->inp_vflag &= ~INP_IPV6;
+ error = in_pcbbind(inp, (struct sockaddr *)&sin,
+ td->td_ucred);
+ goto out;
+ }
+ }
+ error = in6_pcbbind(inp, nam, td->td_ucred);
+out:
+ TCPDEBUG2(PRU_BIND);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+}
+#endif /* INET6 */
+
+/*
+ * Prepare to accept connections.
+ */
+static int
+tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ SOCK_LOCK(so);
+ error = solisten_proto_check(so);
+ if (error == 0 && inp->inp_lport == 0)
+ error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+ if (error == 0) {
+ tp->t_state = TCPS_LISTEN;
+ solisten_proto(so, backlog);
+ tcp_offload_listen_open(tp);
+ }
+ SOCK_UNLOCK(so);
+
+out:
+ TCPDEBUG2(PRU_LISTEN);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+}
+
+#ifdef INET6
+static int
+tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ SOCK_LOCK(so);
+ error = solisten_proto_check(so);
+ if (error == 0 && inp->inp_lport == 0) {
+ inp->inp_vflag &= ~INP_IPV4;
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
+ inp->inp_vflag |= INP_IPV4;
+ error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+ }
+ if (error == 0) {
+ tp->t_state = TCPS_LISTEN;
+ solisten_proto(so, backlog);
+ }
+ SOCK_UNLOCK(so);
+
+out:
+ TCPDEBUG2(PRU_LISTEN);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+}
+#endif /* INET6 */
+
+/*
+ * Initiate connection to peer.
+ * Create a template for use in transmissions on this connection.
+ * Enter SYN_SENT state, and mark socket as connecting.
+ * Start keep-alive timer, and seed output sequence space.
+ * Send initial segment on connection.
+ */
+static int
+tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ struct sockaddr_in *sinp;
+
+ sinp = (struct sockaddr_in *)nam;
+ if (nam->sa_len != sizeof (*sinp))
+ return (EINVAL);
+ /*
+ * Must disallow TCP ``connections'' to multicast addresses.
+ */
+ if (sinp->sin_family == AF_INET
+ && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
+ return (EAFNOSUPPORT);
+ if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
+ return (error);
+
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ if ((error = tcp_connect(tp, nam, td)) != 0)
+ goto out;
+ error = tcp_output_connect(so, nam);
+out:
+ TCPDEBUG2(PRU_CONNECT);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+}
+
+#ifdef INET6
+static int
+tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ struct sockaddr_in6 *sin6p;
+
+ TCPDEBUG0;
+
+ sin6p = (struct sockaddr_in6 *)nam;
+ if (nam->sa_len != sizeof (*sin6p))
+ return (EINVAL);
+ /*
+ * Must disallow TCP ``connections'' to multicast addresses.
+ */
+ if (sin6p->sin6_family == AF_INET6
+ && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
+ return (EAFNOSUPPORT);
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+ struct sockaddr_in sin;
+
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
+ error = EINVAL;
+ goto out;
+ }
+
+ in6_sin6_2_sin(&sin, sin6p);
+ inp->inp_vflag |= INP_IPV4;
+ inp->inp_vflag &= ~INP_IPV6;
+ if ((error = prison_remote_ip4(td->td_ucred,
+ &sin.sin_addr)) != 0)
+ goto out;
+ if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
+ goto out;
+ error = tcp_output_connect(so, nam);
+ goto out;
+ }
+ inp->inp_vflag &= ~INP_IPV4;
+ inp->inp_vflag |= INP_IPV6;
+ inp->inp_inc.inc_flags |= INC_ISIPV6;
+ if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
+ goto out;
+ if ((error = tcp6_connect(tp, nam, td)) != 0)
+ goto out;
+ error = tcp_output_connect(so, nam);
+
+out:
+ TCPDEBUG2(PRU_CONNECT);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+}
+#endif /* INET6 */
+
+/*
+ * Initiate disconnect from peer.
+ * If connection never passed embryonic stage, just drop;
+ * else if we don't need to let data drain, then we can just drop anyway,
+ * else have to begin TCP shutdown process: mark socket disconnecting,
+ * drain unread data, state switch to reflect user close, and
+ * send segment (e.g. FIN) to peer. Socket will be really disconnected
+ * when peer sends FIN and acks ours.
+ *
+ * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+ */
+static int
+tcp_usr_disconnect(struct socket *so)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ int error = 0;
+
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_disconnect(tp);
+out:
+ TCPDEBUG2(PRU_DISCONNECT);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+}
+
+/*
+ * Accept a connection. Essentially all the work is done at higher levels;
+ * just return the address of the peer, storing through addr.
+ *
+ * The rationale for acquiring the tcbinfo lock here is somewhat complicated,
+ * and is described in detail in the commit log entry for r175612. Acquiring
+ * it delays an accept(2) racing with sonewconn(), which inserts the socket
+ * before the inpcb address/port fields are initialized. A better fix would
+ * prevent the socket from being placed in the listen queue until all fields
+ * are fully initialized.
+ */
+static int
+tcp_usr_accept(struct socket *so, struct sockaddr **nam)
+{
+ int error = 0;
+ struct inpcb *inp = NULL;
+ struct tcpcb *tp = NULL;
+ struct in_addr addr;
+ in_port_t port = 0;
+ TCPDEBUG0;
+
+ if (so->so_state & SS_ISDISCONNECTED)
+ return (ECONNABORTED);
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
+ INP_INFO_RLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNABORTED;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+
+ /*
+ * We inline in_getpeeraddr and COMMON_END here, so that we can
+ * copy the data of interest and defer the malloc until after we
+ * release the lock.
+ */
+ port = inp->inp_fport;
+ addr = inp->inp_faddr;
+
+out:
+ TCPDEBUG2(PRU_ACCEPT);
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ if (error == 0)
+ *nam = in_sockaddr(port, &addr);
+ return error;
+}
+
+#ifdef INET6
+static int
+tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
+{
+ struct inpcb *inp = NULL;
+ int error = 0;
+ struct tcpcb *tp = NULL;
+ struct in_addr addr;
+ struct in6_addr addr6;
+ in_port_t port = 0;
+ int v4 = 0;
+ TCPDEBUG0;
+
+ if (so->so_state & SS_ISDISCONNECTED)
+ return (ECONNABORTED);
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNABORTED;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+
+ /*
+ * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
+ * copy the data of interest and defer the malloc until after we
+ * release the lock.
+ */
+ if (inp->inp_vflag & INP_IPV4) {
+ v4 = 1;
+ port = inp->inp_fport;
+ addr = inp->inp_faddr;
+ } else {
+ port = inp->inp_fport;
+ addr6 = inp->in6p_faddr;
+ }
+
+out:
+ TCPDEBUG2(PRU_ACCEPT);
+ INP_WUNLOCK(inp);
+ if (error == 0) {
+ if (v4)
+ *nam = in6_v4mapsin6_sockaddr(port, &addr);
+ else
+ *nam = in6_sockaddr(port, &addr6);
+ }
+ return error;
+}
+#endif /* INET6 */
+
+/*
+ * Mark the connection as being incapable of further output.
+ */
+static int
+tcp_usr_shutdown(struct socket *so)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ socantsendmore(so);
+ tcp_usrclosed(tp);
+ if (!(inp->inp_flags & INP_DROPPED))
+ error = tcp_output_disconnect(tp);
+
+out:
+ TCPDEBUG2(PRU_SHUTDOWN);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+
+ return (error);
+}
+
+/*
+ * After a receive, possibly send window update to peer.
+ */
+static int
+tcp_usr_rcvd(struct socket *so, int flags)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ int error = 0;
+
+ TCPDEBUG0;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_output_rcvd(tp);
+
+out:
+ TCPDEBUG2(PRU_RCVD);
+ INP_WUNLOCK(inp);
+ return (error);
+}
+
+/*
+ * Do a send by putting data in output queue and updating urgent
+ * marker if URG set. Possibly send more data. Unlike the other
+ * pru_*() routines, the mbuf chains are our responsibility. We
+ * must either enqueue them or free them. The other pru_* routines
+ * generally are caller-frees.
+ */
+static int
+tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
+ struct sockaddr *nam, struct mbuf *control, struct thread *td)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ int headlocked = 0;
+#ifdef INET6
+ int isipv6;
+#endif
+ TCPDEBUG0;
+
+ /*
+ * We require the pcbinfo lock in two cases:
+ *
+ * (1) An implied connect is taking place, which can result in
+ * binding IPs and ports and hence modification of the pcb hash
+ * chains.
+ *
+ * (2) PRUS_EOF is set, resulting in explicit close on the send.
+ */
+ if ((nam != NULL) || (flags & PRUS_EOF)) {
+ INP_INFO_WLOCK(&V_tcbinfo);
+ headlocked = 1;
+ }
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ if (control)
+ m_freem(control);
+ if (m)
+ m_freem(m);
+ error = ECONNRESET;
+ goto out;
+ }
+#ifdef INET6
+ isipv6 = nam && nam->sa_family == AF_INET6;
+#endif /* INET6 */
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ if (control) {
+ /* TCP doesn't do control messages (rights, creds, etc) */
+ if (control->m_len) {
+ m_freem(control);
+ if (m)
+ m_freem(m);
+ error = EINVAL;
+ goto out;
+ }
+ m_freem(control); /* empty control, just free it */
+ }
+ if (!(flags & PRUS_OOB)) {
+ sbappendstream(&so->so_snd, m);
+ if (nam && tp->t_state < TCPS_SYN_SENT) {
+ /*
+ * Do implied connect if not yet connected,
+ * initialize window to default value, and
+ * initialize maxseg/maxopd using peer's cached
+ * MSS.
+ */
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+#ifdef INET6
+ if (isipv6)
+ error = tcp6_connect(tp, nam, td);
+ else
+#endif /* INET6 */
+ error = tcp_connect(tp, nam, td);
+ if (error)
+ goto out;
+ tp->snd_wnd = TTCP_CLIENT_SND_WND;
+ tcp_mss(tp, -1);
+ }
+ if (flags & PRUS_EOF) {
+ /*
+ * Close the send side of the connection after
+ * the data is sent.
+ */
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ socantsendmore(so);
+ tcp_usrclosed(tp);
+ }
+ if (headlocked) {
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ headlocked = 0;
+ }
+ if (!(inp->inp_flags & INP_DROPPED)) {
+ if (flags & PRUS_MORETOCOME)
+ tp->t_flags |= TF_MORETOCOME;
+ error = tcp_output_send(tp);
+ if (flags & PRUS_MORETOCOME)
+ tp->t_flags &= ~TF_MORETOCOME;
+ }
+ } else {
+ /*
+ * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
+ */
+ SOCKBUF_LOCK(&so->so_snd);
+ if (sbspace(&so->so_snd) < -512) {
+ SOCKBUF_UNLOCK(&so->so_snd);
+ m_freem(m);
+ error = ENOBUFS;
+ goto out;
+ }
+ /*
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section.
+ * Otherwise, snd_up should be one lower.
+ */
+ sbappendstream_locked(&so->so_snd, m);
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (nam && tp->t_state < TCPS_SYN_SENT) {
+ /*
+ * Do implied connect if not yet connected,
+ * initialize window to default value, and
+ * initialize maxseg/maxopd using peer's cached
+ * MSS.
+ */
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+#ifdef INET6
+ if (isipv6)
+ error = tcp6_connect(tp, nam, td);
+ else
+#endif /* INET6 */
+ error = tcp_connect(tp, nam, td);
+ if (error)
+ goto out;
+ tp->snd_wnd = TTCP_CLIENT_SND_WND;
+ tcp_mss(tp, -1);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ headlocked = 0;
+ } else if (nam) {
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ headlocked = 0;
+ }
+ tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
+ tp->t_flags |= TF_FORCEDATA;
+ error = tcp_output_send(tp);
+ tp->t_flags &= ~TF_FORCEDATA;
+ }
+out:
+ TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
+ ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
+ INP_WUNLOCK(inp);
+ if (headlocked)
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+}
+
+/*
+ * Abort the TCP. Drop the connection abruptly.
+ */
+static void
+tcp_usr_abort(struct socket *so)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ TCPDEBUG0;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_abort: inp_socket == NULL"));
+
+ /*
+ * If we still have full TCP state, and we're not dropped, drop.
+ */
+ if (!(inp->inp_flags & INP_TIMEWAIT) &&
+ !(inp->inp_flags & INP_DROPPED)) {
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_drop(tp, ECONNABORTED);
+ TCPDEBUG2(PRU_ABORT);
+ }
+ if (!(inp->inp_flags & INP_DROPPED)) {
+ SOCK_LOCK(so);
+ so->so_state |= SS_PROTOREF;
+ SOCK_UNLOCK(so);
+ inp->inp_flags |= INP_SOCKREF;
+ }
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+}
+
+/*
+ * TCP socket is closed. Start friendly disconnect.
+ */
+static void
+tcp_usr_close(struct socket *so)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ TCPDEBUG0;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_close: inp_socket == NULL"));
+
+ /*
+ * If we still have full TCP state, and we're not dropped, initiate
+ * a disconnect.
+ */
+ if (!(inp->inp_flags & INP_TIMEWAIT) &&
+ !(inp->inp_flags & INP_DROPPED)) {
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_disconnect(tp);
+ TCPDEBUG2(PRU_CLOSE);
+ }
+ if (!(inp->inp_flags & INP_DROPPED)) {
+ SOCK_LOCK(so);
+ so->so_state |= SS_PROTOREF;
+ SOCK_UNLOCK(so);
+ inp->inp_flags |= INP_SOCKREF;
+ }
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+}
+
+/*
+ * Receive out-of-band data.
+ */
+static int
+tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+
+ TCPDEBUG0;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ if ((so->so_oobmark == 0 &&
+ (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
+ so->so_options & SO_OOBINLINE ||
+ tp->t_oobflags & TCPOOB_HADDATA) {
+ error = EINVAL;
+ goto out;
+ }
+ if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
+ error = EWOULDBLOCK;
+ goto out;
+ }
+ m->m_len = 1;
+ *mtod(m, caddr_t) = tp->t_iobc;
+ if ((flags & MSG_PEEK) == 0)
+ tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+
+out:
+ TCPDEBUG2(PRU_RCVOOB);
+ INP_WUNLOCK(inp);
+ return (error);
+}
+
+struct pr_usrreqs tcp_usrreqs = {
+ .pru_abort = tcp_usr_abort,
+ .pru_accept = tcp_usr_accept,
+ .pru_attach = tcp_usr_attach,
+ .pru_bind = tcp_usr_bind,
+ .pru_connect = tcp_usr_connect,
+ .pru_control = in_control,
+ .pru_detach = tcp_usr_detach,
+ .pru_disconnect = tcp_usr_disconnect,
+ .pru_listen = tcp_usr_listen,
+ .pru_peeraddr = in_getpeeraddr,
+ .pru_rcvd = tcp_usr_rcvd,
+ .pru_rcvoob = tcp_usr_rcvoob,
+ .pru_send = tcp_usr_send,
+ .pru_shutdown = tcp_usr_shutdown,
+ .pru_sockaddr = in_getsockaddr,
+#if 0
+ .pru_soreceive = soreceive_stream,
+#endif
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = tcp_usr_close,
+};
+
+#ifdef INET6
+struct pr_usrreqs tcp6_usrreqs = {
+ .pru_abort = tcp_usr_abort,
+ .pru_accept = tcp6_usr_accept,
+ .pru_attach = tcp_usr_attach,
+ .pru_bind = tcp6_usr_bind,
+ .pru_connect = tcp6_usr_connect,
+ .pru_control = in6_control,
+ .pru_detach = tcp_usr_detach,
+ .pru_disconnect = tcp_usr_disconnect,
+ .pru_listen = tcp6_usr_listen,
+ .pru_peeraddr = in6_mapped_peeraddr,
+ .pru_rcvd = tcp_usr_rcvd,
+ .pru_rcvoob = tcp_usr_rcvoob,
+ .pru_send = tcp_usr_send,
+ .pru_shutdown = tcp_usr_shutdown,
+ .pru_sockaddr = in6_mapped_sockaddr,
+#if 0
+ .pru_soreceive = soreceive_stream,
+#endif
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = tcp_usr_close,
+};
+#endif /* INET6 */
+
+/*
+ * Common subroutine to open a TCP connection to remote host specified
+ * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
+ * port number if needed. Call in_pcbconnect_setup to do the routing and
+ * to choose a local host address (interface). If there is an existing
+ * incarnation of the same connection in TIME-WAIT state and if the remote
+ * host was sending CC options and if the connection duration was < MSL, then
+ * truncate the previous TIME-WAIT state and proceed.
+ * Initialize connection parameters and enter SYN-SENT state.
+ */
+static int
+tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
+{
+ struct inpcb *inp = tp->t_inpcb, *oinp;
+ struct socket *so = inp->inp_socket;
+ struct in_addr laddr;
+ u_short lport;
+ int error;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ if (inp->inp_lport == 0) {
+ error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Cannot simply call in_pcbconnect, because there might be an
+ * earlier incarnation of this same connection still in
+ * TIME_WAIT state, creating an ADDRINUSE error.
+ */
+ laddr = inp->inp_laddr;
+ lport = inp->inp_lport;
+ error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
+ &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
+ if (error && oinp == NULL)
+ return error;
+ if (oinp)
+ return EADDRINUSE;
+ inp->inp_laddr = laddr;
+ in_pcbrehash(inp);
+
+ /*
+ * Compute window scaling to request:
+ * Scale to fit into sweet spot. See tcp_syncache.c.
+ * XXX: This should move to tcp_output().
+ */
+ while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ (TCP_MAXWIN << tp->request_r_scale) < sb_max)
+ tp->request_r_scale++;
+
+ soisconnecting(so);
+ TCPSTAT_INC(tcps_connattempt);
+ tp->t_state = TCPS_SYN_SENT;
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+ tp->iss = tcp_new_isn(tp);
+ tp->t_bw_rtseq = tp->iss;
+ tcp_sendseqinit(tp);
+
+ return 0;
+}
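
Editor's note: the request_r_scale loop above picks the smallest window-scale shift such that
TCP_MAXWIN shifted by it covers sb_max. A worked example as a sketch; the 2 MB sb_max is an
assumed figure for illustration, not a quoted kernel default.

#include <stdio.h>

#define TCP_MAXWIN		65535
#define TCP_MAX_WINSHIFT	14

int
main(void)
{
	unsigned long sb_max = 2UL * 1024 * 1024;	/* assumed value */
	int scale = 0;

	while (scale < TCP_MAX_WINSHIFT &&
	    ((unsigned long)TCP_MAXWIN << scale) < sb_max)
		scale++;
	printf("request_r_scale = %d (window up to %lu bytes)\n",
	    scale, (unsigned long)TCP_MAXWIN << scale);
	return (0);
}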
+
+#ifdef INET6
+static int
+tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
+{
+ struct inpcb *inp = tp->t_inpcb, *oinp;
+ struct socket *so = inp->inp_socket;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
+ struct in6_addr addr6;
+ int error;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ if (inp->inp_lport == 0) {
+ error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Cannot simply call in_pcbconnect, because there might be an
+ * earlier incarnation of this same connection still in
+ * TIME_WAIT state, creating an ADDRINUSE error.
+ * in6_pcbladdr() also handles scope zone IDs.
+ */
+ error = in6_pcbladdr(inp, nam, &addr6);
+ if (error)
+ return error;
+ oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
+ &sin6->sin6_addr, sin6->sin6_port,
+ IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
+ ? &addr6
+ : &inp->in6p_laddr,
+ inp->inp_lport, 0, NULL);
+ if (oinp)
+ return EADDRINUSE;
+ if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+ inp->in6p_laddr = addr6;
+ inp->in6p_faddr = sin6->sin6_addr;
+ inp->inp_fport = sin6->sin6_port;
+ /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
+ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
+ if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
+ inp->inp_flow |=
+ (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+ in_pcbrehash(inp);
+
+ /* Compute window scaling to request. */
+ while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ (TCP_MAXWIN << tp->request_r_scale) < sb_max)
+ tp->request_r_scale++;
+
+ soisconnecting(so);
+ TCPSTAT_INC(tcps_connattempt);
+ tp->t_state = TCPS_SYN_SENT;
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+ tp->iss = tcp_new_isn(tp);
+ tp->t_bw_rtseq = tp->iss;
+ tcp_sendseqinit(tp);
+
+ return 0;
+}
+#endif /* INET6 */
+
+/*
+ * Export TCP internal state information via a struct tcp_info, based on the
+ * Linux 2.6 API. Not ABI compatible as our constants are mapped differently
+ * (TCP state machine, etc). We export all information using FreeBSD-native
+ * constants -- for example, the numeric values for tcpi_state will differ
+ * from Linux.
+ */
+static void
+tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
+{
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ bzero(ti, sizeof(*ti));
+
+ ti->tcpi_state = tp->t_state;
+ if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
+ ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+ if (tp->t_flags & TF_SACK_PERMIT)
+ ti->tcpi_options |= TCPI_OPT_SACK;
+ if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
+ ti->tcpi_options |= TCPI_OPT_WSCALE;
+ ti->tcpi_snd_wscale = tp->snd_scale;
+ ti->tcpi_rcv_wscale = tp->rcv_scale;
+ }
+
+ ti->tcpi_rto = tp->t_rxtcur * tick;
+ ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick;
+ ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
+ ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
+
+ ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
+ ti->tcpi_snd_cwnd = tp->snd_cwnd;
+
+ /*
+ * FreeBSD-specific extension fields for tcp_info.
+ */
+ ti->tcpi_rcv_space = tp->rcv_wnd;
+ ti->tcpi_rcv_nxt = tp->rcv_nxt;
+ ti->tcpi_snd_wnd = tp->snd_wnd;
+ ti->tcpi_snd_bwnd = tp->snd_bwnd;
+ ti->tcpi_snd_nxt = tp->snd_nxt;
+ ti->tcpi_snd_mss = tp->t_maxseg;
+ ti->tcpi_rcv_mss = tp->t_maxseg;
+ if (tp->t_flags & TF_TOE)
+ ti->tcpi_options |= TCPI_OPT_TOE;
+}
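
Editor's note: tcp_fill_info() is what ultimately services the TCP_INFO socket option handled
in tcp_ctloutput() below. A minimal user-space consumer might look like the following sketch;
it assumes a connected TCP socket and that the tcp_info field names match this tree's
<netinet/tcp.h>, and it trims error handling to the essentials.

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>

/* Print a few of the fields filled in by tcp_fill_info();
 * 'fd' must be a connected TCP socket. */
void
print_tcp_info(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == -1) {
		perror("getsockopt(TCP_INFO)");
		return;
	}
	printf("state=%u rto=%uus snd_cwnd=%u snd_mss=%u\n",
	    (unsigned)ti.tcpi_state, (unsigned)ti.tcpi_rto,
	    (unsigned)ti.tcpi_snd_cwnd, (unsigned)ti.tcpi_snd_mss);
}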
+
+/*
+ * tcp_ctloutput() must drop the inpcb lock before performing copyin on
+ * socket option arguments. When it re-acquires the lock after the copy, it
+ * has to revalidate that the connection is still valid for the socket
+ * option.
+ */
+#define INP_WLOCK_RECHECK(inp) do { \
+ INP_WLOCK(inp); \
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \
+ INP_WUNLOCK(inp); \
+ return (ECONNRESET); \
+ } \
+ tp = intotcpcb(inp); \
+} while(0)
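
Editor's note: the macro above encodes a recurring pattern: release the pcb lock before a
copyin() that may sleep, then re-take the lock and confirm the connection was not dropped or
moved to timewait in the meantime. A generic user-space rendering of that shape; struct conn,
conn_set_nodelay, and the pthread mutex are inventions of this sketch, standing in for the
inpcb and its lock.

#include <pthread.h>
#include <errno.h>
#include <string.h>

struct conn {
	pthread_mutex_t lock;
	int dropped;		/* set when the connection is torn down */
	int nodelay;
};

/* Entered with c->lock held; always returns with it released.
 * Copies the argument while unlocked, then re-locks and rechecks
 * that the connection still exists before applying the option. */
int
conn_set_nodelay(struct conn *c, const int *user_val)
{
	int val;

	pthread_mutex_unlock(&c->lock);		/* drop lock for the copy */
	memcpy(&val, user_val, sizeof(val));	/* stand-in for copyin() */
	pthread_mutex_lock(&c->lock);		/* re-lock ... */
	if (c->dropped) {			/* ... and revalidate */
		pthread_mutex_unlock(&c->lock);
		return (ECONNRESET);
	}
	c->nodelay = (val != 0);
	pthread_mutex_unlock(&c->lock);
	return (0);
}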
+
+int
+tcp_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ int error, opt, optval;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ struct tcp_info ti;
+
+ error = 0;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
+ INP_WLOCK(inp);
+ if (sopt->sopt_level != IPPROTO_TCP) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6PROTO) {
+ INP_WUNLOCK(inp);
+ error = ip6_ctloutput(so, sopt);
+ } else {
+#endif /* INET6 */
+ INP_WUNLOCK(inp);
+ error = ip_ctloutput(so, sopt);
+#ifdef INET6
+ }
+#endif
+ return (error);
+ }
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
+ }
+
+ switch (sopt->sopt_dir) {
+ case SOPT_SET:
+ switch (sopt->sopt_name) {
+#ifdef TCP_SIGNATURE
+ case TCP_MD5SIG:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval > 0)
+ tp->t_flags |= TF_SIGNATURE;
+ else
+ tp->t_flags &= ~TF_SIGNATURE;
+ INP_WUNLOCK(inp);
+ break;
+#endif /* TCP_SIGNATURE */
+ case TCP_NODELAY:
+ case TCP_NOOPT:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ switch (sopt->sopt_name) {
+ case TCP_NODELAY:
+ opt = TF_NODELAY;
+ break;
+ case TCP_NOOPT:
+ opt = TF_NOOPT;
+ break;
+ default:
+ opt = 0; /* dead code to fool gcc */
+ break;
+ }
+
+ if (optval)
+ tp->t_flags |= opt;
+ else
+ tp->t_flags &= ~opt;
+ INP_WUNLOCK(inp);
+ break;
+
+ case TCP_NOPUSH:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval)
+ tp->t_flags |= TF_NOPUSH;
+ else if (tp->t_flags & TF_NOPUSH) {
+ tp->t_flags &= ~TF_NOPUSH;
+ if (TCPS_HAVEESTABLISHED(tp->t_state))
+ error = tcp_output(tp);
+ }
+ INP_WUNLOCK(inp);
+ break;
+
+ case TCP_MAXSEG:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval > 0 && optval <= tp->t_maxseg &&
+ optval + 40 >= V_tcp_minmss)
+ tp->t_maxseg = optval;
+ else
+ error = EINVAL;
+ INP_WUNLOCK(inp);
+ break;
+
+ case TCP_INFO:
+ INP_WUNLOCK(inp);
+ error = EINVAL;
+ break;
+
+ default:
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+ break;
+
+ case SOPT_GET:
+ tp = intotcpcb(inp);
+ switch (sopt->sopt_name) {
+#ifdef TCP_SIGNATURE
+ case TCP_MD5SIG:
+ optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+#endif
+
+ case TCP_NODELAY:
+ optval = tp->t_flags & TF_NODELAY;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+ case TCP_MAXSEG:
+ optval = tp->t_maxseg;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+ case TCP_NOOPT:
+ optval = tp->t_flags & TF_NOOPT;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+ case TCP_NOPUSH:
+ optval = tp->t_flags & TF_NOPUSH;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+ case TCP_INFO:
+ tcp_fill_info(tp, &ti);
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &ti, sizeof ti);
+ break;
+ default:
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+ break;
+ }
+ return (error);
+}
+#undef INP_WLOCK_RECHECK
+
+/*
+ * tcp_sendspace and tcp_recvspace are the default send and receive window
+ * sizes, respectively. These are obsolescent (this information should
+ * be set by the route).
+ */
+u_long tcp_sendspace = 1024*32;
+SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
+    &tcp_sendspace, 0, "Maximum outgoing TCP datagram size");
+u_long tcp_recvspace = 1024*64;
+SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+    &tcp_recvspace, 0, "Maximum incoming TCP datagram size");
+
+/*
+ * Attach TCP protocol to socket, allocating
+ * internet protocol control block, tcp control block,
+ * buffer space, and entering LISTEN state if connections are to be accepted.
+ */
+static int
+tcp_attach(struct socket *so)
+{
+ struct tcpcb *tp;
+ struct inpcb *inp;
+ int error;
+
+ if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+ error = soreserve(so, tcp_sendspace, tcp_recvspace);
+ if (error)
+ return (error);
+ }
+ so->so_rcv.sb_flags |= SB_AUTOSIZE;
+ so->so_snd.sb_flags |= SB_AUTOSIZE;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ error = in_pcballoc(so, &V_tcbinfo);
+ if (error) {
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (error);
+ }
+ inp = sotoinpcb(so);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6PROTO) {
+ inp->inp_vflag |= INP_IPV6;
+ inp->in6p_hops = -1; /* use kernel default */
+ }
+ else
+#endif
+ inp->inp_vflag |= INP_IPV4;
+ tp = tcp_newtcpcb(inp);
+ if (tp == NULL) {
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (ENOBUFS);
+ }
+ tp->t_state = TCPS_CLOSED;
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ return (0);
+}
+
+/*
+ * Initiate (or continue) disconnect.
+ * If embryonic state, just send reset (once).
+ * If in ``let data drain'' option and linger null, just drop.
+ * Otherwise (hard), mark socket disconnecting and drop
+ * current input data; switch states based on user close, and
+ * send segment to peer (with FIN).
+ */
+static void
+tcp_disconnect(struct tcpcb *tp)
+{
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so = inp->inp_socket;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ /*
+ * Neither tcp_close() nor tcp_drop() should return NULL, as the
+ * socket is still open.
+ */
+ if (tp->t_state < TCPS_ESTABLISHED) {
+ tp = tcp_close(tp);
+ KASSERT(tp != NULL,
+ ("tcp_disconnect: tcp_close() returned NULL"));
+ } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
+ tp = tcp_drop(tp, 0);
+ KASSERT(tp != NULL,
+ ("tcp_disconnect: tcp_drop() returned NULL"));
+ } else {
+ soisdisconnecting(so);
+ sbflush(&so->so_rcv);
+ tcp_usrclosed(tp);
+ if (!(inp->inp_flags & INP_DROPPED))
+ tcp_output_disconnect(tp);
+ }
+}
+
+/*
+ * User issued close, and wish to trail through shutdown states:
+ * if never received SYN, just forget it. If got a SYN from peer,
+ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
+ * If already got a FIN from peer, then almost done; go to LAST_ACK
+ * state. In all other cases, have already sent FIN to peer (e.g.
+ * after PRU_SHUTDOWN), and just have to play tedious game waiting
+ * for peer to send FIN or not respond to keep-alives, etc.
+ * We can let the user exit from the close as soon as the FIN is acked.
+ */
+static void
+tcp_usrclosed(struct tcpcb *tp)
+{
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ switch (tp->t_state) {
+ case TCPS_LISTEN:
+ tcp_offload_listen_close(tp);
+ /* FALLTHROUGH */
+ case TCPS_CLOSED:
+ tp->t_state = TCPS_CLOSED;
+ tp = tcp_close(tp);
+ /*
+ * tcp_close() should never return NULL here as the socket is
+ * still open.
+ */
+ KASSERT(tp != NULL,
+ ("tcp_usrclosed: tcp_close() returned NULL"));
+ break;
+
+ case TCPS_SYN_SENT:
+ case TCPS_SYN_RECEIVED:
+ tp->t_flags |= TF_NEEDFIN;
+ break;
+
+ case TCPS_ESTABLISHED:
+ tp->t_state = TCPS_FIN_WAIT_1;
+ break;
+
+ case TCPS_CLOSE_WAIT:
+ tp->t_state = TCPS_LAST_ACK;
+ break;
+ }
+ if (tp->t_state >= TCPS_FIN_WAIT_2) {
+ soisdisconnected(tp->t_inpcb->inp_socket);
+ /* Prevent the connection hanging in FIN_WAIT_2 forever. */
+ if (tp->t_state == TCPS_FIN_WAIT_2) {
+ int timeout;
+
+ timeout = (tcp_fast_finwait2_recycle) ?
+ tcp_finwait2_timeout : tcp_maxidle;
+ tcp_timer_activate(tp, TT_2MSL, timeout);
+ }
+ }
+}
+
+#ifdef DDB
+static void
+db_print_indent(int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ db_printf(" ");
+}
+
+static void
+db_print_tstate(int t_state)
+{
+
+ switch (t_state) {
+ case TCPS_CLOSED:
+ db_printf("TCPS_CLOSED");
+ return;
+
+ case TCPS_LISTEN:
+ db_printf("TCPS_LISTEN");
+ return;
+
+ case TCPS_SYN_SENT:
+ db_printf("TCPS_SYN_SENT");
+ return;
+
+ case TCPS_SYN_RECEIVED:
+ db_printf("TCPS_SYN_RECEIVED");
+ return;
+
+ case TCPS_ESTABLISHED:
+ db_printf("TCPS_ESTABLISHED");
+ return;
+
+ case TCPS_CLOSE_WAIT:
+ db_printf("TCPS_CLOSE_WAIT");
+ return;
+
+ case TCPS_FIN_WAIT_1:
+ db_printf("TCPS_FIN_WAIT_1");
+ return;
+
+ case TCPS_CLOSING:
+ db_printf("TCPS_CLOSING");
+ return;
+
+ case TCPS_LAST_ACK:
+ db_printf("TCPS_LAST_ACK");
+ return;
+
+ case TCPS_FIN_WAIT_2:
+ db_printf("TCPS_FIN_WAIT_2");
+ return;
+
+ case TCPS_TIME_WAIT:
+ db_printf("TCPS_TIME_WAIT");
+ return;
+
+ default:
+ db_printf("unknown");
+ return;
+ }
+}
+
+static void
+db_print_tflags(u_int t_flags)
+{
+ int comma;
+
+ comma = 0;
+ if (t_flags & TF_ACKNOW) {
+ db_printf("%sTF_ACKNOW", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_DELACK) {
+ db_printf("%sTF_DELACK", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NODELAY) {
+ db_printf("%sTF_NODELAY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NOOPT) {
+ db_printf("%sTF_NOOPT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SENTFIN) {
+ db_printf("%sTF_SENTFIN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_REQ_SCALE) {
+ db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RCVD_SCALE) {
+		db_printf("%sTF_RCVD_SCALE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_REQ_TSTMP) {
+ db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RCVD_TSTMP) {
+ db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SACK_PERMIT) {
+ db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NEEDSYN) {
+ db_printf("%sTF_NEEDSYN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NEEDFIN) {
+ db_printf("%sTF_NEEDFIN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NOPUSH) {
+ db_printf("%sTF_NOPUSH", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_MORETOCOME) {
+ db_printf("%sTF_MORETOCOME", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_LQ_OVERFLOW) {
+ db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_LASTIDLE) {
+ db_printf("%sTF_LASTIDLE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RXWIN0SENT) {
+ db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_FASTRECOVERY) {
+ db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_WASFRECOVERY) {
+ db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SIGNATURE) {
+ db_printf("%sTF_SIGNATURE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_FORCEDATA) {
+ db_printf("%sTF_FORCEDATA", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_TSO) {
+ db_printf("%sTF_TSO", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_ECN_PERMIT) {
+ db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_toobflags(char t_oobflags)
+{
+ int comma;
+
+ comma = 0;
+ if (t_oobflags & TCPOOB_HAVEDATA) {
+ db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_oobflags & TCPOOB_HADDATA) {
+ db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
+{
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, tp);
+
+ indent += 2;
+
+ db_print_indent(indent);
+ db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
+ LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+
+ db_print_indent(indent);
+ db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
+ &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
+
+ db_print_indent(indent);
+ db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl,
+ &tp->t_timers->tt_delack, tp->t_inpcb);
+
+ db_print_indent(indent);
+ db_printf("t_state: %d (", tp->t_state);
+ db_print_tstate(tp->t_state);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("t_flags: 0x%x (", tp->t_flags);
+ db_print_tflags(tp->t_flags);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+	db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n",
+ tp->snd_una, tp->snd_max, tp->snd_nxt);
+
+ db_print_indent(indent);
+ db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n",
+ tp->snd_up, tp->snd_wl1, tp->snd_wl2);
+
+ db_print_indent(indent);
+ db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n",
+ tp->iss, tp->irs, tp->rcv_nxt);
+
+ db_print_indent(indent);
+ db_printf("rcv_adv: 0x%08x rcv_wnd: %lu rcv_up: 0x%08x\n",
+ tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
+
+ db_print_indent(indent);
+ db_printf("snd_wnd: %lu snd_cwnd: %lu snd_bwnd: %lu\n",
+ tp->snd_wnd, tp->snd_cwnd, tp->snd_bwnd);
+
+ db_print_indent(indent);
+ db_printf("snd_ssthresh: %lu snd_bandwidth: %lu snd_recover: "
+ "0x%08x\n", tp->snd_ssthresh, tp->snd_bandwidth,
+ tp->snd_recover);
+
+ db_print_indent(indent);
+	db_printf("t_maxopd: %u t_rcvtime: %u t_starttime: %u\n",
+ tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
+
+ db_print_indent(indent);
+	db_printf("t_rtttime: %u t_rtseq: 0x%08x t_bw_rtttime: %u\n",
+ tp->t_rtttime, tp->t_rtseq, tp->t_bw_rtttime);
+
+ db_print_indent(indent);
+ db_printf("t_bw_rtseq: 0x%08x t_rxtcur: %d t_maxseg: %u "
+ "t_srtt: %d\n", tp->t_bw_rtseq, tp->t_rxtcur, tp->t_maxseg,
+ tp->t_srtt);
+
+ db_print_indent(indent);
+ db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u "
+ "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
+ tp->t_rttbest);
+
+ db_print_indent(indent);
+ db_printf("t_rttupdated: %lu max_sndwnd: %lu t_softerror: %d\n",
+ tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
+
+ db_print_indent(indent);
+ db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
+ db_print_toobflags(tp->t_oobflags);
+ db_printf(") t_iobc: 0x%02x\n", tp->t_iobc);
+
+ db_print_indent(indent);
+ db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n",
+ tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
+
+ db_print_indent(indent);
+ db_printf("ts_recent: %u ts_recent_age: %u\n",
+ tp->ts_recent, tp->ts_recent_age);
+
+ db_print_indent(indent);
+ db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: "
+ "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
+
+ db_print_indent(indent);
+ db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x "
+ "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
+ tp->snd_recover_prev, tp->t_badrxtwin);
+
+ db_print_indent(indent);
+ db_printf("snd_numholes: %d snd_holes first: %p\n",
+ tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
+
+ db_print_indent(indent);
+ db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: "
+ "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
+
+ /* Skip sackblks, sackhint. */
+
+ db_print_indent(indent);
+ db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n",
+ tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
+}
+
+DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
+{
+ struct tcpcb *tp;
+
+ if (!have_addr) {
+ db_printf("usage: show tcpcb <addr>\n");
+ return;
+ }
+ tp = (struct tcpcb *)addr;
+
+ db_print_tcpcb(tp, "tcpcb", 0);
+}
+#endif
diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h
new file mode 100644
index 00000000..77586144
--- /dev/null
+++ b/freebsd/sys/netinet/tcp_var.h
@@ -0,0 +1,687 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_VAR_HH_
+#define _NETINET_TCP_VAR_HH_
+
+#include <freebsd/netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <freebsd/net/vnet.h>
+
+/*
+ * Kernel variables for tcp.
+ */
+VNET_DECLARE(int, tcp_do_rfc1323);
+#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323)
+
+#endif /* _KERNEL */
+
+/* TCP segment queue entry */
+struct tseg_qent {
+ LIST_ENTRY(tseg_qent) tqe_q;
+ int tqe_len; /* TCP segment data length */
+ struct tcphdr *tqe_th; /* a pointer to tcp header */
+ struct mbuf *tqe_m; /* mbuf contains packet */
+};
+LIST_HEAD(tsegqe_head, tseg_qent);
+
+struct sackblk {
+ tcp_seq start; /* start seq no. of sack block */
+ tcp_seq end; /* end seq no. */
+};
+
+struct sackhole {
+ tcp_seq start; /* start seq no. of hole */
+ tcp_seq end; /* end seq no. */
+ tcp_seq rxmit; /* next seq. no in hole to be retransmitted */
+ TAILQ_ENTRY(sackhole) scblink; /* scoreboard linkage */
+};
+
+struct sackhint {
+ struct sackhole *nexthole;
+ int sack_bytes_rexmit;
+
+ int ispare; /* explicit pad for 64bit alignment */
+ uint64_t _pad[2]; /* 1 sacked_bytes, 1 TBD */
+};
+
+struct tcptemp {
+	u_char	tt_ipgen[40]; /* must be the size of the max IP header, now IPv6 */
+ struct tcphdr tt_t;
+};
+
+#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
+
+/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
+#ifdef INET6
+#define ND6_HINT(tp) \
+do { \
+ if ((tp) && (tp)->t_inpcb && \
+ ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
+ nd6_nud_hint(NULL, NULL, 0); \
+} while (0)
+#else
+#define ND6_HINT(tp)
+#endif
+
+/*
+ * Tcp control block, one per tcp; fields:
+ * Organized for 16 byte cacheline efficiency.
+ */
+struct tcpcb {
+ struct tsegqe_head t_segq; /* segment reassembly queue */
+ void *t_pspare[2]; /* new reassembly queue */
+ int t_segqlen; /* segment reassembly queue length */
+ int t_dupacks; /* consecutive dup acks recd */
+
+ struct tcp_timer *t_timers; /* All the TCP timers in one struct */
+
+ struct inpcb *t_inpcb; /* back pointer to internet pcb */
+ int t_state; /* state of this connection */
+ u_int t_flags;
+
+ struct vnet *t_vnet; /* back pointer to parent vnet */
+
+ tcp_seq snd_una; /* send unacknowledged */
+ tcp_seq snd_max; /* highest sequence number sent;
+ * used to recognize retransmits
+ */
+ tcp_seq snd_nxt; /* send next */
+ tcp_seq snd_up; /* send urgent pointer */
+
+ tcp_seq snd_wl1; /* window update seg seq number */
+ tcp_seq snd_wl2; /* window update seg ack number */
+ tcp_seq iss; /* initial send sequence number */
+ tcp_seq irs; /* initial receive sequence number */
+
+ tcp_seq rcv_nxt; /* receive next */
+ tcp_seq rcv_adv; /* advertised window */
+ u_long rcv_wnd; /* receive window */
+ tcp_seq rcv_up; /* receive urgent pointer */
+
+ u_long snd_wnd; /* send window */
+ u_long snd_cwnd; /* congestion-controlled window */
+ u_long snd_bwnd; /* bandwidth-controlled window */
+ u_long snd_ssthresh; /* snd_cwnd size threshold for
+					 * slow start exponential to
+ * linear switch
+ */
+ u_long snd_bandwidth; /* calculated bandwidth or 0 */
+ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */
+
+ u_int t_maxopd; /* mss plus options */
+
+ u_int t_rcvtime; /* inactivity time */
+ u_int t_starttime; /* time connection was established */
+ u_int t_rtttime; /* RTT measurement start time */
+ tcp_seq t_rtseq; /* sequence number being timed */
+
+ u_int t_bw_rtttime; /* used for bandwidth calculation */
+ tcp_seq t_bw_rtseq; /* used for bandwidth calculation */
+
+ int t_rxtcur; /* current retransmit value (ticks) */
+ u_int t_maxseg; /* maximum segment size */
+ int t_srtt; /* smoothed round-trip time */
+ int t_rttvar; /* variance in round-trip time */
+
+ int t_rxtshift; /* log(2) of rexmt exp. backoff */
+ u_int t_rttmin; /* minimum rtt allowed */
+ u_int t_rttbest; /* best rtt we've seen */
+ u_long t_rttupdated; /* number of times rtt sampled */
+ u_long max_sndwnd; /* largest window peer has offered */
+
+ int t_softerror; /* possible error not yet reported */
+/* out-of-band data */
+ char t_oobflags; /* have some */
+ char t_iobc; /* input character */
+/* RFC 1323 variables */
+ u_char snd_scale; /* window scaling for send window */
+ u_char rcv_scale; /* window scaling for recv window */
+ u_char request_r_scale; /* pending window scaling */
+ u_int32_t ts_recent; /* timestamp echo data */
+ u_int ts_recent_age; /* when last updated */
+ u_int32_t ts_offset; /* our timestamp offset */
+
+ tcp_seq last_ack_sent;
+/* experimental */
+ u_long snd_cwnd_prev; /* cwnd prior to retransmit */
+ u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */
+ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
+ u_int t_badrxtwin; /* window for retransmit recovery */
+ u_char snd_limited; /* segments limited transmitted */
+/* SACK related state */
+ int snd_numholes; /* number of holes seen by sender */
+ TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
+ /* SACK scoreboard (sorted) */
+ tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/
+ int rcv_numsacks; /* # distinct sack blks present */
+ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
+ tcp_seq sack_newdata; /* New data xmitted in this recovery
+ episode starts at this seq number */
+ struct sackhint sackhint; /* SACK scoreboard hint */
+	int	t_rttlow;		/* smallest observed RTT */
+ u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
+ int rfbuf_cnt; /* recv buffer autoscaling byte count */
+ struct toe_usrreqs *t_tu; /* offload operations vector */
+ void *t_toe; /* TOE pcb pointer */
+ int t_bytes_acked; /* # bytes acked during current RTT */
+
+ int t_ispare; /* explicit pad for 64bit alignment */
+ void *t_pspare2[6]; /* 2 CC / 4 TBD */
+ uint64_t _pad[12]; /* 7 UTO, 5 TBD (1-2 CC/RTT?) */
+};
+
+/*
+ * Flags and utility macros for the t_flags field.
+ */
+#define TF_ACKNOW 0x000001 /* ack peer immediately */
+#define TF_DELACK 0x000002 /* ack, but try to delay it */
+#define TF_NODELAY 0x000004 /* don't delay packets to coalesce */
+#define TF_NOOPT 0x000008 /* don't use tcp options */
+#define TF_SENTFIN 0x000010 /* have sent FIN */
+#define TF_REQ_SCALE 0x000020 /* have/will request window scaling */
+#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */
+#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */
+#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */
+#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */
+#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */
+#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */
+#define TF_NOPUSH 0x001000 /* don't push */
+#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */
+#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */
+#define TF_LASTIDLE 0x040000 /* connection was previously idle */
+#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */
+#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */
+#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
+#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
+#define TF_FORCEDATA 0x800000 /* force out a byte */
+#define TF_TSO 0x1000000 /* TSO enabled on this connection */
+#define TF_TOE 0x2000000 /* this connection is offloaded */
+#define TF_ECN_PERMIT 0x4000000 /* connection ECN-ready */
+#define TF_ECN_SND_CWR 0x8000000 /* ECN CWR in queue */
+#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
+
+#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY)
+#define ENTER_FASTRECOVERY(tp) tp->t_flags |= TF_FASTRECOVERY
+#define EXIT_FASTRECOVERY(tp) tp->t_flags &= ~TF_FASTRECOVERY
+
+/*
+ * Flags for the t_oobflags field.
+ */
+#define TCPOOB_HAVEDATA 0x01
+#define TCPOOB_HADDATA 0x02
+
+#ifdef TCP_SIGNATURE
+/*
+ * Defines which are needed by the xform_tcp module and tcp_[in|out]put
+ * for SADB verification and lookup.
+ */
+#define TCP_SIGLEN 16 /* length of computed digest in bytes */
+#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */
+#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */
+/*
+ * Only a single SA per host may be specified at this time. An SPI is
+ * needed in order for the KEY_ALLOCSA() lookup to work.
+ */
+#define TCP_SIG_SPI 0x1000
+#endif /* TCP_SIGNATURE */
+
+/*
+ * Structure to hold TCP options that are only used during segment
+ * processing (in tcp_input), but not held in the tcpcb.
+ * It's basically used to reduce the number of parameters
+ * to tcp_dooptions and tcp_addoptions.
+ * The binary order of the to_flags is relevant for packing of the
+ * options in tcp_addoptions.
+ */
+struct tcpopt {
+ u_int64_t to_flags; /* which options are present */
+#define TOF_MSS 0x0001 /* maximum segment size */
+#define TOF_SCALE 0x0002 /* window scaling */
+#define TOF_SACKPERM 0x0004 /* SACK permitted */
+#define TOF_TS 0x0010 /* timestamp */
+#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
+#define TOF_SACK 0x0080 /* Peer sent SACK option */
+#define TOF_MAXOPT 0x0100
+ u_int32_t to_tsval; /* new timestamp */
+ u_int32_t to_tsecr; /* reflected timestamp */
+ u_char *to_sacks; /* pointer to the first SACK blocks */
+ u_char *to_signature; /* pointer to the TCP-MD5 signature */
+ u_int16_t to_mss; /* maximum segment size */
+ u_int8_t to_wscale; /* window scaling */
+ u_int8_t to_nsacks; /* number of SACK blocks */
+};
+
+/*
+ * Flags for tcp_dooptions.
+ */
+#define TO_SYN 0x01 /* parse SYN-only options */
+
+struct hc_metrics_lite { /* must stay in sync with hc_metrics */
+ u_long rmx_mtu; /* MTU for this path */
+ u_long rmx_ssthresh; /* outbound gateway buffer limit */
+ u_long rmx_rtt; /* estimated round trip time */
+ u_long rmx_rttvar; /* estimated rtt variance */
+ u_long rmx_bandwidth; /* estimated bandwidth */
+ u_long rmx_cwnd; /* congestion window */
+ u_long rmx_sendpipe; /* outbound delay-bandwidth product */
+ u_long rmx_recvpipe; /* inbound delay-bandwidth product */
+};
+
+#ifndef _NETINET_IN_PCB_HH_
+struct in_conninfo;
+#endif /* _NETINET_IN_PCB_HH_ */
+
+struct tcptw {
+ struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */
+ tcp_seq snd_nxt;
+ tcp_seq rcv_nxt;
+ tcp_seq iss;
+ tcp_seq irs;
+ u_short last_win; /* cached window value */
+ u_short tw_so_options; /* copy of so_options */
+ struct ucred *tw_cred; /* user credentials */
+ u_int32_t t_recent;
+ u_int32_t ts_offset; /* our timestamp offset */
+ u_int t_starttime;
+ int tw_time;
+ TAILQ_ENTRY(tcptw) tw_2msl;
+};
+
+#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
+#define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb)
+#define sototcpcb(so) (intotcpcb(sotoinpcb(so)))
+
+/*
+ * The smoothed round-trip time and estimated variance
+ * are stored as fixed point numbers scaled by the values below.
+ * For convenience, these scales are also used in smoothing the average
+ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
+ * With these scales, srtt has 3 bits to the right of the binary point,
+ * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the
+ * binary point, and is smoothed with an ALPHA of 0.75.
+ */
+#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */
+#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */
+#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */
+#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */
+#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */
+
+/*
+ * The initial retransmission should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ * This version of the macro adapted from a paper by Lawrence
+ * Brakmo and Larry Peterson which outlines a problem caused
+ * by insufficient precision in the original implementation,
+ * which results in inappropriately large RTO values for very
+ * fast networks.
+ */
+#define TCP_REXMTVAL(tp) \
+ max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
+ + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
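+
+/*
+ * With TCP_RTT_SHIFT == 5 and TCP_RTTVAR_SHIFT == 4 this works out to
+ * max(t_rttmin, ((t_srtt >> 3) + t_rttvar) >> 2), i.e. srtt + 4 * rttvar
+ * converted back to ticks and clamped to the minimum feasible timer.
+ */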
+
+/*
+ * TCP statistics.
+ * Many of these should be kept per connection,
+ * but that's inconvenient at the moment.
+ */
+struct tcpstat {
+ u_long tcps_connattempt; /* connections initiated */
+ u_long tcps_accepts; /* connections accepted */
+ u_long tcps_connects; /* connections established */
+ u_long tcps_drops; /* connections dropped */
+ u_long tcps_conndrops; /* embryonic connections dropped */
+ u_long tcps_minmssdrops; /* average minmss too low drops */
+ u_long tcps_closed; /* conn. closed (includes drops) */
+ u_long tcps_segstimed; /* segs where we tried to get rtt */
+ u_long tcps_rttupdated; /* times we succeeded */
+ u_long tcps_delack; /* delayed acks sent */
+ u_long tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
+ u_long tcps_rexmttimeo; /* retransmit timeouts */
+ u_long tcps_persisttimeo; /* persist timeouts */
+ u_long tcps_keeptimeo; /* keepalive timeouts */
+ u_long tcps_keepprobe; /* keepalive probes sent */
+ u_long tcps_keepdrops; /* connections dropped in keepalive */
+
+ u_long tcps_sndtotal; /* total packets sent */
+ u_long tcps_sndpack; /* data packets sent */
+ u_long tcps_sndbyte; /* data bytes sent */
+ u_long tcps_sndrexmitpack; /* data packets retransmitted */
+ u_long tcps_sndrexmitbyte; /* data bytes retransmitted */
+ u_long tcps_sndrexmitbad; /* unnecessary packet retransmissions */
+ u_long tcps_sndacks; /* ack-only packets sent */
+ u_long tcps_sndprobe; /* window probes sent */
+ u_long tcps_sndurg; /* packets sent with URG only */
+ u_long tcps_sndwinup; /* window update-only packets sent */
+ u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
+
+ u_long tcps_rcvtotal; /* total packets received */
+ u_long tcps_rcvpack; /* packets received in sequence */
+ u_long tcps_rcvbyte; /* bytes received in sequence */
+	u_long	tcps_rcvbadsum;		/* packets received with cksum errs */
+ u_long tcps_rcvbadoff; /* packets received with bad offset */
+ u_long tcps_rcvmemdrop; /* packets dropped for lack of memory */
+ u_long tcps_rcvshort; /* packets received too short */
+ u_long tcps_rcvduppack; /* duplicate-only packets received */
+ u_long tcps_rcvdupbyte; /* duplicate-only bytes received */
+ u_long tcps_rcvpartduppack; /* packets with some duplicate data */
+ u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
+ u_long tcps_rcvoopack; /* out-of-order packets received */
+ u_long tcps_rcvoobyte; /* out-of-order bytes received */
+ u_long tcps_rcvpackafterwin; /* packets with data after window */
+ u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */
+ u_long tcps_rcvafterclose; /* packets rcvd after "close" */
+ u_long tcps_rcvwinprobe; /* rcvd window probe packets */
+ u_long tcps_rcvdupack; /* rcvd duplicate acks */
+ u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */
+ u_long tcps_rcvackpack; /* rcvd ack packets */
+ u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */
+ u_long tcps_rcvwinupd; /* rcvd window update packets */
+ u_long tcps_pawsdrop; /* segments dropped due to PAWS */
+ u_long tcps_predack; /* times hdr predict ok for acks */
+ u_long tcps_preddat; /* times hdr predict ok for data pkts */
+ u_long tcps_pcbcachemiss;
+ u_long tcps_cachedrtt; /* times cached RTT in route updated */
+ u_long tcps_cachedrttvar; /* times cached rttvar updated */
+ u_long tcps_cachedssthresh; /* times cached ssthresh updated */
+ u_long tcps_usedrtt; /* times RTT initialized from route */
+ u_long tcps_usedrttvar; /* times RTTVAR initialized from rt */
+ u_long tcps_usedssthresh; /* times ssthresh initialized from rt*/
+ u_long tcps_persistdrop; /* timeout in persist state */
+ u_long tcps_badsyn; /* bogus SYN, e.g. premature ACK */
+ u_long tcps_mturesent; /* resends due to MTU discovery */
+ u_long tcps_listendrop; /* listen queue overflows */
+ u_long tcps_badrst; /* ignored RSTs in the window */
+
+ u_long tcps_sc_added; /* entry added to syncache */
+ u_long tcps_sc_retransmitted; /* syncache entry was retransmitted */
+ u_long tcps_sc_dupsyn; /* duplicate SYN packet */
+ u_long tcps_sc_dropped; /* could not reply to packet */
+ u_long tcps_sc_completed; /* successful extraction of entry */
+ u_long tcps_sc_bucketoverflow; /* syncache per-bucket limit hit */
+ u_long tcps_sc_cacheoverflow; /* syncache cache limit hit */
+ u_long tcps_sc_reset; /* RST removed entry from syncache */
+ u_long tcps_sc_stale; /* timed out or listen socket gone */
+ u_long tcps_sc_aborted; /* syncache entry aborted */
+ u_long tcps_sc_badack; /* removed due to bad ACK */
+ u_long tcps_sc_unreach; /* ICMP unreachable received */
+ u_long tcps_sc_zonefail; /* zalloc() failed */
+ u_long tcps_sc_sendcookie; /* SYN cookie sent */
+ u_long tcps_sc_recvcookie; /* SYN cookie received */
+
+ u_long tcps_hc_added; /* entry added to hostcache */
+ u_long tcps_hc_bucketoverflow; /* hostcache per bucket limit hit */
+
+ u_long tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
+
+ /* SACK related stats */
+ u_long tcps_sack_recovery_episode; /* SACK recovery episodes */
+ u_long tcps_sack_rexmits; /* SACK rexmit segments */
+ u_long tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
+ u_long tcps_sack_rcv_blocks; /* SACK blocks (options) received */
+ u_long tcps_sack_send_blocks; /* SACK blocks (options) sent */
+ u_long tcps_sack_sboverflow; /* times scoreboard overflowed */
+
+ /* ECN related stats */
+ u_long tcps_ecn_ce; /* ECN Congestion Experienced */
+ u_long tcps_ecn_ect0; /* ECN Capable Transport */
+ u_long tcps_ecn_ect1; /* ECN Capable Transport */
+ u_long tcps_ecn_shs; /* ECN successful handshakes */
+ u_long tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */
+
+ u_long _pad[12]; /* 6 UTO, 6 TBD */
+};
+
+#ifdef _KERNEL
+/*
+ * In-kernel consumers can use these accessor macros directly to update
+ * stats.
+ */
+#define TCPSTAT_ADD(name, val) V_tcpstat.name += (val)
+#define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1)
+
+/*
+ * Kernel module consumers must use this accessor macro.
+ */
+void kmod_tcpstat_inc(int statnum);
+#define KMOD_TCPSTAT_INC(name) \
+ kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(u_long))
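+
+/*
+ * For example, KMOD_TCPSTAT_INC(tcps_rcvtotal) becomes
+ * kmod_tcpstat_inc(offsetof(struct tcpstat, tcps_rcvtotal) / sizeof(u_long)),
+ * so modules address the counter by index rather than by absolute location.
+ */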
+#endif
+
+/*
+ * TCB structure exported to user-land via sysctl(3).
+ * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
+ * included. Not all of our clients do.
+ */
+#if defined(_NETINET_IN_PCB_HH_) && defined(_SYS_SOCKETVAR_HH_)
+struct xtcpcb {
+ size_t xt_len;
+ struct inpcb xt_inp;
+ struct tcpcb xt_tp;
+ struct xsocket xt_socket;
+ u_quad_t xt_alignment_hack;
+};
+#endif
+
+/*
+ * Names for TCP sysctl objects
+ */
+#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */
+#define TCPCTL_MSSDFLT 3 /* MSS default */
+#define TCPCTL_STATS 4 /* statistics (read-only) */
+#define TCPCTL_RTTDFLT 5 /* default RTT estimate */
+#define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */
+#define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */
+#define TCPCTL_SENDSPACE 8 /* send buffer space */
+#define TCPCTL_RECVSPACE 9 /* receive buffer space */
+#define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */
+#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */
+#define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */
+#define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */
+#define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */
+#define TCPCTL_DROP 15 /* drop tcp connection */
+#define TCPCTL_MAXID 16
+#define TCPCTL_FINWAIT2_TIMEOUT 17
+
+#define TCPCTL_NAMES { \
+ { 0, 0 }, \
+ { "rfc1323", CTLTYPE_INT }, \
+ { "mssdflt", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "rttdflt", CTLTYPE_INT }, \
+ { "keepidle", CTLTYPE_INT }, \
+ { "keepintvl", CTLTYPE_INT }, \
+ { "sendspace", CTLTYPE_INT }, \
+ { "recvspace", CTLTYPE_INT }, \
+ { "keepinit", CTLTYPE_INT }, \
+ { "pcblist", CTLTYPE_STRUCT }, \
+ { "delacktime", CTLTYPE_INT }, \
+ { "v6mssdflt", CTLTYPE_INT }, \
+ { "maxid", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_inet_tcp);
+SYSCTL_DECL(_net_inet_tcp_sack);
+MALLOC_DECLARE(M_TCPLOG);
+#endif
+
+VNET_DECLARE(struct inpcbhead, tcb); /* queue of active tcpcb's */
+VNET_DECLARE(struct inpcbinfo, tcbinfo);
+VNET_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
+extern int tcp_log_in_vain;
+VNET_DECLARE(int, tcp_mssdflt); /* XXX */
+VNET_DECLARE(int, tcp_minmss);
+VNET_DECLARE(int, tcp_delack_enabled);
+VNET_DECLARE(int, tcp_do_rfc3390);
+VNET_DECLARE(int, tcp_do_newreno);
+VNET_DECLARE(int, path_mtu_discovery);
+VNET_DECLARE(int, ss_fltsz);
+VNET_DECLARE(int, ss_fltsz_local);
+#define V_tcb VNET(tcb)
+#define V_tcbinfo VNET(tcbinfo)
+#define V_tcpstat VNET(tcpstat)
+#define V_tcp_mssdflt VNET(tcp_mssdflt)
+#define V_tcp_minmss VNET(tcp_minmss)
+#define V_tcp_delack_enabled VNET(tcp_delack_enabled)
+#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390)
+#define V_tcp_do_newreno VNET(tcp_do_newreno)
+#define V_path_mtu_discovery VNET(path_mtu_discovery)
+#define V_ss_fltsz VNET(ss_fltsz)
+#define V_ss_fltsz_local VNET(ss_fltsz_local)
+
+VNET_DECLARE(int, tcp_do_sack); /* SACK enabled/disabled */
+VNET_DECLARE(int, tcp_sc_rst_sock_fail); /* RST on sock alloc failure */
+#define V_tcp_do_sack VNET(tcp_do_sack)
+#define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail)
+
+VNET_DECLARE(int, tcp_do_ecn); /* TCP ECN enabled/disabled */
+VNET_DECLARE(int, tcp_ecn_maxretries);
+#define V_tcp_do_ecn VNET(tcp_do_ecn)
+#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries)
+
+int tcp_addoptions(struct tcpopt *, u_char *);
+struct tcpcb *
+ tcp_close(struct tcpcb *);
+void tcp_discardcb(struct tcpcb *);
+void tcp_twstart(struct tcpcb *);
+#if 0
+int tcp_twrecycleable(struct tcptw *tw);
+#endif
+void tcp_twclose(struct tcptw *_tw, int _reuse);
+void tcp_ctlinput(int, struct sockaddr *, void *);
+int tcp_ctloutput(struct socket *, struct sockopt *);
+#ifndef __rtems__
+struct tcpcb *
+ tcp_drop(struct tcpcb *, int);
+#else
+struct tcpcb *
+tcp_drop(struct tcpcb *tp, int errno);
+#endif
+void tcp_drain(void);
+void tcp_init(void);
+#ifdef VIMAGE
+void tcp_destroy(void);
+#endif
+void tcp_fini(void *);
+char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
+ const void *);
+char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
+ const void *);
+int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
+void tcp_reass_init(void);
+void tcp_reass_flush(struct tcpcb *);
+#ifdef VIMAGE
+void tcp_reass_destroy(void);
+#endif
+void tcp_input(struct mbuf *, int);
+u_long tcp_maxmtu(struct in_conninfo *, int *);
+u_long tcp_maxmtu6(struct in_conninfo *, int *);
+void tcp_mss_update(struct tcpcb *, int, struct hc_metrics_lite *, int *);
+void tcp_mss(struct tcpcb *, int);
+int tcp_mssopt(struct in_conninfo *);
+#ifndef __rtems__
+struct inpcb *
+ tcp_drop_syn_sent(struct inpcb *, int);
+struct inpcb *
+ tcp_mtudisc(struct inpcb *, int);
+#else
+struct inpcb *
+tcp_drop_syn_sent(struct inpcb *inp, int errno);
+struct inpcb *
+tcp_mtudisc(struct inpcb *inp, int errno);
+#endif
+struct tcpcb *
+ tcp_newtcpcb(struct inpcb *);
+int tcp_output(struct tcpcb *);
+void tcp_respond(struct tcpcb *, void *,
+ struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
+void tcp_tw_init(void);
+#ifdef VIMAGE
+void tcp_tw_destroy(void);
+#endif
+void tcp_tw_zone_change(void);
+int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
+ struct mbuf *, int);
+int tcp_twrespond(struct tcptw *, int);
+void tcp_setpersist(struct tcpcb *);
+#ifdef TCP_SIGNATURE
+int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int);
+#endif
+void tcp_slowtimo(void);
+struct tcptemp *
+ tcpip_maketemplate(struct inpcb *);
+void tcpip_fillheaders(struct inpcb *, void *, void *);
+void tcp_timer_activate(struct tcpcb *, int, u_int);
+int tcp_timer_active(struct tcpcb *, int);
+void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
+void tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq);
+/*
+ * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
+ */
+void tcp_hc_init(void);
+#ifdef VIMAGE
+void tcp_hc_destroy(void);
+#endif
+void tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *);
+u_long tcp_hc_getmtu(struct in_conninfo *);
+void tcp_hc_updatemtu(struct in_conninfo *, u_long);
+void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
+
+extern struct pr_usrreqs tcp_usrreqs;
+extern u_long tcp_sendspace;
+extern u_long tcp_recvspace;
+tcp_seq tcp_new_isn(struct tcpcb *);
+
+void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
+void tcp_clean_sackreport(struct tcpcb *tp);
+void tcp_sack_adjust(struct tcpcb *tp);
+struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
+void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
+void tcp_free_sackholes(struct tcpcb *tp);
+int tcp_newreno(struct tcpcb *, struct tcphdr *);
+u_long tcp_seq_subtract(u_long, u_long );
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_TCP_VAR_HH_ */
diff --git a/freebsd/sys/netinet/tcpip.h b/freebsd/sys/netinet/tcpip.h
new file mode 100644
index 00000000..337c07a6
--- /dev/null
+++ b/freebsd/sys/netinet/tcpip.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCPIP_HH_
+#define _NETINET_TCPIP_HH_
+
+/*
+ * Tcp+ip header, after ip options removed.
+ */
+struct tcpiphdr {
+ struct ipovly ti_i; /* overlaid ip structure */
+ struct tcphdr ti_t; /* tcp header */
+};
+#define ti_x1 ti_i.ih_x1
+#define ti_pr ti_i.ih_pr
+#define ti_len ti_i.ih_len
+#define ti_src ti_i.ih_src
+#define ti_dst ti_i.ih_dst
+#define ti_sport ti_t.th_sport
+#define ti_dport ti_t.th_dport
+#define ti_seq ti_t.th_seq
+#define ti_ack ti_t.th_ack
+#define ti_x2 ti_t.th_x2
+#define ti_off ti_t.th_off
+#define ti_flags ti_t.th_flags
+#define ti_win ti_t.th_win
+#define ti_sum ti_t.th_sum
+#define ti_urp ti_t.th_urp
+
+#endif
diff --git a/freebsd/sys/netinet/toedev.h b/freebsd/sys/netinet/toedev.h
new file mode 100644
index 00000000..4623845c
--- /dev/null
+++ b/freebsd/sys/netinet/toedev.h
@@ -0,0 +1,162 @@
+/*-
+ * Copyright (c) 2007, Chelsio Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TOEDEV_HH_
+#define _NETINET_TOEDEV_HH_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+extern uint32_t toedev_registration_count;
+
+/* Parameter values for offload_get_phys_egress(). */
+enum {
+ TOE_OPEN,
+ TOE_FAILOVER,
+};
+
+/* Parameter values for toe_failover(). */
+enum {
+ TOE_ACTIVE_SLAVE,
+ TOE_LINK_DOWN,
+ TOE_LINK_UP,
+ TOE_RELEASE,
+ TOE_RELEASE_ALL,
+};
+
+#define TOENAMSIZ 16
+
+/* Get the toedev associated with a ifnet. */
+#define TOEDEV(ifp) ((ifp)->if_llsoftc)
+
+struct offload_id {
+ unsigned int id;
+ unsigned long data;
+};
+
+struct ifnet;
+struct rt_entry;
+struct tom_info;
+struct sysctl_oid;
+struct socket;
+struct mbuf;
+
+struct toedev {
+ TAILQ_ENTRY(toedev) entry;
+ char tod_name[TOENAMSIZ]; /* TOE device name */
+ unsigned int tod_ttid; /* TOE type id */
+ unsigned long tod_flags; /* device flags */
+ unsigned int tod_mtu; /* max TX offloaded data */
+ unsigned int tod_nconn; /* max # of offloaded
+ * connections
+ */
+ struct ifnet *tod_lldev; /* first interface */
+ const struct tom_info *tod_offload_mod; /* TCP offload module */
+
+ /*
+ * This TOE device is capable of offloading the connection for socket so
+	 * Check whether this TOE device can offload the connection for socket so.
+ int (*tod_can_offload)(struct toedev *dev, struct socket *so);
+
+ /*
+ * Establish a connection to nam using the TOE device dev
+ */
+ int (*tod_connect)(struct toedev *dev, struct socket *so,
+ struct rtentry *rt, struct sockaddr *nam);
+ /*
+ * Send an mbuf down to the toe device
+ */
+ int (*tod_send)(struct toedev *dev, struct mbuf *m);
+ /*
+ * Receive an array of mbufs from the TOE device dev
+ */
+ int (*tod_recv)(struct toedev *dev, struct mbuf **m, int n);
+ /*
+ * Device specific ioctl interface
+ */
+ int (*tod_ctl)(struct toedev *dev, unsigned int req, void *data);
+ /*
+ * Update L2 entry in toedev
+ */
+ void (*tod_arp_update)(struct toedev *dev, struct rtentry *neigh);
+ /*
+ * Failover from one toe device to another
+ */
+ void (*tod_failover)(struct toedev *dev, struct ifnet *bond_ifp,
+ struct ifnet *ndev, int event);
+ void *tod_priv; /* driver private data */
+ void *tod_l2opt; /* optional layer 2 data */
+ void *tod_l3opt; /* optional layer 3 data */
+ void *tod_l4opt; /* optional layer 4 data */
+	void *tod_ulp;		/* upper level protocol */
+};
+
+struct tom_info {
+ TAILQ_ENTRY(tom_info) entry;
+ int (*ti_attach)(struct toedev *dev,
+ const struct offload_id *entry);
+ int (*ti_detach)(struct toedev *dev);
+ const char *ti_name;
+ const struct offload_id *ti_id_table;
+};
+
+static __inline void
+init_offload_dev(struct toedev *dev)
+{
+}
+
+int register_tom(struct tom_info *t);
+int unregister_tom(struct tom_info *t);
+int register_toedev(struct toedev *dev, const char *name);
+int unregister_toedev(struct toedev *dev);
+int activate_offload(struct toedev *dev);
+int toe_send(struct toedev *dev, struct mbuf *m);
+void toe_arp_update(struct rtentry *rt);
+struct ifnet *offload_get_phys_egress(struct ifnet *ifp,
+ struct socket *so, int context);
+int toe_receive_mbuf(struct toedev *dev, struct mbuf **m, int n);
+
+static __inline void
+toe_neigh_update(struct ifnet *ifp)
+{
+}
+
+static __inline void
+toe_failover(struct ifnet *bond_ifp, struct ifnet *fail_ifp, int event)
+{
+}
+
+static __inline int
+toe_enslave(struct ifnet *bond_ifp, struct ifnet *slave_ifp)
+{
+ return (0);
+}
+
+#endif /* _NETINET_TOEDEV_HH_ */
diff --git a/freebsd/sys/netinet/udp.h b/freebsd/sys/netinet/udp.h
new file mode 100644
index 00000000..e7010ac5
--- /dev/null
+++ b/freebsd/sys/netinet/udp.h
@@ -0,0 +1,2 @@
+#include <freebsd/bsd.h>
+#include <freebsd/netinet/udp.h>
diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c
new file mode 100644
index 00000000..f992f5f6
--- /dev/null
+++ b/freebsd/sys/netinet/udp_usrreq.c
@@ -0,0 +1,1633 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California.
+ * Copyright (c) 2008 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ipfw.h>
+#include <freebsd/local/opt_inet6.h>
+#include <freebsd/local/opt_ipsec.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/eventhandler.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mbuf.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socket.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/syslog.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/net/if.h>
+#include <freebsd/net/route.h>
+
+#include <freebsd/netinet/in.h>
+#include <freebsd/netinet/in_pcb.h>
+#include <freebsd/netinet/in_systm.h>
+#include <freebsd/netinet/in_var.h>
+#include <freebsd/netinet/ip.h>
+#ifdef INET6
+#include <freebsd/netinet/ip6.h>
+#endif
+#include <freebsd/netinet/ip_icmp.h>
+#include <freebsd/netinet/icmp_var.h>
+#include <freebsd/netinet/ip_var.h>
+#include <freebsd/netinet/ip_options.h>
+#ifdef INET6
+#include <freebsd/netinet6/ip6_var.h>
+#endif
+#include <freebsd/netinet/udp.h>
+#include <freebsd/netinet/udp_var.h>
+
+#ifdef IPSEC
+#include <freebsd/netipsec/ipsec.h>
+#include <freebsd/netipsec/esp.h>
+#endif
+
+#include <freebsd/machine/in_cksum.h>
+
+#include <freebsd/security/mac/mac_framework.h>
+
+/*
+ * UDP protocol implementation.
+ * Per RFC 768, August, 1980.
+ */
+
+/*
+ * BSD 4.2 defaulted the udp checksum to be off.  Turning off udp checksums
+ * removes the only data integrity mechanism for packets; malformed packets
+ * that would otherwise be discarded due to bad checksums are then delivered
+ * and may cause problems (especially for NFS data blocks).
+ */
+static int udp_cksum = 1;
+SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udp_cksum,
+ 0, "compute udp checksum");
+
+int udp_log_in_vain = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
+ &udp_log_in_vain, 0, "Log all incoming UDP packets");
+
+VNET_DEFINE(int, udp_blackhole) = 0;
+SYSCTL_VNET_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
+ &VNET_NAME(udp_blackhole), 0,
+ "Do not send port unreachables for refused connects");
+
+u_long udp_sendspace = 9216; /* really max datagram size */
+SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
+ &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
+
+u_long	udp_recvspace = 40 * (1024 +	/* 40 1K datagrams */
+#ifdef INET6
+ sizeof(struct sockaddr_in6)
+#else
+ sizeof(struct sockaddr_in)
+#endif
+ );
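+
+/*
+ * The sizeof(sockaddr_in{,6}) term accounts for the source address that
+ * udp_append() queues in the receive buffer alongside each datagram.
+ */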
+
+SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+ &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
+
+VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
+VNET_DEFINE(struct inpcbinfo, udbinfo);
+static VNET_DEFINE(uma_zone_t, udpcb_zone);
+#define V_udpcb_zone VNET(udpcb_zone)
+
+#ifndef UDBHASHSIZE
+#define UDBHASHSIZE 128
+#endif
+
+VNET_DEFINE(struct udpstat, udpstat); /* from udp_var.h */
+SYSCTL_VNET_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
+ &VNET_NAME(udpstat), udpstat,
+ "UDP statistics (struct udpstat, netinet/udp_var.h)");
+
+static void udp_detach(struct socket *so);
+static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
+ struct mbuf *, struct thread *);
+#ifdef IPSEC
+#ifdef IPSEC_NAT_T
+#define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
+#ifdef INET
+static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int);
+#endif
+#endif /* IPSEC_NAT_T */
+#endif /* IPSEC */
+
+static void
+udp_zone_change(void *tag)
+{
+
+ uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
+ uma_zone_set_max(V_udpcb_zone, maxsockets);
+}
+
+static int
+udp_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp;
+
+ inp = mem;
+ INP_LOCK_INIT(inp, "inp", "udpinp");
+ return (0);
+}
+
+void
+udp_init(void)
+{
+
+ INP_INFO_LOCK_INIT(&V_udbinfo, "udp");
+ LIST_INIT(&V_udb);
+#ifdef VIMAGE
+ V_udbinfo.ipi_vnet = curvnet;
+#endif
+ V_udbinfo.ipi_listhead = &V_udb;
+ V_udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB,
+ &V_udbinfo.ipi_hashmask);
+ V_udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB,
+ &V_udbinfo.ipi_porthashmask);
+ V_udbinfo.ipi_zone = uma_zcreate("udp_inpcb", sizeof(struct inpcb),
+ NULL, NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
+
+ V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(V_udpcb_zone, maxsockets);
+
+ EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
+ EVENTHANDLER_PRI_ANY);
+}
+
+/*
+ * Kernel module interface for updating udpstat. The argument is an index
+ * into udpstat treated as an array of u_long. While this encodes the
+ * general layout of udpstat into the caller, it doesn't encode its location,
+ * so that future changes to add, for example, per-CPU stats support won't
+ * cause binary compatibility problems for kernel modules.
+ */
+void
+kmod_udpstat_inc(int statnum)
+{
+
+ (*((u_long *)&V_udpstat + statnum))++;
+}
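+
+/*
+ * udp_var.h is expected to provide a KMOD_UDPSTAT_INC() wrapper, analogous
+ * to KMOD_TCPSTAT_INC() in tcp_var.h, that computes this index for module
+ * callers.
+ */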
+
+int
+udp_newudpcb(struct inpcb *inp)
+{
+ struct udpcb *up;
+
+ up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO);
+ if (up == NULL)
+ return (ENOBUFS);
+ inp->inp_ppcb = up;
+ return (0);
+}
+
+void
+udp_discardcb(struct udpcb *up)
+{
+
+ uma_zfree(V_udpcb_zone, up);
+}
+
+#ifdef VIMAGE
+void
+udp_destroy(void)
+{
+
+ hashdestroy(V_udbinfo.ipi_hashbase, M_PCB,
+ V_udbinfo.ipi_hashmask);
+ hashdestroy(V_udbinfo.ipi_porthashbase, M_PCB,
+ V_udbinfo.ipi_porthashmask);
+
+ uma_zdestroy(V_udpcb_zone);
+ uma_zdestroy(V_udbinfo.ipi_zone);
+ INP_INFO_LOCK_DESTROY(&V_udbinfo);
+}
+#endif
+
+/*
+ * Subroutine of udp_input(), which appends the provided mbuf chain to the
+ * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
+ * contains the source address. If the socket ends up being an IPv6 socket,
+ * udp_append() will convert to a sockaddr_in6 before passing the address
+ * into the socket code.
+ */
+static void
+udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
+ struct sockaddr_in *udp_in)
+{
+ struct sockaddr *append_sa;
+ struct socket *so;
+ struct mbuf *opts = 0;
+#ifdef INET6
+ struct sockaddr_in6 udp_in6;
+#endif
+#ifdef IPSEC
+#ifdef IPSEC_NAT_T
+#ifdef INET
+ struct udpcb *up;
+#endif
+#endif
+#endif
+
+ INP_RLOCK_ASSERT(inp);
+
+#ifdef IPSEC
+ /* Check AH/ESP integrity. */
+ if (ipsec4_in_reject(n, inp)) {
+ m_freem(n);
+ V_ipsec4stat.in_polvio++;
+ return;
+ }
+#ifdef IPSEC_NAT_T
+#ifdef INET
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
+ if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */
+ n = udp4_espdecap(inp, n, off);
+ if (n == NULL) /* Consumed. */
+ return;
+ }
+#endif /* INET */
+#endif /* IPSEC_NAT_T */
+#endif /* IPSEC */
+#ifdef MAC
+ if (mac_inpcb_check_deliver(inp, n) != 0) {
+ m_freem(n);
+ return;
+ }
+#endif
+ if (inp->inp_flags & INP_CONTROLOPTS ||
+ inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ (void)ip6_savecontrol_v4(inp, n, &opts, NULL);
+ else
+#endif
+ ip_savecontrol(inp, &opts, ip, n);
+ }
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ bzero(&udp_in6, sizeof(udp_in6));
+ udp_in6.sin6_len = sizeof(udp_in6);
+ udp_in6.sin6_family = AF_INET6;
+ in6_sin_2_v4mapsin6(udp_in, &udp_in6);
+ append_sa = (struct sockaddr *)&udp_in6;
+ } else
+#endif
+ append_sa = (struct sockaddr *)udp_in;
+ m_adj(n, off);
+
+ so = inp->inp_socket;
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ m_freem(n);
+ if (opts)
+ m_freem(opts);
+ UDPSTAT_INC(udps_fullsock);
+ } else
+ sorwakeup_locked(so);
+}
+
+void
+udp_input(struct mbuf *m, int off)
+{
+ int iphlen = off;
+ struct ip *ip;
+ struct udphdr *uh;
+ struct ifnet *ifp;
+ struct inpcb *inp;
+ struct udpcb *up;
+ int len;
+ struct ip save_ip;
+ struct sockaddr_in udp_in;
+#ifdef IPFIREWALL_FORWARD
+ struct m_tag *fwd_tag;
+#endif
+
+ ifp = m->m_pkthdr.rcvif;
+ UDPSTAT_INC(udps_ipackets);
+
+ /*
+ * Strip IP options, if any. Ideally we would skip this, make the
+ * options available to the user, and use them on returned packets,
+ * but we don't yet have a way to check the checksum with options
+ * still present.
+ */
+ if (iphlen > sizeof (struct ip)) {
+ ip_stripoptions(m, (struct mbuf *)0);
+ iphlen = sizeof(struct ip);
+ }
+
+ /*
+ * Get IP and UDP header together in first mbuf.
+ */
+ ip = mtod(m, struct ip *);
+ if (m->m_len < iphlen + sizeof(struct udphdr)) {
+ if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
+ UDPSTAT_INC(udps_hdrops);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ uh = (struct udphdr *)((caddr_t)ip + iphlen);
+
+ /*
+ * Destination port of 0 is illegal, based on RFC768.
+ */
+ if (uh->uh_dport == 0)
+ goto badunlocked;
+
+ /*
+ * Construct sockaddr format source address. Stuff source address
+ * and datagram in user buffer.
+ */
+ bzero(&udp_in, sizeof(udp_in));
+ udp_in.sin_len = sizeof(udp_in);
+ udp_in.sin_family = AF_INET;
+ udp_in.sin_port = uh->uh_sport;
+ udp_in.sin_addr = ip->ip_src;
+
+ /*
+ * Make mbuf data length reflect UDP length. If not enough data to
+ * reflect UDP length, drop.
+ */
+ len = ntohs((u_short)uh->uh_ulen);
+ if (ip->ip_len != len) {
+ if (len > ip->ip_len || len < sizeof(struct udphdr)) {
+ UDPSTAT_INC(udps_badlen);
+ goto badunlocked;
+ }
+ m_adj(m, len - ip->ip_len);
+ /* ip->ip_len = len; */
+ }
+
+ /*
+ * Save a copy of the IP header in case we want to restore it for
+ * sending an ICMP error message in response.
+ */
+ if (!V_udp_blackhole)
+ save_ip = *ip;
+ else
+ memset(&save_ip, 0, sizeof(save_ip));
+
+ /*
+ * Checksum extended UDP header and data.
+ */
+ if (uh->uh_sum) {
+ u_short uh_sum;
+
+ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+ uh_sum = m->m_pkthdr.csum_data;
+ else
+ uh_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htonl((u_short)len +
+ m->m_pkthdr.csum_data + IPPROTO_UDP));
+ uh_sum ^= 0xffff;
+ } else {
+ char b[9];
+
+ bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
+ bzero(((struct ipovly *)ip)->ih_x1, 9);
+ ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
+ uh_sum = in_cksum(m, len + sizeof (struct ip));
+ bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
+ }
+ if (uh_sum) {
+ UDPSTAT_INC(udps_badsum);
+ m_freem(m);
+ return;
+ }
+ } else
+ UDPSTAT_INC(udps_nosum);
+
+#ifdef IPFIREWALL_FORWARD
+ /*
+ * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
+ */
+ fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+ if (fwd_tag != NULL) {
+ struct sockaddr_in *next_hop;
+
+ /*
+ * Do the hack.
+ */
+ next_hop = (struct sockaddr_in *)(fwd_tag + 1);
+ ip->ip_dst = next_hop->sin_addr;
+ uh->uh_dport = ntohs(next_hop->sin_port);
+
+ /*
+ * Remove the tag from the packet. We don't need it anymore.
+ */
+ m_tag_delete(m, fwd_tag);
+ }
+#endif
+
+ INP_INFO_RLOCK(&V_udbinfo);
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ in_broadcast(ip->ip_dst, ifp)) {
+ struct inpcb *last;
+ struct ip_moptions *imo;
+
+ last = NULL;
+ LIST_FOREACH(inp, &V_udb, inp_list) {
+ if (inp->inp_lport != uh->uh_dport)
+ continue;
+#ifdef INET6
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_laddr.s_addr != INADDR_ANY &&
+ inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
+ continue;
+ if (inp->inp_faddr.s_addr != INADDR_ANY &&
+ inp->inp_faddr.s_addr != ip->ip_src.s_addr)
+ continue;
+ if (inp->inp_fport != 0 &&
+ inp->inp_fport != uh->uh_sport)
+ continue;
+
+ INP_RLOCK(inp);
+
+ /*
+ * Handle socket delivery policy for any-source
+ * and source-specific multicast. [RFC3678]
+ */
+ imo = inp->inp_moptions;
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
+ imo != NULL) {
+ struct sockaddr_in group;
+ int blocked;
+
+ bzero(&group, sizeof(struct sockaddr_in));
+ group.sin_len = sizeof(struct sockaddr_in);
+ group.sin_family = AF_INET;
+ group.sin_addr = ip->ip_dst;
+
+ blocked = imo_multi_filter(imo, ifp,
+ (struct sockaddr *)&group,
+ (struct sockaddr *)&udp_in);
+ if (blocked != MCAST_PASS) {
+ if (blocked == MCAST_NOTGMEMBER)
+ IPSTAT_INC(ips_notmember);
+ if (blocked == MCAST_NOTSMEMBER ||
+ blocked == MCAST_MUTED)
+ UDPSTAT_INC(udps_filtermcast);
+ INP_RUNLOCK(inp);
+ continue;
+ }
+ }
+ if (last != NULL) {
+ struct mbuf *n;
+
+ n = m_copy(m, 0, M_COPYALL);
+ up = intoudpcb(last);
+ if (up->u_tun_func == NULL) {
+ if (n != NULL)
+ udp_append(last,
+ ip, n,
+ iphlen +
+ sizeof(struct udphdr),
+ &udp_in);
+ } else {
+ /*
+ * Engage the tunneling protocol.  We
+ * have to leave the info_lock up,
+ * since we are hunting through
+ * multiple UDP PCBs.
+ */
+ (*up->u_tun_func)(n, iphlen, last);
+ }
+ INP_RUNLOCK(last);
+ }
+ last = inp;
+ /*
+ * Don't look for additional matches if this one does
+ * not have either the SO_REUSEPORT or SO_REUSEADDR
+ * socket options set. This heuristic avoids
+ * searching through all pcbs in the common case of a
+ * non-shared port. It assumes that an application
+ * will never clear these options after setting them.
+ */
+ if ((last->inp_socket->so_options &
+ (SO_REUSEPORT|SO_REUSEADDR)) == 0)
+ break;
+ }
+
+ if (last == NULL) {
+ /*
+ * No matching pcb found; discard datagram. (No need
+ * to send an ICMP Port Unreachable for a broadcast
+ * or multicast datagram.)
+ */
+ UDPSTAT_INC(udps_noportbcast);
+ goto badheadlocked;
+ }
+ up = intoudpcb(last);
+ if (up->u_tun_func == NULL) {
+ udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
+ &udp_in);
+ } else {
+ /*
+ * Engage the tunneling protocol.
+ */
+ (*up->u_tun_func)(m, iphlen, last);
+ }
+ INP_RUNLOCK(last);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ return;
+ }
+
+ /*
+ * Locate pcb for datagram.
+ */
+ inp = in_pcblookup_hash(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ ip->ip_dst, uh->uh_dport, 1, ifp);
+ if (inp == NULL) {
+ if (udp_log_in_vain) {
+ char buf[4*sizeof "123"];
+
+ strcpy(buf, inet_ntoa(ip->ip_dst));
+ log(LOG_INFO,
+ "Connection attempt to UDP %s:%d from %s:%d\n",
+ buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
+ ntohs(uh->uh_sport));
+ }
+ UDPSTAT_INC(udps_noport);
+ if (m->m_flags & (M_BCAST | M_MCAST)) {
+ UDPSTAT_INC(udps_noportbcast);
+ goto badheadlocked;
+ }
+ if (V_udp_blackhole)
+ goto badheadlocked;
+ if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
+ goto badheadlocked;
+ *ip = save_ip;
+ ip->ip_len += iphlen;
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ return;
+ }
+
+ /*
+ * Check the minimum TTL for socket.
+ */
+ INP_RLOCK(inp);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
+ INP_RUNLOCK(inp);
+ goto badunlocked;
+ }
+ up = intoudpcb(inp);
+ if (up->u_tun_func == NULL) {
+ udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
+ } else {
+ /*
+ * Engage the tunneling protocol.
+ */
+ (*up->u_tun_func)(m, iphlen, inp);
+ }
+ INP_RUNLOCK(inp);
+ return;
+
+badheadlocked:
+ if (inp)
+ INP_RUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+badunlocked:
+ m_freem(m);
+}
+
+/*
+ * Notify a udp user of an asynchronous error; just wake up so that they can
+ * collect error status.
+ */
+struct inpcb *
+udp_notify(struct inpcb *inp, int errno)
+{
+
+ /*
+ * While udp_ctlinput() always calls udp_notify() with a read lock
+ * when invoking it directly, in_pcbnotifyall() currently uses write
+ * locks due to sharing code with TCP. For now, accept either a read
+ * or a write lock, but a read lock is sufficient.
+ */
+ INP_LOCK_ASSERT(inp);
+
+ inp->inp_socket->so_error = errno;
+ sorwakeup(inp->inp_socket);
+ sowwakeup(inp->inp_socket);
+ return (inp);
+}
+
+void
+udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+ struct ip *ip = vip;
+ struct udphdr *uh;
+ struct in_addr faddr;
+ struct inpcb *inp;
+
+ faddr = ((struct sockaddr_in *)sa)->sin_addr;
+ if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
+ return;
+
+ /*
+ * Redirects don't need to be handled up here.
+ */
+ if (PRC_IS_REDIRECT(cmd))
+ return;
+
+ /*
+ * Hostdead is ugly because it goes linearly through all PCBs.
+ *
+ * XXX: We never get this from ICMP, otherwise it makes an excellent
+ * DoS attack on machines with many connections.
+ */
+ if (cmd == PRC_HOSTDEAD)
+ ip = NULL;
+ else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
+ return;
+ if (ip != NULL) {
+ uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+ INP_INFO_RLOCK(&V_udbinfo);
+ inp = in_pcblookup_hash(&V_udbinfo, faddr, uh->uh_dport,
+ ip->ip_src, uh->uh_sport, 0, NULL);
+ if (inp != NULL) {
+ INP_RLOCK(inp);
+ if (inp->inp_socket != NULL) {
+ udp_notify(inp, inetctlerrmap[cmd]);
+ }
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ } else
+ in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd],
+ udp_notify);
+}
+
+static int
+udp_pcblist(SYSCTL_HANDLER_ARGS)
+{
+ int error, i, n;
+ struct inpcb *inp, **inp_list;
+ inp_gen_t gencnt;
+ struct xinpgen xig;
+
+ /*
+ * The process of preparing the PCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == 0) {
+ n = V_udbinfo.ipi_count;
+ n += imax(n / 8, 10);
+ req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
+ return (0);
+ }
+
+ if (req->newptr != 0)
+ return (EPERM);
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ INP_INFO_RLOCK(&V_udbinfo);
+ gencnt = V_udbinfo.ipi_gencnt;
+ n = V_udbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&V_udbinfo);
+
+ error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ + n * sizeof(struct xinpcb));
+ if (error != 0)
+ return (error);
+
+ xig.xig_len = sizeof xig;
+ xig.xig_count = n;
+ xig.xig_gen = gencnt;
+ xig.xig_sogen = so_gencnt;
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ if (error)
+ return (error);
+
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == 0)
+ return (ENOMEM);
+
+ INP_INFO_RLOCK(&V_udbinfo);
+ for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = LIST_NEXT(inp, inp_list)) {
+ INP_WLOCK(inp);
+ if (inp->inp_gencnt <= gencnt &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
+ in_pcbref(inp);
+ inp_list[i++] = inp;
+ }
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ n = i;
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (inp->inp_gencnt <= gencnt) {
+ struct xinpcb xi;
+
+ bzero(&xi, sizeof(xi));
+ xi.xi_len = sizeof xi;
+ /* XXX should avoid extra copy */
+ bcopy(inp, &xi.xi_inp, sizeof *inp);
+ if (inp->inp_socket)
+ sotoxsocket(inp->inp_socket, &xi.xi_socket);
+ xi.xi_inp.inp_gencnt = inp->inp_gencnt;
+ INP_RUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xi, sizeof xi);
+ } else
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WLOCK(&V_udbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_WLOCK(inp);
+ if (!in_pcbrele(inp))
+ INP_WUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_udbinfo);
+
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state. If the
+ * generation differs from what we told her before, she knows
+ * that something happened while we were processing this
+ * request, and it might be necessary to retry.
+ */
+ INP_INFO_RLOCK(&V_udbinfo);
+ xig.xig_gen = V_udbinfo.ipi_gencnt;
+ xig.xig_sogen = so_gencnt;
+ xig.xig_count = V_udbinfo.ipi_count;
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ }
+ free(inp_list, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+ udp_pcblist, "S,xinpcb", "List of active UDP sockets");
+
+static int
+udp_getcred(SYSCTL_HANDLER_ARGS)
+{
+ struct xucred xuc;
+ struct sockaddr_in addrs[2];
+ struct inpcb *inp;
+ int error;
+
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
+ if (error)
+ return (error);
+ error = SYSCTL_IN(req, addrs, sizeof(addrs));
+ if (error)
+ return (error);
+ INP_INFO_RLOCK(&V_udbinfo);
+ inp = in_pcblookup_hash(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+ addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
+ if (inp != NULL) {
+ INP_RLOCK(inp);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ if (inp->inp_socket == NULL)
+ error = ENOENT;
+ if (error == 0)
+ error = cr_canseeinpcb(req->td->td_ucred, inp);
+ if (error == 0)
+ cru2x(inp->inp_cred, &xuc);
+ INP_RUNLOCK(inp);
+ } else {
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ error = ENOENT;
+ }
+ if (error == 0)
+ error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
+ CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
+ udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
+
+int
+udp_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ int error = 0, optval;
+ struct inpcb *inp;
+#ifdef IPSEC_NAT_T
+ struct udpcb *up;
+#endif
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+ INP_WLOCK(inp);
+ if (sopt->sopt_level != IPPROTO_UDP) {
+#ifdef INET6
+ if (INP_CHECK_SOCKAF(so, AF_INET6)) {
+ INP_WUNLOCK(inp);
+ error = ip6_ctloutput(so, sopt);
+ } else {
+#endif
+ INP_WUNLOCK(inp);
+ error = ip_ctloutput(so, sopt);
+#ifdef INET6
+ }
+#endif
+ return (error);
+ }
+
+ switch (sopt->sopt_dir) {
+ case SOPT_SET:
+ switch (sopt->sopt_name) {
+ case UDP_ENCAP:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ break;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+ INP_WLOCK(inp);
+#ifdef IPSEC_NAT_T
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: up == NULL", __func__));
+#endif
+ switch (optval) {
+ case 0:
+ /* Clear all UDP encap. */
+#ifdef IPSEC_NAT_T
+ up->u_flags &= ~UF_ESPINUDP_ALL;
+#endif
+ break;
+#ifdef IPSEC_NAT_T
+ case UDP_ENCAP_ESPINUDP:
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
+ up->u_flags &= ~UF_ESPINUDP_ALL;
+ if (optval == UDP_ENCAP_ESPINUDP)
+ up->u_flags |= UF_ESPINUDP;
+ else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
+ up->u_flags |= UF_ESPINUDP_NON_IKE;
+ break;
+#endif
+ default:
+ error = EINVAL;
+ break;
+ }
+ INP_WUNLOCK(inp);
+ break;
+ default:
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+ break;
+ case SOPT_GET:
+ switch (sopt->sopt_name) {
+#ifdef IPSEC_NAT_T
+ case UDP_ENCAP:
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: up == NULL", __func__));
+ optval = up->u_flags & UF_ESPINUDP_ALL;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+#endif
+ default:
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+ break;
+ }
+ return (error);
+}
+
+static int
+udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *td)
+{
+ struct udpiphdr *ui;
+ int len = m->m_pkthdr.len;
+ struct in_addr faddr, laddr;
+ struct cmsghdr *cm;
+ struct sockaddr_in *sin, src;
+ int error = 0;
+ int ipflags;
+ u_short fport, lport;
+ int unlock_udbinfo;
+
+ /*
+ * udp_output() may need to temporarily bind or connect the current
+ * inpcb. As such, we don't know up front whether we will need the
+ * pcbinfo lock or not. Do any work to decide what is needed up
+ * front before acquiring any locks.
+ */
+ if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
+ if (control)
+ m_freem(control);
+ m_freem(m);
+ return (EMSGSIZE);
+ }
+
+ src.sin_family = 0;
+ if (control != NULL) {
+ /*
+ * XXX: Currently, we assume all the optional information is
+ * stored in a single mbuf.
+ */
+ if (control->m_next) {
+ m_freem(control);
+ m_freem(m);
+ return (EINVAL);
+ }
+ for (; control->m_len > 0;
+ control->m_data += CMSG_ALIGN(cm->cmsg_len),
+ control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
+ cm = mtod(control, struct cmsghdr *);
+ if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
+ || cm->cmsg_len > control->m_len) {
+ error = EINVAL;
+ break;
+ }
+ if (cm->cmsg_level != IPPROTO_IP)
+ continue;
+
+ switch (cm->cmsg_type) {
+ case IP_SENDSRCADDR:
+ if (cm->cmsg_len !=
+ CMSG_LEN(sizeof(struct in_addr))) {
+ error = EINVAL;
+ break;
+ }
+ bzero(&src, sizeof(src));
+ src.sin_family = AF_INET;
+ src.sin_len = sizeof(src);
+ src.sin_port = inp->inp_lport;
+ src.sin_addr =
+ *(struct in_addr *)CMSG_DATA(cm);
+ break;
+
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+ if (error)
+ break;
+ }
+ m_freem(control);
+ }
+ if (error) {
+ m_freem(m);
+ return (error);
+ }
+
+ /*
+ * Depending on whether or not the application has bound or connected
+ * the socket, we may have to do varying levels of work. The optimal
+ * case is for a connected UDP socket, as a global lock isn't
+ * required at all.
+ *
+ * In order to decide which we need, we require stability of the
+ * inpcb binding, which we ensure by acquiring a read lock on the
+ * inpcb. This doesn't strictly follow the lock order, so we play
+ * the trylock and retry game; note that we may end up with more
+ * conservative locks than required the second time around, so later
+ * assertions have to accept that. Further analysis of the number of
+ * misses under contention is required.
+ */
+ sin = (struct sockaddr_in *)addr;
+ INP_RLOCK(inp);
+ if (sin != NULL &&
+ (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
+ INP_RUNLOCK(inp);
+ INP_INFO_WLOCK(&V_udbinfo);
+ INP_WLOCK(inp);
+ unlock_udbinfo = 2;
+ } else if ((sin != NULL && (
+ (sin->sin_addr.s_addr == INADDR_ANY) ||
+ (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
+ (inp->inp_laddr.s_addr == INADDR_ANY) ||
+ (inp->inp_lport == 0))) ||
+ (src.sin_family == AF_INET)) {
+ if (!INP_INFO_TRY_RLOCK(&V_udbinfo)) {
+ INP_RUNLOCK(inp);
+ INP_INFO_RLOCK(&V_udbinfo);
+ INP_RLOCK(inp);
+ }
+ unlock_udbinfo = 1;
+ } else
+ unlock_udbinfo = 0;
+
+ /*
+ * If the IP_SENDSRCADDR control message was specified, override the
+ * source address for this datagram. Its use is invalidated if the
+ * address thus specified is incomplete or clobbers other inpcbs.
+ */
+ laddr = inp->inp_laddr;
+ lport = inp->inp_lport;
+ if (src.sin_family == AF_INET) {
+ INP_INFO_LOCK_ASSERT(&V_udbinfo);
+ if ((lport == 0) ||
+ (laddr.s_addr == INADDR_ANY &&
+ src.sin_addr.s_addr == INADDR_ANY)) {
+ error = EINVAL;
+ goto release;
+ }
+ error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
+ &laddr.s_addr, &lport, td->td_ucred);
+ if (error)
+ goto release;
+ }
+
+ /*
+ * If a UDP socket has been connected, then a local address/port will
+ * have been selected and bound.
+ *
+ * If a UDP socket has not been connected to, then an explicit
+ * destination address must be used, in which case a local
+ * address/port may not have been selected and bound.
+ */
+ if (sin != NULL) {
+ INP_LOCK_ASSERT(inp);
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ error = EISCONN;
+ goto release;
+ }
+
+ /*
+ * Jail may rewrite the destination address, so let it do
+ * that before we use it.
+ */
+ error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
+ if (error)
+ goto release;
+
+ /*
+ * If a local address or port hasn't yet been selected, or if
+ * the destination address needs to be rewritten due to using
+ * a special INADDR_ constant, invoke in_pcbconnect_setup()
+ * to do the heavy lifting. Once a port is selected, we
+ * commit the binding back to the socket; we also commit the
+ * binding of the address if in jail.
+ *
+ * If we already have a valid binding and we're not
+ * requesting a destination address rewrite, use a fast path.
+ */
+ if (inp->inp_laddr.s_addr == INADDR_ANY ||
+ inp->inp_lport == 0 ||
+ sin->sin_addr.s_addr == INADDR_ANY ||
+ sin->sin_addr.s_addr == INADDR_BROADCAST) {
+ INP_INFO_LOCK_ASSERT(&V_udbinfo);
+ error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
+ &lport, &faddr.s_addr, &fport, NULL,
+ td->td_ucred);
+ if (error)
+ goto release;
+
+ /*
+ * XXXRW: Why not commit the port if the address is
+ * !INADDR_ANY?
+ */
+ /* Commit the local port if newly assigned. */
+ if (inp->inp_laddr.s_addr == INADDR_ANY &&
+ inp->inp_lport == 0) {
+ INP_INFO_WLOCK_ASSERT(&V_udbinfo);
+ INP_WLOCK_ASSERT(inp);
+ /*
+ * Remember addr if jailed, to prevent
+ * rebinding.
+ */
+ if (prison_flag(td->td_ucred, PR_IP4))
+ inp->inp_laddr = laddr;
+ inp->inp_lport = lport;
+ if (in_pcbinshash(inp) != 0) {
+ inp->inp_lport = 0;
+ error = EAGAIN;
+ goto release;
+ }
+ inp->inp_flags |= INP_ANONPORT;
+ }
+ } else {
+ faddr = sin->sin_addr;
+ fport = sin->sin_port;
+ }
+ } else {
+ INP_LOCK_ASSERT(inp);
+ faddr = inp->inp_faddr;
+ fport = inp->inp_fport;
+ if (faddr.s_addr == INADDR_ANY) {
+ error = ENOTCONN;
+ goto release;
+ }
+ }
+
+ /*
+ * Calculate the data length and get an mbuf for the UDP, IP, and
+ * possible link-layer headers.  Immediately slide the data pointer
+ * forward again, since we won't use that space at this layer.
+ */
+ M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto release;
+ }
+ m->m_data += max_linkhdr;
+ m->m_len -= max_linkhdr;
+ m->m_pkthdr.len -= max_linkhdr;
+
+ /*
+ * Fill in the mbuf with an extended UDP header, with the addresses
+ * and length in network byte order.
+ */
+ ui = mtod(m, struct udpiphdr *);
+ bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
+ ui->ui_pr = IPPROTO_UDP;
+ ui->ui_src = laddr;
+ ui->ui_dst = faddr;
+ ui->ui_sport = lport;
+ ui->ui_dport = fport;
+ ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
+
+ /*
+ * Set the Don't Fragment bit in the IP header.
+ */
+ if (inp->inp_flags & INP_DONTFRAG) {
+ struct ip *ip;
+
+ ip = (struct ip *)&ui->ui_i;
+ ip->ip_off |= IP_DF;
+ }
+
+ ipflags = 0;
+ if (inp->inp_socket->so_options & SO_DONTROUTE)
+ ipflags |= IP_ROUTETOIF;
+ if (inp->inp_socket->so_options & SO_BROADCAST)
+ ipflags |= IP_ALLOWBROADCAST;
+ if (inp->inp_flags & INP_ONESBCAST)
+ ipflags |= IP_SENDONES;
+
+#ifdef MAC
+ mac_inpcb_create_mbuf(inp, m);
+#endif
+
+ /*
+ * Set up checksum and output datagram.
+ */
+ if (udp_cksum) {
+ if (inp->inp_flags & INP_ONESBCAST)
+ faddr.s_addr = INADDR_BROADCAST;
+ ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
+ htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
+ m->m_pkthdr.csum_flags = CSUM_UDP;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ } else
+ ui->ui_sum = 0;
+ ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
+ ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
+ ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
+ UDPSTAT_INC(udps_opackets);
+
+ if (unlock_udbinfo == 2)
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ else if (unlock_udbinfo == 1)
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ error = ip_output(m, inp->inp_options, NULL, ipflags,
+ inp->inp_moptions, inp);
+ if (unlock_udbinfo == 2)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+ return (error);
+
+release:
+ if (unlock_udbinfo == 2) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ } else if (unlock_udbinfo == 1) {
+ INP_RUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ } else
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (error);
+}
+
+
+#if defined(IPSEC) && defined(IPSEC_NAT_T)
+#ifdef INET
+/*
+ * Potentially decap ESP in UDP frame. Check for an ESP header
+ * and optional marker; if present, strip the UDP header and
+ * push the result through IPSec.
+ *
+ * Returns mbuf to be processed (potentially re-allocated) or
+ * NULL if consumed and/or processed.
+ */
+static struct mbuf *
+udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
+{
+ size_t minlen, payload, skip, iphlen;
+ caddr_t data;
+ struct udpcb *up;
+ struct m_tag *tag;
+ struct udphdr *udphdr;
+ struct ip *ip;
+
+ INP_RLOCK_ASSERT(inp);
+
+ /*
+ * Pull up data so the longest case is contiguous:
+ * IP/UDP hdr + non-ESP marker + ESP hdr.
+ */
+ minlen = off + sizeof(uint64_t) + sizeof(struct esp);
+ if (minlen > m->m_pkthdr.len)
+ minlen = m->m_pkthdr.len;
+ if ((m = m_pullup(m, minlen)) == NULL) {
+ V_ipsec4stat.in_inval++;
+ return (NULL); /* Bypass caller processing. */
+ }
+ data = mtod(m, caddr_t); /* Points to ip header. */
+ payload = m->m_len - off; /* Size of payload. */
+
+ if (payload == 1 && data[off] == '\xff')
+ return (m); /* NB: keepalive packet, no decap. */
+
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
+ KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
+ ("u_flags 0x%x", up->u_flags));
+
+ /*
+ * Check that the payload is large enough to hold an
+ * ESP header and compute the amount of data to remove.
+ *
+ * NB: the caller has already done a pullup for us.
+ * XXX can we assume alignment and eliminate bcopys?
+ */
+ if (up->u_flags & UF_ESPINUDP_NON_IKE) {
+ /*
+ * draft-ietf-ipsec-nat-t-ike-0[01].txt and
+ * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
+ * possible AH mode non-IKE marker+non-ESP marker
+ * from draft-ietf-ipsec-udp-encaps-00.txt.
+ */
+ uint64_t marker;
+
+ if (payload <= sizeof(uint64_t) + sizeof(struct esp))
+ return (m); /* NB: no decap. */
+ bcopy(data + off, &marker, sizeof(uint64_t));
+ if (marker != 0) /* Non-IKE marker. */
+ return (m); /* NB: no decap. */
+ skip = sizeof(uint64_t) + sizeof(struct udphdr);
+ } else {
+ uint32_t spi;
+
+ if (payload <= sizeof(struct esp)) {
+ V_ipsec4stat.in_inval++;
+ m_freem(m);
+ return (NULL); /* Discard. */
+ }
+ bcopy(data + off, &spi, sizeof(uint32_t));
+ if (spi == 0) /* Non-ESP marker. */
+ return (m); /* NB: no decap. */
+ skip = sizeof(struct udphdr);
+ }
+
+ /*
+ * Set up a PACKET_TAG_IPSEC_NAT_T_PORTS tag to remember
+ * the UDP ports.  This is required if we want to select
+ * the right SPD for multiple hosts behind same NAT.
+ *
+ * NB: ports are maintained in network byte order everywhere
+ * in the NAT-T code.
+ */
+ tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
+ 2 * sizeof(uint16_t), M_NOWAIT);
+ if (tag == NULL) {
+ V_ipsec4stat.in_nomem++;
+ m_freem(m);
+ return (NULL); /* Discard. */
+ }
+ iphlen = off - sizeof(struct udphdr);
+ udphdr = (struct udphdr *)(data + iphlen);
+ ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
+ ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
+ m_tag_prepend(m, tag);
+
+ /*
+ * Remove the UDP header (and possibly the non-ESP marker).
+ * The IP header length is iphlen.
+ * Before:
+ * <--- off --->
+ * +----+------+-----+
+ * | IP | UDP | ESP |
+ * +----+------+-----+
+ * <-skip->
+ * After:
+ * +----+-----+
+ * | IP | ESP |
+ * +----+-----+
+ * <-skip->
+ */
+ ovbcopy(data, data + skip, iphlen);
+ m_adj(m, skip);
+
+ ip = mtod(m, struct ip *);
+ ip->ip_len -= skip;
+ ip->ip_p = IPPROTO_ESP;
+
+ /*
+ * We cannot yet update the cksums so clear any
+ * h/w cksum flags as they are no longer valid.
+ */
+ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
+ m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+
+ (void) ipsec4_common_input(m, iphlen, ip->ip_p);
+ return (NULL); /* NB: consumed, bypass processing. */
+}
+#endif /* INET */
+#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
+
+static void
+udp_abort(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
+ INP_INFO_WLOCK(&V_udbinfo);
+ INP_WLOCK(inp);
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ in_pcbdisconnect(inp);
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ soisdisconnected(so);
+ }
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+}
+
+static int
+udp_attach(struct socket *so, int proto, struct thread *td)
+{
+ struct inpcb *inp;
+ int error;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
+ error = soreserve(so, udp_sendspace, udp_recvspace);
+ if (error)
+ return (error);
+ INP_INFO_WLOCK(&V_udbinfo);
+ error = in_pcballoc(so, &V_udbinfo);
+ if (error) {
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (error);
+ }
+
+ inp = sotoinpcb(so);
+ inp->inp_vflag |= INP_IPV4;
+ inp->inp_ip_ttl = V_ip_defttl;
+
+ error = udp_newudpcb(inp);
+ if (error) {
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (error);
+ }
+
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (0);
+}
+
+int
+udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
+{
+ struct inpcb *inp;
+ struct udpcb *up;
+
+ KASSERT(so->so_type == SOCK_DGRAM,
+ ("udp_set_kernel_tunneling: !dgram"));
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
+ INP_WLOCK(inp);
+ up = intoudpcb(inp);
+ if (up->u_tun_func != NULL) {
+ INP_WUNLOCK(inp);
+ return (EBUSY);
+ }
+ up->u_tun_func = f;
+ INP_WUNLOCK(inp);
+ return (0);
+}
+
+static int
+udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct inpcb *inp;
+ int error;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
+ INP_INFO_WLOCK(&V_udbinfo);
+ INP_WLOCK(inp);
+ error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (error);
+}
+
+static void
+udp_close(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_close: inp == NULL"));
+ INP_INFO_WLOCK(&V_udbinfo);
+ INP_WLOCK(inp);
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ in_pcbdisconnect(inp);
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ soisdisconnected(so);
+ }
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+}
+
+static int
+udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct inpcb *inp;
+ int error;
+ struct sockaddr_in *sin;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
+ INP_INFO_WLOCK(&V_udbinfo);
+ INP_WLOCK(inp);
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (EISCONN);
+ }
+ sin = (struct sockaddr_in *)nam;
+ error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
+ if (error != 0) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (error);
+ }
+ error = in_pcbconnect(inp, nam, td->td_ucred);
+ if (error == 0)
+ soisconnected(so);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (error);
+}
+
+static void
+udp_detach(struct socket *so)
+{
+ struct inpcb *inp;
+ struct udpcb *up;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
+ KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
+ ("udp_detach: not disconnected"));
+ INP_INFO_WLOCK(&V_udbinfo);
+ INP_WLOCK(inp);
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: up == NULL", __func__));
+ inp->inp_ppcb = NULL;
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ udp_discardcb(up);
+}
+
+static int
+udp_disconnect(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
+ INP_INFO_WLOCK(&V_udbinfo);
+ INP_WLOCK(inp);
+ if (inp->inp_faddr.s_addr == INADDR_ANY) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (ENOTCONN);
+ }
+
+ in_pcbdisconnect(inp);
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_ISCONNECTED; /* XXX */
+ SOCK_UNLOCK(so);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_udbinfo);
+ return (0);
+}
+
+static int
+udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *td)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_send: inp == NULL"));
+ return (udp_output(inp, m, addr, control, td));
+}
+
+int
+udp_shutdown(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
+ INP_WLOCK(inp);
+ socantsendmore(so);
+ INP_WUNLOCK(inp);
+ return (0);
+}
+
+struct pr_usrreqs udp_usrreqs = {
+ .pru_abort = udp_abort,
+ .pru_attach = udp_attach,
+ .pru_bind = udp_bind,
+ .pru_connect = udp_connect,
+ .pru_control = in_control,
+ .pru_detach = udp_detach,
+ .pru_disconnect = udp_disconnect,
+ .pru_peeraddr = in_getpeeraddr,
+ .pru_send = udp_send,
+ .pru_soreceive = soreceive_dgram,
+ .pru_sosend = sosend_dgram,
+ .pru_shutdown = udp_shutdown,
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = udp_close,
+};
diff --git a/freebsd/sys/netinet/udp_var.h b/freebsd/sys/netinet/udp_var.h
new file mode 100644
index 00000000..0bff6ea9
--- /dev/null
+++ b/freebsd/sys/netinet/udp_var.h
@@ -0,0 +1,161 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)udp_var.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_UDP_VAR_HH_
+#define _NETINET_UDP_VAR_HH_
+
+/*
+ * UDP kernel structures and variables.
+ */
+struct udpiphdr {
+ struct ipovly ui_i; /* overlaid ip structure */
+ struct udphdr ui_u; /* udp header */
+};
+#define ui_x1 ui_i.ih_x1
+#define ui_pr ui_i.ih_pr
+#define ui_len ui_i.ih_len
+#define ui_src ui_i.ih_src
+#define ui_dst ui_i.ih_dst
+#define ui_sport ui_u.uh_sport
+#define ui_dport ui_u.uh_dport
+#define ui_ulen ui_u.uh_ulen
+#define ui_sum ui_u.uh_sum
+
+typedef void(*udp_tun_func_t)(struct mbuf *, int off, struct inpcb *);
+
+/*
+ * UDP control block; one per udp.
+ */
+struct udpcb {
+ udp_tun_func_t u_tun_func; /* UDP kernel tunneling callback. */
+ u_int u_flags; /* Generic UDP flags. */
+};
+
+#define intoudpcb(ip) ((struct udpcb *)(ip)->inp_ppcb)
+#define sotoudpcb(so) (intoudpcb(sotoinpcb(so)))
+
+ /* IPsec: ESP in UDP tunneling: */
+#define UF_ESPINUDP_NON_IKE 0x00000001 /* w/ non-IKE marker .. */
+ /* .. per draft-ietf-ipsec-nat-t-ike-0[01],
+ * and draft-ietf-ipsec-udp-encaps-(00/)01.txt */
+#define UF_ESPINUDP 0x00000002 /* w/ non-ESP marker. */
+
+struct udpstat {
+ /* input statistics: */
+ u_long udps_ipackets; /* total input packets */
+ u_long udps_hdrops; /* packet shorter than header */
+ u_long udps_badsum; /* checksum error */
+ u_long udps_nosum; /* no checksum */
+ u_long udps_badlen; /* data length larger than packet */
+ u_long udps_noport; /* no socket on port */
+ u_long udps_noportbcast; /* of above, arrived as broadcast */
+ u_long udps_fullsock; /* not delivered, input socket full */
+ u_long udpps_pcbcachemiss; /* input packets missing pcb cache */
+ u_long udpps_pcbhashmiss; /* input packets not for hashed pcb */
+ /* output statistics: */
+ u_long udps_opackets; /* total output packets */
+ u_long udps_fastout; /* output packets on fast path */
+ /* of no socket on port, arrived as multicast */
+ u_long udps_noportmcast;
+ u_long udps_filtermcast; /* blocked by multicast filter */
+};
+
+#ifdef _KERNEL
+/*
+ * In-kernel consumers can use these accessor macros directly to update
+ * stats.
+ */
+#define UDPSTAT_ADD(name, val) V_udpstat.name += (val)
+#define UDPSTAT_INC(name) UDPSTAT_ADD(name, 1)
+
+/*
+ * Kernel module consumers must use this accessor macro.
+ */
+void kmod_udpstat_inc(int statnum);
+#define KMOD_UDPSTAT_INC(name) \
+ kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(u_long))
+#endif
+
+/*
+ * Names for UDP sysctl objects.
+ */
+#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */
+#define UDPCTL_STATS 2 /* statistics (read-only) */
+#define UDPCTL_MAXDGRAM 3 /* max datagram size */
+#define UDPCTL_RECVSPACE 4 /* default receive buffer space */
+#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */
+#define UDPCTL_MAXID 6
+
+#define UDPCTL_NAMES { \
+ { 0, 0 }, \
+ { "checksum", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "maxdgram", CTLTYPE_INT }, \
+ { "recvspace", CTLTYPE_INT }, \
+ { "pcblist", CTLTYPE_STRUCT }, \
+}
+
+#ifdef _KERNEL
+SYSCTL_DECL(_net_inet_udp);
+
+extern struct pr_usrreqs udp_usrreqs;
+VNET_DECLARE(struct inpcbhead, udb);
+VNET_DECLARE(struct inpcbinfo, udbinfo);
+#define V_udb VNET(udb)
+#define V_udbinfo VNET(udbinfo)
+
+extern u_long udp_sendspace;
+extern u_long udp_recvspace;
+VNET_DECLARE(struct udpstat, udpstat);
+VNET_DECLARE(int, udp_blackhole);
+#define V_udpstat VNET(udpstat)
+#define V_udp_blackhole VNET(udp_blackhole)
+extern int udp_log_in_vain;
+
+int udp_newudpcb(struct inpcb *);
+void udp_discardcb(struct udpcb *);
+
+void udp_ctlinput(int, struct sockaddr *, void *);
+int udp_ctloutput(struct socket *, struct sockopt *);
+void udp_init(void);
+#ifdef VIMAGE
+void udp_destroy(void);
+#endif
+void udp_input(struct mbuf *, int);
+struct inpcb *udp_notify(struct inpcb *inp, int errno);
+int udp_shutdown(struct socket *so);
+
+int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f);
+#endif
+
+#endif