diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2016-10-07 15:10:20 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2017-01-10 09:53:31 +0100 |
commit | c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f (patch) | |
tree | ad4f2519067709f00ab98b3c591186c26dc3a21f /freebsd/sys/net | |
parent | userspace-header-gen.py: Simplify program ports (diff) | |
download | rtems-libbsd-c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f.tar.bz2 |
Update to FreeBSD head 2016-08-23
Git mirror commit 9fe7c416e6abb28b1398fd3e5687099846800cfd.
Diffstat (limited to 'freebsd/sys/net')
104 files changed, 29683 insertions, 8933 deletions
diff --git a/freebsd/sys/net/altq/altq.h b/freebsd/sys/net/altq/altq.h new file mode 100644 index 00000000..5d7eab8a --- /dev/null +++ b/freebsd/sys/net/altq/altq.h @@ -0,0 +1,206 @@ +/*- + * Copyright (C) 1998-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ + * $FreeBSD$ + */ +#ifndef _ALTQ_ALTQ_H_ +#define _ALTQ_ALTQ_H_ + +#if 0 +/* + * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq. + * altq3 is mainly for research experiments. pf-based altq is for daily use. 
+ */ +#define ALTQ3_COMPAT /* for compatibility with altq-3 */ +#define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */ +#endif + +#ifdef ALTQ3_COMPAT +#include <rtems/bsd/sys/param.h> +#include <sys/ioccom.h> +#include <sys/queue.h> +#include <netinet/in.h> + +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif +#endif /* ALTQ3_COMPAT */ + +/* altq discipline type */ +#define ALTQT_NONE 0 /* reserved */ +#define ALTQT_CBQ 1 /* cbq */ +#define ALTQT_WFQ 2 /* wfq */ +#define ALTQT_AFMAP 3 /* afmap */ +#define ALTQT_FIFOQ 4 /* fifoq */ +#define ALTQT_RED 5 /* red */ +#define ALTQT_RIO 6 /* rio */ +#define ALTQT_LOCALQ 7 /* local use */ +#define ALTQT_HFSC 8 /* hfsc */ +#define ALTQT_CDNR 9 /* traffic conditioner */ +#define ALTQT_BLUE 10 /* blue */ +#define ALTQT_PRIQ 11 /* priority queue */ +#define ALTQT_JOBS 12 /* JoBS */ +#define ALTQT_FAIRQ 13 /* fairq */ +#define ALTQT_CODEL 14 /* CoDel */ +#define ALTQT_MAX 15 /* should be max discipline type + 1 */ + +#ifdef ALTQ3_COMPAT +struct altqreq { + char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ + u_long arg; /* request-specific argument */ +}; +#endif + +/* simple token backet meter profile */ +struct tb_profile { + u_int rate; /* rate in bit-per-sec */ + u_int depth; /* depth in bytes */ +}; + +#ifdef ALTQ3_COMPAT +struct tbrreq { + char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ + struct tb_profile tb_prof; /* token bucket profile */ +}; + +#ifdef ALTQ3_CLFIER_COMPAT +/* + * common network flow info structure + */ +struct flowinfo { + u_char fi_len; /* total length */ + u_char fi_family; /* address family */ + u_int8_t fi_data[46]; /* actually longer; address family + specific flow info. */ +}; + +/* + * flow info structure for internet protocol family. 
+ * (currently this is the only protocol family supported) + */ +struct flowinfo_in { + u_char fi_len; /* sizeof(struct flowinfo_in) */ + u_char fi_family; /* AF_INET */ + u_int8_t fi_proto; /* IPPROTO_XXX */ + u_int8_t fi_tos; /* type-of-service */ + struct in_addr fi_dst; /* dest address */ + struct in_addr fi_src; /* src address */ + u_int16_t fi_dport; /* dest port */ + u_int16_t fi_sport; /* src port */ + u_int32_t fi_gpi; /* generalized port id for ipsec */ + u_int8_t _pad[28]; /* make the size equal to + flowinfo_in6 */ +}; + +#ifdef SIN6_LEN +struct flowinfo_in6 { + u_char fi6_len; /* sizeof(struct flowinfo_in6) */ + u_char fi6_family; /* AF_INET6 */ + u_int8_t fi6_proto; /* IPPROTO_XXX */ + u_int8_t fi6_tclass; /* traffic class */ + u_int32_t fi6_flowlabel; /* ipv6 flowlabel */ + u_int16_t fi6_dport; /* dest port */ + u_int16_t fi6_sport; /* src port */ + u_int32_t fi6_gpi; /* generalized port id */ + struct in6_addr fi6_dst; /* dest address */ + struct in6_addr fi6_src; /* src address */ +}; +#endif /* INET6 */ + +/* + * flow filters for AF_INET and AF_INET6 + */ +struct flow_filter { + int ff_ruleno; + struct flowinfo_in ff_flow; + struct { + struct in_addr mask_dst; + struct in_addr mask_src; + u_int8_t mask_tos; + u_int8_t _pad[3]; + } ff_mask; + u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */ +}; + +#ifdef SIN6_LEN +struct flow_filter6 { + int ff_ruleno; + struct flowinfo_in6 ff_flow6; + struct { + struct in6_addr mask6_dst; + struct in6_addr mask6_src; + u_int8_t mask6_tclass; + u_int8_t _pad[3]; + } ff_mask6; +}; +#endif /* INET6 */ +#endif /* ALTQ3_CLFIER_COMPAT */ +#endif /* ALTQ3_COMPAT */ + +/* + * generic packet counter + */ +struct pktcntr { + u_int64_t packets; + u_int64_t bytes; +}; + +#define PKTCNTR_ADD(cntr, len) \ + do { (cntr)->packets++; (cntr)->bytes += len; } while (/*CONSTCOND*/ 0) + +#ifdef ALTQ3_COMPAT +/* + * altq related ioctls + */ +#define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */ +#if 0 +/* + 
* these ioctls are currently discipline-specific but could be shared + * in the future. + */ +#define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */ +#define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */ +#define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */ +#define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/ +#define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */ +#define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */ +#define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */ +#define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */ +#define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */ +#define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */ +#define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */ +#define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */ +#define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */ +#endif /* 0 */ +#define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */ +#define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */ +#endif /* ALTQ3_COMPAT */ + +#ifdef _KERNEL +#include <net/altq/altq_var.h> +#endif + +#endif /* _ALTQ_ALTQ_H_ */ diff --git a/freebsd/sys/net/altq/altq_cbq.c b/freebsd/sys/net/altq/altq_cbq.c new file mode 100644 index 00000000..b8593fd6 --- /dev/null +++ b/freebsd/sys/net/altq/altq_cbq.c @@ -0,0 +1,1171 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the SMCC Technology + * Development Group at Sun Microsystems, Inc. + * + * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE + * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is + * provided "as is" without express or implied warranty of any kind. + * + * These notices must be retained in any copies of any part of this software. + * + * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ + * $FreeBSD$ + */ + +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/time.h> +#ifdef ALTQ3_COMPAT +#include <sys/uio.h> +#include <sys/kernel.h> +#endif + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <netpfil/pf/pf_mtag.h> +#include <net/altq/altq.h> +#include <net/altq/altq_cbq.h> +#ifdef ALTQ3_COMPAT +#include <net/altq/altq_conf.h> +#endif + +#ifdef ALTQ3_COMPAT +/* + * Local Data structures. + */ +static cbq_state_t *cbq_list = NULL; +#endif + +/* + * Forward Declarations. 
+ */ +static int cbq_class_destroy(cbq_state_t *, struct rm_class *); +static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t); +static int cbq_clear_interface(cbq_state_t *); +static int cbq_request(struct ifaltq *, int, void *); +static int cbq_enqueue(struct ifaltq *, struct mbuf *, + struct altq_pktattr *); +static struct mbuf *cbq_dequeue(struct ifaltq *, int); +static void cbqrestart(struct ifaltq *); +static void get_class_stats(class_stats_t *, struct rm_class *); +static void cbq_purge(cbq_state_t *); +#ifdef ALTQ3_COMPAT +static int cbq_add_class(struct cbq_add_class *); +static int cbq_delete_class(struct cbq_delete_class *); +static int cbq_modify_class(struct cbq_modify_class *); +static int cbq_class_create(cbq_state_t *, struct cbq_add_class *, + struct rm_class *, struct rm_class *); +static int cbq_clear_hierarchy(struct cbq_interface *); +static int cbq_set_enable(struct cbq_interface *, int); +static int cbq_ifattach(struct cbq_interface *); +static int cbq_ifdetach(struct cbq_interface *); +static int cbq_getstats(struct cbq_getstats *); + +static int cbq_add_filter(struct cbq_add_filter *); +static int cbq_delete_filter(struct cbq_delete_filter *); +#endif /* ALTQ3_COMPAT */ + +/* + * int + * cbq_class_destroy(cbq_mod_state_t *, struct rm_class *) - This + * function destroys a given traffic class. Before destroying + * the class, all traffic for that class is released. 
+ */ +static int +cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl) +{ + int i; + + /* delete the class */ + rmc_delete_class(&cbqp->ifnp, cl); + + /* + * free the class handle + */ + for (i = 0; i < CBQ_MAX_CLASSES; i++) + if (cbqp->cbq_class_tbl[i] == cl) + cbqp->cbq_class_tbl[i] = NULL; + + if (cl == cbqp->ifnp.root_) + cbqp->ifnp.root_ = NULL; + if (cl == cbqp->ifnp.default_) + cbqp->ifnp.default_ = NULL; +#ifdef ALTQ3_COMPAT + if (cl == cbqp->ifnp.ctl_) + cbqp->ifnp.ctl_ = NULL; +#endif + return (0); +} + +/* convert class handle to class pointer */ +static struct rm_class * +clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle) +{ + int i; + struct rm_class *cl; + + if (chandle == 0) + return (NULL); + /* + * first, try optimistically the slot matching the lower bits of + * the handle. if it fails, do the linear table search. + */ + i = chandle % CBQ_MAX_CLASSES; + if ((cl = cbqp->cbq_class_tbl[i]) != NULL && + cl->stats_.handle == chandle) + return (cl); + for (i = 0; i < CBQ_MAX_CLASSES; i++) + if ((cl = cbqp->cbq_class_tbl[i]) != NULL && + cl->stats_.handle == chandle) + return (cl); + return (NULL); +} + +static int +cbq_clear_interface(cbq_state_t *cbqp) +{ + int again, i; + struct rm_class *cl; + +#ifdef ALTQ3_CLFIER_COMPAT + /* free the filters for this interface */ + acc_discard_filters(&cbqp->cbq_classifier, NULL, 1); +#endif + + /* clear out the classes now */ + do { + again = 0; + for (i = 0; i < CBQ_MAX_CLASSES; i++) { + if ((cl = cbqp->cbq_class_tbl[i]) != NULL) { + if (is_a_parent_class(cl)) + again++; + else { + cbq_class_destroy(cbqp, cl); + cbqp->cbq_class_tbl[i] = NULL; + if (cl == cbqp->ifnp.root_) + cbqp->ifnp.root_ = NULL; + if (cl == cbqp->ifnp.default_) + cbqp->ifnp.default_ = NULL; +#ifdef ALTQ3_COMPAT + if (cl == cbqp->ifnp.ctl_) + cbqp->ifnp.ctl_ = NULL; +#endif + } + } + } + } while (again); + + return (0); +} + +static int +cbq_request(struct ifaltq *ifq, int req, void *arg) +{ + cbq_state_t *cbqp = (cbq_state_t 
*)ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + cbq_purge(cbqp); + break; + } + return (0); +} + +/* copy the stats info in rm_class to class_states_t */ +static void +get_class_stats(class_stats_t *statsp, struct rm_class *cl) +{ + statsp->xmit_cnt = cl->stats_.xmit_cnt; + statsp->drop_cnt = cl->stats_.drop_cnt; + statsp->over = cl->stats_.over; + statsp->borrows = cl->stats_.borrows; + statsp->overactions = cl->stats_.overactions; + statsp->delays = cl->stats_.delays; + + statsp->depth = cl->depth_; + statsp->priority = cl->pri_; + statsp->maxidle = cl->maxidle_; + statsp->minidle = cl->minidle_; + statsp->offtime = cl->offtime_; + statsp->qmax = qlimit(cl->q_); + statsp->ns_per_byte = cl->ns_per_byte_; + statsp->wrr_allot = cl->w_allotment_; + statsp->qcnt = qlen(cl->q_); + statsp->avgidle = cl->avgidle_; + + statsp->qtype = qtype(cl->q_); +#ifdef ALTQ_RED + if (q_is_red(cl->q_)) + red_getstats(cl->red_, &statsp->red[0]); +#endif +#ifdef ALTQ_RIO + if (q_is_rio(cl->q_)) + rio_getstats((rio_t *)cl->red_, &statsp->red[0]); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + codel_getstats(cl->codel_, &statsp->codel); +#endif +} + +int +cbq_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + int s, error; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + s = splnet(); + error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc, + cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL); + splx(s); + return (error); +} + +int +cbq_add_altq(struct pf_altq *a) +{ + cbq_state_t *cbqp; + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENODEV); + + /* allocate and initialize cbq_state_t */ + cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cbqp == NULL) + return (ENOMEM); + CALLOUT_INIT(&cbqp->cbq_callout); + cbqp->cbq_qlen = 0; + cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ + + /* keep the 
state in pf_altq */ + a->altq_disc = cbqp; + + return (0); +} + +int +cbq_remove_altq(struct pf_altq *a) +{ + cbq_state_t *cbqp; + + if ((cbqp = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + cbq_clear_interface(cbqp); + + if (cbqp->ifnp.default_) + cbq_class_destroy(cbqp, cbqp->ifnp.default_); + if (cbqp->ifnp.root_) + cbq_class_destroy(cbqp, cbqp->ifnp.root_); + + /* deallocate cbq_state_t */ + free(cbqp, M_DEVBUF); + + return (0); +} + +int +cbq_add_queue(struct pf_altq *a) +{ + struct rm_class *borrow, *parent; + cbq_state_t *cbqp; + struct rm_class *cl; + struct cbq_opts *opts; + int i; + + if ((cbqp = a->altq_disc) == NULL) + return (EINVAL); + if (a->qid == 0) + return (EINVAL); + + /* + * find a free slot in the class table. if the slot matching + * the lower bits of qid is free, use this slot. otherwise, + * use the first free slot. + */ + i = a->qid % CBQ_MAX_CLASSES; + if (cbqp->cbq_class_tbl[i] != NULL) { + for (i = 0; i < CBQ_MAX_CLASSES; i++) + if (cbqp->cbq_class_tbl[i] == NULL) + break; + if (i == CBQ_MAX_CLASSES) + return (EINVAL); + } + + opts = &a->pq_u.cbq_opts; + /* check parameters */ + if (a->priority >= CBQ_MAXPRI) + return (EINVAL); + + /* Get pointers to parent and borrow classes. */ + parent = clh_to_clp(cbqp, a->parent_qid); + if (opts->flags & CBQCLF_BORROW) + borrow = parent; + else + borrow = NULL; + + /* + * A class must borrow from it's parent or it can not + * borrow at all. Hence, borrow can be null. 
+ */ + if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) { + printf("cbq_add_queue: no parent class!\n"); + return (EINVAL); + } + + if ((borrow != parent) && (borrow != NULL)) { + printf("cbq_add_class: borrow class != parent\n"); + return (EINVAL); + } + + /* + * check parameters + */ + switch (opts->flags & CBQCLF_CLASSMASK) { + case CBQCLF_ROOTCLASS: + if (parent != NULL) + return (EINVAL); + if (cbqp->ifnp.root_) + return (EINVAL); + break; + case CBQCLF_DEFCLASS: + if (cbqp->ifnp.default_) + return (EINVAL); + break; + case 0: + if (a->qid == 0) + return (EINVAL); + break; + default: + /* more than two flags bits set */ + return (EINVAL); + } + + /* + * create a class. if this is a root class, initialize the + * interface. + */ + if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { + rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte, + cbqrestart, a->qlimit, RM_MAXQUEUED, + opts->maxidle, opts->minidle, opts->offtime, + opts->flags); + cl = cbqp->ifnp.root_; + } else { + cl = rmc_newclass(a->priority, + &cbqp->ifnp, opts->ns_per_byte, + rmc_delay_action, a->qlimit, parent, borrow, + opts->maxidle, opts->minidle, opts->offtime, + opts->pktsize, opts->flags); + } + if (cl == NULL) + return (ENOMEM); + + /* return handle to user space. */ + cl->stats_.handle = a->qid; + cl->stats_.depth = cl->depth_; + + /* save the allocated class */ + cbqp->cbq_class_tbl[i] = cl; + + if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) + cbqp->ifnp.default_ = cl; + + return (0); +} + +int +cbq_remove_queue(struct pf_altq *a) +{ + struct rm_class *cl; + cbq_state_t *cbqp; + int i; + + if ((cbqp = a->altq_disc) == NULL) + return (EINVAL); + + if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) + return (EINVAL); + + /* if we are a parent class, then return an error. 
*/ + if (is_a_parent_class(cl)) + return (EINVAL); + + /* delete the class */ + rmc_delete_class(&cbqp->ifnp, cl); + + /* + * free the class handle + */ + for (i = 0; i < CBQ_MAX_CLASSES; i++) + if (cbqp->cbq_class_tbl[i] == cl) { + cbqp->cbq_class_tbl[i] = NULL; + if (cl == cbqp->ifnp.root_) + cbqp->ifnp.root_ = NULL; + if (cl == cbqp->ifnp.default_) + cbqp->ifnp.default_ = NULL; + break; + } + + return (0); +} + +int +cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + cbq_state_t *cbqp; + struct rm_class *cl; + class_stats_t stats; + int error = 0; + + if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) + return (EINVAL); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + get_class_stats(&stats, cl); + + if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + return (error); + *nbytes = sizeof(stats); + return (0); +} + +/* + * int + * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr) + * - Queue data packets. + * + * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper + * layer (e.g. ether_output). cbq_enqueue queues the given packet + * to the cbq, then invokes the driver's start routine. + * + * Assumptions: called in splimp + * Returns: 0 if the queueing is successful. + * ENOBUFS if a packet dropping occurred as a result of + * the queueing. 
+ */ + +static int +cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) +{ + cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; + struct rm_class *cl; + struct pf_mtag *t; + int len; + + IFQ_LOCK_ASSERT(ifq); + + /* grab class set by classifier */ + if ((m->m_flags & M_PKTHDR) == 0) { + /* should not happen */ + printf("altq: packet for %s does not have pkthdr\n", + ifq->altq_ifp->if_xname); + m_freem(m); + return (ENOBUFS); + } + cl = NULL; + if ((t = pf_find_mtag(m)) != NULL) + cl = clh_to_clp(cbqp, t->qid); +#ifdef ALTQ3_COMPAT + else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) + cl = pktattr->pattr_class; +#endif + if (cl == NULL) { + cl = cbqp->ifnp.default_; + if (cl == NULL) { + m_freem(m); + return (ENOBUFS); + } + } +#ifdef ALTQ3_COMPAT + if (pktattr != NULL) + cl->pktattr_ = pktattr; /* save proto hdr used by ECN */ + else +#endif + cl->pktattr_ = NULL; + len = m_pktlen(m); + if (rmc_queue_packet(cl, m) != 0) { + /* drop occurred. some mbuf was freed in rmc_queue_packet. */ + PKTCNTR_ADD(&cl->stats_.drop_cnt, len); + return (ENOBUFS); + } + + /* successfully queued. */ + ++cbqp->cbq_qlen; + IFQ_INC_LEN(ifq); + return (0); +} + +static struct mbuf * +cbq_dequeue(struct ifaltq *ifq, int op) +{ + cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; + struct mbuf *m; + + IFQ_LOCK_ASSERT(ifq); + + m = rmc_dequeue_next(&cbqp->ifnp, op); + + if (m && op == ALTDQ_REMOVE) { + --cbqp->cbq_qlen; /* decrement # of packets in cbq */ + IFQ_DEC_LEN(ifq); + + /* Update the class. */ + rmc_update_class_util(&cbqp->ifnp); + } + return (m); +} + +/* + * void + * cbqrestart(queue_t *) - Restart sending of data. + * called from rmc_restart in splimp via timeout after waking up + * a suspended class. 
+ * Returns: NONE + */ + +static void +cbqrestart(struct ifaltq *ifq) +{ + cbq_state_t *cbqp; + struct ifnet *ifp; + + IFQ_LOCK_ASSERT(ifq); + + if (!ALTQ_IS_ENABLED(ifq)) + /* cbq must have been detached */ + return; + + if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL) + /* should not happen */ + return; + + ifp = ifq->altq_ifp; + if (ifp->if_start && + cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { + IFQ_UNLOCK(ifq); + (*ifp->if_start)(ifp); + IFQ_LOCK(ifq); + } +} + +static void cbq_purge(cbq_state_t *cbqp) +{ + struct rm_class *cl; + int i; + + for (i = 0; i < CBQ_MAX_CLASSES; i++) + if ((cl = cbqp->cbq_class_tbl[i]) != NULL) + rmc_dropall(cl); + if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_)) + cbqp->ifnp.ifq_->ifq_len = 0; +} +#ifdef ALTQ3_COMPAT + +static int +cbq_add_class(acp) + struct cbq_add_class *acp; +{ + char *ifacename; + struct rm_class *borrow, *parent; + cbq_state_t *cbqp; + + ifacename = acp->cbq_iface.cbq_ifacename; + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + + /* check parameters */ + if (acp->cbq_class.priority >= CBQ_MAXPRI || + acp->cbq_class.maxq > CBQ_MAXQSIZE) + return (EINVAL); + + /* Get pointers to parent and borrow classes. */ + parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle); + borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle); + + /* + * A class must borrow from it's parent or it can not + * borrow at all. Hence, borrow can be null. 
+ */ + if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) { + printf("cbq_add_class: no parent class!\n"); + return (EINVAL); + } + + if ((borrow != parent) && (borrow != NULL)) { + printf("cbq_add_class: borrow class != parent\n"); + return (EINVAL); + } + + return cbq_class_create(cbqp, acp, parent, borrow); +} + +static int +cbq_delete_class(dcp) + struct cbq_delete_class *dcp; +{ + char *ifacename; + struct rm_class *cl; + cbq_state_t *cbqp; + + ifacename = dcp->cbq_iface.cbq_ifacename; + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL) + return (EINVAL); + + /* if we are a parent class, then return an error. */ + if (is_a_parent_class(cl)) + return (EINVAL); + + /* if a filter has a reference to this class delete the filter */ + acc_discard_filters(&cbqp->cbq_classifier, cl, 0); + + return cbq_class_destroy(cbqp, cl); +} + +static int +cbq_modify_class(acp) + struct cbq_modify_class *acp; +{ + char *ifacename; + struct rm_class *cl; + cbq_state_t *cbqp; + + ifacename = acp->cbq_iface.cbq_ifacename; + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + + /* Get pointer to this class */ + if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL) + return (EINVAL); + + if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte, + acp->cbq_class.maxq, acp->cbq_class.maxidle, + acp->cbq_class.minidle, acp->cbq_class.offtime, + acp->cbq_class.pktsize) < 0) + return (EINVAL); + return (0); +} + +/* + * struct rm_class * + * cbq_class_create(cbq_mod_state_t *cbqp, struct cbq_add_class *acp, + * struct rm_class *parent, struct rm_class *borrow) + * + * This function create a new traffic class in the CBQ class hierarchy of + * given parameters. The class that created is either the root, default, + * or a new dynamic class. If CBQ is not initilaized, the root class + * will be created. 
+ */ +static int +cbq_class_create(cbqp, acp, parent, borrow) + cbq_state_t *cbqp; + struct cbq_add_class *acp; + struct rm_class *parent, *borrow; +{ + struct rm_class *cl; + cbq_class_spec_t *spec = &acp->cbq_class; + u_int32_t chandle; + int i; + + /* + * allocate class handle + */ + for (i = 1; i < CBQ_MAX_CLASSES; i++) + if (cbqp->cbq_class_tbl[i] == NULL) + break; + if (i == CBQ_MAX_CLASSES) + return (EINVAL); + chandle = i; /* use the slot number as class handle */ + + /* + * create a class. if this is a root class, initialize the + * interface. + */ + if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { + rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte, + cbqrestart, spec->maxq, RM_MAXQUEUED, + spec->maxidle, spec->minidle, spec->offtime, + spec->flags); + cl = cbqp->ifnp.root_; + } else { + cl = rmc_newclass(spec->priority, + &cbqp->ifnp, spec->nano_sec_per_byte, + rmc_delay_action, spec->maxq, parent, borrow, + spec->maxidle, spec->minidle, spec->offtime, + spec->pktsize, spec->flags); + } + if (cl == NULL) + return (ENOMEM); + + /* return handle to user space. */ + acp->cbq_class_handle = chandle; + + cl->stats_.handle = chandle; + cl->stats_.depth = cl->depth_; + + /* save the allocated class */ + cbqp->cbq_class_tbl[i] = cl; + + if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) + cbqp->ifnp.default_ = cl; + if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS) + cbqp->ifnp.ctl_ = cl; + + return (0); +} + +static int +cbq_add_filter(afp) + struct cbq_add_filter *afp; +{ + char *ifacename; + cbq_state_t *cbqp; + struct rm_class *cl; + + ifacename = afp->cbq_iface.cbq_ifacename; + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + + /* Get the pointer to class. 
*/ + if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL) + return (EINVAL); + + return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter, + cl, &afp->cbq_filter_handle); +} + +static int +cbq_delete_filter(dfp) + struct cbq_delete_filter *dfp; +{ + char *ifacename; + cbq_state_t *cbqp; + + ifacename = dfp->cbq_iface.cbq_ifacename; + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + + return acc_delete_filter(&cbqp->cbq_classifier, + dfp->cbq_filter_handle); +} + +/* + * cbq_clear_hierarchy deletes all classes and their filters on the + * given interface. + */ +static int +cbq_clear_hierarchy(ifacep) + struct cbq_interface *ifacep; +{ + char *ifacename; + cbq_state_t *cbqp; + + ifacename = ifacep->cbq_ifacename; + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + + return cbq_clear_interface(cbqp); +} + +/* + * static int + * cbq_set_enable(struct cbq_enable *ep) - this function processed the + * ioctl request to enable class based queueing. It searches the list + * of interfaces for the specified interface and then enables CBQ on + * that interface. + * + * Returns: 0, for no error. + * EBADF, for specified inteface not found. 
+ */ + +static int +cbq_set_enable(ep, enable) + struct cbq_interface *ep; + int enable; +{ + int error = 0; + cbq_state_t *cbqp; + char *ifacename; + + ifacename = ep->cbq_ifacename; + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + + switch (enable) { + case ENABLE: + if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL || + cbqp->ifnp.ctl_ == NULL) { + if (cbqp->ifnp.root_ == NULL) + printf("No Root Class for %s\n", ifacename); + if (cbqp->ifnp.default_ == NULL) + printf("No Default Class for %s\n", ifacename); + if (cbqp->ifnp.ctl_ == NULL) + printf("No Control Class for %s\n", ifacename); + error = EINVAL; + } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) { + cbqp->cbq_qlen = 0; + } + break; + + case DISABLE: + error = altq_disable(cbqp->ifnp.ifq_); + break; + } + return (error); +} + +static int +cbq_getstats(gsp) + struct cbq_getstats *gsp; +{ + char *ifacename; + int i, n, nclasses; + cbq_state_t *cbqp; + struct rm_class *cl; + class_stats_t stats, *usp; + int error = 0; + + ifacename = gsp->iface.cbq_ifacename; + nclasses = gsp->nclasses; + usp = gsp->stats; + + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + if (nclasses <= 0) + return (EINVAL); + + for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) { + while ((cl = cbqp->cbq_class_tbl[i]) == NULL) + if (++i >= CBQ_MAX_CLASSES) + goto out; + + get_class_stats(&stats, cl); + stats.handle = cl->stats_.handle; + + if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, + sizeof(stats))) != 0) + return (error); + } + + out: + gsp->nclasses = n; + return (error); +} + +static int +cbq_ifattach(ifacep) + struct cbq_interface *ifacep; +{ + int error = 0; + char *ifacename; + cbq_state_t *new_cbqp; + struct ifnet *ifp; + + ifacename = ifacep->cbq_ifacename; + if ((ifp = ifunit(ifacename)) == NULL) + return (ENXIO); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENXIO); + + /* allocate and initialize cbq_state_t */ + new_cbqp = 
malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK); + if (new_cbqp == NULL) + return (ENOMEM); + bzero(new_cbqp, sizeof(cbq_state_t)); + CALLOUT_INIT(&new_cbqp->cbq_callout); + + new_cbqp->cbq_qlen = 0; + new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ + + /* + * set CBQ to this ifnet structure. + */ + error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp, + cbq_enqueue, cbq_dequeue, cbq_request, + &new_cbqp->cbq_classifier, acc_classify); + if (error) { + free(new_cbqp, M_DEVBUF); + return (error); + } + + /* prepend to the list of cbq_state_t's. */ + new_cbqp->cbq_next = cbq_list; + cbq_list = new_cbqp; + + return (0); +} + +static int +cbq_ifdetach(ifacep) + struct cbq_interface *ifacep; +{ + char *ifacename; + cbq_state_t *cbqp; + + ifacename = ifacep->cbq_ifacename; + if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) + return (EBADF); + + (void)cbq_set_enable(ifacep, DISABLE); + + cbq_clear_interface(cbqp); + + /* remove CBQ from the ifnet structure. */ + (void)altq_detach(cbqp->ifnp.ifq_); + + /* remove from the list of cbq_state_t's. 
*/ + if (cbq_list == cbqp) + cbq_list = cbqp->cbq_next; + else { + cbq_state_t *cp; + + for (cp = cbq_list; cp != NULL; cp = cp->cbq_next) + if (cp->cbq_next == cbqp) { + cp->cbq_next = cbqp->cbq_next; + break; + } + ASSERT(cp != NULL); + } + + /* deallocate cbq_state_t */ + free(cbqp, M_DEVBUF); + + return (0); +} + +/* + * cbq device interface + */ + +altqdev_decl(cbq); + +int +cbqopen(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + return (0); +} + +int +cbqclose(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + struct ifnet *ifp; + struct cbq_interface iface; + int err, error = 0; + + while (cbq_list) { + ifp = cbq_list->ifnp.ifq_->altq_ifp; + sprintf(iface.cbq_ifacename, "%s", ifp->if_xname); + err = cbq_ifdetach(&iface); + if (err != 0 && error == 0) + error = err; + } + + return (error); +} + +int +cbqioctl(dev, cmd, addr, flag, p) + dev_t dev; + ioctlcmd_t cmd; + caddr_t addr; + int flag; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + int error = 0; + + /* check cmd for superuser only */ + switch (cmd) { + case CBQ_GETSTATS: + /* currently only command that an ordinary user can call */ + break; + default: +#if (__FreeBSD_version > 700000) + error = priv_check(p, PRIV_ALTQ_MANAGE); +#elsif (__FreeBSD_version > 400000) + error = suser(p); +#else + error = suser(p->p_ucred, &p->p_acflag); +#endif + if (error) + return (error); + break; + } + + switch (cmd) { + + case CBQ_ENABLE: + error = cbq_set_enable((struct cbq_interface *)addr, ENABLE); + break; + + case CBQ_DISABLE: + error = cbq_set_enable((struct cbq_interface *)addr, DISABLE); + break; + + case CBQ_ADD_FILTER: + error = cbq_add_filter((struct cbq_add_filter *)addr); + break; + + case CBQ_DEL_FILTER: + error = cbq_delete_filter((struct cbq_delete_filter *)addr); + break; + + 
case CBQ_ADD_CLASS: + error = cbq_add_class((struct cbq_add_class *)addr); + break; + + case CBQ_DEL_CLASS: + error = cbq_delete_class((struct cbq_delete_class *)addr); + break; + + case CBQ_MODIFY_CLASS: + error = cbq_modify_class((struct cbq_modify_class *)addr); + break; + + case CBQ_CLEAR_HIERARCHY: + error = cbq_clear_hierarchy((struct cbq_interface *)addr); + break; + + case CBQ_IF_ATTACH: + error = cbq_ifattach((struct cbq_interface *)addr); + break; + + case CBQ_IF_DETACH: + error = cbq_ifdetach((struct cbq_interface *)addr); + break; + + case CBQ_GETSTATS: + error = cbq_getstats((struct cbq_getstats *)addr); + break; + + default: + error = EINVAL; + break; + } + + return error; +} + +#if 0 +/* for debug */ +static void cbq_class_dump(int); + +static void cbq_class_dump(i) + int i; +{ + struct rm_class *cl; + rm_class_stats_t *s; + struct _class_queue_ *q; + + if (cbq_list == NULL) { + printf("cbq_class_dump: no cbq_state found\n"); + return; + } + cl = cbq_list->cbq_class_tbl[i]; + + printf("class %d cl=%p\n", i, cl); + if (cl != NULL) { + s = &cl->stats_; + q = cl->q_; + + printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n", + cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_); + printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n", + cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_, + cl->maxidle_); + printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n", + cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_); + printf("handle=%d, depth=%d, packets=%d, bytes=%d\n", + s->handle, s->depth, + (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes); + printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n", + s->over, s->borrows, (int)s->drop_cnt.packets, + s->overactions, s->delays); + printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n", + q->tail_, q->head_, q->qlen_, q->qlim_, + q->qthresh_, q->qtype_); + } +} +#endif /* 0 */ + +#ifdef KLD_MODULE + +static struct altqsw cbq_sw = + {"cbq", cbqopen, cbqclose, 
cbqioctl}; + +ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw); +MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1); +MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1); + +#endif /* KLD_MODULE */ +#endif /* ALTQ3_COMPAT */ + +#endif /* ALTQ_CBQ */ diff --git a/freebsd/sys/net/altq/altq_cbq.h b/freebsd/sys/net/altq/altq_cbq.h new file mode 100644 index 00000000..51e7cf9a --- /dev/null +++ b/freebsd/sys/net/altq/altq_cbq.h @@ -0,0 +1,225 @@ +/*- + * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the SMCC Technology + * Development Group at Sun Microsystems, Inc. + * + * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE + * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is + * provided "as is" without express or implied warranty of any kind. + * + * These notices must be retained in any copies of any part of this software. 
+ * + * $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_CBQ_H_ +#define _ALTQ_ALTQ_CBQ_H_ + +#include <net/altq/altq.h> +#include <net/altq/altq_rmclass.h> +#include <net/altq/altq_codel.h> +#include <net/altq/altq_red.h> +#include <net/altq/altq_rio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define NULL_CLASS_HANDLE 0 + +/* class flags should be same as class flags in rm_class.h */ +#define CBQCLF_RED 0x0001 /* use RED */ +#define CBQCLF_ECN 0x0002 /* use RED/ECN */ +#define CBQCLF_RIO 0x0004 /* use RIO */ +#define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ +#define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define CBQCLF_BORROW 0x0020 /* borrow from parent */ +#define CBQCLF_CODEL 0x0040 /* use CoDel */ + +/* class flags only for root class */ +#define CBQCLF_WRR 0x0100 /* weighted-round robin */ +#define CBQCLF_EFFICIENT 0x0200 /* work-conserving */ + +/* class flags for special classes */ +#define CBQCLF_ROOTCLASS 0x1000 /* root class */ +#define CBQCLF_DEFCLASS 0x2000 /* default class */ +#ifdef ALTQ3_COMPAT +#define CBQCLF_CTLCLASS 0x4000 /* control class */ +#endif +#define CBQCLF_CLASSMASK 0xf000 /* class mask */ + +#define CBQ_MAXQSIZE 200 +#define CBQ_MAXPRI RM_MAXPRIO + +typedef struct _cbq_class_stats_ { + u_int32_t handle; + u_int depth; + + struct pktcntr xmit_cnt; /* packets sent in this class */ + struct pktcntr drop_cnt; /* dropped packets */ + u_int over; /* # times went over limit */ + u_int borrows; /* # times tried to borrow */ + u_int overactions; /* # times invoked overlimit action */ + u_int delays; /* # times invoked delay actions */ + + /* other static class parameters useful for debugging */ + int priority; + int maxidle; + int minidle; + int offtime; + int qmax; + int ns_per_byte; + int wrr_allot; + + int qcnt; /* # packets in queue */ + int avgidle; + + /* codel, red and rio related info */ + int qtype; + struct redstats red[3]; + struct codel_stats 
codel; +} class_stats_t; + +#ifdef ALTQ3_COMPAT +/* + * Define structures associated with IOCTLS for cbq. + */ + +/* + * Define the CBQ interface structure. This must be included in all + * IOCTL's such that the CBQ driver may find the appropriate CBQ module + * associated with the network interface to be affected. + */ +struct cbq_interface { + char cbq_ifacename[IFNAMSIZ]; +}; + +typedef struct cbq_class_spec { + u_int priority; + u_int nano_sec_per_byte; + u_int maxq; + u_int maxidle; + int minidle; + u_int offtime; + u_int32_t parent_class_handle; + u_int32_t borrow_class_handle; + + u_int pktsize; + int flags; +} cbq_class_spec_t; + +struct cbq_add_class { + struct cbq_interface cbq_iface; + + cbq_class_spec_t cbq_class; + u_int32_t cbq_class_handle; +}; + +struct cbq_delete_class { + struct cbq_interface cbq_iface; + u_int32_t cbq_class_handle; +}; + +struct cbq_modify_class { + struct cbq_interface cbq_iface; + + cbq_class_spec_t cbq_class; + u_int32_t cbq_class_handle; +}; + +struct cbq_add_filter { + struct cbq_interface cbq_iface; + u_int32_t cbq_class_handle; + struct flow_filter cbq_filter; + + u_long cbq_filter_handle; +}; + +struct cbq_delete_filter { + struct cbq_interface cbq_iface; + u_long cbq_filter_handle; +}; + +/* number of classes are returned in nclasses field */ +struct cbq_getstats { + struct cbq_interface iface; + int nclasses; + class_stats_t *stats; +}; + +/* + * Define IOCTLs for CBQ. 
+ */ +#define CBQ_IF_ATTACH _IOW('Q', 1, struct cbq_interface) +#define CBQ_IF_DETACH _IOW('Q', 2, struct cbq_interface) +#define CBQ_ENABLE _IOW('Q', 3, struct cbq_interface) +#define CBQ_DISABLE _IOW('Q', 4, struct cbq_interface) +#define CBQ_CLEAR_HIERARCHY _IOW('Q', 5, struct cbq_interface) +#define CBQ_ADD_CLASS _IOWR('Q', 7, struct cbq_add_class) +#define CBQ_DEL_CLASS _IOW('Q', 8, struct cbq_delete_class) +#define CBQ_MODIFY_CLASS _IOWR('Q', 9, struct cbq_modify_class) +#define CBQ_ADD_FILTER _IOWR('Q', 10, struct cbq_add_filter) +#define CBQ_DEL_FILTER _IOW('Q', 11, struct cbq_delete_filter) +#define CBQ_GETSTATS _IOWR('Q', 12, struct cbq_getstats) +#endif /* ALTQ3_COMPAT */ + +#ifdef _KERNEL +/* + * Define macros only good for kernel drivers and modules. + */ +#define CBQ_WATCHDOG (hz / 20) +#define CBQ_TIMEOUT 10 +#define CBQ_LS_TIMEOUT (20 * hz / 1000) + +#define CBQ_MAX_CLASSES 256 + +#ifdef ALTQ3_COMPAT +#define CBQ_MAX_FILTERS 256 + +#define DISABLE 0x00 +#define ENABLE 0x01 +#endif /* ALTQ3_COMPAT */ + +/* + * Define State structures. + */ +typedef struct cbqstate { +#ifdef ALTQ3_COMPAT + struct cbqstate *cbq_next; +#endif + int cbq_qlen; /* # of packets in cbq */ + struct rm_class *cbq_class_tbl[CBQ_MAX_CLASSES]; + + struct rm_ifdat ifnp; + struct callout cbq_callout; /* for timeouts */ +#ifdef ALTQ3_CLFIER_COMPAT + struct acc_classifier cbq_classifier; +#endif +} cbq_state_t; + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* !_ALTQ_ALTQ_CBQ_H_ */ diff --git a/freebsd/sys/net/altq/altq_cdnr.c b/freebsd/sys/net/altq/altq_cdnr.c new file mode 100644 index 00000000..f456ce83 --- /dev/null +++ b/freebsd/sys/net/altq/altq_cdnr.c @@ -0,0 +1,1384 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (C) 1999-2002 + * Sony Computer Science Laboratories Inc. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ + * $FreeBSD$ + */ + +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/kernel.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#ifdef INET6 +#include <netinet/ip6.h> +#endif + +#include <net/altq/if_altq.h> +#include <net/altq/altq.h> +#ifdef ALTQ3_COMPAT +#include <net/altq/altq_conf.h> +#endif +#include <net/altq/altq_cdnr.h> + +#ifdef ALTQ3_COMPAT +/* + * diffserv traffic conditioning module + */ + +int altq_cdnr_enabled = 0; + +/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */ +#ifdef ALTQ_CDNR + +/* cdnr_list keeps all cdnr's allocated. 
*/ +static LIST_HEAD(, top_cdnr) tcb_list; + +static int altq_cdnr_input(struct mbuf *, int); +static struct top_cdnr *tcb_lookup(char *ifname); +static struct cdnr_block *cdnr_handle2cb(u_long); +static u_long cdnr_cb2handle(struct cdnr_block *); +static void *cdnr_cballoc(struct top_cdnr *, int, + struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *)); +static void cdnr_cbdestroy(void *); +static int tca_verify_action(struct tc_action *); +static void tca_import_action(struct tc_action *, struct tc_action *); +static void tca_invalidate_action(struct tc_action *); + +static int generic_element_destroy(struct cdnr_block *); +static struct top_cdnr *top_create(struct ifaltq *); +static int top_destroy(struct top_cdnr *); +static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *); +static int element_destroy(struct cdnr_block *); +static void tb_import_profile(struct tbe *, struct tb_profile *); +static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *, + struct tc_action *, struct tc_action *); +static int tbm_destroy(struct tbmeter *); +static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *); +static struct trtcm *trtcm_create(struct top_cdnr *, + struct tb_profile *, struct tb_profile *, + struct tc_action *, struct tc_action *, struct tc_action *, + int); +static int trtcm_destroy(struct trtcm *); +static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); +static struct tswtcm *tswtcm_create(struct top_cdnr *, + u_int32_t, u_int32_t, u_int32_t, + struct tc_action *, struct tc_action *, struct tc_action *); +static int tswtcm_destroy(struct tswtcm *); +static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); + +static int cdnrcmd_if_attach(char *); +static int cdnrcmd_if_detach(char *); +static int cdnrcmd_add_element(struct cdnr_add_element *); +static int cdnrcmd_delete_element(struct cdnr_delete_element *); +static int 
cdnrcmd_add_filter(struct cdnr_add_filter *); +static int cdnrcmd_delete_filter(struct cdnr_delete_filter *); +static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *); +static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *); +static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *); +static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *); +static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *); +static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *); +static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *); +static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *); +static int cdnrcmd_get_stats(struct cdnr_get_stats *); + +altqdev_decl(cdnr); + +/* + * top level input function called from ip_input. + * should be called before converting header fields to host-byte-order. + */ +int +altq_cdnr_input(m, af) + struct mbuf *m; + int af; /* address family */ +{ + struct ifnet *ifp; + struct ip *ip; + struct top_cdnr *top; + struct tc_action *tca; + struct cdnr_block *cb; + struct cdnr_pktinfo pktinfo; + + ifp = m->m_pkthdr.rcvif; + if (!ALTQ_IS_CNDTNING(&ifp->if_snd)) + /* traffic conditioner is not enabled on this interface */ + return (1); + + top = ifp->if_snd.altq_cdnr; + + ip = mtod(m, struct ip *); +#ifdef INET6 + if (af == AF_INET6) { + u_int32_t flowlabel; + + flowlabel = ((struct ip6_hdr *)ip)->ip6_flow; + pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK; + } else +#endif + pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK; + pktinfo.pkt_len = m_pktlen(m); + + tca = NULL; + + cb = acc_classify(&top->tc_classifier, m, af); + if (cb != NULL) + tca = &cb->cb_action; + + if (tca == NULL) + tca = &top->tc_block.cb_action; + + while (1) { + PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len); + + switch (tca->tca_code) { + case TCACODE_PASS: + return (1); + case TCACODE_DROP: + m_freem(m); + return (0); + case TCACODE_RETURN: + return (0); + case TCACODE_MARK: +#ifdef INET6 + if (af == AF_INET6) { + struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; + 
u_int32_t flowlabel; + + flowlabel = ntohl(ip6->ip6_flow); + flowlabel = (tca->tca_dscp << 20) | + (flowlabel & ~(DSCP_MASK << 20)); + ip6->ip6_flow = htonl(flowlabel); + } else +#endif + ip->ip_tos = tca->tca_dscp | + (ip->ip_tos & DSCP_CUMASK); + return (1); + case TCACODE_NEXT: + cb = tca->tca_next; + tca = (*cb->cb_input)(cb, &pktinfo); + break; + case TCACODE_NONE: + default: + return (1); + } + } +} + +static struct top_cdnr * +tcb_lookup(ifname) + char *ifname; +{ + struct top_cdnr *top; + struct ifnet *ifp; + + if ((ifp = ifunit(ifname)) != NULL) + LIST_FOREACH(top, &tcb_list, tc_next) + if (top->tc_ifq->altq_ifp == ifp) + return (top); + return (NULL); +} + +static struct cdnr_block * +cdnr_handle2cb(handle) + u_long handle; +{ + struct cdnr_block *cb; + + cb = (struct cdnr_block *)handle; + if (handle != ALIGN(cb)) + return (NULL); + + if (cb == NULL || cb->cb_handle != handle) + return (NULL); + return (cb); +} + +static u_long +cdnr_cb2handle(cb) + struct cdnr_block *cb; +{ + return (cb->cb_handle); +} + +static void * +cdnr_cballoc(top, type, input_func) + struct top_cdnr *top; + int type; + struct tc_action *(*input_func)(struct cdnr_block *, + struct cdnr_pktinfo *); +{ + struct cdnr_block *cb; + int size; + + switch (type) { + case TCETYPE_TOP: + size = sizeof(struct top_cdnr); + break; + case TCETYPE_ELEMENT: + size = sizeof(struct cdnr_block); + break; + case TCETYPE_TBMETER: + size = sizeof(struct tbmeter); + break; + case TCETYPE_TRTCM: + size = sizeof(struct trtcm); + break; + case TCETYPE_TSWTCM: + size = sizeof(struct tswtcm); + break; + default: + return (NULL); + } + + cb = malloc(size, M_DEVBUF, M_WAITOK); + if (cb == NULL) + return (NULL); + bzero(cb, size); + + cb->cb_len = size; + cb->cb_type = type; + cb->cb_ref = 0; + cb->cb_handle = (u_long)cb; + if (top == NULL) + cb->cb_top = (struct top_cdnr *)cb; + else + cb->cb_top = top; + + if (input_func != NULL) { + /* + * if this cdnr has an action function, + * make tc_action to call 
itself. + */ + cb->cb_action.tca_code = TCACODE_NEXT; + cb->cb_action.tca_next = cb; + cb->cb_input = input_func; + } else + cb->cb_action.tca_code = TCACODE_NONE; + + /* if this isn't top, register the element to the top level cdnr */ + if (top != NULL) + LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next); + + return ((void *)cb); +} + +static void +cdnr_cbdestroy(cblock) + void *cblock; +{ + struct cdnr_block *cb = cblock; + + /* delete filters belonging to this cdnr */ + acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0); + + /* remove from the top level cdnr */ + if (cb->cb_top != cblock) + LIST_REMOVE(cb, cb_next); + + free(cb, M_DEVBUF); +} + +/* + * conditioner common destroy routine + */ +static int +generic_element_destroy(cb) + struct cdnr_block *cb; +{ + int error = 0; + + switch (cb->cb_type) { + case TCETYPE_TOP: + error = top_destroy((struct top_cdnr *)cb); + break; + case TCETYPE_ELEMENT: + error = element_destroy(cb); + break; + case TCETYPE_TBMETER: + error = tbm_destroy((struct tbmeter *)cb); + break; + case TCETYPE_TRTCM: + error = trtcm_destroy((struct trtcm *)cb); + break; + case TCETYPE_TSWTCM: + error = tswtcm_destroy((struct tswtcm *)cb); + break; + default: + error = EINVAL; + } + return (error); +} + +static int +tca_verify_action(utca) + struct tc_action *utca; +{ + switch (utca->tca_code) { + case TCACODE_PASS: + case TCACODE_DROP: + case TCACODE_MARK: + /* these are ok */ + break; + + case TCACODE_HANDLE: + /* verify handle value */ + if (cdnr_handle2cb(utca->tca_handle) == NULL) + return (-1); + break; + + case TCACODE_NONE: + case TCACODE_RETURN: + case TCACODE_NEXT: + default: + /* should not be passed from a user */ + return (-1); + } + return (0); +} + +static void +tca_import_action(ktca, utca) + struct tc_action *ktca, *utca; +{ + struct cdnr_block *cb; + + *ktca = *utca; + if (ktca->tca_code == TCACODE_HANDLE) { + cb = cdnr_handle2cb(ktca->tca_handle); + if (cb == NULL) { + ktca->tca_code = TCACODE_NONE; + return; + } + 
ktca->tca_code = TCACODE_NEXT; + ktca->tca_next = cb; + cb->cb_ref++; + } else if (ktca->tca_code == TCACODE_MARK) { + ktca->tca_dscp &= DSCP_MASK; + } + return; +} + +static void +tca_invalidate_action(tca) + struct tc_action *tca; +{ + struct cdnr_block *cb; + + if (tca->tca_code == TCACODE_NEXT) { + cb = tca->tca_next; + if (cb == NULL) + return; + cb->cb_ref--; + } + tca->tca_code = TCACODE_NONE; +} + +/* + * top level traffic conditioner + */ +static struct top_cdnr * +top_create(ifq) + struct ifaltq *ifq; +{ + struct top_cdnr *top; + + if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL) + return (NULL); + + top->tc_ifq = ifq; + /* set default action for the top level conditioner */ + top->tc_block.cb_action.tca_code = TCACODE_PASS; + + LIST_INSERT_HEAD(&tcb_list, top, tc_next); + + ifq->altq_cdnr = top; + + return (top); +} + +static int +top_destroy(top) + struct top_cdnr *top; +{ + struct cdnr_block *cb; + + if (ALTQ_IS_CNDTNING(top->tc_ifq)) + ALTQ_CLEAR_CNDTNING(top->tc_ifq); + top->tc_ifq->altq_cdnr = NULL; + + /* + * destroy all the conditioner elements belonging to this interface + */ + while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) { + while (cb != NULL && cb->cb_ref > 0) + cb = LIST_NEXT(cb, cb_next); + if (cb != NULL) + generic_element_destroy(cb); + } + + LIST_REMOVE(top, tc_next); + + cdnr_cbdestroy(top); + + /* if there is no active conditioner, remove the input hook */ + if (altq_input != NULL) { + LIST_FOREACH(top, &tcb_list, tc_next) + if (ALTQ_IS_CNDTNING(top->tc_ifq)) + break; + if (top == NULL) + altq_input = NULL; + } + + return (0); +} + +/* + * simple tc elements without input function (e.g., dropper and makers). 
+ */ +static struct cdnr_block * +element_create(top, action) + struct top_cdnr *top; + struct tc_action *action; +{ + struct cdnr_block *cb; + + if (tca_verify_action(action) < 0) + return (NULL); + + if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL) + return (NULL); + + tca_import_action(&cb->cb_action, action); + + return (cb); +} + +static int +element_destroy(cb) + struct cdnr_block *cb; +{ + if (cb->cb_ref > 0) + return (EBUSY); + + tca_invalidate_action(&cb->cb_action); + + cdnr_cbdestroy(cb); + return (0); +} + +/* + * internal representation of token bucket parameters + * rate: byte_per_unittime << 32 + * (((bits_per_sec) / 8) << 32) / machclk_freq + * depth: byte << 32 + * + */ +#define TB_SHIFT 32 +#define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT) +#define TB_UNSCALE(x) ((x) >> TB_SHIFT) + +static void +tb_import_profile(tb, profile) + struct tbe *tb; + struct tb_profile *profile; +{ + tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq; + tb->depth = TB_SCALE(profile->depth); + if (tb->rate > 0) + tb->filluptime = tb->depth / tb->rate; + else + tb->filluptime = 0xffffffffffffffffLL; + tb->token = tb->depth; + tb->last = read_machclk(); +} + +/* + * simple token bucket meter + */ +static struct tbmeter * +tbm_create(top, profile, in_action, out_action) + struct top_cdnr *top; + struct tb_profile *profile; + struct tc_action *in_action, *out_action; +{ + struct tbmeter *tbm = NULL; + + if (tca_verify_action(in_action) < 0 + || tca_verify_action(out_action) < 0) + return (NULL); + + if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER, + tbm_input)) == NULL) + return (NULL); + + tb_import_profile(&tbm->tb, profile); + + tca_import_action(&tbm->in_action, in_action); + tca_import_action(&tbm->out_action, out_action); + + return (tbm); +} + +static int +tbm_destroy(tbm) + struct tbmeter *tbm; +{ + if (tbm->cdnrblk.cb_ref > 0) + return (EBUSY); + + tca_invalidate_action(&tbm->in_action); + tca_invalidate_action(&tbm->out_action); + + cdnr_cbdestroy(tbm); 
+ return (0); +} + +static struct tc_action * +tbm_input(cb, pktinfo) + struct cdnr_block *cb; + struct cdnr_pktinfo *pktinfo; +{ + struct tbmeter *tbm = (struct tbmeter *)cb; + u_int64_t len; + u_int64_t interval, now; + + len = TB_SCALE(pktinfo->pkt_len); + + if (tbm->tb.token < len) { + now = read_machclk(); + interval = now - tbm->tb.last; + if (interval >= tbm->tb.filluptime) + tbm->tb.token = tbm->tb.depth; + else { + tbm->tb.token += interval * tbm->tb.rate; + if (tbm->tb.token > tbm->tb.depth) + tbm->tb.token = tbm->tb.depth; + } + tbm->tb.last = now; + } + + if (tbm->tb.token < len) { + PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len); + return (&tbm->out_action); + } + + tbm->tb.token -= len; + PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len); + return (&tbm->in_action); +} + +/* + * two rate three color marker + * as described in draft-heinanen-diffserv-trtcm-01.txt + */ +static struct trtcm * +trtcm_create(top, cmtd_profile, peak_profile, + green_action, yellow_action, red_action, coloraware) + struct top_cdnr *top; + struct tb_profile *cmtd_profile, *peak_profile; + struct tc_action *green_action, *yellow_action, *red_action; + int coloraware; +{ + struct trtcm *tcm = NULL; + + if (tca_verify_action(green_action) < 0 + || tca_verify_action(yellow_action) < 0 + || tca_verify_action(red_action) < 0) + return (NULL); + + if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM, + trtcm_input)) == NULL) + return (NULL); + + tb_import_profile(&tcm->cmtd_tb, cmtd_profile); + tb_import_profile(&tcm->peak_tb, peak_profile); + + tca_import_action(&tcm->green_action, green_action); + tca_import_action(&tcm->yellow_action, yellow_action); + tca_import_action(&tcm->red_action, red_action); + + /* set dscps to use */ + if (tcm->green_action.tca_code == TCACODE_MARK) + tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK; + else + tcm->green_dscp = DSCP_AF11; + if (tcm->yellow_action.tca_code == TCACODE_MARK) + tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK; + else + 
tcm->yellow_dscp = DSCP_AF12; + if (tcm->red_action.tca_code == TCACODE_MARK) + tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK; + else + tcm->red_dscp = DSCP_AF13; + + tcm->coloraware = coloraware; + + return (tcm); +} + +static int +trtcm_destroy(tcm) + struct trtcm *tcm; +{ + if (tcm->cdnrblk.cb_ref > 0) + return (EBUSY); + + tca_invalidate_action(&tcm->green_action); + tca_invalidate_action(&tcm->yellow_action); + tca_invalidate_action(&tcm->red_action); + + cdnr_cbdestroy(tcm); + return (0); +} + +static struct tc_action * +trtcm_input(cb, pktinfo) + struct cdnr_block *cb; + struct cdnr_pktinfo *pktinfo; +{ + struct trtcm *tcm = (struct trtcm *)cb; + u_int64_t len; + u_int64_t interval, now; + u_int8_t color; + + len = TB_SCALE(pktinfo->pkt_len); + if (tcm->coloraware) { + color = pktinfo->pkt_dscp; + if (color != tcm->yellow_dscp && color != tcm->red_dscp) + color = tcm->green_dscp; + } else { + /* if color-blind, precolor it as green */ + color = tcm->green_dscp; + } + + now = read_machclk(); + if (tcm->cmtd_tb.token < len) { + interval = now - tcm->cmtd_tb.last; + if (interval >= tcm->cmtd_tb.filluptime) + tcm->cmtd_tb.token = tcm->cmtd_tb.depth; + else { + tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate; + if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth) + tcm->cmtd_tb.token = tcm->cmtd_tb.depth; + } + tcm->cmtd_tb.last = now; + } + if (tcm->peak_tb.token < len) { + interval = now - tcm->peak_tb.last; + if (interval >= tcm->peak_tb.filluptime) + tcm->peak_tb.token = tcm->peak_tb.depth; + else { + tcm->peak_tb.token += interval * tcm->peak_tb.rate; + if (tcm->peak_tb.token > tcm->peak_tb.depth) + tcm->peak_tb.token = tcm->peak_tb.depth; + } + tcm->peak_tb.last = now; + } + + if (color == tcm->red_dscp || tcm->peak_tb.token < len) { + pktinfo->pkt_dscp = tcm->red_dscp; + PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len); + return (&tcm->red_action); + } + + if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) { + pktinfo->pkt_dscp = tcm->yellow_dscp; + 
tcm->peak_tb.token -= len; + PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len); + return (&tcm->yellow_action); + } + + pktinfo->pkt_dscp = tcm->green_dscp; + tcm->cmtd_tb.token -= len; + tcm->peak_tb.token -= len; + PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len); + return (&tcm->green_action); +} + +/* + * time sliding window three color marker + * as described in draft-fang-diffserv-tc-tswtcm-00.txt + */ +static struct tswtcm * +tswtcm_create(top, cmtd_rate, peak_rate, avg_interval, + green_action, yellow_action, red_action) + struct top_cdnr *top; + u_int32_t cmtd_rate, peak_rate, avg_interval; + struct tc_action *green_action, *yellow_action, *red_action; +{ + struct tswtcm *tsw; + + if (tca_verify_action(green_action) < 0 + || tca_verify_action(yellow_action) < 0 + || tca_verify_action(red_action) < 0) + return (NULL); + + if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM, + tswtcm_input)) == NULL) + return (NULL); + + tca_import_action(&tsw->green_action, green_action); + tca_import_action(&tsw->yellow_action, yellow_action); + tca_import_action(&tsw->red_action, red_action); + + /* set dscps to use */ + if (tsw->green_action.tca_code == TCACODE_MARK) + tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK; + else + tsw->green_dscp = DSCP_AF11; + if (tsw->yellow_action.tca_code == TCACODE_MARK) + tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK; + else + tsw->yellow_dscp = DSCP_AF12; + if (tsw->red_action.tca_code == TCACODE_MARK) + tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK; + else + tsw->red_dscp = DSCP_AF13; + + /* convert rates from bits/sec to bytes/sec */ + tsw->cmtd_rate = cmtd_rate / 8; + tsw->peak_rate = peak_rate / 8; + tsw->avg_rate = 0; + + /* timewin is converted from msec to machine clock unit */ + tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000; + + return (tsw); +} + +static int +tswtcm_destroy(tsw) + struct tswtcm *tsw; +{ + if (tsw->cdnrblk.cb_ref > 0) + return (EBUSY); + + 
tca_invalidate_action(&tsw->green_action); + tca_invalidate_action(&tsw->yellow_action); + tca_invalidate_action(&tsw->red_action); + + cdnr_cbdestroy(tsw); + return (0); +} + +static struct tc_action * +tswtcm_input(cb, pktinfo) + struct cdnr_block *cb; + struct cdnr_pktinfo *pktinfo; +{ + struct tswtcm *tsw = (struct tswtcm *)cb; + int len; + u_int32_t avg_rate; + u_int64_t interval, now, tmp; + + /* + * rate estimator + */ + len = pktinfo->pkt_len; + now = read_machclk(); + + interval = now - tsw->t_front; + /* + * calculate average rate: + * avg = (avg * timewin + pkt_len)/(timewin + interval) + * pkt_len needs to be multiplied by machclk_freq in order to + * get (bytes/sec). + * note: when avg_rate (bytes/sec) and timewin (machclk unit) are + * less than 32 bits, the following 64-bit operation has enough + * precision. + */ + tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin + + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval); + tsw->avg_rate = avg_rate = (u_int32_t)tmp; + tsw->t_front = now; + + /* + * marker + */ + if (avg_rate > tsw->cmtd_rate) { + u_int32_t randval = arc4random() % avg_rate; + + if (avg_rate > tsw->peak_rate) { + if (randval < avg_rate - tsw->peak_rate) { + /* mark red */ + pktinfo->pkt_dscp = tsw->red_dscp; + PKTCNTR_ADD(&tsw->red_cnt, len); + return (&tsw->red_action); + } else if (randval < avg_rate - tsw->cmtd_rate) + goto mark_yellow; + } else { + /* peak_rate >= avg_rate > cmtd_rate */ + if (randval < avg_rate - tsw->cmtd_rate) { + mark_yellow: + pktinfo->pkt_dscp = tsw->yellow_dscp; + PKTCNTR_ADD(&tsw->yellow_cnt, len); + return (&tsw->yellow_action); + } + } + } + + /* mark green */ + pktinfo->pkt_dscp = tsw->green_dscp; + PKTCNTR_ADD(&tsw->green_cnt, len); + return (&tsw->green_action); +} + +/* + * ioctl requests + */ +static int +cdnrcmd_if_attach(ifname) + char *ifname; +{ + struct ifnet *ifp; + struct top_cdnr *top; + + if ((ifp = ifunit(ifname)) == NULL) + return (EBADF); + + if (ifp->if_snd.altq_cdnr != NULL) + 
return (EBUSY); + + if ((top = top_create(&ifp->if_snd)) == NULL) + return (ENOMEM); + return (0); +} + +static int +cdnrcmd_if_detach(ifname) + char *ifname; +{ + struct top_cdnr *top; + + if ((top = tcb_lookup(ifname)) == NULL) + return (EBADF); + + return top_destroy(top); +} + +static int +cdnrcmd_add_element(ap) + struct cdnr_add_element *ap; +{ + struct top_cdnr *top; + struct cdnr_block *cb; + + if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) + return (EBADF); + + cb = element_create(top, &ap->action); + if (cb == NULL) + return (EINVAL); + /* return a class handle to the user */ + ap->cdnr_handle = cdnr_cb2handle(cb); + return (0); +} + +static int +cdnrcmd_delete_element(ap) + struct cdnr_delete_element *ap; +{ + struct top_cdnr *top; + struct cdnr_block *cb; + + if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) + return (EBADF); + + if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) + return (EINVAL); + + if (cb->cb_type != TCETYPE_ELEMENT) + return generic_element_destroy(cb); + + return element_destroy(cb); +} + +static int +cdnrcmd_add_filter(ap) + struct cdnr_add_filter *ap; +{ + struct top_cdnr *top; + struct cdnr_block *cb; + + if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) + return (EBADF); + + if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) + return (EINVAL); + + return acc_add_filter(&top->tc_classifier, &ap->filter, + cb, &ap->filter_handle); +} + +static int +cdnrcmd_delete_filter(ap) + struct cdnr_delete_filter *ap; +{ + struct top_cdnr *top; + + if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) + return (EBADF); + + return acc_delete_filter(&top->tc_classifier, ap->filter_handle); +} + +static int +cdnrcmd_add_tbm(ap) + struct cdnr_add_tbmeter *ap; +{ + struct top_cdnr *top; + struct tbmeter *tbm; + + if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) + return (EBADF); + + tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action); + if (tbm == NULL) + return (EINVAL); + /* return a class handle to 
the user */ + ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk); + return (0); +} + +static int +cdnrcmd_modify_tbm(ap) + struct cdnr_modify_tbmeter *ap; +{ + struct tbmeter *tbm; + + if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) + return (EINVAL); + + tb_import_profile(&tbm->tb, &ap->profile); + + return (0); +} + +static int +cdnrcmd_tbm_stats(ap) + struct cdnr_tbmeter_stats *ap; +{ + struct tbmeter *tbm; + + if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) + return (EINVAL); + + ap->in_cnt = tbm->in_cnt; + ap->out_cnt = tbm->out_cnt; + + return (0); +} + +static int +cdnrcmd_add_trtcm(ap) + struct cdnr_add_trtcm *ap; +{ + struct top_cdnr *top; + struct trtcm *tcm; + + if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) + return (EBADF); + + tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile, + &ap->green_action, &ap->yellow_action, + &ap->red_action, ap->coloraware); + if (tcm == NULL) + return (EINVAL); + + /* return a class handle to the user */ + ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk); + return (0); +} + +static int +cdnrcmd_modify_trtcm(ap) + struct cdnr_modify_trtcm *ap; +{ + struct trtcm *tcm; + + if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) + return (EINVAL); + + tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile); + tb_import_profile(&tcm->peak_tb, &ap->peak_profile); + + return (0); +} + +static int +cdnrcmd_tcm_stats(ap) + struct cdnr_tcm_stats *ap; +{ + struct cdnr_block *cb; + + if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) + return (EINVAL); + + if (cb->cb_type == TCETYPE_TRTCM) { + struct trtcm *tcm = (struct trtcm *)cb; + + ap->green_cnt = tcm->green_cnt; + ap->yellow_cnt = tcm->yellow_cnt; + ap->red_cnt = tcm->red_cnt; + } else if (cb->cb_type == TCETYPE_TSWTCM) { + struct tswtcm *tsw = (struct tswtcm *)cb; + + ap->green_cnt = tsw->green_cnt; + ap->yellow_cnt = tsw->yellow_cnt; + ap->red_cnt = tsw->red_cnt; + } else + return (EINVAL); + + return (0); 
+} + +static int +cdnrcmd_add_tswtcm(ap) + struct cdnr_add_tswtcm *ap; +{ + struct top_cdnr *top; + struct tswtcm *tsw; + + if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) + return (EBADF); + + if (ap->cmtd_rate > ap->peak_rate) + return (EINVAL); + + tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate, + ap->avg_interval, &ap->green_action, + &ap->yellow_action, &ap->red_action); + if (tsw == NULL) + return (EINVAL); + + /* return a class handle to the user */ + ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk); + return (0); +} + +static int +cdnrcmd_modify_tswtcm(ap) + struct cdnr_modify_tswtcm *ap; +{ + struct tswtcm *tsw; + + if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) + return (EINVAL); + + if (ap->cmtd_rate > ap->peak_rate) + return (EINVAL); + + /* convert rates from bits/sec to bytes/sec */ + tsw->cmtd_rate = ap->cmtd_rate / 8; + tsw->peak_rate = ap->peak_rate / 8; + tsw->avg_rate = 0; + + /* timewin is converted from msec to machine clock unit */ + tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000; + + return (0); +} + +static int +cdnrcmd_get_stats(ap) + struct cdnr_get_stats *ap; +{ + struct top_cdnr *top; + struct cdnr_block *cb; + struct tbmeter *tbm; + struct trtcm *tcm; + struct tswtcm *tsw; + struct tce_stats tce, *usp; + int error, n, nskip, nelements; + + if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) + return (EBADF); + + /* copy action stats */ + bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts)); + + /* stats for each element */ + nelements = ap->nelements; + usp = ap->tce_stats; + if (nelements <= 0 || usp == NULL) + return (0); + + nskip = ap->nskip; + n = 0; + LIST_FOREACH(cb, &top->tc_elements, cb_next) { + if (nskip > 0) { + nskip--; + continue; + } + + bzero(&tce, sizeof(tce)); + tce.tce_handle = cb->cb_handle; + tce.tce_type = cb->cb_type; + switch (cb->cb_type) { + case TCETYPE_TBMETER: + tbm = (struct tbmeter *)cb; + tce.tce_cnts[0] = tbm->in_cnt; + tce.tce_cnts[1] = tbm->out_cnt; + 
break; + case TCETYPE_TRTCM: + tcm = (struct trtcm *)cb; + tce.tce_cnts[0] = tcm->green_cnt; + tce.tce_cnts[1] = tcm->yellow_cnt; + tce.tce_cnts[2] = tcm->red_cnt; + break; + case TCETYPE_TSWTCM: + tsw = (struct tswtcm *)cb; + tce.tce_cnts[0] = tsw->green_cnt; + tce.tce_cnts[1] = tsw->yellow_cnt; + tce.tce_cnts[2] = tsw->red_cnt; + break; + default: + continue; + } + + if ((error = copyout((caddr_t)&tce, (caddr_t)usp++, + sizeof(tce))) != 0) + return (error); + + if (++n == nelements) + break; + } + ap->nelements = n; + + return (0); +} + +/* + * conditioner device interface + */ +int +cdnropen(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + if (machclk_freq == 0) + init_machclk(); + + if (machclk_freq == 0) { + printf("cdnr: no cpu clock available!\n"); + return (ENXIO); + } + + /* everything will be done when the queueing scheme is attached. */ + return 0; +} + +int +cdnrclose(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + struct top_cdnr *top; + int err, error = 0; + + while ((top = LIST_FIRST(&tcb_list)) != NULL) { + /* destroy all */ + err = top_destroy(top); + if (err != 0 && error == 0) + error = err; + } + altq_input = NULL; + + return (error); +} + +int +cdnrioctl(dev, cmd, addr, flag, p) + dev_t dev; + ioctlcmd_t cmd; + caddr_t addr; + int flag; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + struct top_cdnr *top; + struct cdnr_interface *ifacep; + int s, error = 0; + + /* check super-user privilege */ + switch (cmd) { + case CDNR_GETSTATS: + break; + default: +#if (__FreeBSD_version > 700000) + if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) +#elif (__FreeBSD_version > 400000) + if ((error = suser(p)) != 0) +#else + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) +#endif + return (error); + break; + } + + s = splnet(); 
+ switch (cmd) { + + case CDNR_IF_ATTACH: + ifacep = (struct cdnr_interface *)addr; + error = cdnrcmd_if_attach(ifacep->cdnr_ifname); + break; + + case CDNR_IF_DETACH: + ifacep = (struct cdnr_interface *)addr; + error = cdnrcmd_if_detach(ifacep->cdnr_ifname); + break; + + case CDNR_ENABLE: + case CDNR_DISABLE: + ifacep = (struct cdnr_interface *)addr; + if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) { + error = EBADF; + break; + } + + switch (cmd) { + + case CDNR_ENABLE: + ALTQ_SET_CNDTNING(top->tc_ifq); + if (altq_input == NULL) + altq_input = altq_cdnr_input; + break; + + case CDNR_DISABLE: + ALTQ_CLEAR_CNDTNING(top->tc_ifq); + LIST_FOREACH(top, &tcb_list, tc_next) + if (ALTQ_IS_CNDTNING(top->tc_ifq)) + break; + if (top == NULL) + altq_input = NULL; + break; + } + break; + + case CDNR_ADD_ELEM: + error = cdnrcmd_add_element((struct cdnr_add_element *)addr); + break; + + case CDNR_DEL_ELEM: + error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr); + break; + + case CDNR_ADD_TBM: + error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr); + break; + + case CDNR_MOD_TBM: + error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr); + break; + + case CDNR_TBM_STATS: + error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr); + break; + + case CDNR_ADD_TCM: + error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr); + break; + + case CDNR_MOD_TCM: + error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr); + break; + + case CDNR_TCM_STATS: + error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr); + break; + + case CDNR_ADD_FILTER: + error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr); + break; + + case CDNR_DEL_FILTER: + error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr); + break; + + case CDNR_GETSTATS: + error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr); + break; + + case CDNR_ADD_TSW: + error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr); + break; + + case CDNR_MOD_TSW: + error = 
cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr); + break; + + default: + error = EINVAL; + break; + } + splx(s); + + return error; +} + +#ifdef KLD_MODULE + +static struct altqsw cdnr_sw = + {"cdnr", cdnropen, cdnrclose, cdnrioctl}; + +ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw); + +#endif /* KLD_MODULE */ + +#endif /* ALTQ3_COMPAT */ +#endif /* ALTQ_CDNR */ diff --git a/freebsd/sys/net/altq/altq_cdnr.h b/freebsd/sys/net/altq/altq_cdnr.h new file mode 100644 index 00000000..06fa9c98 --- /dev/null +++ b/freebsd/sys/net/altq/altq_cdnr.h @@ -0,0 +1,336 @@ +/*- + * Copyright (C) 1999-2002 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: altq_cdnr.h,v 1.9 2003/07/10 12:07:48 kjc Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_CDNR_H_ +#define _ALTQ_ALTQ_CDNR_H_ + +#include <net/altq/altq.h> + +/* + * traffic conditioner element types + */ +#define TCETYPE_NONE 0 +#define TCETYPE_TOP 1 /* top level conditioner */ +#define TCETYPE_ELEMENT 2 /* a simple tc element */ +#define TCETYPE_TBMETER 3 /* token bucket meter */ +#define TCETYPE_TRTCM 4 /* (two-rate) three color marker */ +#define TCETYPE_TSWTCM 5 /* time sliding window 3-color marker */ + +/* + * traffic conditioner action + */ +struct cdnr_block; + +struct tc_action { + int tca_code; /* e.g., TCACODE_PASS */ + /* tca_code dependent variable */ + union { + u_long un_value; /* template */ + u_int8_t un_dscp; /* diffserv code point */ + u_long un_handle; /* tc action handle */ + struct cdnr_block *un_next; /* next tc element block */ + } tca_un; +}; +#define tca_value tca_un.un_value +#define tca_dscp tca_un.un_dscp +#define tca_handle tca_un.un_handle +#define tca_next tca_un.un_next + +#define TCACODE_NONE 0 /* action is not set */ +#define TCACODE_PASS 1 /* pass this packet */ +#define TCACODE_DROP 2 /* discard this packet */ +#define TCACODE_RETURN 3 /* do not process this packet */ +#define TCACODE_MARK 4 /* mark dscp */ +#define TCACODE_HANDLE 5 /* take action specified by handle */ +#define TCACODE_NEXT 6 /* take action in the next tc element */ +#define TCACODE_MAX 6 + +#define CDNR_NULL_HANDLE 0 + +struct cdnr_interface { + char cdnr_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ +}; + +/* simple element operations */ +struct cdnr_add_element { + struct cdnr_interface iface; + struct tc_action action; + + u_long cdnr_handle; /* return value */ +}; + +struct cdnr_delete_element { + struct cdnr_interface iface; + u_long cdnr_handle; +}; + +/* token-bucket meter operations */ +struct cdnr_add_tbmeter { + struct cdnr_interface iface; + struct tb_profile profile; + struct tc_action in_action; + struct tc_action out_action; + 
+ u_long cdnr_handle; /* return value */ +}; + +struct cdnr_modify_tbmeter { + struct cdnr_interface iface; + u_long cdnr_handle; + struct tb_profile profile; +}; + +struct cdnr_tbmeter_stats { + struct cdnr_interface iface; + u_long cdnr_handle; + struct pktcntr in_cnt; + struct pktcntr out_cnt; +}; + +/* two-rate three-color marker operations */ +struct cdnr_add_trtcm { + struct cdnr_interface iface; + struct tb_profile cmtd_profile; /* profile for committed tb */ + struct tb_profile peak_profile; /* profile for peak tb */ + struct tc_action green_action; /* action for green packets */ + struct tc_action yellow_action; /* action for yellow packets */ + struct tc_action red_action; /* action for red packets */ + int coloraware; /* color-aware/color-blind */ + + u_long cdnr_handle; /* return value */ +}; + +struct cdnr_modify_trtcm { + struct cdnr_interface iface; + u_long cdnr_handle; + struct tb_profile cmtd_profile; /* profile for committed tb */ + struct tb_profile peak_profile; /* profile for peak tb */ + int coloraware; /* color-aware/color-blind */ +}; + +struct cdnr_tcm_stats { + struct cdnr_interface iface; + u_long cdnr_handle; + struct pktcntr green_cnt; + struct pktcntr yellow_cnt; + struct pktcntr red_cnt; +}; + +/* time sliding window three-color marker operations */ +struct cdnr_add_tswtcm { + struct cdnr_interface iface; + u_int32_t cmtd_rate; /* committed rate (bits/sec) */ + u_int32_t peak_rate; /* peak rate (bits/sec) */ + u_int32_t avg_interval; /* averaging interval (msec) */ + struct tc_action green_action; /* action for green packets */ + struct tc_action yellow_action; /* action for yellow packets */ + struct tc_action red_action; /* action for red packets */ + + u_long cdnr_handle; /* return value */ +}; + +struct cdnr_modify_tswtcm { + struct cdnr_interface iface; + u_long cdnr_handle; + u_int32_t cmtd_rate; /* committed rate (bits/sec) */ + u_int32_t peak_rate; /* peak rate (bits/sec) */ + u_int32_t avg_interval; /* averaging interval 
(msec) */ +}; + +struct cdnr_add_filter { + struct cdnr_interface iface; + u_long cdnr_handle; +#ifdef ALTQ3_CLFIER_COMPAT + struct flow_filter filter; +#endif + u_long filter_handle; /* return value */ +}; + +struct cdnr_delete_filter { + struct cdnr_interface iface; + u_long filter_handle; +}; + +struct tce_stats { + u_long tce_handle; /* tc element handle */ + int tce_type; /* e.g., TCETYPE_ELEMENT */ + struct pktcntr tce_cnts[3]; /* tcm returns 3 counters */ +}; + +struct cdnr_get_stats { + struct cdnr_interface iface; + struct pktcntr cnts[TCACODE_MAX+1]; + + /* element stats */ + int nskip; /* skip # of elements */ + int nelements; /* # of element stats (WR) */ + struct tce_stats *tce_stats; /* pointer to stats array */ +}; + +#define CDNR_IF_ATTACH _IOW('Q', 1, struct cdnr_interface) +#define CDNR_IF_DETACH _IOW('Q', 2, struct cdnr_interface) +#define CDNR_ENABLE _IOW('Q', 3, struct cdnr_interface) +#define CDNR_DISABLE _IOW('Q', 4, struct cdnr_interface) +#define CDNR_ADD_FILTER _IOWR('Q', 10, struct cdnr_add_filter) +#define CDNR_DEL_FILTER _IOW('Q', 11, struct cdnr_delete_filter) +#define CDNR_GETSTATS _IOWR('Q', 12, struct cdnr_get_stats) +#define CDNR_ADD_ELEM _IOWR('Q', 30, struct cdnr_add_element) +#define CDNR_DEL_ELEM _IOW('Q', 31, struct cdnr_delete_element) +#define CDNR_ADD_TBM _IOWR('Q', 32, struct cdnr_add_tbmeter) +#define CDNR_MOD_TBM _IOW('Q', 33, struct cdnr_modify_tbmeter) +#define CDNR_TBM_STATS _IOWR('Q', 34, struct cdnr_tbmeter_stats) +#define CDNR_ADD_TCM _IOWR('Q', 35, struct cdnr_add_trtcm) +#define CDNR_MOD_TCM _IOWR('Q', 36, struct cdnr_modify_trtcm) +#define CDNR_TCM_STATS _IOWR('Q', 37, struct cdnr_tcm_stats) +#define CDNR_ADD_TSW _IOWR('Q', 38, struct cdnr_add_tswtcm) +#define CDNR_MOD_TSW _IOWR('Q', 39, struct cdnr_modify_tswtcm) + +#ifndef DSCP_EF +/* diffserv code points */ +#define DSCP_MASK 0xfc +#define DSCP_CUMASK 0x03 +#define DSCP_EF 0xb8 +#define DSCP_AF11 0x28 +#define DSCP_AF12 0x30 +#define DSCP_AF13 0x38 +#define 
DSCP_AF21 0x48 +#define DSCP_AF22 0x50 +#define DSCP_AF23 0x58 +#define DSCP_AF31 0x68 +#define DSCP_AF32 0x70 +#define DSCP_AF33 0x78 +#define DSCP_AF41 0x88 +#define DSCP_AF42 0x90 +#define DSCP_AF43 0x98 +#define AF_CLASSMASK 0xe0 +#define AF_DROPPRECMASK 0x18 +#endif + +#ifdef _KERNEL + +/* + * packet information passed to the input function of tc elements + */ +struct cdnr_pktinfo { + int pkt_len; /* packet length */ + u_int8_t pkt_dscp; /* diffserv code point */ +}; + +/* + * traffic conditioner control block common to all types of tc elements + */ +struct cdnr_block { + LIST_ENTRY(cdnr_block) cb_next; + int cb_len; /* size of this tc element */ + int cb_type; /* cdnr block type */ + int cb_ref; /* reference count of this element */ + u_long cb_handle; /* handle of this tc element */ + struct top_cdnr *cb_top; /* back pointer to top */ + struct tc_action cb_action; /* top level action for this tcb */ + struct tc_action *(*cb_input)(struct cdnr_block *, + struct cdnr_pktinfo *); +}; + +/* + * top level traffic conditioner structure for an interface + */ +struct top_cdnr { + struct cdnr_block tc_block; + + LIST_ENTRY(top_cdnr) tc_next; + struct ifaltq *tc_ifq; + + LIST_HEAD(, cdnr_block) tc_elements; +#ifdef ALTQ3_CLFIER_COMPAT + struct acc_classifier tc_classifier; +#endif + struct pktcntr tc_cnts[TCACODE_MAX+1]; +}; + +/* token bucket element */ +struct tbe { + u_int64_t rate; + u_int64_t depth; + + u_int64_t token; + u_int64_t filluptime; + u_int64_t last; +}; + +/* token bucket meter structure */ +struct tbmeter { + struct cdnr_block cdnrblk; /* conditioner block */ + struct tbe tb; /* token bucket */ + struct tc_action in_action; /* actions for IN/OUT */ + struct tc_action out_action; /* actions for IN/OUT */ + struct pktcntr in_cnt; /* statistics for IN/OUT */ + struct pktcntr out_cnt; /* statistics for IN/OUT */ +}; + +/* two-rate three-color marker structure */ +struct trtcm { + struct cdnr_block cdnrblk; /* conditioner block */ + struct tbe cmtd_tb; /* 
committed tb profile */ + struct tbe peak_tb; /* peak tb profile */ + struct tc_action green_action; + struct tc_action yellow_action; + struct tc_action red_action; + int coloraware; + u_int8_t green_dscp; + u_int8_t yellow_dscp; + u_int8_t red_dscp; + struct pktcntr green_cnt; + struct pktcntr yellow_cnt; + struct pktcntr red_cnt; +}; + +/* time sliding window three-color marker structure */ +struct tswtcm { + struct cdnr_block cdnrblk; /* conditioner block */ + + u_int32_t avg_rate; /* average rate (bytes/sec) */ + u_int64_t t_front; /* timestamp of last update */ + + u_int64_t timewin; /* average interval */ + u_int32_t cmtd_rate; /* committed target rate */ + u_int32_t peak_rate; /* peak target rate */ + struct tc_action green_action; + struct tc_action yellow_action; + struct tc_action red_action; + u_int8_t green_dscp; + u_int8_t yellow_dscp; + u_int8_t red_dscp; + struct pktcntr green_cnt; + struct pktcntr yellow_cnt; + struct pktcntr red_cnt; +}; + +#endif /* _KERNEL */ + +#endif /* _ALTQ_ALTQ_CDNR_H_ */ diff --git a/freebsd/sys/net/altq/altq_classq.h b/freebsd/sys/net/altq/altq_classq.h new file mode 100644 index 00000000..dc465a0b --- /dev/null +++ b/freebsd/sys/net/altq/altq_classq.h @@ -0,0 +1,213 @@ +/*- + * Copyright (c) 1991-1997 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Network Research + * Group at Lawrence Berkeley Laboratory. + * 4. Neither the name of the University nor of the Laboratory may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ + * $FreeBSD$ + */ +/* + * class queue definitions extracted from rm_class.h. + */ +#ifndef _ALTQ_ALTQ_CLASSQ_H_ +#define _ALTQ_ALTQ_CLASSQ_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Packet Queue types: RED or DROPHEAD. + */ +#define Q_DROPHEAD 0x00 +#define Q_RED 0x01 +#define Q_RIO 0x02 +#define Q_DROPTAIL 0x03 +#define Q_CODEL 0x04 + +#ifdef _KERNEL + +/* + * Packet Queue structures and macros to manipulate them. 
+ */ +struct _class_queue_ { + struct mbuf *tail_; /* Tail of packet queue */ + int qlen_; /* Queue length (in number of packets) */ + int qlim_; /* Queue limit (in number of packets*) */ + int qsize_; /* Queue size (in number of bytes*) */ + int qtype_; /* Queue type */ +}; + +typedef struct _class_queue_ class_queue_t; + +#define qtype(q) (q)->qtype_ /* Get queue type */ +#define qlimit(q) (q)->qlim_ /* Max packets to be queued */ +#define qlen(q) (q)->qlen_ /* Current queue length. */ +#define qsize(q) (q)->qsize_ /* Current queue size. */ +#define qtail(q) (q)->tail_ /* Tail of the queue */ +#define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL) + +#define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty?? */ +#define q_is_codel(q) ((q)->qtype_ == Q_CODEL) /* Is the queue a codel queue */ +#define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */ +#define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */ +#define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO) + +#if !defined(__GNUC__) || defined(ALTQ_DEBUG) + +extern void _addq(class_queue_t *, struct mbuf *); +extern struct mbuf *_getq(class_queue_t *); +extern struct mbuf *_getq_tail(class_queue_t *); +extern struct mbuf *_getq_random(class_queue_t *); +extern void _removeq(class_queue_t *, struct mbuf *); +extern void _flushq(class_queue_t *); + +#else /* __GNUC__ && !ALTQ_DEBUG */ +/* + * inlined versions + */ +static __inline void +_addq(class_queue_t *q, struct mbuf *m) +{ + struct mbuf *m0; + + if ((m0 = qtail(q)) != NULL) + m->m_nextpkt = m0->m_nextpkt; + else + m0 = m; + m0->m_nextpkt = m; + qtail(q) = m; + qlen(q)++; + qsize(q) += m_pktlen(m); +} + +static __inline struct mbuf * +_getq(class_queue_t *q) +{ + struct mbuf *m, *m0; + + if ((m = qtail(q)) == NULL) + return (NULL); + if ((m0 = m->m_nextpkt) != m) + m->m_nextpkt = m0->m_nextpkt; + else + qtail(q) = NULL; + qlen(q)--; + qsize(q) -= m_pktlen(m0); + m0->m_nextpkt = NULL; + return (m0); 
+} + +/* drop a packet at the tail of the queue */ +static __inline struct mbuf * +_getq_tail(class_queue_t *q) +{ + struct mbuf *m, *m0, *prev; + + if ((m = m0 = qtail(q)) == NULL) + return NULL; + do { + prev = m0; + m0 = m0->m_nextpkt; + } while (m0 != m); + prev->m_nextpkt = m->m_nextpkt; + if (prev == m) + qtail(q) = NULL; + else + qtail(q) = prev; + qlen(q)--; + m->m_nextpkt = NULL; + return (m); +} + +/* randomly select a packet in the queue */ +static __inline struct mbuf * +_getq_random(class_queue_t *q) +{ + struct mbuf *m; + int i, n; + + if ((m = qtail(q)) == NULL) + return NULL; + if (m->m_nextpkt == m) + qtail(q) = NULL; + else { + struct mbuf *prev = NULL; + + n = random() % qlen(q) + 1; + for (i = 0; i < n; i++) { + prev = m; + m = m->m_nextpkt; + } + prev->m_nextpkt = m->m_nextpkt; + if (m == qtail(q)) + qtail(q) = prev; + } + qlen(q)--; + m->m_nextpkt = NULL; + return (m); +} + +static __inline void +_removeq(class_queue_t *q, struct mbuf *m) +{ + struct mbuf *m0, *prev; + + m0 = qtail(q); + do { + prev = m0; + m0 = m0->m_nextpkt; + } while (m0 != m); + prev->m_nextpkt = m->m_nextpkt; + if (prev == m) + qtail(q) = NULL; + else if (qtail(q) == m) + qtail(q) = prev; + qlen(q)--; +} + +static __inline void +_flushq(class_queue_t *q) +{ + struct mbuf *m; + + while ((m = _getq(q)) != NULL) + m_freem(m); +} + +#endif /* __GNUC__ && !ALTQ_DEBUG */ + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ALTQ_ALTQ_CLASSQ_H_ */ diff --git a/freebsd/sys/net/altq/altq_codel.c b/freebsd/sys/net/altq/altq_codel.c new file mode 100644 index 00000000..438120f5 --- /dev/null +++ b/freebsd/sys/net/altq/altq_codel.c @@ -0,0 +1,479 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/* + * CoDel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org> + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 
Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + * $FreeBSD$ + */ +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */ + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <net/altq/if_altq.h> +#include <net/altq/altq.h> +#include <net/altq/altq_codel.h> + +static int codel_should_drop(struct codel *, class_queue_t *, + struct mbuf *, u_int64_t); +static void codel_Newton_step(struct codel_vars *); +static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t); + +#define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0) +#define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0) +#define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0) +#define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0) + +static int codel_request(struct ifaltq *, int, void *); + +static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *codel_dequeue(struct ifaltq *, int); + +int +codel_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + + return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc, + codel_enqueue, codel_dequeue, codel_request, NULL, NULL)); +} + +int +codel_add_altq(struct pf_altq *a) +{ + struct codel_if *cif; + struct ifnet *ifp; + struct codel_opts *opts; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENODEV); + + opts = &a->pq_u.codel_opts; + + cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cif == NULL) + return (ENOMEM); + cif->cif_bandwidth = a->ifbandwidth; + cif->cif_ifq = 
&ifp->if_snd; + + cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cif->cl_q == NULL) { + free(cif, M_DEVBUF); + return (ENOMEM); + } + + if (a->qlimit == 0) + a->qlimit = 50; /* use default. */ + qlimit(cif->cl_q) = a->qlimit; + qtype(cif->cl_q) = Q_CODEL; + qlen(cif->cl_q) = 0; + qsize(cif->cl_q) = 0; + + if (opts->target == 0) + opts->target = 5; + if (opts->interval == 0) + opts->interval = 100; + cif->codel.params.target = machclk_freq * opts->target / 1000; + cif->codel.params.interval = machclk_freq * opts->interval / 1000; + cif->codel.params.ecn = opts->ecn; + cif->codel.stats.maxpacket = 256; + + cif->cl_stats.qlength = qlen(cif->cl_q); + cif->cl_stats.qlimit = qlimit(cif->cl_q); + + /* keep the state in pf_altq */ + a->altq_disc = cif; + + return (0); +} + +int +codel_remove_altq(struct pf_altq *a) +{ + struct codel_if *cif; + + if ((cif = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + if (cif->cl_q) + free(cif->cl_q, M_DEVBUF); + free(cif, M_DEVBUF); + + return (0); +} + +int +codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + struct codel_if *cif; + struct codel_ifstats stats; + int error = 0; + + if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL) + return (EBADF); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + stats = cif->cl_stats; + stats.stats = cif->codel.stats; + + if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + return (error); + *nbytes = sizeof(stats); + + return (0); +} + +static int +codel_request(struct ifaltq *ifq, int req, void *arg) +{ + struct codel_if *cif = (struct codel_if *)ifq->altq_disc; + struct mbuf *m; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + if (!ALTQ_IS_ENABLED(cif->cif_ifq)) + break; + + if (qempty(cif->cl_q)) + break; + + while ((m = _getq(cif->cl_q)) != NULL) { + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + m_freem(m); + IFQ_DEC_LEN(cif->cif_ifq); + } + cif->cif_ifq->ifq_len = 0; + break; + 
} + + return (0); +} + +static int +codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) +{ + + struct codel_if *cif = (struct codel_if *) ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + /* grab class set by classifier */ + if ((m->m_flags & M_PKTHDR) == 0) { + /* should not happen */ + printf("altq: packet for %s does not have pkthdr\n", + ifq->altq_ifp->if_xname); + /* record the drop length before freeing: m is invalid after m_freem() */ + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + m_freem(m); + return (ENOBUFS); + } + + if (codel_addq(&cif->codel, cif->cl_q, m)) { + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + return (ENOBUFS); + } + IFQ_INC_LEN(ifq); + + return (0); +} + +static struct mbuf * +codel_dequeue(struct ifaltq *ifq, int op) +{ + struct codel_if *cif = (struct codel_if *)ifq->altq_disc; + struct mbuf *m; + + IFQ_LOCK_ASSERT(ifq); + + if (IFQ_IS_EMPTY(ifq)) + return (NULL); + + if (op == ALTDQ_POLL) + return (qhead(cif->cl_q)); + + + m = codel_getq(&cif->codel, cif->cl_q); + if (m != NULL) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m)); + return (m); + } + + return (NULL); +} + +struct codel * +codel_alloc(int target, int interval, int ecn) +{ + struct codel *c; + + c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO); + if (c != NULL) { + c->params.target = machclk_freq * target / 1000; + c->params.interval = machclk_freq * interval / 1000; + c->params.ecn = ecn; + c->stats.maxpacket = 256; + } + + return (c); +} + +void +codel_destroy(struct codel *c) +{ + + free(c, M_DEVBUF); +} + +#define MTAG_CODEL 1438031249 +int +codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m) +{ + struct m_tag *mtag; + uint64_t *enqueue_time; + + if (qlen(q) < qlimit(q)) { + mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); + if (mtag == NULL) + mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t), + M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + return (-1); + } + enqueue_time = (uint64_t *)(mtag + 1); + *enqueue_time = read_machclk(); + m_tag_prepend(m, mtag); + 
_addq(q, m); + return (0); + } + c->drop_overlimit++; + m_freem(m); + + return (-1); +} + +static int +codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m, + u_int64_t now) +{ + struct m_tag *mtag; + uint64_t *enqueue_time; + + if (m == NULL) { + c->vars.first_above_time = 0; + return (0); + } + + mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); + if (mtag == NULL) { + /* Only one warning per second. */ + if (ppsratecheck(&c->last_log, &c->last_pps, 1)) + printf("%s: could not found the packet mtag!\n", + __func__); + c->vars.first_above_time = 0; + return (0); + } + enqueue_time = (uint64_t *)(mtag + 1); + c->vars.ldelay = now - *enqueue_time; + c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m)); + + if (codel_time_before(c->vars.ldelay, c->params.target) || + qsize(q) <= c->stats.maxpacket) { + /* went below - stay below for at least interval */ + c->vars.first_above_time = 0; + return (0); + } + if (c->vars.first_above_time == 0) { + /* just went above from below. If we stay above + * for at least interval we'll say it's ok to drop + */ + c->vars.first_above_time = now + c->params.interval; + return (0); + } + if (codel_time_after(now, c->vars.first_above_time)) + return (1); + + return (0); +} + +/* + * Run a Newton method step: + * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) + * + * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 + */ +static void +codel_Newton_step(struct codel_vars *vars) +{ + uint32_t invsqrt, invsqrt2; + uint64_t val; + +/* sizeof_in_bits(rec_inv_sqrt) */ +#define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t)) +/* needed shift to get a Q0.32 number from rec_inv_sqrt */ +#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS) + + invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; + invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32; + val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2); + val >>= 2; /* avoid overflow in following multiply */ + val = (val * invsqrt) >> (32 - 2 + 1); 
+ + vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; +} + +static u_int64_t +codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt) +{ + + return (t + (u_int32_t)(((u_int64_t)interval * + (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32)); +} + +struct mbuf * +codel_getq(struct codel *c, class_queue_t *q) +{ + struct mbuf *m; + u_int64_t now; + int drop; + + if ((m = _getq(q)) == NULL) { + c->vars.dropping = 0; + return (m); + } + + now = read_machclk(); + drop = codel_should_drop(c, q, m, now); + if (c->vars.dropping) { + if (!drop) { + /* sojourn time below target - leave dropping state */ + c->vars.dropping = 0; + } else if (codel_time_after_eq(now, c->vars.drop_next)) { + /* It's time for the next drop. Drop the current + * packet and dequeue the next. The dequeue might + * take us out of dropping state. + * If not, schedule the next drop. + * A large backlog might result in drop rates so high + * that the next drop should happen now, + * hence the while loop. + */ + while (c->vars.dropping && + codel_time_after_eq(now, c->vars.drop_next)) { + c->vars.count++; /* don't care of possible wrap + * since there is no more + * divide */ + codel_Newton_step(&c->vars); + /* TODO ECN */ + PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); + m_freem(m); + m = _getq(q); + if (!codel_should_drop(c, q, m, now)) + /* leave dropping state */ + c->vars.dropping = 0; + else + /* and schedule the next drop */ + c->vars.drop_next = + codel_control_law(c->vars.drop_next, + c->params.interval, + c->vars.rec_inv_sqrt); + } + } + } else if (drop) { + /* TODO ECN */ + PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); + m_freem(m); + + m = _getq(q); + drop = codel_should_drop(c, q, m, now); + + c->vars.dropping = 1; + /* if min went above target close to when we last went below it + * assume that the drop rate that controlled the queue on the + * last cycle is a good starting point to control it now. 
+ */ + if (codel_time_before(now - c->vars.drop_next, + 16 * c->params.interval)) { + c->vars.count = (c->vars.count - c->vars.lastcount) | 1; + /* we dont care if rec_inv_sqrt approximation + * is not very precise : + * Next Newton steps will correct it quadratically. + */ + codel_Newton_step(&c->vars); + } else { + c->vars.count = 1; + c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; + } + c->vars.lastcount = c->vars.count; + c->vars.drop_next = codel_control_law(now, c->params.interval, + c->vars.rec_inv_sqrt); + } + + return (m); +} + +void +codel_getstats(struct codel *c, struct codel_stats *s) +{ + *s = c->stats; +} + +#endif /* ALTQ_CODEL */ diff --git a/freebsd/sys/net/altq/altq_codel.h b/freebsd/sys/net/altq/altq_codel.h new file mode 100644 index 00000000..8d7178b4 --- /dev/null +++ b/freebsd/sys/net/altq/altq_codel.h @@ -0,0 +1,129 @@ +/* + * CoDel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org> + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_CODEL_H_ +#define _ALTQ_ALTQ_CODEL_H_ + +struct codel_stats { + u_int32_t maxpacket; + struct pktcntr drop_cnt; + u_int marked_packets; +}; + +struct codel_ifstats { + u_int qlength; + u_int qlimit; + struct codel_stats stats; + struct pktcntr cl_xmitcnt; /* transmitted packet counter */ + struct pktcntr cl_dropcnt; /* dropped packet counter */ +}; + +#ifdef _KERNEL +#include <net/altq/altq_classq.h> + +/** + * struct codel_params - contains codel parameters + * <at> target: target queue size (in time units) + * <at> interval: width of moving time window + * <at> ecn: is Explicit Congestion Notification enabled + */ +struct codel_params { + u_int64_t target; + u_int64_t interval; + int ecn; +}; + +/** + * struct codel_vars - contains codel variables + * <at> count: how many drops we've done since the last time we + * entered dropping state + * <at> lastcount: count at entry to dropping state + * <at> dropping: set to true if in dropping state + * <at> rec_inv_sqrt: reciprocal value of sqrt(count) >> 1 + * <at> first_above_time: when we went (or will go) continuously above + * target for interval + * <at> drop_next: time to drop next packet, or when we dropped last + * <at> ldelay: sojourn time of last dequeued packet + */ +struct codel_vars { + u_int32_t count; + u_int32_t lastcount; + int dropping; + u_int16_t rec_inv_sqrt; + u_int64_t first_above_time; + u_int64_t drop_next; + u_int64_t ldelay; +}; + +struct codel { + int last_pps; + struct codel_params params; + struct codel_vars vars; + struct codel_stats stats; + struct timeval last_log; + u_int32_t drop_overlimit; +}; + +/* + * codel interface state + */ +struct codel_if { + struct codel_if *cif_next; /* interface state list */ + struct ifaltq *cif_ifq; /* backpointer to ifaltq */ + u_int cif_bandwidth; /* link bandwidth in bps */ + + class_queue_t *cl_q; /* class queue structure */ + struct codel codel; + + /* statistics */ + struct codel_ifstats cl_stats; +}; + 
+struct codel *codel_alloc(int, int, int); +void codel_destroy(struct codel *); +int codel_addq(struct codel *, class_queue_t *, struct mbuf *); +struct mbuf *codel_getq(struct codel *, class_queue_t *); +void codel_getstats(struct codel *, struct codel_stats *); + +#endif /* _KERNEL */ + +#endif /* _ALTQ_ALTQ_CODEL_H_ */ diff --git a/freebsd/sys/net/altq/altq_fairq.c b/freebsd/sys/net/altq/altq_fairq.c new file mode 100644 index 00000000..efb58d3f --- /dev/null +++ b/freebsd/sys/net/altq/altq_fairq.c @@ -0,0 +1,911 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon <dillon@backplane.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $ + * $FreeBSD$ + */ +/* + * Matt: I gutted altq_priq.c and used it as a skeleton on which to build + * fairq. The fairq algorithm is completely different then priq, of course, + * but because I used priq's skeleton I believe I should include priq's + * copyright. + * + * Copyright (C) 2000-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FAIRQ - take traffic classified by keep state (hashed into + * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract + * the first packet from each bucket in a round-robin fashion. + * + * TODO - better overall qlimit support (right now it is per-bucket). + * - NOTE: red etc is per bucket, not overall. + * - better service curve support. + * + * EXAMPLE: + * + * altq on em0 fairq bandwidth 650Kb queue { std, bulk } + * queue std priority 3 bandwidth 400Kb \ + * fairq (buckets 64, default, hogs 1Kb) qlimit 50 + * queue bulk priority 2 bandwidth 100Kb \ + * fairq (buckets 64, hogs 1Kb) qlimit 50 + * + * pass out on em0 from any to any keep state queue std + * pass out on em0 inet proto tcp ..... port ... 
keep state queue bulk + */ +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */ + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/kernel.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <netpfil/pf/pf_mtag.h> +#include <net/altq/altq.h> +#include <net/altq/altq_fairq.h> + +/* + * function prototypes + */ +static int fairq_clear_interface(struct fairq_if *); +static int fairq_request(struct ifaltq *, int, void *); +static void fairq_purge(struct fairq_if *); +static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int); +static int fairq_class_destroy(struct fairq_class *); +static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *fairq_dequeue(struct ifaltq *, int); + +static int fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t); +static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); +static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *); +static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); +static void fairq_purgeq(struct fairq_class *); + +static void get_class_stats(struct fairq_classstats *, struct fairq_class *); +static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t); + +int +fairq_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + int error; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + + error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc, + fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL); + + return (error); +} + +int 
+fairq_add_altq(struct pf_altq *a) +{ + struct fairq_if *pif; + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENODEV); + + + pif = malloc(sizeof(struct fairq_if), + M_DEVBUF, M_WAITOK | M_ZERO); + pif->pif_bandwidth = a->ifbandwidth; + pif->pif_maxpri = -1; + pif->pif_ifq = &ifp->if_snd; + + /* keep the state in pf_altq */ + a->altq_disc = pif; + + return (0); +} + +int +fairq_remove_altq(struct pf_altq *a) +{ + struct fairq_if *pif; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + fairq_clear_interface(pif); + + free(pif, M_DEVBUF); + return (0); +} + +int +fairq_add_queue(struct pf_altq *a) +{ + struct fairq_if *pif; + struct fairq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + /* check parameters */ + if (a->priority >= FAIRQ_MAXPRI) + return (EINVAL); + if (a->qid == 0) + return (EINVAL); + if (pif->pif_classes[a->priority] != NULL) + return (EBUSY); + if (clh_to_clp(pif, a->qid) != NULL) + return (EBUSY); + + cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth, + &a->pq_u.fairq_opts, a->qid); + if (cl == NULL) + return (ENOMEM); + + return (0); +} + +int +fairq_remove_queue(struct pf_altq *a) +{ + struct fairq_if *pif; + struct fairq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + return (fairq_class_destroy(cl)); +} + +int +fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + struct fairq_if *pif; + struct fairq_class *cl; + struct fairq_classstats stats; + int error = 0; + + if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + get_class_stats(&stats, cl); + + if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + return (error); + *nbytes = 
sizeof(stats); + return (0); +} + +/* + * bring the interface back to the initial state by discarding + * all the filters and classes. + */ +static int +fairq_clear_interface(struct fairq_if *pif) +{ + struct fairq_class *cl; + int pri; + + /* clear out the classes */ + for (pri = 0; pri <= pif->pif_maxpri; pri++) { + if ((cl = pif->pif_classes[pri]) != NULL) + fairq_class_destroy(cl); + } + + return (0); +} + +static int +fairq_request(struct ifaltq *ifq, int req, void *arg) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + fairq_purge(pif); + break; + } + return (0); +} + +/* discard all the queued packets on the interface */ +static void +fairq_purge(struct fairq_if *pif) +{ + struct fairq_class *cl; + int pri; + + for (pri = 0; pri <= pif->pif_maxpri; pri++) { + if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head) + fairq_purgeq(cl); + } + if (ALTQ_IS_ENABLED(pif->pif_ifq)) + pif->pif_ifq->ifq_len = 0; +} + +static struct fairq_class * +fairq_class_create(struct fairq_if *pif, int pri, int qlimit, + u_int bandwidth, struct fairq_opts *opts, int qid) +{ + struct fairq_class *cl; + int flags = opts->flags; + u_int nbuckets = opts->nbuckets; + int i; + +#ifndef ALTQ_RED + if (flags & FARF_RED) { +#ifdef ALTQ_DEBUG + printf("fairq_class_create: RED not configured for FAIRQ!\n"); +#endif + return (NULL); + } +#endif +#ifndef ALTQ_CODEL + if (flags & FARF_CODEL) { +#ifdef ALTQ_DEBUG + printf("fairq_class_create: CODEL not configured for FAIRQ!\n"); +#endif + return (NULL); + } +#endif + if (nbuckets == 0) + nbuckets = 256; + if (nbuckets > FAIRQ_MAX_BUCKETS) + nbuckets = FAIRQ_MAX_BUCKETS; + /* enforce power-of-2 size */ + while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1)) + ++nbuckets; + + if ((cl = pif->pif_classes[pri]) != NULL) { + /* modify the class instead of creating a new one */ + IFQ_LOCK(cl->cl_pif->pif_ifq); + if (cl->cl_head) + fairq_purgeq(cl); + 
IFQ_UNLOCK(cl->cl_pif->pif_ifq); +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_destroy(cl->cl_codel); +#endif + } else { + cl = malloc(sizeof(struct fairq_class), + M_DEVBUF, M_WAITOK | M_ZERO); + cl->cl_nbuckets = nbuckets; + cl->cl_nbucket_mask = nbuckets - 1; + + cl->cl_buckets = malloc( + sizeof(struct fairq_bucket) * cl->cl_nbuckets, + M_DEVBUF, M_WAITOK | M_ZERO); + cl->cl_head = NULL; + } + + pif->pif_classes[pri] = cl; + if (flags & FARF_DEFAULTCLASS) + pif->pif_default = cl; + if (qlimit == 0) + qlimit = 50; /* use default */ + cl->cl_qlimit = qlimit; + for (i = 0; i < cl->cl_nbuckets; ++i) { + qlimit(&cl->cl_buckets[i].queue) = qlimit; + } + cl->cl_bandwidth = bandwidth / 8; + cl->cl_qtype = Q_DROPTAIL; + cl->cl_flags = flags & FARF_USERFLAGS; + cl->cl_pri = pri; + if (pri > pif->pif_maxpri) + pif->pif_maxpri = pri; + cl->cl_pif = pif; + cl->cl_handle = qid; + cl->cl_hogs_m1 = opts->hogs_m1 / 8; + cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */ + +#ifdef ALTQ_RED + if (flags & (FARF_RED|FARF_RIO)) { + int red_flags, red_pkttime; + + red_flags = 0; + if (flags & FARF_ECN) + red_flags |= REDF_ECN; +#ifdef ALTQ_RIO + if (flags & FARF_CLEARDSCP) + red_flags |= RIOF_CLEARDSCP; +#endif + if (pif->pif_bandwidth < 8) + red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ + else + red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu + * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); +#ifdef ALTQ_RIO + if (flags & FARF_RIO) { + cl->cl_red = (red_t *)rio_alloc(0, NULL, + red_flags, red_pkttime); + if (cl->cl_red != NULL) + cl->cl_qtype = Q_RIO; + } else +#endif + if (flags & FARF_RED) { + cl->cl_red = red_alloc(0, 0, + cl->cl_qlimit * 10/100, + cl->cl_qlimit * 30/100, + red_flags, red_pkttime); + if (cl->cl_red != NULL) + cl->cl_qtype = Q_RED; + } + } +#endif /* ALTQ_RED */ +#ifdef 
ALTQ_CODEL + if (flags & FARF_CODEL) { + cl->cl_codel = codel_alloc(5, 100, 0); + if (cl->cl_codel != NULL) + cl->cl_qtype = Q_CODEL; + } +#endif + + return (cl); +} + +static int +fairq_class_destroy(struct fairq_class *cl) +{ + struct fairq_if *pif; + int pri; + + IFQ_LOCK(cl->cl_pif->pif_ifq); + + if (cl->cl_head) + fairq_purgeq(cl); + + pif = cl->cl_pif; + pif->pif_classes[cl->cl_pri] = NULL; + if (pif->pif_poll_cache == cl) + pif->pif_poll_cache = NULL; + if (pif->pif_maxpri == cl->cl_pri) { + for (pri = cl->cl_pri; pri >= 0; pri--) + if (pif->pif_classes[pri] != NULL) { + pif->pif_maxpri = pri; + break; + } + if (pri < 0) + pif->pif_maxpri = -1; + } + IFQ_UNLOCK(cl->cl_pif->pif_ifq); + + if (cl->cl_red != NULL) { +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_destroy(cl->cl_codel); +#endif + } + free(cl->cl_buckets, M_DEVBUF); + free(cl, M_DEVBUF); + + return (0); +} + +/* + * fairq_enqueue is an enqueue function to be registered to + * (*altq_enqueue) in struct ifaltq. 
+ */ +static int +fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + struct fairq_class *cl = NULL; /* Make compiler happy */ + struct pf_mtag *t; + u_int32_t qid_hash = 0; + int len; + + IFQ_LOCK_ASSERT(ifq); + + /* grab class set by classifier */ + if ((m->m_flags & M_PKTHDR) == 0) { + /* should not happen */ + printf("altq: packet for %s does not have pkthdr\n", + ifq->altq_ifp->if_xname); + m_freem(m); + return (ENOBUFS); + } + + if ((t = pf_find_mtag(m)) != NULL) { + cl = clh_to_clp(pif, t->qid); + qid_hash = t->qid_hash; + } + if (cl == NULL) { + cl = pif->pif_default; + if (cl == NULL) { + m_freem(m); + return (ENOBUFS); + } + } + cl->cl_flags |= FARF_HAS_PACKETS; + cl->cl_pktattr = NULL; + len = m_pktlen(m); + if (fairq_addq(cl, m, qid_hash) != 0) { + /* drop occurred. mbuf was freed in fairq_addq. */ + PKTCNTR_ADD(&cl->cl_dropcnt, len); + return (ENOBUFS); + } + IFQ_INC_LEN(ifq); + + return (0); +} + +/* + * fairq_dequeue is a dequeue function to be registered to + * (*altq_dequeue) in struct ifaltq. + * + * note: ALTDQ_POLL returns the next packet without removing the packet + * from the queue. ALTDQ_REMOVE is a normal dequeue operation. + * ALTDQ_REMOVE must return the same packet if called immediately + * after ALTDQ_POLL. 
+ */ +static struct mbuf * +fairq_dequeue(struct ifaltq *ifq, int op) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + struct fairq_class *cl; + struct fairq_class *best_cl; + struct mbuf *best_m; + struct mbuf *m = NULL; + uint64_t cur_time = read_machclk(); + int pri; + int hit_limit; + + IFQ_LOCK_ASSERT(ifq); + + if (IFQ_IS_EMPTY(ifq)) { + return (NULL); + } + + if (pif->pif_poll_cache && op == ALTDQ_REMOVE) { + best_cl = pif->pif_poll_cache; + m = fairq_getq(best_cl, cur_time); + pif->pif_poll_cache = NULL; + if (m) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); + return (m); + } + } else { + best_cl = NULL; + best_m = NULL; + + for (pri = pif->pif_maxpri; pri >= 0; pri--) { + if ((cl = pif->pif_classes[pri]) == NULL) + continue; + if ((cl->cl_flags & FARF_HAS_PACKETS) == 0) + continue; + m = fairq_pollq(cl, cur_time, &hit_limit); + if (m == NULL) { + cl->cl_flags &= ~FARF_HAS_PACKETS; + continue; + } + + /* + * Only override the best choice if we are under + * the BW limit. + */ + if (hit_limit == 0 || best_cl == NULL) { + best_cl = cl; + best_m = m; + } + + /* + * Remember the highest priority mbuf in case we + * do not find any lower priority mbufs. + */ + if (hit_limit) + continue; + break; + } + if (op == ALTDQ_POLL) { + pif->pif_poll_cache = best_cl; + m = best_m; + } else if (best_cl) { + m = fairq_getq(best_cl, cur_time); + if (m != NULL) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); + } + } + return (m); + } + return (NULL); +} + +static int +fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid) +{ + fairq_bucket_t *b; + u_int hindex; + uint64_t bw; + + /* + * If the packet doesn't have any keep state put it on the end of + * our queue. XXX this can result in out of order delivery. 
+ */ + if (bucketid == 0) { + if (cl->cl_head) + b = cl->cl_head->prev; + else + b = &cl->cl_buckets[0]; + } else { + hindex = bucketid & cl->cl_nbucket_mask; + b = &cl->cl_buckets[hindex]; + } + + /* + * Add the bucket to the end of the circular list of active buckets. + * + * As a special case we add the bucket to the beginning of the list + * instead of the end if it was not previously on the list and if + * its traffic is less then the hog level. + */ + if (b->in_use == 0) { + b->in_use = 1; + if (cl->cl_head == NULL) { + cl->cl_head = b; + b->next = b; + b->prev = b; + } else { + b->next = cl->cl_head; + b->prev = cl->cl_head->prev; + b->prev->next = b; + b->next->prev = b; + + if (b->bw_delta && cl->cl_hogs_m1) { + bw = b->bw_bytes * machclk_freq / b->bw_delta; + if (bw < cl->cl_hogs_m1) + cl->cl_head = b; + } + } + } + +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + return codel_addq(cl->cl_codel, &b->queue, m); +#endif + if (qlen(&b->queue) >= qlimit(&b->queue)) { + m_freem(m); + return (-1); + } + + if (cl->cl_flags & FARF_CLEARDSCP) + write_dsfield(m, cl->cl_pktattr, 0); + + _addq(&b->queue, m); + + return (0); +} + +static struct mbuf * +fairq_getq(struct fairq_class *cl, uint64_t cur_time) +{ + fairq_bucket_t *b; + struct mbuf *m; + + b = fairq_selectq(cl, 0); + if (b == NULL) + m = NULL; +#ifdef ALTQ_RIO + else if (cl->cl_qtype == Q_RIO) + m = rio_getq((rio_t *)cl->cl_red, &b->queue); +#endif +#ifdef ALTQ_RED + else if (cl->cl_qtype == Q_RED) + m = red_getq(cl->cl_red, &b->queue); +#endif +#ifdef ALTQ_CODEL + else if (cl->cl_qtype == Q_CODEL) + m = codel_getq(cl->cl_codel, &b->queue); +#endif + else + m = _getq(&b->queue); + + /* + * Calculate the BW change + */ + if (m != NULL) { + uint64_t delta; + + /* + * 
Per-class bandwidth calculation + */ + delta = (cur_time - cl->cl_last_time); + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + cl->cl_bw_delta += delta; + cl->cl_bw_bytes += m->m_pkthdr.len; + cl->cl_last_time = cur_time; + cl->cl_bw_delta -= cl->cl_bw_delta >> 3; + cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3; + + /* + * Per-bucket bandwidth calculation + */ + delta = (cur_time - b->last_time); + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + b->bw_delta += delta; + b->bw_bytes += m->m_pkthdr.len; + b->last_time = cur_time; + b->bw_delta -= b->bw_delta >> 3; + b->bw_bytes -= b->bw_bytes >> 3; + } + return(m); +} + +/* + * Figure out what the next packet would be if there were no limits. If + * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise + * it is set to 0. A non-NULL mbuf is returned either way. + */ +static struct mbuf * +fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit) +{ + fairq_bucket_t *b; + struct mbuf *m; + uint64_t delta; + uint64_t bw; + + *hit_limit = 0; + b = fairq_selectq(cl, 1); + if (b == NULL) + return(NULL); + m = qhead(&b->queue); + + /* + * Did this packet exceed the class bandwidth? Calculate the + * bandwidth component of the packet. + * + * - Calculate bytes per second + */ + delta = cur_time - cl->cl_last_time; + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + cl->cl_bw_delta += delta; + cl->cl_last_time = cur_time; + if (cl->cl_bw_delta) { + bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta; + + if (bw > cl->cl_bandwidth) + *hit_limit = 1; +#ifdef ALTQ_DEBUG + printf("BW %6ju relative to %6u %d queue %p\n", + (uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b); +#endif + } + return(m); +} + +/* + * Locate the next queue we want to pull a packet out of. This code + * is also responsible for removing empty buckets from the circular list. 
+ */ +static +fairq_bucket_t * +fairq_selectq(struct fairq_class *cl, int ispoll) +{ + fairq_bucket_t *b; + uint64_t bw; + + if (ispoll == 0 && cl->cl_polled) { + b = cl->cl_polled; + cl->cl_polled = NULL; + return(b); + } + + while ((b = cl->cl_head) != NULL) { + /* + * Remove empty queues from consideration + */ + if (qempty(&b->queue)) { + b->in_use = 0; + cl->cl_head = b->next; + if (cl->cl_head == b) { + cl->cl_head = NULL; + } else { + b->next->prev = b->prev; + b->prev->next = b->next; + } + continue; + } + + /* + * Advance the round robin. Queues with bandwidths less + * then the hog bandwidth are allowed to burst. + */ + if (cl->cl_hogs_m1 == 0) { + cl->cl_head = b->next; + } else if (b->bw_delta) { + bw = b->bw_bytes * machclk_freq / b->bw_delta; + if (bw >= cl->cl_hogs_m1) { + cl->cl_head = b->next; + } + /* + * XXX TODO - + */ + } + + /* + * Return bucket b. + */ + break; + } + if (ispoll) + cl->cl_polled = b; + return(b); +} + +static void +fairq_purgeq(struct fairq_class *cl) +{ + fairq_bucket_t *b; + struct mbuf *m; + + while ((b = fairq_selectq(cl, 0)) != NULL) { + while ((m = _getq(&b->queue)) != NULL) { + PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); + m_freem(m); + } + ASSERT(qlen(&b->queue) == 0); + } +} + +static void +get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl) +{ + fairq_bucket_t *b; + + sp->class_handle = cl->cl_handle; + sp->qlimit = cl->cl_qlimit; + sp->xmit_cnt = cl->cl_xmitcnt; + sp->drop_cnt = cl->cl_dropcnt; + sp->qtype = cl->cl_qtype; + sp->qlength = 0; + + if (cl->cl_head) { + b = cl->cl_head; + do { + sp->qlength += qlen(&b->queue); + b = b->next; + } while (b != cl->cl_head); + } + +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_getstats(cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_getstats(cl->cl_codel, &sp->codel); +#endif +} + +/* convert a class handle 
to the corresponding class pointer */ +static struct fairq_class * +clh_to_clp(struct fairq_if *pif, uint32_t chandle) +{ + struct fairq_class *cl; + int idx; + + if (chandle == 0) + return (NULL); + + for (idx = pif->pif_maxpri; idx >= 0; idx--) + if ((cl = pif->pif_classes[idx]) != NULL && + cl->cl_handle == chandle) + return (cl); + + return (NULL); +} + +#endif /* ALTQ_FAIRQ */ diff --git a/freebsd/sys/net/altq/altq_fairq.h b/freebsd/sys/net/altq/altq_fairq.h new file mode 100644 index 00000000..1a4b97dd --- /dev/null +++ b/freebsd/sys/net/altq/altq_fairq.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon <dillon@backplane.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_FAIRQ_H_ +#define _ALTQ_ALTQ_FAIRQ_H_ + +#include <net/altq/altq.h> +#include <net/altq/altq_classq.h> +#include <net/altq/altq_codel.h> +#include <net/altq/altq_red.h> +#include <net/altq/altq_rio.h> +#include <net/altq/altq_rmclass.h> + +#define FAIRQ_MAX_BUCKETS 2048 /* maximum number of sorting buckets */ +#define FAIRQ_MAXPRI RM_MAXPRIO +#define FAIRQ_BITMAP_WIDTH (sizeof(fairq_bitmap_t)*8) +#define FAIRQ_BITMAP_MASK (FAIRQ_BITMAP_WIDTH - 1) + +/* fairq class flags */ +#define FARF_RED 0x0001 /* use RED */ +#define FARF_ECN 0x0002 /* use RED/ECN */ +#define FARF_RIO 0x0004 /* use RIO */ +#define FARF_CODEL 0x0008 /* use CoDel */ +#define FARF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define FARF_DEFAULTCLASS 0x1000 /* default class */ + +#define FARF_HAS_PACKETS 0x2000 /* might have queued packets */ + +#define FARF_USERFLAGS (FARF_RED|FARF_ECN|FARF_RIO|FARF_CLEARDSCP| \ + FARF_DEFAULTCLASS) + +/* special class handles */ +#define FAIRQ_NULLCLASS_HANDLE 0 + +typedef u_int fairq_bitmap_t; + +struct fairq_classstats { + uint32_t class_handle; + + u_int qlength; + u_int qlimit; + struct pktcntr xmit_cnt; /* transmitted packet counter */ + struct pktcntr drop_cnt; /* dropped packet counter */ + + /* codel, red and rio related info */ + int qtype; + struct redstats red[3]; /* rio has 3 red 
stats */
	struct codel_stats	codel;
};

#ifdef _KERNEL

/*
 * One hash bucket: a node of the class's circular round-robin ring
 * holding the actual packet queue plus bandwidth bookkeeping used by
 * fairq_selectq() to detect hog flows.
 */
typedef struct fairq_bucket {
	struct fairq_bucket *next;	/* circular list */
	struct fairq_bucket *prev;	/* circular list */
	class_queue_t	queue;		/* the actual queue */
	uint64_t	bw_bytes;	/* statistics used to calculate bw */
	uint64_t	bw_delta;	/* statistics used to calculate bw */
	uint64_t	last_time;
	int		in_use;
} fairq_bucket_t;

/* Per-priority FAIRQ class: a set of buckets plus AQM and rate state. */
struct fairq_class {
	uint32_t	cl_handle;	/* class handle */
	u_int		cl_nbuckets;	/* (power of 2) */
	u_int		cl_nbucket_mask; /* bucket mask */
	fairq_bucket_t	*cl_buckets;
	fairq_bucket_t	*cl_head;	/* head of circular bucket list */
	fairq_bucket_t	*cl_polled;	/* bucket pinned by the last poll */
	union {
		struct red	*cl_red;	/* RED state */
		struct codel	*cl_codel;	/* CoDel state */
	} cl_aqm;
#define	cl_red	cl_aqm.cl_red
#define	cl_codel cl_aqm.cl_codel
	u_int		cl_hogs_m1;
	u_int		cl_lssc_m1;
	u_int		cl_bandwidth;
	uint64_t	cl_bw_bytes;
	uint64_t	cl_bw_delta;
	uint64_t	cl_last_time;
	int		cl_qtype;	/* rollup */
	int		cl_qlimit;
	int		cl_pri;		/* priority */
	int		cl_flags;	/* class flags */
	struct fairq_if	*cl_pif;	/* back pointer to pif */
	struct altq_pktattr *cl_pktattr; /* saved header used by ECN */

	/* round robin index */

	/* statistics */
	struct pktcntr  cl_xmitcnt;	/* transmitted packet counter */
	struct pktcntr  cl_dropcnt;	/* dropped packet counter */
};

/*
 * fairq interface state
 */
struct fairq_if {
	struct fairq_if		*pif_next;	/* interface state list */
	struct ifaltq		*pif_ifq;	/* backpointer to ifaltq */
	u_int			pif_bandwidth;	/* link bandwidth in bps */
	int			pif_maxpri;	/* max priority in use */
	struct fairq_class	*pif_poll_cache;/* cached poll */
	struct fairq_class	*pif_default;	/* default class */
	struct fairq_class	*pif_classes[FAIRQ_MAXPRI]; /* classes */
};

#endif /* _KERNEL */

#endif /* _ALTQ_ALTQ_FAIRQ_H_ */
00000000..f7a18296 --- /dev/null +++ b/freebsd/sys/net/altq/altq_hfsc.c @@ -0,0 +1,2240 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this software and + * its documentation is hereby granted (including for commercial or + * for-profit use), provided that both the copyright notice and this + * permission notice appear in all copies of the software, derivative + * works, or modified versions, and any portions thereof. + * + * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF + * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS + * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * Carnegie Mellon encourages (but does not require) users of this + * software to return any improvements or extensions that they make, + * and to grant Carnegie Mellon the rights to redistribute these + * changes without encumbrance. + * + * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ + * $FreeBSD$ + */ +/* + * H-FSC is described in Proceedings of SIGCOMM'97, + * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, + * Real-Time and Priority Service" + * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. 
+ * + * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing. + * when a class has an upperlimit, the fit-time is computed from the + * upperlimit service curve. the link-sharing scheduler does not schedule + * a class whose fit-time exceeds the current time. + */ + +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */ + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/queue.h> +#if 1 /* ALTQ3_COMPAT */ +#include <sys/sockio.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#endif /* ALTQ3_COMPAT */ + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <netpfil/pf/pf_mtag.h> +#include <net/altq/altq.h> +#include <net/altq/altq_hfsc.h> +#ifdef ALTQ3_COMPAT +#include <net/altq/altq_conf.h> +#endif + +/* + * function prototypes + */ +static int hfsc_clear_interface(struct hfsc_if *); +static int hfsc_request(struct ifaltq *, int, void *); +static void hfsc_purge(struct hfsc_if *); +static struct hfsc_class *hfsc_class_create(struct hfsc_if *, + struct service_curve *, struct service_curve *, struct service_curve *, + struct hfsc_class *, int, int, int); +static int hfsc_class_destroy(struct hfsc_class *); +static struct hfsc_class *hfsc_nextclass(struct hfsc_class *); +static int hfsc_enqueue(struct ifaltq *, struct mbuf *, + struct altq_pktattr *); +static struct mbuf *hfsc_dequeue(struct ifaltq *, int); + +static int hfsc_addq(struct hfsc_class *, struct mbuf *); +static struct mbuf *hfsc_getq(struct hfsc_class *); +static struct mbuf *hfsc_pollq(struct hfsc_class *); +static void hfsc_purgeq(struct hfsc_class *); + +static void update_cfmin(struct hfsc_class *); +static void 
set_active(struct hfsc_class *, int); +static void set_passive(struct hfsc_class *); + +static void init_ed(struct hfsc_class *, int); +static void update_ed(struct hfsc_class *, int); +static void update_d(struct hfsc_class *, int); +static void init_vf(struct hfsc_class *, int); +static void update_vf(struct hfsc_class *, int, u_int64_t); +static void ellist_insert(struct hfsc_class *); +static void ellist_remove(struct hfsc_class *); +static void ellist_update(struct hfsc_class *); +struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t); +static void actlist_insert(struct hfsc_class *); +static void actlist_remove(struct hfsc_class *); +static void actlist_update(struct hfsc_class *); + +static struct hfsc_class *actlist_firstfit(struct hfsc_class *, + u_int64_t); + +static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t); +static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t); +static __inline u_int64_t m2sm(u_int); +static __inline u_int64_t m2ism(u_int); +static __inline u_int64_t d2dx(u_int); +static u_int sm2m(u_int64_t); +static u_int dx2d(u_int64_t); + +static void sc2isc(struct service_curve *, struct internal_sc *); +static void rtsc_init(struct runtime_sc *, struct internal_sc *, + u_int64_t, u_int64_t); +static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t); +static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t); +static void rtsc_min(struct runtime_sc *, struct internal_sc *, + u_int64_t, u_int64_t); + +static void get_class_stats(struct hfsc_classstats *, + struct hfsc_class *); +static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t); + + +#ifdef ALTQ3_COMPAT +static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int); +static int hfsc_detach(struct hfsc_if *); +static int hfsc_class_modify(struct hfsc_class *, struct service_curve *, + struct service_curve *, struct service_curve *); + +static int hfsccmd_if_attach(struct hfsc_attach *); +static int hfsccmd_if_detach(struct hfsc_interface *); +static int 
hfsccmd_add_class(struct hfsc_add_class *);
static int hfsccmd_delete_class(struct hfsc_delete_class *);
static int hfsccmd_modify_class(struct hfsc_modify_class *);
static int hfsccmd_add_filter(struct hfsc_add_filter *);
static int hfsccmd_delete_filter(struct hfsc_delete_filter *);
static int hfsccmd_class_stats(struct hfsc_class_stats *);

altqdev_decl(hfsc);
#endif /* ALTQ3_COMPAT */

/*
 * macros
 */
#define	is_a_parent_class(cl)	((cl)->cl_children != NULL)

#define	HT_INFINITY	0xffffffffffffffffLL	/* infinite time value */

#ifdef ALTQ3_COMPAT
/* hif_list keeps all hfsc_if's allocated. */
static struct hfsc_if *hif_list = NULL;
#endif /* ALTQ3_COMPAT */

/*
 * Attach the HFSC discipline callbacks to the send queue of the
 * interface named in the pf_altq request.  Returns 0 or an errno.
 */
int
hfsc_pfattach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error;

	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
		return (EINVAL);
	s = splnet();
	error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
	    hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
	splx(s);
	return (error);
}

/* Allocate per-interface HFSC state and stash it in the pf_altq record. */
int
hfsc_add_altq(struct pf_altq *a)
{
	struct hfsc_if *hif;
	struct ifnet *ifp;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);
	if (!ALTQ_IS_READY(&ifp->if_snd))
		return (ENODEV);

	hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (hif == NULL)
		return (ENOMEM);

	TAILQ_INIT(&hif->hif_eligible);
	hif->hif_ifq = &ifp->if_snd;

	/* keep the state in pf_altq */
	a->altq_disc = hif;

	return (0);
}

/* Tear down the per-interface state created by hfsc_add_altq(). */
int
hfsc_remove_altq(struct pf_altq *a)
{
	struct hfsc_if *hif;

	if ((hif = a->altq_disc) == NULL)
		return (EINVAL);
	a->altq_disc = NULL;

	(void)hfsc_clear_interface(hif);
	(void)hfsc_class_destroy(hif->hif_rootclass);

	free(hif, M_DEVBUF);

	return (0);
}

/*
 * Create one HFSC class from the realtime/link-sharing/upper-limit
 * service curves carried in the request.  Returns 0 or an errno.
 */
int
hfsc_add_queue(struct pf_altq *a)
{
	struct hfsc_if *hif;
	struct hfsc_class *cl, *parent;
	struct hfsc_opts *opts;
	struct service_curve rtsc, lssc, ulsc;

	if ((hif = a->altq_disc) == NULL)
		return (EINVAL);

	opts = &a->pq_u.hfsc_opts;

	/* only the root class may have a null parent handle */
	if (a->parent_qid == HFSC_NULLCLASS_HANDLE &&
	    hif->hif_rootclass == NULL)
		parent = NULL;
	else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL)
		return (EINVAL);

	if (a->qid == 0)
		return (EINVAL);

	/* reject duplicate handles */
	if (clh_to_clp(hif, a->qid) != NULL)
		return (EBUSY);

	rtsc.m1 = opts->rtsc_m1;
	rtsc.d = opts->rtsc_d;
	rtsc.m2 = opts->rtsc_m2;
	lssc.m1 = opts->lssc_m1;
	lssc.d = opts->lssc_d;
	lssc.m2 = opts->lssc_m2;
	ulsc.m1 = opts->ulsc_m1;
	ulsc.d = opts->ulsc_d;
	ulsc.m2 = opts->ulsc_m2;

	cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
	    parent, a->qlimit, opts->flags, a->qid);
	if (cl == NULL)
		return (ENOMEM);

	return (0);
}

/* Destroy the class identified by a->qid. */
int
hfsc_remove_queue(struct pf_altq *a)
{
	struct hfsc_if *hif;
	struct hfsc_class *cl;

	if ((hif = a->altq_disc) == NULL)
		return (EINVAL);

	if ((cl = clh_to_clp(hif, a->qid)) == NULL)
		return (EINVAL);

	return (hfsc_class_destroy(cl));
}

/* Copy one class's statistics out to the userland buffer ubuf. */
int
hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	struct hfsc_if *hif;
	struct hfsc_class *cl;
	struct hfsc_classstats stats;
	int error = 0;

	if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	if ((cl = clh_to_clp(hif, a->qid)) == NULL)
		return (EINVAL);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	get_class_stats(&stats, cl);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);
	*nbytes = sizeof(stats);
	return (0);
}

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes except the root class.
 */
static int
hfsc_clear_interface(struct hfsc_if *hif)
{
	struct hfsc_class	*cl;

#ifdef ALTQ3_COMPAT
	/* free the filters for this interface */
	acc_discard_filters(&hif->hif_classifier, NULL, 1);
#endif

	/* clear out the classes */
	while (hif->hif_rootclass != NULL &&
	    (cl = hif->hif_rootclass->cl_children) != NULL) {
		/*
		 * remove the first leaf class found in the hierarchy
		 * then start over
		 */
		for (; cl != NULL; cl = hfsc_nextclass(cl)) {
			if (!is_a_parent_class(cl)) {
				(void)hfsc_class_destroy(cl);
				break;
			}
		}
	}

	return (0);
}

/* Discipline request handler; only ALTRQ_PURGE is acted upon. */
static int
hfsc_request(struct ifaltq *ifq, int req, void *arg)
{
	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;

	IFQ_LOCK_ASSERT(ifq);

	switch (req) {
	case ALTRQ_PURGE:
		hfsc_purge(hif);
		break;
	}
	return (0);
}

/* discard all the queued packets on the interface */
static void
hfsc_purge(struct hfsc_if *hif)
{
	struct hfsc_class *cl;

	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
		if (!qempty(cl->cl_q))
			hfsc_purgeq(cl);
	if (ALTQ_IS_ENABLED(hif->hif_ifq))
		hif->hif_ifq->ifq_len = 0;
}

/*
 * Allocate and link a new class with the given realtime (rsc),
 * link-sharing (fsc) and upper-limit (usc) service curves.
 * Returns the new class, or NULL on failure; partially constructed
 * state is torn down at err_ret.
 */
struct hfsc_class *
hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
    struct service_curve *fsc, struct service_curve *usc,
    struct hfsc_class *parent, int qlimit, int flags, int qid)
{
	struct hfsc_class *cl, *p;
	int i, s;

	if (hif->hif_classes >= HFSC_MAX_CLASSES)
		return (NULL);

#ifndef ALTQ_RED
	if (flags & HFCF_RED) {
#ifdef ALTQ_DEBUG
		printf("hfsc_class_create: RED not configured for HFSC!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_CODEL
	if (flags & HFCF_CODEL) {
#ifdef ALTQ_DEBUG
		printf("hfsc_class_create: CODEL not configured for HFSC!\n");
#endif
		return (NULL);
	}
#endif

	cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cl == NULL)
		return (NULL);

	cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cl->cl_q == NULL)
		goto err_ret;

	TAILQ_INIT(&cl->cl_actc);

	if (qlimit == 0)
		qlimit = 50;	/* use default */
	qlimit(cl->cl_q) = qlimit;
	qtype(cl->cl_q) = Q_DROPTAIL;
	qlen(cl->cl_q) = 0;
	qsize(cl->cl_q) = 0;
	cl->cl_flags = flags;
#ifdef ALTQ_RED
	if (flags & (HFCF_RED|HFCF_RIO)) {
		int	red_flags, red_pkttime;
		u_int	m2;

		/* size RED by the steepest long-term slope of the curves */
		m2 = 0;
		if (rsc != NULL && rsc->m2 > m2)
			m2 = rsc->m2;
		if (fsc != NULL && fsc->m2 > m2)
			m2 = fsc->m2;
		if (usc != NULL && usc->m2 > m2)
			m2 = usc->m2;

		red_flags = 0;
		if (flags & HFCF_ECN)
			red_flags |= REDF_ECN;
#ifdef ALTQ_RIO
		if (flags & HFCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		if (m2 < 8)
			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
				* 1000 * 1000 * 1000 / (m2 / 8);
		if (flags & HFCF_RED) {
			cl->cl_red = red_alloc(0, 0,
			    qlimit(cl->cl_q) * 10/100,
			    qlimit(cl->cl_q) * 30/100,
			    red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				qtype(cl->cl_q) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->cl_red = (red_t *)rio_alloc(0, NULL,
			    red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				qtype(cl->cl_q) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */
#ifdef ALTQ_CODEL
	if (flags & HFCF_CODEL) {
		cl->cl_codel = codel_alloc(5, 100, 0);
		if (cl->cl_codel != NULL)
			qtype(cl->cl_q) = Q_CODEL;
	}
#endif

	/* a curve with m1 == m2 == 0 means "not configured" */
	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
		cl->cl_rsc = malloc(sizeof(struct internal_sc),
		    M_DEVBUF, M_NOWAIT);
		if (cl->cl_rsc == NULL)
			goto err_ret;
		sc2isc(rsc, cl->cl_rsc);
		rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
		rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
	}
	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
		cl->cl_fsc = malloc(sizeof(struct internal_sc),
		    M_DEVBUF, M_NOWAIT);
		if (cl->cl_fsc == NULL)
			goto err_ret;
		sc2isc(fsc, cl->cl_fsc);
		rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
	}
	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
		cl->cl_usc = malloc(sizeof(struct internal_sc),
		    M_DEVBUF, M_NOWAIT);
		if (cl->cl_usc == NULL)
			goto err_ret;
		sc2isc(usc, cl->cl_usc);
		rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
	}

	cl->cl_id = hif->hif_classid++;
	cl->cl_handle = qid;
	cl->cl_hif = hif;
	cl->cl_parent = parent;

	s = splnet();
	IFQ_LOCK(hif->hif_ifq);
	hif->hif_classes++;

	/*
	 * find a free slot in the class table.  if the slot matching
	 * the lower bits of qid is free, use this slot.  otherwise,
	 * use the first free slot.
	 */
	i = qid % HFSC_MAX_CLASSES;
	if (hif->hif_class_tbl[i] == NULL)
		hif->hif_class_tbl[i] = cl;
	else {
		for (i = 0; i < HFSC_MAX_CLASSES; i++)
			if (hif->hif_class_tbl[i] == NULL) {
				hif->hif_class_tbl[i] = cl;
				break;
			}
		if (i == HFSC_MAX_CLASSES) {
			IFQ_UNLOCK(hif->hif_ifq);
			splx(s);
			goto err_ret;
		}
	}

	if (flags & HFCF_DEFAULTCLASS)
		hif->hif_defaultclass = cl;

	if (parent == NULL) {
		/* this is root class */
		hif->hif_rootclass = cl;
	} else {
		/* add this class to the children list of the parent */
		if ((p = parent->cl_children) == NULL)
			parent->cl_children = cl;
		else {
			while (p->cl_siblings != NULL)
				p = p->cl_siblings;
			p->cl_siblings = cl;
		}
	}
	IFQ_UNLOCK(hif->hif_ifq);
	splx(s);

	return (cl);

 err_ret:
	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->cl_q))
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->cl_q))
			red_destroy(cl->cl_red);
#endif
#ifdef ALTQ_CODEL
		if (q_is_codel(cl->cl_q))
			codel_destroy(cl->cl_codel);
#endif
	}
	if (cl->cl_fsc != NULL)
		free(cl->cl_fsc, M_DEVBUF);
	if (cl->cl_rsc != NULL)
		free(cl->cl_rsc, M_DEVBUF);
	if (cl->cl_usc != NULL)
		free(cl->cl_usc, M_DEVBUF);
	if (cl->cl_q != NULL)
		free(cl->cl_q, M_DEVBUF);
	free(cl, M_DEVBUF);
	return (NULL);
}

/*
 * Unlink a leaf class from the tree and release all of its resources.
 * Returns EBUSY if the class still has children.
 */
static int
hfsc_class_destroy(struct hfsc_class *cl)
{
	int i, s;

	if (cl == NULL)
		return (0);

	if (is_a_parent_class(cl))
		return (EBUSY);

	s = splnet();
	IFQ_LOCK(cl->cl_hif->hif_ifq);

#ifdef ALTQ3_COMPAT
	/* delete filters referencing to this class */
	acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0);
#endif /* ALTQ3_COMPAT */

	if (!qempty(cl->cl_q))
		hfsc_purgeq(cl);

	if (cl->cl_parent == NULL) {
		/* this is root class */
	} else {
		/* unlink cl from its parent's children list */
		struct hfsc_class *p = cl->cl_parent->cl_children;

		if (p == cl)
			cl->cl_parent->cl_children = cl->cl_siblings;
		else do {
			if (p->cl_siblings == cl) {
				p->cl_siblings = cl->cl_siblings;
				break;
			}
		} while ((p = p->cl_siblings) != NULL);
		ASSERT(p != NULL);
	}

	for (i = 0; i < HFSC_MAX_CLASSES; i++)
		if (cl->cl_hif->hif_class_tbl[i] == cl) {
			cl->cl_hif->hif_class_tbl[i] = NULL;
			break;
		}

	cl->cl_hif->hif_classes--;
	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
	splx(s);

	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->cl_q))
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->cl_q))
			red_destroy(cl->cl_red);
#endif
#ifdef ALTQ_CODEL
		if (q_is_codel(cl->cl_q))
			codel_destroy(cl->cl_codel);
#endif
	}

	IFQ_LOCK(cl->cl_hif->hif_ifq);
	if (cl == cl->cl_hif->hif_rootclass)
		cl->cl_hif->hif_rootclass = NULL;
	if (cl == cl->cl_hif->hif_defaultclass)
		cl->cl_hif->hif_defaultclass = NULL;
	IFQ_UNLOCK(cl->cl_hif->hif_ifq);

	if (cl->cl_usc != NULL)
		free(cl->cl_usc, M_DEVBUF);
	if (cl->cl_fsc != NULL)
		free(cl->cl_fsc, M_DEVBUF);
	if (cl->cl_rsc != NULL)
		free(cl->cl_rsc, M_DEVBUF);
	free(cl->cl_q, M_DEVBUF);
	free(cl, M_DEVBUF);

	return (0);
}

/*
 * hfsc_nextclass returns the next class in the tree.
 * usage:
 *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
 *		do_something;
 */
static struct hfsc_class *
hfsc_nextclass(struct hfsc_class *cl)
{
	/* pre-order walk: descend to a child, else a sibling, else climb */
	if (cl->cl_children != NULL)
		cl = cl->cl_children;
	else if (cl->cl_siblings != NULL)
		cl = cl->cl_siblings;
	else {
		while ((cl = cl->cl_parent) != NULL)
			if (cl->cl_siblings) {
				cl = cl->cl_siblings;
				break;
			}
	}

	return (cl);
}

/*
 * hfsc_enqueue is an enqueue function to be registered to
 * (*altq_enqueue) in struct ifaltq.
 */
static int
hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
	struct hfsc_class *cl;
	struct pf_mtag *t;
	int len;

	IFQ_LOCK_ASSERT(ifq);

	/* grab class set by classifier */
	if ((m->m_flags & M_PKTHDR) == 0) {
		/* should not happen */
		printf("altq: packet for %s does not have pkthdr\n",
		    ifq->altq_ifp->if_xname);
		m_freem(m);
		return (ENOBUFS);
	}
	cl = NULL;
	if ((t = pf_find_mtag(m)) != NULL)
		cl = clh_to_clp(hif, t->qid);
#ifdef ALTQ3_COMPAT
	else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
		cl = pktattr->pattr_class;
#endif
	/* unclassified or non-leaf traffic falls back to the default class */
	if (cl == NULL || is_a_parent_class(cl)) {
		cl = hif->hif_defaultclass;
		if (cl == NULL) {
			m_freem(m);
			return (ENOBUFS);
		}
	}
#ifdef ALTQ3_COMPAT
	if (pktattr != NULL)
		cl->cl_pktattr = pktattr;  /* save proto hdr used by ECN */
	else
#endif
		cl->cl_pktattr = NULL;
	len = m_pktlen(m);
	if (hfsc_addq(cl, m) != 0) {
		/* drop occurred.  mbuf was freed in hfsc_addq. */
		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
		return (ENOBUFS);
	}
	IFQ_INC_LEN(ifq);
	cl->cl_hif->hif_packets++;

	/* successfully queued. */
	if (qlen(cl->cl_q) == 1)
		set_active(cl, m_pktlen(m));

	return (0);
}

/*
 * hfsc_dequeue is a dequeue function to be registered to
 * (*altq_dequeue) in struct ifaltq.
 *
 * note: ALTDQ_POLL returns the next packet without removing the packet
 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
 *	ALTDQ_REMOVE must return the same packet if called immediately
 *	after ALTDQ_POLL.
 */
static struct mbuf *
hfsc_dequeue(struct ifaltq *ifq, int op)
{
	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
	struct hfsc_class *cl;
	struct mbuf *m;
	int len, next_len;
	int realtime = 0;
	u_int64_t cur_time;

	IFQ_LOCK_ASSERT(ifq);

	if (hif->hif_packets == 0)
		/* no packet in the tree */
		return (NULL);

	cur_time = read_machclk();

	if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
		/* honor the class selected by the preceding poll */
		cl = hif->hif_pollcache;
		hif->hif_pollcache = NULL;
		/* check if the class was scheduled by real-time criteria */
		if (cl->cl_rsc != NULL)
			realtime = (cl->cl_e <= cur_time);
	} else {
		/*
		 * if there are eligible classes, use real-time criteria.
		 * find the class with the minimum deadline among
		 * the eligible classes.
		 */
		if ((cl = hfsc_get_mindl(hif, cur_time))
		    != NULL) {
			realtime = 1;
		} else {
#ifdef ALTQ_DEBUG
			int fits = 0;
#endif
			/*
			 * use link-sharing criteria
			 * get the class with the minimum vt in the hierarchy
			 */
			cl = hif->hif_rootclass;
			while (is_a_parent_class(cl)) {

				cl = actlist_firstfit(cl, cur_time);
				if (cl == NULL) {
#ifdef ALTQ_DEBUG
					if (fits > 0)
						printf("%d fit but none found\n",fits);
#endif
					return (NULL);
				}
				/*
				 * update parent's cl_cvtmin.
				 * don't update if the new vt is smaller.
				 */
				if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
					cl->cl_parent->cl_cvtmin = cl->cl_vt;
#ifdef ALTQ_DEBUG
				fits++;
#endif
			}
		}

		if (op == ALTDQ_POLL) {
			hif->hif_pollcache = cl;
			m = hfsc_pollq(cl);
			return (m);
		}
	}

	m = hfsc_getq(cl);
	if (m == NULL)
		panic("hfsc_dequeue:");
	len = m_pktlen(m);
	cl->cl_hif->hif_packets--;
	IFQ_DEC_LEN(ifq);
	PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);

	update_vf(cl, len, cur_time);
	/* cl_cumul only advances for service received under realtime */
	if (realtime)
		cl->cl_cumul += len;

	if (!qempty(cl->cl_q)) {
		if (cl->cl_rsc != NULL) {
			/* update ed */
			next_len = m_pktlen(qhead(cl->cl_q));

			if (realtime)
				update_ed(cl, next_len);
			else
				update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		set_passive(cl);
	}

	return (m);
}

/* Enqueue m on cl's queue via the active AQM (RED/RIO/CoDel/tail-drop). */
static int
hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
{

#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
		    m, cl->cl_pktattr);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
#endif
#ifdef ALTQ_CODEL
	if (q_is_codel(cl->cl_q))
		return codel_addq(cl->cl_codel, cl->cl_q, m);
#endif
	if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
		/* tail drop */
		m_freem(m);
		return (-1);
	}

	if (cl->cl_flags & HFCF_CLEARDSCP)
		write_dsfield(m, cl->cl_pktattr, 0);

	_addq(cl->cl_q, m);

	return (0);
}

/* Remove and return the head packet of cl's queue via the active AQM. */
static struct mbuf *
hfsc_getq(struct hfsc_class *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		return red_getq(cl->cl_red, cl->cl_q);
#endif
#ifdef ALTQ_CODEL
	if (q_is_codel(cl->cl_q))
		return codel_getq(cl->cl_codel, cl->cl_q);
#endif
	return _getq(cl->cl_q);
}

/* Peek at the head packet without removing it. */
static struct mbuf *
hfsc_pollq(struct hfsc_class *cl)
{
	return qhead(cl->cl_q);
}

/* Drop every packet queued on cl, then make the class passive. */
static void
hfsc_purgeq(struct hfsc_class *cl)
{
	struct mbuf *m;

	if (qempty(cl->cl_q))
		return;

	while ((m = _getq(cl->cl_q)) != NULL) {
		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
		m_freem(m);
		cl->cl_hif->hif_packets--;
		IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
	}
	ASSERT(qlen(cl->cl_q) == 0);

	update_vf(cl, 0, 0);	/* remove cl from the actlist */
	set_passive(cl);
}

/* The class got its first packet: enter the eligible/active machinery. */
static void
set_active(struct hfsc_class *cl, int len)
{
	if (cl->cl_rsc != NULL)
		init_ed(cl, len);
	if (cl->cl_fsc != NULL)
		init_vf(cl, len);

	cl->cl_stats.period++;
}

/* The class drained its queue: leave the eligible list. */
static void
set_passive(struct hfsc_class *cl)
{
	if (cl->cl_rsc != NULL)
		ellist_remove(cl);

	/*
	 * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
	 * needs to be called explicitly to remove a class from actlist
	 */
}

/* Initialize eligible (e) and deadline (d) times for a newly active class. */
static void
init_ed(struct hfsc_class *cl, int next_len)
{
	u_int64_t cur_time;

	cur_time = read_machclk();

	/* update the deadline curve */
	rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);

	/*
	 * update the eligible curve.
	 * for concave, it is equal to the deadline curve.
	 * for convex, it is a linear curve with slope m2.
	 */
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}

	/* compute e and d */
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	ellist_insert(cl);
}

/* Recompute e and d after a realtime dequeue and re-sort the eligible list. */
static void
update_ed(struct hfsc_class *cl, int next_len)
{
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	ellist_update(cl);
}

/* Recompute only the deadline after a link-sharing dequeue. */
static void
update_d(struct hfsc_class *cl, int next_len)
{
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}

/*
 * Initialize the virtual time (vt) and fit time (f) of cl and of every
 * ancestor that becomes active because of it.
 */
static void
init_vf(struct hfsc_class *cl, int len)
{
	struct hfsc_class *max_cl, *p;
	u_int64_t vt, f, cur_time;
	int go_active;

	cur_time = 0;
	go_active = 1;
	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {

		/* only propagate activation while each level was idle */
		if (go_active && cl->cl_nactive++ == 0)
			go_active = 1;
		else
			go_active = 0;

		if (go_active) {
			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
			if (max_cl != NULL) {
				/*
				 * set vt to the average of the min and max
				 * classes.  if the parent's period didn't
				 * change, don't decrease vt of the class.
				 */
				vt = max_cl->cl_vt;
				if (cl->cl_parent->cl_cvtmin != 0)
					vt = (cl->cl_parent->cl_cvtmin + vt)/2;

				if (cl->cl_parent->cl_vtperiod !=
				    cl->cl_parentperiod || vt > cl->cl_vt)
					cl->cl_vt = vt;
			} else {
				/*
				 * first child for a new parent backlog period.
				 * add parent's cvtmax to vtoff of children
				 * to make a new vt (vtoff + vt) larger than
				 * the vt in the last period for all children.
				 */
				vt = cl->cl_parent->cl_cvtmax;
				for (p = cl->cl_parent->cl_children; p != NULL;
				     p = p->cl_siblings)
					p->cl_vtoff += vt;
				cl->cl_vt = 0;
				cl->cl_parent->cl_cvtmax = 0;
				cl->cl_parent->cl_cvtmin = 0;
			}
			cl->cl_initvt = cl->cl_vt;

			/* update the virtual curve */
			vt = cl->cl_vt + cl->cl_vtoff;
			rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
			if (cl->cl_virtual.x == vt) {
				cl->cl_virtual.x -= cl->cl_vtoff;
				cl->cl_vtoff = 0;
			}
			cl->cl_vtadj = 0;

			cl->cl_vtperiod++;  /* increment vt period */
			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
			if (cl->cl_parent->cl_nactive == 0)
				cl->cl_parentperiod++;
			cl->cl_f = 0;

			actlist_insert(cl);

			if (cl->cl_usc != NULL) {
				/* class has upper limit curve */
				if (cur_time == 0)
					cur_time = read_machclk();

				/* update the ulimit curve */
				rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
				    cl->cl_total);
				/* compute myf */
				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
				    cl->cl_total);
				cl->cl_myfadj = 0;
			}
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			update_cfmin(cl->cl_parent);
		}
	}
}

/*
 * Charge len bytes of service to cl and all its ancestors, updating
 * their virtual and fit times; deactivates levels that drained.
 */
static void
update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
{
	u_int64_t f, myf_bound, delta;
	int go_passive;

	go_passive = qempty(cl->cl_q);

	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {

		cl->cl_total += len;

		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
			continue;

		if (go_passive && --cl->cl_nactive == 0)
			go_passive = 1;
		else
			go_passive = 0;

		if (go_passive) {
			/* no more active child, going passive */

			/* update cvtmax of the parent class */
			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
				cl->cl_parent->cl_cvtmax = cl->cl_vt;

			/* remove this class from the vt list */
			actlist_remove(cl);

			update_cfmin(cl->cl_parent);

			continue;
		}

		/*
		 * update vt and f
		 */
		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
		    - cl->cl_vtoff + cl->cl_vtadj;

		/*
		 * if vt of the class is smaller than cvtmin,
		 * the class was skipped in the past due to non-fit.
		 * if so, we need to adjust vtadj.
		 */
		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
			cl->cl_vt = cl->cl_parent->cl_cvtmin;
		}

		/* update the vt list */
		actlist_update(cl);

		if (cl->cl_usc != NULL) {
			cl->cl_myf = cl->cl_myfadj +
			    rtsc_y2x(&cl->cl_ulimit, cl->cl_total);

			/*
			 * if myf lags behind by more than one clock tick
			 * from the current time, adjust myfadj to prevent
			 * a rate-limited class from going greedy.
			 * in a steady state under rate-limiting, myf
			 * fluctuates within one clock tick.
			 */
			myf_bound = cur_time - machclk_per_tick;
			if (cl->cl_myf < myf_bound) {
				delta = cur_time - cl->cl_myf;
				cl->cl_myfadj += delta;
				cl->cl_myf += delta;
			}
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			update_cfmin(cl->cl_parent);
		}
	}
}

/* Recompute the parent's cached minimum fit-time over its active children. */
static void
update_cfmin(struct hfsc_class *cl)
{
	struct hfsc_class *p;
	u_int64_t cfmin;

	if (TAILQ_EMPTY(&cl->cl_actc)) {
		cl->cl_cfmin = 0;
		return;
	}
	cfmin = HT_INFINITY;
	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		/* f == 0 means "fits now"; the minimum cannot be lower */
		if (p->cl_f == 0) {
			cl->cl_cfmin = 0;
			return;
		}
		if (p->cl_f < cfmin)
			cfmin = p->cl_f;
	}
	cl->cl_cfmin = cfmin;
}

/*
 * TAILQ based ellist and actlist implementation
 * (ion wanted to make a calendar queue based implementation)
 */
/*
 * eligible list holds backlogged classes being sorted by their eligible times.
 * there is one eligible list per interface.
+ */ + +static void +ellist_insert(struct hfsc_class *cl) +{ + struct hfsc_if *hif = cl->cl_hif; + struct hfsc_class *p; + + /* check the last entry first */ + if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL || + p->cl_e <= cl->cl_e) { + TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); + return; + } + + TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { + if (cl->cl_e < p->cl_e) { + TAILQ_INSERT_BEFORE(p, cl, cl_ellist); + return; + } + } + ASSERT(0); /* should not reach here */ +} + +static void +ellist_remove(struct hfsc_class *cl) +{ + struct hfsc_if *hif = cl->cl_hif; + + TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); +} + +static void +ellist_update(struct hfsc_class *cl) +{ + struct hfsc_if *hif = cl->cl_hif; + struct hfsc_class *p, *last; + + /* + * the eligible time of a class increases monotonically. + * if the next entry has a larger eligible time, nothing to do. + */ + p = TAILQ_NEXT(cl, cl_ellist); + if (p == NULL || cl->cl_e <= p->cl_e) + return; + + /* check the last entry */ + last = TAILQ_LAST(&hif->hif_eligible, elighead); + ASSERT(last != NULL); + if (last->cl_e <= cl->cl_e) { + TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); + TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); + return; + } + + /* + * the new position must be between the next entry + * and the last entry + */ + while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) { + if (cl->cl_e < p->cl_e) { + TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); + TAILQ_INSERT_BEFORE(p, cl, cl_ellist); + return; + } + } + ASSERT(0); /* should not reach here */ +} + +/* find the class with the minimum deadline among the eligible classes */ +struct hfsc_class * +hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time) +{ + struct hfsc_class *p, *cl = NULL; + + TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { + if (p->cl_e > cur_time) + break; + if (cl == NULL || p->cl_d < cl->cl_d) + cl = p; + } + return (cl); +} + +/* + * active children list holds backlogged child classes being sorted 
+ * by their virtual time. + * each intermediate class has one active children list. + */ + +static void +actlist_insert(struct hfsc_class *cl) +{ + struct hfsc_class *p; + + /* check the last entry first */ + if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL + || p->cl_vt <= cl->cl_vt) { + TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); + return; + } + + TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) { + if (cl->cl_vt < p->cl_vt) { + TAILQ_INSERT_BEFORE(p, cl, cl_actlist); + return; + } + } + ASSERT(0); /* should not reach here */ +} + +static void +actlist_remove(struct hfsc_class *cl) +{ + TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); +} + +static void +actlist_update(struct hfsc_class *cl) +{ + struct hfsc_class *p, *last; + + /* + * the virtual time of a class increases monotonically during its + * backlogged period. + * if the next entry has a larger virtual time, nothing to do. + */ + p = TAILQ_NEXT(cl, cl_actlist); + if (p == NULL || cl->cl_vt < p->cl_vt) + return; + + /* check the last entry */ + last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); + ASSERT(last != NULL); + if (last->cl_vt <= cl->cl_vt) { + TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); + TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); + return; + } + + /* + * the new position must be between the next entry + * and the last entry + */ + while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) { + if (cl->cl_vt < p->cl_vt) { + TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); + TAILQ_INSERT_BEFORE(p, cl, cl_actlist); + return; + } + } + ASSERT(0); /* should not reach here */ +} + +static struct hfsc_class * +actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time) +{ + struct hfsc_class *p; + + TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { + if (p->cl_f <= cur_time) + return (p); + } + return (NULL); +} + +/* + * service curve support functions + * + * external service curve parameters + * m: bits/sec + * d: msec + * internal 
service curve parameters + * sm: (bytes/tsc_interval) << SM_SHIFT + * ism: (tsc_count/byte) << ISM_SHIFT + * dx: tsc_count + * + * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. + * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU + * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective + * digits in decimal using the following table. + * + * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps + * ----------+------------------------------------------------------- + * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6 + * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6 + * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6 + * + * nsec/byte 80000 8000 800 80 8 + * ism(500MHz) 40000 4000 400 40 4 + * ism(200MHz) 16000 1600 160 16 1.6 + */ +#define SM_SHIFT 24 +#define ISM_SHIFT 10 + +#define SM_MASK ((1LL << SM_SHIFT) - 1) +#define ISM_MASK ((1LL << ISM_SHIFT) - 1) + +static __inline u_int64_t +seg_x2y(u_int64_t x, u_int64_t sm) +{ + u_int64_t y; + + /* + * compute + * y = x * sm >> SM_SHIFT + * but divide it for the upper and lower bits to avoid overflow + */ + y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); + return (y); +} + +static __inline u_int64_t +seg_y2x(u_int64_t y, u_int64_t ism) +{ + u_int64_t x; + + if (y == 0) + x = 0; + else if (ism == HT_INFINITY) + x = HT_INFINITY; + else { + x = (y >> ISM_SHIFT) * ism + + (((y & ISM_MASK) * ism) >> ISM_SHIFT); + } + return (x); +} + +static __inline u_int64_t +m2sm(u_int m) +{ + u_int64_t sm; + + sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq; + return (sm); +} + +static __inline u_int64_t +m2ism(u_int m) +{ + u_int64_t ism; + + if (m == 0) + ism = HT_INFINITY; + else + ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m; + return (ism); +} + +static __inline u_int64_t +d2dx(u_int d) +{ + u_int64_t dx; + + dx = ((u_int64_t)d * machclk_freq) / 1000; + return (dx); +} + +static u_int +sm2m(u_int64_t sm) +{ + u_int64_t m; + + m = (sm * 8 
* machclk_freq) >> SM_SHIFT; + return ((u_int)m); +} + +static u_int +dx2d(u_int64_t dx) +{ + u_int64_t d; + + d = dx * 1000 / machclk_freq; + return ((u_int)d); +} + +static void +sc2isc(struct service_curve *sc, struct internal_sc *isc) +{ + isc->sm1 = m2sm(sc->m1); + isc->ism1 = m2ism(sc->m1); + isc->dx = d2dx(sc->d); + isc->dy = seg_x2y(isc->dx, isc->sm1); + isc->sm2 = m2sm(sc->m2); + isc->ism2 = m2ism(sc->m2); +} + +/* + * initialize the runtime service curve with the given internal + * service curve starting at (x, y). + */ +static void +rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x, + u_int64_t y) +{ + rtsc->x = x; + rtsc->y = y; + rtsc->sm1 = isc->sm1; + rtsc->ism1 = isc->ism1; + rtsc->dx = isc->dx; + rtsc->dy = isc->dy; + rtsc->sm2 = isc->sm2; + rtsc->ism2 = isc->ism2; +} + +/* + * calculate the y-projection of the runtime service curve by the + * given x-projection value + */ +static u_int64_t +rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y) +{ + u_int64_t x; + + if (y < rtsc->y) + x = rtsc->x; + else if (y <= rtsc->y + rtsc->dy) { + /* x belongs to the 1st segment */ + if (rtsc->dy == 0) + x = rtsc->x + rtsc->dx; + else + x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); + } else { + /* x belongs to the 2nd segment */ + x = rtsc->x + rtsc->dx + + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); + } + return (x); +} + +static u_int64_t +rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x) +{ + u_int64_t y; + + if (x <= rtsc->x) + y = rtsc->y; + else if (x <= rtsc->x + rtsc->dx) + /* y belongs to the 1st segment */ + y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); + else + /* y belongs to the 2nd segment */ + y = rtsc->y + rtsc->dy + + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); + return (y); +} + +/* + * update the runtime service curve by taking the minimum of the current + * runtime service curve and the service curve starting at (x, y). 
+ */ +static void +rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x, + u_int64_t y) +{ + u_int64_t y1, y2, dx, dy; + + if (isc->sm1 <= isc->sm2) { + /* service curve is convex */ + y1 = rtsc_x2y(rtsc, x); + if (y1 < y) + /* the current rtsc is smaller */ + return; + rtsc->x = x; + rtsc->y = y; + return; + } + + /* + * service curve is concave + * compute the two y values of the current rtsc + * y1: at x + * y2: at (x + dx) + */ + y1 = rtsc_x2y(rtsc, x); + if (y1 <= y) { + /* rtsc is below isc, no change to rtsc */ + return; + } + + y2 = rtsc_x2y(rtsc, x + isc->dx); + if (y2 >= y + isc->dy) { + /* rtsc is above isc, replace rtsc by isc */ + rtsc->x = x; + rtsc->y = y; + rtsc->dx = isc->dx; + rtsc->dy = isc->dy; + return; + } + + /* + * the two curves intersect + * compute the offsets (dx, dy) using the reverse + * function of seg_x2y() + * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) + */ + dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2); + /* + * check if (x, y1) belongs to the 1st segment of rtsc. + * if so, add the offset. 
+ */ + if (rtsc->x + rtsc->dx > x) + dx += rtsc->x + rtsc->dx - x; + dy = seg_x2y(dx, isc->sm1); + + rtsc->x = x; + rtsc->y = y; + rtsc->dx = dx; + rtsc->dy = dy; + return; +} + +static void +get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl) +{ + sp->class_id = cl->cl_id; + sp->class_handle = cl->cl_handle; + + if (cl->cl_rsc != NULL) { + sp->rsc.m1 = sm2m(cl->cl_rsc->sm1); + sp->rsc.d = dx2d(cl->cl_rsc->dx); + sp->rsc.m2 = sm2m(cl->cl_rsc->sm2); + } else { + sp->rsc.m1 = 0; + sp->rsc.d = 0; + sp->rsc.m2 = 0; + } + if (cl->cl_fsc != NULL) { + sp->fsc.m1 = sm2m(cl->cl_fsc->sm1); + sp->fsc.d = dx2d(cl->cl_fsc->dx); + sp->fsc.m2 = sm2m(cl->cl_fsc->sm2); + } else { + sp->fsc.m1 = 0; + sp->fsc.d = 0; + sp->fsc.m2 = 0; + } + if (cl->cl_usc != NULL) { + sp->usc.m1 = sm2m(cl->cl_usc->sm1); + sp->usc.d = dx2d(cl->cl_usc->dx); + sp->usc.m2 = sm2m(cl->cl_usc->sm2); + } else { + sp->usc.m1 = 0; + sp->usc.d = 0; + sp->usc.m2 = 0; + } + + sp->total = cl->cl_total; + sp->cumul = cl->cl_cumul; + + sp->d = cl->cl_d; + sp->e = cl->cl_e; + sp->vt = cl->cl_vt; + sp->f = cl->cl_f; + + sp->initvt = cl->cl_initvt; + sp->vtperiod = cl->cl_vtperiod; + sp->parentperiod = cl->cl_parentperiod; + sp->nactive = cl->cl_nactive; + sp->vtoff = cl->cl_vtoff; + sp->cvtmax = cl->cl_cvtmax; + sp->myf = cl->cl_myf; + sp->cfmin = cl->cl_cfmin; + sp->cvtmin = cl->cl_cvtmin; + sp->myfadj = cl->cl_myfadj; + sp->vtadj = cl->cl_vtadj; + + sp->cur_time = read_machclk(); + sp->machclk_freq = machclk_freq; + + sp->qlength = qlen(cl->cl_q); + sp->qlimit = qlimit(cl->cl_q); + sp->xmit_cnt = cl->cl_stats.xmit_cnt; + sp->drop_cnt = cl->cl_stats.drop_cnt; + sp->period = cl->cl_stats.period; + + sp->qtype = qtype(cl->cl_q); +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + red_getstats(cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_getstats(cl->cl_codel, 
&sp->codel); +#endif +} + +/* convert a class handle to the corresponding class pointer */ +static struct hfsc_class * +clh_to_clp(struct hfsc_if *hif, u_int32_t chandle) +{ + int i; + struct hfsc_class *cl; + + if (chandle == 0) + return (NULL); + /* + * first, try optimistically the slot matching the lower bits of + * the handle. if it fails, do the linear table search. + */ + i = chandle % HFSC_MAX_CLASSES; + if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) + return (cl); + for (i = 0; i < HFSC_MAX_CLASSES; i++) + if ((cl = hif->hif_class_tbl[i]) != NULL && + cl->cl_handle == chandle) + return (cl); + return (NULL); +} + +#ifdef ALTQ3_COMPAT +static struct hfsc_if * +hfsc_attach(ifq, bandwidth) + struct ifaltq *ifq; + u_int bandwidth; +{ + struct hfsc_if *hif; + + hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK); + if (hif == NULL) + return (NULL); + bzero(hif, sizeof(struct hfsc_if)); + + hif->hif_eligible = ellist_alloc(); + if (hif->hif_eligible == NULL) { + free(hif, M_DEVBUF); + return NULL; + } + + hif->hif_ifq = ifq; + + /* add this state to the hfsc list */ + hif->hif_next = hif_list; + hif_list = hif; + + return (hif); +} + +static int +hfsc_detach(hif) + struct hfsc_if *hif; +{ + (void)hfsc_clear_interface(hif); + (void)hfsc_class_destroy(hif->hif_rootclass); + + /* remove this interface from the hif list */ + if (hif_list == hif) + hif_list = hif->hif_next; + else { + struct hfsc_if *h; + + for (h = hif_list; h != NULL; h = h->hif_next) + if (h->hif_next == hif) { + h->hif_next = hif->hif_next; + break; + } + ASSERT(h != NULL); + } + + ellist_destroy(hif->hif_eligible); + + free(hif, M_DEVBUF); + + return (0); +} + +static int +hfsc_class_modify(cl, rsc, fsc, usc) + struct hfsc_class *cl; + struct service_curve *rsc, *fsc, *usc; +{ + struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp; + u_int64_t cur_time; + int s; + + rsc_tmp = fsc_tmp = usc_tmp = NULL; + if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) && + cl->cl_rsc == 
NULL) { + rsc_tmp = malloc(sizeof(struct internal_sc), + M_DEVBUF, M_WAITOK); + if (rsc_tmp == NULL) + return (ENOMEM); + } + if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) && + cl->cl_fsc == NULL) { + fsc_tmp = malloc(sizeof(struct internal_sc), + M_DEVBUF, M_WAITOK); + if (fsc_tmp == NULL) { + free(rsc_tmp); + return (ENOMEM); + } + } + if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) && + cl->cl_usc == NULL) { + usc_tmp = malloc(sizeof(struct internal_sc), + M_DEVBUF, M_WAITOK); + if (usc_tmp == NULL) { + free(rsc_tmp); + free(fsc_tmp); + return (ENOMEM); + } + } + + cur_time = read_machclk(); + s = splnet(); + IFQ_LOCK(cl->cl_hif->hif_ifq); + + if (rsc != NULL) { + if (rsc->m1 == 0 && rsc->m2 == 0) { + if (cl->cl_rsc != NULL) { + if (!qempty(cl->cl_q)) + hfsc_purgeq(cl); + free(cl->cl_rsc, M_DEVBUF); + cl->cl_rsc = NULL; + } + } else { + if (cl->cl_rsc == NULL) + cl->cl_rsc = rsc_tmp; + sc2isc(rsc, cl->cl_rsc); + rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time, + cl->cl_cumul); + cl->cl_eligible = cl->cl_deadline; + if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) { + cl->cl_eligible.dx = 0; + cl->cl_eligible.dy = 0; + } + } + } + + if (fsc != NULL) { + if (fsc->m1 == 0 && fsc->m2 == 0) { + if (cl->cl_fsc != NULL) { + if (!qempty(cl->cl_q)) + hfsc_purgeq(cl); + free(cl->cl_fsc, M_DEVBUF); + cl->cl_fsc = NULL; + } + } else { + if (cl->cl_fsc == NULL) + cl->cl_fsc = fsc_tmp; + sc2isc(fsc, cl->cl_fsc); + rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt, + cl->cl_total); + } + } + + if (usc != NULL) { + if (usc->m1 == 0 && usc->m2 == 0) { + if (cl->cl_usc != NULL) { + free(cl->cl_usc, M_DEVBUF); + cl->cl_usc = NULL; + cl->cl_myf = 0; + } + } else { + if (cl->cl_usc == NULL) + cl->cl_usc = usc_tmp; + sc2isc(usc, cl->cl_usc); + rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time, + cl->cl_total); + } + } + + if (!qempty(cl->cl_q)) { + if (cl->cl_rsc != NULL) + update_ed(cl, m_pktlen(qhead(cl->cl_q))); + if (cl->cl_fsc != NULL) + update_vf(cl, 0, cur_time); + /* is this 
enough? */ + } + + IFQ_UNLOCK(cl->cl_hif->hif_ifq); + splx(s); + + return (0); +} + +/* + * hfsc device interface + */ +int +hfscopen(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + if (machclk_freq == 0) + init_machclk(); + + if (machclk_freq == 0) { + printf("hfsc: no cpu clock available!\n"); + return (ENXIO); + } + + /* everything will be done when the queueing scheme is attached. */ + return 0; +} + +int +hfscclose(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + struct hfsc_if *hif; + int err, error = 0; + + while ((hif = hif_list) != NULL) { + /* destroy all */ + if (ALTQ_IS_ENABLED(hif->hif_ifq)) + altq_disable(hif->hif_ifq); + + err = altq_detach(hif->hif_ifq); + if (err == 0) + err = hfsc_detach(hif); + if (err != 0 && error == 0) + error = err; + } + + return error; +} + +int +hfscioctl(dev, cmd, addr, flag, p) + dev_t dev; + ioctlcmd_t cmd; + caddr_t addr; + int flag; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + struct hfsc_if *hif; + struct hfsc_interface *ifacep; + int error = 0; + + /* check super-user privilege */ + switch (cmd) { + case HFSC_GETSTATS: + break; + default: +#if (__FreeBSD_version > 700000) + if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) + return (error); +#elsif (__FreeBSD_version > 400000) + if ((error = suser(p)) != 0) + return (error); +#else + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) + return (error); +#endif + break; + } + + switch (cmd) { + + case HFSC_IF_ATTACH: + error = hfsccmd_if_attach((struct hfsc_attach *)addr); + break; + + case HFSC_IF_DETACH: + error = hfsccmd_if_detach((struct hfsc_interface *)addr); + break; + + case HFSC_ENABLE: + case HFSC_DISABLE: + case HFSC_CLEAR_HIERARCHY: + ifacep = (struct hfsc_interface *)addr; + if ((hif = altq_lookup(ifacep->hfsc_ifname, + 
ALTQT_HFSC)) == NULL) { + error = EBADF; + break; + } + + switch (cmd) { + + case HFSC_ENABLE: + if (hif->hif_defaultclass == NULL) { +#ifdef ALTQ_DEBUG + printf("hfsc: no default class\n"); +#endif + error = EINVAL; + break; + } + error = altq_enable(hif->hif_ifq); + break; + + case HFSC_DISABLE: + error = altq_disable(hif->hif_ifq); + break; + + case HFSC_CLEAR_HIERARCHY: + hfsc_clear_interface(hif); + break; + } + break; + + case HFSC_ADD_CLASS: + error = hfsccmd_add_class((struct hfsc_add_class *)addr); + break; + + case HFSC_DEL_CLASS: + error = hfsccmd_delete_class((struct hfsc_delete_class *)addr); + break; + + case HFSC_MOD_CLASS: + error = hfsccmd_modify_class((struct hfsc_modify_class *)addr); + break; + + case HFSC_ADD_FILTER: + error = hfsccmd_add_filter((struct hfsc_add_filter *)addr); + break; + + case HFSC_DEL_FILTER: + error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr); + break; + + case HFSC_GETSTATS: + error = hfsccmd_class_stats((struct hfsc_class_stats *)addr); + break; + + default: + error = EINVAL; + break; + } + return error; +} + +static int +hfsccmd_if_attach(ap) + struct hfsc_attach *ap; +{ + struct hfsc_if *hif; + struct ifnet *ifp; + int error; + + if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL) + return (ENXIO); + + if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL) + return (ENOMEM); + + /* + * set HFSC to this ifnet structure. 
+ */ + if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif, + hfsc_enqueue, hfsc_dequeue, hfsc_request, + &hif->hif_classifier, acc_classify)) != 0) + (void)hfsc_detach(hif); + + return (error); +} + +static int +hfsccmd_if_detach(ap) + struct hfsc_interface *ap; +{ + struct hfsc_if *hif; + int error; + + if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL) + return (EBADF); + + if (ALTQ_IS_ENABLED(hif->hif_ifq)) + altq_disable(hif->hif_ifq); + + if ((error = altq_detach(hif->hif_ifq))) + return (error); + + return hfsc_detach(hif); +} + +static int +hfsccmd_add_class(ap) + struct hfsc_add_class *ap; +{ + struct hfsc_if *hif; + struct hfsc_class *cl, *parent; + int i; + + if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) + return (EBADF); + + if (ap->parent_handle == HFSC_NULLCLASS_HANDLE && + hif->hif_rootclass == NULL) + parent = NULL; + else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL) + return (EINVAL); + + /* assign a class handle (use a free slot number for now) */ + for (i = 1; i < HFSC_MAX_CLASSES; i++) + if (hif->hif_class_tbl[i] == NULL) + break; + if (i == HFSC_MAX_CLASSES) + return (EBUSY); + + if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL, + parent, ap->qlimit, ap->flags, i)) == NULL) + return (ENOMEM); + + /* return a class handle to the user */ + ap->class_handle = i; + + return (0); +} + +static int +hfsccmd_delete_class(ap) + struct hfsc_delete_class *ap; +{ + struct hfsc_if *hif; + struct hfsc_class *cl; + + if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) + return (EINVAL); + + return hfsc_class_destroy(cl); +} + +static int +hfsccmd_modify_class(ap) + struct hfsc_modify_class *ap; +{ + struct hfsc_if *hif; + struct hfsc_class *cl; + struct service_curve *rsc = NULL; + struct service_curve *fsc = NULL; + struct service_curve *usc = NULL; + + if ((hif = altq_lookup(ap->iface.hfsc_ifname, 
ALTQT_HFSC)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) + return (EINVAL); + + if (ap->sctype & HFSC_REALTIMESC) + rsc = &ap->service_curve; + if (ap->sctype & HFSC_LINKSHARINGSC) + fsc = &ap->service_curve; + if (ap->sctype & HFSC_UPPERLIMITSC) + usc = &ap->service_curve; + + return hfsc_class_modify(cl, rsc, fsc, usc); +} + +static int +hfsccmd_add_filter(ap) + struct hfsc_add_filter *ap; +{ + struct hfsc_if *hif; + struct hfsc_class *cl; + + if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) + return (EINVAL); + + if (is_a_parent_class(cl)) { +#ifdef ALTQ_DEBUG + printf("hfsccmd_add_filter: not a leaf class!\n"); +#endif + return (EINVAL); + } + + return acc_add_filter(&hif->hif_classifier, &ap->filter, + cl, &ap->filter_handle); +} + +static int +hfsccmd_delete_filter(ap) + struct hfsc_delete_filter *ap; +{ + struct hfsc_if *hif; + + if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) + return (EBADF); + + return acc_delete_filter(&hif->hif_classifier, + ap->filter_handle); +} + +static int +hfsccmd_class_stats(ap) + struct hfsc_class_stats *ap; +{ + struct hfsc_if *hif; + struct hfsc_class *cl; + struct hfsc_classstats stats, *usp; + int n, nclasses, error; + + if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) + return (EBADF); + + ap->cur_time = read_machclk(); + ap->machclk_freq = machclk_freq; + ap->hif_classes = hif->hif_classes; + ap->hif_packets = hif->hif_packets; + + /* skip the first N classes in the tree */ + nclasses = ap->nskip; + for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses; + cl = hfsc_nextclass(cl), n++) + ; + if (n != nclasses) + return (EINVAL); + + /* then, read the next N classes in the tree */ + nclasses = ap->nclasses; + usp = ap->stats; + for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) { + + get_class_stats(&stats, cl); + + if 
((error = copyout((caddr_t)&stats, (caddr_t)usp++, + sizeof(stats))) != 0) + return (error); + } + + ap->nclasses = n; + + return (0); +} + +#ifdef KLD_MODULE + +static struct altqsw hfsc_sw = + {"hfsc", hfscopen, hfscclose, hfscioctl}; + +ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw); +MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1); +MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1); + +#endif /* KLD_MODULE */ +#endif /* ALTQ3_COMPAT */ + +#endif /* ALTQ_HFSC */ diff --git a/freebsd/sys/net/altq/altq_hfsc.h b/freebsd/sys/net/altq/altq_hfsc.h new file mode 100644 index 00000000..de5e89b8 --- /dev/null +++ b/freebsd/sys/net/altq/altq_hfsc.h @@ -0,0 +1,319 @@ +/*- + * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this software and + * its documentation is hereby granted (including for commercial or + * for-profit use), provided that both the copyright notice and this + * permission notice appear in all copies of the software, derivative + * works, or modified versions, and any portions thereof. + * + * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF + * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS + * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + * Carnegie Mellon encourages (but does not require) users of this + * software to return any improvements or extensions that they make, + * and to grant Carnegie Mellon the rights to redistribute these + * changes without encumbrance. + * + * $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ + * $FreeBSD$ + */ +#ifndef _ALTQ_ALTQ_HFSC_H_ +#define _ALTQ_ALTQ_HFSC_H_ + +#include <net/altq/altq.h> +#include <net/altq/altq_classq.h> +#include <net/altq/altq_codel.h> +#include <net/altq/altq_red.h> +#include <net/altq/altq_rio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct service_curve { + u_int m1; /* slope of the first segment in bits/sec */ + u_int d; /* the x-projection of the first segment in msec */ + u_int m2; /* slope of the second segment in bits/sec */ +}; + +/* special class handles */ +#define HFSC_NULLCLASS_HANDLE 0 +#define HFSC_MAX_CLASSES 64 + +/* hfsc class flags */ +#define HFCF_RED 0x0001 /* use RED */ +#define HFCF_ECN 0x0002 /* use RED/ECN */ +#define HFCF_RIO 0x0004 /* use RIO */ +#define HFCF_CODEL 0x0008 /* use CoDel */ +#define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define HFCF_DEFAULTCLASS 0x1000 /* default class */ + +/* service curve types */ +#define HFSC_REALTIMESC 1 +#define HFSC_LINKSHARINGSC 2 +#define HFSC_UPPERLIMITSC 4 +#define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC) + +struct hfsc_classstats { + u_int class_id; + u_int32_t class_handle; + struct service_curve rsc; + struct service_curve fsc; + struct service_curve usc; /* upper limit service curve */ + + u_int64_t total; /* total work in bytes */ + u_int64_t cumul; /* cumulative work in bytes + done by real-time criteria */ + u_int64_t d; /* deadline */ + u_int64_t e; /* eligible time */ + u_int64_t vt; /* virtual time */ + u_int64_t f; /* fit time for upper-limit */ + + /* info helpful for debugging */ + u_int64_t initvt; /* init virtual time */ + u_int64_t vtoff; /* cl_vt_ipoff */ + u_int64_t cvtmax; /* cl_maxvt */ + u_int64_t myf; 
/* cl_myf */ + u_int64_t cfmin; /* cl_mincf */ + u_int64_t cvtmin; /* cl_mincvt */ + u_int64_t myfadj; /* cl_myfadj */ + u_int64_t vtadj; /* cl_vtadj */ + u_int64_t cur_time; + u_int32_t machclk_freq; + + u_int qlength; + u_int qlimit; + struct pktcntr xmit_cnt; + struct pktcntr drop_cnt; + u_int period; + + u_int vtperiod; /* vt period sequence no */ + u_int parentperiod; /* parent's vt period seqno */ + int nactive; /* number of active children */ + + /* codel, red and rio related info */ + int qtype; + struct redstats red[3]; + struct codel_stats codel; +}; + +#ifdef ALTQ3_COMPAT +struct hfsc_interface { + char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ +}; + +struct hfsc_attach { + struct hfsc_interface iface; + u_int bandwidth; /* link bandwidth in bits/sec */ +}; + +struct hfsc_add_class { + struct hfsc_interface iface; + u_int32_t parent_handle; + struct service_curve service_curve; + int qlimit; + int flags; + + u_int32_t class_handle; /* return value */ +}; + +struct hfsc_delete_class { + struct hfsc_interface iface; + u_int32_t class_handle; +}; + +struct hfsc_modify_class { + struct hfsc_interface iface; + u_int32_t class_handle; + struct service_curve service_curve; + int sctype; +}; + +struct hfsc_add_filter { + struct hfsc_interface iface; + u_int32_t class_handle; + struct flow_filter filter; + + u_long filter_handle; /* return value */ +}; + +struct hfsc_delete_filter { + struct hfsc_interface iface; + u_long filter_handle; +}; + +struct hfsc_class_stats { + struct hfsc_interface iface; + int nskip; /* skip # of classes */ + int nclasses; /* # of class stats (WR) */ + u_int64_t cur_time; /* current time */ + u_int32_t machclk_freq; /* machine clock frequency */ + u_int hif_classes; /* # of classes in the tree */ + u_int hif_packets; /* # of packets in the tree */ + struct hfsc_classstats *stats; /* pointer to stats array */ +}; + +#define HFSC_IF_ATTACH _IOW('Q', 1, struct hfsc_attach) +#define HFSC_IF_DETACH _IOW('Q', 2, struct 
hfsc_interface) +#define HFSC_ENABLE _IOW('Q', 3, struct hfsc_interface) +#define HFSC_DISABLE _IOW('Q', 4, struct hfsc_interface) +#define HFSC_CLEAR_HIERARCHY _IOW('Q', 5, struct hfsc_interface) +#define HFSC_ADD_CLASS _IOWR('Q', 7, struct hfsc_add_class) +#define HFSC_DEL_CLASS _IOW('Q', 8, struct hfsc_delete_class) +#define HFSC_MOD_CLASS _IOW('Q', 9, struct hfsc_modify_class) +#define HFSC_ADD_FILTER _IOWR('Q', 10, struct hfsc_add_filter) +#define HFSC_DEL_FILTER _IOW('Q', 11, struct hfsc_delete_filter) +#define HFSC_GETSTATS _IOWR('Q', 12, struct hfsc_class_stats) +#endif /* ALTQ3_COMPAT */ + +#ifdef _KERNEL +/* + * kernel internal service curve representation + * coordinates are given by 64 bit unsigned integers. + * x-axis: unit is clock count. for the intel x86 architecture, + * the raw Pentium TSC (Timestamp Counter) value is used. + * virtual time is also calculated in this time scale. + * y-axis: unit is byte. + * + * the service curve parameters are converted to the internal + * representation. + * the slope values are scaled to avoid overflow. + * the inverse slope values as well as the y-projection of the 1st + * segment are kept in order to to avoid 64-bit divide operations + * that are expensive on 32-bit architectures. + * + * note: Intel Pentium TSC never wraps around in several thousands of years. + * x-axis doesn't wrap around for 1089 years with 1GHz clock. + * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth. 
+ */ + +/* kernel internal representation of a service curve */ +struct internal_sc { + u_int64_t sm1; /* scaled slope of the 1st segment */ + u_int64_t ism1; /* scaled inverse-slope of the 1st segment */ + u_int64_t dx; /* the x-projection of the 1st segment */ + u_int64_t dy; /* the y-projection of the 1st segment */ + u_int64_t sm2; /* scaled slope of the 2nd segment */ + u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */ +}; + +/* runtime service curve */ +struct runtime_sc { + u_int64_t x; /* current starting position on x-axis */ + u_int64_t y; /* current starting position on x-axis */ + u_int64_t sm1; /* scaled slope of the 1st segment */ + u_int64_t ism1; /* scaled inverse-slope of the 1st segment */ + u_int64_t dx; /* the x-projection of the 1st segment */ + u_int64_t dy; /* the y-projection of the 1st segment */ + u_int64_t sm2; /* scaled slope of the 2nd segment */ + u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */ +}; + +struct hfsc_class { + u_int cl_id; /* class id (just for debug) */ + u_int32_t cl_handle; /* class handle */ + struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */ + int cl_flags; /* misc flags */ + + struct hfsc_class *cl_parent; /* parent class */ + struct hfsc_class *cl_siblings; /* sibling classes */ + struct hfsc_class *cl_children; /* child classes */ + + class_queue_t *cl_q; /* class queue structure */ + union { + struct red *cl_red; /* RED state */ + struct codel *cl_codel; /* CoDel state */ + } cl_aqm; +#define cl_red cl_aqm.cl_red +#define cl_codel cl_aqm.cl_codel + struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ + + u_int64_t cl_total; /* total work in bytes */ + u_int64_t cl_cumul; /* cumulative work in bytes + done by real-time criteria */ + u_int64_t cl_d; /* deadline */ + u_int64_t cl_e; /* eligible time */ + u_int64_t cl_vt; /* virtual time */ + u_int64_t cl_f; /* time when this class will fit for + link-sharing, max(myf, cfmin) */ + u_int64_t cl_myf; /* my fit-time (as 
calculated from this + class's own upperlimit curve) */ + u_int64_t cl_myfadj; /* my fit-time adjustment + (to cancel history dependence) */ + u_int64_t cl_cfmin; /* earliest children's fit-time (used + with cl_myf to obtain cl_f) */ + u_int64_t cl_cvtmin; /* minimal virtual time among the + children fit for link-sharing + (monotonic within a period) */ + u_int64_t cl_vtadj; /* intra-period cumulative vt + adjustment */ + u_int64_t cl_vtoff; /* inter-period cumulative vt offset */ + u_int64_t cl_cvtmax; /* max child's vt in the last period */ + + u_int64_t cl_initvt; /* init virtual time (for debugging) */ + + struct internal_sc *cl_rsc; /* internal real-time service curve */ + struct internal_sc *cl_fsc; /* internal fair service curve */ + struct internal_sc *cl_usc; /* internal upperlimit service curve */ + struct runtime_sc cl_deadline; /* deadline curve */ + struct runtime_sc cl_eligible; /* eligible curve */ + struct runtime_sc cl_virtual; /* virtual curve */ + struct runtime_sc cl_ulimit; /* upperlimit curve */ + + u_int cl_vtperiod; /* vt period sequence no */ + u_int cl_parentperiod; /* parent's vt period seqno */ + int cl_nactive; /* number of active children */ + + TAILQ_HEAD(acthead, hfsc_class) cl_actc; /* active children list */ + TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */ + TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */ + + struct { + struct pktcntr xmit_cnt; + struct pktcntr drop_cnt; + u_int period; + } cl_stats; +}; + +/* + * hfsc interface state + */ +struct hfsc_if { + struct hfsc_if *hif_next; /* interface state list */ + struct ifaltq *hif_ifq; /* backpointer to ifaltq */ + struct hfsc_class *hif_rootclass; /* root class */ + struct hfsc_class *hif_defaultclass; /* default class */ + struct hfsc_class *hif_class_tbl[HFSC_MAX_CLASSES]; + struct hfsc_class *hif_pollcache; /* cache for poll operation */ + + u_int hif_classes; /* # of classes in the tree */ + u_int hif_packets; /* # of packets in the tree */ 
+ u_int hif_classid; /* class id sequence number */ + + TAILQ_HEAD(elighead, hfsc_class) hif_eligible; /* eligible list */ + +#ifdef ALTQ3_CLFIER_COMPAT + struct acc_classifier hif_classifier; +#endif +}; + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ALTQ_ALTQ_HFSC_H_ */ diff --git a/freebsd/sys/net/altq/altq_priq.c b/freebsd/sys/net/altq/altq_priq.c new file mode 100644 index 00000000..d257ae3c --- /dev/null +++ b/freebsd/sys/net/altq/altq_priq.c @@ -0,0 +1,1072 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (C) 2000-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ + * $FreeBSD$ + */ +/* + * priority queue + */ + +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */ + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/kernel.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <netpfil/pf/pf_mtag.h> +#include <net/altq/altq.h> +#ifdef ALTQ3_COMPAT +#include <net/altq/altq_conf.h> +#endif +#include <net/altq/altq_priq.h> + +/* + * function prototypes + */ +#ifdef ALTQ3_COMPAT +static struct priq_if *priq_attach(struct ifaltq *, u_int); +static int priq_detach(struct priq_if *); +#endif +static int priq_clear_interface(struct priq_if *); +static int priq_request(struct ifaltq *, int, void *); +static void priq_purge(struct priq_if *); +static struct priq_class *priq_class_create(struct priq_if *, int, int, int, + int); +static int priq_class_destroy(struct priq_class *); +static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *priq_dequeue(struct ifaltq *, int); + +static int priq_addq(struct priq_class *, struct mbuf *); +static struct mbuf *priq_getq(struct priq_class *); +static struct mbuf *priq_pollq(struct priq_class *); +static void priq_purgeq(struct priq_class *); + +#ifdef ALTQ3_COMPAT +static int priqcmd_if_attach(struct priq_interface *); +static int priqcmd_if_detach(struct priq_interface *); +static int priqcmd_add_class(struct priq_add_class *); +static int priqcmd_delete_class(struct priq_delete_class *); +static int priqcmd_modify_class(struct priq_modify_class *); +static 
int priqcmd_add_filter(struct priq_add_filter *); +static int priqcmd_delete_filter(struct priq_delete_filter *); +static int priqcmd_class_stats(struct priq_class_stats *); +#endif /* ALTQ3_COMPAT */ + +static void get_class_stats(struct priq_classstats *, struct priq_class *); +static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t); + +#ifdef ALTQ3_COMPAT +altqdev_decl(priq); + +/* pif_list keeps all priq_if's allocated. */ +static struct priq_if *pif_list = NULL; +#endif /* ALTQ3_COMPAT */ + +int +priq_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + int s, error; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + s = splnet(); + error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc, + priq_enqueue, priq_dequeue, priq_request, NULL, NULL); + splx(s); + return (error); +} + +int +priq_add_altq(struct pf_altq *a) +{ + struct priq_if *pif; + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENODEV); + + pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO); + if (pif == NULL) + return (ENOMEM); + pif->pif_bandwidth = a->ifbandwidth; + pif->pif_maxpri = -1; + pif->pif_ifq = &ifp->if_snd; + + /* keep the state in pf_altq */ + a->altq_disc = pif; + + return (0); +} + +int +priq_remove_altq(struct pf_altq *a) +{ + struct priq_if *pif; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + (void)priq_clear_interface(pif); + + free(pif, M_DEVBUF); + return (0); +} + +int +priq_add_queue(struct pf_altq *a) +{ + struct priq_if *pif; + struct priq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + /* check parameters */ + if (a->priority >= PRIQ_MAXPRI) + return (EINVAL); + if (a->qid == 0) + return (EINVAL); + if (pif->pif_classes[a->priority] != NULL) + return (EBUSY); + if (clh_to_clp(pif, a->qid) != NULL) + return (EBUSY); + + cl = priq_class_create(pif, a->priority, 
a->qlimit, + a->pq_u.priq_opts.flags, a->qid); + if (cl == NULL) + return (ENOMEM); + + return (0); +} + +int +priq_remove_queue(struct pf_altq *a) +{ + struct priq_if *pif; + struct priq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + return (priq_class_destroy(cl)); +} + +int +priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + struct priq_if *pif; + struct priq_class *cl; + struct priq_classstats stats; + int error = 0; + + if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + get_class_stats(&stats, cl); + + if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + return (error); + *nbytes = sizeof(stats); + return (0); +} + +/* + * bring the interface back to the initial state by discarding + * all the filters and classes. + */ +static int +priq_clear_interface(struct priq_if *pif) +{ + struct priq_class *cl; + int pri; + +#ifdef ALTQ3_CLFIER_COMPAT + /* free the filters for this interface */ + acc_discard_filters(&pif->pif_classifier, NULL, 1); +#endif + + /* clear out the classes */ + for (pri = 0; pri <= pif->pif_maxpri; pri++) + if ((cl = pif->pif_classes[pri]) != NULL) + priq_class_destroy(cl); + + return (0); +} + +static int +priq_request(struct ifaltq *ifq, int req, void *arg) +{ + struct priq_if *pif = (struct priq_if *)ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + priq_purge(pif); + break; + } + return (0); +} + +/* discard all the queued packets on the interface */ +static void +priq_purge(struct priq_if *pif) +{ + struct priq_class *cl; + int pri; + + for (pri = 0; pri <= pif->pif_maxpri; pri++) { + if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) + priq_purgeq(cl); + } + if (ALTQ_IS_ENABLED(pif->pif_ifq)) + pif->pif_ifq->ifq_len = 0; +} + 
+static struct priq_class * +priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) +{ + struct priq_class *cl; + int s; + +#ifndef ALTQ_RED + if (flags & PRCF_RED) { +#ifdef ALTQ_DEBUG + printf("priq_class_create: RED not configured for PRIQ!\n"); +#endif + return (NULL); + } +#endif +#ifndef ALTQ_CODEL + if (flags & PRCF_CODEL) { +#ifdef ALTQ_DEBUG + printf("priq_class_create: CODEL not configured for PRIQ!\n"); +#endif + return (NULL); + } +#endif + + if ((cl = pif->pif_classes[pri]) != NULL) { + /* modify the class instead of creating a new one */ + s = splnet(); + IFQ_LOCK(cl->cl_pif->pif_ifq); + if (!qempty(cl->cl_q)) + priq_purgeq(cl); + IFQ_UNLOCK(cl->cl_pif->pif_ifq); + splx(s); +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif + } else { + cl = malloc(sizeof(struct priq_class), M_DEVBUF, + M_NOWAIT | M_ZERO); + if (cl == NULL) + return (NULL); + + cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, + M_NOWAIT | M_ZERO); + if (cl->cl_q == NULL) + goto err_ret; + } + + pif->pif_classes[pri] = cl; + if (flags & PRCF_DEFAULTCLASS) + pif->pif_default = cl; + if (qlimit == 0) + qlimit = 50; /* use default */ + qlimit(cl->cl_q) = qlimit; + qtype(cl->cl_q) = Q_DROPTAIL; + qlen(cl->cl_q) = 0; + qsize(cl->cl_q) = 0; + cl->cl_flags = flags; + cl->cl_pri = pri; + if (pri > pif->pif_maxpri) + pif->pif_maxpri = pri; + cl->cl_pif = pif; + cl->cl_handle = qid; + +#ifdef ALTQ_RED + if (flags & (PRCF_RED|PRCF_RIO)) { + int red_flags, red_pkttime; + + red_flags = 0; + if (flags & PRCF_ECN) + red_flags |= REDF_ECN; +#ifdef ALTQ_RIO + if (flags & PRCF_CLEARDSCP) + red_flags |= RIOF_CLEARDSCP; +#endif + if (pif->pif_bandwidth < 8) + red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ + else + red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu + * 1000 * 1000 
* 1000 / (pif->pif_bandwidth / 8); +#ifdef ALTQ_RIO + if (flags & PRCF_RIO) { + cl->cl_red = (red_t *)rio_alloc(0, NULL, + red_flags, red_pkttime); + if (cl->cl_red == NULL) + goto err_ret; + qtype(cl->cl_q) = Q_RIO; + } else +#endif + if (flags & PRCF_RED) { + cl->cl_red = red_alloc(0, 0, + qlimit(cl->cl_q) * 10/100, + qlimit(cl->cl_q) * 30/100, + red_flags, red_pkttime); + if (cl->cl_red == NULL) + goto err_ret; + qtype(cl->cl_q) = Q_RED; + } + } +#endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & PRCF_CODEL) { + cl->cl_codel = codel_alloc(5, 100, 0); + if (cl->cl_codel != NULL) + qtype(cl->cl_q) = Q_CODEL; + } +#endif + + return (cl); + + err_ret: + if (cl->cl_red != NULL) { +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif + } + if (cl->cl_q != NULL) + free(cl->cl_q, M_DEVBUF); + free(cl, M_DEVBUF); + return (NULL); +} + +static int +priq_class_destroy(struct priq_class *cl) +{ + struct priq_if *pif; + int s, pri; + + s = splnet(); + IFQ_LOCK(cl->cl_pif->pif_ifq); + +#ifdef ALTQ3_CLFIER_COMPAT + /* delete filters referencing to this class */ + acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0); +#endif + + if (!qempty(cl->cl_q)) + priq_purgeq(cl); + + pif = cl->cl_pif; + pif->pif_classes[cl->cl_pri] = NULL; + if (pif->pif_maxpri == cl->cl_pri) { + for (pri = cl->cl_pri; pri >= 0; pri--) + if (pif->pif_classes[pri] != NULL) { + pif->pif_maxpri = pri; + break; + } + if (pri < 0) + pif->pif_maxpri = -1; + } + IFQ_UNLOCK(cl->cl_pif->pif_ifq); + splx(s); + + if (cl->cl_red != NULL) { +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif + } + free(cl->cl_q, 
M_DEVBUF); + free(cl, M_DEVBUF); + return (0); +} + +/* + * priq_enqueue is an enqueue function to be registered to + * (*altq_enqueue) in struct ifaltq. + */ +static int +priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) +{ + struct priq_if *pif = (struct priq_if *)ifq->altq_disc; + struct priq_class *cl; + struct pf_mtag *t; + int len; + + IFQ_LOCK_ASSERT(ifq); + + /* grab class set by classifier */ + if ((m->m_flags & M_PKTHDR) == 0) { + /* should not happen */ + printf("altq: packet for %s does not have pkthdr\n", + ifq->altq_ifp->if_xname); + m_freem(m); + return (ENOBUFS); + } + cl = NULL; + if ((t = pf_find_mtag(m)) != NULL) + cl = clh_to_clp(pif, t->qid); +#ifdef ALTQ3_COMPAT + else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) + cl = pktattr->pattr_class; +#endif + if (cl == NULL) { + cl = pif->pif_default; + if (cl == NULL) { + m_freem(m); + return (ENOBUFS); + } + } +#ifdef ALTQ3_COMPAT + if (pktattr != NULL) + cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */ + else +#endif + cl->cl_pktattr = NULL; + len = m_pktlen(m); + if (priq_addq(cl, m) != 0) { + /* drop occurred. mbuf was freed in priq_addq. */ + PKTCNTR_ADD(&cl->cl_dropcnt, len); + return (ENOBUFS); + } + IFQ_INC_LEN(ifq); + + /* successfully queued. */ + return (0); +} + +/* + * priq_dequeue is a dequeue function to be registered to + * (*altq_dequeue) in struct ifaltq. + * + * note: ALTDQ_POLL returns the next packet without removing the packet + * from the queue. ALTDQ_REMOVE is a normal dequeue operation. + * ALTDQ_REMOVE must return the same packet if called immediately + * after ALTDQ_POLL. 
+ */ +static struct mbuf * +priq_dequeue(struct ifaltq *ifq, int op) +{ + struct priq_if *pif = (struct priq_if *)ifq->altq_disc; + struct priq_class *cl; + struct mbuf *m; + int pri; + + IFQ_LOCK_ASSERT(ifq); + + if (IFQ_IS_EMPTY(ifq)) + /* no packet in the queue */ + return (NULL); + + for (pri = pif->pif_maxpri; pri >= 0; pri--) { + if ((cl = pif->pif_classes[pri]) != NULL && + !qempty(cl->cl_q)) { + if (op == ALTDQ_POLL) + return (priq_pollq(cl)); + + m = priq_getq(cl); + if (m != NULL) { + IFQ_DEC_LEN(ifq); + if (qempty(cl->cl_q)) + cl->cl_period++; + PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m)); + } + return (m); + } + } + return (NULL); +} + +static int +priq_addq(struct priq_class *cl, struct mbuf *m) +{ + +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, + cl->cl_pktattr); +#endif +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_addq(cl->cl_codel, cl->cl_q, m); +#endif + if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { + m_freem(m); + return (-1); + } + + if (cl->cl_flags & PRCF_CLEARDSCP) + write_dsfield(m, cl->cl_pktattr, 0); + + _addq(cl->cl_q, m); + + return (0); +} + +static struct mbuf * +priq_getq(struct priq_class *cl) +{ +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + return rio_getq((rio_t *)cl->cl_red, cl->cl_q); +#endif +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + return red_getq(cl->cl_red, cl->cl_q); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_getq(cl->cl_codel, cl->cl_q); +#endif + return _getq(cl->cl_q); +} + +static struct mbuf * +priq_pollq(cl) + struct priq_class *cl; +{ + return qhead(cl->cl_q); +} + +static void +priq_purgeq(struct priq_class *cl) +{ + struct mbuf *m; + + if (qempty(cl->cl_q)) + return; + + while ((m = _getq(cl->cl_q)) != NULL) { + PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); + m_freem(m); + } + ASSERT(qlen(cl->cl_q) == 0); +} + +static void 
+get_class_stats(struct priq_classstats *sp, struct priq_class *cl) +{ + sp->class_handle = cl->cl_handle; + sp->qlength = qlen(cl->cl_q); + sp->qlimit = qlimit(cl->cl_q); + sp->period = cl->cl_period; + sp->xmitcnt = cl->cl_xmitcnt; + sp->dropcnt = cl->cl_dropcnt; + + sp->qtype = qtype(cl->cl_q); +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + red_getstats(cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_getstats(cl->cl_codel, &sp->codel); +#endif +} + +/* convert a class handle to the corresponding class pointer */ +static struct priq_class * +clh_to_clp(struct priq_if *pif, u_int32_t chandle) +{ + struct priq_class *cl; + int idx; + + if (chandle == 0) + return (NULL); + + for (idx = pif->pif_maxpri; idx >= 0; idx--) + if ((cl = pif->pif_classes[idx]) != NULL && + cl->cl_handle == chandle) + return (cl); + + return (NULL); +} + + +#ifdef ALTQ3_COMPAT + +static struct priq_if * +priq_attach(ifq, bandwidth) + struct ifaltq *ifq; + u_int bandwidth; +{ + struct priq_if *pif; + + pif = malloc(sizeof(struct priq_if), + M_DEVBUF, M_WAITOK); + if (pif == NULL) + return (NULL); + bzero(pif, sizeof(struct priq_if)); + pif->pif_bandwidth = bandwidth; + pif->pif_maxpri = -1; + pif->pif_ifq = ifq; + + /* add this state to the priq list */ + pif->pif_next = pif_list; + pif_list = pif; + + return (pif); +} + +static int +priq_detach(pif) + struct priq_if *pif; +{ + (void)priq_clear_interface(pif); + + /* remove this interface from the pif list */ + if (pif_list == pif) + pif_list = pif->pif_next; + else { + struct priq_if *p; + + for (p = pif_list; p != NULL; p = p->pif_next) + if (p->pif_next == pif) { + p->pif_next = pif->pif_next; + break; + } + ASSERT(p != NULL); + } + + free(pif, M_DEVBUF); + return (0); +} + +/* + * priq device interface + */ +int +priqopen(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 
500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + /* everything will be done when the queueing scheme is attached. */ + return 0; +} + +int +priqclose(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + struct priq_if *pif; + int err, error = 0; + + while ((pif = pif_list) != NULL) { + /* destroy all */ + if (ALTQ_IS_ENABLED(pif->pif_ifq)) + altq_disable(pif->pif_ifq); + + err = altq_detach(pif->pif_ifq); + if (err == 0) + err = priq_detach(pif); + if (err != 0 && error == 0) + error = err; + } + + return error; +} + +int +priqioctl(dev, cmd, addr, flag, p) + dev_t dev; + ioctlcmd_t cmd; + caddr_t addr; + int flag; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + struct priq_if *pif; + struct priq_interface *ifacep; + int error = 0; + + /* check super-user privilege */ + switch (cmd) { + case PRIQ_GETSTATS: + break; + default: +#if (__FreeBSD_version > 700000) + if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) + return (error); +#elsif (__FreeBSD_version > 400000) + if ((error = suser(p)) != 0) + return (error); +#else + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) + return (error); +#endif + break; + } + + switch (cmd) { + + case PRIQ_IF_ATTACH: + error = priqcmd_if_attach((struct priq_interface *)addr); + break; + + case PRIQ_IF_DETACH: + error = priqcmd_if_detach((struct priq_interface *)addr); + break; + + case PRIQ_ENABLE: + case PRIQ_DISABLE: + case PRIQ_CLEAR: + ifacep = (struct priq_interface *)addr; + if ((pif = altq_lookup(ifacep->ifname, + ALTQT_PRIQ)) == NULL) { + error = EBADF; + break; + } + + switch (cmd) { + case PRIQ_ENABLE: + if (pif->pif_default == NULL) { +#ifdef ALTQ_DEBUG + printf("priq: no default class\n"); +#endif + error = EINVAL; + break; + } + error = altq_enable(pif->pif_ifq); + break; + + case PRIQ_DISABLE: + error = altq_disable(pif->pif_ifq); + break; + + case PRIQ_CLEAR: + 
priq_clear_interface(pif); + break; + } + break; + + case PRIQ_ADD_CLASS: + error = priqcmd_add_class((struct priq_add_class *)addr); + break; + + case PRIQ_DEL_CLASS: + error = priqcmd_delete_class((struct priq_delete_class *)addr); + break; + + case PRIQ_MOD_CLASS: + error = priqcmd_modify_class((struct priq_modify_class *)addr); + break; + + case PRIQ_ADD_FILTER: + error = priqcmd_add_filter((struct priq_add_filter *)addr); + break; + + case PRIQ_DEL_FILTER: + error = priqcmd_delete_filter((struct priq_delete_filter *)addr); + break; + + case PRIQ_GETSTATS: + error = priqcmd_class_stats((struct priq_class_stats *)addr); + break; + + default: + error = EINVAL; + break; + } + return error; +} + +static int +priqcmd_if_attach(ap) + struct priq_interface *ap; +{ + struct priq_if *pif; + struct ifnet *ifp; + int error; + + if ((ifp = ifunit(ap->ifname)) == NULL) + return (ENXIO); + + if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL) + return (ENOMEM); + + /* + * set PRIQ to this ifnet structure. 
+ */ + if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif, + priq_enqueue, priq_dequeue, priq_request, + &pif->pif_classifier, acc_classify)) != 0) + (void)priq_detach(pif); + + return (error); +} + +static int +priqcmd_if_detach(ap) + struct priq_interface *ap; +{ + struct priq_if *pif; + int error; + + if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL) + return (EBADF); + + if (ALTQ_IS_ENABLED(pif->pif_ifq)) + altq_disable(pif->pif_ifq); + + if ((error = altq_detach(pif->pif_ifq))) + return (error); + + return priq_detach(pif); +} + +static int +priqcmd_add_class(ap) + struct priq_add_class *ap; +{ + struct priq_if *pif; + struct priq_class *cl; + int qid; + + if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) + return (EBADF); + + if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) + return (EINVAL); + if (pif->pif_classes[ap->pri] != NULL) + return (EBUSY); + + qid = ap->pri + 1; + if ((cl = priq_class_create(pif, ap->pri, + ap->qlimit, ap->flags, qid)) == NULL) + return (ENOMEM); + + /* return a class handle to the user */ + ap->class_handle = cl->cl_handle; + + return (0); +} + +static int +priqcmd_delete_class(ap) + struct priq_delete_class *ap; +{ + struct priq_if *pif; + struct priq_class *cl; + + if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) + return (EINVAL); + + return priq_class_destroy(cl); +} + +static int +priqcmd_modify_class(ap) + struct priq_modify_class *ap; +{ + struct priq_if *pif; + struct priq_class *cl; + + if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) + return (EBADF); + + if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) + return (EINVAL); + + if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) + return (EINVAL); + + /* + * if priority is changed, move the class to the new priority + */ + if (pif->pif_classes[ap->pri] != cl) { + if (pif->pif_classes[ap->pri] != NULL) + return (EEXIST); + pif->pif_classes[cl->cl_pri] = 
NULL; + pif->pif_classes[ap->pri] = cl; + cl->cl_pri = ap->pri; + } + + /* call priq_class_create to change class parameters */ + if ((cl = priq_class_create(pif, ap->pri, + ap->qlimit, ap->flags, ap->class_handle)) == NULL) + return (ENOMEM); + return 0; +} + +static int +priqcmd_add_filter(ap) + struct priq_add_filter *ap; +{ + struct priq_if *pif; + struct priq_class *cl; + + if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) + return (EINVAL); + + return acc_add_filter(&pif->pif_classifier, &ap->filter, + cl, &ap->filter_handle); +} + +static int +priqcmd_delete_filter(ap) + struct priq_delete_filter *ap; +{ + struct priq_if *pif; + + if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) + return (EBADF); + + return acc_delete_filter(&pif->pif_classifier, + ap->filter_handle); +} + +static int +priqcmd_class_stats(ap) + struct priq_class_stats *ap; +{ + struct priq_if *pif; + struct priq_class *cl; + struct priq_classstats stats, *usp; + int pri, error; + + if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) + return (EBADF); + + ap->maxpri = pif->pif_maxpri; + + /* then, read the next N classes in the tree */ + usp = ap->stats; + for (pri = 0; pri <= pif->pif_maxpri; pri++) { + cl = pif->pif_classes[pri]; + if (cl != NULL) + get_class_stats(&stats, cl); + else + bzero(&stats, sizeof(stats)); + if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, + sizeof(stats))) != 0) + return (error); + } + return (0); +} + +#ifdef KLD_MODULE + +static struct altqsw priq_sw = + {"priq", priqopen, priqclose, priqioctl}; + +ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw); +MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1); +MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1); + +#endif /* KLD_MODULE */ + +#endif /* ALTQ3_COMPAT */ +#endif /* ALTQ_PRIQ */ diff --git a/freebsd/sys/net/altq/altq_priq.h b/freebsd/sys/net/altq/altq_priq.h new file mode 100644 index 00000000..fcbfee98 --- 
/dev/null +++ b/freebsd/sys/net/altq/altq_priq.h @@ -0,0 +1,180 @@ +/*- + * Copyright (C) 2000-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_PRIQ_H_ +#define _ALTQ_ALTQ_PRIQ_H_ + +#include <net/altq/altq.h> +#include <net/altq/altq_classq.h> +#include <net/altq/altq_codel.h> +#include <net/altq/altq_red.h> +#include <net/altq/altq_rio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define PRIQ_MAXPRI 16 /* upper limit of the number of priorities */ + +#ifdef ALTQ3_COMPAT +struct priq_interface { + char ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ + u_long arg; /* request-specific argument */ +}; + +struct priq_add_class { + struct priq_interface iface; + int pri; /* priority (0 is the lowest) */ + int qlimit; /* queue size limit */ + int flags; /* misc flags (see below) */ + + u_int32_t class_handle; /* return value */ +}; +#endif /* ALTQ3_COMPAT */ + +/* priq class flags */ +#define PRCF_RED 0x0001 /* use RED */ +#define PRCF_ECN 0x0002 /* use RED/ECN */ +#define PRCF_RIO 0x0004 /* use RIO */ +#define PRCF_CODEL 0x0008 /* use CoDel */ +#define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define PRCF_DEFAULTCLASS 0x1000 /* default class */ + +/* special class handles */ +#define PRIQ_NULLCLASS_HANDLE 0 + +#ifdef ALTQ3_COMPAT +struct priq_delete_class { + struct priq_interface iface; + u_int32_t class_handle; +}; + +struct priq_modify_class { + struct priq_interface iface; + u_int32_t class_handle; + int pri; + int qlimit; + int flags; +}; + +struct priq_add_filter { + struct priq_interface iface; + u_int32_t class_handle; + struct flow_filter filter; + + u_long filter_handle; /* return value */ +}; + +struct priq_delete_filter { + struct priq_interface iface; + u_long filter_handle; +}; +#endif /* ALTQ3_COMPAT */ + +struct priq_classstats { + u_int32_t class_handle; + + u_int qlength; + u_int qlimit; + u_int period; + struct pktcntr xmitcnt; /* transmitted packet counter */ + struct pktcntr dropcnt; /* dropped packet counter */ + + /* codel, red and rio related info */ + int qtype; 
+ struct redstats red[3]; /* rio has 3 red stats */ + struct codel_stats codel; +}; + +#ifdef ALTQ3_COMPAT +struct priq_class_stats { + struct priq_interface iface; + int maxpri; /* in/out */ + + struct priq_classstats *stats; /* pointer to stats array */ +}; + +#define PRIQ_IF_ATTACH _IOW('Q', 1, struct priq_interface) +#define PRIQ_IF_DETACH _IOW('Q', 2, struct priq_interface) +#define PRIQ_ENABLE _IOW('Q', 3, struct priq_interface) +#define PRIQ_DISABLE _IOW('Q', 4, struct priq_interface) +#define PRIQ_CLEAR _IOW('Q', 5, struct priq_interface) +#define PRIQ_ADD_CLASS _IOWR('Q', 7, struct priq_add_class) +#define PRIQ_DEL_CLASS _IOW('Q', 8, struct priq_delete_class) +#define PRIQ_MOD_CLASS _IOW('Q', 9, struct priq_modify_class) +#define PRIQ_ADD_FILTER _IOWR('Q', 10, struct priq_add_filter) +#define PRIQ_DEL_FILTER _IOW('Q', 11, struct priq_delete_filter) +#define PRIQ_GETSTATS _IOWR('Q', 12, struct priq_class_stats) + +#endif /* ALTQ3_COMPAT */ + +#ifdef _KERNEL + +struct priq_class { + u_int32_t cl_handle; /* class handle */ + class_queue_t *cl_q; /* class queue structure */ + union { + struct red *cl_red; /* RED state */ + struct codel *cl_codel; /* CoDel state */ + } cl_aqm; +#define cl_red cl_aqm.cl_red +#define cl_codel cl_aqm.cl_codel + int cl_pri; /* priority */ + int cl_flags; /* class flags */ + struct priq_if *cl_pif; /* back pointer to pif */ + struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ + + /* statistics */ + u_int cl_period; /* backlog period */ + struct pktcntr cl_xmitcnt; /* transmitted packet counter */ + struct pktcntr cl_dropcnt; /* dropped packet counter */ +}; + +/* + * priq interface state + */ +struct priq_if { + struct priq_if *pif_next; /* interface state list */ + struct ifaltq *pif_ifq; /* backpointer to ifaltq */ + u_int pif_bandwidth; /* link bandwidth in bps */ + int pif_maxpri; /* max priority in use */ + struct priq_class *pif_default; /* default class */ + struct priq_class *pif_classes[PRIQ_MAXPRI]; /* classes 
*/ +#ifdef ALTQ3_CLFIER_COMPAT + struct acc_classifier pif_classifier; /* classifier */ +#endif +}; + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ALTQ_ALTQ_PRIQ_H_ */ diff --git a/freebsd/sys/net/altq/altq_red.c b/freebsd/sys/net/altq/altq_red.c new file mode 100644 index 00000000..f83b7b50 --- /dev/null +++ b/freebsd/sys/net/altq/altq_red.c @@ -0,0 +1,1494 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (C) 1997-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ +/*- + * Copyright (c) 1990-1994 Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Computer Systems + * Engineering Group at Lawrence Berkeley Laboratory. + * 4. Neither the name of the University nor of the Laboratory may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ + * $FreeBSD$ + */ + +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> +#include <rtems/bsd/sys/errno.h> +#if 1 /* ALTQ3_COMPAT */ +#include <sys/sockio.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#ifdef ALTQ_FLOWVALVE +#include <sys/queue.h> +#include <sys/time.h> +#endif +#endif /* ALTQ3_COMPAT */ + +#include <net/if.h> +#include <net/if_var.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#ifdef INET6 +#include <netinet/ip6.h> +#endif + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <netpfil/pf/pf_mtag.h> +#include <net/altq/altq.h> +#include <net/altq/altq_red.h> +#ifdef ALTQ3_COMPAT +#include <net/altq/altq_conf.h> +#ifdef ALTQ_FLOWVALVE +#include <net/altq/altq_flowvalve.h> +#endif +#endif + +/* + * ALTQ/RED (Random Early Detection) implementation using 32-bit + * fixed-point calculation. + * + * written by kjc using the ns code as a reference. + * you can learn more about red and ns from Sally's home page at + * http://www-nrg.ee.lbl.gov/floyd/ + * + * most of the red parameter values are fixed in this implementation + * to prevent fixed-point overflow/underflow. + * if you change the parameters, watch out for overflow/underflow! + * + * the parameters used are recommended values by Sally. + * the corresponding ns config looks: + * q_weight=0.00195 + * minthresh=5 maxthresh=15 queue-size=60 + * linterm=30 + * dropmech=drop-tail + * bytes=false (can't be handled by 32-bit fixed-point) + * doubleq=false dqthresh=false + * wait=true + */ +/* + * alternative red parameters for a slow link. 
+ * + * assume the queue length becomes from zero to L and keeps L, it takes + * N packets for q_avg to reach 63% of L. + * when q_weight is 0.002, N is about 500 packets. + * for a slow link like dial-up, 500 packets takes more than 1 minute! + * when q_weight is 0.008, N is about 127 packets. + * when q_weight is 0.016, N is about 63 packets. + * bursts of 50 packets are allowed for 0.002, bursts of 25 packets + * are allowed for 0.016. + * see Sally's paper for more details. + */ +/* normal red parameters */ +#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ + /* q_weight = 0.00195 */ + +/* red parameters for a slow link */ +#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ + /* q_weight = 0.0078125 */ + +/* red parameters for a very slow link (e.g., dialup) */ +#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ + /* q_weight = 0.015625 */ + +/* fixed-point uses 12-bit decimal places */ +#define FP_SHIFT 12 /* fixed-point shift */ + +/* red parameters for drop probability */ +#define INV_P_MAX 10 /* inverse of max drop probability */ +#define TH_MIN 5 /* min threshold */ +#define TH_MAX 15 /* max threshold */ + +#define RED_LIMIT 60 /* default max queue length */ +#define RED_STATS /* collect statistics */ + +/* + * our default policy for forced-drop is drop-tail. + * (in altq-1.1.2 or earlier, the default was random-drop. + * but it makes more sense to punish the cause of the surge.) + * to switch to the random-drop policy, define "RED_RANDOM_DROP". + */ + +#ifdef ALTQ3_COMPAT +#ifdef ALTQ_FLOWVALVE +/* + * flow-valve is an extension to protect red from unresponsive flows + * and to promote end-to-end congestion control. + * flow-valve observes the average drop rates of the flows that have + * experienced packet drops in the recent past. + * when the average drop rate exceeds the threshold, the flow is + * blocked by the flow-valve. the trapped flow should back off + * exponentially to escape from the flow-valve. 
+ */ +#ifdef RED_RANDOM_DROP +#error "random-drop can't be used with flow-valve!" +#endif +#endif /* ALTQ_FLOWVALVE */ + +/* red_list keeps all red_queue_t's allocated. */ +static red_queue_t *red_list = NULL; + +#endif /* ALTQ3_COMPAT */ + +/* default red parameter values */ +static int default_th_min = TH_MIN; +static int default_th_max = TH_MAX; +static int default_inv_pmax = INV_P_MAX; + +#ifdef ALTQ3_COMPAT +/* internal function prototypes */ +static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *red_dequeue(struct ifaltq *, int); +static int red_request(struct ifaltq *, int, void *); +static void red_purgeq(red_queue_t *); +static int red_detach(red_queue_t *); +#ifdef ALTQ_FLOWVALVE +static __inline struct fve *flowlist_lookup(struct flowvalve *, + struct altq_pktattr *, struct timeval *); +static __inline struct fve *flowlist_reclaim(struct flowvalve *, + struct altq_pktattr *); +static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *); +static __inline int fv_p2f(struct flowvalve *, int); +#if 0 /* XXX: make the compiler happy (fv_alloc unused) */ +static struct flowvalve *fv_alloc(struct red *); +#endif +static void fv_destroy(struct flowvalve *); +static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, + struct fve **); +static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, + struct fve *); +#endif +#endif /* ALTQ3_COMPAT */ + +/* + * red support routines + */ +red_t * +red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, + int pkttime) +{ + red_t *rp; + int w, i; + int npkts_per_sec; + + rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO); + if (rp == NULL) + return (NULL); + + if (weight == 0) + rp->red_weight = W_WEIGHT; + else + rp->red_weight = weight; + + /* allocate weight table */ + rp->red_wtab = wtab_alloc(rp->red_weight); + if (rp->red_wtab == NULL) { + free(rp, M_DEVBUF); + return (NULL); + } + + rp->red_avg = 0; + rp->red_idle = 
1; + + if (inv_pmax == 0) + rp->red_inv_pmax = default_inv_pmax; + else + rp->red_inv_pmax = inv_pmax; + if (th_min == 0) + rp->red_thmin = default_th_min; + else + rp->red_thmin = th_min; + if (th_max == 0) + rp->red_thmax = default_th_max; + else + rp->red_thmax = th_max; + + rp->red_flags = flags; + + if (pkttime == 0) + /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ + rp->red_pkttime = 800; + else + rp->red_pkttime = pkttime; + + if (weight == 0) { + /* when the link is very slow, adjust red parameters */ + npkts_per_sec = 1000000 / rp->red_pkttime; + if (npkts_per_sec < 50) { + /* up to about 400Kbps */ + rp->red_weight = W_WEIGHT_2; + } else if (npkts_per_sec < 300) { + /* up to about 2.4Mbps */ + rp->red_weight = W_WEIGHT_1; + } + } + + /* calculate wshift. weight must be power of 2 */ + w = rp->red_weight; + for (i = 0; w > 1; i++) + w = w >> 1; + rp->red_wshift = i; + w = 1 << rp->red_wshift; + if (w != rp->red_weight) { + printf("invalid weight value %d for red! use %d\n", + rp->red_weight, w); + rp->red_weight = w; + } + + /* + * thmin_s and thmax_s are scaled versions of th_min and th_max + * to be compared with avg. 
+ */ + rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); + rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); + + /* + * precompute probability denominator + * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point + */ + rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) + * rp->red_inv_pmax) << FP_SHIFT; + + microtime(&rp->red_last); + return (rp); +} + +void +red_destroy(red_t *rp) +{ +#ifdef ALTQ3_COMPAT +#ifdef ALTQ_FLOWVALVE + if (rp->red_flowvalve != NULL) + fv_destroy(rp->red_flowvalve); +#endif +#endif /* ALTQ3_COMPAT */ + wtab_destroy(rp->red_wtab); + free(rp, M_DEVBUF); +} + +void +red_getstats(red_t *rp, struct redstats *sp) +{ + sp->q_avg = rp->red_avg >> rp->red_wshift; + sp->xmit_cnt = rp->red_stats.xmit_cnt; + sp->drop_cnt = rp->red_stats.drop_cnt; + sp->drop_forced = rp->red_stats.drop_forced; + sp->drop_unforced = rp->red_stats.drop_unforced; + sp->marked_packets = rp->red_stats.marked_packets; +} + +int +red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, + struct altq_pktattr *pktattr) +{ + int avg, droptype; + int n; +#ifdef ALTQ3_COMPAT +#ifdef ALTQ_FLOWVALVE + struct fve *fve = NULL; + + if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) + if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { + m_freem(m); + return (-1); + } +#endif +#endif /* ALTQ3_COMPAT */ + + avg = rp->red_avg; + + /* + * if we were idle, we pretend that n packets arrived during + * the idle period. + */ + if (rp->red_idle) { + struct timeval now; + int t; + + rp->red_idle = 0; + microtime(&now); + t = (now.tv_sec - rp->red_last.tv_sec); + if (t > 60) { + /* + * being idle for more than 1 minute, set avg to zero. + * this prevents t from overflow. + */ + avg = 0; + } else { + t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); + n = t / rp->red_pkttime - 1; + + /* the following line does (avg = (1 - Wq)^n * avg) */ + if (n > 0) + avg = (avg >> FP_SHIFT) * + pow_w(rp->red_wtab, n); + } + } + + /* run estimator. 
(note: avg is scaled by WEIGHT in fixed-point) */ + avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); + rp->red_avg = avg; /* save the new value */ + + /* + * red_count keeps a tally of arriving traffic that has not + * been dropped. + */ + rp->red_count++; + + /* see if we drop early */ + droptype = DTYPE_NODROP; + if (avg >= rp->red_thmin_s && qlen(q) > 1) { + if (avg >= rp->red_thmax_s) { + /* avg >= th_max: forced drop */ + droptype = DTYPE_FORCED; + } else if (rp->red_old == 0) { + /* first exceeds th_min */ + rp->red_count = 1; + rp->red_old = 1; + } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, + rp->red_probd, rp->red_count)) { + /* mark or drop by red */ + if ((rp->red_flags & REDF_ECN) && + mark_ecn(m, pktattr, rp->red_flags)) { + /* successfully marked. do not drop. */ + rp->red_count = 0; +#ifdef RED_STATS + rp->red_stats.marked_packets++; +#endif + } else { + /* unforced drop by red */ + droptype = DTYPE_EARLY; + } + } + } else { + /* avg < th_min */ + rp->red_old = 0; + } + + /* + * if the queue length hits the hard limit, it's a forced drop. + */ + if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) + droptype = DTYPE_FORCED; + +#ifdef RED_RANDOM_DROP + /* if successful or forced drop, enqueue this packet. */ + if (droptype != DTYPE_EARLY) + _addq(q, m); +#else + /* if successful, enqueue this packet. */ + if (droptype == DTYPE_NODROP) + _addq(q, m); +#endif + if (droptype != DTYPE_NODROP) { + if (droptype == DTYPE_EARLY) { + /* drop the incoming packet */ +#ifdef RED_STATS + rp->red_stats.drop_unforced++; +#endif + } else { + /* forced drop, select a victim packet in the queue. 
*/ +#ifdef RED_RANDOM_DROP + m = _getq_random(q); +#endif +#ifdef RED_STATS + rp->red_stats.drop_forced++; +#endif + } +#ifdef RED_STATS + PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); +#endif + rp->red_count = 0; +#ifdef ALTQ3_COMPAT +#ifdef ALTQ_FLOWVALVE + if (rp->red_flowvalve != NULL) + fv_dropbyred(rp->red_flowvalve, pktattr, fve); +#endif +#endif /* ALTQ3_COMPAT */ + m_freem(m); + return (-1); + } + /* successfully queued */ +#ifdef RED_STATS + PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); +#endif + return (0); +} + +/* + * early-drop probability is calculated as follows: + * prob = p_max * (avg - th_min) / (th_max - th_min) + * prob_a = prob / (2 - count*prob) + * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) + * here prob_a increases as successive undrop count increases. + * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), + * becomes 1 when (count >= (2 / prob))). + */ +int +drop_early(int fp_len, int fp_probd, int count) +{ + int d; /* denominator of drop-probability */ + + d = fp_probd - count * fp_len; + if (d <= 0) + /* count exceeds the hard limit: drop or mark */ + return (1); + + /* + * now the range of d is [1..600] in fixed-point. (when + * th_max-th_min=10 and p_max=1/30) + * drop probability = (avg - TH_MIN) / d + */ + + if ((arc4random() % d) < fp_len) { + /* drop or mark */ + return (1); + } + /* no drop/mark */ + return (0); +} + +/* + * try to mark CE bit to the packet. + * returns 1 if successfully marked, 0 otherwise. 
+ */ +int +mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) +{ + struct mbuf *m0; + struct pf_mtag *at; + void *hdr; + + at = pf_find_mtag(m); + if (at != NULL) { + hdr = at->hdr; +#ifdef ALTQ3_COMPAT + } else if (pktattr != NULL) { + af = pktattr->pattr_af; + hdr = pktattr->pattr_hdr; +#endif /* ALTQ3_COMPAT */ + } else + return (0); + + /* verify that pattr_hdr is within the mbuf data */ + for (m0 = m; m0 != NULL; m0 = m0->m_next) + if (((caddr_t)hdr >= m0->m_data) && + ((caddr_t)hdr < m0->m_data + m0->m_len)) + break; + if (m0 == NULL) { + /* ick, tag info is stale */ + return (0); + } + + switch (((struct ip *)hdr)->ip_v) { + case IPVERSION: + if (flags & REDF_ECN4) { + struct ip *ip = hdr; + u_int8_t otos; + int sum; + + if (ip->ip_v != 4) + return (0); /* version mismatch! */ + + if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) + return (0); /* not-ECT */ + if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + return (1); /* already marked */ + + /* + * ecn-capable but not marked, + * mark CE and update checksum + */ + otos = ip->ip_tos; + ip->ip_tos |= IPTOS_ECN_CE; + /* + * update checksum (from RFC1624) + * HC' = ~(~HC + ~m + m') + */ + sum = ~ntohs(ip->ip_sum) & 0xffff; + sum += (~otos & 0xffff) + ip->ip_tos; + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); /* add carry */ + ip->ip_sum = htons(~sum & 0xffff); + return (1); + } + break; +#ifdef INET6 + case (IPV6_VERSION >> 4): + if (flags & REDF_ECN6) { + struct ip6_hdr *ip6 = hdr; + u_int32_t flowlabel; + + flowlabel = ntohl(ip6->ip6_flow); + if ((flowlabel >> 28) != 6) + return (0); /* version mismatch! 
*/ + if ((flowlabel & (IPTOS_ECN_MASK << 20)) == + (IPTOS_ECN_NOTECT << 20)) + return (0); /* not-ECT */ + if ((flowlabel & (IPTOS_ECN_MASK << 20)) == + (IPTOS_ECN_CE << 20)) + return (1); /* already marked */ + /* + * ecn-capable but not marked, mark CE + */ + flowlabel |= (IPTOS_ECN_CE << 20); + ip6->ip6_flow = htonl(flowlabel); + return (1); + } + break; +#endif /* INET6 */ + } + + /* not marked */ + return (0); +} + +struct mbuf * +red_getq(rp, q) + red_t *rp; + class_queue_t *q; +{ + struct mbuf *m; + + if ((m = _getq(q)) == NULL) { + if (rp->red_idle == 0) { + rp->red_idle = 1; + microtime(&rp->red_last); + } + return NULL; + } + + rp->red_idle = 0; + return (m); +} + +/* + * helper routine to calibrate avg during idle. + * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point + * here Wq = 1/weight and the code assumes Wq is close to zero. + * + * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. + */ +static struct wtab *wtab_list = NULL; /* pointer to wtab list */ + +struct wtab * +wtab_alloc(int weight) +{ + struct wtab *w; + int i; + + for (w = wtab_list; w != NULL; w = w->w_next) + if (w->w_weight == weight) { + w->w_refcount++; + return (w); + } + + w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO); + if (w == NULL) + return (NULL); + w->w_weight = weight; + w->w_refcount = 1; + w->w_next = wtab_list; + wtab_list = w; + + /* initialize the weight table */ + w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; + for (i = 1; i < 32; i++) { + w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; + if (w->w_tab[i] == 0 && w->w_param_max == 0) + w->w_param_max = 1 << i; + } + + return (w); +} + +int +wtab_destroy(struct wtab *w) +{ + struct wtab *prev; + + if (--w->w_refcount > 0) + return (0); + + if (wtab_list == w) + wtab_list = w->w_next; + else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) + if (prev->w_next == w) { + prev->w_next = w->w_next; + break; + } + + free(w, M_DEVBUF); + return (0); +} + +int32_t +pow_w(struct wtab 
*w, int n) +{ + int i, bit; + int32_t val; + + if (n >= w->w_param_max) + return (0); + + val = 1 << FP_SHIFT; + if (n <= 0) + return (val); + + bit = 1; + i = 0; + while (n) { + if (n & bit) { + val = (val * w->w_tab[i]) >> FP_SHIFT; + n &= ~bit; + } + i++; + bit <<= 1; + } + return (val); +} + +#ifdef ALTQ3_COMPAT +/* + * red device interface + */ +altqdev_decl(red); + +int +redopen(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + /* everything will be done when the queueing scheme is attached. */ + return 0; +} + +int +redclose(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + red_queue_t *rqp; + int err, error = 0; + + while ((rqp = red_list) != NULL) { + /* destroy all */ + err = red_detach(rqp); + if (err != 0 && error == 0) + error = err; + } + + return error; +} + +int +redioctl(dev, cmd, addr, flag, p) + dev_t dev; + ioctlcmd_t cmd; + caddr_t addr; + int flag; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + red_queue_t *rqp; + struct red_interface *ifacep; + struct ifnet *ifp; + int error = 0; + + /* check super-user privilege */ + switch (cmd) { + case RED_GETSTATS: + break; + default: +#if (__FreeBSD_version > 700000) + if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) +#elsif (__FreeBSD_version > 400000) + if ((error = suser(p)) != 0) +#else + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) +#endif + return (error); + break; + } + + switch (cmd) { + + case RED_ENABLE: + ifacep = (struct red_interface *)addr; + if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { + error = EBADF; + break; + } + error = altq_enable(rqp->rq_ifq); + break; + + case RED_DISABLE: + ifacep = (struct red_interface *)addr; + if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { + error = EBADF; + break; + } + error = 
altq_disable(rqp->rq_ifq); + break; + + case RED_IF_ATTACH: + ifp = ifunit(((struct red_interface *)addr)->red_ifname); + if (ifp == NULL) { + error = ENXIO; + break; + } + + /* allocate and initialize red_queue_t */ + rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); + if (rqp == NULL) { + error = ENOMEM; + break; + } + bzero(rqp, sizeof(red_queue_t)); + + rqp->rq_q = malloc(sizeof(class_queue_t), + M_DEVBUF, M_WAITOK); + if (rqp->rq_q == NULL) { + free(rqp, M_DEVBUF); + error = ENOMEM; + break; + } + bzero(rqp->rq_q, sizeof(class_queue_t)); + + rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); + if (rqp->rq_red == NULL) { + free(rqp->rq_q, M_DEVBUF); + free(rqp, M_DEVBUF); + error = ENOMEM; + break; + } + + rqp->rq_ifq = &ifp->if_snd; + qtail(rqp->rq_q) = NULL; + qlen(rqp->rq_q) = 0; + qlimit(rqp->rq_q) = RED_LIMIT; + qtype(rqp->rq_q) = Q_RED; + + /* + * set RED to this ifnet structure. + */ + error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, + red_enqueue, red_dequeue, red_request, + NULL, NULL); + if (error) { + red_destroy(rqp->rq_red); + free(rqp->rq_q, M_DEVBUF); + free(rqp, M_DEVBUF); + break; + } + + /* add this state to the red list */ + rqp->rq_next = red_list; + red_list = rqp; + break; + + case RED_IF_DETACH: + ifacep = (struct red_interface *)addr; + if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { + error = EBADF; + break; + } + error = red_detach(rqp); + break; + + case RED_GETSTATS: + do { + struct red_stats *q_stats; + red_t *rp; + + q_stats = (struct red_stats *)addr; + if ((rqp = altq_lookup(q_stats->iface.red_ifname, + ALTQT_RED)) == NULL) { + error = EBADF; + break; + } + + q_stats->q_len = qlen(rqp->rq_q); + q_stats->q_limit = qlimit(rqp->rq_q); + + rp = rqp->rq_red; + q_stats->q_avg = rp->red_avg >> rp->red_wshift; + q_stats->xmit_cnt = rp->red_stats.xmit_cnt; + q_stats->drop_cnt = rp->red_stats.drop_cnt; + q_stats->drop_forced = rp->red_stats.drop_forced; + q_stats->drop_unforced = rp->red_stats.drop_unforced; + 
q_stats->marked_packets = rp->red_stats.marked_packets; + + q_stats->weight = rp->red_weight; + q_stats->inv_pmax = rp->red_inv_pmax; + q_stats->th_min = rp->red_thmin; + q_stats->th_max = rp->red_thmax; + +#ifdef ALTQ_FLOWVALVE + if (rp->red_flowvalve != NULL) { + struct flowvalve *fv = rp->red_flowvalve; + q_stats->fv_flows = fv->fv_flows; + q_stats->fv_pass = fv->fv_stats.pass; + q_stats->fv_predrop = fv->fv_stats.predrop; + q_stats->fv_alloc = fv->fv_stats.alloc; + q_stats->fv_escape = fv->fv_stats.escape; + } else { +#endif /* ALTQ_FLOWVALVE */ + q_stats->fv_flows = 0; + q_stats->fv_pass = 0; + q_stats->fv_predrop = 0; + q_stats->fv_alloc = 0; + q_stats->fv_escape = 0; +#ifdef ALTQ_FLOWVALVE + } +#endif /* ALTQ_FLOWVALVE */ + } while (/*CONSTCOND*/ 0); + break; + + case RED_CONFIG: + do { + struct red_conf *fc; + red_t *new; + int s, limit; + + fc = (struct red_conf *)addr; + if ((rqp = altq_lookup(fc->iface.red_ifname, + ALTQT_RED)) == NULL) { + error = EBADF; + break; + } + new = red_alloc(fc->red_weight, + fc->red_inv_pmax, + fc->red_thmin, + fc->red_thmax, + fc->red_flags, + fc->red_pkttime); + if (new == NULL) { + error = ENOMEM; + break; + } + + s = splnet(); + red_purgeq(rqp); + limit = fc->red_limit; + if (limit < fc->red_thmax) + limit = fc->red_thmax; + qlimit(rqp->rq_q) = limit; + fc->red_limit = limit; /* write back the new value */ + + red_destroy(rqp->rq_red); + rqp->rq_red = new; + + splx(s); + + /* write back new values */ + fc->red_limit = limit; + fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; + fc->red_thmin = rqp->rq_red->red_thmin; + fc->red_thmax = rqp->rq_red->red_thmax; + + } while (/*CONSTCOND*/ 0); + break; + + case RED_SETDEFAULTS: + do { + struct redparams *rp; + + rp = (struct redparams *)addr; + + default_th_min = rp->th_min; + default_th_max = rp->th_max; + default_inv_pmax = rp->inv_pmax; + } while (/*CONSTCOND*/ 0); + break; + + default: + error = EINVAL; + break; + } + return error; +} + +static int +red_detach(rqp) + 
red_queue_t *rqp; +{ + red_queue_t *tmp; + int error = 0; + + if (ALTQ_IS_ENABLED(rqp->rq_ifq)) + altq_disable(rqp->rq_ifq); + + if ((error = altq_detach(rqp->rq_ifq))) + return (error); + + if (red_list == rqp) + red_list = rqp->rq_next; + else { + for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) + if (tmp->rq_next == rqp) { + tmp->rq_next = rqp->rq_next; + break; + } + if (tmp == NULL) + printf("red_detach: no state found in red_list!\n"); + } + + red_destroy(rqp->rq_red); + free(rqp->rq_q, M_DEVBUF); + free(rqp, M_DEVBUF); + return (error); +} + +/* + * enqueue routine: + * + * returns: 0 when successfully queued. + * ENOBUFS when drop occurs. + */ +static int +red_enqueue(ifq, m, pktattr) + struct ifaltq *ifq; + struct mbuf *m; + struct altq_pktattr *pktattr; +{ + red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) + return ENOBUFS; + ifq->ifq_len++; + return 0; +} + +/* + * dequeue routine: + * must be called in splimp. + * + * returns: mbuf dequeued. + * NULL when no packet is available in the queue. 
+ */ + +static struct mbuf * +red_dequeue(ifq, op) + struct ifaltq *ifq; + int op; +{ + red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; + struct mbuf *m; + + IFQ_LOCK_ASSERT(ifq); + + if (op == ALTDQ_POLL) + return qhead(rqp->rq_q); + + /* op == ALTDQ_REMOVE */ + m = red_getq(rqp->rq_red, rqp->rq_q); + if (m != NULL) + ifq->ifq_len--; + return (m); +} + +static int +red_request(ifq, req, arg) + struct ifaltq *ifq; + int req; + void *arg; +{ + red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + red_purgeq(rqp); + break; + } + return (0); +} + +static void +red_purgeq(rqp) + red_queue_t *rqp; +{ + _flushq(rqp->rq_q); + if (ALTQ_IS_ENABLED(rqp->rq_ifq)) + rqp->rq_ifq->ifq_len = 0; +} + +#ifdef ALTQ_FLOWVALVE + +#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ +#define FV_PSCALE(x) ((x) << FV_PSHIFT) +#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) +#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ +#define FV_FSCALE(x) ((x) << FV_FSHIFT) +#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) + +#define FV_TIMER (3 * hz) /* timer value for garbage collector */ +#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ + +#define FV_N 10 /* update fve_f every FV_N packets */ + +#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ +#define FV_TTHRESH 3 /* time threshold to delete fve */ +#define FV_ALPHA 5 /* extra packet count */ + +#define FV_STATS + +#if (__FreeBSD_version > 300000) +#define FV_TIMESTAMP(tp) getmicrotime(tp) +#else +#define FV_TIMESTAMP(tp) { (*(tp)) = time; } +#endif + +/* + * Brtt table: 127 entry table to convert drop rate (p) to + * the corresponding bandwidth fraction (f) + * the following equation is implemented to use scaled values, + * fve_p and fve_f, in the fixed point format. 
+ * + * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) + * f = Brtt(p) / (max_th + alpha) + */ +#define BRTT_SIZE 128 +#define BRTT_SHIFT 12 +#define BRTT_MASK 0x0007f000 +#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) + +const int brtt_tab[BRTT_SIZE] = { + 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, + 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, + 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, + 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, + 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, + 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, + 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, + 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, + 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, + 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, + 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, + 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, + 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, + 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, + 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, + 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 +}; + +static __inline struct fve * +flowlist_lookup(fv, pktattr, now) + struct flowvalve *fv; + struct altq_pktattr *pktattr; + struct timeval *now; +{ + struct fve *fve; + int flows; + struct ip *ip; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + struct timeval tthresh; + + if (pktattr == NULL) + return (NULL); + + tthresh.tv_sec = now->tv_sec - FV_TTHRESH; + flows = 0; + /* + * search the flow list + */ + switch (pktattr->pattr_af) { + case AF_INET: + ip = (struct ip *)pktattr->pattr_hdr; + TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ + if (fve->fve_lastdrop.tv_sec == 0) + break; + if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { + fve->fve_lastdrop.tv_sec = 0; + break; + } + if (fve->fve_flow.flow_af == AF_INET && + fve->fve_flow.flow_ip.ip_src.s_addr == + ip->ip_src.s_addr && + 
fve->fve_flow.flow_ip.ip_dst.s_addr == + ip->ip_dst.s_addr) + return (fve); + flows++; + } + break; +#ifdef INET6 + case AF_INET6: + ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; + TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ + if (fve->fve_lastdrop.tv_sec == 0) + break; + if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { + fve->fve_lastdrop.tv_sec = 0; + break; + } + if (fve->fve_flow.flow_af == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, + &ip6->ip6_src) && + IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, + &ip6->ip6_dst)) + return (fve); + flows++; + } + break; +#endif /* INET6 */ + + default: + /* unknown protocol. no drop. */ + return (NULL); + } + fv->fv_flows = flows; /* save the number of active fve's */ + return (NULL); +} + +static __inline struct fve * +flowlist_reclaim(fv, pktattr) + struct flowvalve *fv; + struct altq_pktattr *pktattr; +{ + struct fve *fve; + struct ip *ip; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + + /* + * get an entry from the tail of the LRU list. 
+ */ + fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); + + switch (pktattr->pattr_af) { + case AF_INET: + ip = (struct ip *)pktattr->pattr_hdr; + fve->fve_flow.flow_af = AF_INET; + fve->fve_flow.flow_ip.ip_src = ip->ip_src; + fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; + break; +#ifdef INET6 + case AF_INET6: + ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; + fve->fve_flow.flow_af = AF_INET6; + fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; + fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; + break; +#endif + } + + fve->fve_state = Green; + fve->fve_p = 0.0; + fve->fve_f = 0.0; + fve->fve_ifseq = fv->fv_ifseq - 1; + fve->fve_count = 0; + + fv->fv_flows++; +#ifdef FV_STATS + fv->fv_stats.alloc++; +#endif + return (fve); +} + +static __inline void +flowlist_move_to_head(fv, fve) + struct flowvalve *fv; + struct fve *fve; +{ + if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { + TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); + TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); + } +} + +#if 0 /* XXX: make the compiler happy (fv_alloc unused) */ +/* + * allocate flowvalve structure + */ +static struct flowvalve * +fv_alloc(rp) + struct red *rp; +{ + struct flowvalve *fv; + struct fve *fve; + int i, num; + + num = FV_FLOWLISTSIZE; + fv = malloc(sizeof(struct flowvalve), + M_DEVBUF, M_WAITOK); + if (fv == NULL) + return (NULL); + bzero(fv, sizeof(struct flowvalve)); + + fv->fv_fves = malloc(sizeof(struct fve) * num, + M_DEVBUF, M_WAITOK); + if (fv->fv_fves == NULL) { + free(fv, M_DEVBUF); + return (NULL); + } + bzero(fv->fv_fves, sizeof(struct fve) * num); + + fv->fv_flows = 0; + TAILQ_INIT(&fv->fv_flowlist); + for (i = 0; i < num; i++) { + fve = &fv->fv_fves[i]; + fve->fve_lastdrop.tv_sec = 0; + TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); + } + + /* initialize drop rate threshold in scaled fixed-point */ + fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; + + /* initialize drop rate to fraction table */ + fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, + 
M_DEVBUF, M_WAITOK); + if (fv->fv_p2ftab == NULL) { + free(fv->fv_fves, M_DEVBUF); + free(fv, M_DEVBUF); + return (NULL); + } + /* + * create the p2f table. + * (shift is used to keep the precision) + */ + for (i = 1; i < BRTT_SIZE; i++) { + int f; + + f = brtt_tab[i] << 8; + fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; + } + + return (fv); +} +#endif + +static void fv_destroy(fv) + struct flowvalve *fv; +{ + free(fv->fv_p2ftab, M_DEVBUF); + free(fv->fv_fves, M_DEVBUF); + free(fv, M_DEVBUF); +} + +static __inline int +fv_p2f(fv, p) + struct flowvalve *fv; + int p; +{ + int val, f; + + if (p >= BRTT_PMAX) + f = fv->fv_p2ftab[BRTT_SIZE-1]; + else if ((val = (p & BRTT_MASK))) + f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; + else + f = fv->fv_p2ftab[1]; + return (f); +} + +/* + * check if an arriving packet should be pre-dropped. + * called from red_addq() when a packet arrives. + * returns 1 when the packet should be pre-dropped. + * should be called in splimp. + */ +static int +fv_checkflow(fv, pktattr, fcache) + struct flowvalve *fv; + struct altq_pktattr *pktattr; + struct fve **fcache; +{ + struct fve *fve; + struct timeval now; + + fv->fv_ifseq++; + FV_TIMESTAMP(&now); + + if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) + /* no matching entry in the flowlist */ + return (0); + + *fcache = fve; + + /* update fraction f for every FV_N packets */ + if (++fve->fve_count == FV_N) { + /* + * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f + */ + fve->fve_f = + (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) + + fve->fve_f - FV_FUNSCALE(fve->fve_f); + fve->fve_ifseq = fv->fv_ifseq; + fve->fve_count = 0; + } + + /* + * overpumping test + */ + if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { + int fthresh; + + /* calculate a threshold */ + fthresh = fv_p2f(fv, fve->fve_p); + if (fve->fve_f > fthresh) + fve->fve_state = Red; + } + + if (fve->fve_state == Red) { + /* + * backoff test + */ + if (now.tv_sec - fve->fve_lastdrop.tv_sec > 
FV_BACKOFFTHRESH) { + /* no drop for at least FV_BACKOFFTHRESH sec */ + fve->fve_p = 0; + fve->fve_state = Green; +#ifdef FV_STATS + fv->fv_stats.escape++; +#endif + } else { + /* block this flow */ + flowlist_move_to_head(fv, fve); + fve->fve_lastdrop = now; +#ifdef FV_STATS + fv->fv_stats.predrop++; +#endif + return (1); + } + } + + /* + * p = (1 - Wp) * p + */ + fve->fve_p -= FV_PUNSCALE(fve->fve_p); + if (fve->fve_p < 0) + fve->fve_p = 0; +#ifdef FV_STATS + fv->fv_stats.pass++; +#endif + return (0); +} + +/* + * called from red_addq when a packet is dropped by red. + * should be called in splimp. + */ +static void fv_dropbyred(fv, pktattr, fcache) + struct flowvalve *fv; + struct altq_pktattr *pktattr; + struct fve *fcache; +{ + struct fve *fve; + struct timeval now; + + if (pktattr == NULL) + return; + FV_TIMESTAMP(&now); + + if (fcache != NULL) + /* the fve of this packet is already cached */ + fve = fcache; + else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) + fve = flowlist_reclaim(fv, pktattr); + + flowlist_move_to_head(fv, fve); + + /* + * update p: the following line cancels the update + * in fv_checkflow() and calculate + * p = Wp + (1 - Wp) * p + */ + fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; + + fve->fve_lastdrop = now; +} + +#endif /* ALTQ_FLOWVALVE */ + +#ifdef KLD_MODULE + +static struct altqsw red_sw = + {"red", redopen, redclose, redioctl}; + +ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); +MODULE_VERSION(altq_red, 1); + +#endif /* KLD_MODULE */ +#endif /* ALTQ3_COMPAT */ + +#endif /* ALTQ_RED */ diff --git a/freebsd/sys/net/altq/altq_red.h b/freebsd/sys/net/altq/altq_red.h new file mode 100644 index 00000000..8ae8d291 --- /dev/null +++ b/freebsd/sys/net/altq/altq_red.h @@ -0,0 +1,199 @@ +/*- + * Copyright (C) 1997-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_RED_H_ +#define _ALTQ_ALTQ_RED_H_ + +#include <net/altq/altq_classq.h> + +#ifdef ALTQ3_COMPAT +struct red_interface { + char red_ifname[IFNAMSIZ]; +}; + +struct red_stats { + struct red_interface iface; + int q_len; + int q_avg; + + struct pktcntr xmit_cnt; + struct pktcntr drop_cnt; + u_int drop_forced; + u_int drop_unforced; + u_int marked_packets; + + /* static red parameters */ + int q_limit; + int weight; + int inv_pmax; + int th_min; + int th_max; + + /* flowvalve related stuff */ + u_int fv_flows; + u_int fv_pass; + u_int fv_predrop; + u_int fv_alloc; + u_int fv_escape; +}; + +struct red_conf { + struct red_interface iface; + int red_weight; /* weight for EWMA */ + int red_inv_pmax; /* inverse of max drop probability */ + int red_thmin; /* red min threshold */ + int red_thmax; /* red max threshold */ + int red_limit; /* max queue length */ + int red_pkttime; /* average packet time in usec */ + int red_flags; /* see below */ +}; +#endif /* ALTQ3_COMPAT */ + +/* red flags */ +#define REDF_ECN4 0x01 /* use packet marking for IPv4 packets */ +#define REDF_ECN6 0x02 /* use packet marking for IPv6 packets */ +#define REDF_ECN (REDF_ECN4 | REDF_ECN6) +#define REDF_FLOWVALVE 0x04 /* use flowvalve (aka penalty-box) */ + +/* + * simpler versions of red parameters and statistics used by other + * disciplines (e.g., CBQ) + */ +struct redparams { + int th_min; /* red min threshold */ + int th_max; /* red max threshold */ + int inv_pmax; /* inverse of max drop probability */ +}; + +struct redstats { + int q_avg; + struct pktcntr xmit_cnt; + struct pktcntr drop_cnt; + u_int drop_forced; + u_int drop_unforced; + u_int marked_packets; +}; + +#ifdef ALTQ3_COMPAT +/* + * IOCTLs for RED + */ +#define RED_IF_ATTACH _IOW('Q', 1, struct red_interface) +#define RED_IF_DETACH _IOW('Q', 2, struct red_interface) +#define RED_ENABLE _IOW('Q', 3, struct red_interface) +#define 
RED_DISABLE _IOW('Q', 4, struct red_interface) +#define RED_CONFIG _IOWR('Q', 6, struct red_conf) +#define RED_GETSTATS _IOWR('Q', 12, struct red_stats) +#define RED_SETDEFAULTS _IOW('Q', 30, struct redparams) +#endif /* ALTQ3_COMPAT */ + +#ifdef _KERNEL + +#ifdef ALTQ3_COMPAT +struct flowvalve; +#endif + +/* weight table structure for idle time calibration */ +struct wtab { + struct wtab *w_next; + int w_weight; + int w_param_max; + int w_refcount; + int32_t w_tab[32]; +}; + +typedef struct red { + int red_pkttime; /* average packet time in micro sec + used for idle calibration */ + int red_flags; /* red flags */ + + /* red parameters */ + int red_weight; /* weight for EWMA */ + int red_inv_pmax; /* inverse of max drop probability */ + int red_thmin; /* red min threshold */ + int red_thmax; /* red max threshold */ + + /* variables for internal use */ + int red_wshift; /* log(red_weight) */ + int red_thmin_s; /* th_min scaled by avgshift */ + int red_thmax_s; /* th_max scaled by avgshift */ + int red_probd; /* drop probability denominator */ + + int red_avg; /* queue len avg scaled by avgshift */ + int red_count; /* packet count since last dropped/ + marked packet */ + int red_idle; /* queue was empty */ + int red_old; /* avg is above th_min */ + struct wtab *red_wtab; /* weight table */ + struct timeval red_last; /* time when the queue becomes idle */ + +#ifdef ALTQ3_COMPAT + struct flowvalve *red_flowvalve; /* flowvalve state */ +#endif + + struct { + struct pktcntr xmit_cnt; + struct pktcntr drop_cnt; + u_int drop_forced; + u_int drop_unforced; + u_int marked_packets; + } red_stats; +} red_t; + +#ifdef ALTQ3_COMPAT +typedef struct red_queue { + struct red_queue *rq_next; /* next red_state in the list */ + struct ifaltq *rq_ifq; /* backpointer to ifaltq */ + + class_queue_t *rq_q; + + red_t *rq_red; +} red_queue_t; +#endif /* ALTQ3_COMPAT */ + +/* red drop types */ +#define DTYPE_NODROP 0 /* no drop */ +#define DTYPE_FORCED 1 /* a "forced" drop */ +#define 
DTYPE_EARLY 2 /* an "unforced" (early) drop */ + +extern red_t *red_alloc(int, int, int, int, int, int); +extern void red_destroy(red_t *); +extern void red_getstats(red_t *, struct redstats *); +extern int red_addq(red_t *, class_queue_t *, struct mbuf *, + struct altq_pktattr *); +extern struct mbuf *red_getq(red_t *, class_queue_t *); +extern int drop_early(int, int, int); +extern int mark_ecn(struct mbuf *, struct altq_pktattr *, int); +extern struct wtab *wtab_alloc(int); +extern int wtab_destroy(struct wtab *); +extern int32_t pow_w(struct wtab *, int); + +#endif /* _KERNEL */ + +#endif /* _ALTQ_ALTQ_RED_H_ */ diff --git a/freebsd/sys/net/altq/altq_rio.c b/freebsd/sys/net/altq/altq_rio.c new file mode 100644 index 00000000..bad0257c --- /dev/null +++ b/freebsd/sys/net/altq/altq_rio.c @@ -0,0 +1,846 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (C) 1998-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/*- + * Copyright (c) 1990-1994 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Computer Systems + * Engineering Group at Lawrence Berkeley Laboratory. + * 4. Neither the name of the University nor of the Laboratory may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ + * $FreeBSD$ + */ + +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> +#include <rtems/bsd/sys/errno.h> +#if 1 /* ALTQ3_COMPAT */ +#include <sys/proc.h> +#include <sys/sockio.h> +#include <sys/kernel.h> +#endif + +#include <net/if.h> +#include <net/if_var.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#ifdef INET6 +#include <netinet/ip6.h> +#endif + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <net/altq/altq.h> +#include <net/altq/altq_cdnr.h> +#include <net/altq/altq_red.h> +#include <net/altq/altq_rio.h> +#ifdef ALTQ3_COMPAT +#include <net/altq/altq_conf.h> +#endif + +/* + * RIO: RED with IN/OUT bit + * described in + * "Explicit Allocation of Best Effort Packet Delivery Service" + * David D. Clark and Wenjia Fang, MIT Lab for Computer Science + * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} + * + * this implementation is extended to support more than 2 drop precedence + * values as described in RFC2597 (Assured Forwarding PHB Group). + * + */ +/* + * AF DS (differentiated service) codepoints. 
+ * (classes can be mapped to CBQ or H-FSC classes.) + * + * 0 1 2 3 4 5 6 7 + * +---+---+---+---+---+---+---+---+ + * | CLASS |DropPre| 0 | CU | + * +---+---+---+---+---+---+---+---+ + * + * class 1: 001 + * class 2: 010 + * class 3: 011 + * class 4: 100 + * + * low drop prec: 01 + * medium drop prec: 10 + * high drop prec: 01 + */ + +/* normal red parameters */ +#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ + /* q_weight = 0.00195 */ + +/* red parameters for a slow link */ +#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ + /* q_weight = 0.0078125 */ + +/* red parameters for a very slow link (e.g., dialup) */ +#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ + /* q_weight = 0.015625 */ + +/* fixed-point uses 12-bit decimal places */ +#define FP_SHIFT 12 /* fixed-point shift */ + +/* red parameters for drop probability */ +#define INV_P_MAX 10 /* inverse of max drop probability */ +#define TH_MIN 5 /* min threshold */ +#define TH_MAX 15 /* max threshold */ + +#define RIO_LIMIT 60 /* default max queue length */ +#define RIO_STATS /* collect statistics */ + +#define TV_DELTA(a, b, delta) { \ + register int xxs; \ + \ + delta = (a)->tv_usec - (b)->tv_usec; \ + if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \ + if (xxs < 0) { \ + delta = 60000000; \ + } else if (xxs > 4) { \ + if (xxs > 60) \ + delta = 60000000; \ + else \ + delta += xxs * 1000000; \ + } else while (xxs > 0) { \ + delta += 1000000; \ + xxs--; \ + } \ + } \ +} + +#ifdef ALTQ3_COMPAT +/* rio_list keeps all rio_queue_t's allocated. 
*/ +static rio_queue_t *rio_list = NULL; +#endif +/* default rio parameter values */ +static struct redparams default_rio_params[RIO_NDROPPREC] = { + /* th_min, th_max, inv_pmax */ + { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */ + { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */ + { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */ +}; + +/* internal function prototypes */ +static int dscp2index(u_int8_t); +#ifdef ALTQ3_COMPAT +static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *rio_dequeue(struct ifaltq *, int); +static int rio_request(struct ifaltq *, int, void *); +static int rio_detach(rio_queue_t *); + +/* + * rio device interface + */ +altqdev_decl(rio); + +#endif /* ALTQ3_COMPAT */ + +rio_t * +rio_alloc(int weight, struct redparams *params, int flags, int pkttime) +{ + rio_t *rp; + int w, i; + int npkts_per_sec; + + rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO); + if (rp == NULL) + return (NULL); + + rp->rio_flags = flags; + if (pkttime == 0) + /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ + rp->rio_pkttime = 800; + else + rp->rio_pkttime = pkttime; + + if (weight != 0) + rp->rio_weight = weight; + else { + /* use default */ + rp->rio_weight = W_WEIGHT; + + /* when the link is very slow, adjust red parameters */ + npkts_per_sec = 1000000 / rp->rio_pkttime; + if (npkts_per_sec < 50) { + /* up to about 400Kbps */ + rp->rio_weight = W_WEIGHT_2; + } else if (npkts_per_sec < 300) { + /* up to about 2.4Mbps */ + rp->rio_weight = W_WEIGHT_1; + } + } + + /* calculate wshift. weight must be power of 2 */ + w = rp->rio_weight; + for (i = 0; w > 1; i++) + w = w >> 1; + rp->rio_wshift = i; + w = 1 << rp->rio_wshift; + if (w != rp->rio_weight) { + printf("invalid weight value %d for red! 
use %d\n", + rp->rio_weight, w); + rp->rio_weight = w; + } + + /* allocate weight table */ + rp->rio_wtab = wtab_alloc(rp->rio_weight); + + for (i = 0; i < RIO_NDROPPREC; i++) { + struct dropprec_state *prec = &rp->rio_precstate[i]; + + prec->avg = 0; + prec->idle = 1; + + if (params == NULL || params[i].inv_pmax == 0) + prec->inv_pmax = default_rio_params[i].inv_pmax; + else + prec->inv_pmax = params[i].inv_pmax; + if (params == NULL || params[i].th_min == 0) + prec->th_min = default_rio_params[i].th_min; + else + prec->th_min = params[i].th_min; + if (params == NULL || params[i].th_max == 0) + prec->th_max = default_rio_params[i].th_max; + else + prec->th_max = params[i].th_max; + + /* + * th_min_s and th_max_s are scaled versions of th_min + * and th_max to be compared with avg. + */ + prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT); + prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT); + + /* + * precompute probability denominator + * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point + */ + prec->probd = (2 * (prec->th_max - prec->th_min) + * prec->inv_pmax) << FP_SHIFT; + + microtime(&prec->last); + } + + return (rp); +} + +void +rio_destroy(rio_t *rp) +{ + wtab_destroy(rp->rio_wtab); + free(rp, M_DEVBUF); +} + +void +rio_getstats(rio_t *rp, struct redstats *sp) +{ + int i; + + for (i = 0; i < RIO_NDROPPREC; i++) { + bcopy(&rp->q_stats[i], sp, sizeof(struct redstats)); + sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; + sp++; + } +} + +#if (RIO_NDROPPREC == 3) +/* + * internally, a drop precedence value is converted to an index + * starting from 0. + */ +static int +dscp2index(u_int8_t dscp) +{ + int dpindex = dscp & AF_DROPPRECMASK; + + if (dpindex == 0) + return (0); + return ((dpindex >> 3) - 1); +} +#endif + +#if 1 +/* + * kludge: when a packet is dequeued, we need to know its drop precedence + * in order to keep the queue length of each drop precedence. + * use m_pkthdr.rcvif to pass this info. 
+ */ +#define RIOM_SET_PRECINDEX(m, idx) \ + do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0) +#define RIOM_GET_PRECINDEX(m) \ + ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \ + (m)->m_pkthdr.rcvif = NULL; idx; }) +#endif + +int +rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, + struct altq_pktattr *pktattr) +{ + int avg, droptype; + u_int8_t dsfield, odsfield; + int dpindex, i, n, t; + struct timeval now; + struct dropprec_state *prec; + + dsfield = odsfield = read_dsfield(m, pktattr); + dpindex = dscp2index(dsfield); + + /* + * update avg of the precedence states whose drop precedence + * is larger than or equal to the drop precedence of the packet + */ + now.tv_sec = 0; + for (i = dpindex; i < RIO_NDROPPREC; i++) { + prec = &rp->rio_precstate[i]; + avg = prec->avg; + if (prec->idle) { + prec->idle = 0; + if (now.tv_sec == 0) + microtime(&now); + t = (now.tv_sec - prec->last.tv_sec); + if (t > 60) + avg = 0; + else { + t = t * 1000000 + + (now.tv_usec - prec->last.tv_usec); + n = t / rp->rio_pkttime; + /* calculate (avg = (1 - Wq)^n * avg) */ + if (n > 0) + avg = (avg >> FP_SHIFT) * + pow_w(rp->rio_wtab, n); + } + } + + /* run estimator. (avg is scaled by WEIGHT in fixed-point) */ + avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift); + prec->avg = avg; /* save the new value */ + /* + * count keeps a tally of arriving traffic that has not + * been dropped. 
+ */ + prec->count++; + } + + prec = &rp->rio_precstate[dpindex]; + avg = prec->avg; + + /* see if we drop early */ + droptype = DTYPE_NODROP; + if (avg >= prec->th_min_s && prec->qlen > 1) { + if (avg >= prec->th_max_s) { + /* avg >= th_max: forced drop */ + droptype = DTYPE_FORCED; + } else if (prec->old == 0) { + /* first exceeds th_min */ + prec->count = 1; + prec->old = 1; + } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift, + prec->probd, prec->count)) { + /* unforced drop by red */ + droptype = DTYPE_EARLY; + } + } else { + /* avg < th_min */ + prec->old = 0; + } + + /* + * if the queue length hits the hard limit, it's a forced drop. + */ + if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) + droptype = DTYPE_FORCED; + + if (droptype != DTYPE_NODROP) { + /* always drop incoming packet (as opposed to randomdrop) */ + for (i = dpindex; i < RIO_NDROPPREC; i++) + rp->rio_precstate[i].count = 0; +#ifdef RIO_STATS + if (droptype == DTYPE_EARLY) + rp->q_stats[dpindex].drop_unforced++; + else + rp->q_stats[dpindex].drop_forced++; + PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m)); +#endif + m_freem(m); + return (-1); + } + + for (i = dpindex; i < RIO_NDROPPREC; i++) + rp->rio_precstate[i].qlen++; + + /* save drop precedence index in mbuf hdr */ + RIOM_SET_PRECINDEX(m, dpindex); + + if (rp->rio_flags & RIOF_CLEARDSCP) + dsfield &= ~DSCP_MASK; + + if (dsfield != odsfield) + write_dsfield(m, pktattr, dsfield); + + _addq(q, m); + +#ifdef RIO_STATS + PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m)); +#endif + return (0); +} + +struct mbuf * +rio_getq(rio_t *rp, class_queue_t *q) +{ + struct mbuf *m; + int dpindex, i; + + if ((m = _getq(q)) == NULL) + return NULL; + + dpindex = RIOM_GET_PRECINDEX(m); + for (i = dpindex; i < RIO_NDROPPREC; i++) { + if (--rp->rio_precstate[i].qlen == 0) { + if (rp->rio_precstate[i].idle == 0) { + rp->rio_precstate[i].idle = 1; + microtime(&rp->rio_precstate[i].last); + } + } + } + return (m); +} + +#ifdef 
ALTQ3_COMPAT +int +rioopen(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + /* everything will be done when the queueing scheme is attached. */ + return 0; +} + +int +rioclose(dev, flag, fmt, p) + dev_t dev; + int flag, fmt; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + rio_queue_t *rqp; + int err, error = 0; + + while ((rqp = rio_list) != NULL) { + /* destroy all */ + err = rio_detach(rqp); + if (err != 0 && error == 0) + error = err; + } + + return error; +} + +int +rioioctl(dev, cmd, addr, flag, p) + dev_t dev; + ioctlcmd_t cmd; + caddr_t addr; + int flag; +#if (__FreeBSD_version > 500000) + struct thread *p; +#else + struct proc *p; +#endif +{ + rio_queue_t *rqp; + struct rio_interface *ifacep; + struct ifnet *ifp; + int error = 0; + + /* check super-user privilege */ + switch (cmd) { + case RIO_GETSTATS: + break; + default: +#if (__FreeBSD_version > 700000) + if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) + return (error); +#elsif (__FreeBSD_version > 400000) + if ((error = suser(p)) != 0) + return (error); +#else + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) + return (error); +#endif + break; + } + + switch (cmd) { + + case RIO_ENABLE: + ifacep = (struct rio_interface *)addr; + if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { + error = EBADF; + break; + } + error = altq_enable(rqp->rq_ifq); + break; + + case RIO_DISABLE: + ifacep = (struct rio_interface *)addr; + if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { + error = EBADF; + break; + } + error = altq_disable(rqp->rq_ifq); + break; + + case RIO_IF_ATTACH: + ifp = ifunit(((struct rio_interface *)addr)->rio_ifname); + if (ifp == NULL) { + error = ENXIO; + break; + } + + /* allocate and initialize rio_queue_t */ + rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK); + if (rqp == NULL) { + error = ENOMEM; + break; + } + 
bzero(rqp, sizeof(rio_queue_t)); + + rqp->rq_q = malloc(sizeof(class_queue_t), + M_DEVBUF, M_WAITOK); + if (rqp->rq_q == NULL) { + free(rqp, M_DEVBUF); + error = ENOMEM; + break; + } + bzero(rqp->rq_q, sizeof(class_queue_t)); + + rqp->rq_rio = rio_alloc(0, NULL, 0, 0); + if (rqp->rq_rio == NULL) { + free(rqp->rq_q, M_DEVBUF); + free(rqp, M_DEVBUF); + error = ENOMEM; + break; + } + + rqp->rq_ifq = &ifp->if_snd; + qtail(rqp->rq_q) = NULL; + qlen(rqp->rq_q) = 0; + qlimit(rqp->rq_q) = RIO_LIMIT; + qtype(rqp->rq_q) = Q_RIO; + + /* + * set RIO to this ifnet structure. + */ + error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp, + rio_enqueue, rio_dequeue, rio_request, + NULL, NULL); + if (error) { + rio_destroy(rqp->rq_rio); + free(rqp->rq_q, M_DEVBUF); + free(rqp, M_DEVBUF); + break; + } + + /* add this state to the rio list */ + rqp->rq_next = rio_list; + rio_list = rqp; + break; + + case RIO_IF_DETACH: + ifacep = (struct rio_interface *)addr; + if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { + error = EBADF; + break; + } + error = rio_detach(rqp); + break; + + case RIO_GETSTATS: + do { + struct rio_stats *q_stats; + rio_t *rp; + int i; + + q_stats = (struct rio_stats *)addr; + if ((rqp = altq_lookup(q_stats->iface.rio_ifname, + ALTQT_RIO)) == NULL) { + error = EBADF; + break; + } + + rp = rqp->rq_rio; + + q_stats->q_limit = qlimit(rqp->rq_q); + q_stats->weight = rp->rio_weight; + q_stats->flags = rp->rio_flags; + + for (i = 0; i < RIO_NDROPPREC; i++) { + q_stats->q_len[i] = rp->rio_precstate[i].qlen; + bcopy(&rp->q_stats[i], &q_stats->q_stats[i], + sizeof(struct redstats)); + q_stats->q_stats[i].q_avg = + rp->rio_precstate[i].avg >> rp->rio_wshift; + + q_stats->q_params[i].inv_pmax + = rp->rio_precstate[i].inv_pmax; + q_stats->q_params[i].th_min + = rp->rio_precstate[i].th_min; + q_stats->q_params[i].th_max + = rp->rio_precstate[i].th_max; + } + } while (/*CONSTCOND*/ 0); + break; + + case RIO_CONFIG: + do { + struct rio_conf *fc; + rio_t *new; + int s, 
limit, i; + + fc = (struct rio_conf *)addr; + if ((rqp = altq_lookup(fc->iface.rio_ifname, + ALTQT_RIO)) == NULL) { + error = EBADF; + break; + } + + new = rio_alloc(fc->rio_weight, &fc->q_params[0], + fc->rio_flags, fc->rio_pkttime); + if (new == NULL) { + error = ENOMEM; + break; + } + + s = splnet(); + _flushq(rqp->rq_q); + limit = fc->rio_limit; + if (limit < fc->q_params[RIO_NDROPPREC-1].th_max) + limit = fc->q_params[RIO_NDROPPREC-1].th_max; + qlimit(rqp->rq_q) = limit; + + rio_destroy(rqp->rq_rio); + rqp->rq_rio = new; + + splx(s); + + /* write back new values */ + fc->rio_limit = limit; + for (i = 0; i < RIO_NDROPPREC; i++) { + fc->q_params[i].inv_pmax = + rqp->rq_rio->rio_precstate[i].inv_pmax; + fc->q_params[i].th_min = + rqp->rq_rio->rio_precstate[i].th_min; + fc->q_params[i].th_max = + rqp->rq_rio->rio_precstate[i].th_max; + } + } while (/*CONSTCOND*/ 0); + break; + + case RIO_SETDEFAULTS: + do { + struct redparams *rp; + int i; + + rp = (struct redparams *)addr; + for (i = 0; i < RIO_NDROPPREC; i++) + default_rio_params[i] = rp[i]; + } while (/*CONSTCOND*/ 0); + break; + + default: + error = EINVAL; + break; + } + + return error; +} + +static int +rio_detach(rqp) + rio_queue_t *rqp; +{ + rio_queue_t *tmp; + int error = 0; + + if (ALTQ_IS_ENABLED(rqp->rq_ifq)) + altq_disable(rqp->rq_ifq); + + if ((error = altq_detach(rqp->rq_ifq))) + return (error); + + if (rio_list == rqp) + rio_list = rqp->rq_next; + else { + for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next) + if (tmp->rq_next == rqp) { + tmp->rq_next = rqp->rq_next; + break; + } + if (tmp == NULL) + printf("rio_detach: no state found in rio_list!\n"); + } + + rio_destroy(rqp->rq_rio); + free(rqp->rq_q, M_DEVBUF); + free(rqp, M_DEVBUF); + return (error); +} + +/* + * rio support routines + */ +static int +rio_request(ifq, req, arg) + struct ifaltq *ifq; + int req; + void *arg; +{ + rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + 
_flushq(rqp->rq_q); + if (ALTQ_IS_ENABLED(ifq)) + ifq->ifq_len = 0; + break; + } + return (0); +} + +/* + * enqueue routine: + * + * returns: 0 when successfully queued. + * ENOBUFS when drop occurs. + */ +static int +rio_enqueue(ifq, m, pktattr) + struct ifaltq *ifq; + struct mbuf *m; + struct altq_pktattr *pktattr; +{ + rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; + int error = 0; + + IFQ_LOCK_ASSERT(ifq); + + if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0) + ifq->ifq_len++; + else + error = ENOBUFS; + return error; +} + +/* + * dequeue routine: + * must be called in splimp. + * + * returns: mbuf dequeued. + * NULL when no packet is available in the queue. + */ + +static struct mbuf * +rio_dequeue(ifq, op) + struct ifaltq *ifq; + int op; +{ + rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; + struct mbuf *m = NULL; + + IFQ_LOCK_ASSERT(ifq); + + if (op == ALTDQ_POLL) + return qhead(rqp->rq_q); + + m = rio_getq(rqp->rq_rio, rqp->rq_q); + if (m != NULL) + ifq->ifq_len--; + return m; +} + +#ifdef KLD_MODULE + +static struct altqsw rio_sw = + {"rio", rioopen, rioclose, rioioctl}; + +ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw); +MODULE_VERSION(altq_rio, 1); +MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1); + +#endif /* KLD_MODULE */ +#endif /* ALTQ3_COMPAT */ + +#endif /* ALTQ_RIO */ diff --git a/freebsd/sys/net/altq/altq_rio.h b/freebsd/sys/net/altq/altq_rio.h new file mode 100644 index 00000000..ce9dc0e0 --- /dev/null +++ b/freebsd/sys/net/altq/altq_rio.h @@ -0,0 +1,145 @@ +/*- + * Copyright (C) 1998-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_RIO_H_ +#define _ALTQ_ALTQ_RIO_H_ + +#include <net/altq/altq_classq.h> + +/* + * RIO: RED with IN/OUT bit + * (extended to support more than 2 drop precedence values) + */ +#define RIO_NDROPPREC 3 /* number of drop precedence values */ + +#ifdef ALTQ3_COMPAT +struct rio_interface { + char rio_ifname[IFNAMSIZ]; +}; + +struct rio_stats { + struct rio_interface iface; + int q_len[RIO_NDROPPREC]; + struct redstats q_stats[RIO_NDROPPREC]; + + /* static red parameters */ + int q_limit; + int weight; + int flags; + struct redparams q_params[RIO_NDROPPREC]; +}; + +struct rio_conf { + struct rio_interface iface; + struct redparams q_params[RIO_NDROPPREC]; + int rio_weight; /* weight for EWMA */ + int rio_limit; /* max queue length */ + int rio_pkttime; /* average packet time in usec */ + int rio_flags; /* see below */ +}; +#endif /* ALTQ3_COMPAT */ + +/* rio flags */ +#define RIOF_ECN4 0x01 /* use 
packet marking for IPv4 packets */ +#define RIOF_ECN6 0x02 /* use packet marking for IPv6 packets */ +#define RIOF_ECN (RIOF_ECN4 | RIOF_ECN6) +#define RIOF_CLEARDSCP 0x200 /* clear diffserv codepoint */ + +#ifdef ALTQ3_COMPAT +/* + * IOCTLs for RIO + */ +#define RIO_IF_ATTACH _IOW('Q', 1, struct rio_interface) +#define RIO_IF_DETACH _IOW('Q', 2, struct rio_interface) +#define RIO_ENABLE _IOW('Q', 3, struct rio_interface) +#define RIO_DISABLE _IOW('Q', 4, struct rio_interface) +#define RIO_CONFIG _IOWR('Q', 6, struct rio_conf) +#define RIO_GETSTATS _IOWR('Q', 12, struct rio_stats) +#define RIO_SETDEFAULTS _IOW('Q', 30, struct redparams[RIO_NDROPPREC]) +#endif /* ALTQ3_COMPAT */ + +#ifdef _KERNEL + +typedef struct rio { + /* per drop precedence structure */ + struct dropprec_state { + /* red parameters */ + int inv_pmax; /* inverse of max drop probability */ + int th_min; /* red min threshold */ + int th_max; /* red max threshold */ + + /* variables for internal use */ + int th_min_s; /* th_min scaled by avgshift */ + int th_max_s; /* th_max scaled by avgshift */ + int probd; /* drop probability denominator */ + + int qlen; /* queue length */ + int avg; /* (scaled) queue length average */ + int count; /* packet count since the last dropped/ + marked packet */ + int idle; /* queue was empty */ + int old; /* avg is above th_min */ + struct timeval last; /* timestamp when queue becomes idle */ + } rio_precstate[RIO_NDROPPREC]; + + int rio_wshift; /* log(red_weight) */ + int rio_weight; /* weight for EWMA */ + struct wtab *rio_wtab; /* weight table */ + + int rio_pkttime; /* average packet time in micro sec + used for idle calibration */ + int rio_flags; /* rio flags */ + + u_int8_t rio_codepoint; /* codepoint value to tag packets */ + u_int8_t rio_codepointmask; /* codepoint mask bits */ + + struct redstats q_stats[RIO_NDROPPREC]; /* statistics */ +} rio_t; + +#ifdef ALTQ3_COMPAT +typedef struct rio_queue { + struct rio_queue *rq_next; /* next red_state in the list */ 
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */ + + class_queue_t *rq_q; + + rio_t *rq_rio; +} rio_queue_t; +#endif /* ALTQ3_COMPAT */ + +extern rio_t *rio_alloc(int, struct redparams *, int, int); +extern void rio_destroy(rio_t *); +extern void rio_getstats(rio_t *, struct redstats *); +extern int rio_addq(rio_t *, class_queue_t *, struct mbuf *, + struct altq_pktattr *); +extern struct mbuf *rio_getq(rio_t *, class_queue_t *); + +#endif /* _KERNEL */ + +#endif /* _ALTQ_ALTQ_RIO_H_ */ diff --git a/freebsd/sys/net/altq/altq_rmclass.c b/freebsd/sys/net/altq/altq_rmclass.c new file mode 100644 index 00000000..160884e2 --- /dev/null +++ b/freebsd/sys/net/altq/altq_rmclass.c @@ -0,0 +1,1841 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 1991-1997 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Network Research + * Group at Lawrence Berkeley Laboratory. + * 4. Neither the name of the University nor of the Laboratory may be used + * to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * LBL code modified by speer@eng.sun.com, May 1977. + * For questions and/or comments, please send mail to cbq@ee.lbl.gov + * + * @(#)rm_class.c 1.48 97/12/05 SMI + * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ + * $FreeBSD$ + */ +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/time.h> +#ifdef ALTQ3_COMPAT +#include <sys/kernel.h> +#endif + +#include <net/if.h> +#include <net/if_var.h> +#ifdef ALTQ3_COMPAT +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#endif + +#include <net/altq/if_altq.h> +#include <net/altq/altq.h> +#include <net/altq/altq_codel.h> +#include <net/altq/altq_rmclass.h> +#include <net/altq/altq_rmclass_debug.h> +#include <net/altq/altq_red.h> +#include <net/altq/altq_rio.h> + +/* + * Local Macros + */ + +#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } + +/* + * Local routines. 
+ */ + +static int rmc_satisfied(struct rm_class *, struct timeval *); +static void rmc_wrr_set_weights(struct rm_ifdat *); +static void rmc_depth_compute(struct rm_class *); +static void rmc_depth_recompute(rm_class_t *); + +static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int); +static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int); + +static int _rmc_addq(rm_class_t *, mbuf_t *); +static void _rmc_dropq(rm_class_t *); +static mbuf_t *_rmc_getq(rm_class_t *); +static mbuf_t *_rmc_pollq(rm_class_t *); + +static int rmc_under_limit(struct rm_class *, struct timeval *); +static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); +static void rmc_drop_action(struct rm_class *); +static void rmc_restart(struct rm_class *); +static void rmc_root_overlimit(struct rm_class *, struct rm_class *); + +#define BORROW_OFFTIME +/* + * BORROW_OFFTIME (experimental): + * borrow the offtime of the class borrowing from. + * the reason is that when its own offtime is set, the class is unable + * to borrow much, especially when cutoff is taking effect. + * but when the borrowed class is overloaded (advidle is close to minidle), + * use the borrowing class's offtime to avoid overload. + */ +#define ADJUST_CUTOFF +/* + * ADJUST_CUTOFF (experimental): + * if no underlimit class is found due to cutoff, increase cutoff and + * retry the scheduling loop. + * also, don't invoke delay_actions while cutoff is taking effect, + * since a sleeping class won't have a chance to be scheduled in the + * next loop. + * + * now heuristics for setting the top-level variable (cutoff_) becomes: + * 1. if a packet arrives for a not-overlimit class, set cutoff + * to the depth of the class. + * 2. if cutoff is i, and a packet arrives for an overlimit class + * with an underlimit ancestor at a lower level than i (say j), + * then set cutoff to j. + * 3. 
at scheduling a packet, if there is no underlimit class + * due to the current cutoff level, increase cutoff by 1 and + * then try to schedule again. + */ + +/* + * rm_class_t * + * rmc_newclass(...) - Create a new resource management class at priority + * 'pri' on the interface given by 'ifd'. + * + * nsecPerByte is the data rate of the interface in nanoseconds/byte. + * E.g., 800 for a 10Mb/s ethernet. If the class gets less + * than 100% of the bandwidth, this number should be the + * 'effective' rate for the class. Let f be the + * bandwidth fraction allocated to this class, and let + * nsPerByte be the data rate of the output link in + * nanoseconds/byte. Then nsecPerByte is set to + * nsPerByte / f. E.g., 1600 (= 800 / .5) + * for a class that gets 50% of an ethernet's bandwidth. + * + * action the routine to call when the class is over limit. + * + * maxq max allowable queue size for class (in packets). + * + * parent parent class pointer. + * + * borrow class to borrow from (should be either 'parent' or null). + * + * maxidle max value allowed for class 'idle' time estimate (this + * parameter determines how large an initial burst of packets + * can be before overlimit action is invoked. + * + * offtime how long 'delay' action will delay when class goes over + * limit (this parameter determines the steady-state burst + * size when a class is running over its limit). 
+ * + * Maxidle and offtime have to be computed from the following: If the + * average packet size is s, the bandwidth fraction allocated to this + * class is f, we want to allow b packet bursts, and the gain of the + * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: + * + * ptime = s * nsPerByte * (1 - f) / f + * maxidle = ptime * (1 - g^b) / g^b + * minidle = -ptime * (1 / (f - 1)) + * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1) + * + * Operationally, it's convenient to specify maxidle & offtime in units + * independent of the link bandwidth so the maxidle & offtime passed to + * this routine are the above values multiplied by 8*f/(1000*nsPerByte). + * (The constant factor is a scale factor needed to make the parameters + * integers. This scaling also means that the 'unscaled' values of + * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, + * not nanoseconds.) Also note that the 'idle' filter computation keeps + * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of + * maxidle also must be scaled upward by this value. 
Thus, the passed + * values for maxidle and offtime can be computed as follows: + * + * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) + * offtime = offtime * 8 / (1000 * nsecPerByte) + * + * When USE_HRTIME is employed, then maxidle and offtime become: + * maxidle = maxilde * (8.0 / nsecPerByte); + * offtime = offtime * (8.0 / nsecPerByte); + */ +struct rm_class * +rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, + void (*action)(rm_class_t *, rm_class_t *), int maxq, + struct rm_class *parent, struct rm_class *borrow, u_int maxidle, + int minidle, u_int offtime, int pktsize, int flags) +{ + struct rm_class *cl; + struct rm_class *peer; + int s; + + if (pri >= RM_MAXPRIO) + return (NULL); +#ifndef ALTQ_RED + if (flags & RMCF_RED) { +#ifdef ALTQ_DEBUG + printf("rmc_newclass: RED not configured for CBQ!\n"); +#endif + return (NULL); + } +#endif +#ifndef ALTQ_RIO + if (flags & RMCF_RIO) { +#ifdef ALTQ_DEBUG + printf("rmc_newclass: RIO not configured for CBQ!\n"); +#endif + return (NULL); + } +#endif +#ifndef ALTQ_CODEL + if (flags & RMCF_CODEL) { +#ifdef ALTQ_DEBUG + printf("rmc_newclass: CODEL not configured for CBQ!\n"); +#endif + return (NULL); + } +#endif + + cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cl == NULL) + return (NULL); + CALLOUT_INIT(&cl->callout_); + cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cl->q_ == NULL) { + free(cl, M_DEVBUF); + return (NULL); + } + + /* + * Class initialization. 
+ */ + cl->children_ = NULL; + cl->parent_ = parent; + cl->borrow_ = borrow; + cl->leaf_ = 1; + cl->ifdat_ = ifd; + cl->pri_ = pri; + cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ + cl->depth_ = 0; + cl->qthresh_ = 0; + cl->ns_per_byte_ = nsecPerByte; + + qlimit(cl->q_) = maxq; + qtype(cl->q_) = Q_DROPHEAD; + qlen(cl->q_) = 0; + cl->flags_ = flags; + +#if 1 /* minidle is also scaled in ALTQ */ + cl->minidle_ = (minidle * (int)nsecPerByte) / 8; + if (cl->minidle_ > 0) + cl->minidle_ = 0; +#else + cl->minidle_ = minidle; +#endif + cl->maxidle_ = (maxidle * nsecPerByte) / 8; + if (cl->maxidle_ == 0) + cl->maxidle_ = 1; +#if 1 /* offtime is also scaled in ALTQ */ + cl->avgidle_ = cl->maxidle_; + cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; + if (cl->offtime_ == 0) + cl->offtime_ = 1; +#else + cl->avgidle_ = 0; + cl->offtime_ = (offtime * nsecPerByte) / 8; +#endif + cl->overlimit = action; + +#ifdef ALTQ_RED + if (flags & (RMCF_RED|RMCF_RIO)) { + int red_flags, red_pkttime; + + red_flags = 0; + if (flags & RMCF_ECN) + red_flags |= REDF_ECN; + if (flags & RMCF_FLOWVALVE) + red_flags |= REDF_FLOWVALVE; +#ifdef ALTQ_RIO + if (flags & RMCF_CLEARDSCP) + red_flags |= RIOF_CLEARDSCP; +#endif + red_pkttime = nsecPerByte * pktsize / 1000; + + if (flags & RMCF_RED) { + cl->red_ = red_alloc(0, 0, + qlimit(cl->q_) * 10/100, + qlimit(cl->q_) * 30/100, + red_flags, red_pkttime); + if (cl->red_ != NULL) + qtype(cl->q_) = Q_RED; + } +#ifdef ALTQ_RIO + else { + cl->red_ = (red_t *)rio_alloc(0, NULL, + red_flags, red_pkttime); + if (cl->red_ != NULL) + qtype(cl->q_) = Q_RIO; + } +#endif + } +#endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & RMCF_CODEL) { + cl->codel_ = codel_alloc(5, 100, 0); + if (cl->codel_ != NULL) + qtype(cl->q_) = Q_CODEL; + } +#endif + + /* + * put the class into the class tree + */ + s = splnet(); + IFQ_LOCK(ifd->ifq_); + if ((peer = ifd->active_[pri]) != NULL) { + /* find the last class at this pri */ + cl->peer_ = 
peer; + while (peer->peer_ != ifd->active_[pri]) + peer = peer->peer_; + peer->peer_ = cl; + } else { + ifd->active_[pri] = cl; + cl->peer_ = cl; + } + + if (cl->parent_) { + cl->next_ = parent->children_; + parent->children_ = cl; + parent->leaf_ = 0; + } + + /* + * Compute the depth of this class and its ancestors in the class + * hierarchy. + */ + rmc_depth_compute(cl); + + /* + * If CBQ's WRR is enabled, then initialize the class WRR state. + */ + if (ifd->wrr_) { + ifd->num_[pri]++; + ifd->alloc_[pri] += cl->allotment_; + rmc_wrr_set_weights(ifd); + } + IFQ_UNLOCK(ifd->ifq_); + splx(s); + return (cl); +} + +int +rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle, + int minidle, u_int offtime, int pktsize) +{ + struct rm_ifdat *ifd; + u_int old_allotment; + int s; + + ifd = cl->ifdat_; + old_allotment = cl->allotment_; + + s = splnet(); + IFQ_LOCK(ifd->ifq_); + cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ + cl->qthresh_ = 0; + cl->ns_per_byte_ = nsecPerByte; + + qlimit(cl->q_) = maxq; + +#if 1 /* minidle is also scaled in ALTQ */ + cl->minidle_ = (minidle * nsecPerByte) / 8; + if (cl->minidle_ > 0) + cl->minidle_ = 0; +#else + cl->minidle_ = minidle; +#endif + cl->maxidle_ = (maxidle * nsecPerByte) / 8; + if (cl->maxidle_ == 0) + cl->maxidle_ = 1; +#if 1 /* offtime is also scaled in ALTQ */ + cl->avgidle_ = cl->maxidle_; + cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; + if (cl->offtime_ == 0) + cl->offtime_ = 1; +#else + cl->avgidle_ = 0; + cl->offtime_ = (offtime * nsecPerByte) / 8; +#endif + + /* + * If CBQ's WRR is enabled, then initialize the class WRR state. 
+ */ + if (ifd->wrr_) { + ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; + rmc_wrr_set_weights(ifd); + } + IFQ_UNLOCK(ifd->ifq_); + splx(s); + return (0); +} + +/* + * static void + * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes + * the appropriate run robin weights for the CBQ weighted round robin + * algorithm. + * + * Returns: NONE + */ + +static void +rmc_wrr_set_weights(struct rm_ifdat *ifd) +{ + int i; + struct rm_class *cl, *clh; + + for (i = 0; i < RM_MAXPRIO; i++) { + /* + * This is inverted from that of the simulator to + * maintain precision. + */ + if (ifd->num_[i] == 0) + ifd->M_[i] = 0; + else + ifd->M_[i] = ifd->alloc_[i] / + (ifd->num_[i] * ifd->maxpkt_); + /* + * Compute the weighted allotment for each class. + * This takes the expensive div instruction out + * of the main loop for the wrr scheduling path. + * These only get recomputed when a class comes or + * goes. + */ + if (ifd->active_[i] != NULL) { + clh = cl = ifd->active_[i]; + do { + /* safe-guard for slow link or alloc_ == 0 */ + if (ifd->M_[i] == 0) + cl->w_allotment_ = 0; + else + cl->w_allotment_ = cl->allotment_ / + ifd->M_[i]; + cl = cl->peer_; + } while ((cl != NULL) && (cl != clh)); + } + } +} + +int +rmc_get_weight(struct rm_ifdat *ifd, int pri) +{ + if ((pri >= 0) && (pri < RM_MAXPRIO)) + return (ifd->M_[pri]); + else + return (0); +} + +/* + * static void + * rmc_depth_compute(struct rm_class *cl) - This function computes the + * appropriate depth of class 'cl' and its ancestors. + * + * Returns: NONE + */ + +static void +rmc_depth_compute(struct rm_class *cl) +{ + rm_class_t *t = cl, *p; + + /* + * Recompute the depth for the branch of the tree. + */ + while (t != NULL) { + p = t->parent_; + if (p && (t->depth_ >= p->depth_)) { + p->depth_ = t->depth_ + 1; + t = p; + } else + t = NULL; + } +} + +/* + * static void + * rmc_depth_recompute(struct rm_class *cl) - This function re-computes + * the depth of the tree after a class has been deleted. 
+ * + * Returns: NONE + */ + +static void +rmc_depth_recompute(rm_class_t *cl) +{ +#if 1 /* ALTQ */ + rm_class_t *p, *t; + + p = cl; + while (p != NULL) { + if ((t = p->children_) == NULL) { + p->depth_ = 0; + } else { + int cdepth = 0; + + while (t != NULL) { + if (t->depth_ > cdepth) + cdepth = t->depth_; + t = t->next_; + } + + if (p->depth_ == cdepth + 1) + /* no change to this parent */ + return; + + p->depth_ = cdepth + 1; + } + + p = p->parent_; + } +#else + rm_class_t *t; + + if (cl->depth_ >= 1) { + if (cl->children_ == NULL) { + cl->depth_ = 0; + } else if ((t = cl->children_) != NULL) { + while (t != NULL) { + if (t->children_ != NULL) + rmc_depth_recompute(t); + t = t->next_; + } + } else + rmc_depth_compute(cl); + } +#endif +} + +/* + * void + * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This + * function deletes a class from the link-sharing structure and frees + * all resources associated with the class. + * + * Returns: NONE + */ + +void +rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) +{ + struct rm_class *p, *head, *previous; + int s; + + ASSERT(cl->children_ == NULL); + + if (cl->sleeping_) + CALLOUT_STOP(&cl->callout_); + + s = splnet(); + IFQ_LOCK(ifd->ifq_); + /* + * Free packets in the packet queue. + * XXX - this may not be a desired behavior. Packets should be + * re-queued. + */ + rmc_dropall(cl); + + /* + * If the class has a parent, then remove the class from the + * class from the parent's children chain. + */ + if (cl->parent_ != NULL) { + head = cl->parent_->children_; + p = previous = head; + if (head->next_ == NULL) { + ASSERT(head == cl); + cl->parent_->children_ = NULL; + cl->parent_->leaf_ = 1; + } else while (p != NULL) { + if (p == cl) { + if (cl == head) + cl->parent_->children_ = cl->next_; + else + previous->next_ = cl->next_; + cl->next_ = NULL; + p = NULL; + } else { + previous = p; + p = p->next_; + } + } + } + + /* + * Delete class from class priority peer list. 
+ */ + if ((p = ifd->active_[cl->pri_]) != NULL) { + /* + * If there is more than one member of this priority + * level, then look for class(cl) in the priority level. + */ + if (p != p->peer_) { + while (p->peer_ != cl) + p = p->peer_; + p->peer_ = cl->peer_; + + if (ifd->active_[cl->pri_] == cl) + ifd->active_[cl->pri_] = cl->peer_; + } else { + ASSERT(p == cl); + ifd->active_[cl->pri_] = NULL; + } + } + + /* + * Recompute the WRR weights. + */ + if (ifd->wrr_) { + ifd->alloc_[cl->pri_] -= cl->allotment_; + ifd->num_[cl->pri_]--; + rmc_wrr_set_weights(ifd); + } + + /* + * Re-compute the depth of the tree. + */ +#if 1 /* ALTQ */ + rmc_depth_recompute(cl->parent_); +#else + rmc_depth_recompute(ifd->root_); +#endif + + IFQ_UNLOCK(ifd->ifq_); + splx(s); + + /* + * Free the class structure. + */ + if (cl->red_ != NULL) { +#ifdef ALTQ_RIO + if (q_is_rio(cl->q_)) + rio_destroy((rio_t *)cl->red_); +#endif +#ifdef ALTQ_RED + if (q_is_red(cl->q_)) + red_destroy(cl->red_); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + codel_destroy(cl->codel_); +#endif + } + free(cl->q_, M_DEVBUF); + free(cl, M_DEVBUF); +} + + +/* + * void + * rmc_init(...) - Initialize the resource management data structures + * associated with the output portion of interface 'ifp'. 'ifd' is + * where the structures will be built (for backwards compatibility, the + * structures aren't kept in the ifnet struct). 'nsecPerByte' + * gives the link speed (inverse of bandwidth) in nanoseconds/byte. + * 'restart' is the driver-specific routine that the generic 'delay + * until under limit' action will call to restart output. `maxq' + * is the queue size of the 'link' & 'default' classes. 'maxqueued' + * is the maximum number of packets that the resource management + * code will allow to be queued 'downstream' (this is typically 1). 
+ * + * Returns: NONE + */ + +void +rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte, + void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle, + int minidle, u_int offtime, int flags) +{ + int i, mtu; + + /* + * Initialize the CBQ tracing/debug facility. + */ + CBQTRACEINIT(); + + bzero((char *)ifd, sizeof (*ifd)); + mtu = ifq->altq_ifp->if_mtu; + ifd->ifq_ = ifq; + ifd->restart = restart; + ifd->maxqueued_ = maxqueued; + ifd->ns_per_byte_ = nsecPerByte; + ifd->maxpkt_ = mtu; + ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0; + ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; +#if 1 + ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; + if (mtu * nsecPerByte > 10 * 1000000) + ifd->maxiftime_ /= 4; +#endif + + reset_cutoff(ifd); + CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); + + /* + * Initialize the CBQ's WRR state. + */ + for (i = 0; i < RM_MAXPRIO; i++) { + ifd->alloc_[i] = 0; + ifd->M_[i] = 0; + ifd->num_[i] = 0; + ifd->na_[i] = 0; + ifd->active_[i] = NULL; + } + + /* + * Initialize current packet state. + */ + ifd->qi_ = 0; + ifd->qo_ = 0; + for (i = 0; i < RM_MAXQUEUED; i++) { + ifd->class_[i] = NULL; + ifd->curlen_[i] = 0; + ifd->borrowed_[i] = NULL; + } + + /* + * Create the root class of the link-sharing structure. + */ + if ((ifd->root_ = rmc_newclass(0, ifd, + nsecPerByte, + rmc_root_overlimit, maxq, 0, 0, + maxidle, minidle, offtime, + 0, 0)) == NULL) { + printf("rmc_init: root class not allocated\n"); + return ; + } + ifd->root_->depth_ = 0; +} + +/* + * void + * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by + * mbuf 'm' to queue for resource class 'cl'. This routine is called + * by a driver's if_output routine. This routine must be called with + * output packet completion interrupts locked out (to avoid racing with + * rmc_dequeue_next). 
+ * + * Returns: 0 on successful queueing + * -1 when packet drop occurs + */ +int +rmc_queue_packet(struct rm_class *cl, mbuf_t *m) +{ + struct timeval now; + struct rm_ifdat *ifd = cl->ifdat_; + int cpri = cl->pri_; + int is_empty = qempty(cl->q_); + + RM_GETTIME(now); + if (ifd->cutoff_ > 0) { + if (TV_LT(&cl->undertime_, &now)) { + if (ifd->cutoff_ > cl->depth_) + ifd->cutoff_ = cl->depth_; + CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_); + } +#if 1 /* ALTQ */ + else { + /* + * the class is overlimit. if the class has + * underlimit ancestors, set cutoff to the lowest + * depth among them. + */ + struct rm_class *borrow = cl->borrow_; + + while (borrow != NULL && + borrow->depth_ < ifd->cutoff_) { + if (TV_LT(&borrow->undertime_, &now)) { + ifd->cutoff_ = borrow->depth_; + CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_); + break; + } + borrow = borrow->borrow_; + } + } +#else /* !ALTQ */ + else if ((ifd->cutoff_ > 1) && cl->borrow_) { + if (TV_LT(&cl->borrow_->undertime_, &now)) { + ifd->cutoff_ = cl->borrow_->depth_; + CBQTRACE(rmc_queue_packet, 'ffob', + cl->borrow_->depth_); + } + } +#endif /* !ALTQ */ + } + + if (_rmc_addq(cl, m) < 0) + /* failed */ + return (-1); + + if (is_empty) { + CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle); + ifd->na_[cpri]++; + } + + if (qlen(cl->q_) > qlimit(cl->q_)) { + /* note: qlimit can be set to 0 or 1 */ + rmc_drop_action(cl); + return (-1); + } + return (0); +} + +/* + * void + * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all + * classes to see if there are satified. + */ + +static void +rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) +{ + int i; + rm_class_t *p, *bp; + + for (i = RM_MAXPRIO - 1; i >= 0; i--) { + if ((bp = ifd->active_[i]) != NULL) { + p = bp; + do { + if (!rmc_satisfied(p, now)) { + ifd->cutoff_ = p->depth_; + return; + } + p = p->peer_; + } while (p != bp); + } + } + + reset_cutoff(ifd); +} + +/* + * rmc_satisfied - Return 1 of the class is satisfied. 
O, otherwise. + */ + +static int +rmc_satisfied(struct rm_class *cl, struct timeval *now) +{ + rm_class_t *p; + + if (cl == NULL) + return (1); + if (TV_LT(now, &cl->undertime_)) + return (1); + if (cl->depth_ == 0) { + if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_)) + return (0); + else + return (1); + } + if (cl->children_ != NULL) { + p = cl->children_; + while (p != NULL) { + if (!rmc_satisfied(p, now)) + return (0); + p = p->next_; + } + } + + return (1); +} + +/* + * Return 1 if class 'cl' is under limit or can borrow from a parent, + * 0 if overlimit. As a side-effect, this routine will invoke the + * class overlimit action if the class if overlimit. + */ + +static int +rmc_under_limit(struct rm_class *cl, struct timeval *now) +{ + rm_class_t *p = cl; + rm_class_t *top; + struct rm_ifdat *ifd = cl->ifdat_; + + ifd->borrowed_[ifd->qi_] = NULL; + /* + * If cl is the root class, then always return that it is + * underlimit. Otherwise, check to see if the class is underlimit. + */ + if (cl->parent_ == NULL) + return (1); + + if (cl->sleeping_) { + if (TV_LT(now, &cl->undertime_)) + return (0); + + CALLOUT_STOP(&cl->callout_); + cl->sleeping_ = 0; + cl->undertime_.tv_sec = 0; + return (1); + } + + top = NULL; + while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) { + if (((cl = cl->borrow_) == NULL) || + (cl->depth_ > ifd->cutoff_)) { +#ifdef ADJUST_CUTOFF + if (cl != NULL) + /* cutoff is taking effect, just + return false without calling + the delay action. */ + return (0); +#endif +#ifdef BORROW_OFFTIME + /* + * check if the class can borrow offtime too. + * borrow offtime from the top of the borrow + * chain if the top class is not overloaded. + */ + if (cl != NULL) { + /* cutoff is taking effect, use this class as top. 
*/ + top = cl; + CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_); + } + if (top != NULL && top->avgidle_ == top->minidle_) + top = NULL; + p->overtime_ = *now; + (p->overlimit)(p, top); +#else + p->overtime_ = *now; + (p->overlimit)(p, NULL); +#endif + return (0); + } + top = cl; + } + + if (cl != p) + ifd->borrowed_[ifd->qi_] = cl; + return (1); +} + +/* + * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to + * Packet-by-packet round robin. + * + * The heart of the weighted round-robin scheduler, which decides which + * class next gets to send a packet. Highest priority first, then + * weighted round-robin within priorites. + * + * Each able-to-send class gets to send until its byte allocation is + * exhausted. Thus, the active pointer is only changed after a class has + * exhausted its allocation. + * + * If the scheduler finds no class that is underlimit or able to borrow, + * then the first class found that had a nonzero queue and is allowed to + * borrow gets to send. + */ + +static mbuf_t * +_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op) +{ + struct rm_class *cl = NULL, *first = NULL; + u_int deficit; + int cpri; + mbuf_t *m; + struct timeval now; + + RM_GETTIME(now); + + /* + * if the driver polls the top of the queue and then removes + * the polled packet, we must return the same packet. 
+ */ + if (op == ALTDQ_REMOVE && ifd->pollcache_) { + cl = ifd->pollcache_; + cpri = cl->pri_; + if (ifd->efficient_) { + /* check if this class is overlimit */ + if (cl->undertime_.tv_sec != 0 && + rmc_under_limit(cl, &now) == 0) + first = cl; + } + ifd->pollcache_ = NULL; + goto _wrr_out; + } + else { + /* mode == ALTDQ_POLL || pollcache == NULL */ + ifd->pollcache_ = NULL; + ifd->borrowed_[ifd->qi_] = NULL; + } +#ifdef ADJUST_CUTOFF + _again: +#endif + for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { + if (ifd->na_[cpri] == 0) + continue; + deficit = 0; + /* + * Loop through twice for a priority level, if some class + * was unable to send a packet the first round because + * of the weighted round-robin mechanism. + * During the second loop at this level, deficit==2. + * (This second loop is not needed if for every class, + * "M[cl->pri_])" times "cl->allotment" is greater than + * the byte size for the largest packet in the class.) + */ + _wrr_loop: + cl = ifd->active_[cpri]; + ASSERT(cl != NULL); + do { + if ((deficit < 2) && (cl->bytes_alloc_ <= 0)) + cl->bytes_alloc_ += cl->w_allotment_; + if (!qempty(cl->q_)) { + if ((cl->undertime_.tv_sec == 0) || + rmc_under_limit(cl, &now)) { + if (cl->bytes_alloc_ > 0 || deficit > 1) + goto _wrr_out; + + /* underlimit but no alloc */ + deficit = 1; +#if 1 + ifd->borrowed_[ifd->qi_] = NULL; +#endif + } + else if (first == NULL && cl->borrow_ != NULL) + first = cl; /* borrowing candidate */ + } + + cl->bytes_alloc_ = 0; + cl = cl->peer_; + } while (cl != ifd->active_[cpri]); + + if (deficit == 1) { + /* first loop found an underlimit class with deficit */ + /* Loop on same priority level, with new deficit. */ + deficit = 2; + goto _wrr_loop; + } + } + +#ifdef ADJUST_CUTOFF + /* + * no underlimit class found. if cutoff is taking effect, + * increase cutoff and try again. 
+ */ + if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { + ifd->cutoff_++; + CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_); + goto _again; + } +#endif /* ADJUST_CUTOFF */ + /* + * If LINK_EFFICIENCY is turned on, then the first overlimit + * class we encounter will send a packet if all the classes + * of the link-sharing structure are overlimit. + */ + reset_cutoff(ifd); + CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_); + + if (!ifd->efficient_ || first == NULL) + return (NULL); + + cl = first; + cpri = cl->pri_; +#if 0 /* too time-consuming for nothing */ + if (cl->sleeping_) + CALLOUT_STOP(&cl->callout_); + cl->sleeping_ = 0; + cl->undertime_.tv_sec = 0; +#endif + ifd->borrowed_[ifd->qi_] = cl->borrow_; + ifd->cutoff_ = cl->borrow_->depth_; + + /* + * Deque the packet and do the book keeping... + */ + _wrr_out: + if (op == ALTDQ_REMOVE) { + m = _rmc_getq(cl); + if (m == NULL) + panic("_rmc_wrr_dequeue_next"); + if (qempty(cl->q_)) + ifd->na_[cpri]--; + + /* + * Update class statistics and link data. + */ + if (cl->bytes_alloc_ > 0) + cl->bytes_alloc_ -= m_pktlen(m); + + if ((cl->bytes_alloc_ <= 0) || first == cl) + ifd->active_[cl->pri_] = cl->peer_; + else + ifd->active_[cl->pri_] = cl; + + ifd->class_[ifd->qi_] = cl; + ifd->curlen_[ifd->qi_] = m_pktlen(m); + ifd->now_[ifd->qi_] = now; + ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; + ifd->queued_++; + } else { + /* mode == ALTDQ_PPOLL */ + m = _rmc_pollq(cl); + ifd->pollcache_ = cl; + } + return (m); +} + +/* + * Dequeue & return next packet from the highest priority class that + * has a packet to send & has enough allocation to send it. This + * routine is called by a driver whenever it needs a new packet to + * output. 
+ */ +static mbuf_t * +_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op) +{ + mbuf_t *m; + int cpri; + struct rm_class *cl, *first = NULL; + struct timeval now; + + RM_GETTIME(now); + + /* + * if the driver polls the top of the queue and then removes + * the polled packet, we must return the same packet. + */ + if (op == ALTDQ_REMOVE && ifd->pollcache_) { + cl = ifd->pollcache_; + cpri = cl->pri_; + ifd->pollcache_ = NULL; + goto _prr_out; + } else { + /* mode == ALTDQ_POLL || pollcache == NULL */ + ifd->pollcache_ = NULL; + ifd->borrowed_[ifd->qi_] = NULL; + } +#ifdef ADJUST_CUTOFF + _again: +#endif + for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { + if (ifd->na_[cpri] == 0) + continue; + cl = ifd->active_[cpri]; + ASSERT(cl != NULL); + do { + if (!qempty(cl->q_)) { + if ((cl->undertime_.tv_sec == 0) || + rmc_under_limit(cl, &now)) + goto _prr_out; + if (first == NULL && cl->borrow_ != NULL) + first = cl; + } + cl = cl->peer_; + } while (cl != ifd->active_[cpri]); + } + +#ifdef ADJUST_CUTOFF + /* + * no underlimit class found. if cutoff is taking effect, increase + * cutoff and try again. + */ + if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { + ifd->cutoff_++; + goto _again; + } +#endif /* ADJUST_CUTOFF */ + /* + * If LINK_EFFICIENCY is turned on, then the first overlimit + * class we encounter will send a packet if all the classes + * of the link-sharing structure are overlimit. + */ + reset_cutoff(ifd); + if (!ifd->efficient_ || first == NULL) + return (NULL); + + cl = first; + cpri = cl->pri_; +#if 0 /* too time-consuming for nothing */ + if (cl->sleeping_) + CALLOUT_STOP(&cl->callout_); + cl->sleeping_ = 0; + cl->undertime_.tv_sec = 0; +#endif + ifd->borrowed_[ifd->qi_] = cl->borrow_; + ifd->cutoff_ = cl->borrow_->depth_; + + /* + * Deque the packet and do the book keeping... 
+ */ + _prr_out: + if (op == ALTDQ_REMOVE) { + m = _rmc_getq(cl); + if (m == NULL) + panic("_rmc_prr_dequeue_next"); + if (qempty(cl->q_)) + ifd->na_[cpri]--; + + ifd->active_[cpri] = cl->peer_; + + ifd->class_[ifd->qi_] = cl; + ifd->curlen_[ifd->qi_] = m_pktlen(m); + ifd->now_[ifd->qi_] = now; + ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; + ifd->queued_++; + } else { + /* mode == ALTDQ_POLL */ + m = _rmc_pollq(cl); + ifd->pollcache_ = cl; + } + return (m); +} + +/* + * mbuf_t * + * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function + * is invoked by the packet driver to get the next packet to be + * dequeued and output on the link. If WRR is enabled, then the + * WRR dequeue next routine will determine the next packet to sent. + * Otherwise, packet-by-packet round robin is invoked. + * + * Returns: NULL, if a packet is not available or if all + * classes are overlimit. + * + * Otherwise, Pointer to the next packet. + */ + +mbuf_t * +rmc_dequeue_next(struct rm_ifdat *ifd, int mode) +{ + if (ifd->queued_ >= ifd->maxqueued_) + return (NULL); + else if (ifd->wrr_) + return (_rmc_wrr_dequeue_next(ifd, mode)); + else + return (_rmc_prr_dequeue_next(ifd, mode)); +} + +/* + * Update the utilization estimate for the packet that just completed. + * The packet's class & the parent(s) of that class all get their + * estimators updated. This routine is called by the driver's output- + * packet-completion interrupt service routine. + */ + +/* + * a macro to approximate "divide by 1000" that gives 0.000999, + * if a value has enough effective digits. + * (on pentium, mul takes 9 cycles but div takes 46!) + */ +#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17)) +void +rmc_update_class_util(struct rm_ifdat *ifd) +{ + int idle, avgidle, pktlen; + int pkt_time, tidle; + rm_class_t *cl, *borrowed; + rm_class_t *borrows; + struct timeval *nowp; + + /* + * Get the most recent completed class. 
+ */ + if ((cl = ifd->class_[ifd->qo_]) == NULL) + return; + + pktlen = ifd->curlen_[ifd->qo_]; + borrowed = ifd->borrowed_[ifd->qo_]; + borrows = borrowed; + + PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); + + /* + * Run estimator on class and its ancestors. + */ + /* + * rm_update_class_util is designed to be called when the + * transfer is completed from a xmit complete interrupt, + * but most drivers don't implement an upcall for that. + * so, just use estimated completion time. + * as a result, ifd->qi_ and ifd->qo_ are always synced. + */ + nowp = &ifd->now_[ifd->qo_]; + /* get pkt_time (for link) in usec */ +#if 1 /* use approximation */ + pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_; + pkt_time = NSEC_TO_USEC(pkt_time); +#else + pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000; +#endif +#if 1 /* ALTQ4PPP */ + if (TV_LT(nowp, &ifd->ifnow_)) { + int iftime; + + /* + * make sure the estimated completion time does not go + * too far. it can happen when the link layer supports + * data compression or the interface speed is set to + * a much lower value. + */ + TV_DELTA(&ifd->ifnow_, nowp, iftime); + if (iftime+pkt_time < ifd->maxiftime_) { + TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); + } else { + TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_); + } + } else { + TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); + } +#else + if (TV_LT(nowp, &ifd->ifnow_)) { + TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); + } else { + TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); + } +#endif + + while (cl != NULL) { + TV_DELTA(&ifd->ifnow_, &cl->last_, idle); + if (idle >= 2000000) + /* + * this class is idle enough, reset avgidle. + * (TV_DELTA returns 2000000 us when delta is large.) 
+ */ + cl->avgidle_ = cl->maxidle_; + + /* get pkt_time (for class) in usec */ +#if 1 /* use approximation */ + pkt_time = pktlen * cl->ns_per_byte_; + pkt_time = NSEC_TO_USEC(pkt_time); +#else + pkt_time = pktlen * cl->ns_per_byte_ / 1000; +#endif + idle -= pkt_time; + + avgidle = cl->avgidle_; + avgidle += idle - (avgidle >> RM_FILTER_GAIN); + cl->avgidle_ = avgidle; + + /* Are we overlimit ? */ + if (avgidle <= 0) { + CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle); +#if 1 /* ALTQ */ + /* + * need some lower bound for avgidle, otherwise + * a borrowing class gets unbounded penalty. + */ + if (avgidle < cl->minidle_) + avgidle = cl->avgidle_ = cl->minidle_; +#endif + /* set next idle to make avgidle 0 */ + tidle = pkt_time + + (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN); + TV_ADD_DELTA(nowp, tidle, &cl->undertime_); + ++cl->stats_.over; + } else { + cl->avgidle_ = + (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle; + cl->undertime_.tv_sec = 0; + if (cl->sleeping_) { + CALLOUT_STOP(&cl->callout_); + cl->sleeping_ = 0; + } + } + + if (borrows != NULL) { + if (borrows != cl) + ++cl->stats_.borrows; + else + borrows = NULL; + } + cl->last_ = ifd->ifnow_; + cl->last_pkttime_ = pkt_time; + +#if 1 + if (cl->parent_ == NULL) { + /* take stats of root class */ + PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); + } +#endif + + cl = cl->parent_; + } + + /* + * Check to see if cutoff needs to set to a new level. 
+ */ + cl = ifd->class_[ifd->qo_]; + if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) { +#if 1 /* ALTQ */ + if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) { + rmc_tl_satisfied(ifd, nowp); + CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); + } else { + ifd->cutoff_ = borrowed->depth_; + CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); + } +#else /* !ALTQ */ + if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) { + reset_cutoff(ifd); +#ifdef notdef + rmc_tl_satisfied(ifd, &now); +#endif + CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); + } else { + ifd->cutoff_ = borrowed->depth_; + CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); + } +#endif /* !ALTQ */ + } + + /* + * Release class slot + */ + ifd->borrowed_[ifd->qo_] = NULL; + ifd->class_[ifd->qo_] = NULL; + ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_; + ifd->queued_--; +} + +/* + * void + * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific) + * over-limit action routines. These get invoked by rmc_under_limit() + * if a class with packets to send if over its bandwidth limit & can't + * borrow from a parent class. 
+ * + * Returns: NONE + */ + +static void +rmc_drop_action(struct rm_class *cl) +{ + struct rm_ifdat *ifd = cl->ifdat_; + + ASSERT(qlen(cl->q_) > 0); + _rmc_dropq(cl); + if (qempty(cl->q_)) + ifd->na_[cl->pri_]--; +} + +void rmc_dropall(struct rm_class *cl) +{ + struct rm_ifdat *ifd = cl->ifdat_; + + if (!qempty(cl->q_)) { + _flushq(cl->q_); + + ifd->na_[cl->pri_]--; + } +} + +#if (__FreeBSD_version > 300000) +/* hzto() is removed from FreeBSD-3.0 */ +static int hzto(struct timeval *); + +static int +hzto(tv) + struct timeval *tv; +{ + struct timeval t2; + + getmicrotime(&t2); + t2.tv_sec = tv->tv_sec - t2.tv_sec; + t2.tv_usec = tv->tv_usec - t2.tv_usec; + return (tvtohz(&t2)); +} +#endif /* __FreeBSD_version > 300000 */ + +/* + * void + * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ + * delay action routine. It is invoked via rmc_under_limit when the + * packet is discoverd to be overlimit. + * + * If the delay action is result of borrow class being overlimit, then + * delay for the offtime of the borrowing class that is overlimit. + * + * Returns: NONE + */ + +void +rmc_delay_action(struct rm_class *cl, struct rm_class *borrow) +{ + int delay, t, extradelay; + + cl->stats_.overactions++; + TV_DELTA(&cl->undertime_, &cl->overtime_, delay); +#ifndef BORROW_OFFTIME + delay += cl->offtime_; +#endif + + if (!cl->sleeping_) { + CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle); +#ifdef BORROW_OFFTIME + if (borrow != NULL) + extradelay = borrow->offtime_; + else +#endif + extradelay = cl->offtime_; + +#ifdef ALTQ + /* + * XXX recalculate suspend time: + * current undertime is (tidle + pkt_time) calculated + * from the last transmission. 
+ * tidle: time required to bring avgidle back to 0 + * pkt_time: target waiting time for this class + * we need to replace pkt_time by offtime + */ + extradelay -= cl->last_pkttime_; +#endif + if (extradelay > 0) { + TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_); + delay += extradelay; + } + + cl->sleeping_ = 1; + cl->stats_.delays++; + + /* + * Since packets are phased randomly with respect to the + * clock, 1 tick (the next clock tick) can be an arbitrarily + * short time so we have to wait for at least two ticks. + * NOTE: If there's no other traffic, we need the timer as + * a 'backstop' to restart this class. + */ + if (delay > tick * 2) { + /* FreeBSD rounds up the tick */ + t = hzto(&cl->undertime_); + } else + t = 2; + CALLOUT_RESET(&cl->callout_, t, + (timeout_t *)rmc_restart, (caddr_t)cl); + } +} + +/* + * void + * rmc_restart() - is just a helper routine for rmc_delay_action -- it is + * called by the system timer code & is responsible checking if the + * class is still sleeping (it might have been restarted as a side + * effect of the queue scan on a packet arrival) and, if so, restarting + * output for the class. Inspecting the class state & restarting output + * require locking the class structure. In general the driver is + * responsible for locking but this is the only routine that is not + * called directly or indirectly from the interface driver so it has + * know about system locking conventions. Under bsd, locking is done + * by raising IPL to splimp so that's what's implemented here. On a + * different system this would probably need to be changed. 
+ * + * Returns: NONE + */ + +static void +rmc_restart(struct rm_class *cl) +{ + struct rm_ifdat *ifd = cl->ifdat_; + int s; + + s = splnet(); + IFQ_LOCK(ifd->ifq_); + if (cl->sleeping_) { + cl->sleeping_ = 0; + cl->undertime_.tv_sec = 0; + + if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) { + CBQTRACE(rmc_restart, 'trts', cl->stats_.handle); + (ifd->restart)(ifd->ifq_); + } + } + IFQ_UNLOCK(ifd->ifq_); + splx(s); +} + +/* + * void + * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit + * handling routine for the root class of the link sharing structure. + * + * Returns: NONE + */ + +static void +rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow) +{ + panic("rmc_root_overlimit"); +} + +/* + * Packet Queue handling routines. Eventually, this is to localize the + * effects on the code whether queues are red queues or droptail + * queues. + */ + +static int +_rmc_addq(rm_class_t *cl, mbuf_t *m) +{ +#ifdef ALTQ_RIO + if (q_is_rio(cl->q_)) + return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_); +#endif +#ifdef ALTQ_RED + if (q_is_red(cl->q_)) + return red_addq(cl->red_, cl->q_, m, cl->pktattr_); +#endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + return codel_addq(cl->codel_, cl->q_, m); +#endif + + if (cl->flags_ & RMCF_CLEARDSCP) + write_dsfield(m, cl->pktattr_, 0); + + _addq(cl->q_, m); + return (0); +} + +/* note: _rmc_dropq is not called for red */ +static void +_rmc_dropq(rm_class_t *cl) +{ + mbuf_t *m; + + if ((m = _getq(cl->q_)) != NULL) + m_freem(m); +} + +static mbuf_t * +_rmc_getq(rm_class_t *cl) +{ +#ifdef ALTQ_RIO + if (q_is_rio(cl->q_)) + return rio_getq((rio_t *)cl->red_, cl->q_); +#endif +#ifdef ALTQ_RED + if (q_is_red(cl->q_)) + return red_getq(cl->red_, cl->q_); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + return codel_getq(cl->codel_, cl->q_); +#endif + return _getq(cl->q_); +} + +static mbuf_t * +_rmc_pollq(rm_class_t *cl) +{ + return qhead(cl->q_); +} + +#ifdef CBQ_TRACE 
+ +struct cbqtrace cbqtrace_buffer[NCBQTRACE+1]; +struct cbqtrace *cbqtrace_ptr = NULL; +int cbqtrace_count; + +/* + * DDB hook to trace cbq events: + * the last 1024 events are held in a circular buffer. + * use "call cbqtrace_dump(N)" to display 20 events from Nth event. + */ +void cbqtrace_dump(int); +static char *rmc_funcname(void *); + +static struct rmc_funcs { + void *func; + char *name; +} rmc_funcs[] = +{ + rmc_init, "rmc_init", + rmc_queue_packet, "rmc_queue_packet", + rmc_under_limit, "rmc_under_limit", + rmc_update_class_util, "rmc_update_class_util", + rmc_delay_action, "rmc_delay_action", + rmc_restart, "rmc_restart", + _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next", + NULL, NULL +}; + +static char *rmc_funcname(void *func) +{ + struct rmc_funcs *fp; + + for (fp = rmc_funcs; fp->func != NULL; fp++) + if (fp->func == func) + return (fp->name); + return ("unknown"); +} + +void cbqtrace_dump(int counter) +{ + int i, *p; + char *cp; + + counter = counter % NCBQTRACE; + p = (int *)&cbqtrace_buffer[counter]; + + for (i=0; i<20; i++) { + printf("[0x%x] ", *p++); + printf("%s: ", rmc_funcname((void *)*p++)); + cp = (char *)p++; + printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]); + printf("%d\n",*p++); + + if (p >= (int *)&cbqtrace_buffer[NCBQTRACE]) + p = (int *)cbqtrace_buffer; + } +} +#endif /* CBQ_TRACE */ +#endif /* ALTQ_CBQ */ + +#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \ + defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL) +#if !defined(__GNUC__) || defined(ALTQ_DEBUG) + +void +_addq(class_queue_t *q, mbuf_t *m) +{ + mbuf_t *m0; + + if ((m0 = qtail(q)) != NULL) + m->m_nextpkt = m0->m_nextpkt; + else + m0 = m; + m0->m_nextpkt = m; + qtail(q) = m; + qlen(q)++; +} + +mbuf_t * +_getq(class_queue_t *q) +{ + mbuf_t *m, *m0; + + if ((m = qtail(q)) == NULL) + return (NULL); + if ((m0 = m->m_nextpkt) != m) + m->m_nextpkt = m0->m_nextpkt; + else { + ASSERT(qlen(q) == 1); + qtail(q) = NULL; + } + qlen(q)--; + m0->m_nextpkt = 
NULL; + return (m0); +} + +/* drop a packet at the tail of the queue */ +mbuf_t * +_getq_tail(class_queue_t *q) +{ + mbuf_t *m, *m0, *prev; + + if ((m = m0 = qtail(q)) == NULL) + return NULL; + do { + prev = m0; + m0 = m0->m_nextpkt; + } while (m0 != m); + prev->m_nextpkt = m->m_nextpkt; + if (prev == m) { + ASSERT(qlen(q) == 1); + qtail(q) = NULL; + } else + qtail(q) = prev; + qlen(q)--; + m->m_nextpkt = NULL; + return (m); +} + +/* randomly select a packet in the queue */ +mbuf_t * +_getq_random(class_queue_t *q) +{ + struct mbuf *m; + int i, n; + + if ((m = qtail(q)) == NULL) + return NULL; + if (m->m_nextpkt == m) { + ASSERT(qlen(q) == 1); + qtail(q) = NULL; + } else { + struct mbuf *prev = NULL; + + n = arc4random() % qlen(q) + 1; + for (i = 0; i < n; i++) { + prev = m; + m = m->m_nextpkt; + } + prev->m_nextpkt = m->m_nextpkt; + if (m == qtail(q)) + qtail(q) = prev; + } + qlen(q)--; + m->m_nextpkt = NULL; + return (m); +} + +void +_removeq(class_queue_t *q, mbuf_t *m) +{ + mbuf_t *m0, *prev; + + m0 = qtail(q); + do { + prev = m0; + m0 = m0->m_nextpkt; + } while (m0 != m); + prev->m_nextpkt = m->m_nextpkt; + if (prev == m) + qtail(q) = NULL; + else if (qtail(q) == m) + qtail(q) = prev; + qlen(q)--; +} + +void +_flushq(class_queue_t *q) +{ + mbuf_t *m; + + while ((m = _getq(q)) != NULL) + m_freem(m); + ASSERT(qlen(q) == 0); +} + +#endif /* !__GNUC__ || ALTQ_DEBUG */ +#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */ diff --git a/freebsd/sys/net/altq/altq_rmclass.h b/freebsd/sys/net/altq/altq_rmclass.h new file mode 100644 index 00000000..6130c4ff --- /dev/null +++ b/freebsd/sys/net/altq/altq_rmclass.h @@ -0,0 +1,273 @@ +/*- + * Copyright (c) 1991-1997 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Network Research + * Group at Lawrence Berkeley Laboratory. + * 4. Neither the name of the University nor of the Laboratory may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_RMCLASS_H_ +#define _ALTQ_ALTQ_RMCLASS_H_ + +#include <net/altq/altq_classq.h> + +/* #pragma ident "@(#)rm_class.h 1.20 97/10/23 SMI" */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define RM_MAXPRIO 8 /* Max priority */ + +#ifdef _KERNEL + +typedef struct mbuf mbuf_t; +typedef struct rm_ifdat rm_ifdat_t; +typedef struct rm_class rm_class_t; + +struct red; + +/* + * Macros for dealing with time values. We assume all times are + * 'timevals'. `microtime' is used to get the best available clock + * resolution. If `microtime' *doesn't* return a value that's about + * ten times smaller than the average packet time on the fastest + * link that will use these routines, a slightly different clock + * scheme than this one should be used. + * (Bias due to truncation error in this scheme will overestimate utilization + * and discriminate against high bandwidth classes. To remove this bias an + * integrator needs to be added. The simplest integrator uses a history of + * 10 * avg.packet.time / min.tick.time packet completion entries. This is + * straight forward to add but we don't want to pay the extra memory + * traffic to maintain it if it's not necessary (occasionally a vendor + * accidentally builds a workstation with a decent clock - e.g., Sun & HP).) 
+ */ + +#define RM_GETTIME(now) microtime(&now) + +#define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \ + (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec))) + +#define TV_DELTA(a, b, delta) { \ + register int xxs; \ + \ + delta = (a)->tv_usec - (b)->tv_usec; \ + if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \ + switch (xxs) { \ + default: \ + /* if (xxs < 0) \ + printf("rm_class: bogus time values\n"); */ \ + delta = 0; \ + /* fall through */ \ + case 2: \ + delta += 1000000; \ + /* fall through */ \ + case 1: \ + delta += 1000000; \ + break; \ + } \ + } \ +} + +#define TV_ADD_DELTA(a, delta, res) { \ + register int xxus = (a)->tv_usec + (delta); \ + \ + (res)->tv_sec = (a)->tv_sec; \ + while (xxus >= 1000000) { \ + ++((res)->tv_sec); \ + xxus -= 1000000; \ + } \ + (res)->tv_usec = xxus; \ +} + +#define RM_TIMEOUT 2 /* 1 Clock tick. */ + +#if 1 +#define RM_MAXQUEUED 1 /* this isn't used in ALTQ/CBQ */ +#else +#define RM_MAXQUEUED 16 /* Max number of packets downstream of CBQ */ +#endif +#define RM_MAXQUEUE 64 /* Max queue length */ +#define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */ +#define RM_POWER (1 << RM_FILTER_GAIN) +#define RM_MAXDEPTH 32 +#define RM_NS_PER_SEC (1000000000) + +typedef struct _rm_class_stats_ { + u_int handle; + u_int depth; + + struct pktcntr xmit_cnt; /* packets sent in this class */ + struct pktcntr drop_cnt; /* dropped packets */ + u_int over; /* # times went over limit */ + u_int borrows; /* # times tried to borrow */ + u_int overactions; /* # times invoked overlimit action */ + u_int delays; /* # times invoked delay actions */ +} rm_class_stats_t; + +/* + * CBQ Class state structure + */ +struct rm_class { + class_queue_t *q_; /* Queue of packets */ + rm_ifdat_t *ifdat_; + int pri_; /* Class priority. */ + int depth_; /* Class depth */ + u_int ns_per_byte_; /* NanoSeconds per byte. */ + u_int maxrate_; /* Bytes per second for this class. */ + u_int allotment_; /* Fraction of link bandwidth. 
*/ + u_int w_allotment_; /* Weighted allotment for WRR */ + int bytes_alloc_; /* Allocation for round of WRR */ + + int avgidle_; + int maxidle_; + int minidle_; + int offtime_; + int sleeping_; /* != 0 if delaying */ + int qthresh_; /* Queue threshold for formal link sharing */ + int leaf_; /* Note whether leaf class or not.*/ + + rm_class_t *children_; /* Children of this class */ + rm_class_t *next_; /* Next pointer, used if child */ + + rm_class_t *peer_; /* Peer class */ + rm_class_t *borrow_; /* Borrow class */ + rm_class_t *parent_; /* Parent class */ + + void (*overlimit)(struct rm_class *, struct rm_class *); + void (*drop)(struct rm_class *); /* Class drop action. */ + + union { + struct red *red_; /* RED state pointer */ + struct codel *codel_; /* codel state pointer */ + } cl_aqm_; +#define red_ cl_aqm_.red_ +#define codel_ cl_aqm_.codel_ + struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */ + int flags_; + + int last_pkttime_; /* saved pkt_time */ + struct timeval undertime_; /* time can next send */ + struct timeval last_; /* time last packet sent */ + struct timeval overtime_; + struct callout callout_; /* for timeout() calls */ + + rm_class_stats_t stats_; /* Class Statistics */ +}; + +/* + * CBQ Interface state + */ +struct rm_ifdat { + int queued_; /* # pkts queued downstream */ + int efficient_; /* Link Efficiency bit */ + int wrr_; /* Enable Weighted Round-Robin */ + u_long ns_per_byte_; /* Link byte speed. */ + int maxqueued_; /* Max packets to queue */ + int maxpkt_; /* Max packet size. */ + int qi_; /* In/out pointers for downstream */ + int qo_; /* packets */ + + /* + * Active class state and WRR state. + */ + rm_class_t *active_[RM_MAXPRIO]; /* Active cl's in each pri */ + int na_[RM_MAXPRIO]; /* # of active cl's in a pri */ + int num_[RM_MAXPRIO]; /* # of cl's per pri */ + int alloc_[RM_MAXPRIO]; /* Byte Allocation */ + u_long M_[RM_MAXPRIO]; /* WRR weights. */ + + /* + * Network Interface/Solaris Queue state pointer. 
+ */ + struct ifaltq *ifq_; + rm_class_t *default_; /* Default Pkt class, BE */ + rm_class_t *root_; /* Root Link class. */ + rm_class_t *ctl_; /* Control Traffic class. */ + void (*restart)(struct ifaltq *); /* Restart routine. */ + + /* + * Current packet downstream packet state and dynamic state. + */ + rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */ + rm_class_t *class_[RM_MAXQUEUED]; /* class sending */ + int curlen_[RM_MAXQUEUED]; /* Current pktlen */ + struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */ + int is_overlimit_[RM_MAXQUEUED];/* Current packet time. */ + + int cutoff_; /* Cut-off depth for borrowing */ + + struct timeval ifnow_; /* expected xmit completion time */ +#if 1 /* ALTQ4PPP */ + int maxiftime_; /* max delay inside interface */ +#endif + rm_class_t *pollcache_; /* cached rm_class by poll operation */ +}; + +/* flags for rmc_init and rmc_newclass */ +/* class flags */ +#define RMCF_RED 0x0001 +#define RMCF_ECN 0x0002 +#define RMCF_RIO 0x0004 +#define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ +#define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define RMCF_CODEL 0x0020 + +/* flags for rmc_init */ +#define RMCF_WRR 0x0100 +#define RMCF_EFFICIENT 0x0200 + +#define is_a_parent_class(cl) ((cl)->children_ != NULL) + +extern rm_class_t *rmc_newclass(int, struct rm_ifdat *, u_int, + void (*)(struct rm_class *, struct rm_class *), + int, struct rm_class *, struct rm_class *, + u_int, int, u_int, int, int); +extern void rmc_delete_class(struct rm_ifdat *, struct rm_class *); +extern int rmc_modclass(struct rm_class *, u_int, int, + u_int, int, u_int, int); +extern void rmc_init(struct ifaltq *, struct rm_ifdat *, u_int, + void (*)(struct ifaltq *), + int, int, u_int, int, u_int, int); +extern int rmc_queue_packet(struct rm_class *, mbuf_t *); +extern mbuf_t *rmc_dequeue_next(struct rm_ifdat *, int); +extern void rmc_update_class_util(struct rm_ifdat *); +extern void rmc_delay_action(struct 
rm_class *, struct rm_class *); +extern void rmc_dropall(struct rm_class *); +extern int rmc_get_weight(struct rm_ifdat *, int); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ALTQ_ALTQ_RMCLASS_H_ */ diff --git a/freebsd/sys/net/altq/altq_rmclass_debug.h b/freebsd/sys/net/altq/altq_rmclass_debug.h new file mode 100644 index 00000000..7adbaec4 --- /dev/null +++ b/freebsd/sys/net/altq/altq_rmclass_debug.h @@ -0,0 +1,113 @@ +/*- + * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the SMCC Technology + * Development Group at Sun Microsystems, Inc. + * + * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE + * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is + * provided "as is" without express or implied warranty of any kind. + * + * These notices must be retained in any copies of any part of this software. 
+ * + * $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_ +#define _ALTQ_ALTQ_RMCLASS_DEBUG_H_ + +/* #pragma ident "@(#)rm_class_debug.h 1.7 98/05/04 SMI" */ + +/* + * Cbq debugging macros + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef CBQ_TRACE +#ifndef NCBQTRACE +#define NCBQTRACE (16 * 1024) +#endif + +/* + * To view the trace output, using adb, type: + * adb -k /dev/ksyms /dev/mem <cr>, then type + * cbqtrace_count/D to get the count, then type + * cbqtrace_buffer,0tcount/Dp4C" "Xn + * This will dump the trace buffer from 0 to count. + */ +/* + * in ALTQ, "call cbqtrace_dump(N)" from DDB to display 20 events + * from Nth event in the circular buffer. + */ + +struct cbqtrace { + int count; + int function; /* address of function */ + int trace_action; /* descriptive 4 characters */ + int object; /* object operated on */ +}; + +extern struct cbqtrace cbqtrace_buffer[]; +extern struct cbqtrace *cbqtrace_ptr; +extern int cbqtrace_count; + +#define CBQTRACEINIT() { \ + if (cbqtrace_ptr == NULL) \ + cbqtrace_ptr = cbqtrace_buffer; \ + else { \ + cbqtrace_ptr = cbqtrace_buffer; \ + bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \ + cbqtrace_count = 0; \ + } \ +} + +#define LOCK_TRACE() splimp() +#define UNLOCK_TRACE(x) splx(x) + +#define CBQTRACE(func, act, obj) { \ + int __s = LOCK_TRACE(); \ + int *_p = &cbqtrace_ptr->count; \ + *_p++ = ++cbqtrace_count; \ + *_p++ = (int)(func); \ + *_p++ = (int)(act); \ + *_p++ = (int)(obj); \ + if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\ + cbqtrace_ptr = cbqtrace_buffer; \ + else \ + cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \ + UNLOCK_TRACE(__s); \ + } +#else + +/* If no tracing, define no-ops */ +#define CBQTRACEINIT() +#define CBQTRACE(a, b, c) + +#endif /* !CBQ_TRACE */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ALTQ_ALTQ_RMCLASS_DEBUG_H_ */ diff --git a/freebsd/sys/net/altq/altq_subr.c 
b/freebsd/sys/net/altq/altq_subr.c new file mode 100644 index 00000000..66ff441d --- /dev/null +++ b/freebsd/sys/net/altq/altq_subr.c @@ -0,0 +1,1978 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (C) 1997-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ + * $FreeBSD$ + */ + +#include <rtems/bsd/local/opt_altq.h> +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/kernel.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/syslog.h> +#include <sys/sysctl.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_dl.h> +#include <net/if_types.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#ifdef INET6 +#include <netinet/ip6.h> +#endif +#include <netinet/tcp.h> +#include <netinet/udp.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <net/altq/altq.h> +#ifdef ALTQ3_COMPAT +#include <net/altq/altq_conf.h> +#endif + +/* machine dependent clock related includes */ +#include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/eventhandler.h> +#include <machine/clock.h> +#if defined(__amd64__) || defined(__i386__) +#include <machine/cpufunc.h> /* for pentium tsc */ +#include <machine/specialreg.h> /* for CPUID_TSC */ +#include <machine/md_var.h> /* for cpu_feature */ +#endif /* __amd64 || __i386__ */ + +/* + * internal function prototypes + */ +static void tbr_timeout(void *); +int (*altq_input)(struct mbuf *, int) = NULL; +static struct mbuf *tbr_dequeue(struct ifaltq *, int); +static int tbr_timer = 0; /* token bucket regulator timer */ +#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) +static struct callout tbr_callout = CALLOUT_INITIALIZER; +#else +static struct callout tbr_callout; +#endif + +#ifdef ALTQ3_CLFIER_COMPAT +static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); +#ifdef INET6 +static int extract_ports6(struct mbuf *, struct ip6_hdr *, + struct flowinfo_in6 *); +#endif 
+static int apply_filter4(u_int32_t, struct flow_filter *, + struct flowinfo_in *); +static int apply_ppfilter4(u_int32_t, struct flow_filter *, + struct flowinfo_in *); +#ifdef INET6 +static int apply_filter6(u_int32_t, struct flow_filter6 *, + struct flowinfo_in6 *); +#endif +static int apply_tosfilter4(u_int32_t, struct flow_filter *, + struct flowinfo_in *); +static u_long get_filt_handle(struct acc_classifier *, int); +static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); +static u_int32_t filt2fibmask(struct flow_filter *); + +static void ip4f_cache(struct ip *, struct flowinfo_in *); +static int ip4f_lookup(struct ip *, struct flowinfo_in *); +static int ip4f_init(void); +static struct ip4_frag *ip4f_alloc(void); +static void ip4f_free(struct ip4_frag *); +#endif /* ALTQ3_CLFIER_COMPAT */ + +/* + * alternate queueing support routines + */ + +/* look up the queue state by the interface name and the queueing type. */ +void * +altq_lookup(name, type) + char *name; + int type; +{ + struct ifnet *ifp; + + if ((ifp = ifunit(name)) != NULL) { + /* read if_snd unlocked */ + if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) + return (ifp->if_snd.altq_disc); + } + + return NULL; +} + +int +altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) + struct ifaltq *ifq; + int type; + void *discipline; + int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); + struct mbuf *(*dequeue)(struct ifaltq *, int); + int (*request)(struct ifaltq *, int, void *); + void *clfier; + void *(*classify)(void *, struct mbuf *, int); +{ + IFQ_LOCK(ifq); + if (!ALTQ_IS_READY(ifq)) { + IFQ_UNLOCK(ifq); + return ENXIO; + } + +#ifdef ALTQ3_COMPAT + /* + * pfaltq can override the existing discipline, but altq3 cannot. + * check these if clfier is not NULL (which implies altq3). 
+ */ + if (clfier != NULL) { + if (ALTQ_IS_ENABLED(ifq)) { + IFQ_UNLOCK(ifq); + return EBUSY; + } + if (ALTQ_IS_ATTACHED(ifq)) { + IFQ_UNLOCK(ifq); + return EEXIST; + } + } +#endif + ifq->altq_type = type; + ifq->altq_disc = discipline; + ifq->altq_enqueue = enqueue; + ifq->altq_dequeue = dequeue; + ifq->altq_request = request; + ifq->altq_clfier = clfier; + ifq->altq_classify = classify; + ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); +#ifdef ALTQ3_COMPAT +#ifdef ALTQ_KLD + altq_module_incref(type); +#endif +#endif + IFQ_UNLOCK(ifq); + return 0; +} + +int +altq_detach(ifq) + struct ifaltq *ifq; +{ + IFQ_LOCK(ifq); + + if (!ALTQ_IS_READY(ifq)) { + IFQ_UNLOCK(ifq); + return ENXIO; + } + if (ALTQ_IS_ENABLED(ifq)) { + IFQ_UNLOCK(ifq); + return EBUSY; + } + if (!ALTQ_IS_ATTACHED(ifq)) { + IFQ_UNLOCK(ifq); + return (0); + } +#ifdef ALTQ3_COMPAT +#ifdef ALTQ_KLD + altq_module_declref(ifq->altq_type); +#endif +#endif + + ifq->altq_type = ALTQT_NONE; + ifq->altq_disc = NULL; + ifq->altq_enqueue = NULL; + ifq->altq_dequeue = NULL; + ifq->altq_request = NULL; + ifq->altq_clfier = NULL; + ifq->altq_classify = NULL; + ifq->altq_flags &= ALTQF_CANTCHANGE; + + IFQ_UNLOCK(ifq); + return 0; +} + +int +altq_enable(ifq) + struct ifaltq *ifq; +{ + int s; + + IFQ_LOCK(ifq); + + if (!ALTQ_IS_READY(ifq)) { + IFQ_UNLOCK(ifq); + return ENXIO; + } + if (ALTQ_IS_ENABLED(ifq)) { + IFQ_UNLOCK(ifq); + return 0; + } + + s = splnet(); + IFQ_PURGE_NOLOCK(ifq); + ASSERT(ifq->ifq_len == 0); + ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ + ifq->altq_flags |= ALTQF_ENABLED; + if (ifq->altq_clfier != NULL) + ifq->altq_flags |= ALTQF_CLASSIFY; + splx(s); + + IFQ_UNLOCK(ifq); + return 0; +} + +int +altq_disable(ifq) + struct ifaltq *ifq; +{ + int s; + + IFQ_LOCK(ifq); + if (!ALTQ_IS_ENABLED(ifq)) { + IFQ_UNLOCK(ifq); + return 0; + } + + s = splnet(); + IFQ_PURGE_NOLOCK(ifq); + ASSERT(ifq->ifq_len == 0); + ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); + splx(s); + + IFQ_UNLOCK(ifq); + 
return 0; +} + +#ifdef ALTQ_DEBUG +void +altq_assert(file, line, failedexpr) + const char *file, *failedexpr; + int line; +{ + (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", + failedexpr, file, line); + panic("altq assertion"); + /* NOTREACHED */ +} +#endif + +/* + * internal representation of token bucket parameters + * rate: byte_per_unittime << 32 + * (((bits_per_sec) / 8) << 32) / machclk_freq + * depth: byte << 32 + * + */ +#define TBR_SHIFT 32 +#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) +#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) + +static struct mbuf * +tbr_dequeue(ifq, op) + struct ifaltq *ifq; + int op; +{ + struct tb_regulator *tbr; + struct mbuf *m; + int64_t interval; + u_int64_t now; + + IFQ_LOCK_ASSERT(ifq); + tbr = ifq->altq_tbr; + if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { + /* if this is a remove after poll, bypass tbr check */ + } else { + /* update token only when it is negative */ + if (tbr->tbr_token <= 0) { + now = read_machclk(); + interval = now - tbr->tbr_last; + if (interval >= tbr->tbr_filluptime) + tbr->tbr_token = tbr->tbr_depth; + else { + tbr->tbr_token += interval * tbr->tbr_rate; + if (tbr->tbr_token > tbr->tbr_depth) + tbr->tbr_token = tbr->tbr_depth; + } + tbr->tbr_last = now; + } + /* if token is still negative, don't allow dequeue */ + if (tbr->tbr_token <= 0) + return (NULL); + } + + if (ALTQ_IS_ENABLED(ifq)) + m = (*ifq->altq_dequeue)(ifq, op); + else { + if (op == ALTDQ_POLL) + _IF_POLL(ifq, m); + else + _IF_DEQUEUE(ifq, m); + } + + if (m != NULL && op == ALTDQ_REMOVE) + tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); + tbr->tbr_lastop = op; + return (m); +} + +/* + * set a token bucket regulator. + * if the specified rate is zero, the token bucket regulator is deleted. 
+ */ +int +tbr_set(ifq, profile) + struct ifaltq *ifq; + struct tb_profile *profile; +{ + struct tb_regulator *tbr, *otbr; + + if (tbr_dequeue_ptr == NULL) + tbr_dequeue_ptr = tbr_dequeue; + + if (machclk_freq == 0) + init_machclk(); + if (machclk_freq == 0) { + printf("tbr_set: no cpu clock available!\n"); + return (ENXIO); + } + + IFQ_LOCK(ifq); + if (profile->rate == 0) { + /* delete this tbr */ + if ((tbr = ifq->altq_tbr) == NULL) { + IFQ_UNLOCK(ifq); + return (ENOENT); + } + ifq->altq_tbr = NULL; + free(tbr, M_DEVBUF); + IFQ_UNLOCK(ifq); + return (0); + } + + tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); + if (tbr == NULL) { + IFQ_UNLOCK(ifq); + return (ENOMEM); + } + + tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; + tbr->tbr_depth = TBR_SCALE(profile->depth); + if (tbr->tbr_rate > 0) + tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; + else + tbr->tbr_filluptime = 0xffffffffffffffffLL; + tbr->tbr_token = tbr->tbr_depth; + tbr->tbr_last = read_machclk(); + tbr->tbr_lastop = ALTDQ_REMOVE; + + otbr = ifq->altq_tbr; + ifq->altq_tbr = tbr; /* set the new tbr */ + + if (otbr != NULL) + free(otbr, M_DEVBUF); + else { + if (tbr_timer == 0) { + CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); + tbr_timer = 1; + } + } + IFQ_UNLOCK(ifq); + return (0); +} + +/* + * tbr_timeout goes through the interface list, and kicks the drivers + * if necessary. 
+ * + * MPSAFE + */ +static void +tbr_timeout(arg) + void *arg; +{ + VNET_ITERATOR_DECL(vnet_iter); + struct ifnet *ifp; + int active, s; + + active = 0; + s = splnet(); + IFNET_RLOCK_NOSLEEP(); + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + for (ifp = TAILQ_FIRST(&V_ifnet); ifp; + ifp = TAILQ_NEXT(ifp, if_list)) { + /* read from if_snd unlocked */ + if (!TBR_IS_ENABLED(&ifp->if_snd)) + continue; + active++; + if (!IFQ_IS_EMPTY(&ifp->if_snd) && + ifp->if_start != NULL) + (*ifp->if_start)(ifp); + } + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); + IFNET_RUNLOCK_NOSLEEP(); + splx(s); + if (active > 0) + CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); + else + tbr_timer = 0; /* don't need tbr_timer anymore */ +} + +/* + * get token bucket regulator profile + */ +int +tbr_get(ifq, profile) + struct ifaltq *ifq; + struct tb_profile *profile; +{ + struct tb_regulator *tbr; + + IFQ_LOCK(ifq); + if ((tbr = ifq->altq_tbr) == NULL) { + profile->rate = 0; + profile->depth = 0; + } else { + profile->rate = + (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); + profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); + } + IFQ_UNLOCK(ifq); + return (0); +} + +/* + * attach a discipline to the interface. if one already exists, it is + * overridden. + * Locking is done in the discipline specific attach functions. Basically + * they call back to altq_attach which takes care of the attach and locking. 
+ */ +int +altq_pfattach(struct pf_altq *a) +{ + int error = 0; + + switch (a->scheduler) { + case ALTQT_NONE: + break; +#ifdef ALTQ_CBQ + case ALTQT_CBQ: + error = cbq_pfattach(a); + break; +#endif +#ifdef ALTQ_PRIQ + case ALTQT_PRIQ: + error = priq_pfattach(a); + break; +#endif +#ifdef ALTQ_HFSC + case ALTQT_HFSC: + error = hfsc_pfattach(a); + break; +#endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_pfattach(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_pfattach(a); + break; +#endif + default: + error = ENXIO; + } + + return (error); +} + +/* + * detach a discipline from the interface. + * it is possible that the discipline was already overridden by another + * discipline. + */ +int +altq_pfdetach(struct pf_altq *a) +{ + struct ifnet *ifp; + int s, error = 0; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + + /* if this discipline is no longer referenced, just return */ + /* read unlocked from if_snd */ + if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) + return (0); + + s = splnet(); + /* read unlocked from if_snd, _disable and _detach take care */ + if (ALTQ_IS_ENABLED(&ifp->if_snd)) + error = altq_disable(&ifp->if_snd); + if (error == 0) + error = altq_detach(&ifp->if_snd); + splx(s); + + return (error); +} + +/* + * add a discipline or a queue + * Locking is done in the discipline specific functions with regards to + * malloc with WAITOK, also it is not yet clear which lock to use. 
+ */ +int +altq_add(struct pf_altq *a) +{ + int error = 0; + + if (a->qname[0] != 0) + return (altq_add_queue(a)); + + if (machclk_freq == 0) + init_machclk(); + if (machclk_freq == 0) + panic("altq_add: no cpu clock"); + + switch (a->scheduler) { +#ifdef ALTQ_CBQ + case ALTQT_CBQ: + error = cbq_add_altq(a); + break; +#endif +#ifdef ALTQ_PRIQ + case ALTQT_PRIQ: + error = priq_add_altq(a); + break; +#endif +#ifdef ALTQ_HFSC + case ALTQT_HFSC: + error = hfsc_add_altq(a); + break; +#endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_add_altq(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_add_altq(a); + break; +#endif + default: + error = ENXIO; + } + + return (error); +} + +/* + * remove a discipline or a queue + * It is yet unclear what lock to use to protect this operation, the + * discipline specific functions will determine and grab it + */ +int +altq_remove(struct pf_altq *a) +{ + int error = 0; + + if (a->qname[0] != 0) + return (altq_remove_queue(a)); + + switch (a->scheduler) { +#ifdef ALTQ_CBQ + case ALTQT_CBQ: + error = cbq_remove_altq(a); + break; +#endif +#ifdef ALTQ_PRIQ + case ALTQT_PRIQ: + error = priq_remove_altq(a); + break; +#endif +#ifdef ALTQ_HFSC + case ALTQT_HFSC: + error = hfsc_remove_altq(a); + break; +#endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_remove_altq(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_remove_altq(a); + break; +#endif + default: + error = ENXIO; + } + + return (error); +} + +/* + * add a queue to the discipline + * It is yet unclear what lock to use to protect this operation, the + * discipline specific functions will determine and grab it + */ +int +altq_add_queue(struct pf_altq *a) +{ + int error = 0; + + switch (a->scheduler) { +#ifdef ALTQ_CBQ + case ALTQT_CBQ: + error = cbq_add_queue(a); + break; +#endif +#ifdef ALTQ_PRIQ + case ALTQT_PRIQ: + error = priq_add_queue(a); + break; +#endif +#ifdef ALTQ_HFSC + case ALTQT_HFSC: + error = 
hfsc_add_queue(a); + break; +#endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_add_queue(a); + break; +#endif + default: + error = ENXIO; + } + + return (error); +} + +/* + * remove a queue from the discipline + * It is yet unclear what lock to use to protect this operation, the + * discipline specific functions will determine and grab it + */ +int +altq_remove_queue(struct pf_altq *a) +{ + int error = 0; + + switch (a->scheduler) { +#ifdef ALTQ_CBQ + case ALTQT_CBQ: + error = cbq_remove_queue(a); + break; +#endif +#ifdef ALTQ_PRIQ + case ALTQT_PRIQ: + error = priq_remove_queue(a); + break; +#endif +#ifdef ALTQ_HFSC + case ALTQT_HFSC: + error = hfsc_remove_queue(a); + break; +#endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_remove_queue(a); + break; +#endif + default: + error = ENXIO; + } + + return (error); +} + +/* + * get queue statistics + * Locking is done in the discipline specific functions with regards to + * copyout operations, also it is not yet clear which lock to use. 
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
/*
 * Return the DS/TOS byte of the packet whose header is cached in
 * pktattr, or 0 when the address family is unsupported, the cached
 * header pointer is stale (no longer inside the mbuf chain), or the
 * IP version does not match the family.
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		/* traffic class occupies bits 20-27 of the v6 flow word */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

/*
 * Overwrite the DS field of the packet cached in pktattr, preserving
 * the two low (CU) bits, and incrementally update the IPv4 header
 * checksum per RFC 1624.  Silently returns on a stale header pointer
 * or version mismatch.
 */
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		/* replace bits 20-27 (traffic class) only */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}


/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 *
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;		/* nonzero: read the CPU cycle counter */
u_int32_t machclk_freq;		/* machine clock frequency in Hz */
u_int32_t machclk_per_tick;	/* machine clock ticks per hz tick */

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

#if (__FreeBSD_version >= 700035)
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */

/*
 * One-time setup: decide whether the CPU cycle counter may be used
 * (it is avoided on SMP and when the TSC is missing or unusable).
 */
static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
		machclk_usepcc = 0;
#endif
}

/*
 * Determine machclk_freq and machclk_per_tick.  Falls back to a
 * 256MHz clock emulated via microtime(); if the TSC frequency is not
 * known it is measured against a one-second tsleep().  NOTE(review):
 * may sleep in the measurement path — callers apparently invoke this
 * from process context; confirm before adding new call sites.
 */
void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int wait;
		struct timeval tv_start, tv_end;
		u_int64_t start, end, diff;
		int timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
/* raw RDTSC (opcode 0f 31) for OpenBSD/i386 which lacks the intrinsic */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

/*
 * Read the machine clock: the CPU cycle counter when permitted,
 * otherwise microseconds since boot scaled by MACHCLK_SHIFT (the
 * emulated 256MHz clock).
 */
u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv, boottime;

		microtime(&tv);
		getboottime(&boottime);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}

#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
+#endif + +/* + * extract flow information from a given packet. + * filt_mask shows flowinfo fields required. + * we assume the ip header is in one mbuf, and addresses and ports are + * in network byte order. + */ +int +altq_extractflow(m, af, flow, filt_bmask) + struct mbuf *m; + int af; + struct flowinfo *flow; + u_int32_t filt_bmask; +{ + + switch (af) { + case PF_INET: { + struct flowinfo_in *fin; + struct ip *ip; + + ip = mtod(m, struct ip *); + + if (ip->ip_v != 4) + break; + + fin = (struct flowinfo_in *)flow; + fin->fi_len = sizeof(struct flowinfo_in); + fin->fi_family = AF_INET; + + fin->fi_proto = ip->ip_p; + fin->fi_tos = ip->ip_tos; + + fin->fi_src.s_addr = ip->ip_src.s_addr; + fin->fi_dst.s_addr = ip->ip_dst.s_addr; + + if (filt_bmask & FIMB4_PORTS) + /* if port info is required, extract port numbers */ + extract_ports4(m, ip, fin); + else { + fin->fi_sport = 0; + fin->fi_dport = 0; + fin->fi_gpi = 0; + } + return (1); + } + +#ifdef INET6 + case PF_INET6: { + struct flowinfo_in6 *fin6; + struct ip6_hdr *ip6; + + ip6 = mtod(m, struct ip6_hdr *); + /* should we check the ip version? */ + + fin6 = (struct flowinfo_in6 *)flow; + fin6->fi6_len = sizeof(struct flowinfo_in6); + fin6->fi6_family = AF_INET6; + + fin6->fi6_proto = ip6->ip6_nxt; + fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + + fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); + fin6->fi6_src = ip6->ip6_src; + fin6->fi6_dst = ip6->ip6_dst; + + if ((filt_bmask & FIMB6_PORTS) || + ((filt_bmask & FIMB6_PROTO) + && ip6->ip6_nxt > IPPROTO_IPV6)) + /* + * if port info is required, or proto is required + * but there are option headers, extract port + * and protocol numbers. 
+ */ + extract_ports6(m, ip6, fin6); + else { + fin6->fi6_sport = 0; + fin6->fi6_dport = 0; + fin6->fi6_gpi = 0; + } + return (1); + } +#endif /* INET6 */ + + default: + break; + } + + /* failed */ + flow->fi_len = sizeof(struct flowinfo); + flow->fi_family = AF_UNSPEC; + return (0); +} + +/* + * helper routine to extract port numbers + */ +/* structure for ipsec and ipv6 option header template */ +struct _opt6 { + u_int8_t opt6_nxt; /* next header */ + u_int8_t opt6_hlen; /* header extension length */ + u_int16_t _pad; + u_int32_t ah_spi; /* security parameter index + for authentication header */ +}; + +/* + * extract port numbers from a ipv4 packet. + */ +static int +extract_ports4(m, ip, fin) + struct mbuf *m; + struct ip *ip; + struct flowinfo_in *fin; +{ + struct mbuf *m0; + u_short ip_off; + u_int8_t proto; + int off; + + fin->fi_sport = 0; + fin->fi_dport = 0; + fin->fi_gpi = 0; + + ip_off = ntohs(ip->ip_off); + /* if it is a fragment, try cached fragment info */ + if (ip_off & IP_OFFMASK) { + ip4f_lookup(ip, fin); + return (1); + } + + /* locate the mbuf containing the protocol header */ + for (m0 = m; m0 != NULL; m0 = m0->m_next) + if (((caddr_t)ip >= m0->m_data) && + ((caddr_t)ip < m0->m_data + m0->m_len)) + break; + if (m0 == NULL) { +#ifdef ALTQ_DEBUG + printf("extract_ports4: can't locate header! ip=%p\n", ip); +#endif + return (0); + } + off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); + proto = ip->ip_p; + +#ifdef ALTQ_IPSEC + again: +#endif + while (off >= m0->m_len) { + off -= m0->m_len; + m0 = m0->m_next; + if (m0 == NULL) + return (0); /* bogus ip_hl! 
*/ + } + if (m0->m_len < off + 4) + return (0); + + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: { + struct udphdr *udp; + + udp = (struct udphdr *)(mtod(m0, caddr_t) + off); + fin->fi_sport = udp->uh_sport; + fin->fi_dport = udp->uh_dport; + fin->fi_proto = proto; + } + break; + +#ifdef ALTQ_IPSEC + case IPPROTO_ESP: + if (fin->fi_gpi == 0){ + u_int32_t *gpi; + + gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); + fin->fi_gpi = *gpi; + } + fin->fi_proto = proto; + break; + + case IPPROTO_AH: { + /* get next header and header length */ + struct _opt6 *opt6; + + opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); + proto = opt6->opt6_nxt; + off += 8 + (opt6->opt6_hlen * 4); + if (fin->fi_gpi == 0 && m0->m_len >= off + 8) + fin->fi_gpi = opt6->ah_spi; + } + /* goto the next header */ + goto again; +#endif /* ALTQ_IPSEC */ + + default: + fin->fi_proto = proto; + return (0); + } + + /* if this is a first fragment, cache it. */ + if (ip_off & IP_MF) + ip4f_cache(ip, fin); + + return (1); +} + +#ifdef INET6 +static int +extract_ports6(m, ip6, fin6) + struct mbuf *m; + struct ip6_hdr *ip6; + struct flowinfo_in6 *fin6; +{ + struct mbuf *m0; + int off; + u_int8_t proto; + + fin6->fi6_gpi = 0; + fin6->fi6_sport = 0; + fin6->fi6_dport = 0; + + /* locate the mbuf containing the protocol header */ + for (m0 = m; m0 != NULL; m0 = m0->m_next) + if (((caddr_t)ip6 >= m0->m_data) && + ((caddr_t)ip6 < m0->m_data + m0->m_len)) + break; + if (m0 == NULL) { +#ifdef ALTQ_DEBUG + printf("extract_ports6: can't locate header! 
ip6=%p\n", ip6); +#endif + return (0); + } + off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); + + proto = ip6->ip6_nxt; + do { + while (off >= m0->m_len) { + off -= m0->m_len; + m0 = m0->m_next; + if (m0 == NULL) + return (0); + } + if (m0->m_len < off + 4) + return (0); + + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: { + struct udphdr *udp; + + udp = (struct udphdr *)(mtod(m0, caddr_t) + off); + fin6->fi6_sport = udp->uh_sport; + fin6->fi6_dport = udp->uh_dport; + fin6->fi6_proto = proto; + } + return (1); + + case IPPROTO_ESP: + if (fin6->fi6_gpi == 0) { + u_int32_t *gpi; + + gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); + fin6->fi6_gpi = *gpi; + } + fin6->fi6_proto = proto; + return (1); + + case IPPROTO_AH: { + /* get next header and header length */ + struct _opt6 *opt6; + + opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); + if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) + fin6->fi6_gpi = opt6->ah_spi; + proto = opt6->opt6_nxt; + off += 8 + (opt6->opt6_hlen * 4); + /* goto the next header */ + break; + } + + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct _opt6 *opt6; + + opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); + proto = opt6->opt6_nxt; + off += (opt6->opt6_hlen + 1) * 8; + /* goto the next header */ + break; + } + + case IPPROTO_FRAGMENT: + /* ipv6 fragmentations are not supported yet */ + default: + fin6->fi6_proto = proto; + return (0); + } + } while (1); + /*NOTREACHED*/ +} +#endif /* INET6 */ + +/* + * altq common classifier + */ +int +acc_add_filter(classifier, filter, class, phandle) + struct acc_classifier *classifier; + struct flow_filter *filter; + void *class; + u_long *phandle; +{ + struct acc_filter *afp, *prev, *tmp; + int i, s; + +#ifdef INET6 + if (filter->ff_flow.fi_family != AF_INET && + filter->ff_flow.fi_family != AF_INET6) + return (EINVAL); +#else + if (filter->ff_flow.fi_family != AF_INET) + return (EINVAL); +#endif + + afp = 
malloc(sizeof(struct acc_filter), + M_DEVBUF, M_WAITOK); + if (afp == NULL) + return (ENOMEM); + bzero(afp, sizeof(struct acc_filter)); + + afp->f_filter = *filter; + afp->f_class = class; + + i = ACC_WILDCARD_INDEX; + if (filter->ff_flow.fi_family == AF_INET) { + struct flow_filter *filter4 = &afp->f_filter; + + /* + * if address is 0, it's a wildcard. if address mask + * isn't set, use full mask. + */ + if (filter4->ff_flow.fi_dst.s_addr == 0) + filter4->ff_mask.mask_dst.s_addr = 0; + else if (filter4->ff_mask.mask_dst.s_addr == 0) + filter4->ff_mask.mask_dst.s_addr = 0xffffffff; + if (filter4->ff_flow.fi_src.s_addr == 0) + filter4->ff_mask.mask_src.s_addr = 0; + else if (filter4->ff_mask.mask_src.s_addr == 0) + filter4->ff_mask.mask_src.s_addr = 0xffffffff; + + /* clear extra bits in addresses */ + filter4->ff_flow.fi_dst.s_addr &= + filter4->ff_mask.mask_dst.s_addr; + filter4->ff_flow.fi_src.s_addr &= + filter4->ff_mask.mask_src.s_addr; + + /* + * if dst address is a wildcard, use hash-entry + * ACC_WILDCARD_INDEX. 
+ */ + if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) + i = ACC_WILDCARD_INDEX; + else + i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); + } +#ifdef INET6 + else if (filter->ff_flow.fi_family == AF_INET6) { + struct flow_filter6 *filter6 = + (struct flow_filter6 *)&afp->f_filter; +#ifndef IN6MASK0 /* taken from kame ipv6 */ +#define IN6MASK0 {{{ 0, 0, 0, 0 }}} +#define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} + const struct in6_addr in6mask0 = IN6MASK0; + const struct in6_addr in6mask128 = IN6MASK128; +#endif + + if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) + filter6->ff_mask6.mask6_dst = in6mask0; + else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) + filter6->ff_mask6.mask6_dst = in6mask128; + if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) + filter6->ff_mask6.mask6_src = in6mask0; + else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) + filter6->ff_mask6.mask6_src = in6mask128; + + /* clear extra bits in addresses */ + for (i = 0; i < 16; i++) + filter6->ff_flow6.fi6_dst.s6_addr[i] &= + filter6->ff_mask6.mask6_dst.s6_addr[i]; + for (i = 0; i < 16; i++) + filter6->ff_flow6.fi6_src.s6_addr[i] &= + filter6->ff_mask6.mask6_src.s6_addr[i]; + + if (filter6->ff_flow6.fi6_flowlabel == 0) + i = ACC_WILDCARD_INDEX; + else + i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); + } +#endif /* INET6 */ + + afp->f_handle = get_filt_handle(classifier, i); + + /* update filter bitmask */ + afp->f_fbmask = filt2fibmask(filter); + classifier->acc_fbmask |= afp->f_fbmask; + + /* + * add this filter to the filter list. + * filters are ordered from the highest rule number. 
+ */ + s = splnet(); + prev = NULL; + LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { + if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) + prev = tmp; + else + break; + } + if (prev == NULL) + LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); + else + LIST_INSERT_AFTER(prev, afp, f_chain); + splx(s); + + *phandle = afp->f_handle; + return (0); +} + +int +acc_delete_filter(classifier, handle) + struct acc_classifier *classifier; + u_long handle; +{ + struct acc_filter *afp; + int s; + + if ((afp = filth_to_filtp(classifier, handle)) == NULL) + return (EINVAL); + + s = splnet(); + LIST_REMOVE(afp, f_chain); + splx(s); + + free(afp, M_DEVBUF); + + /* todo: update filt_bmask */ + + return (0); +} + +/* + * delete filters referencing to the specified class. + * if the all flag is not 0, delete all the filters. + */ +int +acc_discard_filters(classifier, class, all) + struct acc_classifier *classifier; + void *class; + int all; +{ + struct acc_filter *afp; + int i, s; + + s = splnet(); + for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { + do { + LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) + if (all || afp->f_class == class) { + LIST_REMOVE(afp, f_chain); + free(afp, M_DEVBUF); + /* start again from the head */ + break; + } + } while (afp != NULL); + } + splx(s); + + if (all) + classifier->acc_fbmask = 0; + + return (0); +} + +void * +acc_classify(clfier, m, af) + void *clfier; + struct mbuf *m; + int af; +{ + struct acc_classifier *classifier; + struct flowinfo flow; + struct acc_filter *afp; + int i; + + classifier = (struct acc_classifier *)clfier; + altq_extractflow(m, af, &flow, classifier->acc_fbmask); + + if (flow.fi_family == AF_INET) { + struct flowinfo_in *fp = (struct flowinfo_in *)&flow; + + if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { + /* only tos is used */ + LIST_FOREACH(afp, + &classifier->acc_filters[ACC_WILDCARD_INDEX], + f_chain) + if (apply_tosfilter4(afp->f_fbmask, + &afp->f_filter, fp)) + /* filter 
matched */ + return (afp->f_class); + } else if ((classifier->acc_fbmask & + (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) + == 0) { + /* only proto and ports are used */ + LIST_FOREACH(afp, + &classifier->acc_filters[ACC_WILDCARD_INDEX], + f_chain) + if (apply_ppfilter4(afp->f_fbmask, + &afp->f_filter, fp)) + /* filter matched */ + return (afp->f_class); + } else { + /* get the filter hash entry from its dest address */ + i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); + do { + /* + * go through this loop twice. first for dst + * hash, second for wildcards. + */ + LIST_FOREACH(afp, &classifier->acc_filters[i], + f_chain) + if (apply_filter4(afp->f_fbmask, + &afp->f_filter, fp)) + /* filter matched */ + return (afp->f_class); + + /* + * check again for filters with a dst addr + * wildcard. + * (daddr == 0 || dmask != 0xffffffff). + */ + if (i != ACC_WILDCARD_INDEX) + i = ACC_WILDCARD_INDEX; + else + break; + } while (1); + } + } +#ifdef INET6 + else if (flow.fi_family == AF_INET6) { + struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; + + /* get the filter hash entry from its flow ID */ + if (fp6->fi6_flowlabel != 0) + i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); + else + /* flowlable can be zero */ + i = ACC_WILDCARD_INDEX; + + /* go through this loop twice. first for flow hash, second + for wildcards. */ + do { + LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) + if (apply_filter6(afp->f_fbmask, + (struct flow_filter6 *)&afp->f_filter, + fp6)) + /* filter matched */ + return (afp->f_class); + + /* + * check again for filters with a wildcard. 
+ */ + if (i != ACC_WILDCARD_INDEX) + i = ACC_WILDCARD_INDEX; + else + break; + } while (1); + } +#endif /* INET6 */ + + /* no filter matched */ + return (NULL); +} + +static int +apply_filter4(fbmask, filt, pkt) + u_int32_t fbmask; + struct flow_filter *filt; + struct flowinfo_in *pkt; +{ + if (filt->ff_flow.fi_family != AF_INET) + return (0); + if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) + return (0); + if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) + return (0); + if ((fbmask & FIMB4_DADDR) && + filt->ff_flow.fi_dst.s_addr != + (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) + return (0); + if ((fbmask & FIMB4_SADDR) && + filt->ff_flow.fi_src.s_addr != + (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) + return (0); + if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) + return (0); + if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != + (pkt->fi_tos & filt->ff_mask.mask_tos)) + return (0); + if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) + return (0); + /* match */ + return (1); +} + +/* + * filter matching function optimized for a common case that checks + * only protocol and port numbers + */ +static int +apply_ppfilter4(fbmask, filt, pkt) + u_int32_t fbmask; + struct flow_filter *filt; + struct flowinfo_in *pkt; +{ + if (filt->ff_flow.fi_family != AF_INET) + return (0); + if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) + return (0); + if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) + return (0); + if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) + return (0); + /* match */ + return (1); +} + +/* + * filter matching function only for tos field. 
+ */ +static int +apply_tosfilter4(fbmask, filt, pkt) + u_int32_t fbmask; + struct flow_filter *filt; + struct flowinfo_in *pkt; +{ + if (filt->ff_flow.fi_family != AF_INET) + return (0); + if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != + (pkt->fi_tos & filt->ff_mask.mask_tos)) + return (0); + /* match */ + return (1); +} + +#ifdef INET6 +static int +apply_filter6(fbmask, filt, pkt) + u_int32_t fbmask; + struct flow_filter6 *filt; + struct flowinfo_in6 *pkt; +{ + int i; + + if (filt->ff_flow6.fi6_family != AF_INET6) + return (0); + if ((fbmask & FIMB6_FLABEL) && + filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) + return (0); + if ((fbmask & FIMB6_PROTO) && + filt->ff_flow6.fi6_proto != pkt->fi6_proto) + return (0); + if ((fbmask & FIMB6_SPORT) && + filt->ff_flow6.fi6_sport != pkt->fi6_sport) + return (0); + if ((fbmask & FIMB6_DPORT) && + filt->ff_flow6.fi6_dport != pkt->fi6_dport) + return (0); + if (fbmask & FIMB6_SADDR) { + for (i = 0; i < 4; i++) + if (filt->ff_flow6.fi6_src.s6_addr32[i] != + (pkt->fi6_src.s6_addr32[i] & + filt->ff_mask6.mask6_src.s6_addr32[i])) + return (0); + } + if (fbmask & FIMB6_DADDR) { + for (i = 0; i < 4; i++) + if (filt->ff_flow6.fi6_dst.s6_addr32[i] != + (pkt->fi6_dst.s6_addr32[i] & + filt->ff_mask6.mask6_dst.s6_addr32[i])) + return (0); + } + if ((fbmask & FIMB6_TCLASS) && + filt->ff_flow6.fi6_tclass != + (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) + return (0); + if ((fbmask & FIMB6_GPI) && + filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) + return (0); + /* match */ + return (1); +} +#endif /* INET6 */ + +/* + * filter handle: + * bit 20-28: index to the filter hash table + * bit 0-19: unique id in the hash bucket. 
+ */ +static u_long +get_filt_handle(classifier, i) + struct acc_classifier *classifier; + int i; +{ + static u_long handle_number = 1; + u_long handle; + struct acc_filter *afp; + + while (1) { + handle = handle_number++ & 0x000fffff; + + if (LIST_EMPTY(&classifier->acc_filters[i])) + break; + + LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) + if ((afp->f_handle & 0x000fffff) == handle) + break; + if (afp == NULL) + break; + /* this handle is already used, try again */ + } + + return ((i << 20) | handle); +} + +/* convert filter handle to filter pointer */ +static struct acc_filter * +filth_to_filtp(classifier, handle) + struct acc_classifier *classifier; + u_long handle; +{ + struct acc_filter *afp; + int i; + + i = ACC_GET_HINDEX(handle); + + LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) + if (afp->f_handle == handle) + return (afp); + + return (NULL); +} + +/* create flowinfo bitmask */ +static u_int32_t +filt2fibmask(filt) + struct flow_filter *filt; +{ + u_int32_t mask = 0; +#ifdef INET6 + struct flow_filter6 *filt6; +#endif + + switch (filt->ff_flow.fi_family) { + case AF_INET: + if (filt->ff_flow.fi_proto != 0) + mask |= FIMB4_PROTO; + if (filt->ff_flow.fi_tos != 0) + mask |= FIMB4_TOS; + if (filt->ff_flow.fi_dst.s_addr != 0) + mask |= FIMB4_DADDR; + if (filt->ff_flow.fi_src.s_addr != 0) + mask |= FIMB4_SADDR; + if (filt->ff_flow.fi_sport != 0) + mask |= FIMB4_SPORT; + if (filt->ff_flow.fi_dport != 0) + mask |= FIMB4_DPORT; + if (filt->ff_flow.fi_gpi != 0) + mask |= FIMB4_GPI; + break; +#ifdef INET6 + case AF_INET6: + filt6 = (struct flow_filter6 *)filt; + + if (filt6->ff_flow6.fi6_proto != 0) + mask |= FIMB6_PROTO; + if (filt6->ff_flow6.fi6_tclass != 0) + mask |= FIMB6_TCLASS; + if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) + mask |= FIMB6_DADDR; + if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) + mask |= FIMB6_SADDR; + if (filt6->ff_flow6.fi6_sport != 0) + mask |= FIMB6_SPORT; + if (filt6->ff_flow6.fi6_dport != 0) + 
mask |= FIMB6_DPORT; + if (filt6->ff_flow6.fi6_gpi != 0) + mask |= FIMB6_GPI; + if (filt6->ff_flow6.fi6_flowlabel != 0) + mask |= FIMB6_FLABEL; + break; +#endif /* INET6 */ + } + return (mask); +} + + +/* + * helper functions to handle IPv4 fragments. + * currently only in-sequence fragments are handled. + * - fragment info is cached in a LRU list. + * - when a first fragment is found, cache its flow info. + * - when a non-first fragment is found, lookup the cache. + */ + +struct ip4_frag { + TAILQ_ENTRY(ip4_frag) ip4f_chain; + char ip4f_valid; + u_short ip4f_id; + struct flowinfo_in ip4f_info; +}; + +static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ + +#define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ + + +static void +ip4f_cache(ip, fin) + struct ip *ip; + struct flowinfo_in *fin; +{ + struct ip4_frag *fp; + + if (TAILQ_EMPTY(&ip4f_list)) { + /* first time call, allocate fragment cache entries. */ + if (ip4f_init() < 0) + /* allocation failed! */ + return; + } + + fp = ip4f_alloc(); + fp->ip4f_id = ip->ip_id; + fp->ip4f_info.fi_proto = ip->ip_p; + fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; + fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; + + /* save port numbers */ + fp->ip4f_info.fi_sport = fin->fi_sport; + fp->ip4f_info.fi_dport = fin->fi_dport; + fp->ip4f_info.fi_gpi = fin->fi_gpi; +} + +static int +ip4f_lookup(ip, fin) + struct ip *ip; + struct flowinfo_in *fin; +{ + struct ip4_frag *fp; + + for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; + fp = TAILQ_NEXT(fp, ip4f_chain)) + if (ip->ip_id == fp->ip4f_id && + ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && + ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && + ip->ip_p == fp->ip4f_info.fi_proto) { + + /* found the matching entry */ + fin->fi_sport = fp->ip4f_info.fi_sport; + fin->fi_dport = fp->ip4f_info.fi_dport; + fin->fi_gpi = fp->ip4f_info.fi_gpi; + + if ((ntohs(ip->ip_off) & IP_MF) == 0) + /* this is the last fragment, + release the entry. 
*/ + ip4f_free(fp); + + return (1); + } + + /* no matching entry found */ + return (0); +} + +static int +ip4f_init(void) +{ + struct ip4_frag *fp; + int i; + + TAILQ_INIT(&ip4f_list); + for (i=0; i<IP4F_TABSIZE; i++) { + fp = malloc(sizeof(struct ip4_frag), + M_DEVBUF, M_NOWAIT); + if (fp == NULL) { + printf("ip4f_init: can't alloc %dth entry!\n", i); + if (i == 0) + return (-1); + return (0); + } + fp->ip4f_valid = 0; + TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); + } + return (0); +} + +static struct ip4_frag * +ip4f_alloc(void) +{ + struct ip4_frag *fp; + + /* reclaim an entry at the tail, put it at the head */ + fp = TAILQ_LAST(&ip4f_list, ip4f_list); + TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); + fp->ip4f_valid = 1; + TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); + return (fp); +} + +static void +ip4f_free(fp) + struct ip4_frag *fp; +{ + TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); + fp->ip4f_valid = 0; + TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); +} + +#endif /* ALTQ3_CLFIER_COMPAT */ diff --git a/freebsd/sys/net/altq/altq_var.h b/freebsd/sys/net/altq/altq_var.h new file mode 100644 index 00000000..2ddcb211 --- /dev/null +++ b/freebsd/sys/net/altq/altq_var.h @@ -0,0 +1,243 @@ +/*- + * Copyright (C) 1998-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ + * $FreeBSD$ + */ +#ifndef _ALTQ_ALTQ_VAR_H_ +#define _ALTQ_ALTQ_VAR_H_ + +#ifdef _KERNEL + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <sys/queue.h> + +#ifdef ALTQ3_CLFIER_COMPAT +/* + * filter structure for altq common classifier + */ +struct acc_filter { + LIST_ENTRY(acc_filter) f_chain; + void *f_class; /* pointer to the class */ + u_long f_handle; /* filter id */ + u_int32_t f_fbmask; /* filter bitmask */ + struct flow_filter f_filter; /* filter value */ +}; + +/* + * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix + * the handle assignment. 
+ */ +#define ACC_FILTER_TABLESIZE (256+1) +#define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2) +#define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1) +#ifdef __GNUC__ +#define ACC_GET_HASH_INDEX(addr) \ + ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;}) +#else +#define ACC_GET_HASH_INDEX(addr) \ + (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \ + & ACC_FILTER_MASK) +#endif +#define ACC_GET_HINDEX(handle) ((handle) >> 20) + +#if (__FreeBSD_version > 500000) +#define ACC_LOCK_INIT(ac) mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF) +#define ACC_LOCK_DESTROY(ac) mtx_destroy(&(ac)->acc_mtx) +#define ACC_LOCK(ac) mtx_lock(&(ac)->acc_mtx) +#define ACC_UNLOCK(ac) mtx_unlock(&(ac)->acc_mtx) +#else +#define ACC_LOCK_INIT(ac) +#define ACC_LOCK_DESTROY(ac) +#define ACC_LOCK(ac) +#define ACC_UNLOCK(ac) +#endif + +struct acc_classifier { + u_int32_t acc_fbmask; + LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE]; + +#if (__FreeBSD_version > 500000) + struct mtx acc_mtx; +#endif +}; + +/* + * flowinfo mask bits used by classifier + */ +/* for ipv4 */ +#define FIMB4_PROTO 0x0001 +#define FIMB4_TOS 0x0002 +#define FIMB4_DADDR 0x0004 +#define FIMB4_SADDR 0x0008 +#define FIMB4_DPORT 0x0010 +#define FIMB4_SPORT 0x0020 +#define FIMB4_GPI 0x0040 +#define FIMB4_ALL 0x007f +/* for ipv6 */ +#define FIMB6_PROTO 0x0100 +#define FIMB6_TCLASS 0x0200 +#define FIMB6_DADDR 0x0400 +#define FIMB6_SADDR 0x0800 +#define FIMB6_DPORT 0x1000 +#define FIMB6_SPORT 0x2000 +#define FIMB6_GPI 0x4000 +#define FIMB6_FLABEL 0x8000 +#define FIMB6_ALL 0xff00 + +#define FIMB_ALL (FIMB4_ALL|FIMB6_ALL) + +#define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI) +#define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI) +#endif /* ALTQ3_CLFIER_COMPAT */ + +/* + * machine dependent clock + * a 64bit high resolution time counter. 
+ */ +extern int machclk_usepcc; +extern u_int32_t machclk_freq; +extern u_int32_t machclk_per_tick; +extern void init_machclk(void); +extern u_int64_t read_machclk(void); + +/* + * debug support + */ +#ifdef ALTQ_DEBUG +#ifdef __STDC__ +#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e)) +#else /* PCC */ +#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e")) +#endif +#else +#define ASSERT(e) ((void)0) +#endif + +/* + * misc stuff for compatibility + */ +/* ioctl cmd type */ +typedef u_long ioctlcmd_t; + +/* + * queue macros: + * the interface of TAILQ_LAST macro changed after the introduction + * of softupdate. redefine it here to make it work with pre-2.2.7. + */ +#undef TAILQ_LAST +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#ifndef TAILQ_EMPTY +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) +#endif +#ifndef TAILQ_FOREACH +#define TAILQ_FOREACH(var, head, field) \ + for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field)) +#endif + +/* macro for timeout/untimeout */ +/* use callout */ +#include <sys/callout.h> + +#if (__FreeBSD_version > 500000) +#define CALLOUT_INIT(c) callout_init((c), 0) +#else +#define CALLOUT_INIT(c) callout_init((c)) +#endif +#define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a)) +#define CALLOUT_STOP(c) callout_stop((c)) +#if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000) +#define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 } +#endif + +#define m_pktlen(m) ((m)->m_pkthdr.len) + +struct ifnet; struct mbuf; +struct pf_altq; +#ifdef ALTQ3_CLFIER_COMPAT +struct flowinfo; +#endif + +void *altq_lookup(char *, int); +#ifdef ALTQ3_CLFIER_COMPAT +int altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t); +int acc_add_filter(struct acc_classifier *, struct flow_filter *, + void *, u_long *); +int acc_delete_filter(struct acc_classifier *, u_long); +int acc_discard_filters(struct acc_classifier *, 
void *, int); +void *acc_classify(void *, struct mbuf *, int); +#endif +u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *); +void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t); +void altq_assert(const char *, int, const char *); +int tbr_set(struct ifaltq *, struct tb_profile *); +int tbr_get(struct ifaltq *, struct tb_profile *); + +int altq_pfattach(struct pf_altq *); +int altq_pfdetach(struct pf_altq *); +int altq_add(struct pf_altq *); +int altq_remove(struct pf_altq *); +int altq_add_queue(struct pf_altq *); +int altq_remove_queue(struct pf_altq *); +int altq_getqstats(struct pf_altq *, void *, int *); + +int cbq_pfattach(struct pf_altq *); +int cbq_add_altq(struct pf_altq *); +int cbq_remove_altq(struct pf_altq *); +int cbq_add_queue(struct pf_altq *); +int cbq_remove_queue(struct pf_altq *); +int cbq_getqstats(struct pf_altq *, void *, int *); + +int codel_pfattach(struct pf_altq *); +int codel_add_altq(struct pf_altq *); +int codel_remove_altq(struct pf_altq *); +int codel_getqstats(struct pf_altq *, void *, int *); + +int priq_pfattach(struct pf_altq *); +int priq_add_altq(struct pf_altq *); +int priq_remove_altq(struct pf_altq *); +int priq_add_queue(struct pf_altq *); +int priq_remove_queue(struct pf_altq *); +int priq_getqstats(struct pf_altq *, void *, int *); + +int hfsc_pfattach(struct pf_altq *); +int hfsc_add_altq(struct pf_altq *); +int hfsc_remove_altq(struct pf_altq *); +int hfsc_add_queue(struct pf_altq *); +int hfsc_remove_queue(struct pf_altq *); +int hfsc_getqstats(struct pf_altq *, void *, int *); + +int fairq_pfattach(struct pf_altq *); +int fairq_add_altq(struct pf_altq *); +int fairq_remove_altq(struct pf_altq *); +int fairq_add_queue(struct pf_altq *); +int fairq_remove_queue(struct pf_altq *); +int fairq_getqstats(struct pf_altq *, void *, int *); + +#endif /* _KERNEL */ +#endif /* _ALTQ_ALTQ_VAR_H_ */ diff --git a/freebsd/sys/net/altq/if_altq.h b/freebsd/sys/net/altq/if_altq.h new file mode 100644 index 
00000000..c5ad2875 --- /dev/null +++ b/freebsd/sys/net/altq/if_altq.h @@ -0,0 +1,182 @@ +/*- + * Copyright (C) 1997-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ + * $FreeBSD$ + */ +#ifndef _ALTQ_IF_ALTQ_H_ +#define _ALTQ_IF_ALTQ_H_ + +#include <rtems/bsd/sys/lock.h> /* XXX */ +#include <sys/mutex.h> /* XXX */ +#include <sys/event.h> /* XXX */ + +struct altq_pktattr; struct tb_regulator; struct top_cdnr; + +/* + * Structure defining a queue for a network interface. 
+ */ +struct ifaltq { + /* fields compatible with struct ifqueue */ + struct mbuf *ifq_head; + struct mbuf *ifq_tail; + int ifq_len; + int ifq_maxlen; + struct mtx ifq_mtx; + + /* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */ + struct mbuf *ifq_drv_head; + struct mbuf *ifq_drv_tail; + int ifq_drv_len; + int ifq_drv_maxlen; + + /* alternate queueing related fields */ + int altq_type; /* discipline type */ + int altq_flags; /* flags (e.g. ready, in-use) */ + void *altq_disc; /* for discipline-specific use */ + struct ifnet *altq_ifp; /* back pointer to interface */ + + int (*altq_enqueue)(struct ifaltq *, struct mbuf *, + struct altq_pktattr *); + struct mbuf *(*altq_dequeue)(struct ifaltq *, int); + int (*altq_request)(struct ifaltq *, int, void *); + + /* classifier fields */ + void *altq_clfier; /* classifier-specific use */ + void *(*altq_classify)(void *, struct mbuf *, int); + + /* token bucket regulator */ + struct tb_regulator *altq_tbr; + + /* input traffic conditioner (doesn't belong to the output queue...) */ + struct top_cdnr *altq_cdnr; +}; + + +#ifdef _KERNEL + +/* + * packet attributes used by queueing disciplines. + * pattr_class is a discipline-dependent scheduling class that is + * set by a classifier. + * pattr_hdr and pattr_af may be used by a discipline to access + * the header within a mbuf. (e.g. ECN needs to update the CE bit) + * note that pattr_hdr could be stale after m_pullup, though link + * layer output routines usually don't use m_pullup. link-level + * compression also invalidates these fields. thus, pattr_hdr needs + * to be verified when a discipline touches the header. + */ +struct altq_pktattr { + void *pattr_class; /* sched class set by classifier */ + int pattr_af; /* address family */ + caddr_t pattr_hdr; /* saved header position in mbuf */ +}; + +/* + * mbuf tag to carry a queue id (and hints for ECN). 
+ */ +struct altq_tag { + u_int32_t qid; /* queue id */ + /* hints for ecn */ + int af; /* address family */ + void *hdr; /* saved header position in mbuf */ +}; + +/* + * a token-bucket regulator limits the rate that a network driver can + * dequeue packets from the output queue. + * modern cards are able to buffer a large amount of packets and dequeue + * too many packets at a time. this bursty dequeue behavior makes it + * impossible to schedule packets by queueing disciplines. + * a token-bucket is used to control the burst size in a device + * independent manner. + */ +struct tb_regulator { + int64_t tbr_rate; /* (scaled) token bucket rate */ + int64_t tbr_depth; /* (scaled) token bucket depth */ + + int64_t tbr_token; /* (scaled) current token */ + int64_t tbr_filluptime; /* (scaled) time to fill up bucket */ + u_int64_t tbr_last; /* last time token was updated */ + + int tbr_lastop; /* last dequeue operation type + needed for poll-and-dequeue */ +}; + +/* if_altqflags */ +#define ALTQF_READY 0x01 /* driver supports alternate queueing */ +#define ALTQF_ENABLED 0x02 /* altq is in use */ +#define ALTQF_CLASSIFY 0x04 /* classify packets */ +#define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */ +#define ALTQF_DRIVER1 0x40 /* driver specific */ + +/* if_altqflags set internally only: */ +#define ALTQF_CANTCHANGE (ALTQF_READY) + +/* altq_dequeue 2nd arg */ +#define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */ +#define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */ + +/* altq request types (currently only purge is defined) */ +#define ALTRQ_PURGE 1 /* purge all packets */ + +#define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY) +#define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED) +#define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY) +#define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING) + +#define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING) +#define ALTQ_CLEAR_CNDTNING(ifq) 
((ifq)->altq_flags &= ~ALTQF_CNDTNING) +#define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL) + +#define ALTQ_ENQUEUE(ifq, m, pa, err) \ + (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa)) +#define ALTQ_DEQUEUE(ifq, m) \ + (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE) +#define ALTQ_POLL(ifq, m) \ + (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL) +#define ALTQ_PURGE(ifq) \ + (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0) +#define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0) +#define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL) + +extern int altq_attach(struct ifaltq *, int, void *, + int (*)(struct ifaltq *, struct mbuf *, + struct altq_pktattr *), + struct mbuf *(*)(struct ifaltq *, int), + int (*)(struct ifaltq *, int, void *), + void *, + void *(*)(void *, struct mbuf *, int)); +extern int altq_detach(struct ifaltq *); +extern int altq_enable(struct ifaltq *); +extern int altq_disable(struct ifaltq *); +extern struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int); +extern int (*altq_input)(struct mbuf *, int); +#if 0 /* ALTQ3_CLFIER_COMPAT */ +void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +#endif +#endif /* _KERNEL */ + +#endif /* _ALTQ_IF_ALTQ_H_ */ diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c index f74ac9a1..e7822586 100644 --- a/freebsd/sys/net/bpf.c +++ b/freebsd/sys/net/bpf.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_bpf.h> #include <rtems/bsd/local/opt_compat.h> +#include <rtems/bsd/local/opt_ddb.h> #include <rtems/bsd/local/opt_netgraph.h> #include <sys/types.h> @@ -69,8 +70,13 @@ __FBSDID("$FreeBSD$"); #include <sys/socket.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif + #include <net/if.h> -#define BPF_INTERNAL +#include <net/if_var.h> +#include <net/if_dl.h> #include <net/bpf.h> #include <net/bpf_buffer.h> #ifdef BPF_JITTER @@ -78,6 +84,7 @@ __FBSDID("$FreeBSD$"); #endif #include <net/bpf_zerocopy.h> #include <net/bpfdesc.h> +#include <net/route.h> #include 
<net/vnet.h> #include <netinet/in.h> @@ -96,6 +103,20 @@ __FBSDID("$FreeBSD$"); MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); +struct bpf_if { +#define bif_next bif_ext.bif_next +#define bif_dlist bif_ext.bif_dlist + struct bpf_if_ext bif_ext; /* public members */ + u_int bif_dlt; /* link layer type */ + u_int bif_hdrlen; /* length of link header */ + struct ifnet *bif_ifp; /* corresponding interface */ + struct rwlock bif_lock; /* interface lock */ + LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */ + int bif_flags; /* Interface flags */ +}; + +CTASSERT(offsetof(struct bpf_if, bif_ext) == 0); + #if defined(DEV_BPF) || defined(NETGRAPH_BPF) #define PRINET 26 /* interruptible */ @@ -107,7 +128,7 @@ MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); #include <sys/mount.h> #include <compat/freebsd32/freebsd32.h> #define BPF_ALIGNMENT32 sizeof(int32_t) -#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1)) +#define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32) #ifndef BURN_BRIDGES /* @@ -148,7 +169,7 @@ struct bpf_dltlist32 { * structures registered by different layers in the stack (i.e., 802.11 * frames, ethernet frames, etc). 
*/ -static LIST_HEAD(, bpf_if) bpf_iflist; +static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist; static struct mtx bpf_mtx; /* bpf global lock */ static int bpf_bpfd_cnt; @@ -157,7 +178,7 @@ static void bpf_detachd(struct bpf_d *); static void bpf_detachd_locked(struct bpf_d *); static void bpf_freed(struct bpf_d *); static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, - struct sockaddr *, int *, struct bpf_insn *); + struct sockaddr *, int *, struct bpf_d *); static int bpf_setif(struct bpf_d *, struct ifreq *); static void bpf_timed_out(void *); static __inline void @@ -188,8 +209,8 @@ static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW, static VNET_DEFINE(int, bpf_optimize_writers) = 0; #define V_bpf_optimize_writers VNET(bpf_optimize_writers) -SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers, - CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0, +SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(bpf_optimize_writers), 0, "Do not send packets until BPF program is set"); #ifndef __rtems__ @@ -479,7 +500,7 @@ bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) */ static int bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, - struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter) + struct sockaddr *sockp, int *hdrlen, struct bpf_d *d) { const struct ieee80211_bpf_params *p; struct ether_header *eh; @@ -561,37 +582,20 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, } len = uio->uio_resid; - - if (len - hlen > ifp->if_mtu) + if (len < hlen || len - hlen > ifp->if_mtu) return (EMSGSIZE); - if ((unsigned)len > MJUM16BYTES) + m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR); + if (m == NULL) return (EIO); - - if (len <= MHLEN) - MGETHDR(m, M_WAIT, MT_DATA); - else if (len <= MCLBYTES) - m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR); - else - m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR, -#if (MJUMPAGESIZE > 
MCLBYTES) - len <= MJUMPAGESIZE ? MJUMPAGESIZE : -#endif - (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES)); m->m_pkthdr.len = m->m_len = len; - m->m_pkthdr.rcvif = NULL; *mp = m; - if (m->m_len < hlen) { - error = EPERM; - goto bad; - } - error = uiomove(mtod(m, u_char *), len, uio); if (error) goto bad; - slen = bpf_filter(wfilter, mtod(m, u_char *), len, len); + slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len); if (slen == 0) { error = EPERM; goto bad; @@ -608,6 +612,10 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, else m->m_flags |= M_MCAST; } + if (d->bd_hdrcmplt == 0) { + memcpy(eh->ether_shost, IF_LLADDR(ifp), + sizeof(eh->ether_shost)); + } break; } @@ -632,7 +640,7 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, goto bad; } } - bcopy(m->m_data, sockp->sa_data, hlen); + bcopy(mtod(m, const void *), sockp->sa_data, hlen); } *hdrlen = hlen; @@ -656,13 +664,13 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp) * Save sysctl value to protect from sysctl change * between reads */ - op_w = V_bpf_optimize_writers; + op_w = V_bpf_optimize_writers || d->bd_writer; if (d->bd_bif != NULL) bpf_detachd_locked(d); /* * Point d at bp, and add d to the interface's list. - * Since there are many applicaiotns using BPF for + * Since there are many applications using BPF for * sending raw packets only (dhcpd, cdpd are good examples) * we can delay adding d to the list of active listeners until * some filter is configured. @@ -760,7 +768,7 @@ bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen) /* * Add d to the list of active bp filters. - * Reuqires bpf_attachd() to be called before + * Requires bpf_attachd() to be called before. 
*/ static void bpf_upgraded(struct bpf_d *d) @@ -909,7 +917,7 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) { struct bpf_d *d; #ifndef __rtems__ - int error, size; + int error; d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO); error = devfs_set_cdevpriv(d, bpf_dtor); @@ -932,6 +940,8 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) * particular buffer method. */ bpf_buffer_init(d); + if ((flags & FREAD) == 0) + d->bd_writer = 2; d->bd_hbuf_in_use = 0; d->bd_bufmode = BPF_BUFMODE_BUFFER; d->bd_sig = SIGIO; @@ -945,10 +955,6 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) callout_init_mtx(&d->bd_callout, &d->bd_lock, 0); knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock); - /* Allocate default buffers */ - size = d->bd_bufsize; - bpf_buffer_ioctl_sblen(d, &size); - #ifndef __rtems__ return (0); #else /* __rtems__ */ @@ -1163,6 +1169,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag) struct ifnet *ifp; struct mbuf *m, *mc; struct sockaddr dst; + struct route ro; int error, hlen; error = devfs_get_cdevpriv((void **)&d); @@ -1194,7 +1201,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag) hlen = 0; /* XXX: bpf_movein() can sleep */ error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp, - &m, &dst, &hlen, d->bd_wfilter); + &m, &dst, &hlen, d); if (error) { d->bd_wdcount++; return (error); @@ -1204,7 +1211,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag) dst.sa_family = pseudo_AF_HDRCMPLT; if (d->bd_feedback) { - mc = m_dup(m, M_DONTWAIT); + mc = m_dup(m, M_NOWAIT); if (mc != NULL) mc->m_pkthdr.rcvif = ifp; /* Set M_PROMISC for outgoing packets to be discarded. 
*/ @@ -1226,7 +1233,14 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag) BPFD_UNLOCK(d); #endif - error = (*ifp->if_output)(ifp, m, &dst, NULL); + bzero(&ro, sizeof(ro)); + if (hlen != 0) { + ro.ro_prepend = (u_char *)&dst.sa_data; + ro.ro_plen = hlen; + ro.ro_flags = RT_HAS_HEADER; + } + + error = (*ifp->if_output)(ifp, m, &dst, &ro); if (error) d->bd_wdcount++; @@ -1278,7 +1292,6 @@ reset_d(struct bpf_d *d) /* * FIONREAD Check for read packet available. - * SIOCGIFADDR Get interface address - convenient hook to driver. * BIOCGBLEN Get buffer len [for read()]. * BIOCSETF Set read filter. * BIOCSETFNR Set read filter without resetting descriptor. @@ -1347,7 +1360,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags, #endif case BIOCGETIF: case BIOCGRTIMEOUT: -#ifdef COMPAT_FREEBSD32 +#if defined(COMPAT_FREEBSD32) && !defined(__mips__) case BIOCGRTIMEOUT32: #endif case BIOCGSTATS: @@ -1359,7 +1372,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags, case FIONREAD: case BIOCLOCK: case BIOCSRTIMEOUT: -#ifdef COMPAT_FREEBSD32 +#if defined(COMPAT_FREEBSD32) && !defined(__mips__) case BIOCSRTIMEOUT32: #endif case BIOCIMMEDIATE: @@ -1415,19 +1428,6 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags, break; } - case SIOCGIFADDR: - { - struct ifnet *ifp; - - if (d->bd_bif == NULL) - error = EINVAL; - else { - ifp = d->bd_bif->bif_ifp; - error = (*ifp->if_ioctl)(ifp, cmd, addr); - } - break; - } - /* * Get buffer len [for read()]. */ @@ -1564,21 +1564,44 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags, * Set interface. */ case BIOCSETIF: - BPF_LOCK(); - error = bpf_setif(d, (struct ifreq *)addr); - BPF_UNLOCK(); - break; + { + int alloc_buf, size; + + /* + * Behavior here depends on the buffering model. If + * we're using kernel memory buffers, then we can + * allocate them here. If we're using zero-copy, + * then the user process must have registered buffers + * by the time we get here. 
+ */ + alloc_buf = 0; + BPFD_LOCK(d); + if (d->bd_bufmode == BPF_BUFMODE_BUFFER && + d->bd_sbuf == NULL) + alloc_buf = 1; + BPFD_UNLOCK(d); + if (alloc_buf) { + size = d->bd_bufsize; + error = bpf_buffer_ioctl_sblen(d, &size); + if (error != 0) + break; + } + BPF_LOCK(); + error = bpf_setif(d, (struct ifreq *)addr); + BPF_UNLOCK(); + break; + } /* * Set read timeout. */ case BIOCSRTIMEOUT: -#ifdef COMPAT_FREEBSD32 +#if defined(COMPAT_FREEBSD32) && !defined(__mips__) case BIOCSRTIMEOUT32: #endif { struct timeval *tv = (struct timeval *)addr; -#ifdef COMPAT_FREEBSD32 +#if defined(COMPAT_FREEBSD32) && !defined(__mips__) struct timeval32 *tv32; struct timeval tv64; @@ -1604,12 +1627,12 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags, * Get read timeout. */ case BIOCGRTIMEOUT: -#ifdef COMPAT_FREEBSD32 +#if defined(COMPAT_FREEBSD32) && !defined(__mips__) case BIOCGRTIMEOUT32: #endif { struct timeval *tv; -#ifdef COMPAT_FREEBSD32 +#if defined(COMPAT_FREEBSD32) && !defined(__mips__) struct timeval32 *tv32; struct timeval tv64; @@ -1621,7 +1644,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags, tv->tv_sec = d->bd_rtout / hz; tv->tv_usec = (d->bd_rtout % hz) * tick; -#ifdef COMPAT_FREEBSD32 +#if defined(COMPAT_FREEBSD32) && !defined(__mips__) if (cmd == BIOCGRTIMEOUT32) { tv32 = (struct timeval32 *)addr; tv32->tv_sec = tv->tv_sec; @@ -2001,17 +2024,15 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr) /* Check if interface is not being detached from BPF */ BPFIF_RLOCK(bp); - if (bp->flags & BPFIF_FLAG_DYING) { + if (bp->bif_flags & BPFIF_FLAG_DYING) { BPFIF_RUNLOCK(bp); return (ENXIO); } BPFIF_RUNLOCK(bp); /* - * Behavior here depends on the buffering model. If we're using - * kernel memory buffers, then we can allocate them here. If we're - * using zero-copy, then the user process must have registered - * buffers by the time we get here. If not, return an error. + * At this point, we expect the buffer is already allocated. 
If not, + * return an error. */ switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: @@ -2131,10 +2152,10 @@ filt_bpfread(struct knote *kn, long hint) ready = bpf_ready(d); if (ready) { kn->kn_data = d->bd_slen; - while (d->bd_hbuf_in_use) - mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, - PRINET, "bd_hbuf", 0); - if (d->bd_hbuf) + /* + * Ignore the hold buffer if it is being copied to user space. + */ + if (!d->bd_hbuf_in_use && d->bd_hbuf) kn->kn_data += d->bd_hlen; } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { callout_reset(&d->bd_callout, d->bd_rtout, @@ -2405,12 +2426,19 @@ bpf_hdrlen(struct bpf_d *d) static void bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype) { +#ifndef __rtems__ + struct bintime bt2, boottimebin; +#else /* __rtems__ */ struct bintime bt2; +#endif /* __rtems__ */ struct timeval tsm; struct timespec tsn; if ((tstype & BPF_T_MONOTONIC) == 0) { bt2 = *bt; +#ifndef __rtems__ + getboottimebin(&boottimebin); +#endif /* __rtems__ */ bintime_add(&bt2, &boottimebin); bt = &bt2; } @@ -2466,9 +2494,6 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, * spot to do it. 
*/ if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { - while (d->bd_hbuf_in_use) - mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, - PRINET, "bd_hbuf", 0); d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; @@ -2511,9 +2536,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, ++d->bd_dcount; return; } - while (d->bd_hbuf_in_use) - mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, - PRINET, "bd_hbuf", 0); + KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use")); ROTATE_BUFFERS(d); do_wakeup = 1; curlen = 0; @@ -2652,10 +2675,36 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) bp->bif_hdrlen = hdrlen; - if (bootverbose) + if (bootverbose && IS_DEFAULT_VNET(curvnet)) if_printf(ifp, "bpf attached\n"); } +#ifdef VIMAGE +/* + * When moving interfaces between vnet instances we need a way to + * query the dlt and hdrlen before detach so we can re-attch the if_bpf + * after the vmove. We unfortunately have no device driver infrastructure + * to query the interface for these values after creation/attach, thus + * add this as a workaround. + */ +int +bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen) +{ + + if (bp == NULL) + return (ENXIO); + if (bif_dlt == NULL && bif_hdrlen == NULL) + return (0); + + if (bif_dlt != NULL) + *bif_dlt = bp->bif_dlt; + if (bif_hdrlen != NULL) + *bif_hdrlen = bp->bif_hdrlen; + + return (0); +} +#endif + /* * Detach bpf from an interface. This involves detaching each descriptor * associated with the interface. Notify each descriptor as it's detached @@ -2664,52 +2713,51 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) void bpfdetach(struct ifnet *ifp) { - struct bpf_if *bp; + struct bpf_if *bp, *bp_temp; struct bpf_d *d; -#ifdef INVARIANTS int ndetached; ndetached = 0; -#endif BPF_LOCK(); /* Find all bpf_if struct's which reference ifp and detach them. 
*/ - do { - LIST_FOREACH(bp, &bpf_iflist, bif_next) { - if (ifp == bp->bif_ifp) - break; - } - if (bp != NULL) - LIST_REMOVE(bp, bif_next); + LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) { + if (ifp != bp->bif_ifp) + continue; - if (bp != NULL) { -#ifdef INVARIANTS - ndetached++; -#endif - while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) { - bpf_detachd_locked(d); - BPFD_LOCK(d); - bpf_wakeup(d); - BPFD_UNLOCK(d); - } - /* Free writer-only descriptors */ - while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) { - bpf_detachd_locked(d); - BPFD_LOCK(d); - bpf_wakeup(d); - BPFD_UNLOCK(d); - } + LIST_REMOVE(bp, bif_next); + /* Add to to-be-freed list */ + LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next); - /* - * Delay freing bp till interface is detached - * and all routes through this interface are removed. - * Mark bp as detached to restrict new consumers. - */ - BPFIF_WLOCK(bp); - bp->flags |= BPFIF_FLAG_DYING; - BPFIF_WUNLOCK(bp); + ndetached++; + /* + * Delay freeing bp till interface is detached + * and all routes through this interface are removed. + * Mark bp as detached to restrict new consumers. + */ + BPFIF_WLOCK(bp); + bp->bif_flags |= BPFIF_FLAG_DYING; + BPFIF_WUNLOCK(bp); + + CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p", + __func__, bp->bif_dlt, bp, ifp); + + /* Free common descriptors */ + while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) { + bpf_detachd_locked(d); + BPFD_LOCK(d); + bpf_wakeup(d); + BPFD_UNLOCK(d); } - } while (bp != NULL); + + /* Free writer-only descriptors */ + while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) { + bpf_detachd_locked(d); + BPFD_LOCK(d); + bpf_wakeup(d); + BPFD_UNLOCK(d); + } + } BPF_UNLOCK(); #ifdef INVARIANTS @@ -2721,32 +2769,46 @@ bpfdetach(struct ifnet *ifp) /* * Interface departure handler. * Note departure event does not guarantee interface is going down. + * Interface renaming is currently done via departure/arrival event set. 
+ * + * Departure handled is called after all routes pointing to + * given interface are removed and interface is in down state + * restricting any packets to be sent/received. We assume it is now safe + * to free data allocated by BPF. */ static void bpf_ifdetach(void *arg __unused, struct ifnet *ifp) { - struct bpf_if *bp; + struct bpf_if *bp, *bp_temp; + int nmatched = 0; BPF_LOCK(); - if ((bp = ifp->if_bpf) == NULL) { - BPF_UNLOCK(); - return; - } + /* + * Find matching entries in free list. + * Nothing should be found if bpfdetach() was not called. + */ + LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) { + if (ifp != bp->bif_ifp) + continue; - /* Check if bpfdetach() was called previously */ - if ((bp->flags & BPFIF_FLAG_DYING) == 0) { - BPF_UNLOCK(); - return; - } + CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p", + __func__, bp, ifp); - CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p", - __func__, bp, ifp); + LIST_REMOVE(bp, bif_next); - ifp->if_bpf = NULL; + rw_destroy(&bp->bif_lock); + free(bp, M_BPF); + + nmatched++; + } BPF_UNLOCK(); - rw_destroy(&bp->bif_lock); - free(bp, M_BPF); + /* + * Note that we cannot zero other pointers to + * custom DLTs possibly used by given interface. 
+ */ + if (nmatched != 0) + ifp->if_bpf = NULL; } /* @@ -2755,26 +2817,44 @@ bpf_ifdetach(void *arg __unused, struct ifnet *ifp) static int bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) { - int n, error; struct ifnet *ifp; struct bpf_if *bp; + u_int *lst; + int error, n, n1; BPF_LOCK_ASSERT(); ifp = d->bd_bif->bif_ifp; +again: + n1 = 0; + LIST_FOREACH(bp, &bpf_iflist, bif_next) { + if (bp->bif_ifp == ifp) + n1++; + } + if (bfl->bfl_list == NULL) { + bfl->bfl_len = n1; + return (0); + } + if (n1 > bfl->bfl_len) + return (ENOMEM); + BPF_UNLOCK(); + lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK); n = 0; - error = 0; + BPF_LOCK(); LIST_FOREACH(bp, &bpf_iflist, bif_next) { if (bp->bif_ifp != ifp) continue; - if (bfl->bfl_list != NULL) { - if (n >= bfl->bfl_len) - return (ENOMEM); - error = copyout(&bp->bif_dlt, - bfl->bfl_list + n, sizeof(u_int)); + if (n >= n1) { + free(lst, M_TEMP); + goto again; } + lst[n] = bp->bif_dlt; n++; } + BPF_UNLOCK(); + error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n); + free(lst, M_TEMP); + BPF_LOCK(); bfl->bfl_len = n; return (error); } @@ -2999,6 +3079,7 @@ bpf_drvinit(void *unused) mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF); LIST_INIT(&bpf_iflist); + LIST_INIT(&bpf_freelist); #ifndef __rtems__ dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf"); @@ -3214,3 +3295,34 @@ bpf_validate(const struct bpf_insn *f, int len) } #endif /* !DEV_BPF && !NETGRAPH_BPF */ + +#ifdef DDB +static void +bpf_show_bpf_if(struct bpf_if *bpf_if) +{ + + if (bpf_if == NULL) + return; + db_printf("%p:\n", bpf_if); +#define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e); + /* bif_ext.bif_next */ + /* bif_ext.bif_dlist */ + BPF_DB_PRINTF("%#x", bif_dlt); + BPF_DB_PRINTF("%u", bif_hdrlen); + BPF_DB_PRINTF("%p", bif_ifp); + /* bif_lock */ + /* bif_wlist */ + BPF_DB_PRINTF("%#x", bif_flags); +} + +DB_SHOW_COMMAND(bpf_if, db_show_bpf_if) +{ + + if (!have_addr) { + db_printf("usage: show bpf_if <struct bpf_if 
*>\n"); + return; + } + + bpf_show_bpf_if((struct bpf_if *)addr); +} +#endif diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h index bfe8cfe0..f707f436 100644 --- a/freebsd/sys/net/bpf.h +++ b/freebsd/sys/net/bpf.h @@ -582,7 +582,7 @@ struct bpf_zbuf_header { * input packets such as port scans, packets from old lost connections, * etc. to force the connection to stay up). * - * The first byte of the PPP header (0xff03) is modified to accomodate + * The first byte of the PPP header (0xff03) is modified to accommodate * the direction - 0x00 = IN, 0x01 = OUT. */ #define DLT_PPP_PPPD 166 @@ -1096,7 +1096,7 @@ struct bpf_zbuf_header { #define DLT_NETANALYZER_TRANSPARENT 241 /* - * IP-over-Infiniband, as specified by RFC 4391. + * IP-over-InfiniBand, as specified by RFC 4391. * * Requested by Petr Sumbera <petr.sumbera@oracle.com>. */ @@ -1138,7 +1138,145 @@ struct bpf_zbuf_header { #define DLT_PFSYNC 246 #endif -#define DLT_MATCHING_MAX 246 /* highest value in the "matching" range */ +/* + * Raw InfiniBand packets, starting with the Local Routing Header. + * + * Requested by Oren Kladnitsky <orenk@mellanox.com>. + */ +#define DLT_INFINIBAND 247 + +/* + * SCTP, with no lower-level protocols (i.e., no IPv4 or IPv6). + * + * Requested by Michael Tuexen <Michael.Tuexen@lurchi.franken.de>. + */ +#define DLT_SCTP 248 + +/* + * USB packets, beginning with a USBPcap header. + * + * Requested by Tomasz Mon <desowin@gmail.com> + */ +#define DLT_USBPCAP 249 + +/* + * Schweitzer Engineering Laboratories "RTAC" product serial-line + * packets. + * + * Requested by Chris Bontje <chris_bontje@selinc.com>. + */ +#define DLT_RTAC_SERIAL 250 + +/* + * Bluetooth Low Energy air interface link-layer packets. + * + * Requested by Mike Kershaw <dragorn@kismetwireless.net>. + */ +#define DLT_BLUETOOTH_LE_LL 251 + +/* + * DLT type for upper-protocol layer PDU saves from wireshark. 
+ * + * the actual contents are determined by two TAGs stored with each + * packet: + * EXP_PDU_TAG_LINKTYPE the link type (LINKTYPE_ value) of the + * original packet. + * + * EXP_PDU_TAG_PROTO_NAME the name of the wireshark dissector + * that can make sense of the data stored. + */ +#define DLT_WIRESHARK_UPPER_PDU 252 + +/* + * DLT type for the netlink protocol (nlmon devices). + */ +#define DLT_NETLINK 253 + +/* + * Bluetooth Linux Monitor headers for the BlueZ stack. + */ +#define DLT_BLUETOOTH_LINUX_MONITOR 254 + +/* + * Bluetooth Basic Rate/Enhanced Data Rate baseband packets, as + * captured by Ubertooth. + */ +#define DLT_BLUETOOTH_BREDR_BB 255 + +/* + * Bluetooth Low Energy link layer packets, as captured by Ubertooth. + */ +#define DLT_BLUETOOTH_LE_LL_WITH_PHDR 256 + +/* + * PROFIBUS data link layer. + */ +#define DLT_PROFIBUS_DL 257 + +/* + * Apple's DLT_PKTAP headers. + * + * Sadly, the folks at Apple either had no clue that the DLT_USERn values + * are for internal use within an organization and partners only, and + * didn't know that the right way to get a link-layer header type is to + * ask tcpdump.org for one, or knew and didn't care, so they just + * used DLT_USER2, which causes problems for everything except for + * their version of tcpdump. + * + * So I'll just give them one; hopefully this will show up in a + * libpcap release in time for them to get this into 10.10 Big Sur + * or whatever Mavericks' successor is called. LINKTYPE_PKTAP + * will be 258 *even on OS X*; that is *intentional*, so that + * PKTAP files look the same on *all* OSes (different OSes can have + * different numerical values for a given DLT_, but *MUST NOT* have + * different values for what goes in a file, as files can be moved + * between OSes!). 
+ * + * When capturing, on a system with a Darwin-based OS, on a device + * that returns 149 (DLT_USER2 and Apple's DLT_PKTAP) with this + * version of libpcap, the DLT_ value for the pcap_t will be DLT_PKTAP, + * and that will continue to be DLT_USER2 on Darwin-based OSes. That way, + * binary compatibility with Mavericks is preserved for programs using + * this version of libpcap. This does mean that if you were using + * DLT_USER2 for some capture device on OS X, you can't do so with + * this version of libpcap, just as you can't with Apple's libpcap - + * on OS X, they define DLT_PKTAP to be DLT_USER2, so programs won't + * be able to distinguish between PKTAP and whatever you were using + * DLT_USER2 for. + * + * If the program saves the capture to a file using this version of + * libpcap's pcap_dump code, the LINKTYPE_ value in the file will be + * LINKTYPE_PKTAP, which will be 258, even on Darwin-based OSes. + * That way, the file will *not* be a DLT_USER2 file. That means + * that the latest version of tcpdump, when built with this version + * of libpcap, and sufficiently recent versions of Wireshark will + * be able to read those files and interpret them correctly; however, + * Apple's version of tcpdump in OS X 10.9 won't be able to handle + * them. (Hopefully, Apple will pick up this version of libpcap, + * and the corresponding version of tcpdump, so that tcpdump will + * be able to handle the old LINKTYPE_USER2 captures *and* the new + * LINKTYPE_PKTAP captures.) + */ +#ifdef __APPLE__ +#define DLT_PKTAP DLT_USER2 +#else +#define DLT_PKTAP 258 +#endif + +/* + * Ethernet packets preceded by a header giving the last 6 octets + * of the preamble specified by 802.3-2012 Clause 65, section + * 65.1.3.2 "Transmit". + */ +#define DLT_EPON 259 + +/* + * IPMI trace packets, as specified by Table 3-20 "Trace Data Block Format" + * in the PICMG HPM.2 specification. 
+ */ +#define DLT_IPMI_HPM_2 260 + +#define DLT_MATCHING_MAX 260 /* highest value in the "matching" range */ /* * DLT and savefile link type values are split into a class and @@ -1149,7 +1287,17 @@ struct bpf_zbuf_header { /* * The instruction encodings. + * + * Please inform tcpdump-workers@lists.tcpdump.org if you use any + * of the reserved values, so that we can note that they're used + * (and perhaps implement it in the reference BPF implementation + * and encourage its implementation elsewhere). */ + +/* + * The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000. + */ + /* instruction classes */ #define BPF_CLASS(code) ((code) & 0x07) #define BPF_LD 0x00 @@ -1166,6 +1314,7 @@ struct bpf_zbuf_header { #define BPF_W 0x00 #define BPF_H 0x08 #define BPF_B 0x10 +/* 0x18 reserved; used by BSD/OS */ #define BPF_MODE(code) ((code) & 0xe0) #define BPF_IMM 0x00 #define BPF_ABS 0x20 @@ -1173,6 +1322,8 @@ struct bpf_zbuf_header { #define BPF_MEM 0x60 #define BPF_LEN 0x80 #define BPF_MSH 0xa0 +/* 0xc0 reserved; used by BSD/OS */ +/* 0xe0 reserved; used by BSD/OS */ /* alu/jmp fields */ #define BPF_OP(code) ((code) & 0xf0) @@ -1185,11 +1336,30 @@ struct bpf_zbuf_header { #define BPF_LSH 0x60 #define BPF_RSH 0x70 #define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 +/* 0xb0 reserved */ +/* 0xc0 reserved */ +/* 0xd0 reserved */ +/* 0xe0 reserved */ +/* 0xf0 reserved */ + #define BPF_JA 0x00 #define BPF_JEQ 0x10 #define BPF_JGT 0x20 #define BPF_JGE 0x30 #define BPF_JSET 0x40 +/* 0x50 reserved; used on BSD/OS */ +/* 0x60 reserved */ +/* 0x70 reserved */ +/* 0x80 reserved */ +/* 0x90 reserved */ +/* 0xa0 reserved */ +/* 0xb0 reserved */ +/* 0xc0 reserved */ +/* 0xd0 reserved */ +/* 0xe0 reserved */ +/* 0xf0 reserved */ #define BPF_SRC(code) ((code) & 0x08) #define BPF_K 0x00 #define BPF_X 0x08 @@ -1197,11 +1367,43 @@ struct bpf_zbuf_header { /* ret - BPF_K and BPF_X also apply */ #define BPF_RVAL(code) ((code) & 0x18) #define BPF_A 0x10 +/* 0x18 reserved */ 
/* misc */ #define BPF_MISCOP(code) ((code) & 0xf8) #define BPF_TAX 0x00 +/* 0x08 reserved */ +/* 0x10 reserved */ +/* 0x18 reserved */ +/* #define BPF_COP 0x20 NetBSD "coprocessor" extensions */ +/* 0x28 reserved */ +/* 0x30 reserved */ +/* 0x38 reserved */ +/* #define BPF_COPX 0x40 NetBSD "coprocessor" extensions */ +/* also used on BSD/OS */ +/* 0x48 reserved */ +/* 0x50 reserved */ +/* 0x58 reserved */ +/* 0x60 reserved */ +/* 0x68 reserved */ +/* 0x70 reserved */ +/* 0x78 reserved */ #define BPF_TXA 0x80 +/* 0x88 reserved */ +/* 0x90 reserved */ +/* 0x98 reserved */ +/* 0xa0 reserved */ +/* 0xa8 reserved */ +/* 0xb0 reserved */ +/* 0xb8 reserved */ +/* 0xc0 reserved; used on BSD/OS */ +/* 0xc8 reserved */ +/* 0xd0 reserved */ +/* 0xd8 reserved */ +/* 0xe0 reserved */ +/* 0xe8 reserved */ +/* 0xf0 reserved */ +/* 0xf8 reserved */ /* * The instruction data structure. @@ -1237,9 +1439,9 @@ SYSCTL_DECL(_net_bpf); /* * Rotate the packet buffers in descriptor d. Move the store buffer into the - * hold slot, and the free buffer ino the store slot. Zero the length of the - * new store buffer. Descriptor lock should be held. Hold buffer must - * not be marked "in use". + * hold slot, and the free buffer into the store slot. Zero the length of the + * new store buffer. Descriptor lock should be held. One must be careful to + * not rotate the buffers twice, i.e. if fbuf != NULL. */ #define ROTATE_BUFFERS(d) do { \ (d)->bd_hbuf = (d)->bd_sbuf; \ @@ -1252,21 +1454,14 @@ SYSCTL_DECL(_net_bpf); /* * Descriptor associated with each attached hardware interface. - * FIXME: this structure is exposed to external callers to speed up - * bpf_peers_present() call. However we cover all fields not needed by - * this function via BPF_INTERNAL define + * Part of this structure is exposed to external callers to speed up + * bpf_peers_present() calls. 
*/ -struct bpf_if { +struct bpf_if; + +struct bpf_if_ext { LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */ LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */ -#ifdef BPF_INTERNAL - u_int bif_dlt; /* link layer type */ - u_int bif_hdrlen; /* length of link header */ - struct ifnet *bif_ifp; /* corresponding interface */ - struct rwlock bif_lock; /* interface lock */ - LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */ - int flags; /* Interface flags */ -#endif }; void bpf_bufheld(struct bpf_d *d); @@ -1277,6 +1472,9 @@ void bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *); void bpfattach(struct ifnet *, u_int, u_int); void bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **); void bpfdetach(struct ifnet *); +#ifdef VIMAGE +int bpf_get_bp_params(struct bpf_if *, u_int *, u_int *); +#endif void bpfilterattach(int); u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int); @@ -1284,8 +1482,10 @@ u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int); static __inline int bpf_peers_present(struct bpf_if *bpf) { + struct bpf_if_ext *ext; - if (!LIST_EMPTY(&bpf->bif_dlist)) + ext = (struct bpf_if_ext *)bpf; + if (!LIST_EMPTY(&ext->bif_dlist)) return (1); return (0); } @@ -1313,4 +1513,12 @@ bpf_peers_present(struct bpf_if *bpf) */ #define BPF_MEMWORDS 16 +#ifdef _SYS_EVENTHANDLER_H_ +/* BPF attach/detach events */ +struct ifnet; +typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */, + int /* 1 =>'s attach */); +EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn); +#endif /* _SYS_EVENTHANDLER_H_ */ + #endif /* _NET_BPF_H_ */ diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c index ec6aed74..d42df1b0 100644 --- a/freebsd/sys/net/bpf_buffer.c +++ b/freebsd/sys/net/bpf_buffer.c @@ -81,8 +81,6 @@ __FBSDID("$FreeBSD$"); #include <net/bpf_buffer.h> #include <net/bpfdesc.h> -#define PRINET 26 /* interruptible */ - /* * Implement historical kernel memory buffering model for BPF: two malloc(9) * kernel 
buffers are hung off of the descriptor. The size is fixed prior to @@ -193,9 +191,6 @@ bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i) return (EINVAL); } - while (d->bd_hbuf_in_use) - mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, - PRINET, "bd_hbuf", 0); /* Free old buffers if set */ if (d->bd_fbuf != NULL) free(d->bd_fbuf, M_BPF); diff --git a/freebsd/sys/net/bpf_filter.c b/freebsd/sys/net/bpf_filter.c index a313f4bd..941fa290 100644 --- a/freebsd/sys/net/bpf_filter.c +++ b/freebsd/sys/net/bpf_filter.c @@ -41,6 +41,9 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> +#if !defined(_KERNEL) +#include <strings.h> +#endif #if !defined(_KERNEL) || defined(sun) #include <netinet/in.h> #endif @@ -98,7 +101,7 @@ m_xword(struct mbuf *m, bpf_u_int32 k, int *err) while (k >= len) { k -= len; m = m->m_next; - if (m == 0) + if (m == NULL) goto bad; len = m->m_len; } @@ -108,7 +111,7 @@ m_xword(struct mbuf *m, bpf_u_int32 k, int *err) return (EXTRACT_LONG(cp)); } m0 = m->m_next; - if (m0 == 0 || m0->m_len + len - k < 4) + if (m0 == NULL || m0->m_len + len - k < 4) goto bad; *err = 0; np = mtod(m0, u_char *); @@ -147,7 +150,7 @@ m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err) while (k >= len) { k -= len; m = m->m_next; - if (m == 0) + if (m == NULL) goto bad; len = m->m_len; } @@ -157,7 +160,7 @@ m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err) return (EXTRACT_SHORT(cp)); } m0 = m->m_next; - if (m0 == 0) + if (m0 == NULL) goto bad; *err = 0; return ((cp[0] << 8) | mtod(m0, u_char *)[0]); diff --git a/freebsd/sys/net/bridgestp.c b/freebsd/sys/net/bridgestp.c index 167bc59f..5fea7ae7 100644 --- a/freebsd/sys/net/bridgestp.c +++ b/freebsd/sys/net/bridgestp.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include <sys/socket.h> #include <sys/sockio.h> #include <sys/kernel.h> +#include <sys/malloc.h> #include <sys/callout.h> #include <sys/module.h> #include <sys/proc.h> @@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include <sys/taskqueue.h> #include <net/if.h> +#include 
<net/if_var.h> #include <net/if_dl.h> #include <net/if_types.h> #include <net/if_llc.h> @@ -236,7 +238,7 @@ bstp_transmit_tcn(struct bstp_state *bs, struct bstp_port *bp) if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; - MGETHDR(m, M_DONTWAIT, MT_DATA); + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; @@ -350,7 +352,7 @@ bstp_send_bpdu(struct bstp_state *bs, struct bstp_port *bp, if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; - MGETHDR(m, M_DONTWAIT, MT_DATA); + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; @@ -789,7 +791,7 @@ bstp_assign_roles(struct bstp_state *bs) bs->bs_root_htime = bs->bs_bridge_htime; bs->bs_root_port = NULL; - /* check if any recieved info supersedes us */ + /* check if any received info supersedes us */ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) { if (bp->bp_infois != BSTP_INFO_RECEIVED) continue; diff --git a/freebsd/sys/net/ethernet.h b/freebsd/sys/net/ethernet.h index ae7341ee..bc5fa9cb 100644 --- a/freebsd/sys/net/ethernet.h +++ b/freebsd/sys/net/ethernet.h @@ -71,6 +71,28 @@ struct ether_addr { } __packed; #define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */ +#define ETHER_IS_BROADCAST(addr) \ + (((addr)[0] & (addr)[1] & (addr)[2] & \ + (addr)[3] & (addr)[4] & (addr)[5]) == 0xff) + +/* + * 802.1q Virtual LAN header. + */ +struct ether_vlan_header { + uint8_t evl_dhost[ETHER_ADDR_LEN]; + uint8_t evl_shost[ETHER_ADDR_LEN]; + uint16_t evl_encap_proto; + uint16_t evl_tag; + uint16_t evl_proto; +} __packed; + +#define EVL_VLID_MASK 0x0FFF +#define EVL_PRI_MASK 0xE000 +#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK) +#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7) +#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1) +#define EVL_MAKETAG(vlid, pri, cfi) \ + ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK)) /* * NOTE: 0x0000-0x05DC (0..1500) are generally IEEE 802.3 length fields. 
@@ -314,6 +336,7 @@ struct ether_addr { #define ETHERTYPE_SLOW 0x8809 /* 802.3ad link aggregation (LACP) */ #define ETHERTYPE_PPP 0x880B /* PPP (obsolete by PPPoE) */ #define ETHERTYPE_HITACHI 0x8820 /* Hitachi Cable (Optoelectronic Systems Laboratory) */ +#define ETHERTYPE_TEST 0x8822 /* Network Conformance Testing */ #define ETHERTYPE_MPLS 0x8847 /* MPLS Unicast */ #define ETHERTYPE_MPLS_MCAST 0x8848 /* MPLS Multicast */ #define ETHERTYPE_AXIS 0x8856 /* Axis Communications AB proprietary bootstrap/config */ @@ -375,8 +398,8 @@ extern void ether_demux(struct ifnet *, struct mbuf *); extern void ether_ifattach(struct ifnet *, const u_int8_t *); extern void ether_ifdetach(struct ifnet *); extern int ether_ioctl(struct ifnet *, u_long, caddr_t); -extern int ether_output(struct ifnet *, - struct mbuf *, struct sockaddr *, struct route *); +extern int ether_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); extern int ether_output_frame(struct ifnet *, struct mbuf *); extern char *ether_sprintf(const u_int8_t *); void ether_vlan_mtap(struct bpf_if *, struct mbuf *, diff --git a/freebsd/sys/net/flowtable.h b/freebsd/sys/net/flowtable.h index d810fa33..5a1d9273 100644 --- a/freebsd/sys/net/flowtable.h +++ b/freebsd/sys/net/flowtable.h @@ -1,83 +1,56 @@ -/************************************************************************** - -Copyright (c) 2008-2010, BitGravity Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Neither the name of the BitGravity Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -$FreeBSD$ - -***************************************************************************/ +/*- + * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org> + * Copyright (c) 2008-2010, BitGravity Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of the BitGravity Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ #ifndef _NET_FLOWTABLE_H_ #define _NET_FLOWTABLE_H_ -#ifdef _KERNEL - -#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */ -#define FL_PCPU (1<<1) /* pcpu cache */ -#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */ -#define FL_IPV6 (1<<9) - -#define FL_TCP (1<<11) -#define FL_SCTP (1<<12) -#define FL_UDP (1<<13) -#define FL_DEBUG (1<<14) -#define FL_DEBUG_ALL (1<<15) - -struct flowtable; -struct flentry; -struct route; -struct route_in6; +struct flowtable_stat { + uint64_t ft_collisions; + uint64_t ft_misses; + uint64_t ft_free_checks; + uint64_t ft_frees; + uint64_t ft_hits; + uint64_t ft_lookups; + uint64_t ft_fail_lle_invalid; + uint64_t ft_inserts; +}; -VNET_DECLARE(struct flowtable *, ip_ft); -#define V_ip_ft VNET(ip_ft) - -VNET_DECLARE(struct flowtable *, ip6_ft); -#define V_ip6_ft VNET(ip6_ft) - -struct flowtable *flowtable_alloc(char *name, int nentry, int flags); +#ifdef _KERNEL /* - * Given a flow table, look up the L3 and L2 information and - * return it in the route. - * + * Given a flow table, look up the L3 and L2 information + * and return it in the route. 
*/ -struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af); - -struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa, - struct sockaddr_storage *dsa, uint32_t fibnum, int flags); - -int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa, - struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags); - -void flow_invalidate(struct flentry *fl); -void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt); - -void flow_to_route(struct flentry *fl, struct route *ro); - -void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro); - +int flowtable_lookup(sa_family_t, struct mbuf *, struct route *); +void flowtable_route_flush(sa_family_t, struct rtentry *); #endif /* _KERNEL */ -#endif +#endif /* !_NET_FLOWTABLE_H_ */ diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c index 5172ad54..619db8af 100644 --- a/freebsd/sys/net/ieee8023ad_lacp.c +++ b/freebsd/sys/net/ieee8023ad_lacp.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/callout.h> +#include <sys/eventhandler.h> #include <sys/mbuf.h> #include <sys/systm.h> #include <sys/malloc.h> @@ -44,8 +45,10 @@ __FBSDID("$FreeBSD$"); #include <machine/stdarg.h> #include <rtems/bsd/sys/lock.h> #include <sys/rwlock.h> +#include <sys/taskqueue.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include <net/ethernet.h> #include <net/if_media.h> @@ -189,30 +192,37 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *, static void lacp_dprintf(const struct lacp_port *, const char *, ...) 
__attribute__((__format__(__printf__, 2, 3))); -static int lacp_debug = 0; -SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN, - &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)"); -TUNABLE_INT("net.lacp_debug", &lacp_debug); +static VNET_DEFINE(int, lacp_debug); +#define V_lacp_debug VNET(lacp_debug) +SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad"); +SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET, + &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)"); -#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; } -#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); } +static VNET_DEFINE(int, lacp_default_strict_mode) = 1; +SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN, + &VNET_NAME(lacp_default_strict_mode), 0, + "LACP strict protocol compliance default"); + +#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; } +#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } +#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; } /* * partner administration variables. * XXX should be configurable. 
*/ -static const struct lacp_peerinfo lacp_partner_admin = { +static const struct lacp_peerinfo lacp_partner_admin_optimistic = { .lip_systemid = { .lsi_prio = 0xffff }, .lip_portid = { .lpi_prio = 0xffff }, -#if 1 - /* optimistic */ .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION | LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING, -#else - /* pessimistic */ +}; + +static const struct lacp_peerinfo lacp_partner_admin_strict = { + .lip_systemid = { .lsi_prio = 0xffff }, + .lip_portid = { .lpi_prio = 0xffff }, .lip_state = 0, -#endif }; static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = { @@ -298,11 +308,16 @@ lacp_pdu_input(struct lacp_port *lp, struct mbuf *m) goto bad; } - if (lacp_debug > 0) { + if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu receive\n"); lacp_dump_lacpdu(du); } + if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) { + LACP_TPRINTF((lp, "Dropping RX PDU\n")); + goto bad; + } + LACP_LOCK(lsc); lacp_sm_rx(lp, du); LACP_UNLOCK(lsc); @@ -350,7 +365,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp) LACP_LOCK_ASSERT(lp->lp_lsc); - m = m_gethdr(M_DONTWAIT, MT_DATA); + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { return (ENOMEM); } @@ -378,7 +393,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp) sizeof(du->ldu_collector)); du->ldu_collector.lci_maxdelay = 0; - if (lacp_debug > 0) { + if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu transmit\n"); lacp_dump_lacpdu(du); } @@ -404,7 +419,7 @@ lacp_xmit_marker(struct lacp_port *lp) LACP_LOCK_ASSERT(lp->lp_lsc); - m = m_gethdr(M_DONTWAIT, MT_DATA); + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { return (ENOMEM); } @@ -490,12 +505,14 @@ lacp_tick(void *arg) if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) continue; + CURVNET_SET(lp->lp_ifp->if_vnet); lacp_run_timers(lp); lacp_select(lp); lacp_sm_mux(lp); lacp_sm_tx(lp); lacp_sm_ptx_tx_schedule(lp); + CURVNET_RESTORE(); } callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); } @@ -512,20 +529,17 @@ lacp_port_create(struct 
lagg_port *lgp) int error; boolean_t active = TRUE; /* XXX should be configurable */ - boolean_t fast = FALSE; /* XXX should be configurable */ + boolean_t fast = FALSE; /* Configurable via ioctl */ - bzero((char *)&sdl, sizeof(sdl)); - sdl.sdl_len = sizeof(sdl); - sdl.sdl_family = AF_LINK; - sdl.sdl_index = ifp->if_index; - sdl.sdl_type = IFT_ETHER; + link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER); sdl.sdl_alen = ETHER_ADDR_LEN; bcopy(ðermulticastaddr_slowprotocols, LLADDR(&sdl), ETHER_ADDR_LEN); error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma); if (error) { - printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname); + printf("%s: ADDMULTI failed on %s\n", __func__, + lgp->lp_ifp->if_xname); return (error); } @@ -535,7 +549,7 @@ lacp_port_create(struct lagg_port *lgp) return (ENOMEM); LACP_LOCK(lsc); - lgp->lp_psc = (caddr_t)lp; + lgp->lp_psc = lp; lp->lp_ifp = ifp; lp->lp_lagg = lgp; lp->lp_lsc = lsc; @@ -572,17 +586,18 @@ lacp_port_destroy(struct lagg_port *lgp) lacp_disable_distributing(lp); lacp_unselect(lp); + LIST_REMOVE(lp, lp_next); + LACP_UNLOCK(lsc); + /* The address may have already been removed by if_purgemaddrs() */ if (!lgp->lp_detaching) if_delmulti_ifma(lp->lp_ifma); - LIST_REMOVE(lp, lp_next); - LACP_UNLOCK(lsc); free(lp, M_DEVBUF); } void -lacp_req(struct lagg_softc *sc, caddr_t data) +lacp_req(struct lagg_softc *sc, void *data) { struct lacp_opreq *req = (struct lacp_opreq *)data; struct lacp_softc *lsc = LACP_SOFTC(sc); @@ -590,7 +605,7 @@ lacp_req(struct lagg_softc *sc, caddr_t data) bzero(req, sizeof(struct lacp_opreq)); - /* + /* * If the LACP softc is NULL, return with the opreq structure full of * zeros. It is normal for the softc to be NULL while the lagg is * being destroyed. 
@@ -621,7 +636,7 @@ lacp_req(struct lagg_softc *sc, caddr_t data) } void -lacp_portreq(struct lagg_port *lgp, caddr_t data) +lacp_portreq(struct lagg_port *lgp, void *data) { struct lacp_opreq *req = (struct lacp_opreq *)data; struct lacp_port *lp = LACP_PORT(lgp); @@ -665,6 +680,7 @@ lacp_disable_distributing(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; + struct lagg_softc *sc = lsc->lsc_softc; char buf[LACP_LAGIDSTR_MAX+1]; LACP_LOCK_ASSERT(lsc); @@ -684,6 +700,7 @@ lacp_disable_distributing(struct lacp_port *lp) TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q); la->la_nports--; + sc->sc_active = la->la_nports; if (lsc->lsc_active_aggregator == la) { lacp_suppress_distributing(lsc, la); @@ -700,6 +717,7 @@ lacp_enable_distributing(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; + struct lagg_softc *sc = lsc->lsc_softc; char buf[LACP_LAGIDSTR_MAX+1]; LACP_LOCK_ASSERT(lsc); @@ -716,6 +734,7 @@ lacp_enable_distributing(struct lacp_port *lp) KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid")); TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q); la->la_nports++; + sc->sc_active = la->la_nports; lp->lp_state |= LACP_STATE_DISTRIBUTING; @@ -734,26 +753,26 @@ lacp_transit_expire(void *vp) LACP_LOCK_ASSERT(lsc); + CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet); LACP_TRACE(NULL); + CURVNET_RESTORE(); lsc->lsc_suppress_distributing = FALSE; } -int +void lacp_attach(struct lagg_softc *sc) { struct lacp_softc *lsc; - lsc = malloc(sizeof(struct lacp_softc), - M_DEVBUF, M_NOWAIT|M_ZERO); - if (lsc == NULL) - return (ENOMEM); + lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO); - sc->sc_psc = (caddr_t)lsc; + sc->sc_psc = lsc; lsc->lsc_softc = sc; - lsc->lsc_hashkey = arc4random(); + lsc->lsc_hashkey = m_ether_tcpip_hash_init(); lsc->lsc_active_aggregator = NULL; + lsc->lsc_strict_mode = VNET(lacp_default_strict_mode); 
LACP_LOCK_INIT(lsc); TAILQ_INIT(&lsc->lsc_aggregators); LIST_INIT(&lsc->lsc_ports); @@ -764,27 +783,23 @@ lacp_attach(struct lagg_softc *sc) /* if the lagg is already up then do the same */ if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) lacp_init(sc); - - return (0); } -int -lacp_detach(struct lagg_softc *sc) +void +lacp_detach(void *psc) { - struct lacp_softc *lsc = LACP_SOFTC(sc); + struct lacp_softc *lsc = (struct lacp_softc *)psc; KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators), ("aggregators still active")); KASSERT(lsc->lsc_active_aggregator == NULL, ("aggregator still attached")); - sc->sc_psc = NULL; callout_drain(&lsc->lsc_transit_callout); callout_drain(&lsc->lsc_callout); LACP_LOCK_DESTROY(lsc); free(lsc, M_DEVBUF); - return (0); } void @@ -827,10 +842,11 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m) return (NULL); } - if (sc->use_flowid && (m->m_flags & M_FLOWID)) - hash = m->m_pkthdr.flowid; + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && + M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) + hash = m->m_pkthdr.flowid >> sc->flowid_shift; else - hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey); + hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey); hash %= pm->pm_count; lp = pm->pm_map[hash]; @@ -920,7 +936,6 @@ lacp_aggregator_bandwidth(struct lacp_aggregator *la) static void lacp_select_active_aggregator(struct lacp_softc *lsc) { - struct lagg_softc *sc = lsc->lsc_softc; struct lacp_aggregator *la; struct lacp_aggregator *best_la = NULL; uint64_t best_speed = 0; @@ -940,13 +955,13 @@ lacp_select_active_aggregator(struct lacp_softc *lsc) lacp_format_lagid_aggregator(la, buf, sizeof(buf)), speed, la->la_nports)); - /* This aggregator is chosen if - * the partner has a better system priority - * or, the total aggregated speed is higher - * or, it is already the chosen aggregator + /* + * This aggregator is chosen if the partner has a better + * system priority or, the total aggregated speed is higher + * or, it is already the chosen aggregator */ if 
((best_la != NULL && LACP_SYS_PRI(la->la_partner) < - LACP_SYS_PRI(best_la->la_partner)) || + LACP_SYS_PRI(best_la->la_partner)) || speed > best_speed || (speed == best_speed && la == lsc->lsc_active_aggregator)) { @@ -972,7 +987,6 @@ lacp_select_active_aggregator(struct lacp_softc *lsc) lacp_format_lagid_aggregator(best_la, buf, sizeof(buf)))); if (lsc->lsc_active_aggregator != best_la) { - sc->sc_ifp->if_baudrate = best_speed; lsc->lsc_active_aggregator = best_la; lacp_update_portmap(lsc); if (best_la) { @@ -988,15 +1002,18 @@ lacp_select_active_aggregator(struct lacp_softc *lsc) static void lacp_update_portmap(struct lacp_softc *lsc) { + struct lagg_softc *sc = lsc->lsc_softc; struct lacp_aggregator *la; struct lacp_portmap *p; struct lacp_port *lp; + uint64_t speed; u_int newmap; int i; newmap = lsc->lsc_activemap == 0 ? 1 : 0; p = &lsc->lsc_pmap[newmap]; la = lsc->lsc_active_aggregator; + speed = 0; bzero(p, sizeof(struct lacp_portmap)); if (la != NULL && la->la_nports > 0) { @@ -1005,7 +1022,9 @@ lacp_update_portmap(struct lacp_softc *lsc) TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) p->pm_map[i++] = lp; KASSERT(i == p->pm_count, ("Invalid port count")); + speed = lacp_aggregator_bandwidth(la); } + sc->sc_ifp->if_baudrate = speed; /* switch the active portmap over */ atomic_store_rel_int(&lsc->lsc_activemap, newmap); @@ -1054,12 +1073,16 @@ lacp_compose_key(struct lacp_port *lp) case IFM_100_T4: case IFM_100_VG: case IFM_100_T2: + case IFM_100_T: key = IFM_100_TX; break; case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: case IFM_1000_T: + case IFM_1000_KX: + case IFM_1000_SGMII: + case IFM_1000_CX_SGMII: key = IFM_1000_SX; break; case IFM_10G_LR: @@ -1069,15 +1092,53 @@ lacp_compose_key(struct lacp_port *lp) case IFM_10G_TWINAX_LONG: case IFM_10G_LRM: case IFM_10G_T: + case IFM_10G_KX4: + case IFM_10G_KR: + case IFM_10G_CR1: + case IFM_10G_ER: + case IFM_10G_SFI: key = IFM_10G_LR; break; + case IFM_20G_KR2: + key = IFM_20G_KR2; + break; + case IFM_2500_KX: 
+ case IFM_2500_T: + key = IFM_2500_KX; + break; + case IFM_5000_T: + key = IFM_5000_T; + break; + case IFM_50G_PCIE: + case IFM_50G_CR2: + case IFM_50G_KR2: + key = IFM_50G_PCIE; + break; + case IFM_56G_R4: + key = IFM_56G_R4; + break; + case IFM_25G_PCIE: + case IFM_25G_CR: + case IFM_25G_KR: + case IFM_25G_SR: + key = IFM_25G_PCIE; + break; case IFM_40G_CR4: case IFM_40G_SR4: case IFM_40G_LR4: + case IFM_40G_XLPPI: + case IFM_40G_KR4: key = IFM_40G_CR4; break; + case IFM_100G_CR4: + case IFM_100G_SR4: + case IFM_100G_KR4: + case IFM_100G_LR4: + key = IFM_100G_CR4; + break; default: key = subtype; + break; } /* bit 5..14: (some bits of) if_index of lagg device */ key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5); @@ -1313,6 +1374,8 @@ lacp_unselect(struct lacp_port *lp) static void lacp_sm_mux(struct lacp_port *lp) { + struct lagg_port *lgp = lp->lp_lagg; + struct lagg_softc *sc = lgp->lp_softc; enum lacp_mux_state new_state; boolean_t p_sync = (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0; @@ -1321,8 +1384,10 @@ lacp_sm_mux(struct lacp_port *lp) enum lacp_selected selected = lp->lp_selected; struct lacp_aggregator *la; - if (lacp_debug > 1) - lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state); + if (V_lacp_debug > 1) + lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, " + "p_sync= 0x%x, p_collecting= 0x%x\n", __func__, + lp->lp_mux_state, selected, p_sync, p_collecting); re_eval: la = lp->lp_aggregator; @@ -1362,6 +1427,8 @@ re_eval: case LACP_MUX_DISTRIBUTING: if (selected != LACP_SELECTED || !p_sync || !p_collecting) { new_state = LACP_MUX_COLLECTING; + lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n"); + sc->sc_flapping++; } break; default: @@ -1610,6 +1677,10 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) sizeof(buf)))); } + /* XXX Hack, still need to implement 5.4.9 para 2,3,4 */ + if (lp->lp_lsc->lsc_strict_mode) + lp->lp_partner.lip_state |= LACP_STATE_SYNC; + lacp_sm_ptx_update_timeout(lp, 
oldpstate); } @@ -1635,7 +1706,10 @@ lacp_sm_rx_record_default(struct lacp_port *lp) LACP_TRACE(lp); oldpstate = lp->lp_partner.lip_state; - lp->lp_partner = lacp_partner_admin; + if (lp->lp_lsc->lsc_strict_mode) + lp->lp_partner = lacp_partner_admin_strict; + else + lp->lp_partner = lacp_partner_admin_optimistic; lp->lp_state |= LACP_STATE_DEFAULTED; lacp_sm_ptx_update_timeout(lp, oldpstate); } @@ -1670,7 +1744,12 @@ lacp_sm_rx_update_default_selected(struct lacp_port *lp) LACP_TRACE(lp); - lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin); + if (lp->lp_lsc->lsc_strict_mode) + lacp_sm_rx_update_selected_from_peerinfo(lp, + &lacp_partner_admin_strict); + else + lacp_sm_rx_update_selected_from_peerinfo(lp, + &lacp_partner_admin_optimistic); } /* transmit machine */ @@ -1678,7 +1757,7 @@ lacp_sm_rx_update_default_selected(struct lacp_port *lp) static void lacp_sm_tx(struct lacp_port *lp) { - int error; + int error = 0; if (!(lp->lp_state & LACP_STATE_AGGREGATION) #if 1 @@ -1700,7 +1779,11 @@ lacp_sm_tx(struct lacp_port *lp) return; } - error = lacp_xmit_lacpdu(lp); + if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) { + error = lacp_xmit_lacpdu(lp); + } else { + LACP_TPRINTF((lp, "Dropping TX PDU\n")); + } if (error == 0) { lp->lp_flags &= ~LACP_PORT_NTT; diff --git a/freebsd/sys/net/ieee8023ad_lacp.h b/freebsd/sys/net/ieee8023ad_lacp.h index 9cebc591..8f0f51a7 100644 --- a/freebsd/sys/net/ieee8023ad_lacp.h +++ b/freebsd/sys/net/ieee8023ad_lacp.h @@ -75,6 +75,7 @@ "\007DEFAULTED" \ "\010EXPIRED" +#ifdef _KERNEL /* * IEEE802.3 slow protocols * @@ -245,6 +246,12 @@ struct lacp_softc { struct lacp_portmap lsc_pmap[2]; volatile u_int lsc_activemap; u_int32_t lsc_hashkey; + struct { + u_int32_t lsc_rx_test; + u_int32_t lsc_tx_test; + } lsc_debug; + u_int32_t lsc_strict_mode; + boolean_t lsc_fast_timeout; /* if set, fast timeout */ }; #define LACP_TYPE_ACTORINFO 1 @@ -277,15 +284,15 @@ struct lacp_softc { struct mbuf 
*lacp_input(struct lagg_port *, struct mbuf *); struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *); -int lacp_attach(struct lagg_softc *); -int lacp_detach(struct lagg_softc *); +void lacp_attach(struct lagg_softc *); +void lacp_detach(void *); void lacp_init(struct lagg_softc *); void lacp_stop(struct lagg_softc *); int lacp_port_create(struct lagg_port *); void lacp_port_destroy(struct lagg_port *); void lacp_linkstate(struct lagg_port *); -void lacp_req(struct lagg_softc *, caddr_t); -void lacp_portreq(struct lagg_port *, caddr_t); +void lacp_req(struct lagg_softc *, void *); +void lacp_portreq(struct lagg_port *, void *); static __inline int lacp_isactive(struct lagg_port *lgp) @@ -331,3 +338,4 @@ lacp_isdistributing(struct lagg_port *lgp) #define LACP_LAGIDSTR_MAX \ (1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1) #define LACP_STATESTR_MAX (255) /* XXX */ +#endif /* _KERNEL */ diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c index 2c638a37..8bfa9e21 100644 --- a/freebsd/sys/net/if.c +++ b/freebsd/sys/net/if.c @@ -65,12 +65,16 @@ #include <machine/stdarg.h> #include <vm/uma.h> +#include <net/bpf.h> +#include <net/ethernet.h> #include <net/if.h> #include <net/if_arp.h> #include <net/if_clone.h> #include <net/if_dl.h> #include <net/if_types.h> #include <net/if_var.h> +#include <net/if_media.h> +#include <net/if_vlan_var.h> #include <net/radix.h> #include <net/route.h> #include <net/vnet.h> @@ -97,14 +101,9 @@ #include <compat/freebsd32/freebsd32.h> #endif -struct ifindex_entry { - struct ifnet *ife_ifnet; -}; - SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); -TUNABLE_INT("net.link.ifqmaxlen", &ifqmaxlen); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); @@ -115,6 +114,13 @@ SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link 
state change events"); +/* Log promiscuous mode change events */ +static int log_promisc_mode_change = 1; + +SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, + &log_promisc_mode_change, 1, + "log promiscuous mode change events"); + /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, @@ -132,18 +138,22 @@ void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); +void (*carp_demote_adj_p)(int, char *); +int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) -struct ifnet *(*carp_forus_p)(struct ifnet *ifp, u_char *dhost); +int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *sa, struct rtentry *rt); + const struct sockaddr *sa); +int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); +int (*carp_attach_p)(struct ifaddr *, int); +void (*carp_detach_p)(struct ifaddr *); #endif #ifdef INET -int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, struct in_addr *, - u_int8_t **); +int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); -caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, +caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif @@ -158,23 +168,25 @@ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void if_freemulti(struct ifmultiaddr *); -static void if_init(void *); static void if_grow(void); static void if_input_default(struct ifnet *, struct mbuf *); +static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); 
static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); -static int if_rtdel(struct radix_node *, void *); static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); -static void if_attach_internal(struct ifnet *, int); -static void if_detach_internal(struct ifnet *, int); +static void if_attach_internal(struct ifnet *, int, struct if_clone *); +static int if_detach_internal(struct ifnet *, int, struct if_clone **); +#ifdef VIMAGE +static void if_vmove(struct ifnet *, struct vnet *); +#endif #ifdef INET6 /* @@ -184,6 +196,10 @@ static void if_detach_internal(struct ifnet *, int); extern void nd6_setmtu(struct ifnet *); #endif +/* ipsec helper hooks */ +VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); +VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); + VNET_DEFINE(int, if_index); int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ @@ -192,7 +208,7 @@ VNET_DEFINE(struct ifgrouphead, ifg_head); static VNET_DEFINE(int, if_indexlim) = 8; /* Table of ifnet by index. */ -VNET_DEFINE(struct ifindex_entry *, ifindex_table); +VNET_DEFINE(struct ifnet **, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) @@ -207,7 +223,9 @@ VNET_DEFINE(struct ifindex_entry *, ifindex_table); * inversions and deadlocks. 
*/ struct rwlock ifnet_rwlock; +RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; +SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we @@ -229,9 +247,9 @@ ifnet_byindex_locked(u_short idx) if (idx > V_if_index) return (NULL); - if (V_ifindex_table[idx].ife_ifnet == IFNET_HOLD) + if (V_ifindex_table[idx] == IFNET_HOLD) return (NULL); - return (V_ifindex_table[idx].ife_ifnet); + return (V_ifindex_table[idx]); } struct ifnet * @@ -265,34 +283,30 @@ ifnet_byindex_ref(u_short idx) * Allocate an ifindex array entry; return 0 on success or an error on * failure. */ -static int -ifindex_alloc_locked(u_short *idxp) +static u_short +ifindex_alloc(void) { u_short idx; IFNET_WLOCK_ASSERT(); - retry: /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { - if (V_ifindex_table[idx].ife_ifnet == NULL) + if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. 
*/ - if (idx < 1) - return (ENOSPC); if (idx >= V_if_indexlim) { if_grow(); goto retry; } if (idx > V_if_index) V_if_index = idx; - *idxp = idx; - return (0); + return (idx); } static void @@ -301,9 +315,9 @@ ifindex_free_locked(u_short idx) IFNET_WLOCK_ASSERT(); - V_ifindex_table[idx].ife_ifnet = NULL; + V_ifindex_table[idx] = NULL; while (V_if_index > 0 && - V_ifindex_table[V_if_index].ife_ifnet == NULL) + V_ifindex_table[V_if_index] == NULL) V_if_index--; } @@ -322,7 +336,7 @@ ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp) IFNET_WLOCK_ASSERT(); - V_ifindex_table[idx].ife_ifnet = ifp; + V_ifindex_table[idx] = ifp; } static void @@ -337,11 +351,12 @@ ifnet_setbyindex(u_short idx, struct ifnet *ifp) struct ifaddr * ifaddr_byindex(u_short idx) { - struct ifaddr *ifa; + struct ifnet *ifp; + struct ifaddr *ifa = NULL; IFNET_RLOCK_NOSLEEP(); - ifa = ifnet_byindex_locked(idx)->if_addr; - if (ifa != NULL) + ifp = ifnet_byindex_locked(idx); + if (ifp != NULL && (ifa = ifp->if_addr) != NULL) ifa_ref(ifa); IFNET_RUNLOCK_NOSLEEP(); return (ifa); @@ -368,17 +383,6 @@ vnet_if_init(const void *unused __unused) VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); -/* ARGSUSED*/ -static void -if_init(void *dummy __unused) -{ - - IFNET_LOCK_INIT(); - if_clone_init(); -} -SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL); - - #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) @@ -393,6 +397,20 @@ vnet_if_uninit(const void *unused __unused) } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); + +static void +vnet_if_return(const void *unused __unused) +{ + struct ifnet *ifp, *nifp; + + /* Return all inherited interfaces to their parent vnets. 
*/ + TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { + if (ifp->if_home_vnet != ifp->if_vnet) + if_vmove(ifp, ifp->if_home_vnet); + } +} +VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, + vnet_if_return, NULL); #endif static void @@ -400,7 +418,7 @@ if_grow(void) { int oldlim; u_int n; - struct ifindex_entry *e; + struct ifnet **e; IFNET_WLOCK_ASSERT(); oldlim = V_if_indexlim; @@ -433,16 +451,15 @@ if_alloc(u_char type) ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO); IFNET_WLOCK(); - if (ifindex_alloc_locked(&idx) != 0) { - IFNET_WUNLOCK(); - free(ifp, M_IFNET); - return (NULL); - } + idx = ifindex_alloc(); ifnet_setbyindex_locked(idx, IFNET_HOLD); IFNET_WUNLOCK(); ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; +#ifdef VIMAGE + ifp->if_vnet = curvnet; +#endif if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { @@ -457,7 +474,6 @@ if_alloc(u_char type) ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); TAILQ_INIT(&ifp->if_addrhead); - TAILQ_INIT(&ifp->if_prefixhead); TAILQ_INIT(&ifp->if_multiaddrs); TAILQ_INIT(&ifp->if_groups); #ifdef MAC @@ -466,6 +482,9 @@ if_alloc(u_char type) ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index reference. */ + for (int i = 0; i < IFCOUNTERS; i++) + ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); + ifp->if_get_counter = if_get_counter_default; ifnet_setbyindex(ifp->if_index, ifp); return (ifp); } @@ -494,23 +513,20 @@ if_free_internal(struct ifnet *ifp) IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); ifq_delete(&ifp->if_snd); + + for (int i = 0; i < IFCOUNTERS; i++) + counter_u64_free(ifp->if_counters[i]); + free(ifp, M_IFNET); } /* - * This version should only be called by intefaces that switch their type - * after calling if_alloc(). if_free_type() will go away again now that we - * have if_alloctype to cache the original allocation type. 
For now, assert - * that they match, since we require that in practice. + * Deregister an interface and free the associated storage. */ void -if_free_type(struct ifnet *ifp, u_char type) +if_free(struct ifnet *ifp) { - KASSERT(ifp->if_alloctype == type, - ("if_free_type: type (%d) != alloctype (%d)", type, - ifp->if_alloctype)); - ifp->if_flags |= IFF_DYING; /* XXX: Locking */ CURVNET_SET_QUIET(ifp->if_vnet); @@ -527,18 +543,6 @@ if_free_type(struct ifnet *ifp, u_char type) } /* - * This is the normal version of if_free(), used by device drivers to free a - * detached network interface. The contents of if_free_type() will move into - * here when if_free_type() goes away. - */ -void -if_free(struct ifnet *ifp) -{ - - if_free_type(ifp, ifp->if_alloctype); -} - -/* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. @@ -583,12 +587,21 @@ ifq_delete(struct ifaltq *ifq) } /* - * Perform generic interface initalization tasks and attach the interface + * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * + * Note that if_detach_internal() removes group membership unconditionally + * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. + * Thus, when if_vmove() is applied to a cloned interface, group membership + * is lost while a cloned one always joins a group whose name is + * ifc->ifc_name. To recover this after if_detach_internal() and + * if_attach_internal(), the cloner should be specified to + * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal() + * attempts to join a group whose name is ifc->ifc_name. 
+ * * XXX: * - The decision to return void and thus require this function to * succeed is questionable. @@ -599,14 +612,14 @@ void if_attach(struct ifnet *ifp) { - if_attach_internal(ifp, 0); + if_attach_internal(ifp, 0, NULL); } /* * Compute the least common TSO limit. */ void -if_hw_tsomax_common(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax) +if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from @@ -635,7 +648,7 @@ if_hw_tsomax_common(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax) * Returns zero if no change. Else non-zero. */ int -if_hw_tsomax_update(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax) +if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { @@ -654,7 +667,7 @@ if_hw_tsomax_update(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax) } static void -if_attach_internal(struct ifnet *ifp, int vmove) +if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { unsigned socksize, ifasize; int namelen, masklen; @@ -673,9 +686,12 @@ if_attach_internal(struct ifnet *ifp, int vmove) if_addgroup(ifp, IFG_ALL); + /* Restore group membership for cloned interfaces. 
*/ + if (vmove && ifc != NULL) + if_clone_addgroup(ifp, ifc); + getmicrotime(&ifp->if_lastchange); - ifp->if_data.ifi_epoch = time_uptime; - ifp->if_data.ifi_datalen = sizeof(struct if_data); + ifp->if_epoch = time_uptime; KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || (ifp->if_transmit != NULL && ifp->if_qflush != NULL), @@ -687,6 +703,9 @@ if_attach_internal(struct ifnet *ifp, int vmove) if (ifp->if_input == NULL) ifp->if_input = if_input_default; + if (ifp->if_requestencap == NULL) + ifp->if_requestencap = if_requestencap_default; + if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); @@ -706,8 +725,7 @@ if_attach_internal(struct ifnet *ifp, int vmove) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; - ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO); - ifa_init(ifa); + ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; @@ -792,12 +810,9 @@ static void if_attachdomain(void *dummy) { struct ifnet *ifp; - int s; - s = splnet(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); - splx(s); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); @@ -806,23 +821,16 @@ static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; - int s; - - s = splnet(); /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. 
*/ - if (IF_AFDATA_TRYLOCK(ifp) == 0) { - splx(s); - return; - } + IF_AFDATA_LOCK(ifp); if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); - splx(s); - printf("if_attachdomain called more than once on %s\n", - ifp->if_xname); + log(LOG_WARNING, "%s called more than once on %s\n", + __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; @@ -835,8 +843,6 @@ if_attachdomain1(struct ifnet *ifp) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } - - splx(s); } /* @@ -847,6 +853,7 @@ if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa, *next; + /* XXX cannot hold IF_ADDR_WLOCK over called functions. */ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family == AF_LINK) continue; @@ -871,7 +878,9 @@ if_purgeaddrs(struct ifnet *ifp) continue; } #endif /* INET6 */ + IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); + IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } } @@ -906,20 +915,34 @@ if_detach(struct ifnet *ifp) { CURVNET_SET_QUIET(ifp->if_vnet); - if_detach_internal(ifp, 0); + if_detach_internal(ifp, 0, NULL); CURVNET_RESTORE(); } -static void -if_detach_internal(struct ifnet *ifp, int vmove) +/* + * The vmove flag, if set, indicates that we are called from a callpath + * that is moving an interface to a different vnet instance. + * + * The shutdown flag, if set, indicates that we are called in the + * process of shutting down a vnet instance. Currently only the + * vnet_if_return SYSUNINIT function sets it. Note: we can be called + * on a vnet instance shutdown without this flag being set, e.g., when + * the cloned interfaces are destoyed as first thing of teardown. 
+ */ +static int +if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { struct ifaddr *ifa; - struct radix_node_head *rnh; - int i, j; + int i; struct domain *dp; struct ifnet *iter; int found = 0; +#ifdef VIMAGE + int shutdown; + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; +#endif IFNET_WLOCK(); TAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { @@ -927,28 +950,77 @@ if_detach_internal(struct ifnet *ifp, int vmove) found = 1; break; } -#ifdef VIMAGE - if (found) - curvnet->vnet_ifcnt--; -#endif IFNET_WUNLOCK(); if (!found) { + /* + * While we would want to panic here, we cannot + * guarantee that the interface is indeed still on + * the list given we don't hold locks all the way. + */ + return (ENOENT); +#if 0 if (vmove) panic("%s: ifp=%p not on the ifnet tailq %p", __func__, ifp, &V_ifnet); else return; /* XXX this should panic as well? */ +#endif } /* - * Remove/wait for pending events. + * At this point we know the interface still was on the ifnet list + * and we removed it so we are in a stable state. */ +#ifdef VIMAGE + curvnet->vnet_ifcnt--; +#endif + + /* + * In any case (destroy or vmove) detach us from the groups + * and remove/wait for pending events on the taskq. + * XXX-BZ in theory an interface could still enqueue a taskq change? + */ + if_delgroups(ifp); + taskqueue_drain(taskqueue_swi, &ifp->if_linktask); /* - * Remove routes and flush queues. + * Check if this is a cloned interface or not. Must do even if + * shutting down as a if_vmove_reclaim() would move the ifp and + * the if_clone_addgroup() will have a corrupted string overwise + * from a gibberish pointer. */ + if (vmove && ifcp != NULL) + *ifcp = if_clone_findifc(ifp); + if_down(ifp); + +#ifdef VIMAGE + /* + * On VNET shutdown abort here as the stack teardown will do all + * the work top-down for us. + */ + if (shutdown) { + /* + * In case of a vmove we are done here without error. 
+ * If we would signal an error it would lead to the same + * abort as if we did not find the ifnet anymore. + * if_detach() calls us in void context and does not care + * about an early abort notification, so life is splendid :) + */ + goto finish_vnet_shutdown; + } +#endif + + /* + * At this point we are not tearing down a VNET and are either + * going to destroy or vmove the interface and have to cleanup + * accordingly. + */ + + /* + * Remove routes and flush queues. + */ #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); @@ -973,6 +1045,12 @@ if_detach_internal(struct ifnet *ifp, int vmove) #endif if_purgemaddrs(ifp); + /* Announce that the interface is gone. */ + rt_ifannouncemsg(ifp, IFAN_DEPARTURE); + EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); + if (IS_DEFAULT_VNET(curvnet)) + devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); + if (!vmove) { /* * Prevent further calls into the device driver via ifnet. @@ -986,37 +1064,21 @@ if_detach_internal(struct ifnet *ifp, int vmove) ifp->if_addr = NULL; /* We can now free link ifaddr. */ + IF_ADDR_WLOCK(ifp); if (!TAILQ_EMPTY(&ifp->if_addrhead)) { ifa = TAILQ_FIRST(&ifp->if_addrhead); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); + IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); - } - } - - /* - * Delete all remaining routes using this interface - * Unfortuneatly the only way to do this is to slog through - * the entire routing table looking for routes which point - * to this interface...oh well... - */ - for (i = 1; i <= AF_MAX; i++) { - for (j = 0; j < rt_numfibs; j++) { - rnh = rt_tables_get_rnh(j, i); - if (rnh == NULL) - continue; - RADIX_NODE_HEAD_LOCK(rnh); - (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); - RADIX_NODE_HEAD_UNLOCK(rnh); - } + } else + IF_ADDR_WUNLOCK(ifp); } - /* Announce that the interface is gone. 
*/ - rt_ifannouncemsg(ifp, IFAN_DEPARTURE); - EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); - if (IS_DEFAULT_VNET(curvnet)) - devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); - if_delgroups(ifp); + rt_flushifroutes(ifp); +#ifdef VIMAGE +finish_vnet_shutdown: +#endif /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the @@ -1027,10 +1089,14 @@ if_detach_internal(struct ifnet *ifp, int vmove) ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { - if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) + if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); + ifp->if_afdata[dp->dom_family] = NULL; + } } + + return (0); } #ifdef VIMAGE @@ -1041,16 +1107,28 @@ if_detach_internal(struct ifnet *ifp, int vmove) * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ -void +static void if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { - u_short idx; + struct if_clone *ifc; + u_int bif_dlt, bif_hdrlen; + int rc; + + /* + * if_detach_internal() will call the eventhandler to notify + * interface departure. That will detach if_bpf. We need to + * safe the dlt and hdrlen so we can re-attach it later. + */ + bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); /* * Detach from current vnet, but preserve LLADDR info, do not * mark as dead etc. so that the ifnet can be reattached later. + * If we cannot find it, we lost the race to someone else. 
*/ - if_detach_internal(ifp, 1); + rc = if_detach_internal(ifp, 1, &ifc); + if (rc != 0) + return; /* * Unlink the ifnet from ifindex_table[] in current vnet, and shrink @@ -1076,15 +1154,14 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet) CURVNET_SET_QUIET(new_vnet); IFNET_WLOCK(); - if (ifindex_alloc_locked(&idx) != 0) { - IFNET_WUNLOCK(); - panic("if_index overflow"); - } - ifp->if_index = idx; + ifp->if_index = ifindex_alloc(); ifnet_setbyindex_locked(ifp->if_index, ifp); IFNET_WUNLOCK(); - if_attach_internal(ifp, 1); + if_attach_internal(ifp, 1, ifc); + + if (ifp->if_bpf == NULL) + bpfattach(ifp, bif_dlt, bif_hdrlen); CURVNET_RESTORE(); } @@ -1097,6 +1174,7 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; + int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); @@ -1117,12 +1195,22 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); - CURVNET_RESTORE(); if (difp != NULL) { + CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } + /* Make sure the VNET is stable. */ + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + prison_free(pr); + return (EBUSY); + } + CURVNET_RESTORE(); + /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); @@ -1139,6 +1227,7 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid) struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; + int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); @@ -1166,6 +1255,15 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid) return (EEXIST); } + /* Make sure the VNET is stable. */ + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 
1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + prison_free(pr); + return (EBUSY); + } + /* Get interface back from child jail/vnet. */ if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); @@ -1187,6 +1285,7 @@ if_addgroup(struct ifnet *ifp, const char *groupname) struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; + int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') @@ -1227,8 +1326,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname) strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; TAILQ_INIT(&ifg->ifg_members); - EVENTHANDLER_INVOKE(group_attach_event, ifg); TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); + new = 1; } ifg->ifg_refcnt++; @@ -1242,6 +1341,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname) IFNET_WUNLOCK(); + if (new) + EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); @@ -1280,10 +1381,11 @@ if_delgroup(struct ifnet *ifp, const char *groupname) if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); + IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); - } - IFNET_WUNLOCK(); + } else + IFNET_WUNLOCK(); free(ifgl, M_TEMP); @@ -1324,11 +1426,12 @@ if_delgroups(struct ifnet *ifp) if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); + IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); - } - IFNET_WUNLOCK(); + } else + IFNET_WUNLOCK(); free(ifgl, M_TEMP); @@ -1434,46 +1537,63 @@ if_getgroupmembers(struct ifgroupreq *data) } /* - * Delete Routes for a Network Interface - * - * Called for each routing entry via the rnh->rnh_walktree() call above - * to delete all route entries referencing a detaching network interface. 
- * - * Arguments: - * rn pointer to node in the routing table - * arg argument passed to rnh->rnh_walktree() - detaching interface - * - * Returns: - * 0 successful - * errno failed - reason indicated - * + * Return counter values from counter(9)s stored in ifnet. */ -static int -if_rtdel(struct radix_node *rn, void *arg) +uint64_t +if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { - struct rtentry *rt = (struct rtentry *)rn; - struct ifnet *ifp = arg; - int err; - if (rt->rt_ifp == ifp) { + KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); - /* - * Protect (sorta) against walktree recursion problems - * with cloned routes - */ - if ((rt->rt_flags & RTF_UP) == 0) - return (0); + return (counter_u64_fetch(ifp->if_counters[cnt])); +} - err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway, - rt_mask(rt), - rt->rt_flags|RTF_RNH_LOCKED|RTF_PINNED, - (struct rtentry **) NULL, rt->rt_fibnum); - if (err) { - log(LOG_WARNING, "if_rtdel: error %d\n", err); - } - } +/* + * Increase an ifnet counter. Usually used for counters shared + * between the stack and a driver, but function supports them all. + */ +void +if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) +{ - return (0); + KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); + + counter_u64_add(ifp->if_counters[cnt], inc); +} + +/* + * Copy data from ifnet to userland API structure if_data. 
+ */ +void +if_data_copy(struct ifnet *ifp, struct if_data *ifd) +{ + + ifd->ifi_type = ifp->if_type; + ifd->ifi_physical = 0; + ifd->ifi_addrlen = ifp->if_addrlen; + ifd->ifi_hdrlen = ifp->if_hdrlen; + ifd->ifi_link_state = ifp->if_link_state; + ifd->ifi_vhid = 0; + ifd->ifi_datalen = sizeof(struct if_data); + ifd->ifi_mtu = ifp->if_mtu; + ifd->ifi_metric = ifp->if_metric; + ifd->ifi_baudrate = ifp->if_baudrate; + ifd->ifi_hwassist = ifp->if_hwassist; + ifd->ifi_epoch = ifp->if_epoch; + ifd->ifi_lastchange = ifp->if_lastchange; + + ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); + ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); + ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); + ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); + ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); + ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); + ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); + ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); + ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); + ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); + ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); + ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* @@ -1497,28 +1617,56 @@ if_addr_runlock(struct ifnet *ifp) } void -if_maddr_rlock(struct ifnet *ifp) +if_maddr_rlock(if_t ifp) { - IF_ADDR_RLOCK(ifp); + IF_ADDR_RLOCK((struct ifnet *)ifp); } void -if_maddr_runlock(struct ifnet *ifp) +if_maddr_runlock(if_t ifp) { - IF_ADDR_RUNLOCK(ifp); + IF_ADDR_RUNLOCK((struct ifnet *)ifp); } /* - * Reference count functions for ifaddrs. + * Initialization, destruction and refcounting functions for ifaddrs. 
*/ -void -ifa_init(struct ifaddr *ifa) +struct ifaddr * +ifa_alloc(size_t size, int flags) { + struct ifaddr *ifa; + + KASSERT(size >= sizeof(struct ifaddr), + ("%s: invalid size %zu", __func__, size)); + + ifa = malloc(size, M_IFADDR, M_ZERO | flags); + if (ifa == NULL) + return (NULL); + + if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) + goto fail; + if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) + goto fail; + if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) + goto fail; + if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) + goto fail; - mtx_init(&ifa->ifa_mtx, "ifaddr", NULL, MTX_DEF); refcount_init(&ifa->ifa_refcnt, 1); + + return (ifa); + +fail: + /* free(NULL) is okay */ + counter_u64_free(ifa->ifa_opackets); + counter_u64_free(ifa->ifa_ipackets); + counter_u64_free(ifa->ifa_obytes); + counter_u64_free(ifa->ifa_ibytes); + free(ifa, M_IFADDR); + + return (NULL); } void @@ -1533,62 +1681,61 @@ ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) { - mtx_destroy(&ifa->ifa_mtx); + counter_u64_free(ifa->ifa_opackets); + counter_u64_free(ifa->ifa_ipackets); + counter_u64_free(ifa->ifa_obytes); + counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } } -int -ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) +static int +ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, + struct sockaddr *ia) { - int error = 0; - struct rtentry *rt = NULL; + int error; struct rt_addrinfo info; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; + struct sockaddr_dl null_sdl; + struct ifnet *ifp; + + ifp = ifa->ifa_ifp; bzero(&info, sizeof(info)); - info.rti_ifp = V_loif; + if (cmd != RTM_DELETE) + info.rti_ifp = V_loif; info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; - error = rtrequest1_fib(RTM_ADD, &info, &rt, ifa->ifa_ifp->if_fib); - - if (error == 0 && rt != NULL) 
{ - RT_LOCK(rt); - ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = - ifa->ifa_ifp->if_type; - ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = - ifa->ifa_ifp->if_index; - RT_REMREF(rt); - RT_UNLOCK(rt); - } else if (error != 0) - log(LOG_INFO, "ifa_add_loopback_route: insertion failed\n"); + link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); + + error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); + + if (error != 0) + log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n", + __func__, otype, if_name(ifp), error); return (error); } int +ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) +{ + + return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia)); +} + +int ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { - int error = 0; - struct rt_addrinfo info; - struct sockaddr_dl null_sdl; - bzero(&null_sdl, sizeof(null_sdl)); - null_sdl.sdl_len = sizeof(null_sdl); - null_sdl.sdl_family = AF_LINK; - null_sdl.sdl_type = ifa->ifa_ifp->if_type; - null_sdl.sdl_index = ifa->ifa_ifp->if_index; - bzero(&info, sizeof(info)); - info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC; - info.rti_info[RTAX_DST] = ia; - info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; - error = rtrequest1_fib(RTM_DELETE, &info, NULL, ifa->ifa_ifp->if_fib); + return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia)); +} - if (error != 0) - log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n"); +int +ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) +{ - return (error); + return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia)); } /* @@ -1597,22 +1744,19 @@ ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) * to perform a different comparison. 
*/ -#define sa_equal(a1, a2) \ - (bcmp((a1), (a2), ((a1))->sa_len) == 0) - #define sa_dl_equal(a1, a2) \ - ((((struct sockaddr_dl *)(a1))->sdl_len == \ - ((struct sockaddr_dl *)(a2))->sdl_len) && \ - (bcmp(LLADDR((struct sockaddr_dl *)(a1)), \ - LLADDR((struct sockaddr_dl *)(a2)), \ - ((struct sockaddr_dl *)(a1))->sdl_alen) == 0)) + ((((const struct sockaddr_dl *)(a1))->sdl_len == \ + ((const struct sockaddr_dl *)(a2))->sdl_len) && \ + (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ + CLLADDR((const struct sockaddr_dl *)(a2)), \ + ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. */ /*ARGSUSED*/ static struct ifaddr * -ifa_ifwithaddr_internal(struct sockaddr *addr, int getref) +ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref) { struct ifnet *ifp; struct ifaddr *ifa; @@ -1649,14 +1793,14 @@ done: } struct ifaddr * -ifa_ifwithaddr(struct sockaddr *addr) +ifa_ifwithaddr(const struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 1)); } int -ifa_ifwithaddr_check(struct sockaddr *addr) +ifa_ifwithaddr_check(const struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 0) != NULL); @@ -1667,13 +1811,15 @@ ifa_ifwithaddr_check(struct sockaddr *addr) */ /* ARGSUSED */ struct ifaddr * -ifa_ifwithbroadaddr(struct sockaddr *addr) +ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { + if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) + continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) @@ -1700,7 +1846,7 @@ done: */ /*ARGSUSED*/ struct ifaddr * -ifa_ifwithdstaddr_fib(struct sockaddr *addr, int fibnum) +ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; @@ -1730,32 +1876,25 @@ done: return (ifa); } -struct ifaddr * -ifa_ifwithdstaddr(struct 
sockaddr *addr) -{ - - return (ifa_ifwithdstaddr_fib(addr, RT_ALL_FIBS)); -} - /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * -ifa_ifwithnet_fib(struct sockaddr *addr, int ignore_ptp, int fibnum) +ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; - char *addr_data = addr->sa_data, *cplim; + const char *addr_data = addr->sa_data, *cplim; /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { - struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; + const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= V_if_index) return (ifaddr_byindex(sdl->sdl_index)); } @@ -1772,7 +1911,7 @@ ifa_ifwithnet_fib(struct sockaddr *addr, int ignore_ptp, int fibnum) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - char *cp, *cp2, *cp3; + const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; @@ -1794,19 +1933,6 @@ next: continue; } } else { /* - * if we have a special address handler, - * then use it instead of the generic one. - */ - if (ifa->ifa_claim_addr) { - if ((*ifa->ifa_claim_addr)(ifa, addr)) { - ifa_ref(ifa); - IF_ADDR_RUNLOCK(ifp); - goto done; - } - continue; - } - - /* * Scan all the bits in the ifa's address. * If a bit dissagrees with what we are * looking for, mask it with the netmask @@ -1826,11 +1952,13 @@ next: continue; /* * If the netmask of what we just found * is more specific than what we had before - * (if we had one) then remember the new one - * before continuing to search - * for an even better one. + * (if we had one), or if the virtual status + * of new prefix is better than of the old one, + * then remember the new one before continuing + * to search for an even better one. 
*/ if (ifa_maybe == NULL || + ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { if (ifa_maybe != NULL) @@ -1851,22 +1979,15 @@ done: return (ifa); } -struct ifaddr * -ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp) -{ - - return (ifa_ifwithnet_fib(addr, ignore_ptp, RT_ALL_FIBS)); -} - /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * -ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp) +ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; - char *cp, *cp2, *cp3; + const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; @@ -1909,6 +2030,21 @@ done: return (ifa); } +/* + * See whether new ifa is better than current one: + * 1) A non-virtual one is preferred over virtual. + * 2) A virtual in master state preferred over any other state. + * + * Used in several address selecting functions. + */ +int +ifa_preferred(struct ifaddr *cur, struct ifaddr *next) +{ + + return (cur->ifa_carp && (!next->ifa_carp || + ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); +} + #include <net/if_llatbl.h> /* @@ -1923,10 +2059,8 @@ link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) struct sockaddr *dst; struct ifnet *ifp; - RT_LOCK_ASSERT(rt); - - if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) || - ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0)) + if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) || + ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL)) return; ifa = ifaof_ifpforaddr(dst, ifp); if (ifa) { @@ -1938,10 +2072,41 @@ link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) } } +struct sockaddr_dl * +link_alloc_sdl(size_t size, int flags) +{ + + return (malloc(size, M_TEMP, flags)); +} + +void +link_free_sdl(struct sockaddr *sa) +{ + free(sa, M_TEMP); +} + +/* + * Fills in given sdl with interface basic info. 
+ * Returns pointer to filled sdl. + */ +struct sockaddr_dl * +link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) +{ + struct sockaddr_dl *sdl; + + sdl = (struct sockaddr_dl *)paddr; + memset(sdl, 0, sizeof(struct sockaddr_dl)); + sdl->sdl_len = sizeof(struct sockaddr_dl); + sdl->sdl_family = AF_LINK; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = iftype; + + return (sdl); +} + /* * Mark an interface down and notify protocols of * the transition. - * NOTE: must be called at splnet or eqivalent. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) @@ -1965,7 +2130,6 @@ if_unroute(struct ifnet *ifp, int flag, int fam) /* * Mark an interface up and notify protocols of * the transition. - * NOTE: must be called at splnet or eqivalent. */ static void if_route(struct ifnet *ifp, int flag, int fam) @@ -2026,7 +2190,7 @@ do_link_state_change(void *arg, int pending) (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && - IFP2AC(ifp)->ac_netgraph != NULL) + ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); @@ -2051,7 +2215,6 @@ do_link_state_change(void *arg, int pending) /* * Mark an interface down and notify protocols of * the transition. - * NOTE: must be called at splnet or eqivalent. */ void if_down(struct ifnet *ifp) @@ -2063,7 +2226,6 @@ if_down(struct ifnet *ifp) /* * Mark an interface up and notify protocols of * the transition. - * NOTE: must be called at splnet or eqivalent. 
*/ void if_up(struct ifnet *ifp) @@ -2088,8 +2250,8 @@ if_qflush(struct ifnet *ifp) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; - while ((m = n) != 0) { - n = m->m_act; + while ((m = n) != NULL) { + n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; @@ -2140,7 +2302,6 @@ static int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; - struct ifstat *ifs; int error = 0; int new_flags, temp_flags; size_t namelen, onamelen; @@ -2182,7 +2343,8 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) break; case SIOCGIFPHYS: - ifr->ifr_phys = ifp->if_physical; + /* XXXGL: did this ever worked? */ + ifr->ifr_phys = 0; break; case SIOCGIFDESCR: @@ -2262,18 +2424,12 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) */ new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); - if (ifp->if_flags & IFF_SMART) { - /* Smart drivers twiddle their own routes */ - } else if (ifp->if_flags & IFF_UP && + if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { - int s = splimp(); if_down(ifp); - splx(s); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { - int s = splimp(); if_up(ifp); - splx(s); } /* See if permanently promiscuous mode bit is about to flip */ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { @@ -2281,9 +2437,11 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; - log(LOG_INFO, "%s: permanently promiscuous mode %s\n", - ifp->if_xname, - (new_flags & IFF_PPROMISC) ? "enabled" : "disabled"); + if (log_promisc_mode_change) + log(LOG_INFO, "%s: permanently promiscuous mode %s\n", + ifp->if_xname, + ((new_flags & IFF_PPROMISC) ? 
+ "enabled" : "disabled")); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); @@ -2321,6 +2479,11 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) return (error); if (new_name[0] == '\0') return (EINVAL); + if (new_name[IFNAMSIZ-1] != '\0') { + new_name[IFNAMSIZ-1] = '\0'; + if (strlen(new_name) == IFNAMSIZ-1) + return (EINVAL); + } if (ifunit(new_name) != NULL) return (EEXIST); @@ -2339,9 +2502,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) log(LOG_INFO, "%s: changing name to '%s'\n", ifp->if_xname, new_name); + IF_ADDR_WLOCK(ifp); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; - IFA_LOCK(ifa); sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; @@ -2360,7 +2523,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; - IFA_UNLOCK(ifa); + IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the return of the interface. 
*/ @@ -2420,6 +2583,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) #ifdef INET6 nd6_setmtu(ifp); #endif + rt_updatemtu(ifp); } break; } @@ -2470,7 +2634,6 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif - case SIOCSLIFPHYADDR: case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); @@ -2484,13 +2647,10 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) break; case SIOCGIFSTATUS: - ifs = (struct ifstat *)data; - ifs->ascii[0] = '\0'; - case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: - case SIOCGLIFPHYADDR: case SIOCGIFMEDIA: + case SIOCGIFXMEDIA: case SIOCGIFGENERIC: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); @@ -2503,7 +2663,6 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); - EVENTHANDLER_INVOKE(iflladdr_event, ifp); break; case SIOCAIFGROUP: @@ -2542,6 +2701,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) return (error); } +/* COMPAT_SVR4 */ +#define OSIOCGIFCONF _IOWR('i', 20, struct ifconf) + #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; @@ -2563,11 +2725,25 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) struct ifreq *ifr; int error; int oif_flags; +#ifdef VIMAGE + int shutdown; +#endif CURVNET_SET(so->so_vnet); +#ifdef VIMAGE + /* Make sure the VNET is stable. */ + shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET && + so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 
1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + return (EBUSY); + } +#endif + + switch (cmd) { case SIOCGIFCONF: - case OSIOCGIFCONF: + case OSIOCGIFCONF: /* COMPAT_SVR4 */ error = ifconf(cmd, data); CURVNET_RESTORE(); return (error); @@ -2626,6 +2802,16 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) error = if_getgroupmembers((struct ifgroupreq *)data); CURVNET_RESTORE(); return (error); +#if defined(INET) || defined(INET6) + case SIOCSVH: + case SIOCGVH: + if (carp_ioctl_p == NULL) + error = EPROTONOSUPPORT; + else + error = (*carp_ioctl_p)(ifr, cmd, td); + CURVNET_RESTORE(); + return (error); +#endif } ifp = ifunit_ref(ifr->ifr_name); @@ -2657,79 +2843,17 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) * layer, and do not perform any credentials checks or input * validation. */ -#ifndef COMPAT_43 - error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, - data, - ifp, td)); + error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, + ifp, td)); if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) error = (*ifp->if_ioctl)(ifp, cmd, data); -#else - { - u_long ocmd = cmd; - - switch (cmd) { - - case SIOCSIFDSTADDR: - case SIOCSIFADDR: - case SIOCSIFBRDADDR: - case SIOCSIFNETMASK: -#if BYTE_ORDER != BIG_ENDIAN - if (ifr->ifr_addr.sa_family == 0 && - ifr->ifr_addr.sa_len < 16) { - ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len; - ifr->ifr_addr.sa_len = 16; - } -#else - if (ifr->ifr_addr.sa_len == 0) - ifr->ifr_addr.sa_len = 16; -#endif - break; - - case OSIOCGIFADDR: - cmd = SIOCGIFADDR; - break; - - case OSIOCGIFDSTADDR: - cmd = SIOCGIFDSTADDR; - break; - - case OSIOCGIFBRDADDR: - cmd = SIOCGIFBRDADDR; - break; - - case OSIOCGIFNETMASK: - cmd = SIOCGIFNETMASK; - } - error = ((*so->so_proto->pr_usrreqs->pru_control)(so, - cmd, - data, - ifp, td)); - if (error == EOPNOTSUPP && ifp != NULL && - 
ifp->if_ioctl != NULL && - cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && - cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) - error = (*ifp->if_ioctl)(ifp, cmd, data); - switch (ocmd) { - - case OSIOCGIFADDR: - case OSIOCGIFDSTADDR: - case OSIOCGIFBRDADDR: - case OSIOCGIFNETMASK: - *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family; - - } - } -#endif /* COMPAT_43 */ if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 - if (ifp->if_flags & IFF_UP) { - int s = splimp(); + if (ifp->if_flags & IFF_UP) in6_if_up(ifp); - splx(s); - } #endif } if_rele(ifp); @@ -2825,7 +2949,8 @@ ifpromisc(struct ifnet *ifp, int pswitch) error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ - if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC)) + if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && + log_promisc_mode_change) log(LOG_INFO, "%s: promiscuous mode %s\n", ifp->if_xname, (ifp->if_flags & IFF_PROMISC) ? 
"enabled" : "disabled"); @@ -2890,16 +3015,15 @@ again: if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; -#ifdef COMPAT_43 + /* COMPAT_SVR4 */ if (cmd == OSIOCGIFCONF) { struct osockaddr *osa = - (struct osockaddr *)&ifr.ifr_addr; + (struct osockaddr *)&ifr.ifr_addr; ifr.ifr_addr = *sa; osa->sa_family = sa->sa_family; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else -#endif if (sa->sa_len <= sizeof(*sa)) { ifr.ifr_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); @@ -2955,7 +3079,7 @@ if_allmulti(struct ifnet *ifp, int onswitch) } struct ifmultiaddr * -if_findmulti(struct ifnet *ifp, struct sockaddr *sa) +if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; @@ -3034,8 +3158,6 @@ if_freemulti(struct ifmultiaddr *ifma) KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); - KASSERT(ifma->ifma_protospec == NULL, - ("if_freemulti: protospec not NULL")); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); @@ -3067,6 +3189,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa, { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; + struct sockaddr_dl sdl; int error; /* @@ -3086,12 +3209,18 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa, /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its - * refcount or allocate an ifma for that also. If 'llsa' was - * returned, we will need to free it later. + * refcount or allocate an ifma for that also. + * Most link layer resolving functions returns address data which + * fits inside default sockaddr_dl structure. However callback + * can allocate another sockaddr structure, in that case we need to + * free it later. 
*/ llsa = NULL; ll_ifma = NULL; if (ifp->if_resolvemulti != NULL) { + /* Provide called function with buffer size information */ + sdl.sdl_len = sizeof(sdl); + llsa = (struct sockaddr *)&sdl; error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) goto unlock_out; @@ -3155,14 +3284,14 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa, (void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); } - if (llsa != NULL) - free(llsa, M_IFMADDR); + if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) + link_free_sdl(llsa); return (0); free_llsa_out: - if (llsa != NULL) - free(llsa, M_IFMADDR); + if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) + link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); @@ -3363,8 +3492,10 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. + * + * Set noinline to be dtrace-friendly */ -int +__noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; @@ -3422,17 +3553,45 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } -#ifdef INET - /* - * Also send gratuitous ARPs to notify other nodes about - * the address change. - */ - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family == AF_INET) - arp_ifinit(ifp, ifa); - } -#endif } + EVENTHANDLER_INVOKE(iflladdr_event, ifp); + return (0); +} + +/* + * Compat function for handling basic encapsulation requests. + * Not converted stacks (FDDI, IB, ..) supports traditional + * output model: ARP (and other similar L2 protocols) are handled + * inside output routine, arpresolve/nd6_resolve() returns MAC + * address instead of full prepend. 
+ * + * This function creates calculated header==MAC for IPv4/IPv6 and + * returns EAFNOSUPPORT (which is then handled in ARP code) for other + * address families. + */ +static int +if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) +{ + + if (req->rtype != IFENCAP_LL) + return (EOPNOTSUPP); + + if (req->bufsize < req->lladdr_len) + return (ENOMEM); + + switch (req->family) { + case AF_INET: + case AF_INET6: + break; + default: + return (EAFNOSUPPORT); + } + + /* Copy lladdr to storage as is */ + memmove(req->buf, req->lladdr, req->lladdr_len); + req->bufsize = req->lladdr_len; + req->lladdr_off = 0; + return (0); } @@ -3500,15 +3659,15 @@ if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) IF_LOCK(ifq); if (_IF_QFULL(ifq)) { - _IF_DROP(ifq); IF_UNLOCK(ifq); + if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { - ifp->if_obytes += m->m_pkthdr.len + adjust; + if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) - ifp->if_omcasts++; + if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); @@ -3543,3 +3702,465 @@ if_deregister_com_alloc(u_char type) if_com_alloc[type] = NULL; if_com_free[type] = NULL; } + +/* API for driver access to network stack owned ifnet.*/ +uint64_t +if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) +{ + uint64_t oldbrate; + + oldbrate = ifp->if_baudrate; + ifp->if_baudrate = baudrate; + return (oldbrate); +} + +uint64_t +if_getbaudrate(if_t ifp) +{ + + return (((struct ifnet *)ifp)->if_baudrate); +} + +int +if_setcapabilities(if_t ifp, int capabilities) +{ + ((struct ifnet *)ifp)->if_capabilities = capabilities; + return (0); +} + +int +if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) +{ + ((struct ifnet *)ifp)->if_capabilities |= setbit; + ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; + + return (0); +} + +int +if_getcapabilities(if_t ifp) +{ 
+ return ((struct ifnet *)ifp)->if_capabilities; +} + +int +if_setcapenable(if_t ifp, int capabilities) +{ + ((struct ifnet *)ifp)->if_capenable = capabilities; + return (0); +} + +int +if_setcapenablebit(if_t ifp, int setcap, int clearcap) +{ + if(setcap) + ((struct ifnet *)ifp)->if_capenable |= setcap; + if(clearcap) + ((struct ifnet *)ifp)->if_capenable &= ~clearcap; + + return (0); +} + +const char * +if_getdname(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_dname; +} + +int +if_togglecapenable(if_t ifp, int togglecap) +{ + ((struct ifnet *)ifp)->if_capenable ^= togglecap; + return (0); +} + +int +if_getcapenable(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_capenable; +} + +/* + * This is largely undesirable because it ties ifnet to a device, but does + * provide flexiblity for an embedded product vendor. Should be used with + * the understanding that it violates the interface boundaries, and should be + * a last resort only. + */ +int +if_setdev(if_t ifp, void *dev) +{ + return (0); +} + +int +if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) +{ + ((struct ifnet *)ifp)->if_drv_flags |= set_flags; + ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags; + + return (0); +} + +int +if_getdrvflags(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_drv_flags; +} + +int +if_setdrvflags(if_t ifp, int flags) +{ + ((struct ifnet *)ifp)->if_drv_flags = flags; + return (0); +} + + +int +if_setflags(if_t ifp, int flags) +{ + ((struct ifnet *)ifp)->if_flags = flags; + return (0); +} + +int +if_setflagbits(if_t ifp, int set, int clear) +{ + ((struct ifnet *)ifp)->if_flags |= set; + ((struct ifnet *)ifp)->if_flags &= ~clear; + + return (0); +} + +int +if_getflags(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_flags; +} + +int +if_clearhwassist(if_t ifp) +{ + ((struct ifnet *)ifp)->if_hwassist = 0; + return (0); +} + +int +if_sethwassistbits(if_t ifp, int toset, int toclear) +{ + ((struct ifnet *)ifp)->if_hwassist |= toset; + ((struct ifnet *)ifp)->if_hwassist &= 
~toclear; + + return (0); +} + +int +if_sethwassist(if_t ifp, int hwassist_bit) +{ + ((struct ifnet *)ifp)->if_hwassist = hwassist_bit; + return (0); +} + +int +if_gethwassist(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_hwassist; +} + +int +if_setmtu(if_t ifp, int mtu) +{ + ((struct ifnet *)ifp)->if_mtu = mtu; + return (0); +} + +int +if_getmtu(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_mtu; +} + +int +if_getmtu_family(if_t ifp, int family) +{ + struct domain *dp; + + for (dp = domains; dp; dp = dp->dom_next) { + if (dp->dom_family == family && dp->dom_ifmtu != NULL) + return (dp->dom_ifmtu((struct ifnet *)ifp)); + } + + return (((struct ifnet *)ifp)->if_mtu); +} + +int +if_setsoftc(if_t ifp, void *softc) +{ + ((struct ifnet *)ifp)->if_softc = softc; + return (0); +} + +void * +if_getsoftc(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_softc; +} + +void +if_setrcvif(struct mbuf *m, if_t ifp) +{ + m->m_pkthdr.rcvif = (struct ifnet *)ifp; +} + +void +if_setvtag(struct mbuf *m, uint16_t tag) +{ + m->m_pkthdr.ether_vtag = tag; +} + +uint16_t +if_getvtag(struct mbuf *m) +{ + + return (m->m_pkthdr.ether_vtag); +} + +int +if_sendq_empty(if_t ifp) +{ + return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); +} + +struct ifaddr * +if_getifaddr(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_addr; +} + +int +if_getamcount(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_amcount; +} + + +int +if_setsendqready(if_t ifp) +{ + IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); + return (0); +} + +int +if_setsendqlen(if_t ifp, int tx_desc_count) +{ + IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); + ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; + + return (0); +} + +int +if_vlantrunkinuse(if_t ifp) +{ + return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; +} + +int +if_input(if_t ifp, struct mbuf* sendmp) +{ + (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp); + return (0); + +} + +/* XXX */ +#ifndef ETH_ADDR_LEN +#define 
ETH_ADDR_LEN 6 +#endif + +int +if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max) +{ + struct ifmultiaddr *ifma; + uint8_t *lmta = (uint8_t *)mta; + int mcnt = 0; + + TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + + if (mcnt == max) + break; + + bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), + &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); + mcnt++; + } + *cnt = mcnt; + + return (0); +} + +int +if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max) +{ + int error; + + if_maddr_rlock(ifp); + error = if_setupmultiaddr(ifp, mta, cnt, max); + if_maddr_runlock(ifp); + return (error); +} + +int +if_multiaddr_count(if_t ifp, int max) +{ + struct ifmultiaddr *ifma; + int count; + + count = 0; + if_maddr_rlock(ifp); + TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + count++; + if (count == max) + break; + } + if_maddr_runlock(ifp); + return (count); +} + +int +if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg) +{ + struct ifmultiaddr *ifma; + int cnt = 0; + + if_maddr_rlock(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) + cnt += filter(arg, ifma, cnt); + if_maddr_runlock(ifp); + return (cnt); +} + +struct mbuf * +if_dequeue(if_t ifp) +{ + struct mbuf *m; + IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); + + return (m); +} + +int +if_sendq_prepend(if_t ifp, struct mbuf *m) +{ + IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); + return (0); +} + +int +if_setifheaderlen(if_t ifp, int len) +{ + ((struct ifnet *)ifp)->if_hdrlen = len; + return (0); +} + +caddr_t +if_getlladdr(if_t ifp) +{ + return (IF_LLADDR((struct ifnet *)ifp)); +} + +void * +if_gethandle(u_char type) +{ + return (if_alloc(type)); +} + +void +if_bpfmtap(if_t ifh, struct mbuf *m) +{ + struct ifnet *ifp = (struct ifnet *)ifh; + + BPF_MTAP(ifp, m); +} + +void 
+if_etherbpfmtap(if_t ifh, struct mbuf *m) +{ + struct ifnet *ifp = (struct ifnet *)ifh; + + ETHER_BPF_MTAP(ifp, m); +} + +void +if_vlancap(if_t ifh) +{ + struct ifnet *ifp = (struct ifnet *)ifh; + VLAN_CAPABILITIES(ifp); +} + +void +if_setinitfn(if_t ifp, void (*init_fn)(void *)) +{ + ((struct ifnet *)ifp)->if_init = init_fn; +} + +void +if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t)) +{ + ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn; +} + +void +if_setstartfn(if_t ifp, void (*start_fn)(if_t)) +{ + ((struct ifnet *)ifp)->if_start = (void *)start_fn; +} + +void +if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) +{ + ((struct ifnet *)ifp)->if_transmit = start_fn; +} + +void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) +{ + ((struct ifnet *)ifp)->if_qflush = flush_fn; + +} + +void +if_setgetcounterfn(if_t ifp, if_get_counter_t fn) +{ + + ifp->if_get_counter = fn; +} + +/* Revisit these - These are inline functions originally. */ +int +drbr_inuse_drv(if_t ifh, struct buf_ring *br) +{ + return drbr_inuse(ifh, br); +} + +struct mbuf* +drbr_dequeue_drv(if_t ifh, struct buf_ring *br) +{ + return drbr_dequeue(ifh, br); +} + +int +drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br) +{ + return drbr_needs_enqueue(ifh, br); +} + +int +drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m) +{ + return drbr_enqueue(ifh, br, m); + +} diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h index e182db54..98ae0a82 100644 --- a/freebsd/sys/net/if.h +++ b/freebsd/sys/net/if.h @@ -35,10 +35,6 @@ #include <sys/cdefs.h> -#ifdef _KERNEL -#include <sys/queue.h> -#endif - #if __BSD_VISIBLE /* * <net/if.h> does not depend on <sys/time.h> on most other systems. 
This @@ -49,8 +45,6 @@ #include <sys/time.h> #include <sys/socket.h> #endif - -struct ifnet; #endif /* @@ -80,32 +74,45 @@ struct if_clonereq { */ struct if_data { /* generic interface information */ - u_char ifi_type; /* ethernet, tokenring, etc */ - u_char ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ - u_char ifi_addrlen; /* media address length */ - u_char ifi_hdrlen; /* media header length */ - u_char ifi_link_state; /* current link state */ - u_char ifi_spare_char1; /* spare byte */ - u_char ifi_spare_char2; /* spare byte */ - u_char ifi_datalen; /* length of this data struct */ - u_long ifi_mtu; /* maximum transmission unit */ - u_long ifi_metric; /* routing metric (external only) */ - u_long ifi_baudrate; /* linespeed */ + uint8_t ifi_type; /* ethernet, tokenring, etc */ + uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ + uint8_t ifi_addrlen; /* media address length */ + uint8_t ifi_hdrlen; /* media header length */ + uint8_t ifi_link_state; /* current link state */ + uint8_t ifi_vhid; /* carp vhid */ + uint16_t ifi_datalen; /* length of this data struct */ + uint32_t ifi_mtu; /* maximum transmission unit */ + uint32_t ifi_metric; /* routing metric (external only) */ + uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ - u_long ifi_ipackets; /* packets received on interface */ - u_long ifi_ierrors; /* input errors on interface */ - u_long ifi_opackets; /* packets sent on interface */ - u_long ifi_oerrors; /* output errors on interface */ - u_long ifi_collisions; /* collisions on csma interfaces */ - u_long ifi_ibytes; /* total number of octets received */ - u_long ifi_obytes; /* total number of octets sent */ - u_long ifi_imcasts; /* packets received via multicast */ - u_long ifi_omcasts; /* packets sent via multicast */ - u_long ifi_iqdrops; /* dropped on input, this interface */ - u_long ifi_noproto; /* destined for unsupported protocol */ - u_long ifi_hwassist; /* HW offload capabilities, see IFCAP */ - time_t 
ifi_epoch; /* uptime at attach or stat reset */ - struct timeval ifi_lastchange; /* time of last administrative change */ + uint64_t ifi_ipackets; /* packets received on interface */ + uint64_t ifi_ierrors; /* input errors on interface */ + uint64_t ifi_opackets; /* packets sent on interface */ + uint64_t ifi_oerrors; /* output errors on interface */ + uint64_t ifi_collisions; /* collisions on csma interfaces */ + uint64_t ifi_ibytes; /* total number of octets received */ + uint64_t ifi_obytes; /* total number of octets sent */ + uint64_t ifi_imcasts; /* packets received via multicast */ + uint64_t ifi_omcasts; /* packets sent via multicast */ + uint64_t ifi_iqdrops; /* dropped on input */ + uint64_t ifi_oqdrops; /* dropped on output */ + uint64_t ifi_noproto; /* destined for unsupported protocol */ + uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ + + /* Unions are here to make sizes MI. */ + union { /* uptime at attach or stat reset */ + time_t tt; + uint64_t ph; + } __ifi_epoch; +#define ifi_epoch __ifi_epoch.tt + union { /* time of last administrative change */ + struct timeval tv; + struct { + uint64_t ph1; + uint64_t ph2; + } ph; + } __ifi_lastchange; +#define ifi_lastchange __ifi_lastchange.tv }; /*- @@ -135,7 +142,7 @@ struct if_data { #define IFF_DEBUG 0x4 /* (n) turn on debugging */ #define IFF_LOOPBACK 0x8 /* (i) is a loopback net */ #define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */ -#define IFF_SMART 0x20 /* (i) interface manages own routes */ +/* 0x20 was IFF_SMART */ #define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */ #define IFF_NOARP 0x80 /* (n) no address resolution protocol */ #define IFF_PROMISC 0x100 /* (n) receive all packets */ @@ -153,7 +160,6 @@ struct if_data { #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ - /* * Old names for driver flags so that user space tools can 
continue to use * the old (portable) names. @@ -166,7 +172,7 @@ struct if_data { /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ - IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_SMART|IFF_PROMISC|\ + IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\ IFF_DYING|IFF_CANTCONFIG) /* @@ -180,7 +186,7 @@ struct if_data { * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ -#define IF_Kbps(x) ((x) * 1000) /* kilobits/sec. */ +#define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ @@ -232,6 +238,7 @@ struct if_data { #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ +#define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) @@ -297,7 +304,7 @@ struct ifa_msghdr { int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ - int ifam_metric; /* value of ifa_metric */ + int ifam_metric; /* value of ifa_ifp->if_metric */ }; /* @@ -322,7 +329,7 @@ struct ifa_msghdrl { u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. 
if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ - int ifam_metric; /* value of ifa_metric */ + int ifam_metric; /* value of ifa_ifp->if_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; @@ -386,6 +393,7 @@ struct ifreq { caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; + u_char ifru_vlan_pcp; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ @@ -403,6 +411,7 @@ struct ifreq { #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ +#define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ @@ -415,6 +424,15 @@ struct ifaliasreq { struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; + int ifra_vhid; +}; + +/* 9.x compat */ +struct oifaliasreq { + char ifra_name[IFNAMSIZ]; + struct sockaddr ifra_addr; + struct sockaddr ifra_broadaddr; + struct sockaddr ifra_mask; }; struct ifmediareq { @@ -495,16 +513,17 @@ struct ifgroupreq { }; /* - * Structure for SIOC[AGD]LIFADDR + * Structure used to request i2c data + * from interface transceivers. */ -struct if_laddrreq { - char iflr_name[IFNAMSIZ]; - u_int flags; -#define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */ - u_int prefixlen; /* in/out */ - struct sockaddr_storage addr; /* in/out */ - struct sockaddr_storage dstaddr; /* out */ -}; +struct ifi2creq { + uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */ + uint8_t offset; /* read offset */ + uint8_t len; /* read length */ + uint8_t spare0; + uint32_t spare1; + uint8_t data[8]; /* read buffer */ +}; #endif /* __BSD_VISIBLE */ @@ -528,10 +547,4 @@ struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif - -#ifdef _KERNEL -/* XXX - this should go away soon. 
*/ -#include <net/if_var.h> -#endif - #endif /* !_NET_IF_H_ */ diff --git a/freebsd/sys/net/if_arc.h b/freebsd/sys/net/if_arc.h index 88a72403..23139aa6 100644 --- a/freebsd/sys/net/if_arc.h +++ b/freebsd/sys/net/if_arc.h @@ -133,7 +133,7 @@ void arc_storelladdr(struct ifnet *, u_int8_t); int arc_isphds(u_int8_t); void arc_input(struct ifnet *, struct mbuf *); int arc_output(struct ifnet *, struct mbuf *, - struct sockaddr *, struct route *); + const struct sockaddr *, struct route *); int arc_ioctl(struct ifnet *, u_long, caddr_t); void arc_frag_init(struct ifnet *); diff --git a/freebsd/sys/net/if_arcsubr.c b/freebsd/sys/net/if_arcsubr.c index fae432ad..1954e262 100644 --- a/freebsd/sys/net/if_arcsubr.c +++ b/freebsd/sys/net/if_arcsubr.c @@ -42,7 +42,6 @@ */ #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipx.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> @@ -59,6 +58,7 @@ #include <machine/cpu.h> #include <net/if.h> +#include <net/if_var.h> #include <net/netisr.h> #include <net/route.h> #include <net/if_dl.h> @@ -78,11 +78,6 @@ #include <netinet6/nd6.h> #endif -#ifdef IPX -#include <netipx/ipx.h> -#include <netipx/ipx_if.h> -#endif - #define ARCNET_ALLOW_BROKEN_ARP static struct mbuf *arc_defrag(struct ifnet *, struct mbuf *); @@ -94,8 +89,7 @@ u_int8_t arcbroadcastaddr = 0; #define ARC_LLADDR(ifp) (*(u_int8_t *)IF_LLADDR(ifp)) #define senderr(e) { error = (e); goto bad;} -#define SIN(s) ((struct sockaddr_in *)s) -#define SIPX(s) ((struct sockaddr_ipx *)s) +#define SIN(s) ((const struct sockaddr_in *)(s)) /* * ARCnet output routine. @@ -103,7 +97,7 @@ u_int8_t arcbroadcastaddr = 0; * Assumes that ifp is actually pointer to arccom structure. 
*/ int -arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, +arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct arc_header *ah; @@ -112,7 +106,7 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, int loop_copy = 0; int isphds; #if defined(INET) || defined(INET6) - struct llentry *lle; + int is_gw = 0; #endif if (!((ifp->if_flags & IFF_UP) && @@ -120,6 +114,10 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, return(ENETDOWN); /* m, m1 aren't initialized yet */ error = 0; +#if defined(INET) || defined(INET6) + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; +#endif switch (dst->sa_family) { #ifdef INET @@ -133,8 +131,8 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, else if (ifp->if_flags & IFF_NOARP) adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF; else { - error = arpresolve(ifp, ro ? ro->ro_rt : NULL, - m, dst, &adst, &lle); + error = arpresolve(ifp, is_gw, m, dst, &adst, NULL, + NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } @@ -172,24 +170,23 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, #endif #ifdef INET6 case AF_INET6: - error = nd6_storelladdr(ifp, m, dst, (u_char *)&adst, &lle); - if (error) - return (error); + if ((m->m_flags & M_MCAST) != 0) + adst = arcbroadcastaddr; /* ARCnet broadcast address */ + else { + error = nd6_resolve(ifp, is_gw, m, dst, &adst, NULL, + NULL); + if (error != 0) + return (error == EWOULDBLOCK ? 
0 : error); + } atype = ARCTYPE_INET6; break; #endif -#ifdef IPX - case AF_IPX: - adst = SIPX(dst)->sipx_addr.x_host.c_host[5]; - atype = ARCTYPE_IPX; - if (adst == 0xff) - adst = arcbroadcastaddr; - break; -#endif - case AF_UNSPEC: + { + const struct arc_header *ah; + loop_copy = -1; - ah = (struct arc_header *)dst->sa_data; + ah = (const struct arc_header *)dst->sa_data; adst = ah->arc_dhost; atype = ah->arc_type; @@ -209,15 +206,15 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, #endif } break; - + } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } isphds = arc_isphds(atype); - M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_DONTWAIT); - if (m == 0) + M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT); + if (m == NULL) senderr(ENOBUFS); ah = mtod(m, struct arc_header *); ah->arc_type = atype; @@ -268,12 +265,12 @@ arc_frag_next(struct ifnet *ifp) struct arc_header *ah; ac = (struct arccom *)ifp->if_l2com; - if ((m = ac->curr_frag) == 0) { + if ((m = ac->curr_frag) == NULL) { int tfrags; /* dequeue new packet */ IF_DEQUEUE(&ifp->if_snd, m); - if (m == 0) + if (m == NULL) return 0; ah = mtod(m, struct arc_header *); @@ -281,7 +278,7 @@ arc_frag_next(struct ifnet *ifp) return m; ++ac->ac_seqid; /* make the seqid unique */ - tfrags = (m->m_pkthdr.len + ARC_MAX_DATA - 1) / ARC_MAX_DATA; + tfrags = howmany(m->m_pkthdr.len, ARC_MAX_DATA); ac->fsflag = 2 * tfrags - 3; ac->sflag = 0; ac->rsflag = ac->fsflag; @@ -296,14 +293,14 @@ arc_frag_next(struct ifnet *ifp) /* split out next fragment and return it */ if (ac->sflag < ac->fsflag) { /* we CAN'T have short packets here */ - ac->curr_frag = m_split(m, ARC_MAX_DATA, M_DONTWAIT); + ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT); if (ac->curr_frag == 0) { m_freem(m); return 0; } - M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT); - if (m == 0) { + M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT); + if (m == NULL) { m_freem(ac->curr_frag); ac->curr_frag = 0; return 
0; @@ -321,8 +318,8 @@ arc_frag_next(struct ifnet *ifp) ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) { ac->curr_frag = 0; - M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_DONTWAIT); - if (m == 0) + M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT); + if (m == NULL) return 0; ah = mtod(m, struct arc_header *); @@ -334,8 +331,8 @@ arc_frag_next(struct ifnet *ifp) } else { ac->curr_frag = 0; - M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT); - if (m == 0) + M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT); + if (m == NULL) return 0; ah = mtod(m, struct arc_header *); @@ -352,7 +349,7 @@ arc_frag_next(struct ifnet *ifp) /* * Defragmenter. Returns mbuf if last packet found, else - * NULL. frees imcoming mbuf as necessary. + * NULL. frees incoming mbuf as necessary. */ static __inline struct mbuf * @@ -371,7 +368,7 @@ arc_defrag(struct ifnet *ifp, struct mbuf *m) if (m->m_len < ARC_HDRNEWLEN) { m = m_pullup(m, ARC_HDRNEWLEN); if (m == NULL) { - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return NULL; } } @@ -391,7 +388,7 @@ arc_defrag(struct ifnet *ifp, struct mbuf *m) if (m->m_len < ARC_HDRNEWLEN) { m = m_pullup(m, ARC_HDRNEWLEN); if (m == NULL) { - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return NULL; } } @@ -544,11 +541,11 @@ arc_input(struct ifnet *ifp, struct mbuf *m) return; } - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if (ah->arc_dhost == arcbroadcastaddr) { m->m_flags |= M_BCAST|M_MCAST; - ifp->if_imcasts++; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } atype = ah->arc_type; @@ -556,15 +553,11 @@ arc_input(struct ifnet *ifp, struct mbuf *m) #ifdef INET case ARCTYPE_IP: m_adj(m, ARC_HDRNEWLEN); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; case ARCTYPE_IP_OLD: m_adj(m, ARC_HDRLEN); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; @@ -600,12 +593,6 @@ arc_input(struct ifnet *ifp, struct mbuf *m) isr = NETISR_IPV6; break; #endif -#ifdef IPX - case ARCTYPE_IPX: - 
m_adj(m, ARC_HDRNEWLEN); - isr = NETISR_IPX; - break; -#endif default: m_freem(m); return; @@ -640,11 +627,7 @@ arc_ifattach(struct ifnet *ifp, u_int8_t lla) ifp->if_resolvemulti = arc_resolvemulti; if (ifp->if_baudrate == 0) ifp->if_baudrate = 2500000; -#if __FreeBSD_version < 500000 - ifa = ifnet_addrs[ifp->if_index - 1]; -#else ifa = ifp->if_addr; -#endif KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ARCNET; @@ -691,26 +674,6 @@ arc_ioctl(struct ifnet *ifp, u_long command, caddr_t data) arp_ifinit(ifp, ifa); break; #endif -#ifdef IPX - /* - * XXX This code is probably wrong - */ - case AF_IPX: - { - struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr); - - if (ipx_nullhost(*ina)) - ina->x_host.c_host[5] = ARC_LLADDR(ifp); - else - arc_storelladdr(ifp, ina->x_host.c_host[5]); - - /* - * Set new address - */ - ifp->if_init(ifp->if_softc); - break; - } -#endif default: ifp->if_init(ifp->if_softc); break; @@ -781,21 +744,14 @@ arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, sdl = (struct sockaddr_dl *)sa; if (*LLADDR(sdl) != arcbroadcastaddr) return EADDRNOTAVAIL; - *llsa = 0; + *llsa = NULL; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; - sdl = malloc(sizeof *sdl, M_IFMADDR, - M_NOWAIT | M_ZERO); - if (sdl == NULL) - return ENOMEM; - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_ARCNET; + sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ARC_ADDR_LEN; *LLADDR(sdl) = 0; *llsa = (struct sockaddr *)sdl; @@ -811,19 +767,12 @@ arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, * (This is used for multicast routers.) 
*/ ifp->if_flags |= IFF_ALLMULTI; - *llsa = 0; + *llsa = NULL; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; - sdl = malloc(sizeof *sdl, M_IFMADDR, - M_NOWAIT | M_ZERO); - if (sdl == NULL) - return ENOMEM; - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_ARCNET; + sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ARC_ADDR_LEN; *LLADDR(sdl) = 0; *llsa = (struct sockaddr *)sdl; diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h index 38c64020..7d141f37 100644 --- a/freebsd/sys/net/if_arp.h +++ b/freebsd/sys/net/if_arp.h @@ -97,43 +97,37 @@ struct arpreq { #define ATF_PUBL 0x08 /* publish entry (respond for other host) */ #define ATF_USETRAILERS 0x10 /* has requested trailers */ -#ifdef _KERNEL -/* - * Structure shared between the ethernet driver modules and - * the address resolution code. - */ -struct arpcom { - struct ifnet *ac_ifp; /* network-visible interface */ - void *ac_netgraph; /* ng_ether(4) netgraph node info */ -}; -#define IFP2AC(ifp) ((struct arpcom *)(ifp->if_l2com)) -#define AC2IFP(ac) ((ac)->ac_ifp) - -#endif /* _KERNEL */ - struct arpstat { /* Normal things that happen: */ - u_long txrequests; /* # of ARP requests sent by this host. */ - u_long txreplies; /* # of ARP replies sent by this host. */ - u_long rxrequests; /* # of ARP requests received by this host. */ - u_long rxreplies; /* # of ARP replies received by this host. */ - u_long received; /* # of ARP packets received by this host. */ + uint64_t txrequests; /* # of ARP requests sent by this host. */ + uint64_t txreplies; /* # of ARP replies sent by this host. */ + uint64_t rxrequests; /* # of ARP requests received by this host. */ + uint64_t rxreplies; /* # of ARP replies received by this host. */ + uint64_t received; /* # of ARP packets received by this host. */ - u_long arp_spares[4]; /* For either the upper or lower half. 
*/ + uint64_t arp_spares[4]; /* For either the upper or lower half. */ /* Abnormal event and error counting: */ - u_long dropped; /* # of packets dropped waiting for a reply. */ - u_long timeouts; /* # of times with entries removed */ + uint64_t dropped; /* # of packets dropped waiting for a reply. */ + uint64_t timeouts; /* # of times with entries removed */ /* due to timeout. */ - u_long dupips; /* # of duplicate IPs detected. */ + uint64_t dupips; /* # of duplicate IPs detected. */ }; +#ifdef _KERNEL +#include <sys/counter.h> +#include <net/vnet.h> + +VNET_PCPUSTAT_DECLARE(struct arpstat, arpstat); /* * In-kernel consumers can use these accessor macros directly to update * stats. */ -#define ARPSTAT_ADD(name, val) V_arpstat.name += (val) -#define ARPSTAT_SUB(name, val) V_arpstat.name -= (val) +#define ARPSTAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct arpstat, arpstat, name, (val)) +#define ARPSTAT_SUB(name, val) ARPSTAT_ADD(name, -(val)) #define ARPSTAT_INC(name) ARPSTAT_ADD(name, 1) #define ARPSTAT_DEC(name) ARPSTAT_SUB(name, 1) +#endif /* _KERNEL */ + #endif /* !_NET_IF_ARP_H_ */ diff --git a/freebsd/sys/net/if_atm.h b/freebsd/sys/net/if_atm.h index e8f69da0..a0900eee 100644 --- a/freebsd/sys/net/if_atm.h +++ b/freebsd/sys/net/if_atm.h @@ -96,7 +96,7 @@ struct ifatm_mib { /* * Traffic parameters for ATM connections. This contains all parameters - * to accomodate UBR, UBR+MCR, CBR, VBR and ABR connections. + * to accommodate UBR, UBR+MCR, CBR, VBR and ABR connections. 
* * Keep in sync with ng_atm.h */ @@ -292,7 +292,7 @@ void atm_ifattach(struct ifnet *); void atm_ifdetach(struct ifnet *); void atm_input(struct ifnet *, struct atm_pseudohdr *, struct mbuf *, void *); -int atm_output(struct ifnet *, struct mbuf *, struct sockaddr *, +int atm_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); struct atmio_vcctable *atm_getvccs(struct atmio_vcc **, u_int, u_int, struct mtx *, int); diff --git a/freebsd/sys/net/if_atmsubr.c b/freebsd/sys/net/if_atmsubr.c index a4cbeb09..fff233c4 100644 --- a/freebsd/sys/net/if_atmsubr.c +++ b/freebsd/sys/net/if_atmsubr.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <net/if.h> +#include <net/if_var.h> #include <net/netisr.h> #include <net/route.h> #include <net/if_dl.h> @@ -123,7 +124,7 @@ static MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals"); * ro->ro_rt must also be NULL. */ int -atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, +atm_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, struct route *ro) { u_int16_t etype = 0; /* if using LLC/SNAP */ @@ -131,7 +132,7 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, struct atm_pseudohdr atmdst, *ad; struct mbuf *m = m0; struct atmllc *atmllc; - struct atmllc *llc_hdr = NULL; + const struct atmllc *llc_hdr = NULL; u_int32_t atm_flags; #ifdef MAC @@ -175,7 +176,7 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, * (atm pseudo header (4) + LLC/SNAP (8)) */ bcopy(dst->sa_data, &atmdst, sizeof(atmdst)); - llc_hdr = (struct atmllc *)(dst->sa_data + + llc_hdr = (const struct atmllc *)(dst->sa_data + sizeof(atmdst)); break; @@ -192,8 +193,8 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, atm_flags = ATM_PH_FLAGS(&atmdst); if (atm_flags & ATM_PH_LLCSNAP) sz += 8; /* sizeof snap == 8 */ - M_PREPEND(m, sz, M_DONTWAIT); - if (m == 0) + M_PREPEND(m, sz, M_NOWAIT); + if (m == NULL) 
senderr(ENOBUFS); ad = mtod(m, struct atm_pseudohdr *); *ad = atmdst; @@ -253,7 +254,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m, #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if (ng_atm_input_p != NULL) { (*ng_atm_input_p)(ifp, &m, ah, rxhand); @@ -296,7 +297,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m, struct atmllc *alc; if (m->m_len < sizeof(*alc) && - (m = m_pullup(m, sizeof(*alc))) == 0) + (m = m_pullup(m, sizeof(*alc))) == NULL) return; /* failed */ alc = mtod(m, struct atmllc *); if (bcmp(alc, ATMLLC_HDR, 6)) { diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c index 65553092..77b376b9 100644 --- a/freebsd/sys/net/if_bridge.c +++ b/freebsd/sys/net/if_bridge.c @@ -73,7 +73,7 @@ * - Currently only supports Ethernet-like interfaces (Ethernet, * 802.11, VLANs on Ethernet, etc.) Figure out a nice way * to bridge other types of interfaces (FDDI-FDDI, and maybe - * consider heterogenous bridges). + * consider heterogeneous bridges). 
*/ #include <sys/cdefs.h> @@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet6.h> #include <rtems/bsd/sys/param.h> +#include <sys/eventhandler.h> #include <sys/mbuf.h> #include <sys/malloc.h> #include <sys/protosw.h> @@ -102,7 +103,6 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <rtems/bsd/sys/lock.h> #include <sys/mutex.h> -#include <sys/rwlock.h> #include <net/bpf.h> #include <net/if.h> @@ -113,7 +113,7 @@ __FBSDID("$FreeBSD$"); #include <net/pfil.h> #include <net/vnet.h> -#include <netinet/in.h> /* for struct arpcom */ +#include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/in_var.h> #include <netinet/ip.h> @@ -127,15 +127,13 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip_carp.h> #endif #include <machine/in_cksum.h> -#include <netinet/if_ether.h> /* for struct arpcom */ +#include <netinet/if_ether.h> #include <net/bridgestp.h> #include <net/if_bridgevar.h> #include <net/if_llc.h> #include <net/if_vlan_var.h> #include <net/route.h> -#include <netinet/ip_fw.h> -#include <netpfil/ipfw/ip_fw_private.h> /* * Size of the route hash table. Must be a power of two. @@ -170,7 +168,8 @@ __FBSDID("$FreeBSD$"); /* * List of capabilities to possibly mask on the member interface. 
*/ -#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM) +#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\ + IFCAP_TXCSUM_IPV6) /* * List of capabilities to strip @@ -230,8 +229,9 @@ struct bridge_softc { u_char sc_defaddr[6]; /* Default MAC address */ }; -static struct mtx bridge_list_mtx; -eventhandler_tag bridge_detach_cookie = NULL; +static VNET_DEFINE(struct mtx, bridge_list_mtx); +#define V_bridge_list_mtx VNET(bridge_list_mtx) +static eventhandler_tag bridge_detach_cookie; int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; @@ -248,11 +248,12 @@ static void bridge_ifdetach(void *arg __unused, struct ifnet *); static void bridge_init(void *); static void bridge_dummynet(struct mbuf *, struct ifnet *); static void bridge_stop(struct ifnet *, int); -static void bridge_start(struct ifnet *); +static int bridge_transmit(struct ifnet *, struct mbuf *); +static void bridge_qflush(struct ifnet *); static struct mbuf *bridge_input(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); -static void bridge_enqueue(struct bridge_softc *, struct ifnet *, +static int bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); @@ -275,7 +276,7 @@ static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, uint16_t); -static int bridge_rtable_init(struct bridge_softc *); +static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *); @@ -353,43 +354,64 @@ static struct bstp_cb_ops bridge_ops = { SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge"); -static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */ -static int pfil_bridge = 1; /* run pfil 
hooks on the bridge interface */ -static int pfil_member = 1; /* run pfil hooks on the member interface */ -static int pfil_ipfw = 0; /* layer2 filter with ipfw */ -static int pfil_ipfw_arp = 0; /* layer2 filter with ipfw */ -static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for - locally destined packets */ -static int log_stp = 0; /* log STP state changes */ -static int bridge_inherit_mac = 0; /* share MAC with first bridge member */ -TUNABLE_INT("net.link.bridge.pfil_onlyip", &pfil_onlyip); -SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW, - &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled"); -TUNABLE_INT("net.link.bridge.ipfw_arp", &pfil_ipfw_arp); -SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW, - &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2"); -TUNABLE_INT("net.link.bridge.pfil_bridge", &pfil_bridge); -SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW, - &pfil_bridge, 0, "Packet filter on the bridge interface"); -TUNABLE_INT("net.link.bridge.pfil_member", &pfil_member); -SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW, - &pfil_member, 0, "Packet filter on the member interface"); -TUNABLE_INT("net.link.bridge.pfil_local_phys", &pfil_local_phys); -SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW, - &pfil_local_phys, 0, +/* only pass IP[46] packets when pfil is enabled */ +static VNET_DEFINE(int, pfil_onlyip) = 1; +#define V_pfil_onlyip VNET(pfil_onlyip) +SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0, + "Only pass IP packets when pfil is enabled"); + +/* run pfil hooks on the bridge interface */ +static VNET_DEFINE(int, pfil_bridge) = 1; +#define V_pfil_bridge VNET(pfil_bridge) +SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0, + "Packet filter on the bridge interface"); + +/* layer2 filter with ipfw */ +static 
VNET_DEFINE(int, pfil_ipfw); +#define V_pfil_ipfw VNET(pfil_ipfw) + +/* layer2 ARP filter with ipfw */ +static VNET_DEFINE(int, pfil_ipfw_arp); +#define V_pfil_ipfw_arp VNET(pfil_ipfw_arp) +SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0, + "Filter ARP packets through IPFW layer2"); + +/* run pfil hooks on the member interface */ +static VNET_DEFINE(int, pfil_member) = 1; +#define V_pfil_member VNET(pfil_member) +SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0, + "Packet filter on the member interface"); + +/* run pfil hooks on the physical interface for locally destined packets */ +static VNET_DEFINE(int, pfil_local_phys); +#define V_pfil_local_phys VNET(pfil_local_phys) +SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0, "Packet filter on the physical interface for locally destined packets"); -TUNABLE_INT("net.link.bridge.log_stp", &log_stp); -SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW, - &log_stp, 0, "Log STP state changes"); -TUNABLE_INT("net.link.bridge.inherit_mac", &bridge_inherit_mac); -SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW, - &bridge_inherit_mac, 0, + +/* log STP state changes */ +static VNET_DEFINE(int, log_stp); +#define V_log_stp VNET(log_stp) +SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0, + "Log STP state changes"); + +/* share MAC with first bridge member */ +static VNET_DEFINE(int, bridge_inherit_mac); +#define V_bridge_inherit_mac VNET(bridge_inherit_mac) +SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0, "Inherit MAC address from the first bridge member"); static VNET_DEFINE(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) -SYSCTL_VNET_INT(_net_link_bridge, 
OID_AUTO, allow_llz_overlap, CTLFLAG_RW, - &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " +SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, + CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, + "Allow overlap of link-local scope " "zones of a bridge interface and the member interfaces"); struct bridge_control { @@ -487,12 +509,43 @@ const struct bridge_control bridge_control_table[] = { BC_F_COPYIN|BC_F_SUSER }, }; -const int bridge_control_table_size = - sizeof(bridge_control_table) / sizeof(bridge_control_table[0]); +const int bridge_control_table_size = nitems(bridge_control_table); + +static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list); +#define V_bridge_list VNET(bridge_list) +#define BRIDGE_LIST_LOCK_INIT(x) mtx_init(&V_bridge_list_mtx, \ + "if_bridge list", NULL, MTX_DEF) +#define BRIDGE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_bridge_list_mtx) +#define BRIDGE_LIST_LOCK(x) mtx_lock(&V_bridge_list_mtx) +#define BRIDGE_LIST_UNLOCK(x) mtx_unlock(&V_bridge_list_mtx) + +static VNET_DEFINE(struct if_clone *, bridge_cloner); +#define V_bridge_cloner VNET(bridge_cloner) -LIST_HEAD(, bridge_softc) bridge_list; +static const char bridge_name[] = "bridge"; + +static void +vnet_bridge_init(const void *unused __unused) +{ + + BRIDGE_LIST_LOCK_INIT(); + LIST_INIT(&V_bridge_list); + V_bridge_cloner = if_clone_simple(bridge_name, + bridge_clone_create, bridge_clone_destroy, 0); +} +VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_bridge_init, NULL); + +static void +vnet_bridge_uninit(const void *unused __unused) +{ -IFC_SIMPLE_DECLARE(bridge, 0); + if_clone_detach(V_bridge_cloner); + V_bridge_cloner = NULL; + BRIDGE_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, + vnet_bridge_uninit, NULL); static int bridge_modevent(module_t mod, int type, void *data) @@ -500,12 +553,9 @@ bridge_modevent(module_t mod, int type, void *data) switch (type) { 
case MOD_LOAD: - mtx_init(&bridge_list_mtx, "if_bridge list", NULL, MTX_DEF); - if_clone_attach(&bridge_cloner); bridge_rtnode_zone = uma_zcreate("bridge_rtnode", sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - LIST_INIT(&bridge_list); bridge_input_p = bridge_input; bridge_output_p = bridge_output; bridge_dn_p = bridge_dummynet; @@ -517,13 +567,11 @@ bridge_modevent(module_t mod, int type, void *data) case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, bridge_detach_cookie); - if_clone_detach(&bridge_cloner); uma_zdestroy(bridge_rtnode_zone); bridge_input_p = NULL; bridge_output_p = NULL; bridge_dn_p = NULL; bridge_linkstate_p = NULL; - mtx_destroy(&bridge_list_mtx); break; default: return (EOPNOTSUPP); @@ -541,19 +589,19 @@ DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1); /* - * handler for net.link.bridge.pfil_ipfw + * handler for net.link.bridge.ipfw */ static int sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS) { - int enable = pfil_ipfw; + int enable = V_pfil_ipfw; int error; error = sysctl_handle_int(oidp, &enable, 0, req); - enable = (enable) ? 1 : 0; + enable &= 1; - if (enable != pfil_ipfw) { - pfil_ipfw = enable; + if (enable != V_pfil_ipfw) { + V_pfil_ipfw = enable; /* * Disable pfil so that ipfw doesnt run twice, if the user @@ -561,17 +609,19 @@ sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS) * pfil_member. Also allow non-ip packets as ipfw can filter by * layer2 type. 
*/ - if (pfil_ipfw) { - pfil_onlyip = 0; - pfil_bridge = 0; - pfil_member = 0; + if (V_pfil_ipfw) { + V_pfil_onlyip = 0; + V_pfil_bridge = 0; + V_pfil_member = 0; } } return (error); } -SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW, - &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW"); +SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I", + "Layer2 filter with IPFW"); /* * bridge_clone_create: @@ -606,15 +656,13 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) LIST_INIT(&sc->sc_spanlist); ifp->if_softc = sc; - if_initname(ifp, ifc->ifc_name, unit); + if_initname(ifp, bridge_name, unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = bridge_ioctl; - ifp->if_start = bridge_start; + ifp->if_transmit = bridge_transmit; + ifp->if_qflush = bridge_qflush; ifp->if_init = bridge_init; ifp->if_type = IFT_BRIDGE; - IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); - ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; - IFQ_SET_READY(&ifp->if_snd); /* * Generate an ethernet address with a locally administered address. 
@@ -626,7 +674,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) */ fb = 0; getcredhostid(curthread->td_ucred, &hostid); - for (retry = 1; retry != 0;) { + do { if (fb || hostid == 0) { arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1); sc->sc_defaddr[0] &= ~1;/* clear multicast bit */ @@ -642,15 +690,17 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) fb = 1; retry = 0; - mtx_lock(&bridge_list_mtx); - LIST_FOREACH(sc2, &bridge_list, sc_list) { + BRIDGE_LIST_LOCK(); + LIST_FOREACH(sc2, &V_bridge_list, sc_list) { bifp = sc2->sc_ifp; if (memcmp(sc->sc_defaddr, - IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) + IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) { retry = 1; + break; + } } - mtx_unlock(&bridge_list_mtx); - } + BRIDGE_LIST_UNLOCK(); + } while (retry == 1); bstp_attach(&sc->sc_stp, &bridge_ops); ether_ifattach(ifp, sc->sc_defaddr); @@ -658,9 +708,9 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_baudrate = 0; ifp->if_type = IFT_BRIDGE; - mtx_lock(&bridge_list_mtx); - LIST_INSERT_HEAD(&bridge_list, sc, sc_list); - mtx_unlock(&bridge_list_mtx); + BRIDGE_LIST_LOCK(); + LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list); + BRIDGE_LIST_UNLOCK(); return (0); } @@ -692,13 +742,13 @@ bridge_clone_destroy(struct ifnet *ifp) callout_drain(&sc->sc_brcallout); - mtx_lock(&bridge_list_mtx); + BRIDGE_LIST_LOCK(); LIST_REMOVE(sc, sc_list); - mtx_unlock(&bridge_list_mtx); + BRIDGE_LIST_UNLOCK(); bstp_detach(&sc->sc_stp); ether_ifdetach(ifp); - if_free_type(ifp, IFT_ETHER); + if_free(ifp); /* Tear down the routing table. 
*/ bridge_rtable_fini(sc); @@ -818,7 +868,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) { - log(LOG_NOTICE, "%s: invalid MTU: %lu(%s)" + log(LOG_NOTICE, "%s: invalid MTU: %u(%s)" " != %d\n", sc->sc_ifp->if_xname, bif->bif_ifp->if_mtu, bif->bif_ifp->if_xname, ifr->ifr_mtu); @@ -960,7 +1010,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, * the mac address of the bridge to the address of the next member, or * to its default address if no members are left. */ - if (bridge_inherit_mac && sc->sc_ifaddr == ifs) { + if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) { if (LIST_EMPTY(&sc->sc_iflist)) { bcopy(sc->sc_defaddr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); @@ -986,9 +1036,12 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, case IFT_ETHER: case IFT_L2VLAN: /* - * Take the interface out of promiscuous mode. + * Take the interface out of promiscuous mode, but only + * if it was promiscuous in the first place. It might + * not be if we're in the bridge_ioctl_add() error path. */ - (void) ifpromisc(ifs, 0); + if (ifs->if_flags & IFF_PROMISC) + (void) ifpromisc(ifs, 0); break; case IFT_GIF: @@ -1108,7 +1161,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) if (LIST_EMPTY(&sc->sc_iflist)) sc->sc_ifp->if_mtu = ifs->if_mtu; else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { - if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n", + if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n", ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu); return (EINVAL); } @@ -1126,7 +1179,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) * member and the MAC address of the bridge has not been changed from * the default randomly generated one. 
*/ - if (bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) && + if (V_bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) && !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) { bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = ifs; @@ -1156,10 +1209,8 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) break; } - if (error) { + if (error) bridge_delete_member(sc, bif, 0); - free(bif, M_DEVBUF); - } return (error); } @@ -1751,7 +1802,13 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) if (ifp->if_flags & IFF_RENAMING) return; - + if (V_bridge_cloner == NULL) { + /* + * This detach handler can be called after + * vnet_bridge_uninit(). Just return in that case. + */ + return; + } /* Check if the interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); @@ -1765,8 +1822,8 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) } /* Check if the interface is a span port */ - mtx_lock(&bridge_list_mtx); - LIST_FOREACH(sc, &bridge_list, sc_list) { + BRIDGE_LIST_LOCK(); + LIST_FOREACH(sc, &V_bridge_list, sc_list) { BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifp == bif->bif_ifp) { @@ -1776,7 +1833,7 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) BRIDGE_UNLOCK(sc); } - mtx_unlock(&bridge_list_mtx); + BRIDGE_LIST_UNLOCK(); } /* @@ -1832,20 +1889,19 @@ bridge_stop(struct ifnet *ifp, int disable) * Enqueue a packet on a bridge member interface. 
* */ -static void +static int bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) { int len, err = 0; short mflags; struct mbuf *m0; - len = m->m_pkthdr.len; - mflags = m->m_flags; - /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = NULL; + len = m->m_pkthdr.len; + mflags = m->m_flags; /* * If underlying interface can not do VLAN tag insertion itself @@ -1857,7 +1913,7 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) if (m == NULL) { if_printf(dst_ifp, "unable to prepend VLAN header\n"); - dst_ifp->if_oerrors++; + if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1); continue; } m->m_flags &= ~M_VLANTAG; @@ -1865,16 +1921,17 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) if ((err = dst_ifp->if_transmit(dst_ifp, m))) { m_freem(m0); + if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); break; } - } - if (err == 0) { - sc->sc_ifp->if_opackets++; - sc->sc_ifp->if_obytes += len; + if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len); if (mflags & M_MCAST) - sc->sc_ifp->if_omcasts++; + if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1); } + + return (err); } /* @@ -2000,9 +2057,9 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, used = 1; mc = m; } else { - mc = m_copypacket(m, M_DONTWAIT); + mc = m_copypacket(m, M_NOWAIT); if (mc == NULL) { - sc->sc_ifp->if_oerrors++; + if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } @@ -2033,44 +2090,42 @@ sendunicast: } /* - * bridge_start: + * bridge_transmit: * - * Start output on a bridge. + * Do output on a bridge. 
* */ -static void -bridge_start(struct ifnet *ifp) +static int +bridge_transmit(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; - struct mbuf *m; struct ether_header *eh; struct ifnet *dst_if; + int error = 0; sc = ifp->if_softc; - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - for (;;) { - IFQ_DEQUEUE(&ifp->if_snd, m); - if (m == 0) - break; - ETHER_BPF_MTAP(ifp, m); + ETHER_BPF_MTAP(ifp, m); - eh = mtod(m, struct ether_header *); - dst_if = NULL; + eh = mtod(m, struct ether_header *); - BRIDGE_LOCK(sc); - if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) { - dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1); - } + BRIDGE_LOCK(sc); + if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && + (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) { + BRIDGE_UNLOCK(sc); + error = bridge_enqueue(sc, dst_if, m); + } else + bridge_broadcast(sc, ifp, m, 0); - if (dst_if == NULL) - bridge_broadcast(sc, ifp, m, 0); - else { - BRIDGE_UNLOCK(sc); - bridge_enqueue(sc, dst_if, m); - } - } - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + return (error); +} + +/* + * The ifp->if_qflush entry point for if_bridge(4) is no-op. + */ +static void +bridge_qflush(struct ifnet *ifp __unused) +{ } /* @@ -2094,8 +2149,8 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); vlan = VLANTAGOF(m); if ((sbif->bif_flags & IFBIF_STP) && @@ -2147,7 +2202,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, goto drop; /* ...forward it to all interfaces. 
*/ - ifp->if_imcasts++; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); dst_if = NULL; } @@ -2255,8 +2310,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) if ((bifp->if_flags & IFF_MONITOR) != 0) { m->m_pkthdr.rcvif = bifp; ETHER_BPF_MTAP(bifp, m); - bifp->if_ipackets++; - bifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return (NULL); } @@ -2291,7 +2346,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) * for bridge processing; return the original packet for * local processing. */ - mc = m_dup(m, M_DONTWAIT); + mc = m_dup(m, M_NOWAIT); if (mc == NULL) { BRIDGE_UNLOCK(sc); return (m); @@ -2308,7 +2363,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) */ KASSERT(bifp->if_bridge == NULL, ("loop created in bridge_input")); - mc2 = m_dup(m, M_DONTWAIT); + mc2 = m_dup(m, M_NOWAIT); if (mc2 != NULL) { /* Keep the layer3 header aligned */ int i = min(mc2->m_pkthdr.len, max_protohdr); @@ -2357,9 +2412,10 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) ) { \ if ((iface)->if_type == IFT_BRIDGE) { \ ETHER_BPF_MTAP(iface, m); \ - iface->if_ipackets++; \ + if_inc_counter(iface, IFCOUNTER_IPACKETS, 1); \ + if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \ /* Filter on the physical interface. 
*/ \ - if (pfil_local_phys && \ + if (V_pfil_local_phys && \ (PFIL_HOOKED(&V_inet_pfil_hook) \ OR_PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ @@ -2485,9 +2541,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, mc = m; used = 1; } else { - mc = m_dup(m, M_DONTWAIT); + mc = m_dup(m, M_NOWAIT); if (mc == NULL) { - sc->sc_ifp->if_oerrors++; + if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } @@ -2507,7 +2563,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, i = min(mc->m_pkthdr.len, max_protohdr); mc = m_copyup(mc, i, ETHER_ALIGN); if (mc == NULL) { - sc->sc_ifp->if_oerrors++; + if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } @@ -2548,9 +2604,9 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m) if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; - mc = m_copypacket(m, M_DONTWAIT); + mc = m_copypacket(m, M_NOWAIT); if (mc == NULL) { - sc->sc_ifp->if_oerrors++; + if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } @@ -2793,24 +2849,19 @@ bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full) * * Initialize the route table for this bridge. 
*/ -static int +static void bridge_rtable_init(struct bridge_softc *sc) { int i; sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE, - M_DEVBUF, M_NOWAIT); - if (sc->sc_rthash == NULL) - return (ENOMEM); + M_DEVBUF, M_WAITOK); for (i = 0; i < BRIDGE_RTHASH_SIZE; i++) LIST_INIT(&sc->sc_rthash[i]); sc->sc_rthash_key = arc4random(); - LIST_INIT(&sc->sc_rtlist); - - return (0); } /* @@ -3018,9 +3069,11 @@ bridge_state_change(struct ifnet *ifp, int state) "discarding" }; - if (log_stp) + CURVNET_SET(ifp->if_vnet); + if (V_log_stp) log(LOG_NOTICE, "%s: state changed to %s on %s\n", sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname); + CURVNET_RESTORE(); } /* @@ -3034,7 +3087,6 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) { int snap, error, i, hlen; struct ether_header *eh1, eh2; - struct ip_fw_args args; struct ip *ip; struct llc llc1; u_int16_t ether_type; @@ -3047,7 +3099,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__)); #endif - if (pfil_bridge == 0 && pfil_member == 0 && pfil_ipfw == 0) + if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0) return (0); /* filtering is disabled */ i = min((*mp)->m_pkthdr.len, max_protohdr); @@ -3089,7 +3141,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) switch (ether_type) { case ETHERTYPE_ARP: case ETHERTYPE_REVARP: - if (pfil_ipfw_arp == 0) + if (V_pfil_ipfw_arp == 0) return (0); /* Automatically pass */ break; @@ -3104,10 +3156,20 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) * packets, these will not be checked by pfil(9) and * passed unconditionally so the default is to drop. 
*/ - if (pfil_onlyip) + if (V_pfil_onlyip) goto bad; } + /* Run the packet through pfil before stripping link headers */ + if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 && + dir == PFIL_OUT && ifp != NULL) { + + error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, NULL); + + if (*mp == NULL || error != 0) /* packet consumed by filter */ + return (error); + } + /* Strip off the Ethernet header and keep a copy. */ m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2); m_adj(*mp, ETHER_HDR_LEN); @@ -3138,63 +3200,6 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) goto bad; } - /* XXX this section is also in if_ethersubr.c */ - // XXX PFIL_OUT or DIR_OUT ? - if (V_ip_fw_chk_ptr && pfil_ipfw != 0 && - dir == PFIL_OUT && ifp != NULL) { - struct m_tag *mtag; - - error = -1; - /* fetch the start point from existing tags, if any */ - mtag = m_tag_locate(*mp, MTAG_IPFW_RULE, 0, NULL); - if (mtag == NULL) { - args.rule.slot = 0; - } else { - struct ipfw_rule_ref *r; - - /* XXX can we free the tag after use ? */ - mtag->m_tag_id = PACKET_TAG_NONE; - r = (struct ipfw_rule_ref *)(mtag + 1); - /* packet already partially processed ? */ - if (r->info & IPFW_ONEPASS) - goto ipfwpass; - args.rule = *r; - } - - args.m = *mp; - args.oif = ifp; - args.next_hop = NULL; - args.next_hop6 = NULL; - args.eh = &eh2; - args.inp = NULL; /* used by ipfw uid/gid/jail rules */ - i = V_ip_fw_chk_ptr(&args); - *mp = args.m; - - if (*mp == NULL) - return (error); - - if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) { - - /* put the Ethernet header back on */ - M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT); - if (*mp == NULL) - return (error); - bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN); - - /* - * Pass the pkt to dummynet, which consumes it. The - * packet will return to us via bridge_dummynet(). 
- */ - args.oif = ifp; - ip_dn_io_ptr(mp, DIR_FWD | PROTO_IFB, &args); - return (error); - } - - if (i != IP_FW_PASS) /* drop */ - goto bad; - } - -ipfwpass: error = 0; /* @@ -3203,36 +3208,27 @@ ipfwpass: switch (ether_type) { case ETHERTYPE_IP: /* - * before calling the firewall, swap fields the same as - * IP does. here we assume the header is contiguous - */ - ip = mtod(*mp, struct ip *); - - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - - /* * Run pfil on the member interface and the bridge, both can * be skipped by clearing pfil_member or pfil_bridge. * * Keep the order: * in_if -> bridge_if -> out_if */ - if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) + if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; - if (pfil_member && ifp != NULL) + if (V_pfil_member && ifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; - if (pfil_bridge && dir == PFIL_IN && bifp != NULL) + if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, dir, NULL); @@ -3240,7 +3236,7 @@ ipfwpass: break; /* check if we need to fragment the packet */ - if (pfil_member && ifp != NULL && dir == PFIL_OUT) { + if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) { i = (*mp)->m_pkthdr.len; if (i > ifp->if_mtu) { error = bridge_fragment(ifp, *mp, &eh2, snap, @@ -3249,20 +3245,18 @@ ipfwpass: } } - /* Recalculate the ip checksum and restore byte ordering */ + /* Recalculate the ip checksum. 
*/ ip = mtod(*mp, struct ip *); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) goto bad; if (hlen > (*mp)->m_len) { - if ((*mp = m_pullup(*mp, hlen)) == 0) + if ((*mp = m_pullup(*mp, hlen)) == NULL) goto bad; ip = mtod(*mp, struct ip *); if (ip == NULL) goto bad; } - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); @@ -3272,21 +3266,21 @@ ipfwpass: break; #ifdef INET6 case ETHERTYPE_IPV6: - if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) + if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; - if (pfil_member && ifp != NULL) + if (V_pfil_member && ifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; - if (pfil_bridge && dir == PFIL_IN && bifp != NULL) + if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, dir, NULL); break; @@ -3307,13 +3301,13 @@ ipfwpass: * Finally, put everything back the way it was and return */ if (snap) { - M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT); + M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT); if (*mp == NULL) return (error); bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc)); } - M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT); + M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT); if (*mp == NULL) return (error); bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN); @@ -3375,7 +3369,7 @@ bridge_ip_checkbasic(struct mbuf **mp) goto bad; } if (hlen > m->m_len) { - if ((m = m_pullup(m, hlen)) == 0) { + if ((m = m_pullup(m, hlen)) == NULL) { KMOD_IPSTAT_INC(ips_badhlen); goto bad; } @@ -3499,8 +3493,8 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh, goto out; ip = mtod(m, struct ip *); - error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, - 
CSUM_DELAY_IP); + m->m_pkthdr.csum_flags |= CSUM_IP; + error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist); if (error) goto out; @@ -3508,7 +3502,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh, for (m0 = m; m0; m0 = m0->m_nextpkt) { if (error == 0) { if (snap) { - M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT); + M_PREPEND(m0, sizeof(struct llc), M_NOWAIT); if (m0 == NULL) { error = ENOBUFS; continue; @@ -3516,7 +3510,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh, bcopy(llc, mtod(m0, caddr_t), sizeof(struct llc)); } - M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT); + M_PREPEND(m0, ETHER_HDR_LEN, M_NOWAIT); if (m0 == NULL) { error = ENOBUFS; continue; diff --git a/freebsd/sys/net/if_clone.c b/freebsd/sys/net/if_clone.c index 0b752139..61ba9c6c 100644 --- a/freebsd/sys/net/if_clone.c +++ b/freebsd/sys/net/if_clone.c @@ -1,6 +1,7 @@ #include <machine/rtems-bsd-kernel-space.h> /*- + * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * @@ -33,6 +34,7 @@ */ #include <rtems/bsd/sys/param.h> +#include <sys/eventhandler.h> #include <sys/malloc.h> #include <sys/limits.h> #include <rtems/bsd/sys/lock.h> @@ -43,29 +45,74 @@ #include <sys/socket.h> #include <net/if.h> -#include <net/if_clone.h> -#if 0 -#include <net/if_dl.h> -#endif -#include <net/if_types.h> #include <net/if_var.h> +#include <net/if_clone.h> #include <net/radix.h> #include <net/route.h> #include <net/vnet.h> +/* Current IF_MAXUNIT expands maximum to 5 characters. */ +#define IFCLOSIZ (IFNAMSIZ - 5) + +/* + * Structure describing a `cloning' interface. + * + * List of locks + * (c) const until freeing + * (d) driver specific data, may need external protection. + * (e) locked by if_cloners_mtx + * (i) locked by ifc_mtx mtx + */ +struct if_clone { + char ifc_name[IFCLOSIZ]; /* (c) Name of device, e.g. 
`gif' */ + struct unrhdr *ifc_unrhdr; /* (c) alloc_unr(9) header */ + int ifc_maxunit; /* (c) maximum unit number */ + long ifc_refcnt; /* (i) Reference count. */ + LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */ + struct mtx ifc_mtx; /* Mutex to protect members. */ + + enum { SIMPLE, ADVANCED } ifc_type; /* (c) */ + + /* (c) Driver specific cloning functions. Called with no locks held. */ + union { + struct { /* advanced cloner */ + ifc_match_t *_ifc_match; + ifc_create_t *_ifc_create; + ifc_destroy_t *_ifc_destroy; + } A; + struct { /* simple cloner */ + ifcs_create_t *_ifcs_create; + ifcs_destroy_t *_ifcs_destroy; + int _ifcs_minifs; /* minimum ifs */ + + } S; + } U; +#define ifc_match U.A._ifc_match +#define ifc_create U.A._ifc_create +#define ifc_destroy U.A._ifc_destroy +#define ifcs_create U.S._ifcs_create +#define ifcs_destroy U.S._ifcs_destroy +#define ifcs_minifs U.S._ifcs_minifs + + LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */ +}; + static void if_clone_free(struct if_clone *ifc); static int if_clone_createif(struct if_clone *ifc, char *name, size_t len, caddr_t params); -static struct mtx if_cloners_mtx; +static int ifc_simple_match(struct if_clone *, const char *); +static int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t); +static int ifc_simple_destroy(struct if_clone *, struct ifnet *); + +static struct mtx if_cloners_mtx; +MTX_SYSINIT(if_cloners_lock, &if_cloners_mtx, "if_cloners lock", MTX_DEF); static VNET_DEFINE(int, if_cloners_count); VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners); #define V_if_cloners_count VNET(if_cloners_count) #define V_if_cloners VNET(if_cloners) -#define IF_CLONERS_LOCK_INIT() \ - mtx_init(&if_cloners_mtx, "if_cloners lock", NULL, MTX_DEF) #define IF_CLONERS_LOCK_ASSERT() mtx_assert(&if_cloners_mtx, MA_OWNED) #define IF_CLONERS_LOCK() mtx_lock(&if_cloners_mtx) #define IF_CLONERS_UNLOCK() mtx_unlock(&if_cloners_mtx) @@ -123,13 +170,6 @@ vnet_if_clone_init(void) 
LIST_INIT(&V_if_cloners); } -void -if_clone_init(void) -{ - - IF_CLONERS_LOCK_INIT(); -} - /* * Lookup and create a clone network interface. */ @@ -140,18 +180,25 @@ if_clone_create(char *name, size_t len, caddr_t params) /* Try to find an applicable cloner for this request */ IF_CLONERS_LOCK(); - LIST_FOREACH(ifc, &V_if_cloners, ifc_list) { - if (ifc->ifc_match(ifc, name)) { - break; + LIST_FOREACH(ifc, &V_if_cloners, ifc_list) + if (ifc->ifc_type == SIMPLE) { + if (ifc_simple_match(ifc, name)) + break; + } else { + if (ifc->ifc_match(ifc, name)) + break; } - } #ifdef VIMAGE if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) { CURVNET_SET_QUIET(vnet0); - LIST_FOREACH(ifc, &V_if_cloners, ifc_list) { - if (ifc->ifc_match(ifc, name)) - break; - } + LIST_FOREACH(ifc, &V_if_cloners, ifc_list) + if (ifc->ifc_type == SIMPLE) { + if (ifc_simple_match(ifc, name)) + break; + } else { + if (ifc->ifc_match(ifc, name)) + break; + } CURVNET_RESTORE(); } #endif @@ -175,7 +222,10 @@ if_clone_createif(struct if_clone *ifc, char *name, size_t len, caddr_t params) if (ifunit(name) != NULL) return (EEXIST); - err = (*ifc->ifc_create)(ifc, name, len, params); + if (ifc->ifc_type == SIMPLE) + err = ifc_simple_create(ifc, name, len, params); + else + err = (*ifc->ifc_create)(ifc, name, len, params); if (!err) { ifp = ifunit(name); @@ -216,10 +266,14 @@ if_clone_destroy(const char *name) #ifdef VIMAGE if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) { CURVNET_SET_QUIET(vnet0); - LIST_FOREACH(ifc, &V_if_cloners, ifc_list) { - if (ifc->ifc_match(ifc, name)) - break; - } + LIST_FOREACH(ifc, &V_if_cloners, ifc_list) + if (ifc->ifc_type == SIMPLE) { + if (ifc_simple_match(ifc, name)) + break; + } else { + if (ifc->ifc_match(ifc, name)) + break; + } CURVNET_RESTORE(); } #endif @@ -243,7 +297,7 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp) int err; struct ifnet *ifcifp; - if (ifc->ifc_destroy == NULL) + if (ifc->ifc_type == ADVANCED && ifc->ifc_destroy == NULL) return(EOPNOTSUPP); /* 
@@ -268,7 +322,10 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp) if_delgroup(ifp, ifc->ifc_name); - err = (*ifc->ifc_destroy)(ifc, ifp); + if (ifc->ifc_type == SIMPLE) + err = ifc_simple_destroy(ifc, ifp); + else + err = (*ifc->ifc_destroy)(ifc, ifp); if (err != 0) { if_addgroup(ifp, ifc->ifc_name); @@ -281,36 +338,97 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp) return (err); } -/* - * Register a network interface cloner. - */ -void -if_clone_attach(struct if_clone *ifc) +static struct if_clone * +if_clone_alloc(const char *name, int maxunit) { - int len, maxclone; + struct if_clone *ifc; - /* - * Compute bitmap size and allocate it. - */ - maxclone = ifc->ifc_maxunit + 1; - len = maxclone >> 3; - if ((len << 3) < maxclone) - len++; - ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO); - ifc->ifc_bmlen = len; + KASSERT(name != NULL, ("%s: no name\n", __func__)); + + ifc = malloc(sizeof(struct if_clone), M_CLONE, M_WAITOK | M_ZERO); + strncpy(ifc->ifc_name, name, IFCLOSIZ-1); IF_CLONE_LOCK_INIT(ifc); IF_CLONE_ADDREF(ifc); + ifc->ifc_maxunit = maxunit ? 
maxunit : IF_MAXUNIT; + ifc->ifc_unrhdr = new_unrhdr(0, ifc->ifc_maxunit, &ifc->ifc_mtx); + LIST_INIT(&ifc->ifc_iflist); + + return (ifc); +} + +static int +if_clone_attach(struct if_clone *ifc) +{ + struct if_clone *ifc1; IF_CLONERS_LOCK(); + LIST_FOREACH(ifc1, &V_if_cloners, ifc_list) + if (strcmp(ifc->ifc_name, ifc1->ifc_name) == 0) { + IF_CLONERS_UNLOCK(); + IF_CLONE_REMREF(ifc); + return (EEXIST); + } LIST_INSERT_HEAD(&V_if_cloners, ifc, ifc_list); V_if_cloners_count++; IF_CLONERS_UNLOCK(); - LIST_INIT(&ifc->ifc_iflist); + return (0); +} + +struct if_clone * +if_clone_advanced(const char *name, u_int maxunit, ifc_match_t match, + ifc_create_t create, ifc_destroy_t destroy) +{ + struct if_clone *ifc; + + ifc = if_clone_alloc(name, maxunit); + ifc->ifc_type = ADVANCED; + ifc->ifc_match = match; + ifc->ifc_create = create; + ifc->ifc_destroy = destroy; + + if (if_clone_attach(ifc) != 0) { + if_clone_free(ifc); + return (NULL); + } - if (ifc->ifc_attach != NULL) - (*ifc->ifc_attach)(ifc); EVENTHANDLER_INVOKE(if_clone_event, ifc); + + return (ifc); +} + +struct if_clone * +if_clone_simple(const char *name, ifcs_create_t create, ifcs_destroy_t destroy, + u_int minifs) +{ + struct if_clone *ifc; + u_int unit; + + ifc = if_clone_alloc(name, 0); + ifc->ifc_type = SIMPLE; + ifc->ifcs_create = create; + ifc->ifcs_destroy = destroy; + ifc->ifcs_minifs = minifs; + + if (if_clone_attach(ifc) != 0) { + if_clone_free(ifc); + return (NULL); + } + + for (unit = 0; unit < minifs; unit++) { + char name[IFNAMSIZ]; + int error; + + snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit); + error = if_clone_createif(ifc, name, IFNAMSIZ, NULL); + KASSERT(error == 0, + ("%s: failed to create required interface %s", + __func__, name)); + } + + EVENTHANDLER_INVOKE(if_clone_event, ifc); + + return (ifc); } /* @@ -319,7 +437,6 @@ if_clone_attach(struct if_clone *ifc) void if_clone_detach(struct if_clone *ifc) { - struct ifc_simple_data *ifcs = ifc->ifc_data; IF_CLONERS_LOCK(); 
LIST_REMOVE(ifc, ifc_list); @@ -327,8 +444,8 @@ if_clone_detach(struct if_clone *ifc) IF_CLONERS_UNLOCK(); /* Allow all simples to be destroyed */ - if (ifc->ifc_attach == ifc_simple_attach) - ifcs->ifcs_minifs = 0; + if (ifc->ifc_type == SIMPLE) + ifc->ifcs_minifs = 0; /* destroy all interfaces for this cloner */ while (!LIST_EMPTY(&ifc->ifc_iflist)) @@ -340,16 +457,13 @@ if_clone_detach(struct if_clone *ifc) static void if_clone_free(struct if_clone *ifc) { - for (int bytoff = 0; bytoff < ifc->ifc_bmlen; bytoff++) { - KASSERT(ifc->ifc_units[bytoff] == 0x00, - ("ifc_units[%d] is not empty", bytoff)); - } KASSERT(LIST_EMPTY(&ifc->ifc_iflist), ("%s: ifc_iflist not empty", __func__)); IF_CLONE_LOCK_DESTROY(ifc); - free(ifc->ifc_units, M_CLONE); + delete_unrhdr(ifc->ifc_unrhdr); + free(ifc, M_CLONE); } /* @@ -372,7 +486,7 @@ if_clone_list(struct if_clonereq *ifcr) * below, but that's not a major problem. Not caping our * allocation to the number of cloners actually in the system * could be because that would let arbitrary users cause us to - * allocate abritrary amounts of kernel memory. + * allocate arbitrary amounts of kernel memory. */ buf_count = (V_if_cloners_count < ifcr->ifcr_count) ? V_if_cloners_count : ifcr->ifcr_count; @@ -406,6 +520,49 @@ done: } /* + * if_clone_findifc() looks up ifnet from the current + * cloner list, and returns ifc if found. Note that ifc_refcnt + * is incremented. + */ +struct if_clone * +if_clone_findifc(struct ifnet *ifp) +{ + struct if_clone *ifc, *ifc0; + struct ifnet *ifcifp; + + ifc0 = NULL; + IF_CLONERS_LOCK(); + LIST_FOREACH(ifc, &V_if_cloners, ifc_list) { + IF_CLONE_LOCK(ifc); + LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) { + if (ifp == ifcifp) { + ifc0 = ifc; + IF_CLONE_ADDREF_LOCKED(ifc); + break; + } + } + IF_CLONE_UNLOCK(ifc); + if (ifc0 != NULL) + break; + } + IF_CLONERS_UNLOCK(); + + return (ifc0); +} + +/* + * if_clone_addgroup() decrements ifc_refcnt because it is called after + * if_clone_findifc(). 
+ */ +void +if_clone_addgroup(struct ifnet *ifp, struct if_clone *ifc) +{ + + if_addgroup(ifp, ifc->ifc_name); + IF_CLONE_REMREF(ifc); +} + +/* * A utility function to extract unit numbers from interface names of * the form name###. * @@ -443,98 +600,52 @@ ifc_name2unit(const char *name, int *unit) int ifc_alloc_unit(struct if_clone *ifc, int *unit) { - int wildcard, bytoff, bitoff; - int err = 0; - - IF_CLONE_LOCK(ifc); + char name[IFNAMSIZ]; + int wildcard; - bytoff = bitoff = 0; wildcard = (*unit < 0); - /* - * Find a free unit if none was given. - */ - if (wildcard) { - while ((bytoff < ifc->ifc_bmlen) - && (ifc->ifc_units[bytoff] == 0xff)) - bytoff++; - if (bytoff >= ifc->ifc_bmlen) { - err = ENOSPC; - goto done; +retry: + if (*unit > ifc->ifc_maxunit) + return (ENOSPC); + if (*unit < 0) { + *unit = alloc_unr(ifc->ifc_unrhdr); + if (*unit == -1) + return (ENOSPC); + } else { + *unit = alloc_unr_specific(ifc->ifc_unrhdr, *unit); + if (*unit == -1) { + if (wildcard) { + (*unit)++; + goto retry; + } else + return (EEXIST); } - while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0) - bitoff++; - *unit = (bytoff << 3) + bitoff; } - if (*unit > ifc->ifc_maxunit) { - err = ENOSPC; - goto done; + snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, *unit); + if (ifunit(name) != NULL) { + free_unr(ifc->ifc_unrhdr, *unit); + if (wildcard) { + (*unit)++; + goto retry; + } else + return (EEXIST); } - if (!wildcard) { - bytoff = *unit >> 3; - bitoff = *unit - (bytoff << 3); - } - - if((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0) { - err = EEXIST; - goto done; - } - /* - * Allocate the unit in the bitmap. 
- */ - KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0, - ("%s: bit is already set", __func__)); - ifc->ifc_units[bytoff] |= (1 << bitoff); - IF_CLONE_ADDREF_LOCKED(ifc); + IF_CLONE_ADDREF(ifc); -done: - IF_CLONE_UNLOCK(ifc); - return (err); + return (0); } void ifc_free_unit(struct if_clone *ifc, int unit) { - int bytoff, bitoff; - - - /* - * Compute offset in the bitmap and deallocate the unit. - */ - bytoff = unit >> 3; - bitoff = unit - (bytoff << 3); - IF_CLONE_LOCK(ifc); - KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0, - ("%s: bit is already cleared", __func__)); - ifc->ifc_units[bytoff] &= ~(1 << bitoff); - IF_CLONE_REMREF_LOCKED(ifc); /* releases lock */ -} - -void -ifc_simple_attach(struct if_clone *ifc) -{ - int err; - int unit; - char name[IFNAMSIZ]; - struct ifc_simple_data *ifcs = ifc->ifc_data; - - KASSERT(ifcs->ifcs_minifs - 1 <= ifc->ifc_maxunit, - ("%s: %s requested more units than allowed (%d > %d)", - __func__, ifc->ifc_name, ifcs->ifcs_minifs, - ifc->ifc_maxunit + 1)); - - for (unit = 0; unit < ifcs->ifcs_minifs; unit++) { - snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit); - err = if_clone_createif(ifc, name, IFNAMSIZ, NULL); - KASSERT(err == 0, - ("%s: failed to create required interface %s", - __func__, name)); - } + free_unr(ifc->ifc_unrhdr, unit); + IF_CLONE_REMREF(ifc); } -int +static int ifc_simple_match(struct if_clone *ifc, const char *name) { const char *cp; @@ -555,14 +666,13 @@ ifc_simple_match(struct if_clone *ifc, const char *name) return (1); } -int +static int ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { char *dp; int wildcard; int unit; int err; - struct ifc_simple_data *ifcs = ifc->ifc_data; err = ifc_name2unit(name, &unit); if (err != 0) @@ -574,7 +684,7 @@ ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) if (err != 0) return (err); - err = ifcs->ifcs_create(ifc, unit, params); + err = ifc->ifcs_create(ifc, unit, params); if (err != 0) { 
ifc_free_unit(ifc, unit); return (err); @@ -598,18 +708,17 @@ ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) return (0); } -int +static int ifc_simple_destroy(struct if_clone *ifc, struct ifnet *ifp) { int unit; - struct ifc_simple_data *ifcs = ifc->ifc_data; unit = ifp->if_dunit; - if (unit < ifcs->ifcs_minifs) + if (unit < ifc->ifcs_minifs) return (EINVAL); - ifcs->ifcs_destroy(ifp); + ifc->ifcs_destroy(ifp); ifc_free_unit(ifc, unit); diff --git a/freebsd/sys/net/if_clone.h b/freebsd/sys/net/if_clone.h index f125f8b5..3a60b0a1 100644 --- a/freebsd/sys/net/if_clone.h +++ b/freebsd/sys/net/if_clone.h @@ -35,82 +35,45 @@ #ifdef _KERNEL -#define IFC_CLONE_INITIALIZER(name, data, maxunit, \ - attach, match, create, destroy) \ - { { 0 }, name, maxunit, NULL, 0, data, attach, match, create, destroy } - -/* - * Structure describing a `cloning' interface. - * - * List of locks - * (c) const until freeing - * (d) driver specific data, may need external protection. - * (e) locked by if_cloners_mtx - * (i) locked by ifc_mtx mtx - */ -struct if_clone { - LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */ - const char *ifc_name; /* (c) Name of device, e.g. `gif' */ - int ifc_maxunit; /* (c) Maximum unit number */ - unsigned char *ifc_units; /* (i) Bitmap to handle units. */ - /* Considered private, access */ - /* via ifc_(alloc|free)_unit(). */ - int ifc_bmlen; /* (c) Bitmap length. */ - void *ifc_data; /* (*) Data for ifc_* functions. */ - - /* (c) Driver specific cloning functions. Called with no locks held. */ - void (*ifc_attach)(struct if_clone *); - int (*ifc_match)(struct if_clone *, const char *); - int (*ifc_create)(struct if_clone *, char *, size_t, caddr_t); - int (*ifc_destroy)(struct if_clone *, struct ifnet *); - - long ifc_refcnt; /* (i) Refrence count. */ - struct mtx ifc_mtx; /* Muted to protect members. 
*/ - LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */ -}; - -void if_clone_init(void); -void if_clone_attach(struct if_clone *); +struct if_clone; + +/* Methods. */ +typedef int ifc_match_t(struct if_clone *, const char *); +typedef int ifc_create_t(struct if_clone *, char *, size_t, caddr_t); +typedef int ifc_destroy_t(struct if_clone *, struct ifnet *); + +typedef int ifcs_create_t(struct if_clone *, int, caddr_t); +typedef void ifcs_destroy_t(struct ifnet *); + +/* Interface cloner (de)allocating functions. */ +struct if_clone * + if_clone_advanced(const char *, u_int, ifc_match_t, ifc_create_t, + ifc_destroy_t); +struct if_clone * + if_clone_simple(const char *, ifcs_create_t, ifcs_destroy_t, u_int); void if_clone_detach(struct if_clone *); -void vnet_if_clone_init(void); - -int if_clone_create(char *, size_t, caddr_t); -int if_clone_destroy(const char *); -int if_clone_destroyif(struct if_clone *, struct ifnet *); -int if_clone_list(struct if_clonereq *); +/* Unit (de)allocating fucntions. */ int ifc_name2unit(const char *name, int *unit); int ifc_alloc_unit(struct if_clone *, int *); void ifc_free_unit(struct if_clone *, int); -/* - * The ifc_simple functions, structures, and macros implement basic - * cloning as in 5.[012]. - */ - -struct ifc_simple_data { - int ifcs_minifs; /* minimum number of interfaces */ - - int (*ifcs_create)(struct if_clone *, int, caddr_t); - void (*ifcs_destroy)(struct ifnet *); -}; - -/* interface clone event */ +#ifdef _SYS_EVENTHANDLER_H_ +/* Interface clone event. 
*/ typedef void (*if_clone_event_handler_t)(void *, struct if_clone *); EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t); +#endif -#define IFC_SIMPLE_DECLARE(name, minifs) \ -struct ifc_simple_data name##_cloner_data = \ - {minifs, name##_clone_create, name##_clone_destroy}; \ -struct if_clone name##_cloner = \ - IFC_CLONE_INITIALIZER(#name, &name##_cloner_data, IF_MAXUNIT, \ - ifc_simple_attach, ifc_simple_match, ifc_simple_create, ifc_simple_destroy) +/* The below interfaces used only by net/if.c. */ +void vnet_if_clone_init(void); +int if_clone_create(char *, size_t, caddr_t); +int if_clone_destroy(const char *); +int if_clone_list(struct if_clonereq *); +struct if_clone *if_clone_findifc(struct ifnet *); +void if_clone_addgroup(struct ifnet *, struct if_clone *); -void ifc_simple_attach(struct if_clone *); -int ifc_simple_match(struct if_clone *, const char *); -int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t); -int ifc_simple_destroy(struct if_clone *, struct ifnet *); +/* The below interface used only by epair(4). 
*/ +int if_clone_destroyif(struct if_clone *, struct ifnet *); #endif /* _KERNEL */ - #endif /* !_NET_IF_CLONE_H_ */ diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c index b85793f8..e290823c 100644 --- a/freebsd/sys/net/if_dead.c +++ b/freebsd/sys/net/if_dead.c @@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$"); #include <net/if_var.h> static int -ifdead_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, +ifdead_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, struct route *ro) { @@ -95,6 +95,13 @@ ifdead_transmit(struct ifnet *ifp, struct mbuf *m) return (ENXIO); } +static uint64_t +ifdead_get_counter(struct ifnet *ifp, ift_counter cnt) +{ + + return (0); +} + void if_dead(struct ifnet *ifp) { @@ -106,4 +113,5 @@ if_dead(struct ifnet *ifp) ifp->if_resolvemulti = ifdead_resolvemulti; ifp->if_qflush = ifdead_qflush; ifp->if_transmit = ifdead_transmit; + ifp->if_get_counter = ifdead_get_counter; } diff --git a/freebsd/sys/net/if_disc.c b/freebsd/sys/net/if_disc.c index 3d4f3159..a2e5a7e8 100644 --- a/freebsd/sys/net/if_disc.c +++ b/freebsd/sys/net/if_disc.c @@ -47,10 +47,12 @@ #include <sys/sockio.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_types.h> #include <net/route.h> #include <net/bpf.h> +#include <net/vnet.h> #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> @@ -61,22 +63,21 @@ #define DSMTU 65532 #endif -#define DISCNAME "disc" - struct disc_softc { struct ifnet *sc_ifp; }; static int discoutput(struct ifnet *, struct mbuf *, - struct sockaddr *, struct route *); -static void discrtrequest(int, struct rtentry *, struct rt_addrinfo *); + const struct sockaddr *, struct route *); static int discioctl(struct ifnet *, u_long, caddr_t); static int disc_clone_create(struct if_clone *, int, caddr_t); static void disc_clone_destroy(struct ifnet *); -static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface"); +static const char discname[] = "disc"; +static 
MALLOC_DEFINE(M_DISC, discname, "Discard interface"); -IFC_SIMPLE_DECLARE(disc, 0); +static VNET_DEFINE(struct if_clone *, disc_cloner); +#define V_disc_cloner VNET(disc_cloner) static int disc_clone_create(struct if_clone *ifc, int unit, caddr_t params) @@ -92,7 +93,7 @@ disc_clone_create(struct if_clone *ifc, int unit, caddr_t params) } ifp->if_softc = sc; - if_initname(ifp, ifc->ifc_name, unit); + if_initname(ifp, discname, unit); ifp->if_mtu = DSMTU; /* * IFF_LOOPBACK should not be removed from disc's flags because @@ -131,16 +132,32 @@ disc_clone_destroy(struct ifnet *ifp) free(sc, M_DISC); } +static void +vnet_disc_init(const void *unused __unused) +{ + + V_disc_cloner = if_clone_simple(discname, disc_clone_create, + disc_clone_destroy, 0); +} +VNET_SYSINIT(vnet_disc_init, SI_SUB_PSEUDO, SI_ORDER_ANY, + vnet_disc_init, NULL); + +static void +vnet_disc_uninit(const void *unused __unused) +{ + + if_clone_detach(V_disc_cloner); +} +VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, + vnet_disc_uninit, NULL); + static int disc_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - if_clone_attach(&disc_cloner); - break; case MOD_UNLOAD: - if_clone_detach(&disc_cloner); break; default: return (EOPNOTSUPP); @@ -157,7 +174,7 @@ static moduledata_t disc_mod = { DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); static int -discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, +discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int32_t af; @@ -165,62 +182,47 @@ discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, M_ASSERTPKTHDR(m); /* BPF writes need to be handled specially. 
*/ - if (dst->sa_family == AF_UNSPEC) { + if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); - dst->sa_family = af; - } + else + af = dst->sa_family; - if (bpf_peers_present(ifp->if_bpf)) { - u_int af = dst->sa_family; + if (bpf_peers_present(ifp->if_bpf)) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); - } + m->m_pkthdr.rcvif = ifp; - ifp->if_opackets++; - ifp->if_obytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); m_freem(m); return (0); } -/* ARGSUSED */ -static void -discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) -{ - RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = DSMTU; -} - /* * Process an ioctl request. */ static int discioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - struct ifaddr *ifa; struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (cmd) { - case SIOCSIFADDR: ifp->if_flags |= IFF_UP; - ifa = (struct ifaddr *)data; - if (ifa != 0) - ifa->ifa_rtrequest = discrtrequest; + /* * Everything else is done at a higher level. 
*/ break; - case SIOCADDMULTI: case SIOCDELMULTI: - if (ifr == 0) { + if (ifr == NULL) { error = EAFNOSUPPORT; /* XXX */ break; } switch (ifr->ifr_addr.sa_family) { - #ifdef INET case AF_INET: break; @@ -229,17 +231,14 @@ discioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case AF_INET6: break; #endif - default: error = EAFNOSUPPORT; break; } break; - case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; - default: error = EINVAL; } diff --git a/freebsd/sys/net/if_dl.h b/freebsd/sys/net/if_dl.h index 8d88623d..f53bc5e4 100644 --- a/freebsd/sys/net/if_dl.h +++ b/freebsd/sys/net/if_dl.h @@ -67,6 +67,14 @@ struct sockaddr_dl { }; #define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen)) +#define CLLADDR(s) ((c_caddr_t)((s)->sdl_data + (s)->sdl_nlen)) +#define LLINDEX(s) ((s)->sdl_index) + + +struct ifnet; +struct sockaddr_dl *link_alloc_sdl(size_t, int); +void link_free_sdl(struct sockaddr *sa); +struct sockaddr_dl *link_init_sdl(struct ifnet *, struct sockaddr *, u_char); #ifndef _KERNEL diff --git a/freebsd/sys/net/if_edsc.c b/freebsd/sys/net/if_edsc.c index 6bb80fdb..d90f072a 100644 --- a/freebsd/sys/net/if_edsc.c +++ b/freebsd/sys/net/if_edsc.c @@ -48,10 +48,14 @@ #include <net/bpf.h> /* bpf(9) */ #include <net/ethernet.h> /* Ethernet related constants and types */ -#include <net/if.h> /* basic part of ifnet(9) */ +#include <net/if.h> +#include <net/if_var.h> /* basic part of ifnet(9) */ #include <net/if_clone.h> /* network interface cloning */ #include <net/if_types.h> /* IFT_ETHER and friends */ #include <net/if_var.h> /* kernel-only part of ifnet(9) */ +#include <net/vnet.h> + +static const char edscname[] = "edsc"; /* * Software configuration of an interface specific to this device type. @@ -66,9 +70,10 @@ struct edsc_softc { }; /* - * Simple cloning methods. - * IFC_SIMPLE_DECLARE() expects precisely these names. + * Attach to the interface cloning framework. 
*/ +static VNET_DEFINE(struct if_clone *, edsc_cloner); +#define V_edsc_cloner VNET(edsc_cloner) static int edsc_clone_create(struct if_clone *, int, caddr_t); static void edsc_clone_destroy(struct ifnet *); @@ -83,15 +88,7 @@ static void edsc_start(struct ifnet *ifp); /* * We'll allocate softc instances from this. */ -static MALLOC_DEFINE(M_EDSC, "edsc", "Ethernet discard interface"); - -/* - * Attach to the interface cloning framework under the name of "edsc". - * The second argument is the number of units to be created from - * the outset. It's also the minimum number of units allowed. - * We don't want any units created as soon as the driver is loaded. - */ -IFC_SIMPLE_DECLARE(edsc, 0); +static MALLOC_DEFINE(M_EDSC, edscname, "Ethernet discard interface"); /* * Create an interface instance. @@ -118,7 +115,7 @@ edsc_clone_create(struct if_clone *ifc, int unit, caddr_t params) /* * Get a name for this particular interface in its ifnet structure. */ - if_initname(ifp, ifc->ifc_name, unit); + if_initname(ifp, edscname, unit); /* * Typical Ethernet interface flags: we can do broadcast and @@ -298,8 +295,8 @@ edsc_start(struct ifnet *ifp) /* * Update the interface counters. */ - ifp->if_obytes += m->m_pkthdr.len; - ifp->if_opackets++; + if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* * Finally, just drop the packet. @@ -314,6 +311,36 @@ edsc_start(struct ifnet *ifp) */ } +static void +vnet_edsc_init(const void *unused __unused) +{ + + /* + * Connect to the network interface cloning framework. + * The last argument is the number of units to be created + * from the outset. It's also the minimum number of units + * allowed. We don't want any units created as soon as the + * driver is loaded. 
+ */ + V_edsc_cloner = if_clone_simple(edscname, edsc_clone_create, + edsc_clone_destroy, 0); +} +VNET_SYSINIT(vnet_edsc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_edsc_init, NULL); + +static void +vnet_edsc_uninit(const void *unused __unused) +{ + + /* + * Disconnect from the cloning framework. + * Existing interfaces will be disposed of properly. + */ + if_clone_detach(V_edsc_cloner); +} +VNET_SYSUNINIT(vnet_edsc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, + vnet_edsc_uninit, NULL); + /* * This function provides handlers for module events, namely load and unload. */ @@ -323,20 +350,8 @@ edsc_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: - /* - * Connect to the network interface cloning framework. - */ - if_clone_attach(&edsc_cloner); - break; - case MOD_UNLOAD: - /* - * Disconnect from the cloning framework. - * Existing interfaces will be disposed of properly. - */ - if_clone_detach(&edsc_cloner); break; - default: /* * There are other event types, but we don't handle them. diff --git a/freebsd/sys/net/if_ef.c b/freebsd/sys/net/if_ef.c deleted file mode 100644 index 4aa76712..00000000 --- a/freebsd/sys/net/if_ef.c +++ /dev/null @@ -1,610 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 1999, 2000 Boris Popov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <rtems/bsd/local/opt_inet.h> -#include <rtems/bsd/local/opt_ipx.h> -#include <rtems/bsd/local/opt_ef.h> - -#include <rtems/bsd/sys/param.h> -#include <sys/systm.h> -#include <sys/sockio.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/socket.h> -#include <sys/syslog.h> -#include <sys/kernel.h> -#include <sys/module.h> - -#include <net/ethernet.h> -#include <net/if_llc.h> -#include <net/if.h> -#include <net/if_arp.h> -#include <net/if_dl.h> -#include <net/if_types.h> -#include <net/netisr.h> -#include <net/bpf.h> -#include <net/vnet.h> - -#ifdef INET -#include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/if_ether.h> -#endif - -#ifdef IPX -#include <netipx/ipx.h> -#include <netipx/ipx_if.h> -#endif - -/* If none of the supported layers is enabled explicitly enable them all */ -#if !defined(ETHER_II) && !defined(ETHER_8023) && !defined(ETHER_8022) && \ - !defined(ETHER_SNAP) -#define ETHER_II 1 -#define ETHER_8023 1 -#define ETHER_8022 1 -#define ETHER_SNAP 1 -#endif - -/* internal frame types */ -#define ETHER_FT_EII 0 /* Ethernet_II - default */ -#define ETHER_FT_8023 1 /* 802.3 (Novell) */ -#define ETHER_FT_8022 2 /* 802.2 */ 
-#define ETHER_FT_SNAP 3 /* SNAP */ -#define EF_NFT 4 /* total number of frame types */ - -#ifdef EF_DEBUG -#define EFDEBUG(format, args...) printf("%s: "format, __func__ ,## args) -#else -#define EFDEBUG(format, args...) -#endif - -#define EFERROR(format, args...) printf("%s: "format, __func__ ,## args) - -struct efnet { - struct ifnet *ef_ifp; - struct ifnet *ef_pifp; - int ef_frametype; -}; - -struct ef_link { - SLIST_ENTRY(ef_link) el_next; - struct ifnet *el_ifp; /* raw device for this clones */ - struct efnet *el_units[EF_NFT]; /* our clones */ -}; - -static SLIST_HEAD(ef_link_head, ef_link) efdev = {NULL}; -static int efcount; - -extern int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m); -extern int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp, - struct sockaddr *dst, short *tp, int *hlen); - -/* -static void ef_reset (struct ifnet *); -*/ -static int ef_attach(struct efnet *sc); -static int ef_detach(struct efnet *sc); -static void ef_init(void *); -static int ef_ioctl(struct ifnet *, u_long, caddr_t); -static void ef_start(struct ifnet *); -static int ef_input(struct ifnet*, struct ether_header *, struct mbuf *); -static int ef_output(struct ifnet *ifp, struct mbuf **mp, - struct sockaddr *dst, short *tp, int *hlen); - -static int ef_load(void); -static int ef_unload(void); - -/* - * Install the interface, most of structure initialization done in ef_clone() - */ -static int -ef_attach(struct efnet *sc) -{ - struct ifnet *ifp = sc->ef_ifp; - - ifp->if_start = ef_start; - ifp->if_init = ef_init; - ifp->if_snd.ifq_maxlen = ifqmaxlen; - ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); - /* - * Attach the interface - */ - ether_ifattach(ifp, IF_LLADDR(sc->ef_pifp)); - - ifp->if_resolvemulti = 0; - ifp->if_type = IFT_XETHER; - ifp->if_drv_flags |= IFF_DRV_RUNNING; - - EFDEBUG("%s: attached\n", ifp->if_xname); - return 1; -} - -/* - * This is for _testing_only_, just removes interface from interfaces list - */ -static int 
-ef_detach(struct efnet *sc) -{ - struct ifnet *ifp = sc->ef_ifp; - int s; - - s = splimp(); - - ether_ifdetach(ifp); - if_free(ifp); - - splx(s); - return 0; -} - -static void -ef_init(void *foo) { - return; -} - -static int -ef_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct efnet *sc = ifp->if_softc; - struct ifaddr *ifa = (struct ifaddr*)data; - int s, error; - - EFDEBUG("IOCTL %ld for %s\n", cmd, ifp->if_xname); - error = 0; - s = splimp(); - switch (cmd) { - case SIOCSIFFLAGS: - error = 0; - break; - case SIOCSIFADDR: - if (sc->ef_frametype == ETHER_FT_8023 && - ifa->ifa_addr->sa_family != AF_IPX) { - error = EAFNOSUPPORT; - break; - } - ifp->if_flags |= IFF_UP; - /* FALL THROUGH */ - default: - error = ether_ioctl(ifp, cmd, data); - break; - } - splx(s); - return error; -} - -/* - * Currently packet prepared in the ether_output(), but this can be a better - * place. - */ -static void -ef_start(struct ifnet *ifp) -{ - struct efnet *sc = (struct efnet*)ifp->if_softc; - struct ifnet *p; - struct mbuf *m; - int error; - - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - p = sc->ef_pifp; - - EFDEBUG("\n"); - for (;;) { - IF_DEQUEUE(&ifp->if_snd, m); - if (m == 0) - break; - BPF_MTAP(ifp, m); - error = p->if_transmit(p, m); - if (error) { - ifp->if_oerrors++; - continue; - } - ifp->if_opackets++; - } - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - return; -} - -/* - * Inline functions do not put additional overhead to procedure call or - * parameter passing but simplify the code - */ -static int __inline -ef_inputEII(struct mbuf *m, struct ether_header *eh, u_short ether_type) -{ - int isr; - - switch(ether_type) { -#ifdef IPX - case ETHERTYPE_IPX: - isr = NETISR_IPX; - break; -#endif -#ifdef INET - case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return (0); - isr = NETISR_IP; - break; - - case ETHERTYPE_ARP: - isr = NETISR_ARP; - break; -#endif - default: - return (EPROTONOSUPPORT); - } - netisr_dispatch(isr, m); - return (0); -} - -static int 
__inline -ef_inputSNAP(struct mbuf *m, struct ether_header *eh, struct llc* l, - u_short ether_type) -{ - int isr; - - switch(ether_type) { -#ifdef IPX - case ETHERTYPE_IPX: - m_adj(m, 8); - isr = NETISR_IPX; - break; -#endif - default: - return (EPROTONOSUPPORT); - } - netisr_dispatch(isr, m); - return (0); -} - -static int __inline -ef_input8022(struct mbuf *m, struct ether_header *eh, struct llc* l, - u_short ether_type) -{ - int isr; - - switch(ether_type) { -#ifdef IPX - case 0xe0: - m_adj(m, 3); - isr = NETISR_IPX; - break; -#endif - default: - return (EPROTONOSUPPORT); - } - netisr_dispatch(isr, m); - return (0); -} - -/* - * Called from ether_input() - */ -static int -ef_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m) -{ - u_short ether_type; - int ft = -1; - struct efnet *efp; - struct ifnet *eifp; - struct llc *l; - struct ef_link *efl; - int isr; - - ether_type = ntohs(eh->ether_type); - l = NULL; - if (ether_type < ETHERMTU) { - l = mtod(m, struct llc*); - if (l->llc_dsap == 0xff && l->llc_ssap == 0xff) { - /* - * Novell's "802.3" frame - */ - ft = ETHER_FT_8023; - } else if (l->llc_dsap == 0xaa && l->llc_ssap == 0xaa) { - /* - * 802.2/SNAP - */ - ft = ETHER_FT_SNAP; - ether_type = ntohs(l->llc_un.type_snap.ether_type); - } else if (l->llc_dsap == l->llc_ssap) { - /* - * 802.3/802.2 - */ - ft = ETHER_FT_8022; - ether_type = l->llc_ssap; - } - } else - ft = ETHER_FT_EII; - - if (ft == -1) { - EFDEBUG("Unrecognised ether_type %x\n", ether_type); - return EPROTONOSUPPORT; - } - - /* - * Check if interface configured for the given frame - */ - efp = NULL; - SLIST_FOREACH(efl, &efdev, el_next) { - if (efl->el_ifp == ifp) { - efp = efl->el_units[ft]; - break; - } - } - if (efp == NULL) { - EFDEBUG("Can't find if for %d\n", ft); - return EPROTONOSUPPORT; - } - eifp = efp->ef_ifp; - if ((eifp->if_flags & IFF_UP) == 0) - return EPROTONOSUPPORT; - eifp->if_ibytes += m->m_pkthdr.len + sizeof (*eh); - m->m_pkthdr.rcvif = eifp; - - BPF_MTAP2(eifp, 
eh, ETHER_HDR_LEN, m); - /* - * Now we ready to adjust mbufs and pass them to protocol intr's - */ - switch(ft) { - case ETHER_FT_EII: - return (ef_inputEII(m, eh, ether_type)); -#ifdef IPX - case ETHER_FT_8023: /* only IPX can be here */ - isr = NETISR_IPX; - break; -#endif - case ETHER_FT_SNAP: - return (ef_inputSNAP(m, eh, l, ether_type)); - case ETHER_FT_8022: - return (ef_input8022(m, eh, l, ether_type)); - default: - EFDEBUG("No support for frame %d and proto %04x\n", - ft, ether_type); - return (EPROTONOSUPPORT); - } - netisr_dispatch(isr, m); - return (0); -} - -static int -ef_output(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, short *tp, - int *hlen) -{ - struct efnet *sc = (struct efnet*)ifp->if_softc; - struct mbuf *m = *mp; - u_char *cp; - short type; - - if (ifp->if_type != IFT_XETHER) - return ENETDOWN; - switch (sc->ef_frametype) { - case ETHER_FT_EII: -#ifdef IPX - type = htons(ETHERTYPE_IPX); -#else - return EPFNOSUPPORT; -#endif - break; - case ETHER_FT_8023: - type = htons(m->m_pkthdr.len); - break; - case ETHER_FT_8022: - M_PREPEND(m, ETHER_HDR_LEN + 3, M_WAIT); - /* - * Ensure that ethernet header and next three bytes - * will fit into single mbuf - */ - m = m_pullup(m, ETHER_HDR_LEN + 3); - if (m == NULL) { - *mp = NULL; - return ENOBUFS; - } - m_adj(m, ETHER_HDR_LEN); - type = htons(m->m_pkthdr.len); - cp = mtod(m, u_char *); - *cp++ = 0xE0; - *cp++ = 0xE0; - *cp++ = 0x03; - *hlen += 3; - break; - case ETHER_FT_SNAP: - M_PREPEND(m, 8, M_WAIT); - type = htons(m->m_pkthdr.len); - cp = mtod(m, u_char *); - bcopy("\xAA\xAA\x03\x00\x00\x00\x81\x37", cp, 8); - *hlen += 8; - break; - default: - return EPFNOSUPPORT; - } - *mp = m; - *tp = type; - return 0; -} - -/* - * Create clone from the given interface - */ -static int -ef_clone(struct ef_link *efl, int ft) -{ - struct efnet *efp; - struct ifnet *eifp; - struct ifnet *ifp = efl->el_ifp; - - efp = (struct efnet*)malloc(sizeof(struct efnet), M_IFADDR, - M_WAITOK | M_ZERO); - if (efp 
== NULL) - return ENOMEM; - efp->ef_pifp = ifp; - efp->ef_frametype = ft; - eifp = efp->ef_ifp = if_alloc(IFT_ETHER); - if (eifp == NULL) { - free(efp, M_IFADDR); - return (ENOSPC); - } - snprintf(eifp->if_xname, IFNAMSIZ, - "%sf%d", ifp->if_xname, efp->ef_frametype); - eifp->if_dname = "ef"; - eifp->if_dunit = IF_DUNIT_NONE; - eifp->if_softc = efp; - if (ifp->if_ioctl) - eifp->if_ioctl = ef_ioctl; - efl->el_units[ft] = efp; - return 0; -} - -static int -ef_load(void) -{ - VNET_ITERATOR_DECL(vnet_iter); - struct ifnet *ifp; - struct efnet *efp; - struct ef_link *efl = NULL, *efl_temp; - int error = 0, d; - - VNET_LIST_RLOCK(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - - /* - * XXXRW: The following loop walks the ifnet list while - * modifying it, something not well-supported by ifnet - * locking. To avoid lock upgrade/recursion issues, manually - * acquire a write lock of ifnet_sxlock here, rather than a - * read lock, so that when if_alloc() recurses the lock, we - * don't panic. This structure, in which if_ef automatically - * attaches to all ethernet interfaces, should be replaced - * with a model like that found in if_vlan, in which - * interfaces are explicitly configured, which would avoid - * this (and other) problems. 
- */ - sx_xlock(&ifnet_sxlock); - TAILQ_FOREACH(ifp, &V_ifnet, if_link) { - if (ifp->if_type != IFT_ETHER) continue; - EFDEBUG("Found interface %s\n", ifp->if_xname); - efl = (struct ef_link*)malloc(sizeof(struct ef_link), - M_IFADDR, M_WAITOK | M_ZERO); - if (efl == NULL) { - error = ENOMEM; - break; - } - - efl->el_ifp = ifp; -#ifdef ETHER_II - error = ef_clone(efl, ETHER_FT_EII); - if (error) break; -#endif -#ifdef ETHER_8023 - error = ef_clone(efl, ETHER_FT_8023); - if (error) break; -#endif -#ifdef ETHER_8022 - error = ef_clone(efl, ETHER_FT_8022); - if (error) break; -#endif -#ifdef ETHER_SNAP - error = ef_clone(efl, ETHER_FT_SNAP); - if (error) break; -#endif - efcount++; - SLIST_INSERT_HEAD(&efdev, efl, el_next); - } - sx_xunlock(&ifnet_sxlock); - CURVNET_RESTORE(); - } - VNET_LIST_RUNLOCK(); - if (error) { - if (efl) - SLIST_INSERT_HEAD(&efdev, efl, el_next); - SLIST_FOREACH_SAFE(efl, &efdev, el_next, efl_temp) { - for (d = 0; d < EF_NFT; d++) - if (efl->el_units[d]) { - if (efl->el_units[d]->ef_pifp != NULL) - if_free(efl->el_units[d]->ef_pifp); - free(efl->el_units[d], M_IFADDR); - } - free(efl, M_IFADDR); - } - return error; - } - SLIST_FOREACH(efl, &efdev, el_next) { - for (d = 0; d < EF_NFT; d++) { - efp = efl->el_units[d]; - if (efp) - ef_attach(efp); - } - } - ef_inputp = ef_input; - ef_outputp = ef_output; - EFDEBUG("Loaded\n"); - return 0; -} - -static int -ef_unload(void) -{ - struct efnet *efp; - struct ef_link *efl; - int d; - - ef_inputp = NULL; - ef_outputp = NULL; - SLIST_FOREACH(efl, &efdev, el_next) { - for (d = 0; d < EF_NFT; d++) { - efp = efl->el_units[d]; - if (efp) { - ef_detach(efp); - } - } - } - EFDEBUG("Unloaded\n"); - return 0; -} - -static int -if_ef_modevent(module_t mod, int type, void *data) -{ - switch ((modeventtype_t)type) { - case MOD_LOAD: - return ef_load(); - case MOD_UNLOAD: - return ef_unload(); - default: - return EOPNOTSUPP; - } - return 0; -} - -static moduledata_t if_ef_mod = { - "if_ef", if_ef_modevent, NULL -}; 
- -DECLARE_MODULE(if_ef, if_ef_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c index 91d34722..d0d065b8 100644 --- a/freebsd/sys/net/if_enc.c +++ b/freebsd/sys/net/if_enc.c @@ -2,6 +2,7 @@ /*- * Copyright (c) 2006 The FreeBSD Project. + * Copyright (c) 2015 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,10 +32,10 @@ #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_enc.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> +#include <sys/hhook.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mbuf.h> @@ -46,6 +47,8 @@ #include <sys/sysctl.h> #include <net/if.h> +#include <net/if_enc.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_types.h> #include <net/pfil.h> @@ -80,56 +83,66 @@ struct enchdr { u_int32_t spi; u_int32_t flags; }; - -struct ifnet *encif; -static struct mtx enc_mtx; - struct enc_softc { struct ifnet *sc_ifp; }; +static VNET_DEFINE(struct enc_softc *, enc_sc); +#define V_enc_sc VNET(enc_sc) +static VNET_DEFINE(struct if_clone *, enc_cloner); +#define V_enc_cloner VNET(enc_cloner) static int enc_ioctl(struct ifnet *, u_long, caddr_t); -static int enc_output(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro); +static int enc_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); static int enc_clone_create(struct if_clone *, int, caddr_t); static void enc_clone_destroy(struct ifnet *); +static int enc_add_hhooks(struct enc_softc *); +static void enc_remove_hhooks(struct enc_softc *); -IFC_SIMPLE_DECLARE(enc, 1); - -/* - * Sysctls. - */ +static const char encname[] = "enc"; /* * Before and after are relative to when we are stripping the * outer IP header. 
*/ -static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl"); +static VNET_DEFINE(int, filter_mask_in) = IPSEC_ENC_BEFORE; +static VNET_DEFINE(int, bpf_mask_in) = IPSEC_ENC_BEFORE; +static VNET_DEFINE(int, filter_mask_out) = IPSEC_ENC_BEFORE; +static VNET_DEFINE(int, bpf_mask_out) = IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER; +#define V_filter_mask_in VNET(filter_mask_in) +#define V_bpf_mask_in VNET(bpf_mask_in) +#define V_filter_mask_out VNET(filter_mask_out) +#define V_bpf_mask_out VNET(bpf_mask_out) +static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl"); static SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW, 0, "enc input sysctl"); -static int ipsec_filter_mask_in = ENC_BEFORE; -SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW, - &ipsec_filter_mask_in, 0, "IPsec input firewall filter mask"); -static int ipsec_bpf_mask_in = ENC_BEFORE; -SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW, - &ipsec_bpf_mask_in, 0, "IPsec input bpf mask"); - static SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW, 0, "enc output sysctl"); -static int ipsec_filter_mask_out = ENC_BEFORE; -SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW, - &ipsec_filter_mask_out, 0, "IPsec output firewall filter mask"); -static int ipsec_bpf_mask_out = ENC_BEFORE|ENC_AFTER; -SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW, - &ipsec_bpf_mask_out, 0, "IPsec output bpf mask"); +SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask, + CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(filter_mask_in), 0, + "IPsec input firewall filter mask"); +SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, + CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(bpf_mask_in), 0, + "IPsec input bpf mask"); +SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask, + CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(filter_mask_out), 0, + "IPsec output firewall filter mask"); +SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, + CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(bpf_mask_out), 0, + "IPsec 
output bpf mask"); static void enc_clone_destroy(struct ifnet *ifp) { - KASSERT(ifp != encif, ("%s: destroying encif", __func__)); + struct enc_softc *sc; + + sc = ifp->if_softc; + KASSERT(sc == V_enc_sc, ("sc != ifp->if_softc")); bpfdetach(ifp); if_detach(ifp); if_free(ifp); + free(sc, M_DEVBUF); + V_enc_sc = NULL; } static int @@ -138,244 +151,277 @@ enc_clone_create(struct if_clone *ifc, int unit, caddr_t params) struct ifnet *ifp; struct enc_softc *sc; - sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); + sc = malloc(sizeof(struct enc_softc), M_DEVBUF, + M_WAITOK | M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ENC); if (ifp == NULL) { free(sc, M_DEVBUF); return (ENOSPC); } - - if_initname(ifp, ifc->ifc_name, unit); + if (V_enc_sc != NULL) { + if_free(ifp); + free(sc, M_DEVBUF); + return (EEXIST); + } + V_enc_sc = sc; + if_initname(ifp, encname, unit); ifp->if_mtu = ENCMTU; ifp->if_ioctl = enc_ioctl; ifp->if_output = enc_output; - ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_softc = sc; if_attach(ifp); bpfattach(ifp, DLT_ENC, sizeof(struct enchdr)); - - mtx_lock(&enc_mtx); - /* grab a pointer to enc0, ignore the rest */ - if (encif == NULL) - encif = ifp; - mtx_unlock(&enc_mtx); - return (0); } static int -enc_modevent(module_t mod, int type, void *data) +enc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) { - switch (type) { - case MOD_LOAD: - mtx_init(&enc_mtx, "enc mtx", NULL, MTX_DEF); - if_clone_attach(&enc_cloner); - break; - case MOD_UNLOAD: - printf("enc module unload - not possible for this module\n"); - return (EINVAL); - default: - return (EOPNOTSUPP); - } + + m_freem(m); return (0); } -static moduledata_t enc_mod = { - "if_enc", - enc_modevent, - 0 -}; - -DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); - static int -enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct route *ro) +enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - m_freem(m); + + if (cmd 
!= SIOCSIFFLAGS) + return (EINVAL); + if (ifp->if_flags & IFF_UP) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + else + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; return (0); } /* - * Process an ioctl request. + * One helper hook function is used by any hook points. + * + from hhook_type we can determine the packet direction: + * HHOOK_TYPE_IPSEC_IN or HHOOK_TYPE_IPSEC_OUT; + * + from hhook_id we can determine address family: AF_INET or AF_INET6; + * + udata contains pointer to enc_softc; + * + ctx_data contains pointer to struct ipsec_ctx_data. */ -/* ARGSUSED */ static int -enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data, + void *hdata, struct osd *hosd) { - int error = 0; - - mtx_lock(&enc_mtx); + struct enchdr hdr; + struct ipsec_ctx_data *ctx; + struct enc_softc *sc; + struct ifnet *ifp, *rcvif; + struct pfil_head *ph; + int pdir; - switch (cmd) { + sc = (struct enc_softc *)udata; + ifp = sc->sc_ifp; + if ((ifp->if_flags & IFF_UP) == 0) + return (0); - case SIOCSIFFLAGS: - if (ifp->if_flags & IFF_UP) - ifp->if_drv_flags |= IFF_DRV_RUNNING; - else - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + ctx = (struct ipsec_ctx_data *)ctx_data; + /* XXX: wrong hook point was used by caller? 
*/ + if (ctx->af != hhook_id) + return (EPFNOSUPPORT); + + if (((hhook_type == HHOOK_TYPE_IPSEC_IN && + (ctx->enc & V_bpf_mask_in) != 0) || + (hhook_type == HHOOK_TYPE_IPSEC_OUT && + (ctx->enc & V_bpf_mask_out) != 0)) && + bpf_peers_present(ifp->if_bpf) != 0) { + hdr.af = ctx->af; + hdr.spi = ctx->sav->spi; + hdr.flags = 0; + if (ctx->sav->alg_enc != SADB_EALG_NONE) + hdr.flags |= M_CONF; + if (ctx->sav->alg_auth != SADB_AALG_NONE) + hdr.flags |= M_AUTH; + bpf_mtap2(ifp->if_bpf, &hdr, sizeof(hdr), *ctx->mp); + } + switch (hhook_type) { + case HHOOK_TYPE_IPSEC_IN: + if (ctx->enc == IPSEC_ENC_BEFORE) { + /* Do accounting only once */ + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, + (*ctx->mp)->m_pkthdr.len); + } + if ((ctx->enc & V_filter_mask_in) == 0) + return (0); /* skip pfil processing */ + pdir = PFIL_IN; + break; + case HHOOK_TYPE_IPSEC_OUT: + if (ctx->enc == IPSEC_ENC_BEFORE) { + /* Do accounting only once */ + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, + (*ctx->mp)->m_pkthdr.len); + } + if ((ctx->enc & V_filter_mask_out) == 0) + return (0); /* skip pfil processing */ + pdir = PFIL_OUT; break; - default: - error = EINVAL; + return (EINVAL); } - mtx_unlock(&enc_mtx); - return (error); + switch (hhook_id) { +#ifdef INET + case AF_INET: + ph = &V_inet_pfil_hook; + break; +#endif +#ifdef INET6 + case AF_INET6: + ph = &V_inet6_pfil_hook; + break; +#endif + default: + ph = NULL; + } + if (ph == NULL || !PFIL_HOOKED(ph)) + return (0); + /* Make a packet looks like it was received on enc(4) */ + rcvif = (*ctx->mp)->m_pkthdr.rcvif; + (*ctx->mp)->m_pkthdr.rcvif = ifp; + if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, NULL) != 0 || + *ctx->mp == NULL) { + *ctx->mp = NULL; /* consumed by filter */ + return (EACCES); + } + (*ctx->mp)->m_pkthdr.rcvif = rcvif; + return (0); } -int -ipsec_filter(struct mbuf **mp, int dir, int flags) +static int +enc_add_hhooks(struct enc_softc *sc) { - int error, i; - 
struct ip *ip; - - KASSERT(encif != NULL, ("%s: encif is null", __func__)); - KASSERT(flags & (ENC_IN|ENC_OUT), - ("%s: invalid flags: %04x", __func__, flags)); - - if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0) - return (0); + struct hookinfo hki; + int error; - if (flags & ENC_IN) { - if ((flags & ipsec_filter_mask_in) == 0) - return (0); - } else { - if ((flags & ipsec_filter_mask_out) == 0) - return (0); - } - - /* Skip pfil(9) if no filters are loaded */ - if (1 + error = EPFNOSUPPORT; + hki.hook_func = enc_hhook; + hki.hook_helper = NULL; + hki.hook_udata = sc; #ifdef INET - && !PFIL_HOOKED(&V_inet_pfil_hook) + hki.hook_id = AF_INET; + hki.hook_type = HHOOK_TYPE_IPSEC_IN; + error = hhook_add_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET], + &hki, HHOOK_WAITOK); + if (error != 0) + return (error); + hki.hook_type = HHOOK_TYPE_IPSEC_OUT; + error = hhook_add_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET], + &hki, HHOOK_WAITOK); + if (error != 0) + return (error); #endif #ifdef INET6 - && !PFIL_HOOKED(&V_inet6_pfil_hook) + hki.hook_id = AF_INET6; + hki.hook_type = HHOOK_TYPE_IPSEC_IN; + error = hhook_add_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET6], + &hki, HHOOK_WAITOK); + if (error != 0) + return (error); + hki.hook_type = HHOOK_TYPE_IPSEC_OUT; + error = hhook_add_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET6], + &hki, HHOOK_WAITOK); + if (error != 0) + return (error); #endif - ) { - return (0); - } + return (error); +} - i = min((*mp)->m_pkthdr.len, max_protohdr); - if ((*mp)->m_len < i) { - *mp = m_pullup(*mp, i); - if (*mp == NULL) { - printf("%s: m_pullup failed\n", __func__); - return (-1); - } - } +static void +enc_remove_hhooks(struct enc_softc *sc) +{ + struct hookinfo hki; - error = 0; - ip = mtod(*mp, struct ip *); - switch (ip->ip_v) { + hki.hook_func = enc_hhook; + hki.hook_helper = NULL; + hki.hook_udata = sc; #ifdef INET - case 4: - /* - * before calling the firewall, swap fields the same as - * IP does. 
here we assume the header is contiguous - */ - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - - error = pfil_run_hooks(&V_inet_pfil_hook, mp, - encif, dir, NULL); - - if (*mp == NULL || error != 0) - break; - - /* restore byte ordering */ - ip = mtod(*mp, struct ip *); - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); - break; + hki.hook_id = AF_INET; + hki.hook_type = HHOOK_TYPE_IPSEC_IN; + hhook_remove_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET], &hki); + hki.hook_type = HHOOK_TYPE_IPSEC_OUT; + hhook_remove_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET], &hki); #endif #ifdef INET6 - case 6: - error = pfil_run_hooks(&V_inet6_pfil_hook, mp, - encif, dir, NULL); - break; + hki.hook_id = AF_INET6; + hki.hook_type = HHOOK_TYPE_IPSEC_IN; + hhook_remove_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET6], &hki); + hki.hook_type = HHOOK_TYPE_IPSEC_OUT; + hhook_remove_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET6], &hki); #endif - default: - printf("%s: unknown IP version\n", __func__); - } +} - /* - * If the mbuf was consumed by the filter for requeueing (dummynet, etc) - * then error will be zero but we still want to return an error to our - * caller so the null mbuf isn't forwarded further. 
- */ - if (*mp == NULL && error == 0) - return (-1); /* Consumed by the filter */ - if (*mp == NULL) - return (error); - if (error != 0) - goto bad; +static void +vnet_enc_init(const void *unused __unused) +{ - return (error); + V_enc_sc = NULL; + V_enc_cloner = if_clone_simple(encname, enc_clone_create, + enc_clone_destroy, 1); +} +VNET_SYSINIT(vnet_enc_init, SI_SUB_PSEUDO, SI_ORDER_ANY, + vnet_enc_init, NULL); -bad: - m_freem(*mp); - *mp = NULL; - return (error); +static void +vnet_enc_init_proto(void *unused __unused) +{ + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); + + if (enc_add_hhooks(V_enc_sc) != 0) + enc_clone_destroy(V_enc_sc->sc_ifp); } +VNET_SYSINIT(vnet_enc_init_proto, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_enc_init_proto, NULL); -void -ipsec_bpf(struct mbuf *m, struct secasvar *sav, int af, int flags) +static void +vnet_enc_uninit(const void *unused __unused) { - int mflags; - struct enchdr hdr; + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); - KASSERT(encif != NULL, ("%s: encif is null", __func__)); - KASSERT(flags & (ENC_IN|ENC_OUT), - ("%s: invalid flags: %04x", __func__, flags)); + if_clone_detach(V_enc_cloner); +} +VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, + vnet_enc_uninit, NULL); - if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0) - return; +/* + * The hhook consumer needs to go before ip[6]_destroy are called on + * SI_ORDER_THIRD. 
+ */ +static void +vnet_enc_uninit_hhook(const void *unused __unused) +{ + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); - if (flags & ENC_IN) { - if ((flags & ipsec_bpf_mask_in) == 0) - return; - } else { - if ((flags & ipsec_bpf_mask_out) == 0) - return; - } + enc_remove_hhooks(V_enc_sc); +} +VNET_SYSUNINIT(vnet_enc_uninit_hhook, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, + vnet_enc_uninit_hhook, NULL); - if (bpf_peers_present(encif->if_bpf)) { - mflags = 0; - hdr.spi = 0; - if (!sav) { - struct m_tag *mtag; - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - if (mtag != NULL) { - struct tdb_ident *tdbi; - tdbi = (struct tdb_ident *) (mtag + 1); - if (tdbi->alg_enc != SADB_EALG_NONE) - mflags |= M_CONF; - if (tdbi->alg_auth != SADB_AALG_NONE) - mflags |= M_AUTH; - hdr.spi = tdbi->spi; - } - } else { - if (sav->alg_enc != SADB_EALG_NONE) - mflags |= M_CONF; - if (sav->alg_auth != SADB_AALG_NONE) - mflags |= M_AUTH; - hdr.spi = sav->spi; - } +static int +enc_modevent(module_t mod, int type, void *data) +{ - /* - * We need to prepend the address family as a four byte - * field. Cons up a dummy header to pacify bpf. This - * is safe because bpf will only read from the mbuf - * (i.e., it won't try to free it or keep a pointer a - * to it). 
- */ - hdr.af = af; - /* hdr.spi already set above */ - hdr.flags = mflags; - - bpf_mtap2(encif->if_bpf, &hdr, sizeof(hdr), m); + switch (type) { + case MOD_LOAD: + case MOD_UNLOAD: + break; + default: + return (EOPNOTSUPP); } + return (0); } + +static moduledata_t enc_mod = { + "if_enc", + enc_modevent, + 0 +}; + +DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/freebsd/sys/net/if_enc.h b/freebsd/sys/net/if_enc.h index 59a55fcf..941ed12a 100644 --- a/freebsd/sys/net/if_enc.h +++ b/freebsd/sys/net/if_enc.h @@ -30,6 +30,13 @@ #ifndef _NET_IF_ENC_H #define _NET_IF_ENC_H -extern struct ifnet *encif; +struct ipsec_ctx_data { + struct mbuf **mp; + struct secasvar *sav; + uint8_t af; +#define IPSEC_ENC_BEFORE 0x01 +#define IPSEC_ENC_AFTER 0x02 + uint8_t enc; +}; #endif /* _NET_IF_ENC_H */ diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c index 755e608a..b4f73d68 100644 --- a/freebsd/sys/net/if_epair.c +++ b/freebsd/sys/net/if_epair.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/kernel.h> +#include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/module.h> #include <sys/refcount.h> @@ -67,6 +68,7 @@ __FBSDID("$FreeBSD$"); #include <net/bpf.h> #include <net/ethernet.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_media.h> #include <net/if_var.h> @@ -74,8 +76,6 @@ __FBSDID("$FreeBSD$"); #include <net/netisr.h> #include <net/vnet.h> -#define EPAIRNAME "epair" - SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); @@ -102,9 +102,11 @@ static int epair_clone_match(struct if_clone *, const char *); static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); static int epair_clone_destroy(struct if_clone *, struct ifnet *); -/* Netisr realted definitions and sysctl. */ +static const char epairname[] = "epair"; + +/* Netisr related definitions and sysctl. 
*/ static struct netisr_handler epair_nh = { - .nh_name = EPAIRNAME, + .nh_name = epairname, .nh_proto = NETISR_EPAIR, .nh_policy = NETISR_POLICY_CPU, .nh_handler = epair_nh_sintr, @@ -170,12 +172,11 @@ STAILQ_HEAD(eid_list, epair_ifp_drain); #define EPAIR_REFCOUNT_ASSERT(a, p) #endif -static MALLOC_DEFINE(M_EPAIR, EPAIRNAME, +static MALLOC_DEFINE(M_EPAIR, epairname, "Pair of virtual cross-over connected Ethernet-like interfaces"); -static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER( - EPAIRNAME, NULL, IF_MAXUNIT, - NULL, epair_clone_match, epair_clone_create, epair_clone_destroy); +static VNET_DEFINE(struct if_clone *, epair_cloner); +#define V_epair_cloner VNET(epair_cloner) /* * DPCPU area and functions. @@ -421,7 +422,7 @@ epair_start_locked(struct ifnet *ifp) */ if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (oifp->if_flags & IFF_UP) ==0) { - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); continue; } @@ -437,15 +438,15 @@ epair_start_locked(struct ifnet *ifp) error = netisr_queue(NETISR_EPAIR, m); CURVNET_RESTORE(); if (!error) { - ifp->if_opackets++; + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* Someone else received the packet. */ - oifp->if_ipackets++; + if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); } else { /* The packet was freed already. 
*/ epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_OACTIVE; (void) epair_add_ifp_for_draining(ifp); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); EPAIR_REFCOUNT_RELEASE(&sc->refcount); EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, ("%s: ifp=%p sc->refcount not >= 1: %d", @@ -506,7 +507,7 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) oifp = sc->oifp; if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (oifp->if_flags & IFF_UP) ==0) { - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (0); } @@ -515,17 +516,17 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); #ifdef ALTQ - /* Support ALTQ via the clasic if_start() path. */ + /* Support ALTQ via the classic if_start() path. */ IF_LOCK(&ifp->if_snd); if (ALTQ_IS_ENABLED(&ifp->if_snd)) { ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); if (error) - ifp->if_snd.ifq_drops++; + if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); IF_UNLOCK(&ifp->if_snd); if (!error) { - ifp->if_obytes += len; + if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mflags & (M_BCAST|M_MCAST)) - ifp->if_omcasts++; + if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) epair_start_locked(ifp); @@ -559,22 +560,22 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) error = netisr_queue(NETISR_EPAIR, m); CURVNET_RESTORE(); if (!error) { - ifp->if_opackets++; + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, * but as we bypass all this we have to duplicate * the logic another time. */ - ifp->if_obytes += len; + if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mflags & (M_BCAST|M_MCAST)) - ifp->if_omcasts++; + if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); /* Someone else received the packet. */ - oifp->if_ipackets++; + if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); } else { /* The packet was freed already. 
*/ epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_OACTIVE; - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); EPAIR_REFCOUNT_RELEASE(&sc->refcount); EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, ("%s: ifp=%p sc->refcount not >= 1: %d", @@ -694,10 +695,10 @@ epair_clone_match(struct if_clone *ifc, const char *name) * - epair<n> * but not the epair<n>[ab] versions. */ - if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0) + if (strncmp(epairname, name, sizeof(epairname)-1) != 0) return (0); - for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) { + for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } @@ -716,7 +717,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) /* * We are abusing params to create our second interface. - * Actually we already created it and called if_clone_createif() + * Actually we already created it and called if_clone_create() * for it to do the official insertion procedure the moment we knew * it cannot fail anymore. So just do attach it here. */ @@ -763,10 +764,17 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifc_free_unit(ifc, unit); return (ENOSPC); } - *dp = 'a'; + *dp = 'b'; /* Must not change dp so we can replace 'a' by 'b' later. */ *(dp+1) = '\0'; + /* Check if 'a' and 'b' interfaces already exist. */ + if (ifunit(name) != NULL) + return (EEXIST); + *dp = 'a'; + if (ifunit(name) != NULL) + return (EEXIST); + /* Allocate memory for both [ab] interfaces */ sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); EPAIR_REFCOUNT_INIT(&sca->refcount, 1); @@ -801,15 +809,23 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) * cache locality but we can at least allow parallelism. 
*/ sca->cpuid = - netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); + netisr_get_cpuid(sca->ifp->if_index); scb->cpuid = - netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); + netisr_get_cpuid(scb->ifp->if_index); + + /* Initialise pseudo media types. */ + ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); + ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); + ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); + ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); + ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); + ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); /* Finish initialization of interface <n>a. */ ifp = sca->ifp; ifp->if_softc = sca; strlcpy(ifp->if_xname, name, IFNAMSIZ); - ifp->if_dname = ifc->ifc_name; + ifp->if_dname = epairname; ifp->if_dunit = unit; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = IFCAP_VLAN_MTU; @@ -827,7 +843,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) sca->if_qflush = ifp->if_qflush; ifp->if_qflush = epair_qflush; ifp->if_transmit = epair_transmit; - ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ + ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ /* Swap the name and finish initialization of interface <n>b. */ *dp = 'b'; @@ -835,7 +851,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp = scb->ifp; ifp->if_softc = scb; strlcpy(ifp->if_xname, name, IFNAMSIZ); - ifp->if_dname = ifc->ifc_name; + ifp->if_dname = epairname; ifp->if_dunit = unit; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = IFCAP_VLAN_MTU; @@ -845,15 +861,15 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_init = epair_init; ifp->if_snd.ifq_maxlen = ifqmaxlen; /* We need to play some tricks here for the second interface. 
*/ - strlcpy(name, EPAIRNAME, len); + strlcpy(name, epairname, len); error = if_clone_create(name, len, (caddr_t)scb); if (error) - panic("%s: if_clone_createif() for our 2nd iface failed: %d", + panic("%s: if_clone_create() for our 2nd iface failed: %d", __func__, error); scb->if_qflush = ifp->if_qflush; ifp->if_qflush = epair_qflush; ifp->if_transmit = epair_transmit; - ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ + ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ /* * Restore name to <n>a as the ifp for this will go into the @@ -862,14 +878,6 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) strlcpy(name, sca->ifp->if_xname, len); DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); - /* Initialise pseudo media types. */ - ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); - ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); - ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); - ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); - ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); - ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); - /* Tell the world, that we are ready to rock. 
*/ sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; @@ -947,6 +955,31 @@ epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) return (0); } +static void +vnet_epair_init(const void *unused __unused) +{ + + V_epair_cloner = if_clone_advanced(epairname, 0, + epair_clone_match, epair_clone_create, epair_clone_destroy); +#ifdef VIMAGE + netisr_register_vnet(&epair_nh); +#endif +} +VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, + vnet_epair_init, NULL); + +static void +vnet_epair_uninit(const void *unused __unused) +{ + +#ifdef VIMAGE + netisr_unregister_vnet(&epair_nh); +#endif + if_clone_detach(V_epair_cloner); +} +VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, + vnet_epair_uninit, NULL); + static int epair_modevent(module_t mod, int type, void *data) { @@ -962,16 +995,14 @@ epair_modevent(module_t mod, int type, void *data) epair_nh.nh_qlimit = qlimit; #endif /* __rtems__ */ netisr_register(&epair_nh); - if_clone_attach(&epair_cloner); if (bootverbose) - printf("%s initialized.\n", EPAIRNAME); + printf("%s initialized.\n", epairname); break; case MOD_UNLOAD: - if_clone_detach(&epair_cloner); netisr_unregister(&epair_nh); epair_dpcpu_detach(); if (bootverbose) - printf("%s unloaded.\n", EPAIRNAME); + printf("%s unloaded.\n", epairname); break; default: return (EOPNOTSUPP); @@ -985,5 +1016,5 @@ static moduledata_t epair_mod = { 0 }; -DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); MODULE_VERSION(if_epair, 1); diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c index 5ee2606e..1d22c0a6 100644 --- a/freebsd/sys/net/if_ethersubr.c +++ b/freebsd/sys/net/if_ethersubr.c @@ -32,12 +32,11 @@ * $FreeBSD$ */ -#include <rtems/bsd/local/opt_atalk.h> #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipx.h> #include 
<rtems/bsd/local/opt_netgraph.h> #include <rtems/bsd/local/opt_mbuf_profiling.h> +#include <rtems/bsd/local/opt_rss.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> @@ -47,12 +46,13 @@ #include <sys/module.h> #include <sys/mbuf.h> #include <sys/random.h> -#include <sys/rwlock.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/sysctl.h> +#include <sys/uuid.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_arp.h> #include <net/netisr.h> #include <net/route.h> @@ -64,43 +64,22 @@ #include <net/if_bridgevar.h> #include <net/if_vlan_var.h> #include <net/if_llatbl.h> -#include <net/pf_mtag.h> +#include <net/pfil.h> +#include <net/rss_config.h> #include <net/vnet.h> +#include <netpfil/pf/pf_mtag.h> + #if defined(INET) || defined(INET6) #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/if_ether.h> #include <netinet/ip_carp.h> #include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#include <netpfil/ipfw/ip_fw_private.h> #endif #ifdef INET6 #include <netinet6/nd6.h> #endif - -#ifdef IPX -#include <netipx/ipx.h> -#include <netipx/ipx_if.h> -#endif - -int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m); -int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp, - struct sockaddr *dst, short *tp, int *hlen); - -#ifdef NETATALK -#include <netatalk/at.h> -#include <netatalk/at_var.h> -#include <netatalk/at_extern.h> - -#define llc_snap_org_code llc_un.type_snap.org_code -#define llc_snap_ether_type llc_un.type_snap.ether_type - -extern u_char at_org_code[3]; -extern u_char aarp_org_code[3]; -#endif /* NETATALK */ - #include <security/mac/mac_framework.h> #ifdef CTASSERT @@ -108,6 +87,8 @@ CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif +VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */ + /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void 
(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); @@ -134,22 +115,160 @@ static int ether_resolvemulti(struct ifnet *, struct sockaddr **, #ifdef VIMAGE static void ether_reassign(struct ifnet *, struct vnet *, char *); #endif +static int ether_requestencap(struct ifnet *, struct if_encap_req *); -/* XXX: should be in an arp support file, not here */ -static MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals"); - -#define ETHER_IS_BROADCAST(addr) \ - (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0) #define senderr(e) do { error = (e); goto bad;} while (0) +static void +update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst) +{ + int csum_flags = 0; + + if (src->m_pkthdr.csum_flags & CSUM_IP) + csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); + if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) + csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); + if (src->m_pkthdr.csum_flags & CSUM_SCTP) + csum_flags |= CSUM_SCTP_VALID; + dst->m_pkthdr.csum_flags |= csum_flags; + if (csum_flags & CSUM_DATA_VALID) + dst->m_pkthdr.csum_data = 0xffff; +} + +/* + * Handle link-layer encapsulation requests. 
+ */ +static int +ether_requestencap(struct ifnet *ifp, struct if_encap_req *req) +{ + struct ether_header *eh; + struct arphdr *ah; + uint16_t etype; + const u_char *lladdr; + + if (req->rtype != IFENCAP_LL) + return (EOPNOTSUPP); + + if (req->bufsize < ETHER_HDR_LEN) + return (ENOMEM); + + eh = (struct ether_header *)req->buf; + lladdr = req->lladdr; + req->lladdr_off = 0; + + switch (req->family) { + case AF_INET: + etype = htons(ETHERTYPE_IP); + break; + case AF_INET6: + etype = htons(ETHERTYPE_IPV6); + break; + case AF_ARP: + ah = (struct arphdr *)req->hdata; + ah->ar_hrd = htons(ARPHRD_ETHER); + + switch(ntohs(ah->ar_op)) { + case ARPOP_REVREQUEST: + case ARPOP_REVREPLY: + etype = htons(ETHERTYPE_REVARP); + break; + case ARPOP_REQUEST: + case ARPOP_REPLY: + default: + etype = htons(ETHERTYPE_ARP); + break; + } + + if (req->flags & IFENCAP_FLAG_BROADCAST) + lladdr = ifp->if_broadcastaddr; + break; + default: + return (EAFNOSUPPORT); + } + + memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type)); + memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); + req->bufsize = sizeof(struct ether_header); + + return (0); +} + + +static int +ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, + const struct sockaddr *dst, struct route *ro, u_char *phdr, + uint32_t *pflags, struct llentry **plle) +{ + struct ether_header *eh; + uint32_t lleflags = 0; + int error = 0; #if defined(INET) || defined(INET6) -int -ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared); -static VNET_DEFINE(int, ether_ipfw); -#define V_ether_ipfw VNET(ether_ipfw) + uint16_t etype; +#endif + + if (plle) + *plle = NULL; + eh = (struct ether_header *)phdr; + + switch (dst->sa_family) { +#ifdef INET + case AF_INET: + if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) + error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, + plle); + else { + if (m->m_flags & M_BCAST) + memcpy(eh->ether_dhost, ifp->if_broadcastaddr, + ETHER_ADDR_LEN); + else { 
+ const struct in_addr *a; + a = &(((const struct sockaddr_in *)dst)->sin_addr); + ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost); + } + etype = htons(ETHERTYPE_IP); + memcpy(&eh->ether_type, &etype, sizeof(etype)); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); + } + break; #endif +#ifdef INET6 + case AF_INET6: + if ((m->m_flags & M_MCAST) == 0) + error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, + plle); + else { + const struct in6_addr *a6; + a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr); + ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost); + etype = htons(ETHERTYPE_IPV6); + memcpy(&eh->ether_type, &etype, sizeof(etype)); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); + } + break; +#endif + default: + if_printf(ifp, "can't handle af%d\n", dst->sa_family); + if (m != NULL) + m_freem(m); + return (EAFNOSUPPORT); + } + + if (error == EHOSTDOWN) { + if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0) + error = EHOSTUNREACH; + } + if (error != 0) + return (error); + + *pflags = RT_MAY_LOOP; + if (lleflags & LLE_IFADDR) + *pflags |= RT_L2_ME; + + return (0); +} /* * Ethernet output routine. 
@@ -159,23 +278,49 @@ static VNET_DEFINE(int, ether_ipfw); */ int ether_output(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro) + const struct sockaddr *dst, struct route *ro) { - short type; - int error = 0, hdrcmplt = 0; - u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN]; - struct llentry *lle = NULL; - struct rtentry *rt0 = NULL; + int error = 0; + char linkhdr[ETHER_HDR_LEN], *phdr; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; int hlen; /* link layer header length */ + uint32_t pflags; + struct llentry *lle = NULL; + struct rtentry *rt0 = NULL; + int addref = 0; + phdr = NULL; + pflags = 0; if (ro != NULL) { - if (!(m->m_flags & (M_BCAST | M_MCAST))) - lle = ro->ro_lle; + /* XXX BPF uses ro_prepend */ + if (ro->ro_prepend != NULL) { + phdr = ro->ro_prepend; + hlen = ro->ro_plen; + } else if (!(m->m_flags & (M_BCAST | M_MCAST))) { + if ((ro->ro_flags & RT_LLE_CACHE) != 0) { + lle = ro->ro_lle; + if (lle != NULL && + (lle->la_flags & LLE_VALID) == 0) { + LLE_FREE(lle); + lle = NULL; /* redundant */ + ro->ro_lle = NULL; + } + if (lle == NULL) { + /* if we lookup, keep cache */ + addref = 1; + } + } + if (lle != NULL) { + phdr = lle->r_linkdata; + hlen = lle->r_hdrlen; + pflags = lle->r_flags; + } + } rt0 = ro->ro_rt; } + #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) @@ -189,153 +334,39 @@ ether_output(struct ifnet *ifp, struct mbuf *m, (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); - hlen = ETHER_HDR_LEN; - switch (dst->sa_family) { -#ifdef INET - case AF_INET: - if (lle != NULL && (lle->la_flags & LLE_VALID)) - memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); - else - error = arpresolve(ifp, rt0, m, dst, edst, &lle); - if (error) + if (phdr == NULL) { + /* No prepend data supplied. Try to calculate ourselves. */ + phdr = linkhdr; + hlen = ETHER_HDR_LEN; + error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags, + addref ? 
&lle : NULL); + if (addref && lle != NULL) + ro->ro_lle = lle; + if (error != 0) return (error == EWOULDBLOCK ? 0 : error); - type = htons(ETHERTYPE_IP); - break; - case AF_ARP: - { - struct arphdr *ah; - ah = mtod(m, struct arphdr *); - ah->ar_hrd = htons(ARPHRD_ETHER); - - loop_copy = 0; /* if this is for us, don't do it */ - - switch(ntohs(ah->ar_op)) { - case ARPOP_REVREQUEST: - case ARPOP_REVREPLY: - type = htons(ETHERTYPE_REVARP); - break; - case ARPOP_REQUEST: - case ARPOP_REPLY: - default: - type = htons(ETHERTYPE_ARP); - break; - } - - if (m->m_flags & M_BCAST) - bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN); - else - bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN); - - } - break; -#endif -#ifdef INET6 - case AF_INET6: - if (lle != NULL && (lle->la_flags & LLE_VALID)) - memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); - else - error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle); - if (error) - return error; - type = htons(ETHERTYPE_IPV6); - break; -#endif -#ifdef IPX - case AF_IPX: - if (ef_outputp) { - error = ef_outputp(ifp, &m, dst, &type, &hlen); - if (error) - goto bad; - } else - type = htons(ETHERTYPE_IPX); - bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host), - (caddr_t)edst, sizeof (edst)); - break; -#endif -#ifdef NETATALK - case AF_APPLETALK: - { - struct at_ifaddr *aa; - - if ((aa = at_ifawithnet((struct sockaddr_at *)dst)) == NULL) - senderr(EHOSTUNREACH); /* XXX */ - if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) { - ifa_free(&aa->aa_ifa); - return (0); - } - /* - * In the phase 2 case, need to prepend an mbuf for the llc header. 
- */ - if ( aa->aa_flags & AFA_PHASE2 ) { - struct llc llc; - - ifa_free(&aa->aa_ifa); - M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT); - if (m == NULL) - senderr(ENOBUFS); - llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP; - llc.llc_control = LLC_UI; - bcopy(at_org_code, llc.llc_snap_org_code, sizeof(at_org_code)); - llc.llc_snap_ether_type = htons( ETHERTYPE_AT ); - bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN); - type = htons(m->m_pkthdr.len); - hlen = LLC_SNAPFRAMELEN + ETHER_HDR_LEN; - } else { - ifa_free(&aa->aa_ifa); - type = htons(ETHERTYPE_AT); - } - break; - } -#endif /* NETATALK */ - - case pseudo_AF_HDRCMPLT: - hdrcmplt = 1; - eh = (struct ether_header *)dst->sa_data; - (void)memcpy(esrc, eh->ether_shost, sizeof (esrc)); - /* FALLTHROUGH */ - - case AF_UNSPEC: - loop_copy = 0; /* if this is for us, don't do it */ - eh = (struct ether_header *)dst->sa_data; - (void)memcpy(edst, eh->ether_dhost, sizeof (edst)); - type = eh->ether_type; - break; - - default: - if_printf(ifp, "can't handle af%d\n", dst->sa_family); - senderr(EAFNOSUPPORT); } - if (lle != NULL && (lle->la_flags & LLE_IFADDR)) { - int csum_flags = 0; - if (m->m_pkthdr.csum_flags & CSUM_IP) - csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) - csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); - if (m->m_pkthdr.csum_flags & CSUM_SCTP) - csum_flags |= CSUM_SCTP_VALID; - m->m_pkthdr.csum_flags |= csum_flags; - m->m_pkthdr.csum_data = 0xffff; + if ((pflags & RT_L2_ME) != 0) { + update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, dst->sa_family, 0)); } + loop_copy = pflags & RT_MAY_LOOP; /* * Add local net header. If no space in first mbuf, * allocate another. + * + * Note that we do prepend regardless of RT_HAS_HEADER flag. + * This is done because BPF code shifts m_data pointer + * to the end of ethernet header prior to calling if_output(). 
*/ - M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); + M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); - eh = mtod(m, struct ether_header *); - (void)memcpy(&eh->ether_type, &type, - sizeof(eh->ether_type)); - (void)memcpy(eh->ether_dhost, edst, sizeof (edst)); - if (hdrcmplt) - (void)memcpy(eh->ether_shost, esrc, - sizeof(eh->ether_shost)); - else - (void)memcpy(eh->ether_shost, IF_LLADDR(ifp), - sizeof(eh->ether_shost)); + if ((pflags & RT_HAS_HEADER) == 0) { + eh = mtod(m, struct ether_header *); + memcpy(eh, phdr, hlen); + } /* * If a simplex interface, and the packet is being sent to our @@ -346,47 +377,27 @@ ether_output(struct ifnet *ifp, struct mbuf *m, * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ - if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy && + if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { - int csum_flags = 0; + struct mbuf *n; - if (m->m_pkthdr.csum_flags & CSUM_IP) - csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) - csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); - if (m->m_pkthdr.csum_flags & CSUM_SCTP) - csum_flags |= CSUM_SCTP_VALID; - - if (m->m_flags & M_BCAST) { - struct mbuf *n; - - /* - * Because if_simloop() modifies the packet, we need a - * writable copy through m_dup() instead of a readonly - * one as m_copy[m] would give us. The alternative would - * be to modify if_simloop() to handle the readonly mbuf, - * but performancewise it is mostly equivalent (trading - * extra data copying vs. extra locking). - * - * XXX This is a local workaround. A number of less - * often used kernel parts suffer from the same bug. - * See PR kern/105943 for a proposed general solution. 
- */ - if ((n = m_dup(m, M_DONTWAIT)) != NULL) { - n->m_pkthdr.csum_flags |= csum_flags; - if (csum_flags & CSUM_DATA_VALID) - n->m_pkthdr.csum_data = 0xffff; - (void)if_simloop(ifp, n, dst->sa_family, hlen); - } else - ifp->if_iqdrops++; - } else if (bcmp(eh->ether_dhost, eh->ether_shost, - ETHER_ADDR_LEN) == 0) { - m->m_pkthdr.csum_flags |= csum_flags; - if (csum_flags & CSUM_DATA_VALID) - m->m_pkthdr.csum_data = 0xffff; - (void) if_simloop(ifp, m, dst->sa_family, hlen); - return (0); /* XXX */ - } + /* + * Because if_simloop() modifies the packet, we need a + * writable copy through m_dup() instead of a readonly + * one as m_copy[m] would give us. The alternative would + * be to modify if_simloop() to handle the readonly mbuf, + * but performancewise it is mostly equivalent (trading + * extra data copying vs. extra locking). + * + * XXX This is a local workaround. A number of less + * often used kernel parts suffer from the same bug. + * See PR kern/105943 for a proposed general solution. + */ + if ((n = m_dup(m, M_NOWAIT)) != NULL) { + update_mbuf_csumflags(m, n); + (void)if_simloop(ifp, n, dst->sa_family, hlen); + } else + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } /* @@ -399,12 +410,12 @@ ether_output(struct ifnet *ifp, struct mbuf *m, #if defined(INET) || defined(INET6) if (ifp->if_carp && - (error = (*carp_output_p)(ifp, m, dst, NULL))) + (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ - if (IFP2AC(ifp)->ac_netgraph != NULL) { + if (ifp->if_l2com != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { @@ -429,18 +440,17 @@ bad: if (m != NULL) int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { -#if defined(INET) || defined(INET6) + int i; - if (V_ip_fw_chk_ptr && V_ether_ipfw != 0) { - if (ether_ipfw_chk(&m, ifp, 0) == 0) { - if (m) { - m_freem(m); - return EACCES; /* pkt dropped */ - } else - return 0; /* consumed e.g. 
in a pipe */ - } + if (PFIL_HOOKED(&V_link_pfil_hook)) { + i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, NULL); + + if (i != 0) + return (EACCES); + + if (m == NULL) + return (0); } -#endif /* * Queue message on interface, update output statistics if @@ -449,116 +459,6 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m) return ((ifp->if_transmit)(ifp, m)); } -#if defined(INET) || defined(INET6) -/* - * ipfw processing for ethernet packets (in and out). - * The second parameter is NULL from ether_demux, and ifp from - * ether_output_frame. - */ -int -ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared) -{ - struct ether_header *eh; - struct ether_header save_eh; - struct mbuf *m; - int i; - struct ip_fw_args args; - struct m_tag *mtag; - - /* fetch start point from rule, if any */ - mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); - if (mtag == NULL) { - args.rule.slot = 0; - } else { - /* dummynet packet, already partially processed */ - struct ipfw_rule_ref *r; - - /* XXX can we free it after use ? */ - mtag->m_tag_id = PACKET_TAG_NONE; - r = (struct ipfw_rule_ref *)(mtag + 1); - if (r->info & IPFW_ONEPASS) - return (1); - args.rule = *r; - } - - /* - * I need some amt of data to be contiguous, and in case others need - * the packet (shared==1) also better be in the first mbuf. 
- */ - m = *m0; - i = min( m->m_pkthdr.len, max_protohdr); - if ( shared || m->m_len < i) { - m = m_pullup(m, i); - if (m == NULL) { - *m0 = m; - return 0; - } - } - eh = mtod(m, struct ether_header *); - save_eh = *eh; /* save copy for restore below */ - m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ - - args.m = m; /* the packet we are looking at */ - args.oif = dst; /* destination, if any */ - args.next_hop = NULL; /* we do not support forward yet */ - args.next_hop6 = NULL; /* we do not support forward yet */ - args.eh = &save_eh; /* MAC header for bridged/MAC packets */ - args.inp = NULL; /* used by ipfw uid/gid/jail rules */ - i = V_ip_fw_chk_ptr(&args); - m = args.m; - if (m != NULL) { - /* - * Restore Ethernet header, as needed, in case the - * mbuf chain was replaced by ipfw. - */ - M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); - if (m == NULL) { - *m0 = m; - return 0; - } - if (eh != mtod(m, struct ether_header *)) - bcopy(&save_eh, mtod(m, struct ether_header *), - ETHER_HDR_LEN); - } - *m0 = m; - - if (i == IP_FW_DENY) /* drop */ - return 0; - - KASSERT(m != NULL, ("ether_ipfw_chk: m is NULL")); - - if (i == IP_FW_PASS) /* a PASS rule. */ - return 1; - - if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) { - int dir; - /* - * Pass the pkt to dummynet, which consumes it. - * If shared, make a copy and keep the original. - */ - if (shared) { - m = m_copypacket(m, M_DONTWAIT); - if (m == NULL) - return 0; - } else { - /* - * Pass the original to dummynet and - * nothing back to the caller - */ - *m0 = NULL ; - } - dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN); - ip_dn_io_ptr(&m, dir, &args); - return 0; - } - /* - * XXX at some point add support for divert/forward actions. - * If none of the above matches, we have to drop the pkt. - */ - return 0; -} -#endif - /* * Process a received Ethernet packet; the packet is in the * mbuf chain m with the ethernet header at the front. 
@@ -580,39 +480,18 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) return; } #endif - /* - * Do consistency checks to verify assumptions - * made by code past this point. - */ - if ((m->m_flags & M_PKTHDR) == 0) { - if_printf(ifp, "discard frame w/o packet header\n"); - ifp->if_ierrors++; - m_freem(m); - return; - } if (m->m_len < ETHER_HDR_LEN) { /* XXX maybe should pullup? */ if_printf(ifp, "discard frame w/o leading ethernet " "header (len %u pkt len %u)\n", m->m_len, m->m_pkthdr.len); - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); - if (m->m_pkthdr.rcvif == NULL) { - if_printf(ifp, "discard frame w/o interface pointer\n"); - ifp->if_ierrors++; - m_freem(m); - return; - } -#ifdef DIAGNOSTIC - if (m->m_pkthdr.rcvif != ifp) { - if_printf(ifp, "Warning, frame marked as received on %s\n", - m->m_pkthdr.rcvif->if_xname); - } -#endif + random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_ETHER); CURVNET_SET_QUIET(ifp->if_vnet); @@ -621,7 +500,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; - ifp->if_imcasts++; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef MAC @@ -647,7 +526,8 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) m->m_flags &= ~M_HASFCS; } - ifp->if_ibytes += m->m_pkthdr.len; + if (!(ifp->if_capenable & IFCAP_HWSTATS)) + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Allow monitor mode to claim this frame, after stats are updated. 
*/ if (ifp->if_flags & IFF_MONITOR) { @@ -683,8 +563,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) #ifdef DIAGNOSTIC if_printf(ifp, "cannot pullup VLAN header\n"); #endif - ifp->if_ierrors++; - m_freem(m); + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); CURVNET_RESTORE(); return; } @@ -702,7 +581,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) M_SETFIB(m, ifp->if_fib); /* Allow ng_ether(4) to claim this frame. */ - if (IFP2AC(ifp)->ac_netgraph != NULL) { + if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_p != NULL, ("%s: ng_ether_input_p is NULL", __func__)); m->m_flags &= ~M_PROMISC; @@ -757,22 +636,36 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) m->m_flags |= M_PROMISC; } - /* First chunk of an mbuf contains good entropy */ - if (harvest.ethernet) - random_harvest(m, 16, 3, 0, RANDOM_NET); - ether_demux(ifp, m); CURVNET_RESTORE(); } /* * Ethernet input dispatch; by default, direct dispatch here regardless of - * global configuration. + * global configuration. However, if RSS is enabled, hook up RSS affinity + * so that when deferred or hybrid dispatch is enabled, we can redistribute + * load based on RSS. + * + * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or + * not it had already done work distribution via multi-queue. Then we could + * direct dispatch in the event load balancing was already complete and + * handle the case of interfaces with different capabilities better. + * + * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions + * at multiple layers? + * + * XXXRW: For now, enable all this only if RSS is compiled in, although it + * works fine without RSS. Need to characterise the performance overhead + * of the detour through the netisr code in the event the result is always + * direct dispatch. 
*/ static void ether_nh_input(struct mbuf *m) { + M_ASSERTPKTHDR(m); + KASSERT(m->m_pkthdr.rcvif != NULL, + ("%s: NULL interface pointer", __func__)); ether_input_internal(m->m_pkthdr.rcvif, m); } @@ -780,8 +673,14 @@ static struct netisr_handler ether_nh = { .nh_name = "ether", .nh_handler = ether_nh_input, .nh_proto = NETISR_ETHER, +#ifdef RSS + .nh_policy = NETISR_POLICY_CPU, + .nh_dispatch = NETISR_DISPATCH_DIRECT, + .nh_m2cpuid = rss_m2cpuid, +#else .nh_policy = NETISR_POLICY_SOURCE, .nh_dispatch = NETISR_DISPATCH_DIRECT, +#endif }; static void @@ -793,16 +692,74 @@ ether_init(__unused void *arg) SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void +vnet_ether_init(__unused void *arg) +{ + int i; + + /* Initialize packet filter hooks. */ + V_link_pfil_hook.ph_type = PFIL_TYPE_AF; + V_link_pfil_hook.ph_af = AF_LINK; + if ((i = pfil_head_register(&V_link_pfil_hook)) != 0) + printf("%s: WARNING: unable to register pfil link hook, " + "error %d\n", __func__, i); +#ifdef VIMAGE + netisr_register_vnet(ðer_nh); +#endif +} +VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, + vnet_ether_init, NULL); + +#ifdef VIMAGE +static void +vnet_ether_pfil_destroy(__unused void *arg) +{ + int i; + + if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0) + printf("%s: WARNING: unable to unregister pfil link hook, " + "error %d\n", __func__, i); +} +VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY, + vnet_ether_pfil_destroy, NULL); + +static void +vnet_ether_destroy(__unused void *arg) +{ + + netisr_unregister_vnet(ðer_nh); +} +VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, + vnet_ether_destroy, NULL); +#endif + + + +static void ether_input(struct ifnet *ifp, struct mbuf *m) { + struct mbuf *mn; + /* - * We will rely on rcvif being set properly in the deferred context, - * so assert it is correct here. + * The drivers are allowed to pass in a chain of packets linked with + * m_nextpkt. 
We split them up into separate packets here and pass + * them up. This allows the drivers to amortize the receive lock. */ - KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__)); + while (m) { + mn = m->m_nextpkt; + m->m_nextpkt = NULL; - netisr_dispatch(NETISR_ETHER, m); + /* + * We will rely on rcvif being set properly in the deferred context, + * so assert it is correct here. + */ + KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p " + "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp)); + CURVNET_SET_QUIET(ifp->if_vnet); + netisr_dispatch(NETISR_ETHER, m); + CURVNET_RESTORE(); + m = mn; + } } /* @@ -812,27 +769,19 @@ void ether_demux(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; - int isr; + int i, isr; u_short ether_type; -#if defined(NETATALK) - struct llc *l; -#endif KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); -#if defined(INET) || defined(INET6) - /* - * Allow dummynet and/or ipfw to claim the frame. - * Do not do this for PROMISC frames in case we are re-entered. - */ - if (V_ip_fw_chk_ptr && V_ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) { - if (ether_ipfw_chk(&m, NULL, 0) == 0) { - if (m) - m_freem(m); /* dropped; free mbuf chain */ - return; /* consumed */ - } + /* Do not grab PROMISC frames in case we are re-entered. */ + if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) { + i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL); + + if (i != 0 || m == NULL) + return; } -#endif + eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); @@ -843,7 +792,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) if ((m->m_flags & M_VLANTAG) && EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) { if (ifp->if_vlantrunk == NULL) { - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } @@ -869,7 +818,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) * Strip off Ethernet header. 
*/ m->m_flags &= ~M_VLANTAG; - m->m_flags &= ~(M_PROTOFLAGS); + m_clrprotoflags(m); m_adj(m, ETHER_HDR_LEN); /* @@ -878,8 +827,6 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) switch (ether_type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; @@ -892,54 +839,12 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) isr = NETISR_ARP; break; #endif -#ifdef IPX - case ETHERTYPE_IPX: - if (ef_inputp && ef_inputp(ifp, eh, m) == 0) - return; - isr = NETISR_IPX; - break; -#endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif -#ifdef NETATALK - case ETHERTYPE_AT: - isr = NETISR_ATALK1; - break; - case ETHERTYPE_AARP: - isr = NETISR_AARP; - break; -#endif /* NETATALK */ default: -#ifdef IPX - if (ef_inputp && ef_inputp(ifp, eh, m) == 0) - return; -#endif /* IPX */ -#if defined(NETATALK) - if (ether_type > ETHERMTU) - goto discard; - l = mtod(m, struct llc *); - if (l->llc_dsap == LLC_SNAP_LSAP && - l->llc_ssap == LLC_SNAP_LSAP && - l->llc_control == LLC_UI) { - if (bcmp(&(l->llc_snap_org_code)[0], at_org_code, - sizeof(at_org_code)) == 0 && - ntohs(l->llc_snap_ether_type) == ETHERTYPE_AT) { - m_adj(m, LLC_SNAPFRAMELEN); - isr = NETISR_ATALK2; - break; - } - if (bcmp(&(l->llc_snap_org_code)[0], aarp_org_code, - sizeof(aarp_org_code)) == 0 && - ntohs(l->llc_snap_ether_type) == ETHERTYPE_AARP) { - m_adj(m, LLC_SNAPFRAMELEN); - isr = NETISR_AARP; - break; - } - } -#endif /* NETATALK */ goto discard; } netisr_dispatch(isr, m); @@ -951,14 +856,14 @@ discard: * hand the packet to it for last chance processing; * otherwise dispose of it. */ - if (IFP2AC(ifp)->ac_netgraph != NULL) { + if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_orphan_p != NULL, ("ng_ether_input_orphan_p is NULL")); /* * Put back the ethernet header so netgraph has a * consistent view of inbound packets. 
*/ - M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); + M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); (*ng_ether_input_orphan_p)(ifp, m); return; } @@ -998,6 +903,7 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) ifp->if_output = ether_output; ifp->if_input = ether_input; ifp->if_resolvemulti = ether_resolvemulti; + ifp->if_requestencap = ether_requestencap; #ifdef VIMAGE ifp->if_reassign = ether_reassign; #endif @@ -1022,6 +928,8 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) break; if (i != ifp->if_addrlen) if_printf(ifp, "Ethernet address: %6D\n", lla, ":"); + + uuid_ether_add(LLADDR(sdl)); } /* @@ -1030,7 +938,12 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) void ether_ifdetach(struct ifnet *ifp) { - if (IFP2AC(ifp)->ac_netgraph != NULL) { + struct sockaddr_dl *sdl; + + sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr); + uuid_ether_del(LLADDR(sdl)); + + if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); @@ -1045,7 +958,7 @@ void ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) { - if (IFP2AC(ifp)->ac_netgraph != NULL) { + if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); @@ -1061,10 +974,6 @@ ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); -#if defined(INET) || defined(INET6) -SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW, - &VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall"); -#endif #if 0 /* @@ -1158,31 +1067,6 @@ ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data) arp_ifinit(ifp, ifa); break; #endif -#ifdef IPX - /* - * XXX - This code is probably wrong - */ - case AF_IPX: - { - struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr); - - if (ipx_nullhost(*ina)) - ina->x_host = - *(union ipx_host *) - 
IF_LLADDR(ifp); - else { - bcopy((caddr_t) ina->x_host.c_host, - (caddr_t) IF_LLADDR(ifp), - ETHER_ADDR_LEN); - } - - /* - * Set new address - */ - ifp->if_init(ifp->if_softc); - break; - } -#endif default: ifp->if_init(ifp->if_softc); break; @@ -1238,7 +1122,7 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, e_addr = LLADDR(sdl); if (!ETHER_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; - *llsa = 0; + *llsa = NULL; return 0; #ifdef INET @@ -1246,14 +1130,7 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; - sdl = malloc(sizeof *sdl, M_IFMADDR, - M_NOWAIT|M_ZERO); - if (sdl == NULL) - return ENOMEM; - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_ETHER; + sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); @@ -1270,19 +1147,12 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, * (This is used for multicast routers.) 
*/ ifp->if_flags |= IFF_ALLMULTI; - *llsa = 0; + *llsa = NULL; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; - sdl = malloc(sizeof *sdl, M_IFMADDR, - M_NOWAIT|M_ZERO); - if (sdl == NULL) - return (ENOMEM); - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_ETHER; + sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); @@ -1299,46 +1169,8 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, } } -static void* -ether_alloc(u_char type, struct ifnet *ifp) -{ - struct arpcom *ac; - - ac = malloc(sizeof(struct arpcom), M_ARPCOM, M_WAITOK | M_ZERO); - ac->ac_ifp = ifp; - - return (ac); -} - -static void -ether_free(void *com, u_char type) -{ - - free(com, M_ARPCOM); -} - -static int -ether_modevent(module_t mod, int type, void *data) -{ - - switch (type) { - case MOD_LOAD: - if_register_com_alloc(IFT_ETHER, ether_alloc, ether_free); - break; - case MOD_UNLOAD: - if_deregister_com_alloc(IFT_ETHER); - break; - default: - return EOPNOTSUPP; - } - - return (0); -} - static moduledata_t ether_mod = { - "ether", - ether_modevent, - 0 + .name = "ether", }; void @@ -1386,7 +1218,7 @@ ether_vlanencap(struct mbuf *m, uint16_t tag) { struct ether_vlan_header *evl; - M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); + M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); if (m == NULL) return (NULL); /* M_PREPEND takes care of m_len, m_pkthdr.len for us */ diff --git a/freebsd/sys/net/if_faith.c b/freebsd/sys/net/if_faith.c deleted file mode 100644 index cf4a7fba..00000000 --- a/freebsd/sys/net/if_faith.c +++ /dev/null @@ -1,353 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/* $KAME: if_faith.c,v 1.23 2001/12/17 13:55:29 sumikawa Exp $ */ - -/*- - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ -/* - * derived from - * @(#)if_loop.c 8.1 (Berkeley) 6/10/93 - * Id: if_loop.c,v 1.22 1996/06/19 16:24:10 wollman Exp - */ - -/* - * Loopback interface driver for protocol testing and timing. 
- */ -#include <rtems/bsd/local/opt_inet.h> -#include <rtems/bsd/local/opt_inet6.h> - -#include <rtems/bsd/sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <sys/socket.h> -#include <rtems/bsd/sys/errno.h> -#include <sys/sockio.h> -#include <sys/time.h> -#include <sys/queue.h> -#include <sys/types.h> -#include <sys/malloc.h> - -#include <net/if.h> -#include <net/if_clone.h> -#include <net/if_types.h> -#include <net/netisr.h> -#include <net/route.h> -#include <net/bpf.h> -#include <net/vnet.h> - -#ifdef INET -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/in_var.h> -#include <netinet/ip.h> -#endif - -#ifdef INET6 -#ifndef INET -#include <netinet/in.h> -#endif -#include <netinet6/in6_var.h> -#include <netinet/ip6.h> -#include <netinet6/ip6_var.h> -#endif - -#define FAITHNAME "faith" - -struct faith_softc { - struct ifnet *sc_ifp; -}; - -static int faithioctl(struct ifnet *, u_long, caddr_t); -int faithoutput(struct ifnet *, struct mbuf *, struct sockaddr *, - struct route *); -static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *); -#ifdef INET6 -static int faithprefix(struct in6_addr *); -#endif - -static int faithmodevent(module_t, int, void *); - -static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface"); - -static int faith_clone_create(struct if_clone *, int, caddr_t); -static void faith_clone_destroy(struct ifnet *); - -IFC_SIMPLE_DECLARE(faith, 0); - -#define FAITHMTU 1500 - -static int -faithmodevent(mod, type, data) - module_t mod; - int type; - void *data; -{ - - switch (type) { - case MOD_LOAD: - if_clone_attach(&faith_cloner); - -#ifdef INET6 - faithprefix_p = faithprefix; -#endif - - break; - case MOD_UNLOAD: -#ifdef INET6 - faithprefix_p = NULL; -#endif - - if_clone_detach(&faith_cloner); - break; - default: - return EOPNOTSUPP; - } - return 0; -} - -static moduledata_t faith_mod = { - "if_faith", - faithmodevent, - 0 
-}; - -DECLARE_MODULE(if_faith, faith_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); -MODULE_VERSION(if_faith, 1); - -static int -faith_clone_create(ifc, unit, params) - struct if_clone *ifc; - int unit; - caddr_t params; -{ - struct ifnet *ifp; - struct faith_softc *sc; - - sc = malloc(sizeof(struct faith_softc), M_FAITH, M_WAITOK | M_ZERO); - ifp = sc->sc_ifp = if_alloc(IFT_FAITH); - if (ifp == NULL) { - free(sc, M_FAITH); - return (ENOSPC); - } - - ifp->if_softc = sc; - if_initname(sc->sc_ifp, ifc->ifc_name, unit); - - ifp->if_mtu = FAITHMTU; - /* Change to BROADCAST experimentaly to announce its prefix. */ - ifp->if_flags = /* IFF_LOOPBACK */ IFF_BROADCAST | IFF_MULTICAST; - ifp->if_ioctl = faithioctl; - ifp->if_output = faithoutput; - ifp->if_hdrlen = 0; - ifp->if_addrlen = 0; - ifp->if_snd.ifq_maxlen = ifqmaxlen; - if_attach(ifp); - bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); - return (0); -} - -static void -faith_clone_destroy(ifp) - struct ifnet *ifp; -{ - struct faith_softc *sc = ifp->if_softc; - - bpfdetach(ifp); - if_detach(ifp); - if_free(ifp); - free(sc, M_FAITH); -} - -int -faithoutput(ifp, m, dst, ro) - struct ifnet *ifp; - struct mbuf *m; - struct sockaddr *dst; - struct route *ro; -{ - int isr; - u_int32_t af; - struct rtentry *rt = NULL; - - M_ASSERTPKTHDR(m); - - if (ro != NULL) - rt = ro->ro_rt; - /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) { - bcopy(dst->sa_data, &af, sizeof(af)); - dst->sa_family = af; - } - - if (bpf_peers_present(ifp->if_bpf)) { - af = dst->sa_family; - bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); - } - - if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - m_freem(m); - return (rt->rt_flags & RTF_BLACKHOLE ? 0 : - rt->rt_flags & RTF_HOST ? 
EHOSTUNREACH : ENETUNREACH); - } - ifp->if_opackets++; - ifp->if_obytes += m->m_pkthdr.len; - switch (dst->sa_family) { -#ifdef INET - case AF_INET: - isr = NETISR_IP; - break; -#endif -#ifdef INET6 - case AF_INET6: - isr = NETISR_IPV6; - break; -#endif - default: - m_freem(m); - return EAFNOSUPPORT; - } - - /* XXX do we need more sanity checks? */ - - m->m_pkthdr.rcvif = ifp; - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; - netisr_dispatch(isr, m); - return (0); -} - -/* ARGSUSED */ -static void -faithrtrequest(cmd, rt, info) - int cmd; - struct rtentry *rt; - struct rt_addrinfo *info; -{ - RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; -} - -/* - * Process an ioctl request. - */ -/* ARGSUSED */ -static int -faithioctl(ifp, cmd, data) - struct ifnet *ifp; - u_long cmd; - caddr_t data; -{ - struct ifaddr *ifa; - struct ifreq *ifr = (struct ifreq *)data; - int error = 0; - - switch (cmd) { - - case SIOCSIFADDR: - ifp->if_flags |= IFF_UP; - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifa = (struct ifaddr *)data; - ifa->ifa_rtrequest = faithrtrequest; - /* - * Everything else is done at a higher level. 
- */ - break; - - case SIOCADDMULTI: - case SIOCDELMULTI: - if (ifr == 0) { - error = EAFNOSUPPORT; /* XXX */ - break; - } - switch (ifr->ifr_addr.sa_family) { -#ifdef INET - case AF_INET: - break; -#endif -#ifdef INET6 - case AF_INET6: - break; -#endif - - default: - error = EAFNOSUPPORT; - break; - } - break; - -#ifdef SIOCSIFMTU - case SIOCSIFMTU: - ifp->if_mtu = ifr->ifr_mtu; - break; -#endif - - case SIOCSIFFLAGS: - break; - - default: - error = EINVAL; - } - return (error); -} - -#ifdef INET6 -/* - * XXX could be slow - * XXX could be layer violation to call sys/net from sys/netinet6 - */ -static int -faithprefix(in6) - struct in6_addr *in6; -{ - struct rtentry *rt; - struct sockaddr_in6 sin6; - int ret; - - if (V_ip6_keepfaith == 0) - return 0; - - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_addr = *in6; - rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB); - if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH && - (rt->rt_ifp->if_flags & IFF_UP) != 0) - ret = 1; - else - ret = 0; - if (rt) - RTFREE_LOCKED(rt); - return ret; -} -#endif diff --git a/freebsd/sys/net/if_fddisubr.c b/freebsd/sys/net/if_fddisubr.c index 7a7fb471..9df882ec 100644 --- a/freebsd/sys/net/if_fddisubr.c +++ b/freebsd/sys/net/if_fddisubr.c @@ -38,10 +38,8 @@ * $FreeBSD$ */ -#include <rtems/bsd/local/opt_atalk.h> #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipx.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> @@ -53,6 +51,7 @@ #include <sys/sockio.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include <net/if_llc.h> #include <net/if_types.h> @@ -73,24 +72,10 @@ #include <netinet6/nd6.h> #endif -#ifdef IPX -#include <netipx/ipx.h> -#include <netipx/ipx_if.h> -#endif - #ifdef DECNET #include <netdnet/dn.h> #endif -#ifdef NETATALK -#include <netatalk/at.h> -#include <netatalk/at_var.h> -#include 
<netatalk/at_extern.h> - -extern u_char at_org_code[ 3 ]; -extern u_char aarp_org_code[ 3 ]; -#endif /* NETATALK */ - #include <security/mac/mac_framework.h> static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] = @@ -98,7 +83,7 @@ static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] = static int fddi_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); -static int fddi_output(struct ifnet *, struct mbuf *, struct sockaddr *, +static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void fddi_input(struct ifnet *ifp, struct mbuf *m); @@ -109,21 +94,17 @@ static void fddi_input(struct ifnet *ifp, struct mbuf *m); * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. - * Assumes that ifp is actually pointer to arpcom structure. */ static int -fddi_output(ifp, m, dst, ro) - struct ifnet *ifp; - struct mbuf *m; - struct sockaddr *dst; - struct route *ro; +fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) { u_int16_t type; int loop_copy = 0, error = 0, hdrcmplt = 0; u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN]; struct fddi_header *fh; #if defined(INET) || defined(INET6) - struct llentry *lle; + int is_gw = 0; #endif #ifdef MAC @@ -139,14 +120,15 @@ fddi_output(ifp, m, dst, ro) senderr(ENETDOWN); getmicrotime(&ifp->if_lastchange); +#if defined(INET) || defined(INET6) + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; +#endif + switch (dst->sa_family) { #ifdef INET case AF_INET: { - struct rtentry *rt0 = NULL; - - if (ro != NULL) - rt0 = ro->ro_rt; - error = arpresolve(ifp, rt0, m, dst, edst, &lle); + error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 
0 : error); type = htons(ETHERTYPE_IP); @@ -182,68 +164,29 @@ fddi_output(ifp, m, dst, ro) #endif /* INET */ #ifdef INET6 case AF_INET6: - error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle); + error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) - return (error); /* Something bad happened */ + return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IPV6); break; #endif /* INET6 */ -#ifdef IPX - case AF_IPX: - type = htons(ETHERTYPE_IPX); - bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host), - (caddr_t)edst, FDDI_ADDR_LEN); - break; -#endif /* IPX */ -#ifdef NETATALK - case AF_APPLETALK: { - struct at_ifaddr *aa; - if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) - return (0); - /* - * ifaddr is the first thing in at_ifaddr - */ - if ((aa = at_ifawithnet( (struct sockaddr_at *)dst)) == 0) - goto bad; - - /* - * In the phase 2 case, we need to prepend an mbuf for the llc header. - * Since we must preserve the value of m, which is passed to us by - * value, we m_copy() the first mbuf, and use it for our llc header. 
- */ - if (aa->aa_flags & AFA_PHASE2) { - struct llc llc; - - M_PREPEND(m, LLC_SNAPFRAMELEN, M_WAIT); - llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP; - llc.llc_control = LLC_UI; - bcopy(at_org_code, llc.llc_snap.org_code, sizeof(at_org_code)); - llc.llc_snap.ether_type = htons(ETHERTYPE_AT); - bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN); - type = 0; - } else { - type = htons(ETHERTYPE_AT); - } - ifa_free(&aa->aa_ifa); - break; - } -#endif /* NETATALK */ - case pseudo_AF_HDRCMPLT: { - struct ether_header *eh; + const struct ether_header *eh; + hdrcmplt = 1; - eh = (struct ether_header *)dst->sa_data; - bcopy((caddr_t)eh->ether_shost, (caddr_t)esrc, FDDI_ADDR_LEN); + eh = (const struct ether_header *)dst->sa_data; + bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN); /* FALLTHROUGH */ } case AF_UNSPEC: { - struct ether_header *eh; + const struct ether_header *eh; + loop_copy = -1; - eh = (struct ether_header *)dst->sa_data; - bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, FDDI_ADDR_LEN); + eh = (const struct ether_header *)dst->sa_data; + bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN); if (*edst & 1) m->m_flags |= (M_BCAST|M_MCAST); type = eh->ether_type; @@ -293,8 +236,8 @@ fddi_output(ifp, m, dst, ro) */ if (type != 0) { struct llc *l; - M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT); - if (m == 0) + M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); + if (m == NULL) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; @@ -309,8 +252,8 @@ fddi_output(ifp, m, dst, ro) * Add local net header. If no space in first mbuf, * allocate another. 
*/ - M_PREPEND(m, FDDI_HDR_LEN, M_DONTWAIT); - if (m == 0) + M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT); + if (m == NULL) senderr(ENOBUFS); fh = mtod(m, struct fddi_header *); fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4; @@ -347,12 +290,12 @@ fddi_output(ifp, m, dst, ro) error = (ifp->if_transmit)(ifp, m); if (error) - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); bad: - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); @@ -376,24 +319,23 @@ fddi_input(ifp, m) */ if ((m->m_flags & M_PKTHDR) == 0) { if_printf(ifp, "discard frame w/o packet header\n"); - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m = m_pullup(m, FDDI_HDR_LEN); if (m == NULL) { - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } fh = mtod(m, struct fddi_header *); - m->m_pkthdr.header = (void *)fh; /* * Discard packet if interface is not up. @@ -422,7 +364,7 @@ fddi_input(ifp, m) /* * Update interface statistics. 
*/ - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); getmicrotime(&ifp->if_lastchange); /* @@ -443,7 +385,7 @@ fddi_input(ifp, m) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; - ifp->if_imcasts++; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef M_LINK0 @@ -461,7 +403,7 @@ fddi_input(ifp, m) m = m_pullup(m, LLC_SNAPFRAMELEN); if (m == 0) { - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } l = mtod(m, struct llc *); @@ -472,30 +414,13 @@ fddi_input(ifp, m) u_int16_t type; if ((l->llc_control != LLC_UI) || (l->llc_ssap != LLC_SNAP_LSAP)) { - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } -#ifdef NETATALK - if (bcmp(&(l->llc_snap.org_code)[0], at_org_code, - sizeof(at_org_code)) == 0 && - ntohs(l->llc_snap.ether_type) == ETHERTYPE_AT) { - isr = NETISR_ATALK2; - m_adj(m, LLC_SNAPFRAMELEN); - break; - } - - if (bcmp(&(l->llc_snap.org_code)[0], aarp_org_code, - sizeof(aarp_org_code)) == 0 && - ntohs(l->llc_snap.ether_type) == ETHERTYPE_AARP) { - m_adj(m, LLC_SNAPFRAMELEN); - isr = NETISR_AARP; - break; - } -#endif /* NETATALK */ if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0 || l->llc_snap.org_code[2] != 0) { - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } @@ -505,8 +430,6 @@ fddi_input(ifp, m) switch (type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; @@ -521,27 +444,14 @@ fddi_input(ifp, m) isr = NETISR_IPV6; break; #endif -#ifdef IPX - case ETHERTYPE_IPX: - isr = NETISR_IPX; - break; -#endif #ifdef DECNET case ETHERTYPE_DECNET: isr = NETISR_DECNET; break; #endif -#ifdef NETATALK - case ETHERTYPE_AT: - isr = NETISR_ATALK1; - break; - case ETHERTYPE_AARP: - isr = NETISR_AARP; - break; -#endif /* NETATALK */ default: /* printf("fddi_input: unknown protocol 0x%x\n", type); */ - ifp->if_noproto++; + if_inc_counter(ifp, 
IFCOUNTER_NOPROTO, 1); goto dropanyway; } break; @@ -549,7 +459,7 @@ fddi_input(ifp, m) default: /* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */ - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } M_SETFIB(m, ifp->if_fib); @@ -557,7 +467,7 @@ fddi_input(ifp, m) return; dropanyway: - ifp->if_iqdrops++; + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (m) m_freem(m); return; @@ -643,31 +553,6 @@ fddi_ioctl (ifp, command, data) arp_ifinit(ifp, ifa); break; #endif -#ifdef IPX - /* - * XXX - This code is probably wrong - */ - case AF_IPX: { - struct ipx_addr *ina; - - ina = &(IA_SIPX(ifa)->sipx_addr); - - if (ipx_nullhost(*ina)) { - ina->x_host = *(union ipx_host *) - IF_LLADDR(ifp); - } else { - bcopy((caddr_t) ina->x_host.c_host, - (caddr_t) IF_LLADDR(ifp), - ETHER_ADDR_LEN); - } - - /* - * Set new address - */ - ifp->if_init(ifp->if_softc); - } - break; -#endif default: ifp->if_init(ifp->if_softc); break; @@ -724,7 +609,7 @@ fddi_resolvemulti(ifp, llsa, sa) e_addr = LLADDR(sdl); if ((e_addr[0] & 1) != 1) return (EADDRNOTAVAIL); - *llsa = 0; + *llsa = NULL; return (0); #ifdef INET @@ -732,14 +617,7 @@ fddi_resolvemulti(ifp, llsa, sa) sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return (EADDRNOTAVAIL); - sdl = malloc(sizeof *sdl, M_IFMADDR, - M_NOWAIT | M_ZERO); - if (sdl == NULL) - return (ENOMEM); - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_FDDI; + sdl = link_init_sdl(ifp, *llsa, IFT_FDDI); sdl->sdl_nlen = 0; sdl->sdl_alen = FDDI_ADDR_LEN; sdl->sdl_slen = 0; @@ -758,19 +636,12 @@ fddi_resolvemulti(ifp, llsa, sa) * (This is used for multicast routers.) 
*/ ifp->if_flags |= IFF_ALLMULTI; - *llsa = 0; + *llsa = NULL; return (0); } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return (EADDRNOTAVAIL); - sdl = malloc(sizeof *sdl, M_IFMADDR, - M_NOWAIT | M_ZERO); - if (sdl == NULL) - return (ENOMEM); - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_FDDI; + sdl = link_init_sdl(ifp, *llsa, IFT_FDDI); sdl->sdl_nlen = 0; sdl->sdl_alen = FDDI_ADDR_LEN; sdl->sdl_slen = 0; diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c index b022ecae..df4c38cf 100644 --- a/freebsd/sys/net/if_fwsubr.c +++ b/freebsd/sys/net/if_fwsubr.c @@ -45,6 +45,7 @@ #include <sys/sockio.h> #include <net/if.h> +#include <net/if_var.h> #include <net/netisr.h> #include <net/route.h> #include <net/if_llc.h> @@ -77,7 +78,7 @@ struct fw_hwaddr firewire_broadcastaddr = { }; static int -firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, +firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct fw_com *fc = IFP2FWC(ifp); @@ -91,7 +92,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, int unicast, dgl, foff; static int next_dgl; #if defined(INET) || defined(INET6) - struct llentry *lle; + int is_gw = 0; #endif #ifdef MAC @@ -106,6 +107,10 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, goto bad; } +#if defined(INET) || defined(INET6) + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; +#endif /* * For unicast, we make a tag to store the lladdr of the * destination. This might not be the first time we have seen @@ -129,7 +134,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, } destfw = (struct fw_hwaddr *)(mtag + 1); } else { - destfw = 0; + destfw = NULL; } switch (dst->sa_family) { @@ -141,7 +146,8 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, * doesn't fit into the arp model. 
*/ if (unicast) { - error = arpresolve(ifp, ro ? ro->ro_rt : NULL, m, dst, (u_char *) destfw, &lle); + error = arpresolve(ifp, is_gw, m, dst, + (u_char *) destfw, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } @@ -170,10 +176,10 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, #ifdef INET6 case AF_INET6: if (unicast) { - error = nd6_storelladdr(fc->fc_ifp, m, dst, - (u_char *) destfw, &lle); + error = nd6_resolve(fc->fc_ifp, is_gw, m, dst, + (u_char *) destfw, NULL, NULL); if (error) - return (error); + return (error == EWOULDBLOCK ? 0 : error); } type = ETHERTYPE_IPV6; break; @@ -231,7 +237,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, /* * No fragmentation is necessary. */ - M_PREPEND(m, sizeof(uint32_t), M_DONTWAIT); + M_PREPEND(m, sizeof(uint32_t), M_NOWAIT); if (!m) { error = ENOBUFS; goto bad; @@ -263,17 +269,17 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, * Split off the tail segment from the * datagram, copying our tags over. */ - mtail = m_split(m, fsize, M_DONTWAIT); + mtail = m_split(m, fsize, M_NOWAIT); m_tag_copy_chain(mtail, m, M_NOWAIT); } else { - mtail = 0; + mtail = NULL; } /* * Add our encapsulation header to this * fragment and hand it off to the link. 
*/ - M_PREPEND(m, 2*sizeof(uint32_t), M_DONTWAIT); + M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT); if (!m) { error = ENOBUFS; goto bad; @@ -538,7 +544,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src) if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } @@ -583,7 +589,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src) return; } - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Discard packet if interface is not up */ if ((ifp->if_flags & IFF_UP) == 0) { @@ -592,13 +598,11 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src) } if (m->m_flags & (M_BCAST|M_MCAST)) - ifp->if_imcasts++; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); switch (type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; @@ -700,7 +704,7 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, /* * No mapping needed. */ - *llsa = 0; + *llsa = NULL; return 0; #ifdef INET @@ -708,7 +712,7 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; - *llsa = 0; + *llsa = NULL; return 0; #endif #ifdef INET6 @@ -721,12 +725,12 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, * (This is used for multicast routers.) 
*/ ifp->if_flags |= IFF_ALLMULTI; - *llsa = 0; + *llsa = NULL; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; - *llsa = 0; + *llsa = NULL; return 0; #endif diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c index 27cbbdda..e07a2da0 100644 --- a/freebsd/sys/net/if_gif.c +++ b/freebsd/sys/net/if_gif.c @@ -1,8 +1,5 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $FreeBSD$ */ -/* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */ - /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -30,8 +27,13 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> @@ -39,11 +41,14 @@ #include <sys/systm.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/module.h> +#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/sockio.h> +#include <sys/sx.h> #include <rtems/bsd/sys/errno.h> #include <sys/time.h> #include <sys/sysctl.h> @@ -55,6 +60,7 @@ #include <machine/cpu.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_types.h> #include <net/netisr.h> @@ -65,9 +71,9 @@ #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> +#include <netinet/ip_ecn.h> #ifdef INET #include <netinet/in_var.h> -#include <netinet/in_gif.h> #include <netinet/ip_var.h> #endif /* INET */ @@ -77,9 +83,9 @@ #endif #include <netinet6/in6_var.h> #include <netinet/ip6.h> +#include <netinet6/ip6_ecn.h> #include <netinet6/ip6_var.h> #include <netinet6/scope6_var.h> -#include <netinet6/in6_gif.h> #include <netinet6/ip6protosw.h> #endif /* INET6 */ @@ -90,26 +96,41 @@ 
#include <security/mac/mac_framework.h> -#define GIFNAME "gif" +static const char gifname[] = "gif"; /* - * gif_mtx protects the global gif_softc_list. + * gif_mtx protects a per-vnet gif_softc_list. */ -static struct mtx gif_mtx; +static VNET_DEFINE(struct mtx, gif_mtx); +#define V_gif_mtx VNET(gif_mtx) static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list); #define V_gif_softc_list VNET(gif_softc_list) +static struct sx gif_ioctl_sx; +SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl"); + +#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \ + NULL, MTX_DEF) +#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx) +#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx) +#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx) void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); void (*ng_gif_detach_p)(struct ifnet *ifp); -static void gif_start(struct ifnet *); +static int gif_check_nesting(struct ifnet *, struct mbuf *); +static int gif_set_tunnel(struct ifnet *, struct sockaddr *, + struct sockaddr *); +static void gif_delete_tunnel(struct ifnet *); +static int gif_ioctl(struct ifnet *, u_long, caddr_t); +static int gif_transmit(struct ifnet *, struct mbuf *); +static void gif_qflush(struct ifnet *); static int gif_clone_create(struct if_clone *, int, caddr_t); static void gif_clone_destroy(struct ifnet *); - -IFC_SIMPLE_DECLARE(gif, 0); +static VNET_DEFINE(struct if_clone *, gif_cloner); +#define V_gif_cloner VNET(gif_cloner) static int gifmodevent(module_t, int, void *); @@ -129,7 +150,7 @@ static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, #endif static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST; #define V_max_gif_nesting VNET(max_gif_nesting) -SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW, +SYSCTL_INT(_net_link_gif, OID_AUTO, 
max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); /* @@ -143,22 +164,12 @@ static VNET_DEFINE(int, parallel_tunnels) = 1; static VNET_DEFINE(int, parallel_tunnels) = 0; #endif #define V_parallel_tunnels VNET(parallel_tunnels) -SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW, - &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?"); - -/* copy from src/sys/net/if_ethersubr.c */ -static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -#ifndef ETHER_IS_BROADCAST -#define ETHER_IS_BROADCAST(addr) \ - (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0) -#endif +SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0, + "Allow parallel tunnels?"); static int -gif_clone_create(ifc, unit, params) - struct if_clone *ifc; - int unit; - caddr_t params; +gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gif_softc *sc; @@ -169,18 +180,9 @@ gif_clone_create(ifc, unit, params) sc->gif_fibnum = BSD_DEFAULT_FIB; #endif /* __rtems__ */ GIF2IFP(sc) = if_alloc(IFT_GIF); - if (GIF2IFP(sc) == NULL) { - free(sc, M_GIF); - return (ENOSPC); - } - GIF_LOCK_INIT(sc); - GIF2IFP(sc)->if_softc = sc; - if_initname(GIF2IFP(sc), ifc->ifc_name, unit); - - sc->encap_cookie4 = sc->encap_cookie6 = NULL; - sc->gif_options = GIF_ACCEPT_REVETHIP; + if_initname(GIF2IFP(sc), gifname, unit); GIF2IFP(sc)->if_addrlen = 0; GIF2IFP(sc)->if_mtu = GIF_MTU; @@ -190,56 +192,42 @@ gif_clone_create(ifc, unit, params) GIF2IFP(sc)->if_flags |= IFF_LINK2; #endif GIF2IFP(sc)->if_ioctl = gif_ioctl; - GIF2IFP(sc)->if_start = gif_start; + GIF2IFP(sc)->if_transmit = gif_transmit; + GIF2IFP(sc)->if_qflush = gif_qflush; GIF2IFP(sc)->if_output = gif_output; - GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; + GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; + GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GIF2IFP(sc)); 
bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t)); if (ng_gif_attach_p != NULL) (*ng_gif_attach_p)(GIF2IFP(sc)); - mtx_lock(&gif_mtx); + GIF_LIST_LOCK(); LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list); - mtx_unlock(&gif_mtx); - + GIF_LIST_UNLOCK(); return (0); } static void -gif_clone_destroy(ifp) - struct ifnet *ifp; +gif_clone_destroy(struct ifnet *ifp) { -#if defined(INET) || defined(INET6) - int err; -#endif - struct gif_softc *sc = ifp->if_softc; - - mtx_lock(&gif_mtx); - LIST_REMOVE(sc, gif_list); - mtx_unlock(&gif_mtx); + struct gif_softc *sc; + sx_xlock(&gif_ioctl_sx); + sc = ifp->if_softc; gif_delete_tunnel(ifp); -#ifdef INET6 - if (sc->encap_cookie6 != NULL) { - err = encap_detach(sc->encap_cookie6); - KASSERT(err == 0, ("Unexpected error detaching encap_cookie6")); - } -#endif -#ifdef INET - if (sc->encap_cookie4 != NULL) { - err = encap_detach(sc->encap_cookie4); - KASSERT(err == 0, ("Unexpected error detaching encap_cookie4")); - } -#endif - + GIF_LIST_LOCK(); + LIST_REMOVE(sc, gif_list); + GIF_LIST_UNLOCK(); if (ng_gif_detach_p != NULL) (*ng_gif_detach_p)(ifp); bpfdetach(ifp); if_detach(ifp); - if_free(ifp); + ifp->if_softc = NULL; + sx_xunlock(&gif_ioctl_sx); + if_free(ifp); GIF_LOCK_DESTROY(sc); - free(sc, M_GIF); } @@ -248,31 +236,35 @@ vnet_gif_init(const void *unused __unused) { LIST_INIT(&V_gif_softc_list); + GIF_LIST_LOCK_INIT(); + V_gif_cloner = if_clone_simple(gifname, gif_clone_create, + gif_clone_destroy, 0); } -VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_gif_init, - NULL); +VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_gif_init, NULL); + +static void +vnet_gif_uninit(const void *unused __unused) +{ + + if_clone_detach(V_gif_cloner); + GIF_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_gif_uninit, NULL); static int -gifmodevent(mod, type, data) - module_t mod; - int type; - void *data; +gifmodevent(module_t mod, int type, 
void *data) { switch (type) { case MOD_LOAD: - mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF); - if_clone_attach(&gif_cloner); - break; - case MOD_UNLOAD: - if_clone_detach(&gif_cloner); - mtx_destroy(&gif_mtx); break; default: - return EOPNOTSUPP; + return (EOPNOTSUPP); } - return 0; + return (0); } static moduledata_t gif_mod = { @@ -285,219 +277,257 @@ DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gif, 1); int -gif_encapcheck(m, off, proto, arg) - const struct mbuf *m; - int off; - int proto; - void *arg; +gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { - struct ip ip; + GIF_RLOCK_TRACKER; + const struct ip *ip; struct gif_softc *sc; + int ret; sc = (struct gif_softc *)arg; - if (sc == NULL) - return 0; + if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0) + return (0); - if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0) - return 0; + ret = 0; + GIF_RLOCK(sc); /* no physical address */ - if (!sc->gif_psrc || !sc->gif_pdst) - return 0; + if (sc->gif_family == 0) + goto done; switch (proto) { #ifdef INET case IPPROTO_IPV4: - break; #endif #ifdef INET6 case IPPROTO_IPV6: - break; #endif case IPPROTO_ETHERIP: break; - default: - return 0; + goto done; } /* Bail on short packets */ - if (m->m_pkthdr.len < sizeof(ip)) - return 0; + M_ASSERTPKTHDR(m); + if (m->m_pkthdr.len < sizeof(struct ip)) + goto done; - m_copydata(m, 0, sizeof(ip), (caddr_t)&ip); - - switch (ip.ip_v) { + ip = mtod(m, const struct ip *); + switch (ip->ip_v) { #ifdef INET case 4: - if (sc->gif_psrc->sa_family != AF_INET || - sc->gif_pdst->sa_family != AF_INET) - return 0; - return gif_encapcheck4(m, off, proto, arg); + if (sc->gif_family != AF_INET) + goto done; + ret = in_gif_encapcheck(m, off, proto, arg); + break; #endif #ifdef INET6 case 6: if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) - return 0; - if (sc->gif_psrc->sa_family != AF_INET6 || - sc->gif_pdst->sa_family != AF_INET6) - return 0; - return gif_encapcheck6(m, off, proto, arg); + goto 
done; + if (sc->gif_family != AF_INET6) + goto done; + ret = in6_gif_encapcheck(m, off, proto, arg); + break; #endif - default: - return 0; } +done: + GIF_RUNLOCK(sc); + return (ret); } -static void -gif_start(struct ifnet *ifp) +static int +gif_transmit(struct ifnet *ifp, struct mbuf *m) { struct gif_softc *sc; - struct mbuf *m; - - sc = ifp->if_softc; - - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - for (;;) { - IFQ_DEQUEUE(&ifp->if_snd, m); - if (m == 0) - break; - - gif_output(ifp, m, sc->gif_pdst, NULL); - - } - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - - return; -} - -int -gif_output(ifp, m, dst, ro) - struct ifnet *ifp; - struct mbuf *m; - struct sockaddr *dst; - struct route *ro; -{ - struct gif_softc *sc = ifp->if_softc; - struct m_tag *mtag; - int error = 0; - int gif_called; - u_int32_t af; + struct etherip_header *eth; +#ifdef INET + struct ip *ip; +#endif +#ifdef INET6 + struct ip6_hdr *ip6; + uint32_t t; +#endif + uint32_t af; + uint8_t proto, ecn; + int error; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); - goto end; + goto err; } #endif - - /* - * gif may cause infinite recursion calls when misconfigured. - * We'll prevent this by detecting loops. - * - * High nesting level may cause stack exhaustion. - * We'll prevent this by introducing upper limit. - */ - gif_called = 1; - mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL); - while (mtag != NULL) { - if (*(struct ifnet **)(mtag + 1) == ifp) { - log(LOG_NOTICE, - "gif_output: loop detected on %s\n", - (*(struct ifnet **)(mtag + 1))->if_xname); - m_freem(m); - error = EIO; /* is there better errno? */ - goto end; - } - mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag); - gif_called++; - } - if (gif_called > V_max_gif_nesting) { - log(LOG_NOTICE, - "gif_output: recursively called too many times(%d)\n", - gif_called); - m_freem(m); - error = EIO; /* is there better errno? 
*/ - goto end; - } - mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *), - M_NOWAIT); - if (mtag == NULL) { - m_freem(m); - error = ENOMEM; - goto end; - } - *(struct ifnet **)(mtag + 1) = ifp; - m_tag_prepend(m, mtag); - - m->m_flags &= ~(M_BCAST|M_MCAST); - - GIF_LOCK(sc); - - if (!(ifp->if_flags & IFF_UP) || - sc->gif_psrc == NULL || sc->gif_pdst == NULL) { - GIF_UNLOCK(sc); + error = ENETDOWN; + sc = ifp->if_softc; + if ((ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0 || + sc->gif_family == 0 || + (error = gif_check_nesting(ifp, m)) != 0) { m_freem(m); - error = ENETDOWN; - goto end; - } - - /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) { - bcopy(dst->sa_data, &af, sizeof(af)); - dst->sa_family = af; + goto err; } - - af = dst->sa_family; - BPF_MTAP2(ifp, &af, sizeof(af), m); - ifp->if_opackets++; - ifp->if_obytes += m->m_pkthdr.len; - - /* override to IPPROTO_ETHERIP for bridged traffic */ + /* Now pull back the af that we stashed in the csum_data. */ if (ifp->if_bridge) af = AF_LINK; - + else + af = m->m_pkthdr.csum_data; + m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->gif_fibnum); + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); /* inner AF-specific encapsulation */ - + ecn = 0; + switch (af) { +#ifdef INET + case AF_INET: + proto = IPPROTO_IPV4; + if (m->m_len < sizeof(struct ip)) + m = m_pullup(m, sizeof(struct ip)); + if (m == NULL) { + error = ENOBUFS; + goto err; + } + ip = mtod(m, struct ip *); + ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: + ECN_NOCARE, &ecn, &ip->ip_tos); + break; +#endif +#ifdef INET6 + case AF_INET6: + proto = IPPROTO_IPV6; + if (m->m_len < sizeof(struct ip6_hdr)) + m = m_pullup(m, sizeof(struct ip6_hdr)); + if (m == NULL) { + error = ENOBUFS; + goto err; + } + t = 0; + ip6 = mtod(m, struct ip6_hdr *); + ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? 
ECN_ALLOWED: + ECN_NOCARE, &t, &ip6->ip6_flow); + ecn = (ntohl(t) >> 20) & 0xff; + break; +#endif + case AF_LINK: + proto = IPPROTO_ETHERIP; + M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT); + if (m == NULL) { + error = ENOBUFS; + goto err; + } + eth = mtod(m, struct etherip_header *); + eth->eip_resvh = 0; + eth->eip_ver = ETHERIP_VERSION; + eth->eip_resvl = 0; + break; + default: + error = EAFNOSUPPORT; + m_freem(m); + goto err; + } /* XXX should we check if our outer source is legal? */ - /* dispatch to output logic based on outer AF */ - switch (sc->gif_psrc->sa_family) { + switch (sc->gif_family) { #ifdef INET case AF_INET: - error = in_gif_output(ifp, af, m); + error = in_gif_output(ifp, m, proto, ecn); break; #endif #ifdef INET6 case AF_INET6: - error = in6_gif_output(ifp, af, m); + error = in6_gif_output(ifp, m, proto, ecn); break; #endif default: - m_freem(m); - error = ENETDOWN; + m_freem(m); } - - GIF_UNLOCK(sc); - end: +err: if (error) - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } +static void +gif_qflush(struct ifnet *ifp __unused) +{ + +} + +#define MTAG_GIF 1080679712 +static int +gif_check_nesting(struct ifnet *ifp, struct mbuf *m) +{ + struct m_tag *mtag; + int count; + + /* + * gif may cause infinite recursion calls when misconfigured. + * We'll prevent this by detecting loops. + * + * High nesting level may cause stack exhaustion. + * We'll prevent this by introducing upper limit. 
+ */ + count = 1; + mtag = NULL; + while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) { + if (*(struct ifnet **)(mtag + 1) == ifp) { + log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); + return (EIO); + } + count++; + } + if (count > V_max_gif_nesting) { + log(LOG_NOTICE, + "%s: if_output recursively called too many times(%d)\n", + if_name(ifp), count); + return (EIO); + } + mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + *(struct ifnet **)(mtag + 1) = ifp; + m_tag_prepend(m, mtag); + return (0); +} + +int +gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + uint32_t af; + + if (dst->sa_family == AF_UNSPEC) + bcopy(dst->sa_data, &af, sizeof(af)); + else + af = dst->sa_family; + /* + * Now save the af in the inbound pkt csum data, this is a cheat since + * we are using the inbound csum_data field to carry the af over to + * the gif_transmit() routine, avoiding using yet another mtag. + */ + m->m_pkthdr.csum_data = af; + return (ifp->if_transmit(ifp, m)); +} + void -gif_input(m, af, ifp) - struct mbuf *m; - int af; - struct ifnet *ifp; +gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn) { - int isr, n; - struct gif_softc *sc; struct etherip_header *eip; +#ifdef INET + struct ip *ip; +#endif +#ifdef INET6 + struct ip6_hdr *ip6; + uint32_t t; +#endif + struct gif_softc *sc; struct ether_header *eh; struct ifnet *oldifp; + int isr, n, af; if (ifp == NULL) { /* just in case */ @@ -506,20 +536,67 @@ gif_input(m, af, ifp) } sc = ifp->if_softc; m->m_pkthdr.rcvif = ifp; + m_clrprotoflags(m); + switch (proto) { +#ifdef INET + case IPPROTO_IPV4: + af = AF_INET; + if (m->m_len < sizeof(struct ip)) + m = m_pullup(m, sizeof(struct ip)); + if (m == NULL) + goto drop; + ip = mtod(m, struct ip *); + if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? 
ECN_ALLOWED: + ECN_NOCARE, &ecn, &ip->ip_tos) == 0) { + m_freem(m); + goto drop; + } + break; +#endif +#ifdef INET6 + case IPPROTO_IPV6: + af = AF_INET6; + if (m->m_len < sizeof(struct ip6_hdr)) + m = m_pullup(m, sizeof(struct ip6_hdr)); + if (m == NULL) + goto drop; + t = htonl((uint32_t)ecn << 20); + ip6 = mtod(m, struct ip6_hdr *); + if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: + ECN_NOCARE, &t, &ip6->ip6_flow) == 0) { + m_freem(m); + goto drop; + } + break; +#endif + case IPPROTO_ETHERIP: + af = AF_LINK; + break; + default: + m_freem(m); + goto drop; + } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif if (bpf_peers_present(ifp->if_bpf)) { - u_int32_t af1 = af; + uint32_t af1 = af; bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m); } + if ((ifp->if_flags & IFF_MONITOR) != 0) { + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + m_freem(m); + return; + } + if (ng_gif_input_p != NULL) { (*ng_gif_input_p)(ifp, &m, af); if (m == NULL) - return; + goto drop; } /* @@ -546,34 +623,15 @@ gif_input(m, af, ifp) #endif case AF_LINK: n = sizeof(struct etherip_header) + sizeof(struct ether_header); - if (n > m->m_len) { + if (n > m->m_len) m = m_pullup(m, n); - if (m == NULL) { - ifp->if_ierrors++; - return; - } - } - + if (m == NULL) + goto drop; eip = mtod(m, struct etherip_header *); - /* - * GIF_ACCEPT_REVETHIP (enabled by default) intentionally - * accepts an EtherIP packet with revered version field in - * the header. This is a knob for backward compatibility - * with FreeBSD 7.2R or prior. 
- */ - if (sc->gif_options & GIF_ACCEPT_REVETHIP) { - if (eip->eip_resvl != ETHERIP_VERSION - && eip->eip_ver != ETHERIP_VERSION) { - /* discard unknown versions */ - m_freem(m); - return; - } - } else { - if (eip->eip_ver != ETHERIP_VERSION) { - /* discard unknown versions */ - m_freem(m); - return; - } + if (eip->eip_ver != ETHERIP_VERSION) { + /* discard unknown versions */ + m_freem(m); + goto drop; } m_adj(m, sizeof(struct etherip_header)); @@ -588,7 +646,7 @@ gif_input(m, af, ifp) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; - ifp->if_imcasts++; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } BRIDGE_INPUT(ifp, m); @@ -613,59 +671,61 @@ gif_input(m, af, ifp) return; } - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); + return; +drop: + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? 
*/ int -gif_ioctl(ifp, cmd, data) - struct ifnet *ifp; - u_long cmd; - caddr_t data; +gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - struct gif_softc *sc = ifp->if_softc; - struct ifreq *ifr = (struct ifreq*)data; - int error = 0, size; - u_int options; + GIF_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq*)data; struct sockaddr *dst, *src; -#ifdef SIOCSIFMTU /* xxx */ - u_long mtu; + struct gif_softc *sc; +#ifdef INET + struct sockaddr_in *sin = NULL; #endif +#ifdef INET6 + struct sockaddr_in6 *sin6 = NULL; +#endif + u_int options; + int error; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; - break; - - case SIOCSIFDSTADDR: - break; - case SIOCADDMULTI: case SIOCDELMULTI: - break; - -#ifdef SIOCSIFMTU /* xxx */ case SIOCGIFMTU: - break; - + case SIOCSIFFLAGS: + return (0); case SIOCSIFMTU: - mtu = ifr->ifr_mtu; - if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX) + if (ifr->ifr_mtu < GIF_MTU_MIN || + ifr->ifr_mtu > GIF_MTU_MAX) return (EINVAL); - ifp->if_mtu = mtu; - break; -#endif /* SIOCSIFMTU */ - -#ifdef INET + else + ifp->if_mtu = ifr->ifr_mtu; + return (0); + } + sx_xlock(&gif_ioctl_sx); + sc = ifp->if_softc; + if (sc == NULL) { + error = ENXIO; + goto bad; + } + error = 0; + switch (cmd) { case SIOCSIFPHYADDR: -#endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: -#endif /* INET6 */ - case SIOCSLIFPHYADDR: +#endif + error = EINVAL; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: @@ -683,199 +743,169 @@ gif_ioctl(ifp, cmd, data) &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif - case SIOCSLIFPHYADDR: - src = (struct sockaddr *) - &(((struct if_laddrreq *)data)->addr); - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->dstaddr); - break; default: - return EINVAL; + goto bad; } - /* sa_family must be equal */ - if (src->sa_family != dst->sa_family) - return EINVAL; + if (src->sa_family != dst->sa_family || + src->sa_len != dst->sa_len) + goto bad; /* validate sa_len */ + /* check sa_family looks sane for the cmd */ switch 
(src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) - return EINVAL; + goto bad; + if (cmd != SIOCSIFPHYADDR) { + error = EAFNOSUPPORT; + goto bad; + } + if (satosin(src)->sin_addr.s_addr == INADDR_ANY || + satosin(dst)->sin_addr.s_addr == INADDR_ANY) { + error = EADDRNOTAVAIL; + goto bad; + } break; #endif #ifdef INET6 case AF_INET6: if (src->sa_len != sizeof(struct sockaddr_in6)) - return EINVAL; - break; -#endif - default: - return EAFNOSUPPORT; - } - switch (dst->sa_family) { -#ifdef INET - case AF_INET: - if (dst->sa_len != sizeof(struct sockaddr_in)) - return EINVAL; - break; -#endif -#ifdef INET6 - case AF_INET6: - if (dst->sa_len != sizeof(struct sockaddr_in6)) - return EINVAL; + goto bad; + if (cmd != SIOCSIFPHYADDR_IN6) { + error = EAFNOSUPPORT; + goto bad; + } + error = EADDRNOTAVAIL; + if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) + || + IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) + goto bad; + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the kernel + * internal form if necessary. 
+ */ + error = sa6_embedscope(satosin6(src), 0); + if (error != 0) + goto bad; + error = sa6_embedscope(satosin6(dst), 0); + if (error != 0) + goto bad; break; #endif default: - return EAFNOSUPPORT; - } - - /* check sa_family looks sane for the cmd */ - switch (cmd) { - case SIOCSIFPHYADDR: - if (src->sa_family == AF_INET) - break; - return EAFNOSUPPORT; -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - if (src->sa_family == AF_INET6) - break; - return EAFNOSUPPORT; -#endif /* INET6 */ - case SIOCSLIFPHYADDR: - /* checks done in the above */ - break; + error = EAFNOSUPPORT; + goto bad; } - - error = gif_set_tunnel(GIF2IFP(sc), src, dst); + error = gif_set_tunnel(ifp, src, dst); break; - -#ifdef SIOCDIFPHYADDR case SIOCDIFPHYADDR: - gif_delete_tunnel(GIF2IFP(sc)); + gif_delete_tunnel(ifp); break; -#endif - case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: -#endif /* INET6 */ - if (sc->gif_psrc == NULL) { + case SIOCGIFPDSTADDR_IN6: +#endif + if (sc->gif_family == 0) { error = EADDRNOTAVAIL; - goto bad; + break; } - src = sc->gif_psrc; + GIF_RLOCK(sc); switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: - dst = &ifr->ifr_addr; - size = sizeof(ifr->ifr_addr); + case SIOCGIFPDSTADDR: + if (sc->gif_family != AF_INET) { + error = EADDRNOTAVAIL; + break; + } + sin = (struct sockaddr_in *)&ifr->ifr_addr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); break; -#endif /* INET */ +#endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: - dst = (struct sockaddr *) + case SIOCGIFPDSTADDR_IN6: + if (sc->gif_family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + sin6 = (struct sockaddr_in6 *) &(((struct in6_ifreq *)data)->ifr_addr); - size = sizeof(((struct in6_ifreq *)data)->ifr_addr); + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); break; -#endif /* INET6 */ +#endif default: - error = EADDRNOTAVAIL; - goto bad; - } - if (src->sa_len > size) - return EINVAL; - 
bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); -#ifdef INET6 - if (dst->sa_family == AF_INET6) { - error = sa6_recoverscope((struct sockaddr_in6 *)dst); - if (error != 0) - return (error); + error = EAFNOSUPPORT; } + if (error == 0) { + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + sin->sin_addr = sc->gif_iphdr->ip_src; + break; + case SIOCGIFPDSTADDR: + sin->sin_addr = sc->gif_iphdr->ip_dst; + break; #endif - break; - - case SIOCGIFPDSTADDR: #ifdef INET6 - case SIOCGIFPDSTADDR_IN6: -#endif /* INET6 */ - if (sc->gif_pdst == NULL) { - error = EADDRNOTAVAIL; - goto bad; + case SIOCGIFPSRCADDR_IN6: + sin6->sin6_addr = sc->gif_ip6hdr->ip6_src; + break; + case SIOCGIFPDSTADDR_IN6: + sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst; + break; +#endif + } } - src = sc->gif_pdst; + GIF_RUNLOCK(sc); + if (error != 0) + break; switch (cmd) { #ifdef INET + case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: - dst = &ifr->ifr_addr; - size = sizeof(ifr->ifr_addr); + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin); + if (error != 0) + memset(sin, 0, sizeof(*sin)); break; -#endif /* INET */ +#endif #ifdef INET6 + case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: - dst = (struct sockaddr *) - &(((struct in6_ifreq *)data)->ifr_addr); - size = sizeof(((struct in6_ifreq *)data)->ifr_addr); - break; -#endif /* INET6 */ - default: - error = EADDRNOTAVAIL; - goto bad; - } - if (src->sa_len > size) - return EINVAL; - error = prison_if(curthread->td_ucred, src); - if (error != 0) - return (error); - error = prison_if(curthread->td_ucred, dst); - if (error != 0) - return (error); - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); -#ifdef INET6 - if (dst->sa_family == AF_INET6) { - error = sa6_recoverscope((struct sockaddr_in6 *)dst); + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin6); + if (error == 0) + error = sa6_recoverscope(sin6); if (error != 0) - return (error); - } + memset(sin6, 0, sizeof(*sin6)); #endif - break; - - case SIOCGLIFPHYADDR: - if 
(sc->gif_psrc == NULL || sc->gif_pdst == NULL) { - error = EADDRNOTAVAIL; - goto bad; } - - /* copy src */ - src = sc->gif_psrc; - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->addr); - size = sizeof(((struct if_laddrreq *)data)->addr); - if (src->sa_len > size) - return EINVAL; - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); - - /* copy dst */ - src = sc->gif_pdst; - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->dstaddr); - size = sizeof(((struct if_laddrreq *)data)->dstaddr); - if (src->sa_len > size) - return EINVAL; - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); break; - - case SIOCSIFFLAGS: - /* if_ioctl() takes care of it */ + case SIOCGTUNFIB: + ifr->ifr_fib = sc->gif_fibnum; + break; + case SIOCSTUNFIB: + if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) + break; + if (ifr->ifr_fib >= rt_numfibs) + error = EINVAL; + else + sc->gif_fibnum = ifr->ifr_fib; break; - case GIFGOPTS: options = sc->gif_options; - error = copyout(&options, ifr->ifr_data, - sizeof(options)); + error = copyout(&options, ifr->ifr_data, sizeof(options)); break; - case GIFSOPTS: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; @@ -887,151 +917,154 @@ gif_ioctl(ifp, cmd, data) else sc->gif_options = options; break; - default: error = EINVAL; break; } - bad: - return error; +bad: + sx_xunlock(&gif_ioctl_sx); + return (error); } -/* - * XXXRW: There's a general event-ordering issue here: the code to check - * if a given tunnel is already present happens before we perform a - * potentially blocking setup of the tunnel. This code needs to be - * re-ordered so that the check and replacement can be atomic using - * a mutex. 
- */ -int -gif_set_tunnel(ifp, src, dst) - struct ifnet *ifp; - struct sockaddr *src; - struct sockaddr *dst; +static void +gif_detach(struct gif_softc *sc) { - struct gif_softc *sc = ifp->if_softc; - struct gif_softc *sc2; - struct sockaddr *osrc, *odst, *sa; - int error = 0; - - mtx_lock(&gif_mtx); - LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) { - if (sc2 == sc) - continue; - if (!sc2->gif_pdst || !sc2->gif_psrc) - continue; - if (sc2->gif_pdst->sa_family != dst->sa_family || - sc2->gif_pdst->sa_len != dst->sa_len || - sc2->gif_psrc->sa_family != src->sa_family || - sc2->gif_psrc->sa_len != src->sa_len) - continue; - - /* - * Disallow parallel tunnels unless instructed - * otherwise. - */ - if (!V_parallel_tunnels && - bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 && - bcmp(sc2->gif_psrc, src, src->sa_len) == 0) { - error = EADDRNOTAVAIL; - mtx_unlock(&gif_mtx); - goto bad; - } - /* XXX both end must be valid? (I mean, not 0.0.0.0) */ - } - mtx_unlock(&gif_mtx); + sx_assert(&gif_ioctl_sx, SA_XLOCKED); + if (sc->gif_ecookie != NULL) + encap_detach(sc->gif_ecookie); + sc->gif_ecookie = NULL; +} + +static int +gif_attach(struct gif_softc *sc, int af) +{ - /* XXX we can detach from both, but be polite just in case */ - if (sc->gif_psrc) - switch (sc->gif_psrc->sa_family) { + sx_assert(&gif_ioctl_sx, SA_XLOCKED); + switch (af) { #ifdef INET - case AF_INET: - (void)in_gif_detach(sc); - break; + case AF_INET: + return (in_gif_attach(sc)); #endif #ifdef INET6 - case AF_INET6: - (void)in6_gif_detach(sc); - break; + case AF_INET6: + return (in6_gif_attach(sc)); #endif - } - - osrc = sc->gif_psrc; - sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK); - bcopy((caddr_t)src, (caddr_t)sa, src->sa_len); - sc->gif_psrc = sa; + } + return (EAFNOSUPPORT); +} - odst = sc->gif_pdst; - sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK); - bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len); - sc->gif_pdst = sa; +static int +gif_set_tunnel(struct ifnet *ifp, struct 
sockaddr *src, struct sockaddr *dst) +{ + struct gif_softc *sc = ifp->if_softc; + struct gif_softc *tsc; +#ifdef INET + struct ip *ip; +#endif +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + void *hdr; + int error = 0; - switch (sc->gif_psrc->sa_family) { + if (sc == NULL) + return (ENXIO); + /* Disallow parallel tunnels unless instructed otherwise. */ + if (V_parallel_tunnels == 0) { + GIF_LIST_LOCK(); + LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) { + if (tsc == sc || tsc->gif_family != src->sa_family) + continue; +#ifdef INET + if (tsc->gif_family == AF_INET && + tsc->gif_iphdr->ip_src.s_addr == + satosin(src)->sin_addr.s_addr && + tsc->gif_iphdr->ip_dst.s_addr == + satosin(dst)->sin_addr.s_addr) { + error = EADDRNOTAVAIL; + GIF_LIST_UNLOCK(); + goto bad; + } +#endif +#ifdef INET6 + if (tsc->gif_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src, + &satosin6(src)->sin6_addr) && + IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst, + &satosin6(dst)->sin6_addr)) { + error = EADDRNOTAVAIL; + GIF_LIST_UNLOCK(); + goto bad; + } +#endif + } + GIF_LIST_UNLOCK(); + } + switch (src->sa_family) { #ifdef INET case AF_INET: - error = in_gif_attach(sc); + hdr = ip = malloc(sizeof(struct ip), M_GIF, + M_WAITOK | M_ZERO); + ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr; + ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr; break; #endif #ifdef INET6 case AF_INET6: - /* - * Check validity of the scope zone ID of the addresses, and - * convert it into the kernel internal form if necessary. 
- */ - error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0); - if (error != 0) - break; - error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0); - if (error != 0) - break; - error = in6_gif_attach(sc); + hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF, + M_WAITOK | M_ZERO); + ip6->ip6_src = satosin6(src)->sin6_addr; + ip6->ip6_dst = satosin6(dst)->sin6_addr; + ip6->ip6_vfc = IPV6_VERSION; break; #endif + default: + return (EAFNOSUPPORT); } - if (error) { - /* rollback */ - free((caddr_t)sc->gif_psrc, M_IFADDR); - free((caddr_t)sc->gif_pdst, M_IFADDR); - sc->gif_psrc = osrc; - sc->gif_pdst = odst; - goto bad; - } - - if (osrc) - free((caddr_t)osrc, M_IFADDR); - if (odst) - free((caddr_t)odst, M_IFADDR); - bad: - if (sc->gif_psrc && sc->gif_pdst) + if (sc->gif_family != src->sa_family) + gif_detach(sc); + if (sc->gif_family == 0 || + sc->gif_family != src->sa_family) + error = gif_attach(sc, src->sa_family); + + GIF_WLOCK(sc); + if (sc->gif_family != 0) + free(sc->gif_hdr, M_GIF); + sc->gif_family = src->sa_family; + sc->gif_hdr = hdr; + GIF_WUNLOCK(sc); +#if defined(INET) || defined(INET6) +bad: +#endif + if (error == 0 && sc->gif_family != 0) { ifp->if_drv_flags |= IFF_DRV_RUNNING; - else + if_link_state_change(ifp, LINK_STATE_UP); + } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - - return error; + if_link_state_change(ifp, LINK_STATE_DOWN); + } + return (error); } -void -gif_delete_tunnel(ifp) - struct ifnet *ifp; +static void +gif_delete_tunnel(struct ifnet *ifp) { struct gif_softc *sc = ifp->if_softc; + int family; - if (sc->gif_psrc) { - free((caddr_t)sc->gif_psrc, M_IFADDR); - sc->gif_psrc = NULL; - } - if (sc->gif_pdst) { - free((caddr_t)sc->gif_pdst, M_IFADDR); - sc->gif_pdst = NULL; + if (sc == NULL) + return; + + GIF_WLOCK(sc); + family = sc->gif_family; + sc->gif_family = 0; + GIF_WUNLOCK(sc); + if (family != 0) { + gif_detach(sc); + free(sc->gif_hdr, M_GIF); } - /* it is safe to detach from both */ -#ifdef INET - 
(void)in_gif_detach(sc); -#endif -#ifdef INET6 - (void)in6_gif_detach(sc); -#endif ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_DOWN); } diff --git a/freebsd/sys/net/if_gif.h b/freebsd/sys/net/if_gif.h index a2f214c5..28da85bd 100644 --- a/freebsd/sys/net/if_gif.h +++ b/freebsd/sys/net/if_gif.h @@ -30,21 +30,17 @@ * SUCH DAMAGE. */ -/* - * if_gif.h - */ - #ifndef _NET_IF_GIF_H_ #define _NET_IF_GIF_H_ - #ifdef _KERNEL #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> #include <netinet/in.h> -/* xxx sigh, why route have struct route instead of pointer? */ +struct ip; +struct ip6_hdr; struct encaptab; extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, @@ -56,44 +52,44 @@ extern void (*ng_gif_attach_p)(struct ifnet *ifp); extern void (*ng_gif_detach_p)(struct ifnet *ifp); struct gif_softc { - struct ifnet *gif_ifp; - struct mtx gif_mtx; - struct sockaddr *gif_psrc; /* Physical src addr */ - struct sockaddr *gif_pdst; /* Physical dst addr */ + struct ifnet *gif_ifp; + struct rmlock gif_lock; + const struct encaptab *gif_ecookie; + int gif_family; + int gif_flags; + u_int gif_fibnum; + u_int gif_options; + void *gif_netgraph; /* netgraph node info */ union { - struct route gifscr_ro; /* xxx */ + void *hdr; + struct ip *iphdr; #ifdef INET6 - struct route_in6 gifscr_ro6; /* xxx */ + struct ip6_hdr *ip6hdr; #endif - } gifsc_gifscr; - int gif_flags; - u_int gif_fibnum; - const struct encaptab *encap_cookie4; - const struct encaptab *encap_cookie6; - void *gif_netgraph; /* ng_gif(4) netgraph node info */ - u_int gif_options; - LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */ + } gif_uhdr; + LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */ }; #define GIF2IFP(sc) ((sc)->gif_ifp) -#define GIF_LOCK_INIT(sc) mtx_init(&(sc)->gif_mtx, "gif softc", \ - NULL, MTX_DEF) -#define GIF_LOCK_DESTROY(sc) mtx_destroy(&(sc)->gif_mtx) -#define GIF_LOCK(sc) mtx_lock(&(sc)->gif_mtx) -#define 
GIF_UNLOCK(sc) mtx_unlock(&(sc)->gif_mtx) -#define GIF_LOCK_ASSERT(sc) mtx_assert(&(sc)->gif_mtx, MA_OWNED) - -#define gif_ro gifsc_gifscr.gifscr_ro +#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc") +#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock) +#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker +#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker) +#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker) +#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED) +#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock) +#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock) +#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED) + +#define gif_iphdr gif_uhdr.iphdr +#define gif_hdr gif_uhdr.hdr #ifdef INET6 -#define gif_ro6 gifsc_gifscr.gifscr_ro6 +#define gif_ip6hdr gif_uhdr.ip6hdr #endif #define GIF_MTU (1280) /* Default MTU */ #define GIF_MTU_MIN (1280) /* Minimum MTU */ #define GIF_MTU_MAX (8192) /* Maximum MTU */ -#define MTAG_GIF 1080679712 -#define MTAG_GIF_CALLED 0 - struct etherip_header { #if BYTE_ORDER == LITTLE_ENDIAN u_int eip_resvl:4, /* reserved */ @@ -111,20 +107,26 @@ struct etherip_header { #define ETHERIP_ALIGN 2 /* Prototypes */ -void gif_input(struct mbuf *, int, struct ifnet *); -int gif_output(struct ifnet *, struct mbuf *, struct sockaddr *, +void gif_input(struct mbuf *, struct ifnet *, int, uint8_t); +int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); -int gif_ioctl(struct ifnet *, u_long, caddr_t); -int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *); -void gif_delete_tunnel(struct ifnet *); int gif_encapcheck(const struct mbuf *, int, int, void *); +#ifdef INET +int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t); +int in_gif_encapcheck(const struct mbuf *, int, int, void *); +int in_gif_attach(struct gif_softc *); +#endif +#ifdef INET6 +int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t); +int 
in6_gif_encapcheck(const struct mbuf *, int, int, void *); +int in6_gif_attach(struct gif_softc *); +#endif #endif /* _KERNEL */ #define GIFGOPTS _IOWR('i', 150, struct ifreq) #define GIFSOPTS _IOW('i', 151, struct ifreq) -#define GIF_ACCEPT_REVETHIP 0x0001 -#define GIF_SEND_REVETHIP 0x0010 -#define GIF_OPTMASK (GIF_ACCEPT_REVETHIP|GIF_SEND_REVETHIP) +#define GIF_IGNORE_SOURCE 0x0002 +#define GIF_OPTMASK (GIF_IGNORE_SOURCE) #endif /* _NET_IF_GIF_H_ */ diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c index b7e0bd15..68b515ea 100644 --- a/freebsd/sys/net/if_gre.c +++ b/freebsd/sys/net/if_gre.c @@ -1,10 +1,8 @@ #include <machine/rtems-bsd-kernel-space.h> -/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ -/* $FreeBSD$ */ - /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. + * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -32,24 +30,20 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. + * + * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ -/* - * Encapsulate L3 protocols into IP - * See RFC 2784 (successor of RFC 1701 and 1702) for more details. - * If_gre is compatible with Cisco GRE tunnels, so you can - * have a NetBSD box as the other end of a tunnel interface of a Cisco - * router. See gre(4) for more details. 
- * Also supported: IP in IP encaps (proto 55) as of RFC 2004 - */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); -#include <rtems/bsd/local/opt_atalk.h> #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> #include <rtems/bsd/sys/param.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> #include <sys/libkern.h> #include <sys/malloc.h> #include <sys/module.h> @@ -57,97 +51,76 @@ #include <sys/priv.h> #include <sys/proc.h> #include <sys/protosw.h> +#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/sockio.h> +#include <sys/sx.h> #include <sys/sysctl.h> +#include <sys/syslog.h> #include <sys/systm.h> #include <net/ethernet.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_types.h> -#include <net/route.h> +#include <net/netisr.h> #include <net/vnet.h> +#include <net/route.h> -#ifdef INET #include <netinet/in.h> +#ifdef INET #include <netinet/in_systm.h> #include <netinet/in_var.h> #include <netinet/ip.h> -#include <netinet/ip_gre.h> #include <netinet/ip_var.h> -#include <netinet/ip_encap.h> -#else -#error "Huh? if_gre without inet?" #endif -#include <net/bpf.h> +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> +#include <netinet6/scope6_var.h> +#endif +#include <netinet/ip_encap.h> +#include <net/bpf.h> #include <net/if_gre.h> -/* - * It is not easy to calculate the right value for a GRE MTU. - * We leave this task to the admin and use the same default that - * other vendors use. - */ -#define GREMTU 1476 - -#define GRENAME "gre" - -#define MTAG_COOKIE_GRE 1307983903 -#define MTAG_GRE_NESTING 1 -struct mtag_gre_nesting { - uint16_t count; - uint16_t max; - struct ifnet *ifp[]; -}; - -/* - * gre_mtx protects all global variables in if_gre.c. - * XXX: gre_softc data not protected yet. 
- */ -struct mtx gre_mtx; -static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation"); - -struct gre_softc_head gre_softc_list; +#include <machine/in_cksum.h> +#include <security/mac/mac_framework.h> + +#define GREMTU 1500 +static const char grename[] = "gre"; +static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); +static VNET_DEFINE(struct mtx, gre_mtx); +#define V_gre_mtx VNET(gre_mtx) +#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \ + MTX_DEF) +#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx) +#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx) +#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx) + +static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list); +#define V_gre_softc_list VNET(gre_softc_list) +static struct sx gre_ioctl_sx; +SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); static int gre_clone_create(struct if_clone *, int, caddr_t); static void gre_clone_destroy(struct ifnet *); -static int gre_ioctl(struct ifnet *, u_long, caddr_t); -static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *, - struct route *ro); - -IFC_SIMPLE_DECLARE(gre, 0); +static VNET_DEFINE(struct if_clone *, gre_cloner); +#define V_gre_cloner VNET(gre_cloner) -static int gre_compute_route(struct gre_softc *sc); - -static void greattach(void); +static void gre_qflush(struct ifnet *); +static int gre_transmit(struct ifnet *, struct mbuf *); +static int gre_ioctl(struct ifnet *, u_long, caddr_t); +static int gre_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); -#ifdef INET -extern struct domain inetdomain; -static const struct protosw in_gre_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_GRE, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = gre_input, - .pr_output = (pr_output_t *)rip_output, - .pr_ctlinput = rip_ctlinput, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; -static const struct protosw in_mobile_protosw = { - 
.pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_MOBILE, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = gre_mobile_input, - .pr_output = (pr_output_t *)rip_output, - .pr_ctlinput = rip_ctlinput, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; -#endif +static void gre_updatehdr(struct gre_softc *); +static int gre_set_tunnel(struct ifnet *, struct sockaddr *, + struct sockaddr *); +static void gre_delete_tunnel(struct ifnet *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, @@ -163,805 +136,851 @@ static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, */ #define MAX_GRE_NEST 1 #endif -static int max_gre_nesting = MAX_GRE_NEST; -SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW, - &max_gre_nesting, 0, "Max nested tunnels"); -/* ARGSUSED */ +static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST; +#define V_max_gre_nesting VNET(max_gre_nesting) +SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels"); + +static void +vnet_gre_init(const void *unused __unused) +{ + LIST_INIT(&V_gre_softc_list); + GRE_LIST_LOCK_INIT(); + V_gre_cloner = if_clone_simple(grename, gre_clone_create, + gre_clone_destroy, 0); +} +VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_gre_init, NULL); + static void -greattach(void) +vnet_gre_uninit(const void *unused __unused) { - mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF); - LIST_INIT(&gre_softc_list); - if_clone_attach(&gre_cloner); + if_clone_detach(V_gre_cloner); + GRE_LIST_LOCK_DESTROY(); } +VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_gre_uninit, NULL); static int -gre_clone_create(ifc, unit, params) - struct if_clone *ifc; - int unit; - caddr_t params; +gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gre_softc *sc; sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); 
- +#ifndef __rtems__ + sc->gre_fibnum = curthread->td_proc->p_fibnum; +#else /* __rtems__ */ + sc->gre_fibnum = BSD_DEFAULT_FIB; +#endif /* __rtems__ */ GRE2IFP(sc) = if_alloc(IFT_TUNNEL); - if (GRE2IFP(sc) == NULL) { - free(sc, M_GRE); - return (ENOSPC); - } - + GRE_LOCK_INIT(sc); GRE2IFP(sc)->if_softc = sc; - if_initname(GRE2IFP(sc), ifc->ifc_name, unit); + if_initname(GRE2IFP(sc), grename, unit); - GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; - GRE2IFP(sc)->if_addrlen = 0; - GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */ - GRE2IFP(sc)->if_mtu = GREMTU; + GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU; GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; GRE2IFP(sc)->if_output = gre_output; GRE2IFP(sc)->if_ioctl = gre_ioctl; - sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; - sc->g_proto = IPPROTO_GRE; - GRE2IFP(sc)->if_flags |= IFF_LINK0; - sc->encap = NULL; -#ifndef __rtems__ - sc->gre_fibnum = curthread->td_proc->p_fibnum; -#else /* __rtems__ */ - sc->gre_fibnum = BSD_DEFAULT_FIB; -#endif /* __rtems__ */ - sc->wccp_ver = WCCP_V1; - sc->key = 0; + GRE2IFP(sc)->if_transmit = gre_transmit; + GRE2IFP(sc)->if_qflush = gre_qflush; + GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; + GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); - mtx_lock(&gre_mtx); - LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list); - mtx_unlock(&gre_mtx); + GRE_LIST_LOCK(); + LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list); + GRE_LIST_UNLOCK(); return (0); } static void -gre_clone_destroy(ifp) - struct ifnet *ifp; +gre_clone_destroy(struct ifnet *ifp) { - struct gre_softc *sc = ifp->if_softc; - - mtx_lock(&gre_mtx); - LIST_REMOVE(sc, sc_list); - mtx_unlock(&gre_mtx); + struct gre_softc *sc; -#ifdef INET - if (sc->encap != NULL) - encap_detach(sc->encap); -#endif + sx_xlock(&gre_ioctl_sx); + sc = ifp->if_softc; + gre_delete_tunnel(ifp); + GRE_LIST_LOCK(); + LIST_REMOVE(sc, gre_list); + GRE_LIST_UNLOCK(); bpfdetach(ifp); 
if_detach(ifp); + ifp->if_softc = NULL; + sx_xunlock(&gre_ioctl_sx); + if_free(ifp); + GRE_LOCK_DESTROY(sc); free(sc, M_GRE); } -/* - * The output routine. Takes a packet and encapsulates it in the protocol - * given by sc->g_proto. See also RFC 1701 and RFC 2004 - */ static int -gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct route *ro) +gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - int error = 0; - struct gre_softc *sc = ifp->if_softc; - struct greip *gh; - struct ip *ip; - struct m_tag *mtag; - struct mtag_gre_nesting *gt; - size_t len; - u_short gre_ip_id = 0; - uint8_t gre_ip_tos = 0; - u_int16_t etype = 0; - struct mobile_h mob_h; - u_int32_t af; - int extra = 0, max; - - /* - * gre may cause infinite recursion calls when misconfigured. High - * nesting level may cause stack exhaustion. We'll prevent this by - * detecting loops and by introducing upper limit. - */ - mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL); - if (mtag != NULL) { - struct ifnet **ifp2; - - gt = (struct mtag_gre_nesting *)(mtag + 1); - gt->count++; - if (gt->count > min(gt->max,max_gre_nesting)) { - printf("%s: hit maximum recursion limit %u on %s\n", - __func__, gt->count - 1, ifp->if_xname); - m_freem(m); - error = EIO; /* is there better errno? */ - goto end; - } - - ifp2 = gt->ifp; - for (max = gt->count - 1; max > 0; max--) { - if (*ifp2 == ifp) - break; - ifp2++; - } - if (*ifp2 == ifp) { - printf("%s: detected loop with nexting %u on %s\n", - __func__, gt->count-1, ifp->if_xname); - m_freem(m); - error = EIO; /* is there better errno? 
*/ - goto end; - } - *ifp2 = ifp; + GRE_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq *)data; + struct sockaddr *src, *dst; + struct gre_softc *sc; +#ifdef INET + struct sockaddr_in *sin = NULL; +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6 = NULL; +#endif + uint32_t opt; + int error; - } else { - /* - * Given that people should NOT increase max_gre_nesting beyond - * their real needs, we allocate once per packet rather than - * allocating an mtag once per passing through gre. - * - * Note: the sysctl does not actually check for saneness, so we - * limit the maximum numbers of possible recursions here. - */ - max = imin(max_gre_nesting, 256); - /* If someone sets the sysctl <= 0, we want at least 1. */ - max = imax(max, 1); - len = sizeof(struct mtag_gre_nesting) + - max * sizeof(struct ifnet *); - mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len, - M_NOWAIT); - if (mtag == NULL) { - m_freem(m); - error = ENOMEM; - goto end; - } - gt = (struct mtag_gre_nesting *)(mtag + 1); - bzero(gt, len); - gt->count = 1; - gt->max = max; - *gt->ifp = ifp; - m_tag_prepend(m, mtag); + switch (cmd) { + case SIOCSIFMTU: + /* XXX: */ + if (ifr->ifr_mtu < 576) + return (EINVAL); + break; + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCSIFFLAGS: + case SIOCADDMULTI: + case SIOCDELMULTI: + return (0); + case GRESADDRS: + case GRESADDRD: + case GREGADDRS: + case GREGADDRD: + case GRESPROTO: + case GREGPROTO: + return (EOPNOTSUPP); } - - if (!((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) || - sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { - m_freem(m); - error = ENETDOWN; + src = dst = NULL; + sx_xlock(&gre_ioctl_sx); + sc = ifp->if_softc; + if (sc == NULL) { + error = ENXIO; goto end; } - - gh = NULL; - ip = NULL; - - /* BPF writes need to be handled specially. 
*/ - if (dst->sa_family == AF_UNSPEC) { - bcopy(dst->sa_data, &af, sizeof(af)); - dst->sa_family = af; - } - - if (bpf_peers_present(ifp->if_bpf)) { - af = dst->sa_family; - bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); - } - - if ((ifp->if_flags & IFF_MONITOR) != 0) { - m_freem(m); - error = ENETDOWN; + error = 0; + switch (cmd) { + case SIOCSIFMTU: + GRE_WLOCK(sc); + sc->gre_mtu = ifr->ifr_mtu; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); goto end; - } - - m->m_flags &= ~(M_BCAST|M_MCAST); - - if (sc->g_proto == IPPROTO_MOBILE) { - if (dst->sa_family == AF_INET) { - struct mbuf *m0; - int msiz; - - ip = mtod(m, struct ip *); - - /* - * RFC2004 specifies that fragmented diagrams shouldn't - * be encapsulated. - */ - if (ip->ip_off & (IP_MF | IP_OFFMASK)) { - _IF_DROP(&ifp->if_snd); - m_freem(m); - error = EINVAL; /* is there better errno? */ - goto end; - } - memset(&mob_h, 0, MOB_H_SIZ_L); - mob_h.proto = (ip->ip_p) << 8; - mob_h.odst = ip->ip_dst.s_addr; - ip->ip_dst.s_addr = sc->g_dst.s_addr; - - /* - * If the packet comes from our host, we only change - * the destination address in the IP header. 
- * Else we also need to save and change the source - */ - if (in_hosteq(ip->ip_src, sc->g_src)) { - msiz = MOB_H_SIZ_S; - } else { - mob_h.proto |= MOB_H_SBIT; - mob_h.osrc = ip->ip_src.s_addr; - ip->ip_src.s_addr = sc->g_src.s_addr; - msiz = MOB_H_SIZ_L; - } - mob_h.proto = htons(mob_h.proto); - mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz); - - if ((m->m_data - msiz) < m->m_pktdat) { - /* need new mbuf */ - MGETHDR(m0, M_DONTWAIT, MT_DATA); - if (m0 == NULL) { - _IF_DROP(&ifp->if_snd); - m_freem(m); - error = ENOBUFS; - goto end; - } - m0->m_next = m; - m->m_data += sizeof(struct ip); - m->m_len -= sizeof(struct ip); - m0->m_pkthdr.len = m->m_pkthdr.len + msiz; - m0->m_len = msiz + sizeof(struct ip); - m0->m_data += max_linkhdr; - memcpy(mtod(m0, caddr_t), (caddr_t)ip, - sizeof(struct ip)); - m = m0; - } else { /* we have some space left in the old one */ - m->m_data -= msiz; - m->m_len += msiz; - m->m_pkthdr.len += msiz; - bcopy(ip, mtod(m, caddr_t), - sizeof(struct ip)); - } - ip = mtod(m, struct ip *); - memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz); - ip->ip_len = ntohs(ip->ip_len) + msiz; - } else { /* AF_INET */ - _IF_DROP(&ifp->if_snd); - m_freem(m); - error = EINVAL; - goto end; - } - } else if (sc->g_proto == IPPROTO_GRE) { - switch (dst->sa_family) { - case AF_INET: - ip = mtod(m, struct ip *); - gre_ip_tos = ip->ip_tos; - gre_ip_id = ip->ip_id; - if (sc->wccp_ver == WCCP_V2) { - extra = sizeof(uint32_t); - etype = WCCP_PROTOCOL_TYPE; - } else { - etype = ETHERTYPE_IP; - } - break; + case SIOCSIFPHYADDR: #ifdef INET6 - case AF_INET6: - gre_ip_id = ip_newid(); - etype = ETHERTYPE_IPV6; + case SIOCSIFPHYADDR_IN6: +#endif + error = EINVAL; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + src = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); break; #endif -#ifdef NETATALK - case AF_APPLETALK: - etype = ETHERTYPE_ATALK; +#ifdef INET6 + case 
SIOCSIFPHYADDR_IN6: + src = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: - _IF_DROP(&ifp->if_snd); - m_freem(m); error = EAFNOSUPPORT; goto end; } - - /* Reserve space for GRE header + optional GRE key */ - int hdrlen = sizeof(struct greip) + extra; - if (sc->key) - hdrlen += sizeof(uint32_t); - M_PREPEND(m, hdrlen, M_DONTWAIT); - } else { - _IF_DROP(&ifp->if_snd); - m_freem(m); - error = EINVAL; - goto end; - } - - if (m == NULL) { /* mbuf allocation failed */ - _IF_DROP(&ifp->if_snd); - error = ENOBUFS; - goto end; - } - - M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */ - - gh = mtod(m, struct greip *); - if (sc->g_proto == IPPROTO_GRE) { - uint32_t *options = gh->gi_options; - - memset((void *)gh, 0, sizeof(struct greip) + extra); - gh->gi_ptype = htons(etype); - gh->gi_flags = 0; - - /* Add key option */ - if (sc->key) - { - gh->gi_flags |= htons(GRE_KP); - *(options++) = htonl(sc->key); - } - } - - gh->gi_pr = sc->g_proto; - if (sc->g_proto != IPPROTO_MOBILE) { - gh->gi_src = sc->g_src; - gh->gi_dst = sc->g_dst; - ((struct ip*)gh)->ip_v = IPPROTO_IPV4; - ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2; - ((struct ip*)gh)->ip_ttl = GRE_TTL; - ((struct ip*)gh)->ip_tos = gre_ip_tos; - ((struct ip*)gh)->ip_id = gre_ip_id; - gh->gi_len = m->m_pkthdr.len; - } - - ifp->if_opackets++; - ifp->if_obytes += m->m_pkthdr.len; - /* - * Send it off and with IP_FORWARD flag to prevent it from - * overwriting the ip_id again. ip_id is already set to the - * ip_id of the encapsulated packet. 
- */ - error = ip_output(m, NULL, &sc->route, IP_FORWARDING, - (struct ip_moptions *)NULL, (struct inpcb *)NULL); - end: - if (error) - ifp->if_oerrors++; - return (error); -} - -static int -gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct ifreq *ifr = (struct ifreq *)data; - struct if_laddrreq *lifr = (struct if_laddrreq *)data; - struct in_aliasreq *aifr = (struct in_aliasreq *)data; - struct gre_softc *sc = ifp->if_softc; - int s; - struct sockaddr_in si; - struct sockaddr *sa = NULL; - int error, adj; - struct sockaddr_in sp, sm, dp, dm; - uint32_t key; - - error = 0; - adj = 0; + /* sa_family must be equal */ + if (src->sa_family != dst->sa_family || + src->sa_len != dst->sa_len) + goto end; - s = splnet(); - switch (cmd) { - case SIOCSIFADDR: - ifp->if_flags |= IFF_UP; - break; - case SIOCSIFDSTADDR: - break; - case SIOCSIFFLAGS: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0) - break; - if ((ifr->ifr_flags & IFF_LINK0) != 0) - sc->g_proto = IPPROTO_GRE; - else - sc->g_proto = IPPROTO_MOBILE; - if ((ifr->ifr_flags & IFF_LINK2) != 0) - sc->wccp_ver = WCCP_V2; - else - sc->wccp_ver = WCCP_V1; - goto recompute; - case SIOCSIFMTU: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0) - break; - if (ifr->ifr_mtu < 576) { - error = EINVAL; - break; - } - ifp->if_mtu = ifr->ifr_mtu; - break; - case SIOCGIFMTU: - ifr->ifr_mtu = GRE2IFP(sc)->if_mtu; - break; - case SIOCADDMULTI: - /* - * XXXRW: Isn't this priv_checkr() redundant to the ifnet - * layer check? 
- */ - if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0) - break; - if (ifr == 0) { - error = EAFNOSUPPORT; - break; - } - switch (ifr->ifr_addr.sa_family) { + /* validate sa_len */ + switch (src->sa_family) { #ifdef INET case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + goto end; break; #endif #ifdef INET6 case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + goto end; break; #endif default: error = EAFNOSUPPORT; - break; + goto end; } - break; - case SIOCDELMULTI: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0) - break; - if (ifr == 0) { - error = EAFNOSUPPORT; - break; + /* check sa_family looks sane for the cmd */ + error = EAFNOSUPPORT; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + if (src->sa_family == AF_INET) + break; + goto end; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + if (src->sa_family == AF_INET6) + break; + goto end; +#endif } - switch (ifr->ifr_addr.sa_family) { + error = EADDRNOTAVAIL; + switch (src->sa_family) { #ifdef INET case AF_INET: + if (satosin(src)->sin_addr.s_addr == INADDR_ANY || + satosin(dst)->sin_addr.s_addr == INADDR_ANY) + goto end; break; #endif #ifdef INET6 case AF_INET6: - break; + if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) + || + IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) + goto end; + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the kernel + * internal form if necessary. + */ + error = sa6_embedscope(satosin6(src), 0); + if (error != 0) + goto end; + error = sa6_embedscope(satosin6(dst), 0); + if (error != 0) + goto end; #endif - default: - error = EAFNOSUPPORT; - break; } + error = gre_set_tunnel(ifp, src, dst); break; - case GRESPROTO: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? 
- */ - if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) - break; - sc->g_proto = ifr->ifr_flags; - switch (sc->g_proto) { - case IPPROTO_GRE: - ifp->if_flags |= IFF_LINK0; - break; - case IPPROTO_MOBILE: - ifp->if_flags &= ~IFF_LINK0; - break; - default: - error = EPROTONOSUPPORT; - break; - } - goto recompute; - case GREGPROTO: - ifr->ifr_flags = sc->g_proto; + case SIOCDIFPHYADDR: + gre_delete_tunnel(ifp); break; - case GRESADDRS: - case GRESADDRD: - error = priv_check(curthread, PRIV_NET_GRE); - if (error) - return (error); - /* - * set tunnel endpoints, compute a less specific route - * to the remote end and mark if as up - */ - sa = &ifr->ifr_addr; - if (cmd == GRESADDRS) - sc->g_src = (satosin(sa))->sin_addr; - if (cmd == GRESADDRD) - sc->g_dst = (satosin(sa))->sin_addr; - recompute: -#ifdef INET - if (sc->encap != NULL) { - encap_detach(sc->encap); - sc->encap = NULL; - } + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: #endif - if ((sc->g_src.s_addr != INADDR_ANY) && - (sc->g_dst.s_addr != INADDR_ANY)) { - bzero(&sp, sizeof(sp)); - bzero(&sm, sizeof(sm)); - bzero(&dp, sizeof(dp)); - bzero(&dm, sizeof(dm)); - sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len = - sizeof(struct sockaddr_in); - sp.sin_family = sm.sin_family = dp.sin_family = - dm.sin_family = AF_INET; - sp.sin_addr = sc->g_src; - dp.sin_addr = sc->g_dst; - sm.sin_addr.s_addr = dm.sin_addr.s_addr = - INADDR_BROADCAST; -#ifdef INET - sc->encap = encap_attach(AF_INET, sc->g_proto, - sintosa(&sp), sintosa(&sm), sintosa(&dp), - sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ? 
- &in_gre_protosw : &in_mobile_protosw, sc); - if (sc->encap == NULL) - printf("%s: unable to attach encap\n", - if_name(GRE2IFP(sc))); -#endif - if (sc->route.ro_rt != 0) /* free old route */ - RTFREE(sc->route.ro_rt); - if (gre_compute_route(sc) == 0) - ifp->if_drv_flags |= IFF_DRV_RUNNING; - else - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - } - break; - case GREGADDRS: - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_src.s_addr; - sa = sintosa(&si); - error = prison_if(curthread->td_ucred, sa); - if (error != 0) - break; - ifr->ifr_addr = *sa; - break; - case GREGADDRD: - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_dst.s_addr; - sa = sintosa(&si); - error = prison_if(curthread->td_ucred, sa); - if (error != 0) - break; - ifr->ifr_addr = *sa; - break; - case SIOCSIFPHYADDR: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) - break; - if (aifr->ifra_addr.sin_family != AF_INET || - aifr->ifra_dstaddr.sin_family != AF_INET) { - error = EAFNOSUPPORT; - break; - } - if (aifr->ifra_addr.sin_len != sizeof(si) || - aifr->ifra_dstaddr.sin_len != sizeof(si)) { - error = EINVAL; - break; - } - sc->g_src = aifr->ifra_addr.sin_addr; - sc->g_dst = aifr->ifra_dstaddr.sin_addr; - goto recompute; - case SIOCSLIFPHYADDR: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? 
- */ - if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) - break; - if (lifr->addr.ss_family != AF_INET || - lifr->dstaddr.ss_family != AF_INET) { - error = EAFNOSUPPORT; - break; - } - if (lifr->addr.ss_len != sizeof(si) || - lifr->dstaddr.ss_len != sizeof(si)) { - error = EINVAL; - break; - } - sc->g_src = (satosin(&lifr->addr))->sin_addr; - sc->g_dst = - (satosin(&lifr->dstaddr))->sin_addr; - goto recompute; - case SIOCDIFPHYADDR: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) - break; - sc->g_src.s_addr = INADDR_ANY; - sc->g_dst.s_addr = INADDR_ANY; - goto recompute; - case SIOCGLIFPHYADDR: - if (sc->g_src.s_addr == INADDR_ANY || - sc->g_dst.s_addr == INADDR_ANY) { + if (sc->gre_family == 0) { error = EADDRNOTAVAIL; break; } - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_src.s_addr; - error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); - if (error != 0) + GRE_RLOCK(sc); + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + if (sc->gre_family != AF_INET) { + error = EADDRNOTAVAIL; + break; + } + sin = (struct sockaddr_in *)&ifr->ifr_addr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); break; - memcpy(&lifr->addr, &si, sizeof(si)); - si.sin_addr.s_addr = sc->g_dst.s_addr; - error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); - if (error != 0) +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + if (sc->gre_family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + sin6 = (struct sockaddr_in6 *) + &(((struct in6_ifreq *)data)->ifr_addr); + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); break; - memcpy(&lifr->dstaddr, &si, sizeof(si)); - break; - case SIOCGIFPSRCADDR: +#endif + } + if (error == 0) { 
+ switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + sin->sin_addr = sc->gre_oip.ip_src; + break; + case SIOCGIFPDSTADDR: + sin->sin_addr = sc->gre_oip.ip_dst; + break; +#endif #ifdef INET6 - case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPSRCADDR_IN6: + sin6->sin6_addr = sc->gre_oip6.ip6_src; + break; + case SIOCGIFPDSTADDR_IN6: + sin6->sin6_addr = sc->gre_oip6.ip6_dst; + break; #endif - if (sc->g_src.s_addr == INADDR_ANY) { - error = EADDRNOTAVAIL; - break; + } } - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_src.s_addr; - error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); + GRE_RUNLOCK(sc); if (error != 0) break; - bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); - break; - case SIOCGIFPDSTADDR: + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin); + if (error != 0) + memset(sin, 0, sizeof(*sin)); + break; +#endif #ifdef INET6 - case SIOCGIFPDSTADDR_IN6: + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin6); + if (error == 0) + error = sa6_recoverscope(sin6); + if (error != 0) + memset(sin6, 0, sizeof(*sin6)); #endif - if (sc->g_dst.s_addr == INADDR_ANY) { - error = EADDRNOTAVAIL; - break; } - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_dst.s_addr; - error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); - if (error != 0) + break; + case SIOCGTUNFIB: + ifr->ifr_fib = sc->gre_fibnum; + break; + case SIOCSTUNFIB: + if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; - bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); + if (ifr->ifr_fib >= rt_numfibs) + error = EINVAL; + else + sc->gre_fibnum = ifr->ifr_fib; break; case GRESKEY: - error = priv_check(curthread, PRIV_NET_GRE); - if (error) - break; - error = 
copyin(ifr->ifr_data, &key, sizeof(key)); - if (error) + if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; - /* adjust MTU for option header */ - if (key == 0 && sc->key != 0) /* clear */ - adj += sizeof(key); - else if (key != 0 && sc->key == 0) /* set */ - adj -= sizeof(key); - - if (ifp->if_mtu + adj < 576) { - error = EINVAL; + if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) break; + if (sc->gre_key != opt) { + GRE_WLOCK(sc); + sc->gre_key = opt; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); } - ifp->if_mtu += adj; - sc->key = key; break; case GREGKEY: - error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key)); + error = copyout(&sc->gre_key, ifr->ifr_data, + sizeof(sc->gre_key)); + break; + case GRESOPTS: + if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) + break; + if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) + break; + if (opt & ~GRE_OPTMASK) + error = EINVAL; + else { + if (sc->gre_options != opt) { + GRE_WLOCK(sc); + sc->gre_options = opt; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); + } + } break; + case GREGOPTS: + error = copyout(&sc->gre_options, ifr->ifr_data, + sizeof(sc->gre_options)); + break; default: error = EINVAL; break; } - - splx(s); +end: + sx_xunlock(&gre_ioctl_sx); return (error); } -/* - * computes a route to our destination that is not the one - * which would be taken by ip_output(), as this one will loop back to - * us. If the interface is p2p as a--->b, then a routing entry exists - * If we now send a packet to b (e.g. ping b), this will come down here - * gets src=a, dst=b tacked on and would from ip_output() sent back to - * if_gre. - * Goal here is to compute a route to b that is less specific than - * a-->b. We know that this one exists as in normal operation we have - * at least a default route which matches. 
- */ +static void +gre_updatehdr(struct gre_softc *sc) +{ + struct grehdr *gh = NULL; + uint32_t *opts; + uint16_t flags; + + GRE_WLOCK_ASSERT(sc); + switch (sc->gre_family) { +#ifdef INET + case AF_INET: + sc->gre_hlen = sizeof(struct greip); + sc->gre_oip.ip_v = IPPROTO_IPV4; + sc->gre_oip.ip_hl = sizeof(struct ip) >> 2; + sc->gre_oip.ip_p = IPPROTO_GRE; + gh = &sc->gre_gihdr->gi_gre; + break; +#endif +#ifdef INET6 + case AF_INET6: + sc->gre_hlen = sizeof(struct greip6); + sc->gre_oip6.ip6_vfc = IPV6_VERSION; + sc->gre_oip6.ip6_nxt = IPPROTO_GRE; + gh = &sc->gre_gi6hdr->gi6_gre; + break; +#endif + default: + return; + } + flags = 0; + opts = gh->gre_opts; + if (sc->gre_options & GRE_ENABLE_CSUM) { + flags |= GRE_FLAGS_CP; + sc->gre_hlen += 2 * sizeof(uint16_t); + *opts++ = 0; + } + if (sc->gre_key != 0) { + flags |= GRE_FLAGS_KP; + sc->gre_hlen += sizeof(uint32_t); + *opts++ = htonl(sc->gre_key); + } + if (sc->gre_options & GRE_ENABLE_SEQ) { + flags |= GRE_FLAGS_SP; + sc->gre_hlen += sizeof(uint32_t); + *opts++ = 0; + } else + sc->gre_oseq = 0; + gh->gre_flags = htons(flags); + GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen; +} + +static void +gre_detach(struct gre_softc *sc) +{ + + sx_assert(&gre_ioctl_sx, SA_XLOCKED); + if (sc->gre_ecookie != NULL) + encap_detach(sc->gre_ecookie); + sc->gre_ecookie = NULL; +} + static int -gre_compute_route(struct gre_softc *sc) +gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src, + struct sockaddr *dst) { - struct route *ro; - - ro = &sc->route; - - memset(ro, 0, sizeof(struct route)); - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; - ro->ro_dst.sa_family = AF_INET; - ro->ro_dst.sa_len = sizeof(ro->ro_dst); - - /* - * toggle last bit, so our interface is not found, but a less - * specific route. I'd rather like to specify a shorter mask, - * but this is not possible. Should work though. XXX - * XXX MRT Use a different FIB for the tunnel to solve this problem. 
- */ - if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) { - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^= - htonl(0x01); + struct gre_softc *sc, *tsc; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif +#ifdef INET + struct ip *ip; +#endif + void *hdr; + int error; + + sx_assert(&gre_ioctl_sx, SA_XLOCKED); + GRE_LIST_LOCK(); + sc = ifp->if_softc; + LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) { + if (tsc == sc || tsc->gre_family != src->sa_family) + continue; +#ifdef INET + if (tsc->gre_family == AF_INET && + tsc->gre_oip.ip_src.s_addr == + satosin(src)->sin_addr.s_addr && + tsc->gre_oip.ip_dst.s_addr == + satosin(dst)->sin_addr.s_addr) { + GRE_LIST_UNLOCK(); + return (EADDRNOTAVAIL); + } +#endif +#ifdef INET6 + if (tsc->gre_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src, + &satosin6(src)->sin6_addr) && + IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst, + &satosin6(dst)->sin6_addr)) { + GRE_LIST_UNLOCK(); + return (EADDRNOTAVAIL); + } +#endif } + GRE_LIST_UNLOCK(); -#ifdef DIAGNOSTIC - printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)), - inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); + switch (src->sa_family) { +#ifdef INET + case AF_INET: + hdr = ip = malloc(sizeof(struct greip) + + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); + ip->ip_src = satosin(src)->sin_addr; + ip->ip_dst = satosin(dst)->sin_addr; + break; +#endif +#ifdef INET6 + case AF_INET6: + hdr = ip6 = malloc(sizeof(struct greip6) + + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); + ip6->ip6_src = satosin6(src)->sin6_addr; + ip6->ip6_dst = satosin6(dst)->sin6_addr; + break; +#endif + default: + return (EAFNOSUPPORT); + } + if (sc->gre_family != 0) + gre_detach(sc); + GRE_WLOCK(sc); + if (sc->gre_family != 0) + free(sc->gre_hdr, M_GRE); + sc->gre_family = src->sa_family; + sc->gre_hdr = hdr; + sc->gre_oseq = 0; + sc->gre_iseq = UINT32_MAX; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); + + error = 0; + switch (src->sa_family) { +#ifdef INET + case AF_INET: + error 
= in_gre_attach(sc); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = in6_gre_attach(sc); + break; #endif + } + if (error == 0) { + ifp->if_drv_flags |= IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_UP); + } + return (error); +} - rtalloc_fib(ro, sc->gre_fibnum); +static void +gre_delete_tunnel(struct ifnet *ifp) +{ + struct gre_softc *sc = ifp->if_softc; + int family; + + GRE_WLOCK(sc); + family = sc->gre_family; + sc->gre_family = 0; + GRE_WUNLOCK(sc); + if (family != 0) { + gre_detach(sc); + free(sc->gre_hdr, M_GRE); + } + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_DOWN); +} - /* - * check if this returned a route at all and this route is no - * recursion to ourself - */ - if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) { -#ifdef DIAGNOSTIC - if (ro->ro_rt == NULL) - printf(" - no route found!\n"); - else - printf(" - route loops back to ourself!\n"); +int +gre_input(struct mbuf **mp, int *offp, int proto) +{ + struct gre_softc *sc; + struct grehdr *gh; + struct ifnet *ifp; + struct mbuf *m; + uint32_t *opts; +#ifdef notyet + uint32_t key; +#endif + uint16_t flags; + int hlen, isr, af; + + m = *mp; + sc = encap_getarg(m); + KASSERT(sc != NULL, ("encap_getarg returned NULL")); + + ifp = GRE2IFP(sc); + hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t); + if (m->m_pkthdr.len < hlen) + goto drop; + if (m->m_len < hlen) { + m = m_pullup(m, hlen); + if (m == NULL) + goto drop; + } + gh = (struct grehdr *)mtodo(m, *offp); + flags = ntohs(gh->gre_flags); + if (flags & ~GRE_FLAGS_MASK) + goto drop; + opts = gh->gre_opts; + hlen = 2 * sizeof(uint16_t); + if (flags & GRE_FLAGS_CP) { + /* reserved1 field must be zero */ + if (((uint16_t *)opts)[1] != 0) + goto drop; + if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0) + goto drop; + hlen += 2 * sizeof(uint16_t); + opts++; + } + if (flags & GRE_FLAGS_KP) { +#ifdef notyet + /* + * XXX: The current implementation uses the key only for outgoing + * 
packets. But we can check the key value here, or even in the + * encapcheck function. + */ + key = ntohl(*opts); +#endif + hlen += sizeof(uint32_t); + opts++; + } +#ifdef notyet + } else + key = 0; + + if (sc->gre_key != 0 && (key != sc->gre_key || key != 0)) + goto drop; +#endif + if (flags & GRE_FLAGS_SP) { +#ifdef notyet + seq = ntohl(*opts); +#endif + hlen += sizeof(uint32_t); + } + switch (ntohs(gh->gre_proto)) { + case ETHERTYPE_WCCP: + /* + * For WCCP skip an additional 4 bytes if after GRE header + * doesn't follow an IP header. + */ + if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40) + hlen += sizeof(uint32_t); + /* FALLTHROUGH */ + case ETHERTYPE_IP: + isr = NETISR_IP; + af = AF_INET; + break; + case ETHERTYPE_IPV6: + isr = NETISR_IPV6; + af = AF_INET6; + break; + default: + goto drop; + } + m_adj(m, *offp + hlen); + m_clrprotoflags(m); + m->m_pkthdr.rcvif = ifp; + M_SETFIB(m, ifp->if_fib); +#ifdef MAC + mac_ifnet_create_mbuf(ifp, m); #endif - return EADDRNOTAVAIL; + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if ((ifp->if_flags & IFF_MONITOR) != 0) + m_freem(m); + else + netisr_dispatch(isr, m); + return (IPPROTO_DONE); +drop: + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + return (IPPROTO_DONE); +} + +#define MTAG_GRE 1307983903 +static int +gre_check_nesting(struct ifnet *ifp, struct mbuf *m) +{ + struct m_tag *mtag; + int count; + + count = 1; + mtag = NULL; + while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) { + if (*(struct ifnet **)(mtag + 1) == ifp) { + log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); + return (EIO); + } + count++; + } + if (count > V_max_gre_nesting) { + log(LOG_NOTICE, + "%s: if_output recursively called too many times(%d)\n", + ifp->if_xname, count); + return (EIO); } + mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + *(struct ifnet 
**)(mtag + 1) = ifp; + m_tag_prepend(m, mtag); + return (0); +} - /* - * now change it back - else ip_output will just drop - * the route and search one to this interface ... - */ - if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; +static int +gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + uint32_t af; + int error; -#ifdef DIAGNOSTIC - printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp), - inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr)); - printf("\n"); +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error != 0) + goto drop; #endif + if ((ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0) { + error = ENETDOWN; + goto drop; + } + + error = gre_check_nesting(ifp, m); + if (error != 0) + goto drop; - return 0; + m->m_flags &= ~(M_BCAST|M_MCAST); + if (dst->sa_family == AF_UNSPEC) + bcopy(dst->sa_data, &af, sizeof(af)); + else + af = dst->sa_family; + BPF_MTAP2(ifp, &af, sizeof(af), m); + m->m_pkthdr.csum_data = af; /* save af for if_transmit */ + return (ifp->if_transmit(ifp, m)); +drop: + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (error); } -/* - * do a checksum of a buffer - much like in_cksum, which operates on - * mbufs. 
- */ -u_int16_t -gre_in_cksum(u_int16_t *p, u_int len) +static void +gre_setseqn(struct grehdr *gh, uint32_t seq) { - u_int32_t sum = 0; - int nwords = len >> 1; - - while (nwords-- != 0) - sum += *p++; - - if (len & 1) { - union { - u_short w; - u_char c[2]; - } u; - u.c[0] = *(u_char *)p; - u.c[1] = 0; - sum += u.w; + uint32_t *opts; + uint16_t flags; + + opts = gh->gre_opts; + flags = ntohs(gh->gre_flags); + KASSERT((flags & GRE_FLAGS_SP) != 0, + ("gre_setseqn called, but GRE_FLAGS_SP isn't set ")); + if (flags & GRE_FLAGS_CP) + opts++; + if (flags & GRE_FLAGS_KP) + opts++; + *opts = htonl(seq); +} + +static int +gre_transmit(struct ifnet *ifp, struct mbuf *m) +{ + GRE_RLOCK_TRACKER; + struct gre_softc *sc; + struct grehdr *gh; + uint32_t iaf, oaf, oseq; + int error, hlen, olen, plen; + int want_seq, want_csum; + + plen = 0; + sc = ifp->if_softc; + if (sc == NULL) { + error = ENETDOWN; + m_freem(m); + goto drop; + } + GRE_RLOCK(sc); + if (sc->gre_family == 0) { + GRE_RUNLOCK(sc); + error = ENETDOWN; + m_freem(m); + goto drop; + } + iaf = m->m_pkthdr.csum_data; + oaf = sc->gre_family; + hlen = sc->gre_hlen; + want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0; + if (want_seq) + oseq = sc->gre_oseq++; /* XXX */ + else + oseq = 0; /* Make compiler happy. 
*/ + want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0; + M_SETFIB(m, sc->gre_fibnum); + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) { + GRE_RUNLOCK(sc); + error = ENOBUFS; + goto drop; + } + bcopy(sc->gre_hdr, mtod(m, void *), hlen); + GRE_RUNLOCK(sc); + switch (oaf) { +#ifdef INET + case AF_INET: + olen = sizeof(struct ip); + break; +#endif +#ifdef INET6 + case AF_INET6: + olen = sizeof(struct ip6_hdr); + break; +#endif + default: + error = ENETDOWN; + goto drop; } + gh = (struct grehdr *)mtodo(m, olen); + switch (iaf) { +#ifdef INET + case AF_INET: + gh->gre_proto = htons(ETHERTYPE_IP); + break; +#endif +#ifdef INET6 + case AF_INET6: + gh->gre_proto = htons(ETHERTYPE_IPV6); + break; +#endif + default: + error = ENETDOWN; + goto drop; + } + if (want_seq) + gre_setseqn(gh, oseq); + if (want_csum) { + *(uint16_t *)gh->gre_opts = in_cksum_skip(m, + m->m_pkthdr.len, olen); + } + plen = m->m_pkthdr.len - hlen; + switch (oaf) { +#ifdef INET + case AF_INET: + error = in_gre_output(m, iaf, hlen); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = in6_gre_output(m, iaf, hlen); + break; +#endif + default: + m_freem(m); + error = ENETDOWN; + } +drop: + if (error) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + else { + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); + } + return (error); +} + +static void +gre_qflush(struct ifnet *ifp __unused) +{ - /* end-around-carry */ - sum = (sum >> 16) + (sum & 0xffff); - sum += (sum >> 16); - return (~sum); } static int @@ -970,16 +989,12 @@ gremodevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: - greattach(); - break; case MOD_UNLOAD: - if_clone_detach(&gre_cloner); - mtx_destroy(&gre_mtx); break; default: - return EOPNOTSUPP; + return (EOPNOTSUPP); } - return 0; + return (0); } static moduledata_t gre_mod = { diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h index 74d16b1c..806b0cb8 100644 --- a/freebsd/sys/net/if_gre.h +++ 
b/freebsd/sys/net/if_gre.h @@ -1,8 +1,6 @@ -/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */ -/* $FreeBSD$ */ - /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. + * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved * * This code is derived from software contributed to The NetBSD Foundation @@ -28,158 +26,111 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. + * + * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ + * $FreeBSD$ */ -#ifndef _NET_IF_GRE_H -#define _NET_IF_GRE_H +#ifndef _NET_IF_GRE_H_ +#define _NET_IF_GRE_H_ -#include <sys/ioccom.h> #ifdef _KERNEL -#include <sys/queue.h> - -/* - * Version of the WCCP, need to be configured manually since - * header for version 2 is the same but IP payload is prepended - * with additional 4-bytes field. - */ -typedef enum { - WCCP_V1 = 0, - WCCP_V2 -} wccp_ver_t; - -struct gre_softc { - struct ifnet *sc_ifp; - LIST_ENTRY(gre_softc) sc_list; - int gre_unit; - int gre_flags; - u_int gre_fibnum; /* use this fib for envelopes */ - struct in_addr g_src; /* source address of gre packets */ - struct in_addr g_dst; /* destination address of gre packets */ - struct route route; /* routing entry that determines, where a - encapsulated packet should go */ - u_char g_proto; /* protocol of encapsulator */ - - const struct encaptab *encap; /* encapsulation cookie */ - - uint32_t key; /* key included in outgoing GRE packets */ - /* zero means none */ - - wccp_ver_t wccp_ver; /* version of the WCCP */ -}; -#define GRE2IFP(sc) ((sc)->sc_ifp) - - -struct gre_h { - u_int16_t flags; /* GRE flags */ - u_int16_t ptype; /* protocol type of payload typically - Ether protocol type*/ - uint32_t options[0]; /* optional options */ -/* - * from here on: fields are optional, presence indicated by flags - * - u_int_16 checksum checksum (one-complements of GRE header - 
and payload - Present if (ck_pres | rt_pres == 1). - Valid if (ck_pres == 1). - u_int_16 offset offset from start of routing filed to - first octet of active SRE (see below). - Present if (ck_pres | rt_pres == 1). - Valid if (rt_pres == 1). - u_int_32 key inserted by encapsulator e.g. for - authentication - Present if (key_pres ==1 ). - u_int_32 seq_num Sequence number to allow for packet order - Present if (seq_pres ==1 ). - struct gre_sre[] routing Routing fileds (see below) - Present if (rt_pres == 1) - */ +/* GRE header according to RFC 2784 and RFC 2890 */ +struct grehdr { + uint16_t gre_flags; /* GRE flags */ +#define GRE_FLAGS_CP 0x8000 /* checksum present */ +#define GRE_FLAGS_KP 0x2000 /* key present */ +#define GRE_FLAGS_SP 0x1000 /* sequence present */ +#define GRE_FLAGS_MASK (GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP) + uint16_t gre_proto; /* protocol type */ + uint32_t gre_opts[0]; /* optional fields */ } __packed; +#ifdef INET struct greip { - struct ip gi_i; - struct gre_h gi_g; + struct ip gi_ip; + struct grehdr gi_gre; } __packed; +#endif -#define gi_pr gi_i.ip_p -#define gi_len gi_i.ip_len -#define gi_src gi_i.ip_src -#define gi_dst gi_i.ip_dst -#define gi_ptype gi_g.ptype -#define gi_flags gi_g.flags -#define gi_options gi_g.options - -#define GRE_CP 0x8000 /* Checksum Present */ -#define GRE_RP 0x4000 /* Routing Present */ -#define GRE_KP 0x2000 /* Key Present */ -#define GRE_SP 0x1000 /* Sequence Present */ -#define GRE_SS 0x0800 /* Strict Source Route */ +#ifdef INET6 +struct greip6 { + struct ip6_hdr gi6_ip6; + struct grehdr gi6_gre; +} __packed; +#endif +struct gre_softc { + struct ifnet *gre_ifp; + LIST_ENTRY(gre_softc) gre_list; + struct rmlock gre_lock; + int gre_family; /* AF of delivery header */ + uint32_t gre_iseq; + uint32_t gre_oseq; + uint32_t gre_key; + uint32_t gre_options; + uint32_t gre_mtu; + u_int gre_fibnum; + u_int gre_hlen; /* header size */ + union { + void *hdr; +#ifdef INET + struct greip *gihdr; +#endif +#ifdef INET6 + 
struct greip6 *gi6hdr; +#endif + } gre_uhdr; + const struct encaptab *gre_ecookie; +}; +#define GRE2IFP(sc) ((sc)->gre_ifp) +#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc") +#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock) +#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker +#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker) +#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker) +#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED) +#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock) +#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock) +#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED) + +#define gre_hdr gre_uhdr.hdr +#define gre_gihdr gre_uhdr.gihdr +#define gre_gi6hdr gre_uhdr.gi6hdr +#define gre_oip gre_gihdr->gi_ip +#define gre_oip6 gre_gi6hdr->gi6_ip6 + +int gre_input(struct mbuf **, int *, int); +#ifdef INET +int in_gre_attach(struct gre_softc *); +int in_gre_output(struct mbuf *, int, int); +#endif +#ifdef INET6 +int in6_gre_attach(struct gre_softc *); +int in6_gre_output(struct mbuf *, int, int); +#endif /* * CISCO uses special type for GRE tunnel created as part of WCCP * connection, while in fact those packets are just IPv4 encapsulated * into GRE. */ -#define WCCP_PROTOCOL_TYPE 0x883E - -/* - * gre_sre defines a Source route Entry. These are needed if packets - * should be routed over more than one tunnel hop by hop - */ -struct gre_sre { - u_int16_t sre_family; /* address family */ - u_char sre_offset; /* offset to first octet of active entry */ - u_char sre_length; /* number of octets in the SRE. - sre_lengthl==0 -> last entry. 
*/ - u_char *sre_rtinfo; /* the routing information */ -}; - -struct greioctl { - int unit; - struct in_addr addr; -}; - -/* for mobile encaps */ - -struct mobile_h { - u_int16_t proto; /* protocol and S-bit */ - u_int16_t hcrc; /* header checksum */ - u_int32_t odst; /* original destination address */ - u_int32_t osrc; /* original source addr, if S-bit set */ -} __packed; - -struct mobip_h { - struct ip mi; - struct mobile_h mh; -} __packed; - - -#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t)) -#define MOB_H_SIZ_L (sizeof(struct mobile_h)) -#define MOB_H_SBIT 0x0080 - -#define GRE_TTL 30 - +#define ETHERTYPE_WCCP 0x883E #endif /* _KERNEL */ -/* - * ioctls needed to manipulate the interface - */ - #define GRESADDRS _IOW('i', 101, struct ifreq) #define GRESADDRD _IOW('i', 102, struct ifreq) #define GREGADDRS _IOWR('i', 103, struct ifreq) #define GREGADDRD _IOWR('i', 104, struct ifreq) #define GRESPROTO _IOW('i' , 105, struct ifreq) #define GREGPROTO _IOWR('i', 106, struct ifreq) -#define GREGKEY _IOWR('i', 107, struct ifreq) -#define GRESKEY _IOW('i', 108, struct ifreq) -#ifdef _KERNEL -LIST_HEAD(gre_softc_head, gre_softc); -extern struct mtx gre_mtx; -extern struct gre_softc_head gre_softc_list; +#define GREGKEY _IOWR('i', 107, struct ifreq) +#define GRESKEY _IOW('i', 108, struct ifreq) +#define GREGOPTS _IOWR('i', 109, struct ifreq) +#define GRESOPTS _IOW('i', 110, struct ifreq) -u_int16_t gre_in_cksum(u_int16_t *, u_int); -#endif /* _KERNEL */ +#define GRE_ENABLE_CSUM 0x0001 +#define GRE_ENABLE_SEQ 0x0002 +#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ) -#endif +#endif /* _NET_IF_GRE_H_ */ diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c index 660dc7dd..d26d0ebd 100644 --- a/freebsd/sys/net/if_iso88025subr.c +++ b/freebsd/sys/net/if_iso88025subr.c @@ -44,7 +44,6 @@ #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipx.h> #include 
<rtems/bsd/sys/param.h> #include <sys/systm.h> @@ -56,6 +55,7 @@ #include <sys/sockio.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_arp.h> #include <net/if_dl.h> #include <net/if_llc.h> @@ -77,11 +77,6 @@ #include <netinet6/nd6.h> #endif -#ifdef IPX -#include <netipx/ipx.h> -#include <netipx/ipx_if.h> -#endif - #include <security/mac/mac_framework.h> static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] = @@ -172,30 +167,6 @@ iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data) arp_ifinit(ifp, ifa); break; #endif /* INET */ -#ifdef IPX - /* - * XXX - This code is probably wrong - */ - case AF_IPX: { - struct ipx_addr *ina; - - ina = &(IA_SIPX(ifa)->sipx_addr); - - if (ipx_nullhost(*ina)) - ina->x_host = *(union ipx_host *) - IF_LLADDR(ifp); - else - bcopy((caddr_t) ina->x_host.c_host, - (caddr_t) IF_LLADDR(ifp), - ISO88025_ADDR_LEN); - - /* - * Set new address - */ - ifp->if_init(ifp->if_softc); - } - break; -#endif /* IPX */ default: ifp->if_init(ifp->if_softc); break; @@ -233,11 +204,8 @@ iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data) * ISO88025 encapsulation */ int -iso88025_output(ifp, m, dst, ro) - struct ifnet *ifp; - struct mbuf *m; - struct sockaddr *dst; - struct route *ro; +iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) { u_int16_t snap_type = 0; int loop_copy = 0, error = 0, rif_len = 0; @@ -246,13 +214,10 @@ iso88025_output(ifp, m, dst, ro) struct iso88025_header gen_th; struct sockaddr_dl *sdl = NULL; struct rtentry *rt0 = NULL; -#if defined(INET) || defined(INET6) - struct llentry *lle; -#endif + int is_gw = 0; if (ro != NULL) - rt0 = ro->ro_rt; - + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) @@ -291,7 +256,7 @@ iso88025_output(ifp, m, dst, ro) switch (dst->sa_family) { #ifdef INET case AF_INET: - error = arpresolve(ifp, rt0, m, dst, edst, &lle); + error = arpresolve(ifp, is_gw, m, dst, 
edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IP; @@ -326,34 +291,15 @@ iso88025_output(ifp, m, dst, ro) #endif /* INET */ #ifdef INET6 case AF_INET6: - error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle); + error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) - return (error); + return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IPV6; break; #endif /* INET6 */ -#ifdef IPX - case AF_IPX: - { - u_int8_t *cp; - - bcopy((caddr_t)&(satoipx_addr(dst).x_host), (caddr_t)edst, - ISO88025_ADDR_LEN); - - M_PREPEND(m, 3, M_WAIT); - m = m_pullup(m, 3); - if (m == 0) - senderr(ENOBUFS); - cp = mtod(m, u_int8_t *); - *cp++ = ETHERTYPE_IPX_8022; - *cp++ = ETHERTYPE_IPX_8022; - *cp++ = LLC_UI; - } - break; -#endif /* IPX */ case AF_UNSPEC: { - struct iso88025_sockaddr_data *sd; + const struct iso88025_sockaddr_data *sd; /* * For AF_UNSPEC sockaddr.sa_data must contain all of the * mac information needed to send the packet. 
This allows @@ -363,13 +309,12 @@ iso88025_output(ifp, m, dst, ro) * should be an iso88025_sockaddr_data structure see iso88025.h */ loop_copy = -1; - sd = (struct iso88025_sockaddr_data *)dst->sa_data; + sd = (const struct iso88025_sockaddr_data *)dst->sa_data; gen_th.ac = sd->ac; gen_th.fc = sd->fc; - (void)memcpy((caddr_t)edst, (caddr_t)sd->ether_dhost, - ISO88025_ADDR_LEN); - (void)memcpy((caddr_t)gen_th.iso88025_shost, - (caddr_t)sd->ether_shost, ISO88025_ADDR_LEN); + (void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN); + (void)memcpy(gen_th.iso88025_shost, sd->ether_shost, + ISO88025_ADDR_LEN); rif_len = 0; break; } @@ -384,8 +329,8 @@ iso88025_output(ifp, m, dst, ro) */ if (snap_type != 0) { struct llc *l; - M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT); - if (m == 0) + M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); + if (m == NULL) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; @@ -400,8 +345,8 @@ iso88025_output(ifp, m, dst, ro) * Add local net header. If no space in first mbuf, * allocate another. 
*/ - M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_DONTWAIT); - if (m == 0) + M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT); + if (m == NULL) senderr(ENOBUFS); th = mtod(m, struct iso88025_header *); bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN); @@ -435,12 +380,12 @@ iso88025_output(ifp, m, dst, ro) IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error); if (error) { printf("iso88025_output: packet dropped QFULL.\n"); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } return (error); bad: - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); @@ -465,24 +410,23 @@ iso88025_input(ifp, m) */ if ((m->m_flags & M_PKTHDR) == 0) { if_printf(ifp, "discard frame w/o packet header\n"); - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m = m_pullup(m, ISO88025_HDR_LEN); if (m == NULL) { - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } th = mtod(m, struct iso88025_header *); - m->m_pkthdr.header = (void *)th; /* * Discard packet if interface is not up. @@ -511,7 +455,7 @@ iso88025_input(ifp, m) /* * Update interface statistics. 
*/ - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); getmicrotime(&ifp->if_lastchange); /* @@ -533,7 +477,7 @@ iso88025_input(ifp, m) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; - ifp->if_imcasts++; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } mac_hdr_len = ISO88025_HDR_LEN; @@ -546,37 +490,24 @@ iso88025_input(ifp, m) m = m_pullup(m, LLC_SNAPFRAMELEN); if (m == 0) { - ifp->if_ierrors++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } l = mtod(m, struct llc *); switch (l->llc_dsap) { -#ifdef IPX - case ETHERTYPE_IPX_8022: /* Thanks a bunch Novell */ - if ((l->llc_control != LLC_UI) || - (l->llc_ssap != ETHERTYPE_IPX_8022)) { - ifp->if_noproto++; - goto dropanyway; - } - - th->iso88025_shost[0] &= ~(TR_RII); - m_adj(m, 3); - isr = NETISR_IPX; - break; -#endif /* IPX */ case LLC_SNAP_LSAP: { u_int16_t type; if ((l->llc_control != LLC_UI) || (l->llc_ssap != LLC_SNAP_LSAP)) { - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0 || l->llc_snap.org_code[2] != 0) { - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } @@ -586,8 +517,6 @@ iso88025_input(ifp, m) #ifdef INET case ETHERTYPE_IP: th->iso88025_shost[0] &= ~(TR_RII); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; @@ -597,12 +526,6 @@ iso88025_input(ifp, m) isr = NETISR_ARP; break; #endif /* INET */ -#ifdef IPX_SNAP /* XXX: Not supported! 
*/ - case ETHERTYPE_IPX: - th->iso88025_shost[0] &= ~(TR_RII); - isr = NETISR_IPX; - break; -#endif /* IPX_SNAP */ #ifdef INET6 case ETHERTYPE_IPV6: th->iso88025_shost[0] &= ~(TR_RII); @@ -611,7 +534,7 @@ iso88025_input(ifp, m) #endif /* INET6 */ default: printf("iso88025_input: unexpected llc_snap ether_type 0x%02x\n", type); - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } break; @@ -620,7 +543,7 @@ iso88025_input(ifp, m) case LLC_ISO_LSAP: switch (l->llc_control) { case LLC_UI: - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; case LLC_XID: @@ -636,7 +559,6 @@ iso88025_input(ifp, m) case LLC_TEST_P: { struct sockaddr sa; - struct arpcom *ac; struct iso88025_sockaddr_data *th2; int i; u_char c; @@ -669,7 +591,7 @@ iso88025_input(ifp, m) } default: printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control); - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; } @@ -677,7 +599,7 @@ iso88025_input(ifp, m) #endif /* ISO */ default: printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap); - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; } @@ -687,7 +609,7 @@ iso88025_input(ifp, m) return; dropanyway: - ifp->if_iqdrops++; + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (m) m_freem(m); return; @@ -718,7 +640,7 @@ iso88025_resolvemulti (ifp, llsa, sa) if ((e_addr[0] & 1) != 1) { return (EADDRNOTAVAIL); } - *llsa = 0; + *llsa = NULL; return (0); #ifdef INET @@ -727,14 +649,7 @@ iso88025_resolvemulti (ifp, llsa, sa) if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { return (EADDRNOTAVAIL); } - sdl = malloc(sizeof *sdl, M_IFMADDR, - M_NOWAIT|M_ZERO); - if (sdl == NULL) - return (ENOMEM); - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_ISO88025; + sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025); sdl->sdl_alen = ISO88025_ADDR_LEN; e_addr = 
LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); @@ -751,20 +666,13 @@ iso88025_resolvemulti (ifp, llsa, sa) * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; - *llsa = 0; + *llsa = NULL; return (0); } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { return (EADDRNOTAVAIL); } - sdl = malloc(sizeof *sdl, M_IFMADDR, - M_NOWAIT|M_ZERO); - if (sdl == NULL) - return (ENOMEM); - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_ISO88025; + sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025); sdl->sdl_alen = ISO88025_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); @@ -783,49 +691,8 @@ iso88025_resolvemulti (ifp, llsa, sa) return (0); } -static MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals"); - -static void* -iso88025_alloc(u_char type, struct ifnet *ifp) -{ - struct arpcom *ac; - - ac = malloc(sizeof(struct arpcom), M_ISO88025, M_WAITOK | M_ZERO); - ac->ac_ifp = ifp; - - return (ac); -} - -static void -iso88025_free(void *com, u_char type) -{ - - free(com, M_ISO88025); -} - -static int -iso88025_modevent(module_t mod, int type, void *data) -{ - - switch (type) { - case MOD_LOAD: - if_register_com_alloc(IFT_ISO88025, iso88025_alloc, - iso88025_free); - break; - case MOD_UNLOAD: - if_deregister_com_alloc(IFT_ISO88025); - break; - default: - return EOPNOTSUPP; - } - - return (0); -} - static moduledata_t iso88025_mod = { - "iso88025", - iso88025_modevent, - 0 + .name = "iso88025", }; DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c index 46f3f46c..9cfb7b8b 100644 --- a/freebsd/sys/net/if_lagg.c +++ b/freebsd/sys/net/if_lagg.c @@ -5,6 +5,7 @@ /* * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org> * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org> + * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org> * * Permission to use, 
copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -37,9 +38,8 @@ __FBSDID("$FreeBSD$"); #include <sys/priv.h> #include <sys/systm.h> #include <sys/proc.h> -#include <sys/hash.h> #include <rtems/bsd/sys/lock.h> -#include <sys/rwlock.h> +#include <sys/rmlock.h> #include <sys/taskqueue.h> #include <sys/eventhandler.h> @@ -48,11 +48,11 @@ __FBSDID("$FreeBSD$"); #include <net/if_clone.h> #include <net/if_arp.h> #include <net/if_dl.h> -#include <net/if_llc.h> #include <net/if_media.h> #include <net/if_types.h> #include <net/if_var.h> #include <net/bpf.h> +#include <net/vnet.h> #if defined(INET) || defined(INET6) #include <netinet/in.h> @@ -83,15 +83,26 @@ static struct { {0, NULL} }; -SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */ -static struct mtx lagg_list_mtx; +VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ +#define V_lagg_list VNET(lagg_list) +static VNET_DEFINE(struct mtx, lagg_list_mtx); +#define V_lagg_list_mtx VNET(lagg_list_mtx) +#define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ + "if_lagg list", NULL, MTX_DEF) +#define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx) +#define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx) +#define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx) eventhandler_tag lagg_detach_cookie = NULL; static int lagg_clone_create(struct if_clone *, int, caddr_t); static void lagg_clone_destroy(struct ifnet *); +static VNET_DEFINE(struct if_clone *, lagg_cloner); +#define V_lagg_cloner VNET(lagg_cloner) +static const char laggname[] = "lagg"; + static void lagg_lladdr(struct lagg_softc *, uint8_t *); static void lagg_capabilities(struct lagg_softc *); -static void lagg_port_lladdr(struct lagg_port *, uint8_t *); +static void lagg_port_lladdr(struct lagg_port *, uint8_t *, lagg_llqtype); static void lagg_port_setlladdr(void *, int); static int lagg_port_create(struct lagg_softc *, struct ifnet *); static 
int lagg_port_destroy(struct lagg_port *, int); @@ -100,7 +111,7 @@ static void lagg_linkstate(struct lagg_softc *); static void lagg_port_state(struct ifnet *, int); static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t); static int lagg_port_output(struct ifnet *, struct mbuf *, - struct sockaddr *, struct route *); + const struct sockaddr *, struct route *); static void lagg_port_ifdetach(void *arg __unused, struct ifnet *); #ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *); @@ -114,33 +125,28 @@ static int lagg_ether_cmdmulti(struct lagg_port *, int); static int lagg_setflag(struct lagg_port *, int, int, int (*func)(struct ifnet *, int)); static int lagg_setflags(struct lagg_port *, int status); +static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt); static int lagg_transmit(struct ifnet *, struct mbuf *); static void lagg_qflush(struct ifnet *); static int lagg_media_change(struct ifnet *); static void lagg_media_status(struct ifnet *, struct ifmediareq *); static struct lagg_port *lagg_link_active(struct lagg_softc *, struct lagg_port *); -static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *); - -IFC_SIMPLE_DECLARE(lagg, 0); /* Simple round robin */ -static int lagg_rr_attach(struct lagg_softc *); -static int lagg_rr_detach(struct lagg_softc *); +static void lagg_rr_attach(struct lagg_softc *); static int lagg_rr_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Active failover */ -static int lagg_fail_attach(struct lagg_softc *); -static int lagg_fail_detach(struct lagg_softc *); static int lagg_fail_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Loadbalancing */ -static int lagg_lb_attach(struct lagg_softc *); -static int lagg_lb_detach(struct lagg_softc *); +static void lagg_lb_attach(struct lagg_softc *); +static 
void lagg_lb_detach(struct lagg_softc *); static int lagg_lb_port_create(struct lagg_port *); static void lagg_lb_port_destroy(struct lagg_port *); static int lagg_lb_start(struct lagg_softc *, struct mbuf *); @@ -148,50 +154,134 @@ static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *); +/* Broadcast */ +static int lagg_bcast_start(struct lagg_softc *, struct mbuf *); +static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *, + struct mbuf *); + /* 802.3ad LACP */ -static int lagg_lacp_attach(struct lagg_softc *); -static int lagg_lacp_detach(struct lagg_softc *); +static void lagg_lacp_attach(struct lagg_softc *); +static void lagg_lacp_detach(struct lagg_softc *); static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); static void lagg_lacp_lladdr(struct lagg_softc *); /* lagg protocol table */ -static const struct { - int ti_proto; - int (*ti_attach)(struct lagg_softc *); +static const struct lagg_proto { + lagg_proto pr_num; + void (*pr_attach)(struct lagg_softc *); + void (*pr_detach)(struct lagg_softc *); + int (*pr_start)(struct lagg_softc *, struct mbuf *); + struct mbuf * (*pr_input)(struct lagg_softc *, struct lagg_port *, + struct mbuf *); + int (*pr_addport)(struct lagg_port *); + void (*pr_delport)(struct lagg_port *); + void (*pr_linkstate)(struct lagg_port *); + void (*pr_init)(struct lagg_softc *); + void (*pr_stop)(struct lagg_softc *); + void (*pr_lladdr)(struct lagg_softc *); + void (*pr_request)(struct lagg_softc *, void *); + void (*pr_portreq)(struct lagg_port *, void *); } lagg_protos[] = { - { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach }, - { LAGG_PROTO_FAILOVER, lagg_fail_attach }, - { LAGG_PROTO_LOADBALANCE, lagg_lb_attach }, - { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach }, - { LAGG_PROTO_LACP, lagg_lacp_attach }, - { 
LAGG_PROTO_NONE, NULL } + { + .pr_num = LAGG_PROTO_NONE + }, + { + .pr_num = LAGG_PROTO_ROUNDROBIN, + .pr_attach = lagg_rr_attach, + .pr_start = lagg_rr_start, + .pr_input = lagg_rr_input, + }, + { + .pr_num = LAGG_PROTO_FAILOVER, + .pr_start = lagg_fail_start, + .pr_input = lagg_fail_input, + }, + { + .pr_num = LAGG_PROTO_LOADBALANCE, + .pr_attach = lagg_lb_attach, + .pr_detach = lagg_lb_detach, + .pr_start = lagg_lb_start, + .pr_input = lagg_lb_input, + .pr_addport = lagg_lb_port_create, + .pr_delport = lagg_lb_port_destroy, + }, + { + .pr_num = LAGG_PROTO_LACP, + .pr_attach = lagg_lacp_attach, + .pr_detach = lagg_lacp_detach, + .pr_start = lagg_lacp_start, + .pr_input = lagg_lacp_input, + .pr_addport = lacp_port_create, + .pr_delport = lacp_port_destroy, + .pr_linkstate = lacp_linkstate, + .pr_init = lacp_init, + .pr_stop = lacp_stop, + .pr_lladdr = lagg_lacp_lladdr, + .pr_request = lacp_req, + .pr_portreq = lacp_portreq, + }, + { + .pr_num = LAGG_PROTO_BROADCAST, + .pr_start = lagg_bcast_start, + .pr_input = lagg_bcast_input, + }, }; SYSCTL_DECL(_net_link); -static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, +SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); -static int lagg_failover_rx_all = 0; /* Allow input on any failover links */ -SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW, - &lagg_failover_rx_all, 0, +/* Allow input on any failover links */ +static VNET_DEFINE(int, lagg_failover_rx_all); +#define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) +SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(lagg_failover_rx_all), 0, "Accept input from any interface in a failover lagg"); -static int def_use_flowid = 1; /* Default value for using M_FLOWID */ -TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid); -SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW, - &def_use_flowid, 0, + +/* Default value for using flowid */ +static 
VNET_DEFINE(int, def_use_flowid) = 1; +#define V_def_use_flowid VNET(def_use_flowid) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, + &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); +/* Default value for flowid shift */ +static VNET_DEFINE(int, def_flowid_shift) = 16; +#define V_def_flowid_shift VNET(def_flowid_shift) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, + &VNET_NAME(def_flowid_shift), 0, + "Default setting for flowid shift for load sharing"); + +static void +vnet_lagg_init(const void *unused __unused) +{ + + LAGG_LIST_LOCK_INIT(); + SLIST_INIT(&V_lagg_list); + V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create, + lagg_clone_destroy, 0); +} +VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_lagg_init, NULL); + +static void +vnet_lagg_uninit(const void *unused __unused) +{ + + if_clone_detach(V_lagg_cloner); + LAGG_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, + vnet_lagg_uninit, NULL); + static int lagg_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF); - SLIST_INIT(&lagg_list); - if_clone_attach(&lagg_cloner); lagg_input_p = lagg_input; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( @@ -201,10 +291,8 @@ lagg_modevent(module_t mod, int type, void *data) case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); - if_clone_detach(&lagg_cloner); lagg_input_p = NULL; lagg_linkstate_p = NULL; - mtx_destroy(&lagg_list_mtx); break; default: return (EOPNOTSUPP); @@ -221,7 +309,117 @@ static moduledata_t lagg_mod = { DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_lagg, 1); -#if __FreeBSD_version >= 800000 +static void +lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr) +{ + + KASSERT(sc->sc_proto == 
LAGG_PROTO_NONE, ("%s: sc %p has proto", + __func__, sc)); + + if (sc->sc_ifflags & IFF_DEBUG) + if_printf(sc->sc_ifp, "using proto %u\n", pr); + + if (lagg_protos[pr].pr_attach != NULL) + lagg_protos[pr].pr_attach(sc); + sc->sc_proto = pr; +} + +static void +lagg_proto_detach(struct lagg_softc *sc) +{ + lagg_proto pr; + + LAGG_WLOCK_ASSERT(sc); + + pr = sc->sc_proto; + sc->sc_proto = LAGG_PROTO_NONE; + + if (lagg_protos[pr].pr_detach != NULL) + lagg_protos[pr].pr_detach(sc); + else + LAGG_WUNLOCK(sc); +} + +static int +lagg_proto_start(struct lagg_softc *sc, struct mbuf *m) +{ + + return (lagg_protos[sc->sc_proto].pr_start(sc, m)); +} + +static struct mbuf * +lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) +{ + + return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m)); +} + +static int +lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp) +{ + + if (lagg_protos[sc->sc_proto].pr_addport == NULL) + return (0); + else + return (lagg_protos[sc->sc_proto].pr_addport(lp)); +} + +static void +lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp) +{ + + if (lagg_protos[sc->sc_proto].pr_delport != NULL) + lagg_protos[sc->sc_proto].pr_delport(lp); +} + +static void +lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp) +{ + + if (lagg_protos[sc->sc_proto].pr_linkstate != NULL) + lagg_protos[sc->sc_proto].pr_linkstate(lp); +} + +static void +lagg_proto_init(struct lagg_softc *sc) +{ + + if (lagg_protos[sc->sc_proto].pr_init != NULL) + lagg_protos[sc->sc_proto].pr_init(sc); +} + +static void +lagg_proto_stop(struct lagg_softc *sc) +{ + + if (lagg_protos[sc->sc_proto].pr_stop != NULL) + lagg_protos[sc->sc_proto].pr_stop(sc); +} + +static void +lagg_proto_lladdr(struct lagg_softc *sc) +{ + + if (lagg_protos[sc->sc_proto].pr_lladdr != NULL) + lagg_protos[sc->sc_proto].pr_lladdr(sc); +} + +static void +lagg_proto_request(struct lagg_softc *sc, void *v) +{ + + if (lagg_protos[sc->sc_proto].pr_request != NULL) + 
lagg_protos[sc->sc_proto].pr_request(sc, v); +} + +static void +lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v) +{ + + if (lagg_protos[sc->sc_proto].pr_portreq != NULL) + lagg_protos[sc->sc_proto].pr_portreq(lp, v); +} + /* * This routine is run via an vlan * config EVENT @@ -229,18 +427,19 @@ MODULE_VERSION(if_lagg, 1); static void lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { - struct lagg_softc *sc = ifp->if_softc; - struct lagg_port *lp; + struct lagg_softc *sc = ifp->if_softc; + struct lagg_port *lp; + struct rm_priotracker tracker; - if (ifp->if_softc != arg) /* Not our event */ - return; + if (ifp->if_softc != arg) /* Not our event */ + return; - LAGG_RLOCK(sc); - if (!SLIST_EMPTY(&sc->sc_ports)) { - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) - EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag); - } - LAGG_RUNLOCK(sc); + LAGG_RLOCK(sc, &tracker); + if (!SLIST_EMPTY(&sc->sc_ports)) { + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) + EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag); + } + LAGG_RUNLOCK(sc, &tracker); } /* @@ -250,30 +449,27 @@ lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) static void lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { - struct lagg_softc *sc = ifp->if_softc; - struct lagg_port *lp; + struct lagg_softc *sc = ifp->if_softc; + struct lagg_port *lp; + struct rm_priotracker tracker; - if (ifp->if_softc != arg) /* Not our event */ - return; + if (ifp->if_softc != arg) /* Not our event */ + return; - LAGG_RLOCK(sc); - if (!SLIST_EMPTY(&sc->sc_ports)) { - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) - EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag); - } - LAGG_RUNLOCK(sc); + LAGG_RLOCK(sc, &tracker); + if (!SLIST_EMPTY(&sc->sc_ports)) { + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) + EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag); + } + LAGG_RUNLOCK(sc, &tracker); } -#endif static int lagg_clone_create(struct if_clone *ifc, int unit, 
caddr_t params) { struct lagg_softc *sc; struct ifnet *ifp; - int i, error = 0; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ - struct sysctl_oid *oid; - char num[14]; /* sufficient for 32 bits */ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); @@ -282,32 +478,15 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) return (ENOSPC); } - sysctl_ctx_init(&sc->ctx); - snprintf(num, sizeof(num), "%u", unit); - sc->use_flowid = def_use_flowid; - oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg), - OID_AUTO, num, CTLFLAG_RD, NULL, ""); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "use_flowid", CTLFLAG_RW, &sc->use_flowid, sc->use_flowid, - "Use flow id for load sharing"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "count", CTLFLAG_RD, &sc->sc_count, sc->sc_count, - "Total number of ports"); + if (V_def_use_flowid) + sc->sc_opts |= LAGG_OPT_USE_FLOWID; + sc->flowid_shift = V_def_flowid_shift; + /* Hash all layers by default */ - sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; + sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4; + + lagg_proto_attach(sc, LAGG_PROTO_DEFAULT); - sc->sc_proto = LAGG_PROTO_NONE; - for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { - if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) { - sc->sc_proto = lagg_protos[i].ti_proto; - if ((error = lagg_protos[i].ti_attach(sc)) != 0) { - if_free_type(ifp, IFT_ETHER); - free(sc, M_DEVBUF); - return (error); - } - break; - } - } LAGG_LOCK_INIT(sc); SLIST_INIT(&sc->sc_ports); TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc); @@ -318,32 +497,31 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); - if_initname(ifp, ifc->ifc_name, unit); - ifp->if_type = IFT_ETHER; + if_initname(ifp, laggname, unit); ifp->if_softc = 
sc; ifp->if_transmit = lagg_transmit; ifp->if_qflush = lagg_qflush; ifp->if_init = lagg_init; ifp->if_ioctl = lagg_ioctl; + ifp->if_get_counter = lagg_get_counter; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; + ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; /* - * Attach as an ordinary ethernet device, childs will be attached + * Attach as an ordinary ethernet device, children will be attached * as special device IFT_IEEE8023ADLAG. */ ether_ifattach(ifp, eaddr); -#if __FreeBSD_version >= 800000 sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); -#endif /* Insert into the global list of laggs */ - mtx_lock(&lagg_list_mtx); - SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries); - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_LOCK(); + SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries); + LAGG_LIST_UNLOCK(); return (0); } @@ -359,47 +537,64 @@ lagg_clone_destroy(struct ifnet *ifp) lagg_stop(sc); ifp->if_flags &= ~IFF_UP; -#if __FreeBSD_version >= 800000 EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); -#endif /* Shutdown and remove lagg ports */ while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL) lagg_port_destroy(lp, 1); /* Unhook the aggregation protocol */ - if (sc->sc_detach != NULL) - (*sc->sc_detach)(sc); + lagg_proto_detach(sc); + LAGG_UNLOCK_ASSERT(sc); - LAGG_WUNLOCK(sc); - - sysctl_ctx_free(&sc->ctx); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); - if_free_type(ifp, IFT_ETHER); + if_free(ifp); - mtx_lock(&lagg_list_mtx); - SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries); - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_LOCK(); + SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); + LAGG_LIST_UNLOCK(); taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task); LAGG_LOCK_DESTROY(sc); free(sc, M_DEVBUF); } -static void +/* + * Set 
link-layer address on the lagg interface itself. + * + * Set noinline to be dtrace-friendly + */ +static __noinline void lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr) { struct ifnet *ifp = sc->sc_ifp; + struct lagg_port lp; if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) return; + LAGG_WLOCK_ASSERT(sc); + /* + * Set the link layer address on the lagg interface. + * lagg_proto_lladdr() notifies the MAC change to + * the aggregation protocol. iflladdr_event handler which + * may trigger gratuitous ARPs for INET will be handled in + * a taskqueue. + */ bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN); - /* Let the protocol know the MAC has changed */ - if (sc->sc_lladdr != NULL) - (*sc->sc_lladdr)(sc); - EVENTHANDLER_INVOKE(iflladdr_event, ifp); + lagg_proto_lladdr(sc); + + /* + * Send notification request for lagg interface + * itself. Note that new lladdr is already set. + */ + bzero(&lp, sizeof(lp)); + lp.lp_ifp = sc->sc_ifp; + lp.lp_softc = sc; + + /* Do not request lladdr change */ + lagg_port_lladdr(&lp, lladdr, LAGG_LLQTYPE_VIRT); } static void @@ -440,54 +635,63 @@ lagg_capabilities(struct lagg_softc *sc) } } -static void -lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr) +/* + * Enqueue interface lladdr notification. + * If request is already queued, it is updated. + * If setting lladdr is also desired, @do_change has to be set to 1. + * + * Set noinline to be dtrace-friendly + */ +static __noinline void +lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr, lagg_llqtype llq_type) { struct lagg_softc *sc = lp->lp_softc; struct ifnet *ifp = lp->lp_ifp; struct lagg_llq *llq; - int pending = 0; LAGG_WLOCK_ASSERT(sc); - if (lp->lp_detaching || - memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) + /* + * Do not enqueue requests where lladdr is the same for + * "physical" interfaces (e.g. 
ports in lagg) + */ + if (llq_type == LAGG_LLQTYPE_PHYS && + memcmp(IF_LLADDR(ifp), lladdr, ETHER_ADDR_LEN) == 0) return; /* Check to make sure its not already queued to be changed */ SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) { if (llq->llq_ifp == ifp) { - pending = 1; - break; + /* Update lladdr, it may have changed */ + bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN); + return; } } - if (!pending) { - llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT); - if (llq == NULL) /* XXX what to do */ - return; - } + llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT | M_ZERO); + if (llq == NULL) /* XXX what to do */ + return; - /* Update the lladdr even if pending, it may have changed */ llq->llq_ifp = ifp; + llq->llq_type = llq_type; bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN); - - if (!pending) - SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries); + /* XXX: We should insert to tail */ + SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries); taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task); } /* * Set the interface MAC address from a taskqueue to avoid a LOR. + * + * Set noinline to be dtrace-friendly */ -static void +static __noinline void lagg_port_setlladdr(void *arg, int pending) { struct lagg_softc *sc = (struct lagg_softc *)arg; struct lagg_llq *llq, *head; struct ifnet *ifp; - int error; /* Grab a local reference of the queue and remove it from the softc */ LAGG_WLOCK(sc); @@ -502,14 +706,19 @@ lagg_port_setlladdr(void *arg, int pending) for (llq = head; llq != NULL; llq = head) { ifp = llq->llq_ifp; - /* Set the link layer address */ CURVNET_SET(ifp->if_vnet); - error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN); - CURVNET_RESTORE(); - if (error) - printf("%s: setlladdr failed on %s\n", __func__, - ifp->if_xname); + /* + * Set the link layer address on the laggport interface. + * Note that if_setlladdr() or iflladdr_event handler + * may result in arp transmission / lltable updates. 
+ */ + if (llq->llq_type == LAGG_LLQTYPE_PHYS) + if_setlladdr(ifp, llq->llq_lladdr, + ETHER_ADDR_LEN); + else + EVENTHANDLER_INVOKE(iflladdr_event, ifp); + CURVNET_RESTORE(); head = SLIST_NEXT(llq, llq_entries); free(llq, M_DEVBUF); } @@ -520,7 +729,8 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) { struct lagg_softc *sc_ptr; struct lagg_port *lp, *tlp; - int error = 0; + int error, i; + uint64_t *pval; LAGG_WLOCK_ASSERT(sc); @@ -538,37 +748,9 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) } /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ - if (ifp->if_type != IFT_ETHER) + if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) return (EPROTONOSUPPORT); -#ifdef INET6 - /* - * The member interface should not have inet6 address because - * two interfaces with a valid link-local scope zone must not be - * merged in any form. This restriction is needed to - * prevent violation of link-local scope zone. Attempts to - * add a member interface which has inet6 addresses triggers - * removal of all inet6 addresses on the member interface. 
- */ - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { - if (in6ifa_llaonifp(lp->lp_ifp)) { - in6_ifdetach(lp->lp_ifp); - if_printf(sc->sc_ifp, - "IPv6 addresses on %s have been removed " - "before adding it as a member to prevent " - "IPv6 address scope violation.\n", - lp->lp_ifp->if_xname); - } - } - if (in6ifa_llaonifp(ifp)) { - in6_ifdetach(ifp); - if_printf(sc->sc_ifp, - "IPv6 addresses on %s have been removed " - "before adding it as a member to prevent " - "IPv6 address scope violation.\n", - ifp->if_xname); - } -#endif /* Allow the first Ethernet member to define the MTU */ if (SLIST_EMPTY(&sc->sc_ports)) sc->sc_ifp->if_mtu = ifp->if_mtu; @@ -583,10 +765,10 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) return (ENOMEM); /* Check if port is a stacked lagg */ - mtx_lock(&lagg_list_mtx); - SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) { + LAGG_LIST_LOCK(); + SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) { if (ifp == sc_ptr->sc_ifp) { - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (EINVAL); /* XXX disable stacking for the moment, its untested */ @@ -594,14 +776,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) lp->lp_flags |= LAGG_PORT_STACK; if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (E2BIG); } #endif } } - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); /* Change the interface type */ lp->lp_iftype = ifp->if_type; @@ -620,10 +802,15 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) if (SLIST_EMPTY(&sc->sc_ports)) { sc->sc_primary = lp; + /* First port in lagg. Update/notify lagg lladdress */ lagg_lladdr(sc, IF_LLADDR(ifp)); } else { - /* Update link layer address for this port */ - lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp)); + + /* + * Update link layer address for this port and + * send notifications to other subsystems. 
+ */ + lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp), LAGG_LLQTYPE_PHYS); } /* @@ -649,19 +836,21 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) lagg_capabilities(sc); lagg_linkstate(sc); + /* Read port counters */ + pval = lp->port_counters.val; + for (i = 0; i < IFCOUNTERS; i++, pval++) + *pval = ifp->if_get_counter(ifp, i); /* Add multicast addresses and interface flags to this port */ lagg_ether_cmdmulti(lp, 1); lagg_setflags(lp, 1); - if (sc->sc_port_create != NULL) - error = (*sc->sc_port_create)(lp); - if (error) { - /* remove the port again, without calling sc_port_destroy */ + if ((error = lagg_proto_addport(sc, lp)) != 0) { + /* Remove the port, without calling pr_delport. */ lagg_port_destroy(lp, 0); return (error); } - return (error); + return (0); } #ifdef LAGG_PORT_STACKING @@ -686,17 +875,19 @@ lagg_port_checkstacking(struct lagg_softc *sc) #endif static int -lagg_port_destroy(struct lagg_port *lp, int runpd) +lagg_port_destroy(struct lagg_port *lp, int rundelport) { struct lagg_softc *sc = lp->lp_softc; - struct lagg_port *lp_ptr; + struct lagg_port *lp_ptr, *lp0; struct lagg_llq *llq; struct ifnet *ifp = lp->lp_ifp; + uint64_t *pval, vdiff; + int i; LAGG_WLOCK_ASSERT(sc); - if (runpd && sc->sc_port_destroy != NULL) - (*sc->sc_port_destroy)(lp); + if (rundelport) + lagg_proto_delport(sc, lp); /* * Remove multicast addresses and interface flags from this port and @@ -705,7 +896,7 @@ lagg_port_destroy(struct lagg_port *lp, int runpd) if (!lp->lp_detaching) { lagg_ether_cmdmulti(lp, 0); lagg_setflags(lp, 0); - lagg_port_lladdr(lp, lp->lp_lladdr); + lagg_port_lladdr(lp, lp->lp_lladdr, LAGG_LLQTYPE_PHYS); } /* Restore interface */ @@ -714,6 +905,13 @@ lagg_port_destroy(struct lagg_port *lp, int runpd) ifp->if_output = lp->lp_output; ifp->if_lagg = NULL; + /* Update detached port counters */ + pval = lp->port_counters.val; + for (i = 0; i < IFCOUNTERS; i++, pval++) { + vdiff = ifp->if_get_counter(ifp, i) - *pval; + sc->detached_counters.val[i] 
+= vdiff; + } + /* Finally, remove the port from the lagg */ SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries); sc->sc_count--; @@ -722,18 +920,24 @@ lagg_port_destroy(struct lagg_port *lp, int runpd) if (lp == sc->sc_primary) { uint8_t lladdr[ETHER_ADDR_LEN]; - if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) { + if ((lp0 = SLIST_FIRST(&sc->sc_ports)) == NULL) { bzero(&lladdr, ETHER_ADDR_LEN); } else { - bcopy(lp_ptr->lp_lladdr, + bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN); } lagg_lladdr(sc, lladdr); - sc->sc_primary = lp_ptr; - /* Update link layer address for each port */ + /* Mark lp0 as new primary */ + sc->sc_primary = lp0; + + /* + * Enqueue lladdr update/notification for each port + * (new primary needs update as well, to switch from + * old lladdr to its 'real' one). + */ SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) - lagg_port_lladdr(lp_ptr, lladdr); + lagg_port_lladdr(lp_ptr, lladdr, LAGG_LLQTYPE_PHYS); } /* Remove any pending lladdr changes from the queue */ @@ -767,6 +971,7 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct lagg_softc *sc; struct lagg_port *lp = NULL; int error = 0; + struct rm_priotracker tracker; /* Should be checked by the caller */ if (ifp->if_type != IFT_IEEE8023ADLAG || @@ -781,15 +986,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } - LAGG_RLOCK(sc); + LAGG_RLOCK(sc, &tracker); if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); break; } lagg_port2req(lp, rp); - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); break; case SIOCSIFCAP: @@ -826,11 +1031,66 @@ fallback: } /* + * Requests counter @cnt data. + * + * Counter value is calculated the following way: + * 1) for each port, sum difference between current and "initial" measurements. + * 2) add lagg logical interface counters. + * 3) add data from detached_counters array. 
+ * + * We also do the following things on ports attach/detach: + * 1) On port attach we store all counters it has into port_counter array. + * 2) On port detach we add the different between "initial" and + * current counters data to detached_counters array. + */ +static uint64_t +lagg_get_counter(struct ifnet *ifp, ift_counter cnt) +{ + struct lagg_softc *sc; + struct lagg_port *lp; + struct ifnet *lpifp; + struct rm_priotracker tracker; + uint64_t newval, oldval, vsum; + + /* Revise this when we've got non-generic counters. */ + KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); + + sc = (struct lagg_softc *)ifp->if_softc; + LAGG_RLOCK(sc, &tracker); + + vsum = 0; + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { + /* Saved attached value */ + oldval = lp->port_counters.val[cnt]; + /* current value */ + lpifp = lp->lp_ifp; + newval = lpifp->if_get_counter(lpifp, cnt); + /* Calculate diff and save new */ + vsum += newval - oldval; + } + + /* + * Add counter data which might be added by upper + * layer protocols operating on logical interface. + */ + vsum += if_get_counter_default(ifp, cnt); + + /* + * Add counter data from detached ports counters + */ + vsum += sc->detached_counters.val[cnt]; + + LAGG_RUNLOCK(sc, &tracker); + + return (vsum); +} + +/* * For direct output to child ports. 
*/ static int lagg_port_output(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro) + const struct sockaddr *dst, struct route *ro) { struct lagg_port *lp = ifp->if_lagg; @@ -874,8 +1134,7 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp) strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname)); rp->rp_prio = lp->lp_prio; rp->rp_flags = lp->lp_flags; - if (sc->sc_portreq != NULL) - (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc); + lagg_proto_portreq(sc, lp, &rp->rp_psc); /* Add protocol specific flags */ switch (sc->sc_proto) { @@ -888,7 +1147,7 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp) case LAGG_PROTO_ROUNDROBIN: case LAGG_PROTO_LOADBALANCE: - case LAGG_PROTO_ETHERCHANNEL: + case LAGG_PROTO_BROADCAST: if (LAGG_PORTACTIVE(lp)) rp->rp_flags |= LAGG_PORT_ACTIVE; break; @@ -910,8 +1169,8 @@ static void lagg_init(void *xsc) { struct lagg_softc *sc = (struct lagg_softc *)xsc; - struct lagg_port *lp; struct ifnet *ifp = sc->sc_ifp; + struct lagg_port *lp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; @@ -919,12 +1178,16 @@ lagg_init(void *xsc) LAGG_WLOCK(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; - /* Update the port lladdrs */ + + /* + * Update the port lladdrs if needed. + * This might be if_setlladdr() notification + * that lladdr has been changed. 
+ */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) - lagg_port_lladdr(lp, IF_LLADDR(ifp)); + lagg_port_lladdr(lp, IF_LLADDR(ifp), LAGG_LLQTYPE_PHYS); - if (sc->sc_init != NULL) - (*sc->sc_init)(sc); + lagg_proto_init(sc); LAGG_WUNLOCK(sc); } @@ -941,8 +1204,7 @@ lagg_stop(struct lagg_softc *sc) ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if (sc->sc_stop != NULL) - (*sc->sc_stop)(sc); + lagg_proto_stop(sc); } static int @@ -950,6 +1212,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_reqall *ra = (struct lagg_reqall *)data; + struct lagg_reqopts *ro = (struct lagg_reqopts *)data; struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf; struct lagg_reqflags *rf = (struct lagg_reqflags *)data; struct ifreq *ifr = (struct ifreq *)data; @@ -958,25 +1221,24 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct thread *td = curthread; char *buf, *outbuf; int count, buflen, len, error = 0; + struct rm_priotracker tracker; bzero(&rpbuf, sizeof(rpbuf)); switch (cmd) { case SIOCGLAGG: - LAGG_RLOCK(sc); + LAGG_RLOCK(sc, &tracker); count = 0; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) count++; buflen = count * sizeof(struct lagg_reqport); - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); - LAGG_RLOCK(sc); + LAGG_RLOCK(sc, &tracker); ra->ra_proto = sc->sc_proto; - if (sc->sc_req != NULL) - (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc); - + lagg_proto_request(sc, &ra->ra_psc); count = 0; buf = outbuf; len = min(ra->ra_size, buflen); @@ -990,7 +1252,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) buf += sizeof(rpbuf); len -= sizeof(rpbuf); } - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); ra->ra_ports = count; ra->ra_size = count * sizeof(rpbuf); error = copyout(outbuf, ra->ra_port, ra->ra_size); @@ -1004,49 +1266,150 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EPROTONOSUPPORT; break; } + 
LAGG_WLOCK(sc); - if (sc->sc_proto != LAGG_PROTO_NONE) { - /* Reset protocol first in case detach unlocks */ - sc->sc_proto = LAGG_PROTO_NONE; - error = sc->sc_detach(sc); - sc->sc_detach = NULL; - sc->sc_start = NULL; - sc->sc_input = NULL; - sc->sc_port_create = NULL; - sc->sc_port_destroy = NULL; - sc->sc_linkstate = NULL; - sc->sc_init = NULL; - sc->sc_stop = NULL; - sc->sc_lladdr = NULL; - sc->sc_req = NULL; - sc->sc_portreq = NULL; - } else if (sc->sc_input != NULL) { - /* Still detaching */ - error = EBUSY; + lagg_proto_detach(sc); + LAGG_UNLOCK_ASSERT(sc); + lagg_proto_attach(sc, ra->ra_proto); + break; + case SIOCGLAGGOPTS: + ro->ro_opts = sc->sc_opts; + if (sc->sc_proto == LAGG_PROTO_LACP) { + struct lacp_softc *lsc; + + lsc = (struct lacp_softc *)sc->sc_psc; + if (lsc->lsc_debug.lsc_tx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_TXTEST; + if (lsc->lsc_debug.lsc_rx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_RXTEST; + if (lsc->lsc_strict_mode != 0) + ro->ro_opts |= LAGG_OPT_LACP_STRICT; + if (lsc->lsc_fast_timeout != 0) + ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT; + + ro->ro_active = sc->sc_active; + } else { + ro->ro_active = 0; + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) + ro->ro_active += LAGG_PORTACTIVE(lp); } - if (error != 0) { - LAGG_WUNLOCK(sc); + ro->ro_bkt = sc->sc_bkt; + ro->ro_flapping = sc->sc_flapping; + ro->ro_flowid_shift = sc->flowid_shift; + break; + case SIOCSLAGGOPTS: + if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) { + if (ro->ro_bkt == 0) + sc->sc_bkt = 1; // Minimum 1 packet per iface. + else + sc->sc_bkt = ro->ro_bkt; + } + error = priv_check(td, PRIV_NET_LAGG); + if (error) + break; + if (ro->ro_opts == 0) + break; + /* + * Set options. LACP options are stored in sc->sc_psc, + * not in sc_opts. 
+ */ + int valid, lacp; + + switch (ro->ro_opts) { + case LAGG_OPT_USE_FLOWID: + case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_FLOWIDSHIFT: + valid = 1; + lacp = 0; + break; + case LAGG_OPT_LACP_TXTEST: + case -LAGG_OPT_LACP_TXTEST: + case LAGG_OPT_LACP_RXTEST: + case -LAGG_OPT_LACP_RXTEST: + case LAGG_OPT_LACP_STRICT: + case -LAGG_OPT_LACP_STRICT: + case LAGG_OPT_LACP_TIMEOUT: + case -LAGG_OPT_LACP_TIMEOUT: + valid = lacp = 1; + break; + default: + valid = lacp = 0; break; } - for (int i = 0; i < (sizeof(lagg_protos) / - sizeof(lagg_protos[0])); i++) { - if (lagg_protos[i].ti_proto == ra->ra_proto) { - if (sc->sc_ifflags & IFF_DEBUG) - printf("%s: using proto %u\n", - sc->sc_ifname, - lagg_protos[i].ti_proto); - sc->sc_proto = lagg_protos[i].ti_proto; - if (sc->sc_proto != LAGG_PROTO_NONE) - error = lagg_protos[i].ti_attach(sc); - LAGG_WUNLOCK(sc); - return (error); + + LAGG_WLOCK(sc); + + if (valid == 0 || + (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) { + /* Invalid combination of options specified. */ + error = EINVAL; + LAGG_WUNLOCK(sc); + break; /* Return from SIOCSLAGGOPTS. */ + } + /* + * Store new options into sc->sc_opts except for + * FLOWIDSHIFT and LACP options. 
+ */ + if (lacp == 0) { + if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT) + sc->flowid_shift = ro->ro_flowid_shift; + else if (ro->ro_opts > 0) + sc->sc_opts |= ro->ro_opts; + else + sc->sc_opts &= ~ro->ro_opts; + } else { + struct lacp_softc *lsc; + struct lacp_port *lp; + + lsc = (struct lacp_softc *)sc->sc_psc; + + switch (ro->ro_opts) { + case LAGG_OPT_LACP_TXTEST: + lsc->lsc_debug.lsc_tx_test = 1; + break; + case -LAGG_OPT_LACP_TXTEST: + lsc->lsc_debug.lsc_tx_test = 0; + break; + case LAGG_OPT_LACP_RXTEST: + lsc->lsc_debug.lsc_rx_test = 1; + break; + case -LAGG_OPT_LACP_RXTEST: + lsc->lsc_debug.lsc_rx_test = 0; + break; + case LAGG_OPT_LACP_STRICT: + lsc->lsc_strict_mode = 1; + break; + case -LAGG_OPT_LACP_STRICT: + lsc->lsc_strict_mode = 0; + break; + case LAGG_OPT_LACP_TIMEOUT: + LACP_LOCK(lsc); + LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) + lp->lp_state |= LACP_STATE_TIMEOUT; + LACP_UNLOCK(lsc); + lsc->lsc_fast_timeout = 1; + break; + case -LAGG_OPT_LACP_TIMEOUT: + LACP_LOCK(lsc); + LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) + lp->lp_state &= ~LACP_STATE_TIMEOUT; + LACP_UNLOCK(lsc); + lsc->lsc_fast_timeout = 0; + break; } } LAGG_WUNLOCK(sc); - error = EPROTONOSUPPORT; break; case SIOCGLAGGFLAGS: - rf->rf_flags = sc->sc_flags; + rf->rf_flags = 0; + LAGG_RLOCK(sc, &tracker); + if (sc->sc_flags & MBUF_HASHFLAG_L2) + rf->rf_flags |= LAGG_F_HASHL2; + if (sc->sc_flags & MBUF_HASHFLAG_L3) + rf->rf_flags |= LAGG_F_HASHL3; + if (sc->sc_flags & MBUF_HASHFLAG_L4) + rf->rf_flags |= LAGG_F_HASHL4; + LAGG_RUNLOCK(sc, &tracker); break; case SIOCSLAGGHASH: error = priv_check(td, PRIV_NET_LAGG); @@ -1057,8 +1420,13 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } LAGG_WLOCK(sc); - sc->sc_flags &= ~LAGG_F_HASHMASK; - sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK; + sc->sc_flags = 0; + if (rf->rf_flags & LAGG_F_HASHL2) + sc->sc_flags |= MBUF_HASHFLAG_L2; + if (rf->rf_flags & LAGG_F_HASHL3) + sc->sc_flags |= MBUF_HASHFLAG_L3; + if (rf->rf_flags & LAGG_F_HASHL4) + 
sc->sc_flags |= MBUF_HASHFLAG_L4; LAGG_WUNLOCK(sc); break; case SIOCGLAGGPORT: @@ -1068,16 +1436,16 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } - LAGG_RLOCK(sc); + LAGG_RLOCK(sc, &tracker); if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); break; } lagg_port2req(lp, rp); - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); break; case SIOCSLAGGPORT: error = priv_check(td, PRIV_NET_LAGG); @@ -1088,6 +1456,26 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EINVAL; break; } +#ifdef INET6 + /* + * A laggport interface should not have inet6 address + * because two interfaces with a valid link-local + * scope zone must not be merged in any form. This + * restriction is needed to prevent violation of + * link-local scope zone. Attempts to add a laggport + * interface which has inet6 addresses triggers + * removal of all inet6 addresses on the member + * interface. 
+ */ + if (in6ifa_llaonifp(tpif)) { + in6_ifdetach(tpif); + if_printf(sc->sc_ifp, + "IPv6 addresses on %s have been removed " + "before adding it as a member to prevent " + "IPv6 address scope violation.\n", + tpif->if_xname); + } +#endif LAGG_WLOCK(sc); error = lagg_port_create(sc, tpif); LAGG_WUNLOCK(sc); @@ -1186,39 +1574,39 @@ lagg_ether_cmdmulti(struct lagg_port *lp, int set) struct ifnet *ifp = lp->lp_ifp; struct ifnet *scifp = sc->sc_ifp; struct lagg_mc *mc; - struct ifmultiaddr *ifma, *rifma = NULL; - struct sockaddr_dl sdl; + struct ifmultiaddr *ifma; int error; LAGG_WLOCK_ASSERT(sc); - bzero((char *)&sdl, sizeof(sdl)); - sdl.sdl_len = sizeof(sdl); - sdl.sdl_family = AF_LINK; - sdl.sdl_type = IFT_ETHER; - sdl.sdl_alen = ETHER_ADDR_LEN; - sdl.sdl_index = ifp->if_index; - if (set) { + IF_ADDR_WLOCK(scifp); TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; - bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), - LLADDR(&sdl), ETHER_ADDR_LEN); - - error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma); - if (error) - return (error); mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT); - if (mc == NULL) + if (mc == NULL) { + IF_ADDR_WUNLOCK(scifp); return (ENOMEM); - mc->mc_ifma = rifma; + } + bcopy(ifma->ifma_addr, &mc->mc_addr, + ifma->ifma_addr->sa_len); + mc->mc_addr.sdl_index = ifp->if_index; + mc->mc_ifma = NULL; SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries); } + IF_ADDR_WUNLOCK(scifp); + SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) { + error = if_addmulti(ifp, + (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma); + if (error) + return (error); + } } else { while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) { SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries); - if_delmulti_ifma(mc->mc_ifma); + if (mc->mc_ifma && !lp->lp_detaching) + if_delmulti_ifma(mc->mc_ifma); free(mc, M_DEVBUF); } } @@ -1228,7 +1616,7 @@ lagg_ether_cmdmulti(struct lagg_port *lp, int set) /* Handle a ref 
counted flag that should be set on the lagg port as well */ static int lagg_setflag(struct lagg_port *lp, int flag, int status, - int (*func)(struct ifnet *, int)) + int (*func)(struct ifnet *, int)) { struct lagg_softc *sc = lp->lp_softc; struct ifnet *scifp = sc->sc_ifp; @@ -1283,30 +1671,27 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; int error, len, mcast; + struct rm_priotracker tracker; len = m->m_pkthdr.len; mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; - LAGG_RLOCK(sc); + LAGG_RLOCK(sc, &tracker); /* We need a Tx algorithm and at least one port */ if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) { - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); m_freem(m); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENXIO); } ETHER_BPF_MTAP(ifp, m); - error = (*sc->sc_start)(sc, m); - LAGG_RUNLOCK(sc); + error = lagg_proto_start(sc, m); + LAGG_RUNLOCK(sc, &tracker); - if (error == 0) { - ifp->if_opackets++; - ifp->if_omcasts += mcast; - ifp->if_obytes += len; - } else - ifp->if_oerrors++; + if (error != 0) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } @@ -1325,31 +1710,33 @@ lagg_input(struct ifnet *ifp, struct mbuf *m) struct lagg_port *lp = ifp->if_lagg; struct lagg_softc *sc = lp->lp_softc; struct ifnet *scifp = sc->sc_ifp; + struct rm_priotracker tracker; - LAGG_RLOCK(sc); + LAGG_RLOCK(sc, &tracker); if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (lp->lp_flags & LAGG_PORT_DISABLED) || sc->sc_proto == LAGG_PROTO_NONE) { - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); m_freem(m); return (NULL); } ETHER_BPF_MTAP(scifp, m); - m = (*sc->sc_input)(sc, lp, m); + if (lp->lp_detaching != 0) { + m_freem(m); + m = NULL; + } else + m = lagg_proto_input(sc, lp, m); if (m != NULL) { - scifp->if_ipackets++; - scifp->if_ibytes += m->m_pkthdr.len; - if (scifp->if_flags & IFF_MONITOR) { m_freem(m); m = NULL; } } - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, 
&tracker); return (m); } @@ -1370,16 +1757,17 @@ lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_port *lp; + struct rm_priotracker tracker; imr->ifm_status = IFM_AVALID; imr->ifm_active = IFM_ETHER | IFM_AUTO; - LAGG_RLOCK(sc); + LAGG_RLOCK(sc, &tracker); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (LAGG_PORTACTIVE(lp)) imr->ifm_status |= IFM_ACTIVE; } - LAGG_RUNLOCK(sc); + LAGG_RUNLOCK(sc, &tracker); } static void @@ -1391,7 +1779,7 @@ lagg_linkstate(struct lagg_softc *sc) /* Our link is considered up if at least one of our ports is active */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { - if (lp->lp_link_state == LINK_STATE_UP) { + if (lp->lp_ifp->if_link_state == LINK_STATE_UP) { new_link = LINK_STATE_UP; break; } @@ -1406,7 +1794,7 @@ lagg_linkstate(struct lagg_softc *sc) break; case LAGG_PROTO_ROUNDROBIN: case LAGG_PROTO_LOADBALANCE: - case LAGG_PROTO_ETHERCHANNEL: + case LAGG_PROTO_BROADCAST: speed = 0; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) speed += lp->lp_ifp->if_baudrate; @@ -1431,8 +1819,7 @@ lagg_port_state(struct ifnet *ifp, int state) LAGG_WLOCK(sc); lagg_linkstate(sc); - if (sc->sc_linkstate != NULL) - (*sc->sc_linkstate)(lp); + lagg_proto_linkstate(sc, lp); LAGG_WUNLOCK(sc); } @@ -1487,120 +1874,6 @@ found: return (rval); } -static const void * -lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf) -{ - if (m->m_pkthdr.len < (off + len)) { - return (NULL); - } else if (m->m_len < (off + len)) { - m_copydata(m, off, len, buf); - return (buf); - } - return (mtod(m, char *) + off); -} - -uint32_t -lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key) -{ - uint16_t etype; - uint32_t p = key; - int off; - struct ether_header *eh; - const struct ether_vlan_header *vlan; -#ifdef INET - const struct ip *ip; - const uint32_t *ports; - int iphlen; -#endif -#ifdef INET6 - const struct ip6_hdr *ip6; - uint32_t flow; -#endif - union { -#ifdef 
INET - struct ip ip; -#endif -#ifdef INET6 - struct ip6_hdr ip6; -#endif - struct ether_vlan_header vlan; - uint32_t port; - } buf; - - - off = sizeof(*eh); - if (m->m_len < off) - goto out; - eh = mtod(m, struct ether_header *); - etype = ntohs(eh->ether_type); - if (sc->sc_flags & LAGG_F_HASHL2) { - p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p); - p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p); - } - - /* Special handling for encapsulating VLAN frames */ - if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) { - p = hash32_buf(&m->m_pkthdr.ether_vtag, - sizeof(m->m_pkthdr.ether_vtag), p); - } else if (etype == ETHERTYPE_VLAN) { - vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf); - if (vlan == NULL) - goto out; - - if (sc->sc_flags & LAGG_F_HASHL2) - p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p); - etype = ntohs(vlan->evl_proto); - off += sizeof(*vlan) - sizeof(*eh); - } - - switch (etype) { -#ifdef INET - case ETHERTYPE_IP: - ip = lagg_gethdr(m, off, sizeof(*ip), &buf); - if (ip == NULL) - goto out; - - if (sc->sc_flags & LAGG_F_HASHL3) { - p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p); - p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p); - } - if (!(sc->sc_flags & LAGG_F_HASHL4)) - break; - switch (ip->ip_p) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_SCTP: - iphlen = ip->ip_hl << 2; - if (iphlen < sizeof(*ip)) - break; - off += iphlen; - ports = lagg_gethdr(m, off, sizeof(*ports), &buf); - if (ports == NULL) - break; - p = hash32_buf(ports, sizeof(*ports), p); - break; - } - break; -#endif -#ifdef INET6 - case ETHERTYPE_IPV6: - if (!(sc->sc_flags & LAGG_F_HASHL3)) - break; - ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf); - if (ip6 == NULL) - goto out; - - p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p); - p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p); - flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK; - p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */ - break; -#endif - } 
-out: - return (p); -} - int lagg_enqueue(struct ifnet *ifp, struct mbuf *m) { @@ -1611,24 +1884,12 @@ lagg_enqueue(struct ifnet *ifp, struct mbuf *m) /* * Simple round robin aggregation */ - -static int +static void lagg_rr_attach(struct lagg_softc *sc) { - sc->sc_detach = lagg_rr_detach; - sc->sc_start = lagg_rr_start; - sc->sc_input = lagg_rr_input; - sc->sc_port_create = NULL; sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX; sc->sc_seq = 0; - - return (0); -} - -static int -lagg_rr_detach(struct lagg_softc *sc) -{ - return (0); + sc->sc_bkt_count = sc->sc_bkt; } static int @@ -1637,9 +1898,21 @@ lagg_rr_start(struct lagg_softc *sc, struct mbuf *m) struct lagg_port *lp; uint32_t p; - p = atomic_fetchadd_32(&sc->sc_seq, 1); + if (sc->sc_bkt_count == 0 && sc->sc_bkt > 0) + sc->sc_bkt_count = sc->sc_bkt; + + if (sc->sc_bkt > 0) { + atomic_subtract_int(&sc->sc_bkt_count, 1); + if (atomic_cmpset_int(&sc->sc_bkt_count, 0, sc->sc_bkt)) + p = atomic_fetchadd_32(&sc->sc_seq, 1); + else + p = sc->sc_seq; + } else + p = atomic_fetchadd_32(&sc->sc_seq, 1); + p %= sc->sc_count; lp = SLIST_FIRST(&sc->sc_ports); + while (p--) lp = SLIST_NEXT(lp, lp_entries); @@ -1668,27 +1941,69 @@ lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) } /* - * Active failover + * Broadcast mode */ - static int -lagg_fail_attach(struct lagg_softc *sc) +lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m) { - sc->sc_detach = lagg_fail_detach; - sc->sc_start = lagg_fail_start; - sc->sc_input = lagg_fail_input; - sc->sc_port_create = NULL; - sc->sc_port_destroy = NULL; + int active_ports = 0; + int errors = 0; + int ret; + struct lagg_port *lp, *last = NULL; + struct mbuf *m0; + + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { + if (!LAGG_PORTACTIVE(lp)) + continue; + + active_ports++; + + if (last != NULL) { + m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT); + if (m0 == NULL) { + ret = ENOBUFS; + errors++; + break; + } + + ret = lagg_enqueue(last->lp_ifp, m0); + if (ret != 0) + 
errors++; + } + last = lp; + } + if (last == NULL) { + m_freem(m); + return (ENOENT); + } + if ((last = lagg_link_active(sc, last)) == NULL) { + m_freem(m); + return (ENETDOWN); + } + + ret = lagg_enqueue(last->lp_ifp, m); + if (ret != 0) + errors++; + + if (errors == 0) + return (ret); return (0); } -static int -lagg_fail_detach(struct lagg_softc *sc) +static struct mbuf* +lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { - return (0); + struct ifnet *ifp = sc->sc_ifp; + + /* Just pass in the packet to our lagg device */ + m->m_pkthdr.rcvif = ifp; + return (m); } +/* + * Active failover + */ static int lagg_fail_start(struct lagg_softc *sc, struct mbuf *m) { @@ -1710,7 +2025,7 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) struct ifnet *ifp = sc->sc_ifp; struct lagg_port *tmp_tp; - if (lp == sc->sc_primary || lagg_failover_rx_all) { + if (lp == sc->sc_primary || V_lagg_failover_rx_all) { m->m_pkthdr.rcvif = ifp; return (m); } @@ -1718,7 +2033,7 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) if (!LAGG_PORTACTIVE(sc->sc_primary)) { tmp_tp = lagg_link_active(sc, sc->sc_primary); /* - * If tmp_tp is null, we've recieved a packet when all + * If tmp_tp is null, we've received a packet when all * our links are down. Weird, but process it anyways. 
*/ if ((tmp_tp == NULL || tmp_tp == lp)) { @@ -1734,40 +2049,32 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) /* * Loadbalancing */ - -static int +static void lagg_lb_attach(struct lagg_softc *sc) { struct lagg_port *lp; struct lagg_lb *lb; - if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb), - M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) - return (ENOMEM); + lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO); - sc->sc_detach = lagg_lb_detach; - sc->sc_start = lagg_lb_start; - sc->sc_input = lagg_lb_input; - sc->sc_port_create = lagg_lb_port_create; - sc->sc_port_destroy = lagg_lb_port_destroy; sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX; - lb->lb_key = arc4random(); - sc->sc_psc = (caddr_t)lb; + lb->lb_key = m_ether_tcpip_hash_init(); + sc->sc_psc = lb; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lagg_lb_port_create(lp); - - return (0); } -static int +static void lagg_lb_detach(struct lagg_softc *sc) { - struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; + struct lagg_lb *lb; + + lb = (struct lagg_lb *)sc->sc_psc; + LAGG_WUNLOCK(sc); if (lb != NULL) free(lb, M_DEVBUF); - return (0); } static int @@ -1785,7 +2092,7 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) return (EINVAL); if (sc->sc_ifflags & IFF_DEBUG) printf("%s: port %s at index %d\n", - sc->sc_ifname, lp_next->lp_ifname, i); + sc->sc_ifname, lp_next->lp_ifp->if_xname, i); lb->lb_ports[i++] = lp_next; } @@ -1813,10 +2120,11 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) struct lagg_port *lp = NULL; uint32_t p = 0; - if (sc->use_flowid && (m->m_flags & M_FLOWID)) - p = m->m_pkthdr.flowid; + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && + M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) + p = m->m_pkthdr.flowid >> sc->flowid_shift; else - p = lagg_hashmbuf(sc, m, lb->lb_key); + p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key); p %= sc->sc_count; lp = lb->lb_ports[p]; @@ -1847,50 +2155,30 @@ lagg_lb_input(struct lagg_softc *sc, struct lagg_port 
*lp, struct mbuf *m) /* * 802.3ad LACP */ - -static int +static void lagg_lacp_attach(struct lagg_softc *sc) { struct lagg_port *lp; - int error; - - sc->sc_detach = lagg_lacp_detach; - sc->sc_port_create = lacp_port_create; - sc->sc_port_destroy = lacp_port_destroy; - sc->sc_linkstate = lacp_linkstate; - sc->sc_start = lagg_lacp_start; - sc->sc_input = lagg_lacp_input; - sc->sc_init = lacp_init; - sc->sc_stop = lacp_stop; - sc->sc_lladdr = lagg_lacp_lladdr; - sc->sc_req = lacp_req; - sc->sc_portreq = lacp_portreq; - - error = lacp_attach(sc); - if (error) - return (error); + lacp_attach(sc); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_create(lp); - - return (error); } -static int +static void lagg_lacp_detach(struct lagg_softc *sc) { struct lagg_port *lp; - int error; + void *psc; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_destroy(lp); - /* unlocking is safe here */ + psc = sc->sc_psc; + sc->sc_psc = NULL; LAGG_WUNLOCK(sc); - error = lacp_detach(sc); - LAGG_WLOCK(sc); - return (error); + lacp_detach(psc); } static void @@ -1951,3 +2239,4 @@ lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) m->m_pkthdr.rcvif = ifp; return (m); } + diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h index 27ab46f2..334995e5 100644 --- a/freebsd/sys/net/if_lagg.h +++ b/freebsd/sys/net/if_lagg.h @@ -21,8 +21,6 @@ #ifndef _NET_LAGG_H #define _NET_LAGG_H -#include <sys/sysctl.h> - /* * Global definitions */ @@ -49,26 +47,28 @@ "\05DISTRIBUTING\06DISABLED" /* Supported lagg PROTOs */ -#define LAGG_PROTO_NONE 0 /* no lagg protocol defined */ -#define LAGG_PROTO_ROUNDROBIN 1 /* simple round robin */ -#define LAGG_PROTO_FAILOVER 2 /* active failover */ -#define LAGG_PROTO_LOADBALANCE 3 /* loadbalance */ -#define LAGG_PROTO_LACP 4 /* 802.3ad lacp */ -#define LAGG_PROTO_ETHERCHANNEL 5 /* Cisco FEC */ -#define LAGG_PROTO_MAX 6 +typedef enum { + LAGG_PROTO_NONE = 0, /* no lagg protocol defined */ + LAGG_PROTO_ROUNDROBIN, /* simple 
round robin */ + LAGG_PROTO_FAILOVER, /* active failover */ + LAGG_PROTO_LOADBALANCE, /* loadbalance */ + LAGG_PROTO_LACP, /* 802.3ad lacp */ + LAGG_PROTO_BROADCAST, /* broadcast */ + LAGG_PROTO_MAX, +} lagg_proto; struct lagg_protos { const char *lpr_name; - int lpr_proto; + lagg_proto lpr_proto; }; #define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER #define LAGG_PROTOS { \ - { "failover", LAGG_PROTO_FAILOVER }, \ - { "fec", LAGG_PROTO_ETHERCHANNEL }, \ + { "failover", LAGG_PROTO_FAILOVER }, \ { "lacp", LAGG_PROTO_LACP }, \ { "loadbalance", LAGG_PROTO_LOADBALANCE }, \ - { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \ + { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \ + { "broadcast", LAGG_PROTO_BROADCAST }, \ { "none", LAGG_PROTO_NONE }, \ { "default", LAGG_PROTO_DEFAULT } \ } @@ -136,16 +136,40 @@ struct lagg_reqflags { #define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags) #define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags) +struct lagg_reqopts { + char ro_ifname[IFNAMSIZ]; /* name of the lagg */ + + int ro_opts; /* Option bitmap */ +#define LAGG_OPT_NONE 0x00 +#define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */ +/* Pseudo flags which are used in ro_opts but not stored into sc_opts. 
*/ +#define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */ +#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */ +#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ +#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ +#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */ +#define LAGG_OPT_LACP_TIMEOUT 0x80 /* LACP timeout */ + u_int ro_count; /* number of ports */ + u_int ro_active; /* active port count */ + u_int ro_flapping; /* number of flapping */ + int ro_flowid_shift; /* shift the flowid */ + uint32_t ro_bkt; /* packet bucket for roundrobin */ +}; + +#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts) +#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts) + +#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \ + "\006LACP_TXTEST\007LACP_RXTEST" + #ifdef _KERNEL + /* * Internal kernel part */ -#define lp_ifname lp_ifp->if_xname /* interface name */ -#define lp_link_state lp_ifp->if_link_state /* link state */ - #define LAGG_PORTACTIVE(_tp) ( \ - ((_tp)->lp_link_state == LINK_STATE_UP) && \ + ((_tp)->lp_ifp->if_link_state == LINK_STATE_UP) && \ ((_tp)->lp_ifp->if_flags & IFF_UP) \ ) @@ -173,25 +197,39 @@ struct lagg_lb { }; struct lagg_mc { + struct sockaddr_dl mc_addr; struct ifmultiaddr *mc_ifma; SLIST_ENTRY(lagg_mc) mc_entries; }; +typedef enum { + LAGG_LLQTYPE_PHYS = 0, /* Task related to physical (underlying) port */ + LAGG_LLQTYPE_VIRT, /* Task related to lagg interface itself */ +} lagg_llqtype; + /* List of interfaces to have the MAC address modified */ struct lagg_llq { struct ifnet *llq_ifp; uint8_t llq_lladdr[ETHER_ADDR_LEN]; + lagg_llqtype llq_type; SLIST_ENTRY(lagg_llq) llq_entries; }; +struct lagg_counters { + uint64_t val[IFCOUNTERS]; +}; + struct lagg_softc { struct ifnet *sc_ifp; /* virtual interface */ - struct rwlock sc_mtx; + struct rmlock sc_mtx; int sc_proto; /* lagg protocol */ u_int sc_count; /* number of ports */ + u_int sc_active; /* active port count */ + u_int sc_flapping; /* number 
of flapping + * events */ struct lagg_port *sc_primary; /* primary port */ struct ifmedia sc_media; /* media config */ - caddr_t sc_psc; /* protocol data */ + void *sc_psc; /* protocol data */ uint32_t sc_seq; /* sequence counter */ uint32_t sc_flags; @@ -201,26 +239,14 @@ struct lagg_softc { struct task sc_lladdr_task; SLIST_HEAD(__llqhd, lagg_llq) sc_llq_head; /* interfaces to program the lladdr on */ - - /* lagg protocol callbacks */ - int (*sc_detach)(struct lagg_softc *); - int (*sc_start)(struct lagg_softc *, struct mbuf *); - struct mbuf *(*sc_input)(struct lagg_softc *, struct lagg_port *, - struct mbuf *); - int (*sc_port_create)(struct lagg_port *); - void (*sc_port_destroy)(struct lagg_port *); - void (*sc_linkstate)(struct lagg_port *); - void (*sc_init)(struct lagg_softc *); - void (*sc_stop)(struct lagg_softc *); - void (*sc_lladdr)(struct lagg_softc *); - void (*sc_req)(struct lagg_softc *, caddr_t); - void (*sc_portreq)(struct lagg_port *, caddr_t); -#if __FreeBSD_version >= 800000 eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; -#endif - struct sysctl_ctx_list ctx; /* sysctl variables */ - int use_flowid; /* use M_FLOWID */ + struct callout sc_callout; + u_int sc_opts; + int flowid_shift; /* shift the flowid */ + uint32_t sc_bkt; /* packates bucket for roundrobin */ + uint32_t sc_bkt_count; /* packates bucket count for roundrobin */ + struct lagg_counters detached_counters; /* detached ports sum */ }; struct lagg_port { @@ -233,33 +259,36 @@ struct lagg_port { uint32_t lp_flags; /* port flags */ int lp_ifflags; /* saved ifp flags */ void *lh_cookie; /* if state hook */ - caddr_t lp_psc; /* protocol data */ + void *lp_psc; /* protocol data */ int lp_detaching; /* ifnet is detaching */ SLIST_HEAD(__mclhd, lagg_mc) lp_mc_head; /* multicast addresses */ /* Redirected callbacks */ int (*lp_ioctl)(struct ifnet *, u_long, caddr_t); - int (*lp_output)(struct ifnet *, struct mbuf *, struct sockaddr *, - struct route *); + int (*lp_output)(struct 
ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); + struct lagg_counters port_counters; /* ifp counters copy */ SLIST_ENTRY(lagg_port) lp_entries; }; -#define LAGG_LOCK_INIT(_sc) rw_init(&(_sc)->sc_mtx, "if_lagg rwlock") -#define LAGG_LOCK_DESTROY(_sc) rw_destroy(&(_sc)->sc_mtx) -#define LAGG_RLOCK(_sc) rw_rlock(&(_sc)->sc_mtx) -#define LAGG_WLOCK(_sc) rw_wlock(&(_sc)->sc_mtx) -#define LAGG_RUNLOCK(_sc) rw_runlock(&(_sc)->sc_mtx) -#define LAGG_WUNLOCK(_sc) rw_wunlock(&(_sc)->sc_mtx) -#define LAGG_RLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_RLOCKED) -#define LAGG_WLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_WLOCKED) +#define LAGG_LOCK_INIT(_sc) rm_init(&(_sc)->sc_mtx, "if_lagg rmlock") +#define LAGG_LOCK_DESTROY(_sc) rm_destroy(&(_sc)->sc_mtx) +#define LAGG_RLOCK(_sc, _p) rm_rlock(&(_sc)->sc_mtx, (_p)) +#define LAGG_WLOCK(_sc) rm_wlock(&(_sc)->sc_mtx) +#define LAGG_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->sc_mtx, (_p)) +#define LAGG_WUNLOCK(_sc) rm_wunlock(&(_sc)->sc_mtx) +#define LAGG_RLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_RLOCKED) +#define LAGG_WLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_WLOCKED) +#define LAGG_UNLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_UNLOCKED) extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); extern void (*lagg_linkstate_p)(struct ifnet *, int ); int lagg_enqueue(struct ifnet *, struct mbuf *); -uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t); + +SYSCTL_DECL(_net_link_lagg); #endif /* _KERNEL */ diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c index 55b816a7..20c0b9d2 100644 --- a/freebsd/sys/net/if_llatbl.c +++ b/freebsd/sys/net/if_llatbl.c @@ -64,17 +64,43 @@ __FBSDID("$FreeBSD$"); MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables"); -static VNET_DEFINE(SLIST_HEAD(, lltable), lltables); +static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) = + SLIST_HEAD_INITIALIZER(lltables); #define V_lltables VNET(lltables) -extern void 
arprequest(struct ifnet *, struct in_addr *, struct in_addr *, - u_char *); - -static void vnet_lltable_init(void); - struct rwlock lltable_rwlock; RW_SYSINIT(lltable_rwlock, &lltable_rwlock, "lltable_rwlock"); +static void lltable_unlink(struct lltable *llt); +static void llentries_unlink(struct lltable *llt, struct llentries *head); + +static void htable_unlink_entry(struct llentry *lle); +static void htable_link_entry(struct lltable *llt, struct llentry *lle); +static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, + void *farg); + +/* + * Dump lle state for a specific address family. + */ +static int +lltable_dump_af(struct lltable *llt, struct sysctl_req *wr) +{ + int error; + + LLTABLE_LOCK_ASSERT(); + + if (llt->llt_ifp->if_flags & IFF_LOOPBACK) + return (0); + error = 0; + + IF_AFDATA_RLOCK(llt->llt_ifp); + error = lltable_foreach_lle(llt, + (llt_foreach_cb_t *)llt->llt_dump_entry, wr); + IF_AFDATA_RUNLOCK(llt->llt_ifp); + + return (error); +} + /* * Dump arp state for a specific address family. */ @@ -87,7 +113,7 @@ lltable_sysctl_dumparp(int af, struct sysctl_req *wr) LLTABLE_RLOCK(); SLIST_FOREACH(llt, &V_lltables, llt_link) { if (llt->llt_af == af) { - error = llt->llt_dump(llt, wr); + error = lltable_dump_af(llt, wr); if (error != 0) goto done; } @@ -98,25 +124,144 @@ done: } /* - * Deletes an address from the address table. - * This function is called by the timer functions - * such as arptimer() and nd6_llinfo_timer(), and - * the caller does the locking. + * Common function helpers for chained hash table. + */ + +/* + * Runs specified callback for each entry in @llt. + * Caller does the locking. 
+ * + */ +static int +htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg) +{ + struct llentry *lle, *next; + int i, error; + + error = 0; + + for (i = 0; i < llt->llt_hsize; i++) { + LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { + error = f(llt, lle, farg); + if (error != 0) + break; + } + } + + return (error); +} + +static void +htable_link_entry(struct lltable *llt, struct llentry *lle) +{ + struct llentries *lleh; + uint32_t hashidx; + + if ((lle->la_flags & LLE_LINKED) != 0) + return; + + IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp); + + hashidx = llt->llt_hash(lle, llt->llt_hsize); + lleh = &llt->lle_head[hashidx]; + + lle->lle_tbl = llt; + lle->lle_head = lleh; + lle->la_flags |= LLE_LINKED; + LIST_INSERT_HEAD(lleh, lle, lle_next); +} + +static void +htable_unlink_entry(struct llentry *lle) +{ + + if ((lle->la_flags & LLE_LINKED) != 0) { + IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp); + LIST_REMOVE(lle, lle_next); + lle->la_flags &= ~(LLE_VALID | LLE_LINKED); +#if 0 + lle->lle_tbl = NULL; + lle->lle_head = NULL; +#endif + } +} + +struct prefix_match_data { + const struct sockaddr *addr; + const struct sockaddr *mask; + struct llentries dchain; + u_int flags; +}; + +static int +htable_prefix_free_cb(struct lltable *llt, struct llentry *lle, void *farg) +{ + struct prefix_match_data *pmd; + + pmd = (struct prefix_match_data *)farg; + + if (llt->llt_match_prefix(pmd->addr, pmd->mask, pmd->flags, lle)) { + LLE_WLOCK(lle); + LIST_INSERT_HEAD(&pmd->dchain, lle, lle_chain); + } + + return (0); +} + +static void +htable_prefix_free(struct lltable *llt, const struct sockaddr *addr, + const struct sockaddr *mask, u_int flags) +{ + struct llentry *lle, *next; + struct prefix_match_data pmd; + + bzero(&pmd, sizeof(pmd)); + pmd.addr = addr; + pmd.mask = mask; + pmd.flags = flags; + LIST_INIT(&pmd.dchain); + + IF_AFDATA_WLOCK(llt->llt_ifp); + /* Push matching lles to chain */ + lltable_foreach_lle(llt, htable_prefix_free_cb, &pmd); + + 
llentries_unlink(llt, &pmd.dchain); + IF_AFDATA_WUNLOCK(llt->llt_ifp); + + LIST_FOREACH_SAFE(lle, &pmd.dchain, lle_chain, next) + lltable_free_entry(llt, lle); +} + +static void +htable_free_tbl(struct lltable *llt) +{ + + free(llt->lle_head, M_LLTABLE); + free(llt, M_LLTABLE); +} + +static void +llentries_unlink(struct lltable *llt, struct llentries *head) +{ + struct llentry *lle, *next; + + LIST_FOREACH_SAFE(lle, head, lle_chain, next) + llt->llt_unlink_entry(lle); +} + +/* + * Helper function used to drop all mbufs in hold queue. * * Returns the number of held packets, if any, that were dropped. */ size_t -llentry_free(struct llentry *lle) +lltable_drop_entry_queue(struct llentry *lle) { size_t pkts_dropped; struct mbuf *next; - IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp); LLE_WLOCK_ASSERT(lle); - LIST_REMOVE(lle, lle_next); - lle->la_flags &= ~(LLE_VALID | LLE_LINKED); - pkts_dropped = 0; while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) { next = lle->la_hold->m_nextpkt; @@ -130,6 +275,162 @@ llentry_free(struct llentry *lle) ("%s: la_numheld %d > 0, pkts_droped %zd", __func__, lle->la_numheld, pkts_dropped)); + return (pkts_dropped); +} + +void +lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle, + const char *linkhdr, size_t linkhdrsize, int lladdr_off) +{ + + memcpy(lle->r_linkdata, linkhdr, linkhdrsize); + lle->r_hdrlen = linkhdrsize; + lle->ll_addr = &lle->r_linkdata[lladdr_off]; + lle->la_flags |= LLE_VALID; + lle->r_flags |= RLLE_VALID; +} + +/* + * Tries to update @lle link-level address. + * Since update requires AFDATA WLOCK, function + * drops @lle lock, acquires AFDATA lock and then acquires + * @lle lock to maintain lock order. + * + * Returns 1 on success. 
+ */ +int +lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, + const char *linkhdr, size_t linkhdrsize, int lladdr_off) +{ + + /* Perform real LLE update */ + /* use afdata WLOCK to update fields */ + LLE_WLOCK_ASSERT(lle); + LLE_ADDREF(lle); + LLE_WUNLOCK(lle); + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(lle); + + /* + * Since we droppped LLE lock, other thread might have deleted + * this lle. Check and return + */ + if ((lle->la_flags & LLE_DELETED) != 0) { + IF_AFDATA_WUNLOCK(ifp); + LLE_FREE_LOCKED(lle); + return (0); + } + + /* Update data */ + lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off); + + IF_AFDATA_WUNLOCK(ifp); + + LLE_REMREF(lle); + + return (1); +} + + /* + * Helper function used to pre-compute full/partial link-layer + * header data suitable for feeding into if_output(). + */ +int +lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr, + char *buf, size_t *bufsize, int *lladdr_off) +{ + struct if_encap_req ereq; + int error; + + bzero(buf, *bufsize); + bzero(&ereq, sizeof(ereq)); + ereq.buf = buf; + ereq.bufsize = *bufsize; + ereq.rtype = IFENCAP_LL; + ereq.family = family; + ereq.lladdr = lladdr; + ereq.lladdr_len = ifp->if_addrlen; + error = ifp->if_requestencap(ifp, &ereq); + if (error == 0) { + *bufsize = ereq.bufsize; + *lladdr_off = ereq.lladdr_off; + } + + return (error); +} + +/* + * Update link-layer header for given @lle after + * interface lladdr was changed. 
+ */ +static int +llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg) +{ + struct ifnet *ifp; + u_char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + u_char *lladdr; + int lladdr_off; + + ifp = (struct ifnet *)farg; + + lladdr = lle->ll_addr; + + LLE_WLOCK(lle); + if ((lle->la_flags & LLE_VALID) == 0) { + LLE_WUNLOCK(lle); + return (0); + } + + if ((lle->la_flags & LLE_IFADDR) != 0) + lladdr = IF_LLADDR(ifp); + + linkhdrsize = sizeof(linkhdr); + lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, &linkhdrsize, + &lladdr_off); + memcpy(lle->r_linkdata, linkhdr, linkhdrsize); + LLE_WUNLOCK(lle); + + return (0); +} + +/* + * Update all calculated headers for given @llt + */ +void +lltable_update_ifaddr(struct lltable *llt) +{ + + if (llt->llt_ifp->if_flags & IFF_LOOPBACK) + return; + + IF_AFDATA_WLOCK(llt->llt_ifp); + lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp); + IF_AFDATA_WUNLOCK(llt->llt_ifp); +} + +/* + * + * Performs generic cleanup routines and frees lle. + * + * Called for non-linked entries, with callouts and + * other AF-specific cleanups performed. + * + * @lle must be passed WLOCK'ed + * + * Returns the number of held packets, if any, that were dropped. 
+ */ +size_t +llentry_free(struct llentry *lle) +{ + size_t pkts_dropped; + + LLE_WLOCK_ASSERT(lle); + + KASSERT((lle->la_flags & LLE_LINKED) == 0, ("freeing linked lle")); + + pkts_dropped = lltable_drop_entry_queue(lle); + LLE_FREE_LOCKED(lle); return (pkts_dropped); @@ -144,22 +445,35 @@ struct llentry * llentry_alloc(struct ifnet *ifp, struct lltable *lt, struct sockaddr_storage *dst) { - struct llentry *la; + struct llentry *la, *la_tmp; IF_AFDATA_RLOCK(ifp); la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst); IF_AFDATA_RUNLOCK(ifp); - if ((la == NULL) && - (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { - IF_AFDATA_WLOCK(ifp); - la = lla_lookup(lt, (LLE_CREATE | LLE_EXCLUSIVE), - (struct sockaddr *)dst); - IF_AFDATA_WUNLOCK(ifp); - } if (la != NULL) { LLE_ADDREF(la); LLE_WUNLOCK(la); + return (la); + } + + if ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { + la = lltable_alloc_entry(lt, 0, (struct sockaddr *)dst); + if (la == NULL) + return (NULL); + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(la); + /* Prefer any existing LLE over newly-created one */ + la_tmp = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst); + if (la_tmp == NULL) + lltable_link_entry(lt, la); + IF_AFDATA_WUNLOCK(ifp); + if (la_tmp != NULL) { + lltable_free_entry(lt, la); + la = la_tmp; + } + LLE_ADDREF(la); + LLE_WUNLOCK(la); } return (la); @@ -168,30 +482,47 @@ llentry_alloc(struct ifnet *ifp, struct lltable *lt, /* * Free all entries from given table and free itself. */ + +static int +lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg) +{ + struct llentries *dchain; + + dchain = (struct llentries *)farg; + + LLE_WLOCK(lle); + LIST_INSERT_HEAD(dchain, lle, lle_chain); + + return (0); +} + +/* + * Free all entries from given table and free itself. 
+ */ void lltable_free(struct lltable *llt) { struct llentry *lle, *next; - int i; + struct llentries dchain; KASSERT(llt != NULL, ("%s: llt is NULL", __func__)); - LLTABLE_WLOCK(); - SLIST_REMOVE(&V_lltables, llt, lltable, llt_link); - LLTABLE_WUNLOCK(); + lltable_unlink(llt); + LIST_INIT(&dchain); IF_AFDATA_WLOCK(llt->llt_ifp); - for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { - LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { - LLE_WLOCK(lle); - if (callout_stop(&lle->la_timer)) - LLE_REMREF(lle); - llentry_free(lle); - } - } + /* Push all lles to @dchain */ + lltable_foreach_lle(llt, lltable_free_cb, &dchain); + llentries_unlink(llt, &dchain); IF_AFDATA_WUNLOCK(llt->llt_ifp); - free(llt, M_LLTABLE); + LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) { + if (callout_stop(&lle->lle_timer) > 0) + LLE_REMREF(lle); + llentry_free(lle); + } + + llt->llt_free_tbl(llt); } #if 0 @@ -207,7 +538,7 @@ lltable_drain(int af) if (llt->llt_af != af) continue; - for (i=0; i < LLTBL_HASHTBL_SIZE; i++) { + for (i=0; i < llt->llt_hsize; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { LLE_WLOCK(lle); if (lle->la_hold) { @@ -222,8 +553,42 @@ lltable_drain(int af) } #endif +/* + * Deletes an address from given lltable. + * Used for userland interaction to remove + * individual entries. Skips entries added by OS. 
+ */ +int +lltable_delete_addr(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + struct llentry *lle; + struct ifnet *ifp; + + ifp = llt->llt_ifp; + IF_AFDATA_WLOCK(ifp); + lle = lla_lookup(llt, LLE_EXCLUSIVE, l3addr); + + if (lle == NULL) { + IF_AFDATA_WUNLOCK(ifp); + return (ENOENT); + } + if ((lle->la_flags & LLE_IFADDR) != 0 && (flags & LLE_IFADDR) == 0) { + IF_AFDATA_WUNLOCK(ifp); + LLE_WUNLOCK(lle); + return (EPERM); + } + + lltable_unlink_entry(llt, lle); + IF_AFDATA_WUNLOCK(ifp); + + llt->llt_delete_entry(llt, lle); + + return (0); +} + void -lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask, +lltable_prefix_free(int af, struct sockaddr *addr, struct sockaddr *mask, u_int flags) { struct lltable *llt; @@ -233,38 +598,122 @@ lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask, if (llt->llt_af != af) continue; - llt->llt_prefix_free(llt, prefix, mask, flags); + llt->llt_prefix_free(llt, addr, mask, flags); } LLTABLE_RUNLOCK(); } - - -/* - * Create a new lltable. - */ struct lltable * -lltable_init(struct ifnet *ifp, int af) +lltable_allocate_htbl(uint32_t hsize) { struct lltable *llt; - register int i; + int i; - llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK); + llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK | M_ZERO); + llt->llt_hsize = hsize; + llt->lle_head = malloc(sizeof(struct llentries) * hsize, + M_LLTABLE, M_WAITOK | M_ZERO); - llt->llt_af = af; - llt->llt_ifp = ifp; - for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) + for (i = 0; i < llt->llt_hsize; i++) LIST_INIT(&llt->lle_head[i]); + /* Set some default callbacks */ + llt->llt_link_entry = htable_link_entry; + llt->llt_unlink_entry = htable_unlink_entry; + llt->llt_prefix_free = htable_prefix_free; + llt->llt_foreach_entry = htable_foreach_lle; + llt->llt_free_tbl = htable_free_tbl; + + return (llt); +} + +/* + * Links lltable to global llt list. 
+ */ +void +lltable_link(struct lltable *llt) +{ + LLTABLE_WLOCK(); SLIST_INSERT_HEAD(&V_lltables, llt, llt_link); LLTABLE_WUNLOCK(); +} - return (llt); +static void +lltable_unlink(struct lltable *llt) +{ + + LLTABLE_WLOCK(); + SLIST_REMOVE(&V_lltables, llt, lltable, llt_link); + LLTABLE_WUNLOCK(); + +} + +/* + * External methods used by lltable consumers + */ + +int +lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg) +{ + + return (llt->llt_foreach_entry(llt, f, farg)); +} + +struct llentry * +lltable_alloc_entry(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + + return (llt->llt_alloc_entry(llt, flags, l3addr)); +} + +void +lltable_free_entry(struct lltable *llt, struct llentry *lle) +{ + + llt->llt_free_entry(llt, lle); +} + +void +lltable_link_entry(struct lltable *llt, struct llentry *lle) +{ + + llt->llt_link_entry(llt, lle); +} + +void +lltable_unlink_entry(struct lltable *llt, struct llentry *lle) +{ + + llt->llt_unlink_entry(lle); +} + +void +lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) +{ + struct lltable *llt; + + llt = lle->lle_tbl; + llt->llt_fill_sa_entry(lle, sa); +} + +struct ifnet * +lltable_get_ifp(const struct lltable *llt) +{ + + return (llt->llt_ifp); +} + +int +lltable_get_af(const struct lltable *llt) +{ + + return (llt->llt_af); } /* - * Called in route_output when adding/deleting a route to an interface. + * Called in route_output when rtm_flags contains RTF_LLDATA. 
*/ int lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) @@ -274,14 +723,16 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST]; struct ifnet *ifp; struct lltable *llt; - struct llentry *lle; - u_int laflags = 0, flags = 0; - int error = 0; + struct llentry *lle, *lle_tmp; + uint8_t linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; + u_int laflags = 0; + int error; + + KASSERT(dl != NULL && dl->sdl_family == AF_LINK, + ("%s: invalid dl\n", __func__)); - if (dl == NULL || dl->sdl_family != AF_LINK) { - log(LOG_INFO, "%s: invalid dl\n", __func__); - return EINVAL; - } ifp = ifnet_byindex(dl->sdl_index); if (ifp == NULL) { log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n", @@ -289,44 +740,6 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) return EINVAL; } - switch (rtm->rtm_type) { - case RTM_ADD: - if (rtm->rtm_flags & RTF_ANNOUNCE) { - flags |= LLE_PUB; -#ifdef INET - if (dst->sa_family == AF_INET && - ((struct sockaddr_inarp *)dst)->sin_other != 0) { - struct rtentry *rt; - ((struct sockaddr_inarp *)dst)->sin_other = 0; - rt = rtalloc1(dst, 0, 0); - if (rt == NULL || !(rt->rt_flags & RTF_HOST)) { - log(LOG_INFO, "%s: RTM_ADD publish " - "(proxy only) is invalid\n", - __func__); - if (rt) - RTFREE_LOCKED(rt); - return EINVAL; - } - RTFREE_LOCKED(rt); - - flags |= LLE_PROXY; - } -#endif - } - flags |= LLE_CREATE; - break; - - case RTM_DELETE: - flags |= LLE_DELETE; - break; - - case RTM_CHANGE: - break; - - default: - return EINVAL; /* XXX not implemented yet */ - } - /* XXX linked list may be too expensive */ LLTABLE_RLOCK(); SLIST_FOREACH(llt, &V_lltables, llt_link) { @@ -337,73 +750,82 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) LLTABLE_RUNLOCK(); KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n")); - if (flags & LLE_CREATE) - flags |= LLE_EXCLUSIVE; - - IF_AFDATA_LOCK(ifp); - lle = lla_lookup(llt, flags, dst); - 
IF_AFDATA_UNLOCK(ifp); - if (LLE_IS_VALID(lle)) { - if (flags & LLE_CREATE) { - /* - * If we delay the delete, then a subsequent - * "arp add" should look up this entry, reset the - * LLE_DELETED flag, and reset the expiration timer - */ - bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen); - lle->la_flags |= (flags & (LLE_PUB | LLE_PROXY)); - lle->la_flags |= LLE_VALID; - lle->la_flags &= ~LLE_DELETED; -#ifdef INET6 - /* - * ND6 - */ - if (dst->sa_family == AF_INET6) - lle->ln_state = ND6_LLINFO_REACHABLE; -#endif - /* - * NB: arp and ndp always set (RTF_STATIC | RTF_HOST) - */ - - if (rtm->rtm_rmx.rmx_expire == 0) { - lle->la_flags |= LLE_STATIC; - lle->la_expire = 0; - } else - lle->la_expire = rtm->rtm_rmx.rmx_expire; - laflags = lle->la_flags; - LLE_WUNLOCK(lle); -#ifdef INET - /* gratuitous ARP */ - if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) { - arprequest(ifp, - &((struct sockaddr_in *)dst)->sin_addr, - &((struct sockaddr_in *)dst)->sin_addr, - ((laflags & LLE_PROXY) ? - (u_char *)IF_LLADDR(ifp) : - (u_char *)LLADDR(dl))); + error = 0; + + switch (rtm->rtm_type) { + case RTM_ADD: + /* Add static LLE */ + laflags = 0; + if (rtm->rtm_rmx.rmx_expire == 0) + laflags = LLE_STATIC; + lle = lltable_alloc_entry(llt, laflags, dst); + if (lle == NULL) + return (ENOMEM); + + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl), + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return (EINVAL); + lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, + lladdr_off); + if ((rtm->rtm_flags & RTF_ANNOUNCE)) + lle->la_flags |= LLE_PUB; + lle->la_expire = rtm->rtm_rmx.rmx_expire; + + laflags = lle->la_flags; + + /* Try to link new entry */ + lle_tmp = NULL; + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(lle); + lle_tmp = lla_lookup(llt, LLE_EXCLUSIVE, dst); + if (lle_tmp != NULL) { + /* Check if we are trying to replace immutable entry */ + if ((lle_tmp->la_flags & LLE_IFADDR) != 0) { + IF_AFDATA_WUNLOCK(ifp); + LLE_WUNLOCK(lle_tmp); + 
lltable_free_entry(llt, lle); + return (EPERM); } -#endif - } else { - if (flags & LLE_EXCLUSIVE) - LLE_WUNLOCK(lle); - else - LLE_RUNLOCK(lle); + /* Unlink existing entry from table */ + lltable_unlink_entry(llt, lle_tmp); } - } else if ((lle == NULL) && (flags & LLE_DELETE)) - error = EINVAL; + lltable_link_entry(llt, lle); + IF_AFDATA_WUNLOCK(ifp); + if (lle_tmp != NULL) { + EVENTHANDLER_INVOKE(lle_event, lle_tmp,LLENTRY_EXPIRED); + lltable_free_entry(llt, lle_tmp); + } - return (error); -} + /* + * By invoking LLE handler here we might get + * two events on static LLE entry insertion + * in routing socket. However, since we might have + * other subscribers we need to generate this event. + */ + EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED); + LLE_WUNLOCK(lle); +#ifdef INET + /* gratuitous ARP */ + if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) + arprequest(ifp, + &((struct sockaddr_in *)dst)->sin_addr, + &((struct sockaddr_in *)dst)->sin_addr, + (u_char *)LLADDR(dl)); +#endif -static void -vnet_lltable_init() -{ + break; - SLIST_INIT(&V_lltables); + case RTM_DELETE: + return (lltable_delete_addr(llt, 0, dst)); + + default: + error = EINVAL; + } + + return (error); } -VNET_SYSINIT(vnet_lltable_init, SI_SUB_PSEUDO, SI_ORDER_FIRST, - vnet_lltable_init, NULL); #ifdef DDB struct llentry_sa { @@ -429,15 +851,14 @@ llatbl_lle_show(struct llentry_sa *la) db_printf(" la_flags=0x%04x\n", lle->la_flags); db_printf(" la_asked=%u\n", lle->la_asked); db_printf(" la_preempt=%u\n", lle->la_preempt); - db_printf(" ln_byhint=%u\n", lle->ln_byhint); db_printf(" ln_state=%d\n", lle->ln_state); db_printf(" ln_router=%u\n", lle->ln_router); db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick); db_printf(" lle_refcnt=%d\n", lle->lle_refcnt); - bcopy(&lle->ll_addr.mac16, octet, sizeof(octet)); + bcopy(lle->ll_addr, octet, sizeof(octet)); db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n", octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]); - db_printf(" 
la_timer=%p\n", &lle->la_timer); + db_printf(" lle_timer=%p\n", &lle->lle_timer); switch (la->l3_addr.sa_family) { #ifdef INET @@ -490,7 +911,7 @@ llatbl_llt_show(struct lltable *llt) db_printf("llt=%p llt_af=%d llt_ifp=%p\n", llt, llt->llt_af, llt->llt_ifp); - for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { + for (i = 0; i < llt->llt_hsize; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { llatbl_lle_show((struct llentry_sa *)lle); diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h index 8ac72c4f..51de726a 100644 --- a/freebsd/sys/net/if_llatbl.h +++ b/freebsd/sys/net/if_llatbl.h @@ -30,8 +30,6 @@ __FBSDID("$FreeBSD$"); #ifndef _NET_IF_LLATBL_H_ #define _NET_IF_LLATBL_H_ -#include <rtems/bsd/local/opt_ofed.h> - #include <sys/_rwlock.h> #include <netinet/in.h> @@ -50,42 +48,44 @@ extern struct rwlock lltable_rwlock; #define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock) #define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED) +#define LLE_MAX_LINKHDR 24 /* Full IB header */ /* * Code referencing llentry must at least hold * a shared lock */ struct llentry { LIST_ENTRY(llentry) lle_next; - struct rwlock lle_lock; + union { + struct in_addr addr4; + struct in6_addr addr6; + } r_l3addr; + char r_linkdata[LLE_MAX_LINKHDR]; /* L2 data */ + uint8_t r_hdrlen; /* length for LL header */ + uint8_t spare0[3]; + uint16_t r_flags; /* LLE runtime flags */ + uint16_t r_skip_req; /* feedback from fast path */ + struct lltable *lle_tbl; struct llentries *lle_head; - void (*lle_free)(struct lltable *, struct llentry *); + void (*lle_free)(struct llentry *); struct mbuf *la_hold; int la_numheld; /* # of packets currently held */ time_t la_expire; uint16_t la_flags; uint16_t la_asked; uint16_t la_preempt; - uint16_t ln_byhint; int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */ uint16_t ln_router; time_t ln_ntick; + time_t lle_remtime; /* Real time remaining */ + time_t lle_hittime; /* Time when r_skip_req was unset */ int lle_refcnt; + char 
*ll_addr; /* link-layer address */ - union { - uint64_t mac_aligned; - uint16_t mac16[3]; -#ifdef OFED - uint8_t mac8[20]; /* IB needs 20 bytes. */ -#endif - } ll_addr; - - /* XXX af-private? */ - union { - struct callout ln_timer_ch; - struct callout la_timer; - } lle_timer; - /* NB: struct sockaddr must immediately follow */ + LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */ + struct callout lle_timer; + struct rwlock lle_lock; + struct mtx req_mtx; }; #define LLE_WLOCK(lle) rw_wlock(&(lle)->lle_lock) @@ -98,6 +98,12 @@ struct llentry { #define LLE_LOCK_DESTROY(lle) rw_destroy(&(lle)->lle_lock) #define LLE_WLOCK_ASSERT(lle) rw_assert(&(lle)->lle_lock, RA_WLOCKED) +#define LLE_REQ_INIT(lle) mtx_init(&(lle)->req_mtx, "lle req", \ + NULL, MTX_DEF) +#define LLE_REQ_DESTROY(lle) mtx_destroy(&(lle)->req_mtx) +#define LLE_REQ_LOCK(lle) mtx_lock(&(lle)->req_mtx) +#define LLE_REQ_UNLOCK(lle) mtx_unlock(&(lle)->req_mtx) + #define LLE_IS_VALID(lle) (((lle) != NULL) && ((lle) != (void *)-1)) #define LLE_ADDREF(lle) do { \ @@ -118,7 +124,7 @@ struct llentry { #define LLE_FREE_LOCKED(lle) do { \ if ((lle)->lle_refcnt == 1) \ - (lle)->lle_free((lle)->lle_tbl, (lle)); \ + (lle)->lle_free(lle); \ else { \ LLE_REMREF(lle); \ LLE_WUNLOCK(lle); \ @@ -132,58 +138,77 @@ struct llentry { LLE_FREE_LOCKED(lle); \ } while (0) +typedef struct llentry *(llt_lookup_t)(struct lltable *, u_int flags, + const struct sockaddr *l3addr); +typedef struct llentry *(llt_alloc_t)(struct lltable *, u_int flags, + const struct sockaddr *l3addr); +typedef void (llt_delete_t)(struct lltable *, struct llentry *); +typedef void (llt_prefix_free_t)(struct lltable *, + const struct sockaddr *addr, const struct sockaddr *mask, u_int flags); +typedef int (llt_dump_entry_t)(struct lltable *, struct llentry *, + struct sysctl_req *); +typedef uint32_t (llt_hash_t)(const struct llentry *, uint32_t); +typedef int (llt_match_prefix_t)(const struct sockaddr *, + const struct sockaddr *, u_int, struct 
llentry *); +typedef void (llt_free_entry_t)(struct lltable *, struct llentry *); +typedef void (llt_fill_sa_entry_t)(const struct llentry *, struct sockaddr *); +typedef void (llt_free_tbl_t)(struct lltable *); +typedef void (llt_link_entry_t)(struct lltable *, struct llentry *); +typedef void (llt_unlink_entry_t)(struct llentry *); -#define ln_timer_ch lle_timer.ln_timer_ch -#define la_timer lle_timer.la_timer - -/* XXX bad name */ -#define L3_ADDR(lle) ((struct sockaddr *)(&lle[1])) -#define L3_ADDR_LEN(lle) (((struct sockaddr *)(&lle[1]))->sa_len) - -#ifndef LLTBL_HASHTBL_SIZE -#define LLTBL_HASHTBL_SIZE 32 /* default 32 ? */ -#endif - -#ifndef LLTBL_HASHMASK -#define LLTBL_HASHMASK (LLTBL_HASHTBL_SIZE - 1) -#endif +typedef int (llt_foreach_cb_t)(struct lltable *, struct llentry *, void *); +typedef int (llt_foreach_entry_t)(struct lltable *, llt_foreach_cb_t *, void *); struct lltable { SLIST_ENTRY(lltable) llt_link; - struct llentries lle_head[LLTBL_HASHTBL_SIZE]; int llt_af; + int llt_hsize; + struct llentries *lle_head; struct ifnet *llt_ifp; - void (*llt_prefix_free)(struct lltable *, - const struct sockaddr *prefix, - const struct sockaddr *mask, - u_int flags); - struct llentry * (*llt_lookup)(struct lltable *, u_int flags, - const struct sockaddr *l3addr); - int (*llt_dump)(struct lltable *, - struct sysctl_req *); + llt_lookup_t *llt_lookup; + llt_alloc_t *llt_alloc_entry; + llt_delete_t *llt_delete_entry; + llt_prefix_free_t *llt_prefix_free; + llt_dump_entry_t *llt_dump_entry; + llt_hash_t *llt_hash; + llt_match_prefix_t *llt_match_prefix; + llt_free_entry_t *llt_free_entry; + llt_foreach_entry_t *llt_foreach_entry; + llt_link_entry_t *llt_link_entry; + llt_unlink_entry_t *llt_unlink_entry; + llt_fill_sa_entry_t *llt_fill_sa_entry; + llt_free_tbl_t *llt_free_tbl; }; + MALLOC_DECLARE(M_LLTABLE); /* - * flags to be passed to arplookup. 
+ * LLentry flags */ #define LLE_DELETED 0x0001 /* entry must be deleted */ #define LLE_STATIC 0x0002 /* entry is static */ #define LLE_IFADDR 0x0004 /* entry is interface addr */ #define LLE_VALID 0x0008 /* ll_addr is valid */ -#define LLE_PROXY 0x0010 /* proxy entry ??? */ +#define LLE_REDIRECT 0x0010 /* installed by redirect; has host rtentry */ #define LLE_PUB 0x0020 /* publish entry ??? */ #define LLE_LINKED 0x0040 /* linked to lookup structure */ +/* LLE request flags */ #define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */ -#define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */ -#define LLE_CREATE 0x8000 /* create on a lookup miss */ +#define LLE_UNLOCKED 0x4000 /* return lle unlocked */ +#define LLE_ADDRONLY 0x4000 /* return lladdr instead of full header */ +#define LLE_CREATE 0x8000 /* hint to avoid lle lookup */ + +/* LLE flags used by fastpath code */ +#define RLLE_VALID 0x0001 /* entry is valid */ +#define RLLE_IFADDR LLE_IFADDR /* entry is ifaddr */ #define LLATBL_HASH(key, mask) \ (((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask) -struct lltable *lltable_init(struct ifnet *, int); +struct lltable *lltable_allocate_htbl(uint32_t hsize); void lltable_free(struct lltable *); +void lltable_link(struct lltable *llt); void lltable_prefix_free(int, struct sockaddr *, struct sockaddr *, u_int); #if 0 @@ -195,13 +220,37 @@ size_t llentry_free(struct llentry *); struct llentry *llentry_alloc(struct ifnet *, struct lltable *, struct sockaddr_storage *); +/* helper functions */ +size_t lltable_drop_entry_queue(struct llentry *); +void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle, + const char *linkhdr, size_t linkhdrsize, int lladdr_off); +int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, + const char *linkhdr, size_t linkhdrsize, int lladdr_off); + +int lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr, + char *buf, size_t *bufsize, int *lladdr_off); +void lltable_update_ifaddr(struct 
lltable *llt); +struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags, + const struct sockaddr *l4addr); +void lltable_free_entry(struct lltable *llt, struct llentry *lle); +int lltable_delete_addr(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr); +void lltable_link_entry(struct lltable *llt, struct llentry *lle); +void lltable_unlink_entry(struct lltable *llt, struct llentry *lle); +void lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa); +struct ifnet *lltable_get_ifp(const struct lltable *llt); +int lltable_get_af(const struct lltable *llt); + +int lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, + void *farg); /* * Generic link layer address lookup function. */ static __inline struct llentry * lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { - return llt->llt_lookup(llt, flags, l3addr); + + return (llt->llt_lookup(llt, flags, l3addr)); } int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *); diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c index b40dec8e..aa5109eb 100644 --- a/freebsd/sys/net/if_loop.c +++ b/freebsd/sys/net/if_loop.c @@ -36,10 +36,8 @@ * Loopback interface driver for protocol testing and timing. 
*/ -#include <rtems/bsd/local/opt_atalk.h> #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipx.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> @@ -53,6 +51,7 @@ #include <sys/sysctl.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_types.h> #include <net/netisr.h> @@ -65,11 +64,6 @@ #include <netinet/in_var.h> #endif -#ifdef IPX -#include <netipx/ipx.h> -#include <netipx/ipx_if.h> -#endif - #ifdef INET6 #ifndef INET #include <netinet/in.h> @@ -78,11 +72,6 @@ #include <netinet/ip6.h> #endif -#ifdef NETATALK -#include <netatalk/at.h> -#include <netatalk/at_var.h> -#endif - #include <security/mac/mac_framework.h> #ifdef TINY_LOMTU @@ -101,22 +90,20 @@ CSUM_SCTP_VALID) int loioctl(struct ifnet *, u_long, caddr_t); -static void lortrequest(int, struct rtentry *, struct rt_addrinfo *); int looutput(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro); + const struct sockaddr *dst, struct route *ro); static int lo_clone_create(struct if_clone *, int, caddr_t); static void lo_clone_destroy(struct ifnet *); VNET_DEFINE(struct ifnet *, loif); /* Used externally */ #ifdef VIMAGE -static VNET_DEFINE(struct ifc_simple_data, lo_cloner_data); -static VNET_DEFINE(struct if_clone, lo_cloner); -#define V_lo_cloner_data VNET(lo_cloner_data) +static VNET_DEFINE(struct if_clone *, lo_cloner); #define V_lo_cloner VNET(lo_cloner) #endif -IFC_SIMPLE_DECLARE(lo, 1); +static struct if_clone *lo_cloner; +static const char loname[] = "lo"; static void lo_clone_destroy(struct ifnet *ifp) @@ -141,7 +128,7 @@ lo_clone_create(struct if_clone *ifc, int unit, caddr_t params) if (ifp == NULL) return (ENOSPC); - if_initname(ifp, ifc->ifc_name, unit); + if_initname(ifp, loname, unit); ifp->if_mtu = LOMTU; ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST; ifp->if_ioctl = loioctl; @@ -163,15 +150,15 @@ vnet_loif_init(const void *unused __unused) { #ifdef VIMAGE + lo_cloner 
= if_clone_simple(loname, lo_clone_create, lo_clone_destroy, + 1); V_lo_cloner = lo_cloner; - V_lo_cloner_data = lo_cloner_data; - V_lo_cloner.ifc_data = &V_lo_cloner_data; - if_clone_attach(&V_lo_cloner); #else - if_clone_attach(&lo_cloner); + lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy, + 1); #endif } -VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_loif_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_loif_init, NULL); #ifdef VIMAGE @@ -179,10 +166,10 @@ static void vnet_loif_uninit(const void *unused __unused) { - if_clone_detach(&V_lo_cloner); + if_clone_detach(V_lo_cloner); V_loif = NULL; } -VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_loif_uninit, NULL); #endif @@ -213,19 +200,16 @@ static moduledata_t loop_mod = { DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); int -looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, +looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int32_t af; - struct rtentry *rt = NULL; #ifdef MAC int error; #endif M_ASSERTPKTHDR(m); /* check if we have the packet header */ - if (ro != NULL) - rt = ro->ro_rt; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { @@ -234,23 +218,22 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, } #endif - if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + if (ro != NULL && ro->ro_flags & (RT_REJECT|RT_BLACKHOLE)) { m_freem(m); - return (rt->rt_flags & RTF_BLACKHOLE ? 0 : - rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + return (ro->ro_flags & RT_BLACKHOLE ? 0 : EHOSTUNREACH); } - ifp->if_opackets++; - ifp->if_obytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); /* BPF writes need to be handled specially. 
*/ - if (dst->sa_family == AF_UNSPEC) { + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); - dst->sa_family = af; - } + else + af = dst->sa_family; #if 1 /* XXX */ - switch (dst->sa_family) { + switch (af) { case AF_INET: if (ifp->if_capenable & IFCAP_RXCSUM) { m->m_pkthdr.csum_data = 0xffff; @@ -275,16 +258,13 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, #endif m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6; break; - case AF_IPX: - case AF_APPLETALK: - break; default: - printf("looutput: af=%d unexpected\n", dst->sa_family); + printf("looutput: af=%d unexpected\n", af); m_freem(m); return (EAFNOSUPPORT); } #endif - return (if_simloop(ifp, m, dst->sa_family, 0)); + return (if_simloop(ifp, m, af, 0)); } /* @@ -370,36 +350,17 @@ if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen) isr = NETISR_IPV6; break; #endif -#ifdef IPX - case AF_IPX: - isr = NETISR_IPX; - break; -#endif -#ifdef NETATALK - case AF_APPLETALK: - isr = NETISR_ATALK2; - break; -#endif default: printf("if_simloop: can't handle af=%d\n", af); m_freem(m); return (EAFNOSUPPORT); } - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); netisr_queue(isr, m); /* mbuf is free'd on failure. */ return (0); } -/* ARGSUSED */ -static void -lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) -{ - - RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; -} - /* * Process an ioctl request. 
*/ @@ -407,7 +368,6 @@ lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) int loioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - struct ifaddr *ifa; struct ifreq *ifr = (struct ifreq *)data; int error = 0, mask; @@ -415,8 +375,6 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCSIFADDR: ifp->if_flags |= IFF_UP; ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifa = (struct ifaddr *)data; - ifa->ifa_rtrequest = lortrequest; /* * Everything else is done at a higher level. */ @@ -424,7 +382,7 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCADDMULTI: case SIOCDELMULTI: - if (ifr == 0) { + if (ifr == NULL) { error = EAFNOSUPPORT; /* XXX */ break; } diff --git a/freebsd/sys/net/if_media.c b/freebsd/sys/net/if_media.c index 264d3535..66b13568 100644 --- a/freebsd/sys/net/if_media.c +++ b/freebsd/sys/net/if_media.c @@ -48,6 +48,8 @@ * to implement this interface. */ +#include <rtems/bsd/local/opt_ifmedia.h> + #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/socket.h> @@ -70,6 +72,7 @@ static struct ifmedia_entry *ifmedia_match(struct ifmedia *ifm, int flags, int mask); #ifdef IFMEDIA_DEBUG +#include <net/if_var.h> int ifmedia_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ifmedia, CTLFLAG_RW, &ifmedia_debug, 0, "if_media debugging msgs"); @@ -195,6 +198,21 @@ ifmedia_set(ifm, target) } /* + * Given a media word, return one suitable for an application + * using the original encoding. + */ +static int +compat_media(int media) +{ + + if (IFM_TYPE(media) == IFM_ETHER && IFM_SUBTYPE(media) > IFM_OTHER) { + media &= ~(IFM_ETH_XTYPE|IFM_TMASK); + media |= IFM_OTHER; + } + return (media); +} + +/* * Device-independent media ioctl support function. 
*/ int @@ -206,7 +224,7 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd) { struct ifmedia_entry *match; struct ifmediareq *ifmr = (struct ifmediareq *) ifr; - int error = 0, sticky; + int error = 0; if (ifp == NULL || ifr == NULL || ifm == NULL) return(EINVAL); @@ -273,80 +291,42 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd) * Get list of available media and current media on interface. */ case SIOCGIFMEDIA: + case SIOCGIFXMEDIA: { struct ifmedia_entry *ep; - int *kptr, count; - int usermax; /* user requested max */ + int i; - kptr = NULL; /* XXX gcc */ + if (ifmr->ifm_count < 0) + return (EINVAL); - ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ? - ifm->ifm_cur->ifm_media : IFM_NONE; + if (cmd == SIOCGIFMEDIA) { + ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ? + compat_media(ifm->ifm_cur->ifm_media) : IFM_NONE; + } else { + ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ? + ifm->ifm_cur->ifm_media : IFM_NONE; + } ifmr->ifm_mask = ifm->ifm_mask; ifmr->ifm_status = 0; (*ifm->ifm_status)(ifp, ifmr); - count = 0; - usermax = 0; - /* * If there are more interfaces on the list, count * them. This allows the caller to set ifmr->ifm_count * to 0 on the first call to know how much space to * allocate. */ + i = 0; LIST_FOREACH(ep, &ifm->ifm_list, ifm_list) - usermax++; - - /* - * Don't allow the user to ask for too many - * or a negative number. - */ - if (ifmr->ifm_count > usermax) - ifmr->ifm_count = usermax; - else if (ifmr->ifm_count < 0) - return (EINVAL); - - if (ifmr->ifm_count != 0) { - kptr = (int *)malloc(ifmr->ifm_count * sizeof(int), - M_TEMP, M_NOWAIT); - - if (kptr == NULL) - return (ENOMEM); - /* - * Get the media words from the interface's list. - */ - ep = LIST_FIRST(&ifm->ifm_list); - for (; ep != NULL && count < ifmr->ifm_count; - ep = LIST_NEXT(ep, ifm_list), count++) - kptr[count] = ep->ifm_media; - - if (ep != NULL) - error = E2BIG; /* oops! 
*/ - } else { - count = usermax; - } - - /* - * We do the copyout on E2BIG, because that's - * just our way of telling userland that there - * are more. This is the behavior I've observed - * under BSD/OS 3.0 - */ - sticky = error; - if ((error == 0 || error == E2BIG) && ifmr->ifm_count != 0) { - error = copyout((caddr_t)kptr, - (caddr_t)ifmr->ifm_ulist, - ifmr->ifm_count * sizeof(int)); - } - - if (error == 0) - error = sticky; - - if (ifmr->ifm_count != 0) - free(kptr, M_TEMP); - - ifmr->ifm_count = count; + if (i++ < ifmr->ifm_count) { + error = copyout(&ep->ifm_media, + ifmr->ifm_ulist + i - 1, sizeof(int)); + if (error) + break; + } + if (error == 0 && i > ifmr->ifm_count) + error = ifmr->ifm_count ? E2BIG : 0; + ifmr->ifm_count = i; break; } @@ -400,8 +380,7 @@ ifmedia_baudrate(int mword) int i; for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) { - if ((mword & (IFM_NMASK|IFM_TMASK)) == - ifmedia_baudrate_descriptions[i].ifmb_word) + if (IFM_TYPE_MATCH(mword, ifmedia_baudrate_descriptions[i].ifmb_word)) return (ifmedia_baudrate_descriptions[i].ifmb_baudrate); } @@ -507,7 +486,7 @@ ifmedia_printword(ifmw) printf("<unknown type>\n"); return; } - printf(desc->ifmt_string); + printf("%s", desc->ifmt_string); /* Any mode. */ for (desc = ttos->modes; desc && desc->ifmt_string != NULL; desc++) diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h index 12585095..86439950 100644 --- a/freebsd/sys/net/if_media.h +++ b/freebsd/sys/net/if_media.h @@ -54,11 +54,13 @@ #include <sys/queue.h> +struct ifnet; + /* * Driver callbacks for media status and change requests. */ -typedef int (*ifm_change_cb_t)(struct ifnet *ifp); -typedef void (*ifm_stat_cb_t)(struct ifnet *ifp, struct ifmediareq *req); +typedef int (*ifm_change_cb_t)(struct ifnet *); +typedef void (*ifm_stat_cb_t)(struct ifnet *, struct ifmediareq *req); /* * In-kernel representation of a single supported media type. 
@@ -104,6 +106,7 @@ void ifmedia_set(struct ifmedia *ifm, int mword); int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr, struct ifmedia *ifm, u_long cmd); + /* Compute baudrate for a given media. */ uint64_t ifmedia_baudrate(int); @@ -115,7 +118,7 @@ uint64_t ifmedia_baudrate(int); * ---- ------- * 0-4 Media variant * 5-7 Media type - * 8-15 Type specific options + * 8-15 Type specific options (includes added variant bits on Ethernet) * 16-18 Mode (for multi-mode devices) * 19 RFU * 20-27 Shared (global) options @@ -124,8 +127,18 @@ uint64_t ifmedia_baudrate(int); /* * Ethernet + * In order to use more than 31 subtypes, Ethernet uses some of the option + * bits as part of the subtype field. See the options section below for + * relevant definitions */ #define IFM_ETHER 0x00000020 +#define IFM_ETHER_SUBTYPE(x) (((x) & IFM_TMASK) | \ + (((x) & (IFM_ETH_XTYPE >> IFM_ETH_XSHIFT)) << IFM_ETH_XSHIFT)) +#define IFM_X(x) IFM_ETHER_SUBTYPE(x) /* internal shorthand */ +#define IFM_ETHER_SUBTYPE_SET(x) (IFM_ETHER_SUBTYPE(x) | IFM_ETHER) +#define IFM_ETHER_SUBTYPE_GET(x) ((x) & (IFM_TMASK|IFM_ETH_XTYPE)) +#define IFM_ETHER_IS_EXTENDED(x) ((x) & IFM_ETH_XTYPE) + #define IFM_10_T 3 /* 10BaseT - RJ45 */ #define IFM_10_2 4 /* 10Base2 - Thinnet */ #define IFM_10_5 5 /* 10Base5 - AUI */ @@ -153,15 +166,49 @@ uint64_t ifmedia_baudrate(int); #define IFM_40G_CR4 27 /* 40GBase-CR4 */ #define IFM_40G_SR4 28 /* 40GBase-SR4 */ #define IFM_40G_LR4 29 /* 40GBase-LR4 */ +#define IFM_1000_KX 30 /* 1000Base-KX backplane */ +#define IFM_OTHER 31 /* Other: one of the following */ + +/* following types are not visible to old binaries using only IFM_TMASK */ +#define IFM_10G_KX4 IFM_X(32) /* 10GBase-KX4 backplane */ +#define IFM_10G_KR IFM_X(33) /* 10GBase-KR backplane */ +#define IFM_10G_CR1 IFM_X(34) /* 10GBase-CR1 Twinax splitter */ +#define IFM_20G_KR2 IFM_X(35) /* 20GBase-KR2 backplane */ +#define IFM_2500_KX IFM_X(36) /* 2500Base-KX backplane */ +#define IFM_2500_T IFM_X(37) /* 
2500Base-T - RJ45 (NBaseT) */ +#define IFM_5000_T IFM_X(38) /* 5000Base-T - RJ45 (NBaseT) */ +#define IFM_50G_PCIE IFM_X(39) /* 50G Ethernet over PCIE */ +#define IFM_25G_PCIE IFM_X(40) /* 25G Ethernet over PCIE */ +#define IFM_1000_SGMII IFM_X(41) /* 1G media interface */ +#define IFM_10G_SFI IFM_X(42) /* 10G media interface */ +#define IFM_40G_XLPPI IFM_X(43) /* 40G media interface */ +#define IFM_1000_CX_SGMII IFM_X(44) /* 1000Base-CX-SGMII */ +#define IFM_40G_KR4 IFM_X(45) /* 40GBase-KR4 */ +#define IFM_10G_ER IFM_X(46) /* 10GBase-ER */ +#define IFM_100G_CR4 IFM_X(47) /* 100GBase-CR4 */ +#define IFM_100G_SR4 IFM_X(48) /* 100GBase-SR4 */ +#define IFM_100G_KR4 IFM_X(49) /* 100GBase-KR4 */ +#define IFM_100G_LR4 IFM_X(50) /* 100GBase-LR4 */ +#define IFM_56G_R4 IFM_X(51) /* 56GBase-R4 */ +#define IFM_100_T IFM_X(52) /* 100BaseT - RJ45 */ +#define IFM_25G_CR IFM_X(53) /* 25GBase-CR */ +#define IFM_25G_KR IFM_X(54) /* 25GBase-KR */ +#define IFM_25G_SR IFM_X(55) /* 25GBase-SR */ +#define IFM_50G_CR2 IFM_X(56) /* 50GBase-CR2 */ +#define IFM_50G_KR2 IFM_X(57) /* 50GBase-KR2 */ + /* * Please update ieee8023ad_lacp.c:lacp_compose_key() * after adding new Ethernet media types. */ -/* note 31 is the max! */ +/* Note IFM_X(511) is the max! 
*/ +/* Ethernet option values; includes bits used for extended variant field */ #define IFM_ETH_MASTER 0x00000100 /* master mode (1000baseT) */ #define IFM_ETH_RXPAUSE 0x00000200 /* receive PAUSE frames */ #define IFM_ETH_TXPAUSE 0x00000400 /* transmit PAUSE frames */ +#define IFM_ETH_XTYPE 0x00007800 /* extended media variants */ +#define IFM_ETH_XSHIFT 6 /* shift XTYPE next to TMASK */ /* * Token ring @@ -253,11 +300,6 @@ uint64_t ifmedia_baudrate(int); #define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */ /* - * CARP Common Address Redundancy Protocol - */ -#define IFM_CARP 0x000000c0 - -/* * Shared media sub-types */ #define IFM_AUTO 0 /* Autoselect best media */ @@ -309,7 +351,10 @@ uint64_t ifmedia_baudrate(int); * Macros to extract various bits of information from the media word. */ #define IFM_TYPE(x) ((x) & IFM_NMASK) -#define IFM_SUBTYPE(x) ((x) & IFM_TMASK) +#define IFM_SUBTYPE(x) \ + (IFM_TYPE(x) == IFM_ETHER ? IFM_ETHER_SUBTYPE_GET(x) : ((x) & IFM_TMASK)) +#define IFM_TYPE_MATCH(x,y) \ + (IFM_TYPE(x) == IFM_TYPE(y) && IFM_SUBTYPE(x) == IFM_SUBTYPE(y)) #define IFM_TYPE_OPTIONS(x) ((x) & IFM_OMASK) #define IFM_INST(x) (((x) & IFM_IMASK) >> IFM_ISHIFT) #define IFM_OPTIONS(x) ((x) & (IFM_OMASK | IFM_GMASK)) @@ -343,7 +388,6 @@ struct ifmedia_description { { IFM_FDDI, "FDDI" }, \ { IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \ { IFM_ATM, "ATM" }, \ - { IFM_CARP, "Common Address Redundancy Protocol" }, \ { 0, NULL }, \ } @@ -375,6 +419,34 @@ struct ifmedia_description { { IFM_40G_CR4, "40Gbase-CR4" }, \ { IFM_40G_SR4, "40Gbase-SR4" }, \ { IFM_40G_LR4, "40Gbase-LR4" }, \ + { IFM_1000_KX, "1000Base-KX" }, \ + { IFM_OTHER, "Other" }, \ + { IFM_10G_KX4, "10GBase-KX4" }, \ + { IFM_10G_KR, "10GBase-KR" }, \ + { IFM_10G_CR1, "10GBase-CR1" }, \ + { IFM_20G_KR2, "20GBase-KR2" }, \ + { IFM_2500_KX, "2500Base-KX" }, \ + { IFM_2500_T, "2500Base-T" }, \ + { IFM_5000_T, "5000Base-T" }, \ + { IFM_50G_PCIE, "PCIExpress-50G" }, \ + { IFM_25G_PCIE, 
"PCIExpress-25G" }, \ + { IFM_1000_SGMII, "1000Base-SGMII" }, \ + { IFM_10G_SFI, "10GBase-SFI" }, \ + { IFM_40G_XLPPI, "40GBase-XLPPI" }, \ + { IFM_1000_CX_SGMII, "1000Base-CX-SGMII" }, \ + { IFM_40G_KR4, "40GBase-KR4" }, \ + { IFM_10G_ER, "10GBase-ER" }, \ + { IFM_100G_CR4, "100GBase-CR4" }, \ + { IFM_100G_SR4, "100GBase-SR4" }, \ + { IFM_100G_KR4, "100GBase-KR4" }, \ + { IFM_100G_LR4, "100GBase-LR4" }, \ + { IFM_56G_R4, "56GBase-R4" }, \ + { IFM_100_T, "100BaseT" }, \ + { IFM_25G_CR, "25GBase-CR" }, \ + { IFM_25G_KR, "25GBase-KR" }, \ + { IFM_25G_SR, "25GBase-SR" }, \ + { IFM_50G_CR2, "50GBase-CR2" }, \ + { IFM_50G_KR2, "50GBase-KR2" }, \ { 0, NULL }, \ } @@ -676,6 +748,33 @@ struct ifmedia_baudrate { { IFM_ETHER | IFM_40G_CR4, IF_Gbps(40ULL) }, \ { IFM_ETHER | IFM_40G_SR4, IF_Gbps(40ULL) }, \ { IFM_ETHER | IFM_40G_LR4, IF_Gbps(40ULL) }, \ + { IFM_ETHER | IFM_1000_KX, IF_Mbps(1000) }, \ + { IFM_ETHER | IFM_10G_KX4, IF_Gbps(10ULL) }, \ + { IFM_ETHER | IFM_10G_KR, IF_Gbps(10ULL) }, \ + { IFM_ETHER | IFM_10G_CR1, IF_Gbps(10ULL) }, \ + { IFM_ETHER | IFM_20G_KR2, IF_Gbps(20ULL) }, \ + { IFM_ETHER | IFM_2500_KX, IF_Mbps(2500) }, \ + { IFM_ETHER | IFM_2500_T, IF_Mbps(2500) }, \ + { IFM_ETHER | IFM_5000_T, IF_Mbps(5000) }, \ + { IFM_ETHER | IFM_50G_PCIE, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_25G_PCIE, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_1000_SGMII, IF_Mbps(1000) }, \ + { IFM_ETHER | IFM_10G_SFI, IF_Gbps(10ULL) }, \ + { IFM_ETHER | IFM_40G_XLPPI, IF_Gbps(40ULL) }, \ + { IFM_ETHER | IFM_1000_CX_SGMII, IF_Mbps(1000) }, \ + { IFM_ETHER | IFM_40G_KR4, IF_Gbps(40ULL) }, \ + { IFM_ETHER | IFM_10G_ER, IF_Gbps(10ULL) }, \ + { IFM_ETHER | IFM_100G_CR4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_SR4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_KR4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_LR4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_56G_R4, IF_Gbps(56ULL) }, \ + { IFM_ETHER | IFM_100_T, IF_Mbps(100ULL) }, \ + { IFM_ETHER | IFM_25G_CR, IF_Gbps(25ULL) }, \ + { 
IFM_ETHER | IFM_25G_KR, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_25G_SR, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_50G_CR2, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_KR2, IF_Gbps(50ULL) }, \ \ { IFM_TOKEN | IFM_TOK_STP4, IF_Mbps(4) }, \ { IFM_TOKEN | IFM_TOK_STP16, IF_Mbps(16) }, \ @@ -730,8 +829,6 @@ struct ifmedia_status_description { { "no network", "active" } }, \ { IFM_ATM, IFM_AVALID, IFM_ACTIVE, \ { "no network", "active" } }, \ - { IFM_CARP, IFM_AVALID, IFM_ACTIVE, \ - { "backup", "master" } }, \ { 0, 0, 0, \ { NULL, NULL } } \ } diff --git a/freebsd/sys/net/if_mib.c b/freebsd/sys/net/if_mib.c index ec7a6984..d91c94ab 100644 --- a/freebsd/sys/net/if_mib.c +++ b/freebsd/sys/net/if_mib.c @@ -34,10 +34,12 @@ #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> +#include <sys/malloc.h> #include <sys/socket.h> #include <sys/sysctl.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_mib.h> #include <net/vnet.h> @@ -68,9 +70,9 @@ SYSCTL_DECL(_net_link_generic); static SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0, "Variables global to all interfaces"); -SYSCTL_VNET_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD, - &VNET_NAME(if_index), 0, - "Number of configured interfaces"); +SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, + CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(if_index), 0, + "Number of configured interfaces"); static int sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */ @@ -100,37 +102,18 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! 
*/ bzero(&ifmd, sizeof(ifmd)); strlcpy(ifmd.ifmd_name, ifp->if_xname, sizeof(ifmd.ifmd_name)); -#define COPY(fld) ifmd.ifmd_##fld = ifp->if_##fld - COPY(pcount); - COPY(data); -#undef COPY + ifmd.ifmd_pcount = ifp->if_pcount; + if_data_copy(ifp, &ifmd.ifmd_data); + ifmd.ifmd_flags = ifp->if_flags | ifp->if_drv_flags; ifmd.ifmd_snd_len = ifp->if_snd.ifq_len; ifmd.ifmd_snd_maxlen = ifp->if_snd.ifq_maxlen; - ifmd.ifmd_snd_drops = ifp->if_snd.ifq_drops; + ifmd.ifmd_snd_drops = + ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); error = SYSCTL_OUT(req, &ifmd, sizeof ifmd); - if (error || !req->newptr) - goto out; - - error = SYSCTL_IN(req, &ifmd, sizeof ifmd); if (error) goto out; - -#define DONTCOPY(fld) ifmd.ifmd_data.ifi_##fld = ifp->if_data.ifi_##fld - DONTCOPY(type); - DONTCOPY(physical); - DONTCOPY(addrlen); - DONTCOPY(hdrlen); - DONTCOPY(mtu); - DONTCOPY(metric); - DONTCOPY(baudrate); -#undef DONTCOPY -#define COPY(fld) ifp->if_##fld = ifmd.ifmd_##fld - COPY(data); - ifp->if_snd.ifq_maxlen = ifmd.ifmd_snd_maxlen; - ifp->if_snd.ifq_drops = ifmd.ifmd_snd_drops; -#undef COPY break; case IFDATA_LINKSPECIFIC: diff --git a/freebsd/sys/net/if_pflog.h b/freebsd/sys/net/if_pflog.h new file mode 100644 index 00000000..0faeb7d4 --- /dev/null +++ b/freebsd/sys/net/if_pflog.h @@ -0,0 +1,66 @@ +/* $OpenBSD: if_pflog.h,v 1.13 2006/10/23 12:46:09 henning Exp $ */ +/* + * Copyright 2001 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _NET_IF_PFLOG_H_ +#define _NET_IF_PFLOG_H_ + +#define PFLOGIFS_MAX 16 + +#define PFLOG_RULESET_NAME_SIZE 16 + +struct pfloghdr { + u_int8_t length; + sa_family_t af; + u_int8_t action; + u_int8_t reason; + char ifname[IFNAMSIZ]; + char ruleset[PFLOG_RULESET_NAME_SIZE]; + u_int32_t rulenr; + u_int32_t subrulenr; + uid_t uid; + pid_t pid; + uid_t rule_uid; + pid_t rule_pid; + u_int8_t dir; + u_int8_t pad[3]; +}; + +#define PFLOG_HDRLEN sizeof(struct pfloghdr) +/* minus pad, also used as a signature */ +#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) + +#ifdef _KERNEL +struct pf_rule; +struct pf_ruleset; +struct pfi_kif; +struct pf_pdesc; + +#define PFLOG_PACKET(i,a,b,c,d,e,f,g,h,di) do { \ + if (pflog_packet_ptr != NULL) \ + pflog_packet_ptr(i,a,b,c,d,e,f,g,h,di); \ +} while (0) +#endif /* _KERNEL */ +#endif /* _NET_IF_PFLOG_H_ */ diff --git a/freebsd/sys/net/if_pfsync.h b/freebsd/sys/net/if_pfsync.h new file mode 100644 index 00000000..5c4ba631 --- /dev/null +++ b/freebsd/sys/net/if_pfsync.h @@ -0,0 +1,265 @@ +/*- + * Copyright (c) 2001 Michael Shalayeff + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 2008 David Gwynne <dlg@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $ + * $FreeBSD$ + */ + + +#ifndef _NET_IF_PFSYNC_H_ +#define _NET_IF_PFSYNC_H_ + +#define PFSYNC_VERSION 5 +#define PFSYNC_DFLTTL 255 + +#define PFSYNC_ACT_CLR 0 /* clear all states */ +#define PFSYNC_ACT_INS 1 /* insert state */ +#define PFSYNC_ACT_INS_ACK 2 /* ack of insterted state */ +#define PFSYNC_ACT_UPD 3 /* update state */ +#define PFSYNC_ACT_UPD_C 4 /* "compressed" update state */ +#define PFSYNC_ACT_UPD_REQ 5 /* request "uncompressed" state */ +#define PFSYNC_ACT_DEL 6 /* delete state */ +#define PFSYNC_ACT_DEL_C 7 /* "compressed" delete state */ +#define PFSYNC_ACT_INS_F 8 /* insert fragment */ +#define PFSYNC_ACT_DEL_F 9 /* delete fragments */ +#define PFSYNC_ACT_BUS 10 /* bulk update status */ +#define PFSYNC_ACT_TDB 11 /* TDB replay counter update */ +#define PFSYNC_ACT_EOF 12 /* end of frame */ +#define PFSYNC_ACT_MAX 13 + +/* + * A pfsync frame is built from a header followed by several sections which + * are all prefixed with their own subheaders. Frames must be terminated with + * an EOF subheader. + * + * | ... | + * | IP header | + * +============================+ + * | pfsync_header | + * +----------------------------+ + * | pfsync_subheader | + * +----------------------------+ + * | first action fields | + * | ... | + * +----------------------------+ + * | pfsync_subheader | + * +----------------------------+ + * | second action fields | + * | ... 
| + * +----------------------------+ + * | EOF pfsync_subheader | + * +----------------------------+ + * | HMAC | + * +============================+ + */ + +/* + * Frame header + */ + +struct pfsync_header { + u_int8_t version; + u_int8_t _pad; + u_int16_t len; + u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH]; +} __packed; + +/* + * Frame region subheader + */ + +struct pfsync_subheader { + u_int8_t action; + u_int8_t _pad; + u_int16_t count; +} __packed; + +/* + * CLR + */ + +struct pfsync_clr { + char ifname[IFNAMSIZ]; + u_int32_t creatorid; +} __packed; + +/* + * INS, UPD, DEL + */ + +/* these use struct pfsync_state in pfvar.h */ + +/* + * INS_ACK + */ + +struct pfsync_ins_ack { + u_int64_t id; + u_int32_t creatorid; +} __packed; + +/* + * UPD_C + */ + +struct pfsync_upd_c { + u_int64_t id; + struct pfsync_state_peer src; + struct pfsync_state_peer dst; + u_int32_t creatorid; + u_int32_t expire; + u_int8_t timeout; + u_int8_t _pad[3]; +} __packed; + +/* + * UPD_REQ + */ + +struct pfsync_upd_req { + u_int64_t id; + u_int32_t creatorid; +} __packed; + +/* + * DEL_C + */ + +struct pfsync_del_c { + u_int64_t id; + u_int32_t creatorid; +} __packed; + +/* + * INS_F, DEL_F + */ + +/* not implemented (yet) */ + +/* + * BUS + */ + +struct pfsync_bus { + u_int32_t creatorid; + u_int32_t endtime; + u_int8_t status; +#define PFSYNC_BUS_START 1 +#define PFSYNC_BUS_END 2 + u_int8_t _pad[3]; +} __packed; + +/* + * TDB + */ + +struct pfsync_tdb { + u_int32_t spi; + union sockaddr_union dst; + u_int32_t rpl; + u_int64_t cur_bytes; + u_int8_t sproto; + u_int8_t updates; + u_int8_t _pad[2]; +} __packed; + +#define PFSYNC_HDRLEN sizeof(struct pfsync_header) + +struct pfsyncstats { + u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */ + u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */ + u_int64_t pfsyncs_badif; /* not the right interface */ + u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */ + u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */ + u_int64_t 
pfsyncs_badver; /* bad (incl unsupp) version */ + u_int64_t pfsyncs_badact; /* bad action */ + u_int64_t pfsyncs_badlen; /* data length does not match */ + u_int64_t pfsyncs_badauth; /* bad authentication */ + u_int64_t pfsyncs_stale; /* stale state */ + u_int64_t pfsyncs_badval; /* bad values */ + u_int64_t pfsyncs_badstate; /* insert/lookup failed */ + + u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */ + u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */ + u_int64_t pfsyncs_onomem; /* no memory for an mbuf */ + u_int64_t pfsyncs_oerrors; /* ip output error */ + + u_int64_t pfsyncs_iacts[PFSYNC_ACT_MAX]; + u_int64_t pfsyncs_oacts[PFSYNC_ACT_MAX]; +}; + +/* + * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC + */ +struct pfsyncreq { + char pfsyncr_syncdev[IFNAMSIZ]; + struct in_addr pfsyncr_syncpeer; + int pfsyncr_maxupdates; + int pfsyncr_defer; +}; + +#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) +#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) + +#ifdef _KERNEL + +/* + * this shows where a pf state is with respect to the syncing. + */ +#define PFSYNC_S_INS 0x00 +#define PFSYNC_S_IACK 0x01 +#define PFSYNC_S_UPD 0x02 +#define PFSYNC_S_UPD_C 0x03 +#define PFSYNC_S_DEL 0x04 +#define PFSYNC_S_COUNT 0x05 + +#define PFSYNC_S_DEFER 0xfe +#define PFSYNC_S_NONE 0xff + +#define PFSYNC_SI_IOCTL 0x01 +#define PFSYNC_SI_CKSUM 0x02 +#define PFSYNC_SI_ACK 0x04 + +#endif /* _KERNEL */ + +#endif /* _NET_IF_PFSYNC_H_ */ diff --git a/freebsd/sys/net/if_sppp.h b/freebsd/sys/net/if_sppp.h index 97f94b39..23a08e77 100644 --- a/freebsd/sys/net/if_sppp.h +++ b/freebsd/sys/net/if_sppp.h @@ -78,7 +78,7 @@ struct sauth { /* * Don't change the order of this. Ordering the phases this way allows - * for a comparision of ``pp_phase >= PHASE_AUTHENTICATE'' in order to + * for a comparison of ``pp_phase >= PHASE_AUTHENTICATE'' in order to * know whether LCP is up. 
*/ enum ppp_phase { diff --git a/freebsd/sys/net/if_spppfr.c b/freebsd/sys/net/if_spppfr.c index 93bbaeba..d30509d5 100644 --- a/freebsd/sys/net/if_spppfr.c +++ b/freebsd/sys/net/if_spppfr.c @@ -27,10 +27,9 @@ #include <rtems/bsd/sys/param.h> -#if defined(__FreeBSD__) && __FreeBSD__ >= 3 +#if defined(__FreeBSD__) #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipx.h> #endif #ifdef NetBSD1_3 @@ -47,7 +46,7 @@ #include <sys/sockio.h> #include <sys/socket.h> #include <sys/syslog.h> -#if defined(__FreeBSD__) && __FreeBSD__ >= 3 +#if defined(__FreeBSD__) #include <sys/random.h> #endif #include <sys/malloc.h> @@ -60,6 +59,7 @@ #endif #include <net/if.h> +#include <net/if_var.h> #include <net/netisr.h> #include <net/if_types.h> #include <net/route.h> @@ -86,11 +86,6 @@ # include <net/ethertypes.h> #endif -#ifdef IPX -#include <netipx/ipx.h> -#include <netipx/ipx_if.h> -#endif - #include <net/if_sppp.h> /* @@ -151,7 +146,7 @@ struct arp_req { unsigned short ptarget2; } __packed; -#if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113 +#if defined(__FreeBSD__) && __FreeBSD_version < 501113 #define SPP_FMT "%s%d: " #define SPP_ARGS(ifp) (ifp)->if_name, (ifp)->if_unit #else @@ -257,9 +252,9 @@ bad: m_freem (m); switch (proto) { default: - ++ifp->if_noproto; -drop: ++ifp->if_ierrors; - ++ifp->if_iqdrops; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); +drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); m_freem (m); return; #ifdef INET @@ -267,16 +262,6 @@ drop: ++ifp->if_ierrors; isr = NETISR_IP; break; #endif -#ifdef IPX - case ETHERTYPE_IPX: - isr = NETISR_IPX; - break; -#endif -#ifdef NETATALK - case ETHERTYPE_AT: - isr = NETISR_ATALK; - break; -#endif } if (! (ifp->if_flags & IFF_UP)) @@ -306,7 +291,7 @@ struct mbuf *sppp_fr_header (struct sppp *sp, struct mbuf *m, /* Prepend the space for Frame Relay header. */ hlen = (family == AF_INET) ? 
4 : 10; - M_PREPEND (m, hlen, M_DONTWAIT); + M_PREPEND (m, hlen, M_NOWAIT); if (! m) return 0; h = mtod (m, u_char*); @@ -346,21 +331,11 @@ struct mbuf *sppp_fr_header (struct sppp *sp, struct mbuf *m, h[3] = FR_IP; return m; #endif -#ifdef IPX - case AF_IPX: - type = ETHERTYPE_IPX; - break; -#endif #ifdef NS case AF_NS: type = 0x8137; break; #endif -#ifdef NETATALK - case AF_APPLETALK: - type = ETHERTYPE_AT; - break; -#endif } h[3] = FR_PADDING; h[4] = FR_SNAP; @@ -383,7 +358,7 @@ void sppp_fr_keepalive (struct sppp *sp) unsigned char *h, *p; struct mbuf *m; - MGETHDR (m, M_DONTWAIT, MT_DATA); + MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.rcvif = 0; @@ -421,7 +396,7 @@ void sppp_fr_keepalive (struct sppp *sp) (u_char) sp->pp_rseq[IDX_LCP]); if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) - ++ifp->if_oerrors; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* @@ -503,7 +478,7 @@ static void sppp_fr_arp (struct sppp *sp, struct arp_req *req, (unsigned char) his_ip_address); /* Send the Inverse ARP reply. */ - MGETHDR (m, M_DONTWAIT, MT_DATA); + MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.len = m->m_len = 10 + sizeof (*reply); @@ -535,7 +510,7 @@ static void sppp_fr_arp (struct sppp *sp, struct arp_req *req, reply->ptarget2 = htonl (his_ip_address) >> 16; if (! 
IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) - ++ifp->if_oerrors; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c index fa6a7c1b..e7a62277 100644 --- a/freebsd/sys/net/if_spppsubr.c +++ b/freebsd/sys/net/if_spppsubr.c @@ -27,11 +27,12 @@ #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipx.h> #include <sys/systm.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> #include <sys/module.h> +#include <sys/rmlock.h> #include <sys/sockio.h> #include <sys/socket.h> #include <sys/syslog.h> @@ -42,6 +43,7 @@ #include <sys/md5.h> #include <net/if.h> +#include <net/if_var.h> #include <net/netisr.h> #include <net/if_types.h> #include <net/route.h> @@ -66,11 +68,6 @@ #include <netinet/if_ether.h> -#ifdef IPX -#include <netipx/ipx.h> -#include <netipx/ipx_if.h> -#endif - #include <net/if_sppp.h> #define IOCTL_CMD_T u_long @@ -264,7 +261,7 @@ static const u_short interactive_ports[8] = { int debug = ifp->if_flags & IFF_DEBUG static int sppp_output(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro); + const struct sockaddr *dst, struct route *ro); static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2); static void sppp_cisco_input(struct sppp *sp, struct mbuf *m); @@ -525,7 +522,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) if (ifp->if_flags & IFF_UP) /* Count received bytes, add FCS and one flag */ - ifp->if_ibytes += m->m_pkthdr.len + 3; + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len + 3); if (m->m_pkthdr.len <= PPP_HEADER_LEN) { /* Too small packet, drop it. 
*/ @@ -537,8 +534,8 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) m_freem (m); SPPP_UNLOCK(sp); drop2: - ++ifp->if_ierrors; - ++ifp->if_iqdrops; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return; } @@ -577,7 +574,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) sppp_cp_send (sp, PPP_LCP, PROTO_REJ, ++sp->pp_seq[IDX_LCP], m->m_pkthdr.len + 2, &h->protocol); - ++ifp->if_noproto; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto drop; case PPP_LCP: sppp_cp_input(&lcp, sp, m); @@ -631,7 +628,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) * enough leading space in the existing mbuf). */ m_adj(m, vjlen); - M_PREPEND(m, hlen, M_DONTWAIT); + M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { SPPP_UNLOCK(sp); goto drop2; @@ -673,14 +670,6 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) do_account++; break; #endif -#ifdef IPX - case PPP_IPX: - /* IPX IPXCP not implemented yet */ - if (sp->pp_phase == PHASE_NETWORK) - isr = NETISR_IPX; - do_account++; - break; -#endif } break; case CISCO_MULTICAST: @@ -697,7 +686,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) } switch (ntohs (h->protocol)) { default: - ++ifp->if_noproto; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto invalid; case CISCO_KEEPALIVE: sppp_cisco_input (sp, m); @@ -716,12 +705,6 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) do_account++; break; #endif -#ifdef IPX - case ETHERTYPE_IPX: - isr = NETISR_IPX; - do_account++; - break; -#endif } break; default: /* Invalid PPP packet. */ @@ -787,19 +770,18 @@ sppp_ifstart(struct ifnet *ifp) * Enqueue transmit packet. 
*/ static int -sppp_output(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro) +sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) { struct sppp *sp = IFP2SP(ifp); struct ppp_header *h; struct ifqueue *ifq = NULL; - int s, error, rv = 0; + int error, rv = 0; #ifdef INET int ipproto = PPP_IP; #endif int debug = ifp->if_flags & IFF_DEBUG; - s = splimp(); SPPP_LOCK(sp); if (!(ifp->if_flags & IFF_UP) || @@ -810,7 +792,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, #endif m_freem (m); SPPP_UNLOCK(sp); - splx (s); return (ENETDOWN); } @@ -834,9 +815,7 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, * to start LCP for it. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; - splx(s); lcp.Open(sp); - s = splimp(); } #ifdef INET @@ -860,7 +839,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, { m_freem(m); SPPP_UNLOCK(sp); - splx(s); if(ip->ip_p == IPPROTO_TCP) return(EADDRNOTAVAIL); else @@ -905,7 +883,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, default: m_freem(m); SPPP_UNLOCK(sp); - splx(s); return (EINVAL); } } @@ -928,14 +905,13 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, /* * Prepend general data packet PPP header. For now, IP only. */ - M_PREPEND (m, PPP_HEADER_LEN, M_DONTWAIT); + M_PREPEND (m, PPP_HEADER_LEN, M_NOWAIT); if (! m) { nobufs: if (debug) log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n", SPP_ARGS(ifp)); - ++ifp->if_oerrors; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); - splx (s); return (ENOBUFS); } /* @@ -992,17 +968,10 @@ nobufs: if (debug) } break; #endif -#ifdef IPX - case AF_IPX: /* Novell IPX Protocol */ - h->protocol = htons (sp->pp_mode == IFF_CISCO ? 
- ETHERTYPE_IPX : PPP_IPX); - break; -#endif default: m_freem (m); - ++ifp->if_oerrors; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); - splx (s); return (EAFNOSUPPORT); } @@ -1016,13 +985,11 @@ out: else IFQ_HANDOFF_ADJ(ifp, m, 3, error); if (error) { - ++ifp->if_oerrors; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); - splx (s); return (rv? rv: ENOBUFS); } SPPP_UNLOCK(sp); - splx (s); /* * Unlike in sppp_input(), we can always bump the timestamp * here since sppp_output() is only called on behalf of @@ -1042,7 +1009,7 @@ sppp_attach(struct ifnet *ifp) mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE); /* Initialize keepalive handler. */ - callout_init(&sp->keepalive_callout, CALLOUT_MPSAFE); + callout_init(&sp->keepalive_callout, 1); callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive, (void *)sp); @@ -1074,7 +1041,7 @@ sppp_attach(struct ifnet *ifp) #ifdef INET6 sp->confflags |= CONF_ENABLE_IPV6; #endif - callout_init(&sp->ifstart_callout, CALLOUT_MPSAFE); + callout_init(&sp->ifstart_callout, 1); sp->if_start = ifp->if_start; ifp->if_start = sppp_ifstart; sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK); @@ -1139,14 +1106,12 @@ int sppp_isempty(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); - int empty, s; + int empty; - s = splimp(); SPPP_LOCK(sp); empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head && !SP2IFP(sp)->if_snd.ifq_head; SPPP_UNLOCK(sp); - splx(s); return (empty); } @@ -1158,9 +1123,7 @@ sppp_dequeue(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); struct mbuf *m; - int s; - s = splimp(); SPPP_LOCK(sp); /* * Process only the control protocol queue until we have at @@ -1177,7 +1140,6 @@ sppp_dequeue(struct ifnet *ifp) IF_DEQUEUE (&SP2IFP(sp)->if_snd, m); } SPPP_UNLOCK(sp); - splx(s); return m; } @@ -1189,9 +1151,7 @@ sppp_pick(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); struct mbuf *m; - int s; - s = splimp (); SPPP_LOCK(sp); m = sp->pp_cpq.ifq_head; @@ 
-1202,7 +1162,6 @@ sppp_pick(struct ifnet *ifp) if ((m = sp->pp_fastq.ifq_head) == NULL) m = SP2IFP(sp)->if_snd.ifq_head; SPPP_UNLOCK(sp); - splx (s); return (m); } @@ -1214,14 +1173,12 @@ sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data) { struct ifreq *ifr = (struct ifreq*) data; struct sppp *sp = IFP2SP(ifp); - int s, rv, going_up, going_down, newmode; + int rv, going_up, going_down, newmode; - s = splimp(); SPPP_LOCK(sp); rv = 0; switch (cmd) { case SIOCAIFADDR: - case SIOCSIFDSTADDR: break; case SIOCSIFADDR: @@ -1322,7 +1279,6 @@ sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data) rv = ENOTTY; } SPPP_UNLOCK(sp); - splx(s); return rv; } @@ -1414,7 +1370,7 @@ sppp_cisco_send(struct sppp *sp, int type, long par1, long par2) getmicrouptime(&tv); - MGETHDR (m, M_DONTWAIT, MT_DATA); + MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN; @@ -1441,7 +1397,7 @@ sppp_cisco_send(struct sppp *sp, int type, long par1, long par2) (u_long)ch->par2, (u_int)ch->rel, (u_int)ch->time0, (u_int)ch->time1); if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* @@ -1462,7 +1418,7 @@ sppp_cp_send(struct sppp *sp, u_short proto, u_char type, if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN) len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN; - MGETHDR (m, M_DONTWAIT, MT_DATA); + MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len; @@ -1490,7 +1446,7 @@ sppp_cp_send(struct sppp *sp, u_short proto, u_char type, log(-1, ">\n"); } if (! 
IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* @@ -1532,7 +1488,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) log(-1, SPP_FMT "%s invalid conf-req length %d\n", SPP_ARGS(ifp), cp->name, len); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } /* handle states where RCR doesn't get a SCA/SCN */ @@ -1588,7 +1544,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CONF_ACK: @@ -1597,7 +1553,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n", SPP_ARGS(ifp), cp->name, h->ident, sp->confid[cp->protoidx]); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } switch (sp->state[cp->protoidx]) { @@ -1632,7 +1588,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CONF_NAK: @@ -1642,7 +1598,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n", SPP_ARGS(ifp), cp->name, h->ident, sp->confid[cp->protoidx]); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if (h->type == CONF_NAK) @@ -1682,7 +1638,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; @@ -1715,7 +1671,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), 
sppp_state_name(sp->state[cp->protoidx])); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case TERM_ACK: @@ -1746,7 +1702,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CODE_REJ: @@ -1773,7 +1729,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case PROTO_REJ: @@ -1832,7 +1788,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; } @@ -1848,7 +1804,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) if (debug) log(-1, SPP_FMT "lcp echo req but lcp closed\n", SPP_ARGS(ifp)); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if (len < 8) { @@ -1882,7 +1838,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) if (cp->proto != PPP_LCP) goto illegal; if (h->ident != sp->lcp.echoid) { - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if (len < 8) { @@ -1907,7 +1863,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) SPP_ARGS(ifp), cp->name, h->type); sppp_cp_send(sp, cp->proto, CODE_REJ, ++sp->pp_seq[cp->protoidx], m->m_pkthdr.len, h); - ++ifp->if_ierrors; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } } @@ -2072,9 +2028,7 @@ static void sppp_to_event(const struct cp *cp, struct sppp *sp) { STDDCL; - int s; - s = splimp(); SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n", @@ -2124,7 +2078,6 @@ sppp_to_event(const struct cp *cp, 
struct sppp *sp) } SPPP_UNLOCK(sp); - splx(s); } /* @@ -2196,7 +2149,7 @@ sppp_lcp_init(struct sppp *sp) sp->lcp.max_terminate = 2; sp->lcp.max_configure = 10; sp->lcp.max_failure = 10; - callout_init(&sp->ch[IDX_LCP], CALLOUT_MPSAFE); + callout_init(&sp->ch[IDX_LCP], 1); } static void @@ -2887,7 +2840,7 @@ sppp_ipcp_init(struct sppp *sp) sp->fail_counter[IDX_IPCP] = 0; sp->pp_seq[IDX_IPCP] = 0; sp->pp_rseq[IDX_IPCP] = 0; - callout_init(&sp->ch[IDX_IPCP], CALLOUT_MPSAFE); + callout_init(&sp->ch[IDX_IPCP], 1); } static void @@ -3011,7 +2964,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len) * since our algorithm always uses the * original option to NAK it with new values, * things would become more complicated. In - * pratice, the only commonly implemented IP + * practice, the only commonly implemented IP * compression option is VJ anyway, so the * difference is negligible. */ @@ -3446,7 +3399,7 @@ sppp_ipv6cp_init(struct sppp *sp) sp->fail_counter[IDX_IPV6CP] = 0; sp->pp_seq[IDX_IPV6CP] = 0; sp->pp_rseq[IDX_IPV6CP] = 0; - callout_init(&sp->ch[IDX_IPV6CP], CALLOUT_MPSAFE); + callout_init(&sp->ch[IDX_IPV6CP], 1); } static void @@ -4027,7 +3980,7 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m) { STDDCL; struct lcp_header *h; - int len, x; + int len; u_char *value, *name, digest[AUTHKEYLEN], dsize; int value_len, name_len; MD5_CTX ctx; @@ -4104,7 +4057,6 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m) } log(-1, "\n"); } - x = splimp(); SPPP_LOCK(sp); sp->pp_flags &= ~PP_NEEDAUTH; if (sp->myauth.proto == PPP_CHAP && @@ -4116,11 +4068,9 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m) * to network phase. 
*/ SPPP_UNLOCK(sp); - splx(x); break; } SPPP_UNLOCK(sp); - splx(x); sppp_phase_network(sp); break; @@ -4254,7 +4204,7 @@ sppp_chap_init(struct sppp *sp) sp->fail_counter[IDX_CHAP] = 0; sp->pp_seq[IDX_CHAP] = 0; sp->pp_rseq[IDX_CHAP] = 0; - callout_init(&sp->ch[IDX_CHAP], CALLOUT_MPSAFE); + callout_init(&sp->ch[IDX_CHAP], 1); } static void @@ -4282,9 +4232,7 @@ sppp_chap_TO(void *cookie) { struct sppp *sp = (struct sppp *)cookie; STDDCL; - int s; - s = splimp(); SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n", @@ -4315,14 +4263,13 @@ sppp_chap_TO(void *cookie) } SPPP_UNLOCK(sp); - splx(s); } static void sppp_chap_tlu(struct sppp *sp) { STDDCL; - int i, x; + int i; i = 0; sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure; @@ -4350,10 +4297,9 @@ sppp_chap_tlu(struct sppp *sp) if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0) log(-1, "next re-challenge in %d seconds\n", i); else - log(-1, "re-challenging supressed\n"); + log(-1, "re-challenging suppressed\n"); } - x = splimp(); SPPP_LOCK(sp); /* indicate to LCP that we need to be closed down */ sp->lcp.protos |= (1 << IDX_CHAP); @@ -4365,11 +4311,9 @@ sppp_chap_tlu(struct sppp *sp) * phase. */ SPPP_UNLOCK(sp); - splx(x); return; } SPPP_UNLOCK(sp); - splx(x); /* * If we are already in phase network, we are done here. This @@ -4438,7 +4382,7 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m) { STDDCL; struct lcp_header *h; - int len, x; + int len; u_char *name, *passwd, mlen; int name_len, passwd_len; @@ -4525,7 +4469,6 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m) } log(-1, "\n"); } - x = splimp(); SPPP_LOCK(sp); sp->pp_flags &= ~PP_NEEDAUTH; if (sp->myauth.proto == PPP_PAP && @@ -4537,11 +4480,9 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m) * to network phase. 
*/ SPPP_UNLOCK(sp); - splx(x); break; } SPPP_UNLOCK(sp); - splx(x); sppp_phase_network(sp); break; @@ -4585,8 +4526,8 @@ sppp_pap_init(struct sppp *sp) sp->fail_counter[IDX_PAP] = 0; sp->pp_seq[IDX_PAP] = 0; sp->pp_rseq[IDX_PAP] = 0; - callout_init(&sp->ch[IDX_PAP], CALLOUT_MPSAFE); - callout_init(&sp->pap_my_to_ch, CALLOUT_MPSAFE); + callout_init(&sp->ch[IDX_PAP], 1); + callout_init(&sp->pap_my_to_ch, 1); } static void @@ -4622,9 +4563,7 @@ sppp_pap_TO(void *cookie) { struct sppp *sp = (struct sppp *)cookie; STDDCL; - int s; - s = splimp(); SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n", @@ -4650,7 +4589,6 @@ sppp_pap_TO(void *cookie) } SPPP_UNLOCK(sp); - splx(s); } /* @@ -4677,7 +4615,6 @@ static void sppp_pap_tlu(struct sppp *sp) { STDDCL; - int x; sp->rst_counter[IDX_PAP] = sp->lcp.max_configure; @@ -4685,7 +4622,6 @@ sppp_pap_tlu(struct sppp *sp) log(LOG_DEBUG, SPP_FMT "%s tlu\n", SPP_ARGS(ifp), pap.name); - x = splimp(); SPPP_LOCK(sp); /* indicate to LCP that we need to be closed down */ sp->lcp.protos |= (1 << IDX_PAP); @@ -4697,11 +4633,9 @@ sppp_pap_tlu(struct sppp *sp) * phase. */ SPPP_UNLOCK(sp); - splx(x); return; } SPPP_UNLOCK(sp); - splx(x); sppp_phase_network(sp); } @@ -4766,7 +4700,7 @@ sppp_auth_send(const struct cp *cp, struct sppp *sp, const char *msg; va_list ap; - MGETHDR (m, M_DONTWAIT, MT_DATA); + MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.rcvif = 0; @@ -4810,7 +4744,7 @@ sppp_auth_send(const struct cp *cp, struct sppp *sp, log(-1, ">\n"); } if (! 
IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* @@ -4823,7 +4757,7 @@ sppp_qflush(struct ifqueue *ifq) n = ifq->ifq_head; while ((m = n)) { - n = m->m_act; + n = m->m_nextpkt; m_freem (m); } ifq->ifq_head = 0; @@ -4839,9 +4773,7 @@ sppp_keepalive(void *dummy) { struct sppp *sp = (struct sppp*)dummy; struct ifnet *ifp = SP2IFP(sp); - int s; - s = splimp(); SPPP_LOCK(sp); /* Keepalive mode disabled or channel down? */ if (! (sp->pp_flags & PP_KEEPALIVE) || @@ -4884,7 +4816,6 @@ sppp_keepalive(void *dummy) } out: SPPP_UNLOCK(sp); - splx(s); callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive, (void *)sp); } @@ -4906,7 +4837,7 @@ sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask) * Pick the first AF_INET address from the list, * aliases don't make any sense on a p2p link anyway. */ - si = 0; + si = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET) { @@ -4934,7 +4865,7 @@ sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask) #ifdef INET /* - * Set my IP address. Must be called at splimp. + * Set my IP address. */ static void sppp_set_ip_addr(struct sppp *sp, u_long src) @@ -4948,7 +4879,7 @@ sppp_set_ip_addr(struct sppp *sp, u_long src) * Pick the first AF_INET address from the list, * aliases don't make any sense on a p2p link anyway. */ - si = 0; + si = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) { @@ -5051,7 +4982,7 @@ sppp_gen_ip6_addr(struct sppp *sp, struct in6_addr *addr) } /* - * Set my IPv6 address. Must be called at splimp. + * Set my IPv6 address. 
*/ static void sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src) @@ -5126,14 +5057,15 @@ sppp_params(struct sppp *sp, u_long cmd, void *data) struct spppreq *spr; int rv = 0; - if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == 0) + if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == NULL) return (EAGAIN); /* * ifr->ifr_data is supposed to point to a struct spppreq. * Check the cmd word first before attempting to fetch all the * data. */ - if ((subcmd = fuword(ifr->ifr_data)) == -1) { + rv = fueword(ifr->ifr_data, &subcmd); + if (rv == -1) { rv = EFAULT; goto quit; } diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c index e88fd34d..7c1b7075 100644 --- a/freebsd/sys/net/if_stf.c +++ b/freebsd/sys/net/if_stf.c @@ -76,9 +76,6 @@ * Note that there is no way to be 100% secure. */ -#include <rtems/bsd/local/opt_inet.h> -#include <rtems/bsd/local/opt_inet6.h> - #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/socket.h> @@ -86,24 +83,27 @@ #include <sys/mbuf.h> #include <rtems/bsd/sys/errno.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> #include <sys/module.h> #include <sys/protosw.h> #include <sys/proc.h> #include <sys/queue.h> +#include <sys/rmlock.h> #include <sys/sysctl.h> #include <machine/cpu.h> #include <sys/malloc.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/route.h> #include <net/netisr.h> #include <net/if_types.h> -#include <net/if_stf.h> #include <net/vnet.h> #include <netinet/in.h> +#include <netinet/in_fib.h> #include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/ip_var.h> @@ -125,16 +125,10 @@ SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface"); -static int stf_route_cache = 1; -SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW, - &stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output"); - static int stf_permit_rfc1918 = 0; 
-TUNABLE_INT("net.link.stf.permit_rfc1918", &stf_permit_rfc1918); -SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN, +SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RWTUN, &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses"); -#define STFNAME "stf" #define STFUNIT 0 #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) @@ -143,36 +137,34 @@ SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN, * XXX: Return a pointer with 16-bit aligned. Don't cast it to * struct in_addr *; use bcopy() instead. */ -#define GET_V4(x) ((caddr_t)(&(x)->s6_addr16[1])) +#define GET_V4(x) (&(x)->s6_addr16[1]) struct stf_softc { struct ifnet *sc_ifp; - union { - struct route __sc_ro4; - struct route_in6 __sc_ro6; /* just for safety */ - } __sc_ro46; -#define sc_ro __sc_ro46.__sc_ro4 struct mtx sc_ro_mtx; u_int sc_fibnum; const struct encaptab *encap_cookie; }; #define STF2IFP(sc) ((sc)->sc_ifp) +static const char stfname[] = "stf"; + /* * Note that mutable fields in the softc are not currently locked. * We do lock sc_ro in stf_output though. 
*/ -static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface"); +static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface"); static const int ip_stf_ttl = 40; extern struct domain inetdomain; -struct protosw in_stf_protosw = { +static int in_stf_input(struct mbuf **, int *, int); +static struct protosw in_stf_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_IPV6, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = in_stf_input, - .pr_output = (pr_output_t *)rip_output, + .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; @@ -181,22 +173,20 @@ static char *stfnames[] = {"stf0", "stf", "6to4", NULL}; static int stfmodevent(module_t, int, void *); static int stf_encapcheck(const struct mbuf *, int, int, void *); -static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *); -static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *, +static int stf_getsrcifa6(struct ifnet *, struct in6_addr *, struct in6_addr *); +static int stf_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int isrfc1918addr(struct in_addr *); static int stf_checkaddr4(struct stf_softc *, struct in_addr *, struct ifnet *); static int stf_checkaddr6(struct stf_softc *, struct in6_addr *, struct ifnet *); -static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int stf_ioctl(struct ifnet *, u_long, caddr_t); static int stf_clone_match(struct if_clone *, const char *); static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t); static int stf_clone_destroy(struct if_clone *, struct ifnet *); -struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0, - NULL, stf_clone_match, stf_clone_create, stf_clone_destroy); +static struct if_clone *stf_cloner; static int stf_clone_match(struct if_clone *ifc, const char *name) @@ -247,7 +237,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) * we don't conform to the default 
naming convention for interfaces. */ strlcpy(ifp->if_xname, name, IFNAMSIZ); - ifp->if_dname = ifc->ifc_name; + ifp->if_dname = stfname; ifp->if_dunit = IF_DUNIT_NONE; mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF); @@ -289,18 +279,16 @@ stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) } static int -stfmodevent(mod, type, data) - module_t mod; - int type; - void *data; +stfmodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - if_clone_attach(&stf_cloner); + stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match, + stf_clone_create, stf_clone_destroy); break; case MOD_UNLOAD: - if_clone_detach(&stf_cloner); + if_clone_detach(stf_cloner); break; default: return (EOPNOTSUPP); @@ -318,16 +306,12 @@ static moduledata_t stf_mod = { DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); static int -stf_encapcheck(m, off, proto, arg) - const struct mbuf *m; - int off; - int proto; - void *arg; +stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { struct ip ip; - struct in6_ifaddr *ia6; struct stf_softc *sc; struct in_addr a, b, mask; + struct in6_addr addr6, mask6; sc = (struct stf_softc *)arg; if (sc == NULL) @@ -349,20 +333,16 @@ stf_encapcheck(m, off, proto, arg) if (ip.ip_v != 4) return 0; - ia6 = stf_getsrcifa6(STF2IFP(sc)); - if (ia6 == NULL) - return 0; + if (stf_getsrcifa6(STF2IFP(sc), &addr6, &mask6) != 0) + return (0); /* * check if IPv4 dst matches the IPv4 address derived from the * local 6to4 address. * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... 
*/ - if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst, - sizeof(ip.ip_dst)) != 0) { - ifa_free(&ia6->ia_ifa); + if (bcmp(GET_V4(&addr6), &ip.ip_dst, sizeof(ip.ip_dst)) != 0) return 0; - } /* * check if IPv4 src matches the IPv4 address derived from the @@ -371,9 +351,8 @@ stf_encapcheck(m, off, proto, arg) * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 */ bzero(&a, sizeof(a)); - bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a)); - bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask)); - ifa_free(&ia6->ia_ifa); + bcopy(GET_V4(&addr6), &a, sizeof(a)); + bcopy(GET_V4(&mask6), &mask, sizeof(mask)); a.s_addr &= mask.s_addr; b = ip.ip_src; b.s_addr &= mask.s_addr; @@ -384,12 +363,12 @@ stf_encapcheck(m, off, proto, arg) return 32; } -static struct in6_ifaddr * -stf_getsrcifa6(ifp) - struct ifnet *ifp; +static int +stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask) { struct ifaddr *ia; struct in_ifaddr *ia4; + struct in6_ifaddr *ia6; struct sockaddr_in6 *sin6; struct in_addr in; @@ -408,33 +387,30 @@ stf_getsrcifa6(ifp) if (ia4 == NULL) continue; - ifa_ref(ia); + ia6 = (struct in6_ifaddr *)ia; + + *addr = sin6->sin6_addr; + *mask = ia6->ia_prefixmask.sin6_addr; if_addr_runlock(ifp); - return (struct in6_ifaddr *)ia; + return (0); } if_addr_runlock(ifp); - return NULL; + return (ENOENT); } static int -stf_output(ifp, m, dst, ro) - struct ifnet *ifp; - struct mbuf *m; - struct sockaddr *dst; - struct route *ro; +stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) { struct stf_softc *sc; - struct sockaddr_in6 *dst6; - struct route *cached_route; + const struct sockaddr_in6 *dst6; struct in_addr in4; - caddr_t ptr; - struct sockaddr_in *dst4; + const void *ptr; u_int8_t tos; struct ip *ip; struct ip6_hdr *ip6; - struct in6_ifaddr *ia6; - u_int32_t af; + struct in6_addr addr6, mask6; int error; #ifdef MAC @@ -446,12 +422,12 @@ stf_output(ifp, m, dst, ro) #endif sc = 
ifp->if_softc; - dst6 = (struct sockaddr_in6 *)dst; + dst6 = (const struct sockaddr_in6 *)dst; /* just in case */ if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return ENETDOWN; } @@ -460,18 +436,16 @@ stf_output(ifp, m, dst, ro) * we shouldn't generate output. Without this check, we'll end up * using wrong IPv4 source. */ - ia6 = stf_getsrcifa6(ifp); - if (ia6 == NULL) { + if (stf_getsrcifa6(ifp, &addr6, &mask6) != 0) { m_freem(m); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return ENETDOWN; } if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); if (!m) { - ifa_free(&ia6->ia_ifa); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return ENOBUFS; } } @@ -479,15 +453,6 @@ stf_output(ifp, m, dst, ro) tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; /* - * BPF writes need to be handled specially. - * This is a null operation, nothing here checks dst->sa_family. - */ - if (dst->sa_family == AF_UNSPEC) { - bcopy(dst->sa_data, &af, sizeof(af)); - dst->sa_family = af; - } - - /* * Pickup the right outer dst addr from the list of candidates. * ip6_dst has priority as it may be able to give us shorter IPv4 hops. */ @@ -497,9 +462,8 @@ stf_output(ifp, m, dst, ro) else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) ptr = GET_V4(&dst6->sin6_addr); else { - ifa_free(&ia6->ia_ifa); m_freem(m); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return ENETUNREACH; } bcopy(ptr, &in4, sizeof(in4)); @@ -512,78 +476,38 @@ stf_output(ifp, m, dst, ro) * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). 
*/ - af = AF_INET6; + u_int af = AF_INET6; bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); } - M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); + M_PREPEND(m, sizeof(struct ip), M_NOWAIT); if (m == NULL) { - ifa_free(&ia6->ia_ifa); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return ENOBUFS; } ip = mtod(m, struct ip *); bzero(ip, sizeof(*ip)); - bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr), - &ip->ip_src, sizeof(ip->ip_src)); - ifa_free(&ia6->ia_ifa); + bcopy(GET_V4(&addr6), &ip->ip_src, sizeof(ip->ip_src)); bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst)); ip->ip_p = IPPROTO_IPV6; ip->ip_ttl = ip_stf_ttl; - ip->ip_len = m->m_pkthdr.len; /*host order*/ + ip->ip_len = htons(m->m_pkthdr.len); if (ifp->if_flags & IFF_LINK1) ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos); else ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); - if (!stf_route_cache) { - cached_route = NULL; - goto sendit; - } - - /* - * Do we have a cached route? - */ - mtx_lock(&(sc)->sc_ro_mtx); - dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; - if (dst4->sin_family != AF_INET || - bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { - /* cache route doesn't match */ - dst4->sin_family = AF_INET; - dst4->sin_len = sizeof(struct sockaddr_in); - bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr)); - if (sc->sc_ro.ro_rt) { - RTFREE(sc->sc_ro.ro_rt); - sc->sc_ro.ro_rt = NULL; - } - } - - if (sc->sc_ro.ro_rt == NULL) { - rtalloc_fib(&sc->sc_ro, sc->sc_fibnum); - if (sc->sc_ro.ro_rt == NULL) { - m_freem(m); - mtx_unlock(&(sc)->sc_ro_mtx); - ifp->if_oerrors++; - return ENETUNREACH; - } - } - cached_route = &sc->sc_ro; - -sendit: M_SETFIB(m, sc->sc_fibnum); - ifp->if_opackets++; - error = ip_output(m, NULL, cached_route, 0, NULL, NULL); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + error = ip_output(m, NULL, NULL, 0, NULL, NULL); - if (cached_route != NULL) - mtx_unlock(&(sc)->sc_ro_mtx); return error; } static int -isrfc1918addr(in) - struct in_addr *in; 
+isrfc1918addr(struct in_addr *in) { /* * returns 1 if private address range: @@ -599,11 +523,9 @@ isrfc1918addr(in) } static int -stf_checkaddr4(sc, in, inifp) - struct stf_softc *sc; - struct in_addr *in; - struct ifnet *inifp; /* incoming interface */ +stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) { + struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia4; /* @@ -627,54 +549,35 @@ stf_checkaddr4(sc, in, inifp) /* * reject packets with broadcast */ - IN_IFADDR_RLOCK(); - for (ia4 = TAILQ_FIRST(&V_in_ifaddrhead); - ia4; - ia4 = TAILQ_NEXT(ia4, ia_link)) - { + IN_IFADDR_RLOCK(&in_ifa_tracker); + TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) { if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) continue; if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { - IN_IFADDR_RUNLOCK(); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); return -1; } } - IN_IFADDR_RUNLOCK(); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * perform ingress filter */ if (sc && (STF2IFP(sc)->if_flags & IFF_LINK2) == 0 && inifp) { - struct sockaddr_in sin; - struct rtentry *rt; - - bzero(&sin, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_addr = *in; - rt = rtalloc1_fib((struct sockaddr *)&sin, 0, - 0UL, sc->sc_fibnum); - if (!rt || rt->rt_ifp != inifp) { -#if 0 - log(LOG_WARNING, "%s: packet from 0x%x dropped " - "due to ingress filter\n", if_name(STF2IFP(sc)), - (u_int32_t)ntohl(sin.sin_addr.s_addr)); -#endif - if (rt) - RTFREE_LOCKED(rt); - return -1; - } - RTFREE_LOCKED(rt); + struct nhop4_basic nh4; + + if (fib4_lookup_nh_basic(sc->sc_fibnum, *in, 0, 0, &nh4) != 0) + return (-1); + + if (nh4.nh_ifp != inifp) + return (-1); } return 0; } static int -stf_checkaddr6(sc, in6, inifp) - struct stf_softc *sc; - struct in6_addr *in6; - struct ifnet *inifp; /* incoming interface */ +stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) { /* * check 6to4 addresses @@ -697,23 +600,23 @@ 
stf_checkaddr6(sc, in6, inifp) return 0; } -void -in_stf_input(m, off) - struct mbuf *m; - int off; +static int +in_stf_input(struct mbuf **mp, int *offp, int proto) { - int proto; struct stf_softc *sc; struct ip *ip; struct ip6_hdr *ip6; + struct mbuf *m; u_int8_t otos, itos; struct ifnet *ifp; + int off; - proto = mtod(m, struct ip *)->ip_p; + m = *mp; + off = *offp; if (proto != IPPROTO_IPV6) { m_freem(m); - return; + return (IPPROTO_DONE); } ip = mtod(m, struct ip *); @@ -722,7 +625,7 @@ in_stf_input(m, off) if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) { m_freem(m); - return; + return (IPPROTO_DONE); } ifp = STF2IFP(sc); @@ -738,7 +641,7 @@ in_stf_input(m, off) if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 || stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) { m_freem(m); - return; + return (IPPROTO_DONE); } otos = ip->ip_tos; @@ -747,7 +650,7 @@ in_stf_input(m, off) if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); if (!m) - return; + return (IPPROTO_DONE); } ip6 = mtod(m, struct ip6_hdr *); @@ -758,7 +661,7 @@ in_stf_input(m, off) if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 || stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) { m_freem(m); - return; + return (IPPROTO_DONE); } itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; @@ -789,28 +692,15 @@ in_stf_input(m, off) * See net/if_gif.c for possible issues with packet processing * reorder due to extra queueing. 
*/ - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); M_SETFIB(m, ifp->if_fib); netisr_dispatch(NETISR_IPV6, m); -} - -/* ARGSUSED */ -static void -stf_rtrequest(cmd, rt, info) - int cmd; - struct rtentry *rt; - struct rt_addrinfo *info; -{ - RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + return (IPPROTO_DONE); } static int -stf_ioctl(ifp, cmd, data) - struct ifnet *ifp; - u_long cmd; - caddr_t data; +stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifaddr *ifa; struct ifreq *ifr; @@ -837,7 +727,6 @@ stf_ioctl(ifp, cmd, data) break; } - ifa->ifa_rtrequest = stf_rtrequest; ifp->if_flags |= IFF_UP; break; diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c index 599905e8..24ae0092 100644 --- a/freebsd/sys/net/if_tap.c +++ b/freebsd/sys/net/if_tap.c @@ -65,6 +65,7 @@ #include <net/bpf.h> #include <net/ethernet.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_dl.h> #include <net/if_media.h> @@ -81,8 +82,8 @@ #define CDEV_NAME "tap" #define TAPDEBUG if (tapdebug) printf -#define TAP "tap" -#define VMNET "vmnet" +static const char tapname[] = "tap"; +static const char vmnetname[] = "vmnet"; #define TAPMAXUNIT 0x7fff #define VMNET_DEV_MASK CLONE_FLAG0 @@ -101,11 +102,10 @@ static void tapifinit(void *); static int tap_clone_create(struct if_clone *, int, caddr_t); static void tap_clone_destroy(struct ifnet *); +static struct if_clone *tap_cloner; static int vmnet_clone_create(struct if_clone *, int, caddr_t); static void vmnet_clone_destroy(struct ifnet *); - -IFC_SIMPLE_DECLARE(tap, 0); -IFC_SIMPLE_DECLARE(vmnet, 0); +static struct if_clone *vmnet_cloner; /* character device */ static d_open_t tapopen; @@ -137,7 +137,7 @@ static struct filterops tap_write_filterops = { static struct cdevsw tap_cdevsw = { .d_version = D_VERSION, - .d_flags = D_PSEUDO | D_NEEDMINOR, + .d_flags 
= D_NEEDMINOR, .d_open = tapopen, .d_close = tapclose, .d_read = tapread, @@ -172,12 +172,10 @@ SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, "Allow user to open /dev/tap (based on node permissions)"); SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, "Bring interface up when /dev/tap is opened"); -SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tapdclone, 0, - "Enably legacy devfs interface creation"); +SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, + "Enable legacy devfs interface creation"); SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); -TUNABLE_INT("net.link.tap.devfs_cloning", &tapdclone); - DEV_MODULE(if_tap, tapmodevent, NULL); static int @@ -185,18 +183,12 @@ tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct cdev *dev; int i; - int extra; - if (strcmp(ifc->ifc_name, VMNET) == 0) - extra = VMNET_DEV_MASK; - else - extra = 0; - - /* find any existing device, or allocate new unit number */ - i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra); + /* Find any existing device, or allocate new unit number. */ + i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0); if (i) { - dev = make_dev(&tap_cdevsw, unit | extra, - UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit); + dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, + "%s%d", tapname, unit); } tapcreate(dev); @@ -207,7 +199,18 @@ tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) static int vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) { - return tap_clone_create(ifc, unit, params); + struct cdev *dev; + int i; + + /* Find any existing device, or allocate new unit number. 
*/ + i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK); + if (i) { + dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT, + GID_WHEEL, 0600, "%s%d", vmnetname, unit); + } + + tapcreate(dev); + return (0); } static void @@ -218,9 +221,10 @@ tap_destroy(struct tap_softc *tp) CURVNET_SET(ifp->if_vnet); destroy_dev(tp->tap_dev); seldrain(&tp->tap_rsel); + knlist_clear(&tp->tap_rsel.si_note, 0); knlist_destroy(&tp->tap_rsel.si_note); ether_ifdetach(ifp); - if_free_type(ifp, IFT_ETHER); + if_free(ifp); mtx_destroy(&tp->tap_mtx); free(tp, M_TAP); @@ -272,8 +276,10 @@ tapmodevent(module_t mod, int type, void *data) mtx_destroy(&tapmtx); return (ENOMEM); } - if_clone_attach(&tap_cloner); - if_clone_attach(&vmnet_cloner); + tap_cloner = if_clone_simple(tapname, tap_clone_create, + tap_clone_destroy, 0); + vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create, + vmnet_clone_destroy, 0); return (0); case MOD_UNLOAD: @@ -295,8 +301,8 @@ tapmodevent(module_t mod, int type, void *data) mtx_unlock(&tapmtx); EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); - if_clone_detach(&tap_cloner); - if_clone_detach(&vmnet_cloner); + if_clone_detach(tap_cloner); + if_clone_detach(vmnet_cloner); drain_dev_clone_events(); mtx_lock(&tapmtx); @@ -350,13 +356,13 @@ tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **d extra = 0; /* We're interested in only tap/vmnet devices. 
*/ - if (strcmp(name, TAP) == 0) { + if (strcmp(name, tapname) == 0) { unit = -1; - } else if (strcmp(name, VMNET) == 0) { + } else if (strcmp(name, vmnetname) == 0) { unit = -1; extra = VMNET_DEV_MASK; - } else if (dev_stdclone(name, NULL, TAP, &unit) != 1) { - if (dev_stdclone(name, NULL, VMNET, &unit) != 1) { + } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) { + if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) { return; } else { extra = VMNET_DEV_MASK; @@ -402,11 +408,9 @@ tapcreate(struct cdev *dev) unsigned short macaddr_hi; uint32_t macaddr_mid; int unit; - char *name = NULL; + const char *name = NULL; u_char eaddr[6]; - dev->si_flags &= ~SI_CHEAPCLONE; - /* allocate driver storage and create device */ tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); @@ -418,10 +422,10 @@ tapcreate(struct cdev *dev) /* select device: tap or vmnet */ if (unit & VMNET_DEV_MASK) { - name = VMNET; + name = vmnetname; tp->tap_flags |= TAP_VMNET; } else - name = TAP; + name = tapname; unit &= TAPMAXUNIT; @@ -534,11 +538,11 @@ tapclose(struct cdev *dev, int foo, int bar, struct thread *td) IF_DRAIN(&ifp->if_snd); /* - * do not bring the interface down, and do not anything with - * interface, if we are in VMnet mode. just close the device. + * Do not bring the interface down, and do not anything with + * interface, if we are in VMnet mode. Just close the device. 
*/ - - if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) { + if (((tp->tap_flags & TAP_VMNET) == 0) && + (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) { mtx_unlock(&tp->tap_mtx); if_down(ifp); mtx_lock(&tp->tap_mtx); @@ -636,12 +640,12 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCGIFSTATUS: ifs = (struct ifstat *)data; - dummy = strlen(ifs->ascii); mtx_lock(&tp->tap_mtx); - if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii)) - snprintf(ifs->ascii + dummy, - sizeof(ifs->ascii) - dummy, + if (tp->tap_pid != 0) + snprintf(ifs->ascii, sizeof(ifs->ascii), "\tOpened by PID %d\n", tp->tap_pid); + else + ifs->ascii[0] = '\0'; mtx_unlock(&tp->tap_mtx); break; @@ -684,7 +688,7 @@ tapifstart(struct ifnet *ifp) IF_DEQUEUE(&ifp->if_snd, m); if (m != NULL) { m_freem(m); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else break; } @@ -709,7 +713,7 @@ tapifstart(struct ifnet *ifp) selwakeuppri(&tp->tap_rsel, PZERO+1); KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); - ifp->if_opackets ++; /* obytes are counted in ether_output */ + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; @@ -829,8 +833,7 @@ tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td mtx_unlock(&tp->tap_mtx); break; - case OSIOCGIFADDR: /* get MAC address of the remote side */ - case SIOCGIFADDR: + case SIOCGIFADDR: /* get MAC address of the remote side */ mtx_lock(&tp->tap_mtx); bcopy(tp->ether_addr, data, sizeof(tp->ether_addr)); mtx_unlock(&tp->tap_mtx); @@ -948,9 +951,9 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag) return (EIO); } - if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN, + if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN, M_PKTHDR)) == NULL) { - ifp->if_ierrors ++; + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (ENOBUFS); } @@ -977,7 +980,7 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag) 
CURVNET_SET(ifp->if_vnet); (*ifp->if_input)(ifp, m); CURVNET_RESTORE(); - ifp->if_ipackets ++; /* ibytes are counted in parent */ + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */ return (0); } /* tapwrite */ diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c index 556a4860..edb30d04 100644 --- a/freebsd/sys/net/if_tun.c +++ b/freebsd/sys/net/if_tun.c @@ -18,10 +18,8 @@ * $FreeBSD$ */ -#include <rtems/bsd/local/opt_atalk.h> #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipx.h> #include <rtems/bsd/sys/param.h> #include <sys/priv.h> @@ -47,6 +45,7 @@ #include <sys/random.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_types.h> #include <net/netisr.h> @@ -101,7 +100,6 @@ struct tun_softc { #define TUN2IFP(sc) ((sc)->tun_ifp) #define TUNDEBUG if (tundebug) if_printf -#define TUNNAME "tun" /* * All mutable global variables in if_tun are locked using tunmtx, with @@ -109,7 +107,8 @@ struct tun_softc { * which is static after setup. 
*/ static struct mtx tunmtx; -static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface"); +static const char tunname[] = "tun"; +static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); static int tundebug = 0; static int tundclone = 1; static struct clonedevs *tunclones; @@ -119,25 +118,22 @@ SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, "IP tunnel software network interface."); -SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0, +SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0, "Enable legacy devfs interface creation."); -TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone); - static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev); static void tuncreate(const char *name, struct cdev *dev); static int tunifioctl(struct ifnet *, u_long, caddr_t); static void tuninit(struct ifnet *); static int tunmodevent(module_t, int, void *); -static int tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *, - struct route *ro); +static int tunoutput(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *ro); static void tunstart(struct ifnet *); static int tun_clone_create(struct if_clone *, int, caddr_t); static void tun_clone_destroy(struct ifnet *); - -IFC_SIMPLE_DECLARE(tun, 0); +static struct if_clone *tun_cloner; static d_open_t tunopen; static d_close_t tunclose; @@ -167,7 +163,7 @@ static struct filterops tun_write_filterops = { static struct cdevsw tun_cdevsw = { .d_version = D_VERSION, - .d_flags = D_PSEUDO | D_NEEDMINOR, + .d_flags = D_NEEDMINOR, .d_open = tunopen, .d_close = tunclose, .d_read = tunread, @@ -175,7 +171,7 @@ static struct cdevsw tun_cdevsw = { .d_ioctl = tunioctl, .d_poll = tunpoll, .d_kqfilter = tunkqfilter, - .d_name = TUNNAME, + .d_name = tunname, }; static int @@ -189,9 +185,9 @@ tun_clone_create(struct if_clone *ifc, 
int unit, caddr_t params) if (i) { /* No preexisting struct cdev *, create one */ dev = make_dev(&tun_cdevsw, unit, - UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit); + UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit); } - tuncreate(ifc->ifc_name, dev); + tuncreate(tunname, dev); return (0); } @@ -213,9 +209,9 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen, if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0) return; - if (strcmp(name, TUNNAME) == 0) { + if (strcmp(name, tunname) == 0) { u = -1; - } else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1) + } else if (dev_stdclone(name, NULL, tunname, &u) != 1) return; /* Don't recognise the name */ if (u != -1 && u > IF_MAXUNIT) return; /* Unit number too high */ @@ -248,7 +244,6 @@ tun_destroy(struct tun_softc *tp) { struct cdev *dev; - /* Unlocked read. */ mtx_lock(&tp->tun_mtx); if ((tp->tun_flags & TUN_OPEN) != 0) cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); @@ -262,6 +257,7 @@ tun_destroy(struct tun_softc *tp) if_free(TUN2IFP(tp)); destroy_dev(dev); seldrain(&tp->tun_rsel); + knlist_clear(&tp->tun_rsel.si_note, 0); knlist_destroy(&tp->tun_rsel.si_note); mtx_destroy(&tp->tun_mtx); cv_destroy(&tp->tun_cv); @@ -293,10 +289,11 @@ tunmodevent(module_t mod, int type, void *data) tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); if (tag == NULL) return (ENOMEM); - if_clone_attach(&tun_cloner); + tun_cloner = if_clone_simple(tunname, tun_clone_create, + tun_clone_destroy, 0); break; case MOD_UNLOAD: - if_clone_detach(&tun_cloner); + if_clone_detach(tun_cloner); EVENTHANDLER_DEREGISTER(dev_clone, tag); drain_dev_clone_events(); @@ -364,8 +361,6 @@ tuncreate(const char *name, struct cdev *dev) struct tun_softc *sc; struct ifnet *ifp; - dev->si_flags &= ~SI_CHEAPCLONE; - sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO); mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF); cv_init(&sc->tun_cv, "tun_condvar"); @@ -412,7 +407,7 @@ tunopen(struct cdev *dev, int flag, int mode, 
struct thread *td) */ tp = dev->si_drv1; if (!tp) { - tuncreate(TUNNAME, dev); + tuncreate(tunname, dev); tp = dev->si_drv1; } @@ -557,18 +552,16 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifs = (struct ifstat *)data; mtx_lock(&tp->tun_mtx); if (tp->tun_pid) - sprintf(ifs->ascii + strlen(ifs->ascii), + snprintf(ifs->ascii, sizeof(ifs->ascii), "\tOpened by PID %d\n", tp->tun_pid); + else + ifs->ascii[0] = '\0'; mtx_unlock(&tp->tun_mtx); break; case SIOCSIFADDR: tuninit(ifp); TUNDEBUG(ifp, "address set\n"); break; - case SIOCSIFDSTADDR: - tuninit(ifp); - TUNDEBUG(ifp, "destination address set\n"); - break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; TUNDEBUG(ifp, "mtu set\n"); @@ -587,7 +580,7 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) * tunoutput - queue packets from higher level ready to put out. */ static int -tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, +tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, struct route *ro) { struct tun_softc *tp = ifp->if_softc; @@ -621,25 +614,23 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, } /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) { + if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); - dst->sa_family = af; - } - - if (bpf_peers_present(ifp->if_bpf)) { + else af = dst->sa_family; + + if (bpf_peers_present(ifp->if_bpf)) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0); - } /* prepend sockaddr? 
this may abort if the mbuf allocation fails */ if (cached_tun_flags & TUN_LMODE) { /* allocate space for sockaddr */ - M_PREPEND(m0, dst->sa_len, M_DONTWAIT); + M_PREPEND(m0, dst->sa_len, M_NOWAIT); /* if allocation failed drop packet */ if (m0 == NULL) { - ifp->if_iqdrops++; - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } else { bcopy(dst, m0->m_data, dst->sa_len); @@ -648,18 +639,18 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, if (cached_tun_flags & TUN_IFHEAD) { /* Prepend the address family */ - M_PREPEND(m0, 4, M_DONTWAIT); + M_PREPEND(m0, 4, M_NOWAIT); /* if allocation failed drop packet */ if (m0 == NULL) { - ifp->if_iqdrops++; - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } else - *(u_int32_t *)m0->m_data = htonl(dst->sa_family); + *(u_int32_t *)m0->m_data = htonl(af); } else { #ifdef INET - if (dst->sa_family != AF_INET) + if (af != AF_INET) #endif { m_freem(m0); @@ -670,7 +661,7 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, error = (ifp->if_transmit)(ifp, m0); if (error) return (ENOBUFS); - ifp->if_opackets++; + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); return (0); } @@ -871,7 +862,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); struct mbuf *m; - uint32_t family; + uint32_t family, mru; int isr; TUNDEBUG(ifp, "tunwrite\n"); @@ -883,13 +874,16 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) if (uio->uio_resid == 0) return (0); - if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) { + mru = TUNMRU; + if (tp->tun_flags & TUN_IFHEAD) + mru += sizeof(family); + if (uio->uio_resid < 0 || uio->uio_resid > mru) { TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid); return (EIO); } - if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) { - ifp->if_ierrors++; 
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) { + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (ENOBUFS); } @@ -925,25 +919,13 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) isr = NETISR_IPV6; break; #endif -#ifdef IPX - case AF_IPX: - isr = NETISR_IPX; - break; -#endif -#ifdef NETATALK - case AF_APPLETALK: - isr = NETISR_ATALK2; - break; -#endif default: m_freem(m); return (EAFNOSUPPORT); } - /* First chunk of an mbuf contains good junk */ - if (harvest.point_to_point) - random_harvest(m, 16, 3, 0, RANDOM_NET); - ifp->if_ibytes += m->m_pkthdr.len; - ifp->if_ipackets++; + random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_TUN); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); CURVNET_SET(ifp->if_vnet); M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); diff --git a/freebsd/sys/net/if_tun.h b/freebsd/sys/net/if_tun.h index 382881cb..1ea375f7 100644 --- a/freebsd/sys/net/if_tun.h +++ b/freebsd/sys/net/if_tun.h @@ -25,11 +25,11 @@ #define TUNMTU 1500 /* Maximum receive packet size (hard limit) */ -#define TUNMRU 16384 +#define TUNMRU 65535 struct tuninfo { int baudrate; /* linespeed */ - short mtu; /* maximum transmission unit */ + unsigned short mtu; /* maximum transmission unit */ u_char type; /* ethernet, tokenring, etc. 
*/ u_char dummy; /* place holder */ }; diff --git a/freebsd/sys/net/if_types.h b/freebsd/sys/net/if_types.h index c2effacd..92e101ac 100644 --- a/freebsd/sys/net/if_types.h +++ b/freebsd/sys/net/if_types.h @@ -42,214 +42,232 @@ * http://www.iana.org/assignments/smi-numbers */ -#define IFT_OTHER 0x1 /* none of the following */ -#define IFT_1822 0x2 /* old-style arpanet imp */ -#define IFT_HDH1822 0x3 /* HDH arpanet imp */ -#define IFT_X25DDN 0x4 /* x25 to imp */ -#define IFT_X25 0x5 /* PDN X25 interface (RFC877) */ -#define IFT_ETHER 0x6 /* Ethernet CSMA/CD */ -#define IFT_ISO88023 0x7 /* CMSA/CD */ -#define IFT_ISO88024 0x8 /* Token Bus */ -#define IFT_ISO88025 0x9 /* Token Ring */ -#define IFT_ISO88026 0xa /* MAN */ -#define IFT_STARLAN 0xb -#define IFT_P10 0xc /* Proteon 10MBit ring */ -#define IFT_P80 0xd /* Proteon 80MBit ring */ -#define IFT_HY 0xe /* Hyperchannel */ -#define IFT_FDDI 0xf -#define IFT_LAPB 0x10 -#define IFT_SDLC 0x11 -#define IFT_T1 0x12 -#define IFT_CEPT 0x13 /* E1 - european T1 */ -#define IFT_ISDNBASIC 0x14 -#define IFT_ISDNPRIMARY 0x15 -#define IFT_PTPSERIAL 0x16 /* Proprietary PTP serial */ -#define IFT_PPP 0x17 /* RFC 1331 */ -#define IFT_LOOP 0x18 /* loopback */ -#define IFT_EON 0x19 /* ISO over IP */ -#define IFT_XETHER 0x1a /* obsolete 3MB experimental ethernet */ -#define IFT_NSIP 0x1b /* XNS over IP */ -#define IFT_SLIP 0x1c /* IP over generic TTY */ -#define IFT_ULTRA 0x1d /* Ultra Technologies */ -#define IFT_DS3 0x1e /* Generic T3 */ -#define IFT_SIP 0x1f /* SMDS */ -#define IFT_FRELAY 0x20 /* Frame Relay DTE only */ -#define IFT_RS232 0x21 -#define IFT_PARA 0x22 /* parallel-port */ -#define IFT_ARCNET 0x23 -#define IFT_ARCNETPLUS 0x24 -#define IFT_ATM 0x25 /* ATM cells */ -#define IFT_MIOX25 0x26 -#define IFT_SONET 0x27 /* SONET or SDH */ -#define IFT_X25PLE 0x28 -#define IFT_ISO88022LLC 0x29 -#define IFT_LOCALTALK 0x2a -#define IFT_SMDSDXI 0x2b -#define IFT_FRELAYDCE 0x2c /* Frame Relay DCE */ -#define IFT_V35 0x2d -#define 
IFT_HSSI 0x2e -#define IFT_HIPPI 0x2f -#define IFT_MODEM 0x30 /* Generic Modem */ -#define IFT_AAL5 0x31 /* AAL5 over ATM */ -#define IFT_SONETPATH 0x32 -#define IFT_SONETVT 0x33 -#define IFT_SMDSICIP 0x34 /* SMDS InterCarrier Interface */ -#define IFT_PROPVIRTUAL 0x35 /* Proprietary Virtual/internal */ -#define IFT_PROPMUX 0x36 /* Proprietary Multiplexing */ -#define IFT_IEEE80212 0x37 /* 100BaseVG */ -#define IFT_FIBRECHANNEL 0x38 /* Fibre Channel */ -#define IFT_HIPPIINTERFACE 0x39 /* HIPPI interfaces */ -#define IFT_FRAMERELAYINTERCONNECT 0x3a /* Obsolete, use either 0x20 or 0x2c */ -#define IFT_AFLANE8023 0x3b /* ATM Emulated LAN for 802.3 */ -#define IFT_AFLANE8025 0x3c /* ATM Emulated LAN for 802.5 */ -#define IFT_CCTEMUL 0x3d /* ATM Emulated circuit */ -#define IFT_FASTETHER 0x3e /* Fast Ethernet (100BaseT) */ -#define IFT_ISDN 0x3f /* ISDN and X.25 */ -#define IFT_V11 0x40 /* CCITT V.11/X.21 */ -#define IFT_V36 0x41 /* CCITT V.36 */ -#define IFT_G703AT64K 0x42 /* CCITT G703 at 64Kbps */ -#define IFT_G703AT2MB 0x43 /* Obsolete see DS1-MIB */ -#define IFT_QLLC 0x44 /* SNA QLLC */ -#define IFT_FASTETHERFX 0x45 /* Fast Ethernet (100BaseFX) */ -#define IFT_CHANNEL 0x46 /* channel */ -#define IFT_IEEE80211 0x47 /* radio spread spectrum */ -#define IFT_IBM370PARCHAN 0x48 /* IBM System 360/370 OEMI Channel */ -#define IFT_ESCON 0x49 /* IBM Enterprise Systems Connection */ -#define IFT_DLSW 0x4a /* Data Link Switching */ -#define IFT_ISDNS 0x4b /* ISDN S/T interface */ -#define IFT_ISDNU 0x4c /* ISDN U interface */ -#define IFT_LAPD 0x4d /* Link Access Protocol D */ -#define IFT_IPSWITCH 0x4e /* IP Switching Objects */ -#define IFT_RSRB 0x4f /* Remote Source Route Bridging */ -#define IFT_ATMLOGICAL 0x50 /* ATM Logical Port */ -#define IFT_DS0 0x51 /* Digital Signal Level 0 */ -#define IFT_DS0BUNDLE 0x52 /* group of ds0s on the same ds1 */ -#define IFT_BSC 0x53 /* Bisynchronous Protocol */ -#define IFT_ASYNC 0x54 /* Asynchronous Protocol */ -#define IFT_CNR 0x55 /* 
Combat Net Radio */ -#define IFT_ISO88025DTR 0x56 /* ISO 802.5r DTR */ -#define IFT_EPLRS 0x57 /* Ext Pos Loc Report Sys */ -#define IFT_ARAP 0x58 /* Appletalk Remote Access Protocol */ -#define IFT_PROPCNLS 0x59 /* Proprietary Connectionless Protocol*/ -#define IFT_HOSTPAD 0x5a /* CCITT-ITU X.29 PAD Protocol */ -#define IFT_TERMPAD 0x5b /* CCITT-ITU X.3 PAD Facility */ -#define IFT_FRAMERELAYMPI 0x5c /* Multiproto Interconnect over FR */ -#define IFT_X213 0x5d /* CCITT-ITU X213 */ -#define IFT_ADSL 0x5e /* Asymmetric Digital Subscriber Loop */ -#define IFT_RADSL 0x5f /* Rate-Adapt. Digital Subscriber Loop*/ -#define IFT_SDSL 0x60 /* Symmetric Digital Subscriber Loop */ -#define IFT_VDSL 0x61 /* Very H-Speed Digital Subscrib. Loop*/ -#define IFT_ISO88025CRFPINT 0x62 /* ISO 802.5 CRFP */ -#define IFT_MYRINET 0x63 /* Myricom Myrinet */ -#define IFT_VOICEEM 0x64 /* voice recEive and transMit */ -#define IFT_VOICEFXO 0x65 /* voice Foreign Exchange Office */ -#define IFT_VOICEFXS 0x66 /* voice Foreign Exchange Station */ -#define IFT_VOICEENCAP 0x67 /* voice encapsulation */ -#define IFT_VOICEOVERIP 0x68 /* voice over IP encapsulation */ -#define IFT_ATMDXI 0x69 /* ATM DXI */ -#define IFT_ATMFUNI 0x6a /* ATM FUNI */ -#define IFT_ATMIMA 0x6b /* ATM IMA */ -#define IFT_PPPMULTILINKBUNDLE 0x6c /* PPP Multilink Bundle */ -#define IFT_IPOVERCDLC 0x6d /* IBM ipOverCdlc */ -#define IFT_IPOVERCLAW 0x6e /* IBM Common Link Access to Workstn */ -#define IFT_STACKTOSTACK 0x6f /* IBM stackToStack */ -#define IFT_VIRTUALIPADDRESS 0x70 /* IBM VIPA */ -#define IFT_MPC 0x71 /* IBM multi-protocol channel support */ -#define IFT_IPOVERATM 0x72 /* IBM ipOverAtm */ -#define IFT_ISO88025FIBER 0x73 /* ISO 802.5j Fiber Token Ring */ -#define IFT_TDLC 0x74 /* IBM twinaxial data link control */ -#define IFT_GIGABITETHERNET 0x75 /* Gigabit Ethernet */ -#define IFT_HDLC 0x76 /* HDLC */ -#define IFT_LAPF 0x77 /* LAP F */ -#define IFT_V37 0x78 /* V.37 */ -#define IFT_X25MLP 0x79 /* Multi-Link 
Protocol */ -#define IFT_X25HUNTGROUP 0x7a /* X25 Hunt Group */ -#define IFT_TRANSPHDLC 0x7b /* Transp HDLC */ -#define IFT_INTERLEAVE 0x7c /* Interleave channel */ -#define IFT_FAST 0x7d /* Fast channel */ -#define IFT_IP 0x7e /* IP (for APPN HPR in IP networks) */ -#define IFT_DOCSCABLEMACLAYER 0x7f /* CATV Mac Layer */ -#define IFT_DOCSCABLEDOWNSTREAM 0x80 /* CATV Downstream interface */ -#define IFT_DOCSCABLEUPSTREAM 0x81 /* CATV Upstream interface */ -#define IFT_A12MPPSWITCH 0x82 /* Avalon Parallel Processor */ -#define IFT_TUNNEL 0x83 /* Encapsulation interface */ -#define IFT_COFFEE 0x84 /* coffee pot */ -#define IFT_CES 0x85 /* Circiut Emulation Service */ -#define IFT_ATMSUBINTERFACE 0x86 /* (x) ATM Sub Interface */ -#define IFT_L2VLAN 0x87 /* Layer 2 Virtual LAN using 802.1Q */ -#define IFT_L3IPVLAN 0x88 /* Layer 3 Virtual LAN - IP Protocol */ -#define IFT_L3IPXVLAN 0x89 /* Layer 3 Virtual LAN - IPX Prot. */ -#define IFT_DIGITALPOWERLINE 0x8a /* IP over Power Lines */ -#define IFT_MEDIAMAILOVERIP 0x8b /* (xxx) Multimedia Mail over IP */ -#define IFT_DTM 0x8c /* Dynamic synchronous Transfer Mode */ -#define IFT_DCN 0x8d /* Data Communications Network */ -#define IFT_IPFORWARD 0x8e /* IP Forwarding Interface */ -#define IFT_MSDSL 0x8f /* Multi-rate Symmetric DSL */ -#define IFT_IEEE1394 0x90 /* IEEE1394 High Performance SerialBus*/ -#define IFT_IFGSN 0x91 /* HIPPI-6400 */ -#define IFT_DVBRCCMACLAYER 0x92 /* DVB-RCC MAC Layer */ -#define IFT_DVBRCCDOWNSTREAM 0x93 /* DVB-RCC Downstream Channel */ -#define IFT_DVBRCCUPSTREAM 0x94 /* DVB-RCC Upstream Channel */ -#define IFT_ATMVIRTUAL 0x95 /* ATM Virtual Interface */ -#define IFT_MPLSTUNNEL 0x96 /* MPLS Tunnel Virtual Interface */ -#define IFT_SRP 0x97 /* Spatial Reuse Protocol */ -#define IFT_VOICEOVERATM 0x98 /* Voice over ATM */ -#define IFT_VOICEOVERFRAMERELAY 0x99 /* Voice Over Frame Relay */ -#define IFT_IDSL 0x9a /* Digital Subscriber Loop over ISDN */ -#define IFT_COMPOSITELINK 0x9b /* Avici Composite 
Link Interface */ -#define IFT_SS7SIGLINK 0x9c /* SS7 Signaling Link */ -#define IFT_PROPWIRELESSP2P 0x9d /* Prop. P2P wireless interface */ -#define IFT_FRFORWARD 0x9e /* Frame forward Interface */ -#define IFT_RFC1483 0x9f /* Multiprotocol over ATM AAL5 */ -#define IFT_USB 0xa0 /* USB Interface */ -#define IFT_IEEE8023ADLAG 0xa1 /* IEEE 802.3ad Link Aggregate*/ -#define IFT_BGPPOLICYACCOUNTING 0xa2 /* BGP Policy Accounting */ -#define IFT_FRF16MFRBUNDLE 0xa3 /* FRF.16 Multilik Frame Relay*/ -#define IFT_H323GATEKEEPER 0xa4 /* H323 Gatekeeper */ -#define IFT_H323PROXY 0xa5 /* H323 Voice and Video Proxy */ -#define IFT_MPLS 0xa6 /* MPLS */ -#define IFT_MFSIGLINK 0xa7 /* Multi-frequency signaling link */ -#define IFT_HDSL2 0xa8 /* High Bit-Rate DSL, 2nd gen. */ -#define IFT_SHDSL 0xa9 /* Multirate HDSL2 */ -#define IFT_DS1FDL 0xaa /* Facility Data Link (4Kbps) on a DS1*/ -#define IFT_POS 0xab /* Packet over SONET/SDH Interface */ -#define IFT_DVBASILN 0xac /* DVB-ASI Input */ -#define IFT_DVBASIOUT 0xad /* DVB-ASI Output */ -#define IFT_PLC 0xae /* Power Line Communications */ -#define IFT_NFAS 0xaf /* Non-Facility Associated Signaling */ -#define IFT_TR008 0xb0 /* TROO8 */ -#define IFT_GR303RDT 0xb1 /* Remote Digital Terminal */ -#define IFT_GR303IDT 0xb2 /* Integrated Digital Terminal */ -#define IFT_ISUP 0xb3 /* ISUP */ -#define IFT_PROPDOCSWIRELESSMACLAYER 0xb4 /* prop/Wireless MAC Layer */ -#define IFT_PROPDOCSWIRELESSDOWNSTREAM 0xb5 /* prop/Wireless Downstream */ -#define IFT_PROPDOCSWIRELESSUPSTREAM 0xb6 /* prop/Wireless Upstream */ -#define IFT_HIPERLAN2 0xb7 /* HIPERLAN Type 2 Radio Interface */ -#define IFT_PROPBWAP2MP 0xb8 /* PropBroadbandWirelessAccess P2MP*/ -#define IFT_SONETOVERHEADCHANNEL 0xb9 /* SONET Overhead Channel */ -#define IFT_DIGITALWRAPPEROVERHEADCHANNEL 0xba /* Digital Wrapper Overhead */ -#define IFT_AAL2 0xbb /* ATM adaptation layer 2 */ -#define IFT_RADIOMAC 0xbc /* MAC layer over radio links */ -#define IFT_ATMRADIO 0xbd /* ATM over 
radio links */ -#define IFT_IMT 0xbe /* Inter-Machine Trunks */ -#define IFT_MVL 0xbf /* Multiple Virtual Lines DSL */ -#define IFT_REACHDSL 0xc0 /* Long Reach DSL */ -#define IFT_FRDLCIENDPT 0xc1 /* Frame Relay DLCI End Point */ -#define IFT_ATMVCIENDPT 0xc2 /* ATM VCI End Point */ -#define IFT_OPTICALCHANNEL 0xc3 /* Optical Channel */ -#define IFT_OPTICALTRANSPORT 0xc4 /* Optical Transport */ -#define IFT_INFINIBAND 0xc7 /* Infiniband */ -#define IFT_BRIDGE 0xd1 /* Transparent bridge interface */ +typedef enum { + IFT_OTHER = 0x1, /* none of the following */ + IFT_1822 = 0x2, /* old-style arpanet imp */ + IFT_HDH1822 = 0x3, /* HDH arpanet imp */ + IFT_X25DDN = 0x4, /* x25 to imp */ + IFT_X25 = 0x5, /* PDN X25 interface (RFC877) */ + IFT_ETHER = 0x6, /* Ethernet CSMA/CD */ + IFT_ISO88023 = 0x7, /* CMSA/CD */ + IFT_ISO88024 = 0x8, /* Token Bus */ + IFT_ISO88025 = 0x9, /* Token Ring */ + IFT_ISO88026 = 0xa, /* MAN */ + IFT_STARLAN = 0xb, + IFT_P10 = 0xc, /* Proteon 10MBit ring */ + IFT_P80 = 0xd, /* Proteon 80MBit ring */ + IFT_HY = 0xe, /* Hyperchannel */ + IFT_FDDI = 0xf, + IFT_LAPB = 0x10, + IFT_SDLC = 0x11, + IFT_T1 = 0x12, + IFT_CEPT = 0x13, /* E1 - european T1 */ + IFT_ISDNBASIC = 0x14, + IFT_ISDNPRIMARY = 0x15, + IFT_PTPSERIAL = 0x16, /* Proprietary PTP serial */ + IFT_PPP = 0x17, /* RFC 1331 */ + IFT_LOOP = 0x18, /* loopback */ + IFT_EON = 0x19, /* ISO over IP */ + IFT_XETHER = 0x1a, /* obsolete 3MB experimental ethernet */ + IFT_NSIP = 0x1b, /* XNS over IP */ + IFT_SLIP = 0x1c, /* IP over generic TTY */ + IFT_ULTRA = 0x1d, /* Ultra Technologies */ + IFT_DS3 = 0x1e, /* Generic T3 */ + IFT_SIP = 0x1f, /* SMDS */ + IFT_FRELAY = 0x20, /* Frame Relay DTE only */ + IFT_RS232 = 0x21, + IFT_PARA = 0x22, /* parallel-port */ + IFT_ARCNET = 0x23, + IFT_ARCNETPLUS = 0x24, + IFT_ATM = 0x25, /* ATM cells */ + IFT_MIOX25 = 0x26, + IFT_SONET = 0x27, /* SONET or SDH */ + IFT_X25PLE = 0x28, + IFT_ISO88022LLC = 0x29, + IFT_LOCALTALK = 0x2a, + IFT_SMDSDXI = 0x2b, + 
IFT_FRELAYDCE = 0x2c, /* Frame Relay DCE */ + IFT_V35 = 0x2d, + IFT_HSSI = 0x2e, + IFT_HIPPI = 0x2f, + IFT_MODEM = 0x30, /* Generic Modem */ + IFT_AAL5 = 0x31, /* AAL5 over ATM */ + IFT_SONETPATH = 0x32, + IFT_SONETVT = 0x33, + IFT_SMDSICIP = 0x34, /* SMDS InterCarrier Interface */ + IFT_PROPVIRTUAL = 0x35, /* Proprietary Virtual/internal */ + IFT_PROPMUX = 0x36, /* Proprietary Multiplexing */ + IFT_IEEE80212 = 0x37, /* 100BaseVG */ + IFT_FIBRECHANNEL = 0x38, /* Fibre Channel */ + IFT_HIPPIINTERFACE = 0x39, /* HIPPI interfaces */ + IFT_FRAMERELAYINTERCONNECT = 0x3a, /* Obsolete, use 0x20 either 0x2c */ + IFT_AFLANE8023 = 0x3b, /* ATM Emulated LAN for 802.3 */ + IFT_AFLANE8025 = 0x3c, /* ATM Emulated LAN for 802.5 */ + IFT_CCTEMUL = 0x3d, /* ATM Emulated circuit */ + IFT_FASTETHER = 0x3e, /* Fast Ethernet (100BaseT) */ + IFT_ISDN = 0x3f, /* ISDN and X.25 */ + IFT_V11 = 0x40, /* CCITT V.11/X.21 */ + IFT_V36 = 0x41, /* CCITT V.36 */ + IFT_G703AT64K = 0x42, /* CCITT G703 at 64Kbps */ + IFT_G703AT2MB = 0x43, /* Obsolete see DS1-MIB */ + IFT_QLLC = 0x44, /* SNA QLLC */ + IFT_FASTETHERFX = 0x45, /* Fast Ethernet (100BaseFX) */ + IFT_CHANNEL = 0x46, /* channel */ + IFT_IEEE80211 = 0x47, /* radio spread spectrum */ + IFT_IBM370PARCHAN = 0x48, /* IBM System 360/370 OEMI Channel */ + IFT_ESCON = 0x49, /* IBM Enterprise Systems Connection */ + IFT_DLSW = 0x4a, /* Data Link Switching */ + IFT_ISDNS = 0x4b, /* ISDN S/T interface */ + IFT_ISDNU = 0x4c, /* ISDN U interface */ + IFT_LAPD = 0x4d, /* Link Access Protocol D */ + IFT_IPSWITCH = 0x4e, /* IP Switching Objects */ + IFT_RSRB = 0x4f, /* Remote Source Route Bridging */ + IFT_ATMLOGICAL = 0x50, /* ATM Logical Port */ + IFT_DS0 = 0x51, /* Digital Signal Level 0 */ + IFT_DS0BUNDLE = 0x52, /* group of ds0s on the same ds1 */ + IFT_BSC = 0x53, /* Bisynchronous Protocol */ + IFT_ASYNC = 0x54, /* Asynchronous Protocol */ + IFT_CNR = 0x55, /* Combat Net Radio */ + IFT_ISO88025DTR = 0x56, /* ISO 802.5r DTR */ + IFT_EPLRS = 0x57, /* 
Ext Pos Loc Report Sys */ + IFT_ARAP = 0x58, /* Appletalk Remote Access Protocol */ + IFT_PROPCNLS = 0x59, /* Proprietary Connectionless Protocol*/ + IFT_HOSTPAD = 0x5a, /* CCITT-ITU X.29 PAD Protocol */ + IFT_TERMPAD = 0x5b, /* CCITT-ITU X.3 PAD Facility */ + IFT_FRAMERELAYMPI = 0x5c, /* Multiproto Interconnect over FR */ + IFT_X213 = 0x5d, /* CCITT-ITU X213 */ + IFT_ADSL = 0x5e, /* Asymmetric Digital Subscriber Loop */ + IFT_RADSL = 0x5f, /* Rate-Adapt. Digital Subscriber Loop*/ + IFT_SDSL = 0x60, /* Symmetric Digital Subscriber Loop */ + IFT_VDSL = 0x61, /* Very H-Speed Digital Subscrib. Loop*/ + IFT_ISO88025CRFPINT = 0x62, /* ISO 802.5 CRFP */ + IFT_MYRINET = 0x63, /* Myricom Myrinet */ + IFT_VOICEEM = 0x64, /* voice recEive and transMit */ + IFT_VOICEFXO = 0x65, /* voice Foreign Exchange Office */ + IFT_VOICEFXS = 0x66, /* voice Foreign Exchange Station */ + IFT_VOICEENCAP = 0x67, /* voice encapsulation */ + IFT_VOICEOVERIP = 0x68, /* voice over IP encapsulation */ + IFT_ATMDXI = 0x69, /* ATM DXI */ + IFT_ATMFUNI = 0x6a, /* ATM FUNI */ + IFT_ATMIMA = 0x6b, /* ATM IMA */ + IFT_PPPMULTILINKBUNDLE = 0x6c, /* PPP Multilink Bundle */ + IFT_IPOVERCDLC = 0x6d, /* IBM ipOverCdlc */ + IFT_IPOVERCLAW = 0x6e, /* IBM Common Link Access to Workstn */ + IFT_STACKTOSTACK = 0x6f, /* IBM stackToStack */ + IFT_VIRTUALIPADDRESS = 0x70, /* IBM VIPA */ + IFT_MPC = 0x71, /* IBM multi-protocol channel support */ + IFT_IPOVERATM = 0x72, /* IBM ipOverAtm */ + IFT_ISO88025FIBER = 0x73, /* ISO 802.5j Fiber Token Ring */ + IFT_TDLC = 0x74, /* IBM twinaxial data link control */ + IFT_GIGABITETHERNET = 0x75, /* Gigabit Ethernet */ + IFT_HDLC = 0x76, /* HDLC */ + IFT_LAPF = 0x77, /* LAP F */ + IFT_V37 = 0x78, /* V.37 */ + IFT_X25MLP = 0x79, /* Multi-Link Protocol */ + IFT_X25HUNTGROUP = 0x7a, /* X25 Hunt Group */ + IFT_TRANSPHDLC = 0x7b, /* Transp HDLC */ + IFT_INTERLEAVE = 0x7c, /* Interleave channel */ + IFT_FAST = 0x7d, /* Fast channel */ + IFT_IP = 0x7e, /* IP (for APPN HPR in IP 
networks) */ + IFT_DOCSCABLEMACLAYER = 0x7f, /* CATV Mac Layer */ + IFT_DOCSCABLEDOWNSTREAM = 0x80, /* CATV Downstream interface */ + IFT_DOCSCABLEUPSTREAM = 0x81, /* CATV Upstream interface */ + IFT_A12MPPSWITCH = 0x82, /* Avalon Parallel Processor */ + IFT_TUNNEL = 0x83, /* Encapsulation interface */ + IFT_COFFEE = 0x84, /* coffee pot */ + IFT_CES = 0x85, /* Circiut Emulation Service */ + IFT_ATMSUBINTERFACE = 0x86, /* (x) ATM Sub Interface */ + IFT_L2VLAN = 0x87, /* Layer 2 Virtual LAN using 802.1Q */ + IFT_L3IPVLAN = 0x88, /* Layer 3 Virtual LAN - IP Protocol */ + IFT_L3IPXVLAN = 0x89, /* Layer 3 Virtual LAN - IPX Prot. */ + IFT_DIGITALPOWERLINE = 0x8a, /* IP over Power Lines */ + IFT_MEDIAMAILOVERIP = 0x8b, /* (xxx) Multimedia Mail over IP */ + IFT_DTM = 0x8c, /* Dynamic synchronous Transfer Mode */ + IFT_DCN = 0x8d, /* Data Communications Network */ + IFT_IPFORWARD = 0x8e, /* IP Forwarding Interface */ + IFT_MSDSL = 0x8f, /* Multi-rate Symmetric DSL */ + IFT_IEEE1394 = 0x90, /* IEEE1394 High Performance SerialBus*/ + IFT_IFGSN = 0x91, /* HIPPI-6400 */ + IFT_DVBRCCMACLAYER = 0x92, /* DVB-RCC MAC Layer */ + IFT_DVBRCCDOWNSTREAM = 0x93, /* DVB-RCC Downstream Channel */ + IFT_DVBRCCUPSTREAM = 0x94, /* DVB-RCC Upstream Channel */ + IFT_ATMVIRTUAL = 0x95, /* ATM Virtual Interface */ + IFT_MPLSTUNNEL = 0x96, /* MPLS Tunnel Virtual Interface */ + IFT_SRP = 0x97, /* Spatial Reuse Protocol */ + IFT_VOICEOVERATM = 0x98, /* Voice over ATM */ + IFT_VOICEOVERFRAMERELAY = 0x99, /* Voice Over Frame Relay */ + IFT_IDSL = 0x9a, /* Digital Subscriber Loop over ISDN */ + IFT_COMPOSITELINK = 0x9b, /* Avici Composite Link Interface */ + IFT_SS7SIGLINK = 0x9c, /* SS7 Signaling Link */ + IFT_PROPWIRELESSP2P = 0x9d, /* Prop. 
P2P wireless interface */ + IFT_FRFORWARD = 0x9e, /* Frame forward Interface */ + IFT_RFC1483 = 0x9f, /* Multiprotocol over ATM AAL5 */ + IFT_USB = 0xa0, /* USB Interface */ + IFT_IEEE8023ADLAG = 0xa1, /* IEEE 802.3ad Link Aggregate*/ + IFT_BGPPOLICYACCOUNTING = 0xa2, /* BGP Policy Accounting */ + IFT_FRF16MFRBUNDLE = 0xa3, /* FRF.16 Multilik Frame Relay*/ + IFT_H323GATEKEEPER = 0xa4, /* H323 Gatekeeper */ + IFT_H323PROXY = 0xa5, /* H323 Voice and Video Proxy */ + IFT_MPLS = 0xa6, /* MPLS */ + IFT_MFSIGLINK = 0xa7, /* Multi-frequency signaling link */ + IFT_HDSL2 = 0xa8, /* High Bit-Rate DSL, 2nd gen. */ + IFT_SHDSL = 0xa9, /* Multirate HDSL2 */ + IFT_DS1FDL = 0xaa, /* Facility Data Link (4Kbps) on a DS1*/ + IFT_POS = 0xab, /* Packet over SONET/SDH Interface */ + IFT_DVBASILN = 0xac, /* DVB-ASI Input */ + IFT_DVBASIOUT = 0xad, /* DVB-ASI Output */ + IFT_PLC = 0xae, /* Power Line Communications */ + IFT_NFAS = 0xaf, /* Non-Facility Associated Signaling */ + IFT_TR008 = 0xb0, /* TROO8 */ + IFT_GR303RDT = 0xb1, /* Remote Digital Terminal */ + IFT_GR303IDT = 0xb2, /* Integrated Digital Terminal */ + IFT_ISUP = 0xb3, /* ISUP */ + IFT_PROPDOCSWIRELESSMACLAYER = 0xb4, /* prop/Wireless MAC Layer */ + IFT_PROPDOCSWIRELESSDOWNSTREAM = 0xb5, /* prop/Wireless Downstream */ + IFT_PROPDOCSWIRELESSUPSTREAM = 0xb6, /* prop/Wireless Upstream */ + IFT_HIPERLAN2 = 0xb7, /* HIPERLAN Type 2 Radio Interface */ + IFT_PROPBWAP2MP = 0xb8, /* PropBroadbandWirelessAccess P2MP*/ + IFT_SONETOVERHEADCHANNEL = 0xb9, /* SONET Overhead Channel */ + IFT_DIGITALWRAPPEROVERHEADCHANNEL = 0xba, /* Digital Wrapper Overhead */ + IFT_AAL2 = 0xbb, /* ATM adaptation layer 2 */ + IFT_RADIOMAC = 0xbc, /* MAC layer over radio links */ + IFT_ATMRADIO = 0xbd, /* ATM over radio links */ + IFT_IMT = 0xbe, /* Inter-Machine Trunks */ + IFT_MVL = 0xbf, /* Multiple Virtual Lines DSL */ + IFT_REACHDSL = 0xc0, /* Long Reach DSL */ + IFT_FRDLCIENDPT = 0xc1, /* Frame Relay DLCI End Point */ + IFT_ATMVCIENDPT = 0xc2, /* 
ATM VCI End Point */ + IFT_OPTICALCHANNEL = 0xc3, /* Optical Channel */ + IFT_OPTICALTRANSPORT = 0xc4, /* Optical Transport */ + IFT_INFINIBAND = 0xc7, /* Infiniband */ + IFT_BRIDGE = 0xd1, /* Transparent bridge interface */ + IFT_STF = 0xd7, /* 6to4 interface */ -#define IFT_STF 0xd7 /* 6to4 interface */ + /* + * Not based on IANA assignments. Conflicting with IANA assignments. + * We should make them negative probably. + * This requires changes to struct if_data. + */ + IFT_GIF = 0xf0, /* Generic tunnel interface */ + IFT_PVC = 0xf1, /* Unused */ + IFT_ENC = 0xf4, /* Encapsulating interface */ + IFT_PFLOG = 0xf6, /* PF packet filter logging */ + IFT_PFSYNC = 0xf7, /* PF packet filter synchronization */ +} ifType; + +/* + * Some (broken) software uses #ifdef IFT_TYPE to check whether + * an operating systems supports certain interface type. Lack of + * ifdef leads to a piece of functionality compiled out. + */ +#ifndef BURN_BRIDGES +#define IFT_BRIDGE IFT_BRIDGE +#define IFT_PPP IFT_PPP +#define IFT_PROPVIRTUAL IFT_PROPVIRTUAL +#define IFT_L2VLAN IFT_L2VLAN +#define IFT_L3IPVLAN IFT_L3IPVLAN +#define IFT_IEEE1394 IFT_IEEE1394 +#define IFT_INFINIBAND IFT_INFINIBAND +#endif -/* not based on IANA assignments */ -#define IFT_GIF 0xf0 -#define IFT_PVC 0xf1 -#define IFT_FAITH 0xf2 -#define IFT_ENC 0xf4 -#define IFT_PFLOG 0xf6 -#define IFT_PFSYNC 0xf7 -#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */ -#define IFT_IPXIP 0xf9 /* IPX over IP tunneling; no longer used. */ #endif /* !_NET_IF_TYPES_H_ */ diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h index ee4db195..ec3719d4 100644 --- a/freebsd/sys/net/if_var.h +++ b/freebsd/sys/net/if_var.h @@ -58,58 +58,75 @@ * interfaces. These routines live in the files if.c and route.c */ -#ifdef __STDC__ -/* - * Forward structure declarations for function prototypes [sic]. 
- */ -struct mbuf; -struct thread; -struct rtentry; -struct rt_addrinfo; +struct rtentry; /* ifa_rtrequest */ +struct rt_addrinfo; /* ifa_rtrequest */ struct socket; -struct ether_header; struct carp_if; +struct carp_softc; struct ifvlantrunk; -struct route; +struct route; /* if_output */ struct vnet; -#endif - -#include <sys/queue.h> /* get TAILQ macros */ +struct ifmedia; +struct netmap_adapter; #ifdef _KERNEL -#include <sys/mbuf.h> -#include <sys/eventhandler.h> +#include <sys/mbuf.h> /* ifqueue only? */ #include <sys/buf_ring.h> #include <net/vnet.h> #endif /* _KERNEL */ +#include <sys/counter.h> #include <rtems/bsd/sys/lock.h> /* XXX */ -#include <sys/mutex.h> /* XXX */ +#include <sys/mutex.h> /* struct ifqueue */ #include <sys/rwlock.h> /* XXX */ #include <sys/sx.h> /* XXX */ -#include <sys/event.h> /* XXX */ -#include <sys/_task.h> +#include <sys/_task.h> /* if_link_task */ #define IF_DUNIT_NONE -1 -#include <altq/if_altq.h> +#include <net/altq/if_altq.h> TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ -TAILQ_HEAD(ifprefixhead, ifprefix); TAILQ_HEAD(ifmultihead, ifmultiaddr); TAILQ_HEAD(ifgrouphead, ifg_group); -/* - * Structure defining a queue for a network interface. 
- */ -struct ifqueue { - struct mbuf *ifq_head; - struct mbuf *ifq_tail; - int ifq_len; - int ifq_maxlen; - int ifq_drops; - struct mtx ifq_mtx; -}; +#ifdef _KERNEL +VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */ +#define V_link_pfil_hook VNET(link_pfil_hook) + +#define HHOOK_IPSEC_INET 0 +#define HHOOK_IPSEC_INET6 1 +#define HHOOK_IPSEC_COUNT 2 +VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); +VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); +#define V_ipsec_hhh_in VNET(ipsec_hhh_in) +#define V_ipsec_hhh_out VNET(ipsec_hhh_out) +#endif /* _KERNEL */ + +typedef enum { + IFCOUNTER_IPACKETS = 0, + IFCOUNTER_IERRORS, + IFCOUNTER_OPACKETS, + IFCOUNTER_OERRORS, + IFCOUNTER_COLLISIONS, + IFCOUNTER_IBYTES, + IFCOUNTER_OBYTES, + IFCOUNTER_IMCASTS, + IFCOUNTER_OMCASTS, + IFCOUNTER_IQDROPS, + IFCOUNTER_OQDROPS, + IFCOUNTER_NOPROTO, + IFCOUNTERS /* Array size. */ +} ift_counter; + +typedef struct ifnet * if_t; + +typedef void (*if_start_fn_t)(if_t); +typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t); +typedef void (*if_init_fn_t)(void *); +typedef void (*if_qflush_fn_t)(if_t); +typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); +typedef uint64_t (*if_get_counter_t)(if_t, ift_counter); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ @@ -117,22 +134,99 @@ struct ifnet_hw_tsomax { u_int tsomaxsegsize; /* TSO maximum segment size in bytes */ }; +/* Interface encap request types */ +typedef enum { + IFENCAP_LL = 1 /* pre-calculate link-layer header */ +} ife_type; + /* - * Structure defining a network interface. + * The structure below allows to request various pre-calculated L2/L3 headers + * for different media. Requests varies by type (rtype field). + * + * IFENCAP_LL type: pre-calculates link header based on address family + * and destination lladdr. * - * (Would like to call this struct ``if'', but C isn't PL/1.) 
+ * Input data fields: + * buf: pointer to destination buffer + * bufsize: buffer size + * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast + * family: address family defined by AF_ constant. + * lladdr: pointer to link-layer address + * lladdr_len: length of link-layer address + * hdata: pointer to L3 header (optional, used for ARP requests). + * Output data fields: + * buf: encap data is stored here + * bufsize: resulting encap length is stored here + * lladdr_off: offset of link-layer address from encap hdr start + * hdata: L3 header may be altered if necessary */ +struct if_encap_req { + u_char *buf; /* Destination buffer (w) */ + size_t bufsize; /* size of provided buffer (r) */ + ife_type rtype; /* request type (r) */ + uint32_t flags; /* Request flags (r) */ + int family; /* Address family AF_* (r) */ + int lladdr_off; /* offset from header start (w) */ + int lladdr_len; /* lladdr length (r) */ + char *lladdr; /* link-level address pointer (r) */ + char *hdata; /* Upper layer header data (rw) */ +}; + +#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ + + +/* + * Structure defining a network interface. + * + * Size ILP32: 592 (approx) + * LP64: 1048 (approx) + */ struct ifnet { + /* General book keeping of interface lists. */ + TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ + LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */ + TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ + /* protected by if_addr_lock */ + u_char if_alloctype; /* if_type at time of allocation */ + + /* Driver and protocol specific information that remains stable. 
*/ void *if_softc; /* pointer to driver state */ + void *if_llsoftc; /* link layer softc */ void *if_l2com; /* pointer to protocol bits */ - struct vnet *if_vnet; /* pointer to network stack instance */ - TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ - char if_xname[IFNAMSIZ]; /* external name (name + unit) */ const char *if_dname; /* driver name */ int if_dunit; /* unit or IF_DUNIT_NONE */ + u_short if_index; /* numeric abbreviation for this if */ + short if_index_reserved; /* spare space to grow if_index */ + char if_xname[IFNAMSIZ]; /* external name (name + unit) */ + char *if_description; /* interface description */ + + /* Variable fields that are touched by the stack and drivers. */ + int if_flags; /* up/down, broadcast, etc. */ + int if_drv_flags; /* driver-managed status flags */ + int if_capabilities; /* interface features & capabilities */ + int if_capenable; /* enabled features & capabilities */ + void *if_linkmib; /* link-type-specific MIB data */ + size_t if_linkmiblen; /* length of above data */ u_int if_refcount; /* reference count */ - struct ifaddrhead if_addrhead; /* linked list of addresses per if */ + + /* These fields are shared with struct if_data. */ + uint8_t if_type; /* ethernet, tokenring, etc */ + uint8_t if_addrlen; /* media address length */ + uint8_t if_hdrlen; /* media header length */ + uint8_t if_link_state; /* current link state */ + uint32_t if_mtu; /* maximum transmission unit */ + uint32_t if_metric; /* routing metric (external only) */ + uint64_t if_baudrate; /* linespeed */ + uint64_t if_hwassist; /* HW offload capabilities, see IFCAP */ + time_t if_epoch; /* uptime at attach or stat reset */ + struct timeval if_lastchange; /* time of last administrative change */ + + struct ifaltq if_snd; /* output queue (includes altq) */ + struct task if_linktask; /* task for link change events */ + + /* Addresses of different protocol families assigned to this if. 
*/ + struct rwlock if_addr_lock; /* lock to protect address lists */ /* * if_addrhead is the list of all addresses associated to * an interface. @@ -143,74 +237,53 @@ struct ifnet { * However, access to the AF_LINK address through this * field is deprecated. Use if_addr or ifaddr_byindex() instead. */ - int if_pcount; /* number of promiscuous listeners */ - struct carp_if *if_carp; /* carp interface structure */ - struct bpf_if *if_bpf; /* packet filter structure */ - u_short if_index; /* numeric abbreviation for this if */ - short if_index_reserved; /* spare space to grow if_index */ - struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ - int if_flags; /* up/down, broadcast, etc. */ - int if_capabilities; /* interface features & capabilities */ - int if_capenable; /* enabled features & capabilities */ - void *if_linkmib; /* link-type-specific MIB data */ - size_t if_linkmiblen; /* length of above data */ - struct if_data if_data; + struct ifaddrhead if_addrhead; /* linked list of addresses per if */ struct ifmultihead if_multiaddrs; /* multicast addresses configured */ int if_amcount; /* number of all-multicast requests */ -/* procedure handles */ + struct ifaddr *if_addr; /* pointer to link-level address */ + const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */ + struct rwlock if_afdata_lock; + void *if_afdata[AF_MAX]; + int if_afdata_initialized; + + /* Additional features hung off the interface. 
*/ + u_int if_fib; /* interface FIB */ + struct vnet *if_vnet; /* pointer to network stack instance */ + struct vnet *if_home_vnet; /* where this ifnet originates from */ + struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ + struct bpf_if *if_bpf; /* packet filter structure */ + int if_pcount; /* number of promiscuous listeners */ + void *if_bridge; /* bridge glue */ + void *if_lagg; /* lagg glue */ + void *if_pf_kif; /* pf glue */ + struct carp_if *if_carp; /* carp interface structure */ + struct label *if_label; /* interface MAC label */ + struct netmap_adapter *if_netmap; /* netmap(4) softc */ + + /* Various procedures of the layer2 encapsulation and drivers. */ int (*if_output) /* output routine (enqueue) */ - (struct ifnet *, struct mbuf *, struct sockaddr *, + (struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); void (*if_input) /* input routine (from h/w driver) */ (struct ifnet *, struct mbuf *); - void (*if_start) /* initiate output routine */ - (struct ifnet *); - int (*if_ioctl) /* ioctl routine */ - (struct ifnet *, u_long, caddr_t); - void (*if_init) /* Init routine */ - (void *); + if_start_fn_t if_start; /* initiate output routine */ + if_ioctl_fn_t if_ioctl; /* ioctl routine */ + if_init_fn_t if_init; /* Init routine */ int (*if_resolvemulti) /* validate/resolve multicast */ (struct ifnet *, struct sockaddr **, struct sockaddr *); - void (*if_qflush) /* flush any queues */ - (struct ifnet *); - int (*if_transmit) /* initiate output routine */ - (struct ifnet *, struct mbuf *); + if_qflush_fn_t if_qflush; /* flush any queue */ + if_transmit_fn_t if_transmit; /* initiate output routine */ + void (*if_reassign) /* reassign to vnet routine */ (struct ifnet *, struct vnet *, char *); - struct vnet *if_home_vnet; /* where this ifnet originates from */ - struct ifaddr *if_addr; /* pointer to link-level address */ - void *if_llsoftc; /* link layer softc */ - int if_drv_flags; /* driver-managed status flags */ - struct 
ifaltq if_snd; /* output queue (includes altq) */ - const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */ + if_get_counter_t if_get_counter; /* get counter values */ + int (*if_requestencap) /* make link header from request */ + (struct ifnet *, struct if_encap_req *); - void *if_bridge; /* bridge glue */ + /* Statistics. */ + counter_u64_t if_counters[IFCOUNTERS]; - struct label *if_label; /* interface MAC label */ - - /* these are only used by IPv6 */ - struct ifprefixhead if_prefixhead; /* list of prefixes per if */ - void *if_afdata[AF_MAX]; - int if_afdata_initialized; - struct rwlock if_afdata_lock; - struct task if_linktask; /* task for link change events */ - struct mtx if_addr_mtx; /* mutex to protect address lists */ - - LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */ - TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ - /* protected by if_addr_mtx */ - void *if_pf_kif; - void *if_lagg; /* lagg glue */ - char *if_description; /* interface description */ - u_int if_fib; /* interface FIB */ - u_char if_alloctype; /* if_type at time of allocation */ - - /* - * Spare fields are added so that we can modify sensitive data - * structures without changing the kernel binary interface, and must - * be used with care where binary compatibility is required. - */ - char if_cspare[3]; + /* Stuff that's only temporary and doesn't belong here. */ /* * Network adapter TSO limits: @@ -222,50 +295,25 @@ struct ifnet { * count limit does not apply. If all three fields are zero, * there is no TSO limit. * - * NOTE: The TSO limits only apply to the data payload part of - * a TCP/IP packet. That means there is no need to subtract - * space for ethernet-, vlan-, IP- or TCP- headers from the - * TSO limits unless the hardware driver in question requires - * so. - */ - u_int if_hw_tsomax; - int if_ispare[1]; - /* - * TSO fields for segment limits. 
If a field is zero below, - * there is no limit: + * NOTE: The TSO limits should reflect the values used in the + * BUSDMA tag a network adapter is using to load a mbuf chain + * for transmission. The TCP/IP network stack will subtract + * space for all linklevel and protocol level headers and + * ensure that the full mbuf chain passed to the network + * adapter fits within the given limits. */ + u_int if_hw_tsomax; /* TSO maximum size in bytes */ u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ - void *if_pspare[8]; /* 1 netmap, 7 TDB */ -}; - -typedef void if_init_f_t(void *); -/* - * XXX These aliases are terribly dangerous because they could apply - * to anything. - */ -#define if_mtu if_data.ifi_mtu -#define if_type if_data.ifi_type -#define if_physical if_data.ifi_physical -#define if_addrlen if_data.ifi_addrlen -#define if_hdrlen if_data.ifi_hdrlen -#define if_metric if_data.ifi_metric -#define if_link_state if_data.ifi_link_state -#define if_baudrate if_data.ifi_baudrate -#define if_hwassist if_data.ifi_hwassist -#define if_ipackets if_data.ifi_ipackets -#define if_ierrors if_data.ifi_ierrors -#define if_opackets if_data.ifi_opackets -#define if_oerrors if_data.ifi_oerrors -#define if_collisions if_data.ifi_collisions -#define if_ibytes if_data.ifi_ibytes -#define if_obytes if_data.ifi_obytes -#define if_imcasts if_data.ifi_imcasts -#define if_omcasts if_data.ifi_omcasts -#define if_iqdrops if_data.ifi_iqdrops -#define if_noproto if_data.ifi_noproto -#define if_lastchange if_data.ifi_lastchange + /* + * Spare fields to be added before branching a stable branch, so + * that structure can be enhanced without changing the kernel + * binary interface. 
+ */ + void *if_pspare[4]; /* packet pacing / general use */ + int if_ispare[4]; /* packet pacing / general use */ +}; /* for compatibility with other BSDs */ #define if_addrlist if_addrhead @@ -275,18 +323,14 @@ typedef void if_init_f_t(void *); /* * Locks for address lists on the network interface. */ -#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \ - "if_addr_mtx", NULL, MTX_DEF) -#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx) -#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_mtx) -#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx) -#define IF_ADDR_RLOCK(if) mtx_lock(&(if)->if_addr_mtx) -#define IF_ADDR_RUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx) -#define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED) -#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED) -/* XXX: Compat. */ -#define IF_ADDR_LOCK(if) IF_ADDR_WLOCK(if) -#define IF_ADDR_UNLOCK(if) IF_ADDR_WUNLOCK(if) +#define IF_ADDR_LOCK_INIT(if) rw_init(&(if)->if_addr_lock, "if_addr_lock") +#define IF_ADDR_LOCK_DESTROY(if) rw_destroy(&(if)->if_addr_lock) +#define IF_ADDR_WLOCK(if) rw_wlock(&(if)->if_addr_lock) +#define IF_ADDR_WUNLOCK(if) rw_wunlock(&(if)->if_addr_lock) +#define IF_ADDR_RLOCK(if) rw_rlock(&(if)->if_addr_lock) +#define IF_ADDR_RUNLOCK(if) rw_runlock(&(if)->if_addr_lock) +#define IF_ADDR_LOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_LOCKED) +#define IF_ADDR_WLOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_WLOCKED) /* * Function variations on locking macros intended to be used by loadable @@ -295,100 +339,11 @@ typedef void if_init_f_t(void *); */ void if_addr_rlock(struct ifnet *ifp); /* if_addrhead */ void if_addr_runlock(struct ifnet *ifp); /* if_addrhead */ -void if_maddr_rlock(struct ifnet *ifp); /* if_multiaddrs */ -void if_maddr_runlock(struct ifnet *ifp); /* if_multiaddrs */ - -/* - * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq) - * are queues of messages stored on ifqueue 
structures - * (defined above). Entries are added to and deleted from these structures - * by these macros, which should be called with ipl raised to splimp(). - */ -#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx) -#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx) -#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED) -#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen) -#define _IF_DROP(ifq) ((ifq)->ifq_drops++) -#define _IF_QLEN(ifq) ((ifq)->ifq_len) - -#define _IF_ENQUEUE(ifq, m) do { \ - (m)->m_nextpkt = NULL; \ - if ((ifq)->ifq_tail == NULL) \ - (ifq)->ifq_head = m; \ - else \ - (ifq)->ifq_tail->m_nextpkt = m; \ - (ifq)->ifq_tail = m; \ - (ifq)->ifq_len++; \ -} while (0) - -#define IF_ENQUEUE(ifq, m) do { \ - IF_LOCK(ifq); \ - _IF_ENQUEUE(ifq, m); \ - IF_UNLOCK(ifq); \ -} while (0) - -#define _IF_PREPEND(ifq, m) do { \ - (m)->m_nextpkt = (ifq)->ifq_head; \ - if ((ifq)->ifq_tail == NULL) \ - (ifq)->ifq_tail = (m); \ - (ifq)->ifq_head = (m); \ - (ifq)->ifq_len++; \ -} while (0) - -#define IF_PREPEND(ifq, m) do { \ - IF_LOCK(ifq); \ - _IF_PREPEND(ifq, m); \ - IF_UNLOCK(ifq); \ -} while (0) - -#define _IF_DEQUEUE(ifq, m) do { \ - (m) = (ifq)->ifq_head; \ - if (m) { \ - if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \ - (ifq)->ifq_tail = NULL; \ - (m)->m_nextpkt = NULL; \ - (ifq)->ifq_len--; \ - } \ -} while (0) - -#define IF_DEQUEUE(ifq, m) do { \ - IF_LOCK(ifq); \ - _IF_DEQUEUE(ifq, m); \ - IF_UNLOCK(ifq); \ -} while (0) - -#define _IF_DEQUEUE_ALL(ifq, m) do { \ - (m) = (ifq)->ifq_head; \ - (ifq)->ifq_head = (ifq)->ifq_tail = NULL; \ - (ifq)->ifq_len = 0; \ -} while (0) - -#define IF_DEQUEUE_ALL(ifq, m) do { \ - IF_LOCK(ifq); \ - _IF_DEQUEUE_ALL(ifq, m); \ - IF_UNLOCK(ifq); \ -} while (0) - -#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head) -#define IF_POLL(ifq, m) _IF_POLL(ifq, m) - -#define _IF_DRAIN(ifq) do { \ - struct mbuf *m; \ - for (;;) { \ - _IF_DEQUEUE(ifq, m); \ - if (m == NULL) \ - break; \ - m_freem(m); \ - } \ -} while (0) - 
-#define IF_DRAIN(ifq) do { \ - IF_LOCK(ifq); \ - _IF_DRAIN(ifq); \ - IF_UNLOCK(ifq); \ -} while(0) +void if_maddr_rlock(if_t ifp); /* if_multiaddrs */ +void if_maddr_runlock(if_t ifp); /* if_multiaddrs */ #ifdef _KERNEL +#ifdef _SYS_EVENTHANDLER_H_ /* interface link layer address change event */ typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t); @@ -404,6 +359,7 @@ EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); /* Interface link state change event */ typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int); EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); +#endif /* _SYS_EVENTHANDLER_H_ */ /* * interface groups @@ -426,6 +382,7 @@ struct ifg_list { TAILQ_ENTRY(ifg_list) ifgl_next; }; +#ifdef _SYS_EVENTHANDLER_H_ /* group attach event */ typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t); @@ -435,6 +392,7 @@ EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t); /* group change event */ typedef void (*group_change_event_handler_t)(void *, const char *); EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); +#endif /* _SYS_EVENTHANDLER_H_ */ #define IF_AFDATA_LOCK_INIT(ifp) \ rw_init(&(ifp)->if_afdata_lock, "if_afdata") @@ -453,331 +411,6 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #define IF_AFDATA_WLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED) -int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, - int adjust); -#define IF_HANDOFF(ifq, m, ifp) \ - if_handoff((struct ifqueue *)ifq, m, ifp, 0) -#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \ - if_handoff((struct ifqueue *)ifq, m, ifp, adj) - -void if_start(struct ifnet *); - -#define IFQ_ENQUEUE(ifq, m, 
err) \ -do { \ - IF_LOCK(ifq); \ - if (ALTQ_IS_ENABLED(ifq)) \ - ALTQ_ENQUEUE(ifq, m, NULL, err); \ - else { \ - if (_IF_QFULL(ifq)) { \ - m_freem(m); \ - (err) = ENOBUFS; \ - } else { \ - _IF_ENQUEUE(ifq, m); \ - (err) = 0; \ - } \ - } \ - if (err) \ - (ifq)->ifq_drops++; \ - IF_UNLOCK(ifq); \ -} while (0) - -#define IFQ_DEQUEUE_NOLOCK(ifq, m) \ -do { \ - if (TBR_IS_ENABLED(ifq)) \ - (m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \ - else if (ALTQ_IS_ENABLED(ifq)) \ - ALTQ_DEQUEUE(ifq, m); \ - else \ - _IF_DEQUEUE(ifq, m); \ -} while (0) - -#define IFQ_DEQUEUE(ifq, m) \ -do { \ - IF_LOCK(ifq); \ - IFQ_DEQUEUE_NOLOCK(ifq, m); \ - IF_UNLOCK(ifq); \ -} while (0) - -#define IFQ_POLL_NOLOCK(ifq, m) \ -do { \ - if (TBR_IS_ENABLED(ifq)) \ - (m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \ - else if (ALTQ_IS_ENABLED(ifq)) \ - ALTQ_POLL(ifq, m); \ - else \ - _IF_POLL(ifq, m); \ -} while (0) - -#define IFQ_POLL(ifq, m) \ -do { \ - IF_LOCK(ifq); \ - IFQ_POLL_NOLOCK(ifq, m); \ - IF_UNLOCK(ifq); \ -} while (0) - -#define IFQ_PURGE_NOLOCK(ifq) \ -do { \ - if (ALTQ_IS_ENABLED(ifq)) { \ - ALTQ_PURGE(ifq); \ - } else \ - _IF_DRAIN(ifq); \ -} while (0) - -#define IFQ_PURGE(ifq) \ -do { \ - IF_LOCK(ifq); \ - IFQ_PURGE_NOLOCK(ifq); \ - IF_UNLOCK(ifq); \ -} while (0) - -#define IFQ_SET_READY(ifq) \ - do { ((ifq)->altq_flags |= ALTQF_READY); } while (0) - -#define IFQ_LOCK(ifq) IF_LOCK(ifq) -#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq) -#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq) -#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0) -#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++) -#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len) -#define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++) -#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len)) - -/* - * The IFF_DRV_OACTIVE test should really occur in the device driver, not in - * the handoff logic, as that flag is locked by the device driver. 
- */ -#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \ -do { \ - int len; \ - short mflags; \ - \ - len = (m)->m_pkthdr.len; \ - mflags = (m)->m_flags; \ - IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \ - if ((err) == 0) { \ - (ifp)->if_obytes += len + (adj); \ - if (mflags & M_MCAST) \ - (ifp)->if_omcasts++; \ - if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \ - if_start(ifp); \ - } \ -} while (0) - -#define IFQ_HANDOFF(ifp, m, err) \ - IFQ_HANDOFF_ADJ(ifp, m, 0, err) - -#define IFQ_DRV_DEQUEUE(ifq, m) \ -do { \ - (m) = (ifq)->ifq_drv_head; \ - if (m) { \ - if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \ - (ifq)->ifq_drv_tail = NULL; \ - (m)->m_nextpkt = NULL; \ - (ifq)->ifq_drv_len--; \ - } else { \ - IFQ_LOCK(ifq); \ - IFQ_DEQUEUE_NOLOCK(ifq, m); \ - while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \ - struct mbuf *m0; \ - IFQ_DEQUEUE_NOLOCK(ifq, m0); \ - if (m0 == NULL) \ - break; \ - m0->m_nextpkt = NULL; \ - if ((ifq)->ifq_drv_tail == NULL) \ - (ifq)->ifq_drv_head = m0; \ - else \ - (ifq)->ifq_drv_tail->m_nextpkt = m0; \ - (ifq)->ifq_drv_tail = m0; \ - (ifq)->ifq_drv_len++; \ - } \ - IFQ_UNLOCK(ifq); \ - } \ -} while (0) - -#define IFQ_DRV_PREPEND(ifq, m) \ -do { \ - (m)->m_nextpkt = (ifq)->ifq_drv_head; \ - if ((ifq)->ifq_drv_tail == NULL) \ - (ifq)->ifq_drv_tail = (m); \ - (ifq)->ifq_drv_head = (m); \ - (ifq)->ifq_drv_len++; \ -} while (0) - -#define IFQ_DRV_IS_EMPTY(ifq) \ - (((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0)) - -#define IFQ_DRV_PURGE(ifq) \ -do { \ - struct mbuf *m, *n = (ifq)->ifq_drv_head; \ - while((m = n) != NULL) { \ - n = m->m_nextpkt; \ - m_freem(m); \ - } \ - (ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \ - (ifq)->ifq_drv_len = 0; \ - IFQ_PURGE(ifq); \ -} while (0) - -#ifdef _KERNEL -static __inline int -drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m) -{ - int error = 0; - -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) { - IFQ_ENQUEUE(&ifp->if_snd, m, error); - return (error); - } -#endif - error = 
buf_ring_enqueue(br, m); - if (error) - m_freem(m); - - return (error); -} - -static __inline void -drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new_mbuf) -{ - /* - * The top of the list needs to be swapped - * for this one. - */ -#ifdef ALTQ - if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { - /* - * Peek in altq case dequeued it - * so put it back. - */ - IFQ_DRV_PREPEND(&ifp->if_snd, new_mbuf); - return; - } -#endif - buf_ring_putback_sc(br, new_mbuf); -} - -static __inline struct mbuf * -drbr_peek(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - struct mbuf *m; - if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { - /* - * Pull it off like a dequeue - * since drbr_advance() does nothing - * for altq and drbr_putback() will - * use the old prepend function. - */ - IFQ_DEQUEUE(&ifp->if_snd, m); - return (m); - } -#endif - return ((struct mbuf *)buf_ring_peek(br)); -} - -static __inline void -drbr_flush(struct ifnet *ifp, struct buf_ring *br) -{ - struct mbuf *m; - -#ifdef ALTQ - if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) - IFQ_PURGE(&ifp->if_snd); -#endif - while ((m = (struct mbuf *)buf_ring_dequeue_sc(br)) != NULL) - m_freem(m); -} - -static __inline void -drbr_free(struct buf_ring *br, struct malloc_type *type) -{ - - drbr_flush(NULL, br); - buf_ring_free(br, type); -} - -static __inline struct mbuf * -drbr_dequeue(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - struct mbuf *m; - - if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { - IFQ_DEQUEUE(&ifp->if_snd, m); - return (m); - } -#endif - return ((struct mbuf *)buf_ring_dequeue_sc(br)); -} - -static __inline void -drbr_advance(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - /* Nothing to do here since peek dequeues in altq case */ - if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) - return; -#endif - return (buf_ring_advance_sc(br)); -} - - -static __inline struct mbuf * -drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br, - int (*func) 
(struct mbuf *, void *), void *arg) -{ - struct mbuf *m; -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) { - IFQ_LOCK(&ifp->if_snd); - IFQ_POLL_NOLOCK(&ifp->if_snd, m); - if (m != NULL && func(m, arg) == 0) { - IFQ_UNLOCK(&ifp->if_snd); - return (NULL); - } - IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m); - IFQ_UNLOCK(&ifp->if_snd); - return (m); - } -#endif - m = (struct mbuf *)buf_ring_peek(br); - if (m == NULL || func(m, arg) == 0) - return (NULL); - - return ((struct mbuf *)buf_ring_dequeue_sc(br)); -} - -static __inline int -drbr_empty(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) - return (IFQ_IS_EMPTY(&ifp->if_snd)); -#endif - return (buf_ring_empty(br)); -} - -static __inline int -drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) - return (1); -#endif - return (!buf_ring_empty(br)); -} - -static __inline int -drbr_inuse(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) - return (ifp->if_snd.ifq_len); -#endif - return (buf_ring_count(br)); -} -#endif /* * 72 was chosen below because it is the size of a TCP/IP * header (40) + the minimum mss (32). @@ -787,8 +420,6 @@ drbr_inuse(struct ifnet *ifp, struct buf_ring *br) #define TOEDEV(ifp) ((ifp)->if_llsoftc) -#endif /* _KERNEL */ - /* * The ifaddr structure contains information about one address * of an interface. 
They are maintained by the different address families, @@ -804,46 +435,28 @@ struct ifaddr { struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ - struct if_data if_data; /* not all members are meaningful */ struct ifnet *ifa_ifp; /* back-pointer to interface */ + struct carp_softc *ifa_carp; /* pointer to CARP data */ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ (int, struct rtentry *, struct rt_addrinfo *); u_short ifa_flags; /* mostly rt_flags for cloning */ +#define IFA_ROUTE RTF_UP /* route installed */ +#define IFA_RTSELF RTF_HOST /* loopback route to self installed */ u_int ifa_refcnt; /* references to this structure */ - int ifa_metric; /* cost of going out this interface */ - int (*ifa_claim_addr) /* check if an addr goes to this if */ - (struct ifaddr *, struct sockaddr *); - struct mtx ifa_mtx; + + counter_u64_t ifa_ipackets; + counter_u64_t ifa_opackets; + counter_u64_t ifa_ibytes; + counter_u64_t ifa_obytes; }; -#define IFA_ROUTE RTF_UP /* route installed */ -#define IFA_RTSELF RTF_HOST /* loopback route to self installed */ -/* for compatibility with other BSDs */ +/* For compatibility with other BSDs. SCTP uses it. */ #define ifa_list ifa_link -#ifdef _KERNEL -#define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx) -#define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx) - +struct ifaddr * ifa_alloc(size_t size, int flags); void ifa_free(struct ifaddr *ifa); -void ifa_init(struct ifaddr *ifa); void ifa_ref(struct ifaddr *ifa); -#endif - -/* - * The prefix structure contains information about one prefix - * of an interface. They are maintained by the different address families, - * are allocated and attached when a prefix or an address is set, - * and are linked together so all prefixes for an interface can be located. 
- */ -struct ifprefix { - struct sockaddr *ifpr_prefix; /* prefix of interface */ - struct ifnet *ifpr_ifp; /* back-pointer to interface */ - TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */ - u_char ifpr_plen; /* prefix length in bits */ - u_char ifpr_type; /* protocol dependent prefix type */ -}; /* * Multicast address structure. This is analogous to the ifaddr @@ -859,16 +472,9 @@ struct ifmultiaddr { struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ }; -#ifdef _KERNEL - extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; -#define IFNET_LOCK_INIT() do { \ - rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \ - sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \ -} while(0) - #define IFNET_WLOCK() do { \ sx_xlock(&ifnet_sxlock); \ rw_wlock(&ifnet_rwlock); \ @@ -915,15 +521,11 @@ VNET_DECLARE(struct ifnethead, ifnet); VNET_DECLARE(struct ifgrouphead, ifg_head); VNET_DECLARE(int, if_index); VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */ -VNET_DECLARE(int, useloopback); #define V_ifnet VNET(ifnet) #define V_ifg_head VNET(ifg_head) #define V_if_index VNET(if_index) #define V_loif VNET(loif) -#define V_useloopback VNET(useloopback) - -extern int ifqmaxlen; int if_addgroup(struct ifnet *, const char *); int if_delgroup(struct ifnet *, const char *); @@ -935,18 +537,15 @@ void if_dead(struct ifnet *); int if_delmulti(struct ifnet *, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_detach(struct ifnet *); -void if_vmove(struct ifnet *, struct vnet *); void if_purgeaddrs(struct ifnet *); void if_delallmulti(struct ifnet *); void if_down(struct ifnet *); struct ifmultiaddr * - if_findmulti(struct ifnet *, struct sockaddr *); + if_findmulti(struct ifnet *, const struct sockaddr *); void if_free(struct ifnet *); -void if_free_type(struct ifnet *, u_char); void if_initname(struct ifnet *, const char *, int); void if_link_state_change(struct ifnet *, int); int if_printf(struct ifnet 
*, const char *, ...) __printflike(2, 3); -void if_qflush(struct ifnet *); void if_ref(struct ifnet *); void if_rele(struct ifnet *); int if_setlladdr(struct ifnet *, const u_char *, int); @@ -956,23 +555,19 @@ int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifnet *ifunit_ref(const char *); -void ifq_init(struct ifaltq *, struct ifnet *ifp); -void ifq_delete(struct ifaltq *); - int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); - -struct ifaddr *ifa_ifwithaddr(struct sockaddr *); -int ifa_ifwithaddr_check(struct sockaddr *); -struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *); -struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *); -struct ifaddr *ifa_ifwithdstaddr_fib(struct sockaddr *, int); -struct ifaddr *ifa_ifwithnet(struct sockaddr *, int); -struct ifaddr *ifa_ifwithnet_fib(struct sockaddr *, int, int); -struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *); -struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int); - -struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *); +int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); + +struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); +int ifa_ifwithaddr_check(const struct sockaddr *); +struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); +struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); +struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); +struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, struct sockaddr *, + u_int); +struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); +int ifa_preferred(struct ifaddr *, struct ifaddr *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); @@ -980,22 +575,92 @@ typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp); typedef void if_com_free_t(void *com, u_char type); void 
if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); +void if_data_copy(struct ifnet *, struct if_data *); +uint64_t if_get_counter_default(struct ifnet *, ift_counter); +void if_inc_counter(struct ifnet *, ift_counter, int64_t); #define IF_LLADDR(ifp) \ LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr)) +uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate); +uint64_t if_getbaudrate(if_t ifp); +int if_setcapabilities(if_t ifp, int capabilities); +int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit); +int if_getcapabilities(if_t ifp); +int if_togglecapenable(if_t ifp, int togglecap); +int if_setcapenable(if_t ifp, int capenable); +int if_setcapenablebit(if_t ifp, int setcap, int clearcap); +int if_getcapenable(if_t ifp); +const char *if_getdname(if_t ifp); +int if_setdev(if_t ifp, void *dev); +int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags); +int if_getdrvflags(if_t ifp); +int if_setdrvflags(if_t ifp, int flags); +int if_clearhwassist(if_t ifp); +int if_sethwassistbits(if_t ifp, int toset, int toclear); +int if_sethwassist(if_t ifp, int hwassist_bit); +int if_gethwassist(if_t ifp); +int if_setsoftc(if_t ifp, void *softc); +void *if_getsoftc(if_t ifp); +int if_setflags(if_t ifp, int flags); +int if_setmtu(if_t ifp, int mtu); +int if_getmtu(if_t ifp); +int if_getmtu_family(if_t ifp, int family); +int if_setflagbits(if_t ifp, int set, int clear); +int if_getflags(if_t ifp); +int if_sendq_empty(if_t ifp); +int if_setsendqready(if_t ifp); +int if_setsendqlen(if_t ifp, int tx_desc_count); +int if_input(if_t ifp, struct mbuf* sendmp); +int if_sendq_prepend(if_t ifp, struct mbuf *m); +struct mbuf *if_dequeue(if_t ifp); +int if_setifheaderlen(if_t ifp, int len); +void if_setrcvif(struct mbuf *m, if_t ifp); +void if_setvtag(struct mbuf *m, u_int16_t tag); +u_int16_t if_getvtag(struct mbuf *m); +int if_vlantrunkinuse(if_t ifp); +caddr_t if_getlladdr(if_t ifp); +void 
*if_gethandle(u_char); +void if_bpfmtap(if_t ifp, struct mbuf *m); +void if_etherbpfmtap(if_t ifp, struct mbuf *m); +void if_vlancap(if_t ifp); + +int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max); +int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max); +int if_multiaddr_count(if_t ifp, int max); + +int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg); +int if_getamcount(if_t ifp); +struct ifaddr * if_getifaddr(if_t ifp); + +/* Functions */ +void if_setinitfn(if_t ifp, void (*)(void *)); +void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t)); +void if_setstartfn(if_t ifp, void (*)(if_t)); +void if_settransmitfn(if_t ifp, if_transmit_fn_t); +void if_setqflushfn(if_t ifp, if_qflush_fn_t); +void if_setgetcounterfn(if_t ifp, if_get_counter_t); + +/* Revisit the below. These are inline functions originally */ +int drbr_inuse_drv(if_t ifp, struct buf_ring *br); +struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br); +int drbr_needs_enqueue_drv(if_t ifp, struct buf_ring *br); +int drbr_enqueue_drv(if_t ifp, struct buf_ring *br, struct mbuf *m); + +/* TSO */ +void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); +int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); + #ifdef DEVICE_POLLING -enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; +enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; -typedef int poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count); -int ether_poll_register(poll_handler_t *h, struct ifnet *ifp); -int ether_poll_deregister(struct ifnet *ifp); +typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count); +int ether_poll_register(poll_handler_t *h, if_t ifp); +int ether_poll_deregister(if_t ifp); #endif /* DEVICE_POLLING */ -/* TSO */ -void if_hw_tsomax_common(struct ifnet *, struct ifnet_hw_tsomax *); -int if_hw_tsomax_update(struct ifnet *, struct ifnet_hw_tsomax *); - #endif /* _KERNEL */ +#include <net/ifq.h> /* XXXAO: temporary 
unconditional include */ + #endif /* !_NET_IF_VAR_H_ */ diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c index 7d08e298..8a93565b 100644 --- a/freebsd/sys/net/if_vlan.c +++ b/freebsd/sys/net/if_vlan.c @@ -2,6 +2,10 @@ /*- * Copyright 1998 Massachusetts Institute of Technology + * Copyright 2012 ADARA Networks, Inc. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to ADARA Networks, Inc. * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -31,8 +35,7 @@ /* * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. - * Might be extended some day to also handle IEEE 802.1p priority - * tagging. This is sort of sneaky in the implementation, since + * This is sort of sneaky in the implementation, since * we need to pretend to be enough of an Ethernet implementation * to make arp work. The way we do this is by telling everyone * that we are an Ethernet, and then catch the packets that @@ -47,12 +50,14 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_vlan.h> #include <rtems/bsd/sys/param.h> +#include <sys/eventhandler.h> #include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/module.h> -#include <sys/rwlock.h> +#include <sys/rmlock.h> +#include <sys/priv.h> #include <sys/queue.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -63,6 +68,7 @@ __FBSDID("$FreeBSD$"); #include <net/bpf.h> #include <net/ethernet.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/if_dl.h> #include <net/if_types.h> @@ -74,7 +80,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/if_ether.h> #endif -#define VLANNAME "vlan" #define VLAN_DEF_HWIDTH 4 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) @@ -85,7 +90,7 @@ LIST_HEAD(ifvlanhead, ifvlan); struct ifvlantrunk { struct ifnet *parent; /* parent interface of this trunk */ - struct rwlock rw; + 
struct rmlock lock; #ifdef VLAN_ARRAY #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ @@ -105,9 +110,9 @@ struct vlan_mc_entry { struct ifvlan { struct ifvlantrunk *ifv_trunk; struct ifnet *ifv_ifp; - void *ifv_cookie; #define TRUNK(ifv) ((ifv)->ifv_trunk) #define PARENT(ifv) ((ifv)->ifv_trunk->parent) + void *ifv_cookie; int ifv_pflags; /* special flags we have set on parent */ struct ifv_linkmib { int ifvm_encaplen; /* encapsulation length */ @@ -115,6 +120,8 @@ struct ifvlan { int ifvm_mintu; /* min transmission unit */ uint16_t ifvm_proto; /* encapsulation ethertype */ uint16_t ifvm_tag; /* tag to apply on packets leaving if */ + uint16_t ifvm_vid; /* VLAN ID */ + uint8_t ifvm_pcp; /* Priority Code Point (PCP). */ } ifv_mib; SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; #ifndef VLAN_ARRAY @@ -123,6 +130,8 @@ struct ifvlan { }; #define ifv_proto ifv_mib.ifvm_proto #define ifv_tag ifv_mib.ifvm_tag +#define ifv_vid ifv_mib.ifvm_vid +#define ifv_pcp ifv_mib.ifvm_pcp #define ifv_encaplen ifv_mib.ifvm_encaplen #define ifv_mtufudge ifv_mib.ifvm_mtufudge #define ifv_mintu ifv_mib.ifvm_mintu @@ -143,11 +152,22 @@ static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); -static int soft_pad = 0; -SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0, - "pad short frames before tagging"); +static VNET_DEFINE(int, soft_pad); +#define V_soft_pad VNET(soft_pad) +SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(soft_pad), 0, "pad short frames before tagging"); + +/* + * For now, make preserving PCP via an mbuf tag optional, as it increases + * per-packet memory allocations and frees. In the future, it would be + * preferable to reuse ether_vtag for this, or similar. 
+ */ +static int vlan_mtag_pcp = 0; +SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0, + "Retain VLAN PCP information as packets are passed up the stack"); -static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface"); +static const char vlanname[] = "vlan"; +static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface"); static eventhandler_tag ifdetach_tag; static eventhandler_tag iflladdr_tag; @@ -156,7 +176,7 @@ static eventhandler_tag iflladdr_tag; * We have a global mutex, that is used to serialize configuration * changes and isn't used in normal packet delivery. * - * We also have a per-trunk rwlock, that is locked shared on packet + * We also have a per-trunk rmlock(9), that is locked shared on packet * processing and exclusive when configuration is changed. * * The VLAN_ARRAY substitutes the dynamic hash with a static array @@ -170,14 +190,15 @@ static struct sx ifv_lock; #define VLAN_LOCK_ASSERT() sx_assert(&ifv_lock, SA_LOCKED) #define VLAN_LOCK() sx_xlock(&ifv_lock) #define VLAN_UNLOCK() sx_xunlock(&ifv_lock) -#define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME) -#define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw) -#define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw) -#define TRUNK_UNLOCK(trunk) rw_wunlock(&(trunk)->rw) -#define TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED) -#define TRUNK_RLOCK(trunk) rw_rlock(&(trunk)->rw) -#define TRUNK_RUNLOCK(trunk) rw_runlock(&(trunk)->rw) -#define TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED) +#define TRUNK_LOCK_INIT(trunk) rm_init(&(trunk)->lock, vlanname) +#define TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock) +#define TRUNK_LOCK(trunk) rm_wlock(&(trunk)->lock) +#define TRUNK_UNLOCK(trunk) rm_wunlock(&(trunk)->lock) +#define TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED) +#define TRUNK_RLOCK(trunk) rm_rlock(&(trunk)->lock, &tracker) +#define TRUNK_RUNLOCK(trunk) rm_runlock(&(trunk)->lock, &tracker) 
+#define TRUNK_LOCK_RASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED) +#define TRUNK_LOCK_READER struct rm_priotracker tracker #ifndef VLAN_ARRAY static void vlan_inithash(struct ifvlantrunk *trunk); @@ -186,7 +207,7 @@ static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, - uint16_t tag); + uint16_t vid); #endif static void trunk_destroy(struct ifvlantrunk *trunk); @@ -206,8 +227,7 @@ static void vlan_link_state(struct ifnet *ifp); static void vlan_capabilities(struct ifvlan *ifv); static void vlan_trunk_capabilities(struct ifnet *ifp); -static struct ifnet *vlan_clone_match_ethertag(struct if_clone *, - const char *, int *); +static struct ifnet *vlan_clone_match_ethervid(const char *, int *); static int vlan_clone_match(struct if_clone *, const char *); static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t); static int vlan_clone_destroy(struct if_clone *, struct ifnet *); @@ -215,11 +235,10 @@ static int vlan_clone_destroy(struct if_clone *, struct ifnet *); static void vlan_ifdetach(void *arg, struct ifnet *ifp); static void vlan_iflladdr(void *arg, struct ifnet *ifp); -static struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL, - IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy); +static struct if_clone *vlan_cloner; #ifdef VIMAGE -static VNET_DEFINE(struct if_clone, vlan_cloner); +static VNET_DEFINE(struct if_clone *, vlan_cloner); #define V_vlan_cloner VNET(vlan_cloner) #endif @@ -274,9 +293,9 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; - i = HASH(ifv->ifv_tag, trunk->hmask); + i = HASH(ifv->ifv_vid, trunk->hmask); LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) - if 
(ifv->ifv_tag == ifv2->ifv_tag) + if (ifv->ifv_vid == ifv2->ifv_vid) return (EEXIST); /* @@ -286,7 +305,7 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) */ if (trunk->refcnt > (b * b) / 2) { vlan_growhash(trunk, 1); - i = HASH(ifv->ifv_tag, trunk->hmask); + i = HASH(ifv->ifv_vid, trunk->hmask); } LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); trunk->refcnt++; @@ -304,7 +323,7 @@ vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; - i = HASH(ifv->ifv_tag, trunk->hmask); + i = HASH(ifv->ifv_vid, trunk->hmask); LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv2 == ifv) { trunk->refcnt--; @@ -356,7 +375,7 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch) for (i = 0; i < n; i++) while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) { LIST_REMOVE(ifv, ifv_list); - j = HASH(ifv->ifv_tag, n2 - 1); + j = HASH(ifv->ifv_vid, n2 - 1); LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); } free(trunk->hash, M_VLAN); @@ -370,14 +389,14 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch) } static __inline struct ifvlan * -vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag) +vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { struct ifvlan *ifv; TRUNK_LOCK_RASSERT(trunk); - LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list) - if (ifv->ifv_tag == tag) + LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) + if (ifv->ifv_vid == vid) return (ifv); return (NULL); } @@ -401,19 +420,19 @@ vlan_dumphash(struct ifvlantrunk *trunk) #else static __inline struct ifvlan * -vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag) +vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { - return trunk->vlans[tag]; + return trunk->vlans[vid]; } static __inline int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { - if (trunk->vlans[ifv->ifv_tag] != NULL) + if (trunk->vlans[ifv->ifv_vid] != NULL) return EEXIST; - 
trunk->vlans[ifv->ifv_tag] = ifv; + trunk->vlans[ifv->ifv_vid] = ifv; trunk->refcnt++; return (0); @@ -423,7 +442,7 @@ static __inline int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { - trunk->vlans[ifv->ifv_tag] = NULL; + trunk->vlans[ifv->ifv_vid] = NULL; trunk->refcnt--; return (0); @@ -461,48 +480,48 @@ trunk_destroy(struct ifvlantrunk *trunk) * traffic that it doesn't really want, which ends up being discarded * later by the upper protocol layers. Unfortunately, there's no way * to avoid this: there really is only one physical interface. - * - * XXX: There is a possible race here if more than one thread is - * modifying the multicast state of the vlan interface at the same time. */ static int vlan_setmulti(struct ifnet *ifp) { struct ifnet *ifp_p; - struct ifmultiaddr *ifma, *rifma = NULL; + struct ifmultiaddr *ifma; struct ifvlan *sc; struct vlan_mc_entry *mc; int error; - /*VLAN_LOCK_ASSERT();*/ - /* Find the parent. */ sc = ifp->if_softc; + TRUNK_LOCK_ASSERT(TRUNK(sc)); ifp_p = PARENT(sc); CURVNET_SET_QUIET(ifp_p->if_vnet); /* First, remove any existing filter entries. */ while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { - error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); - if (error) - return (error); SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); + (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); free(mc, M_VLAN); } /* Now program new ones. 
*/ + IF_ADDR_WLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); - if (mc == NULL) + if (mc == NULL) { + IF_ADDR_WUNLOCK(ifp); return (ENOMEM); + } bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); mc->mc_addr.sdl_index = ifp_p->if_index; SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); + } + IF_ADDR_WUNLOCK(ifp); + SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, - &rifma); + NULL); if (error) return (error); } @@ -625,17 +644,21 @@ vlan_trunkdev(struct ifnet *ifp) } /* - * Return the 16bit vlan tag for this interface. + * Return the 12-bit VLAN VID for this interface, for use by external + * components such as Infiniband. + * + * XXXRW: Note that the function name here is historical; it should be named + * vlan_vid(). */ static int -vlan_tag(struct ifnet *ifp, uint16_t *tagp) +vlan_tag(struct ifnet *ifp, uint16_t *vidp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; - *tagp = ifv->ifv_tag; + *vidp = ifv->ifv_vid; return (0); } @@ -671,20 +694,21 @@ vlan_setcookie(struct ifnet *ifp, void *cookie) } /* - * Return the vlan device present at the specific tag. + * Return the vlan device present at the specific VID. */ static struct ifnet * -vlan_devat(struct ifnet *ifp, uint16_t tag) +vlan_devat(struct ifnet *ifp, uint16_t vid) { struct ifvlantrunk *trunk; struct ifvlan *ifv; + TRUNK_LOCK_READER; trunk = ifp->if_vlantrunk; if (trunk == NULL) return (NULL); ifp = NULL; TRUNK_RLOCK(trunk); - ifv = vlan_gethash(trunk, tag); + ifv = vlan_gethash(trunk, vid); if (ifv) ifp = ifv->ifv_ifp; TRUNK_RUNLOCK(trunk); @@ -692,10 +716,20 @@ vlan_devat(struct ifnet *ifp, uint16_t tag) } /* + * Recalculate the cached VLAN tag exposed via the MIB. 
+ */ +static void +vlan_tag_recalculate(struct ifvlan *ifv) +{ + + ifv->ifv_tag = EVL_MAKETAG(ifv->ifv_vid, ifv->ifv_pcp, 0); +} + +/* * VLAN support can be loaded as a module. The only place in the * system that's intimately aware of this is ether_input. We hook * into this code through vlan_input_p which is defined there and - * set here. Noone else in the system should be aware of this so + * set here. No one else in the system should be aware of this so * we use an explicit reference here. */ extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); @@ -727,7 +761,8 @@ vlan_modevent(module_t mod, int type, void *data) vlan_tag_p = vlan_tag; vlan_devat_p = vlan_devat; #ifndef VIMAGE - if_clone_attach(&vlan_cloner); + vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, + vlan_clone_create, vlan_clone_destroy); #endif if (bootverbose) printf("vlan: initialized, using " @@ -741,7 +776,7 @@ vlan_modevent(module_t mod, int type, void *data) break; case MOD_UNLOAD: #ifndef VIMAGE - if_clone_detach(&vlan_cloner); + if_clone_detach(vlan_cloner); #endif EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag); @@ -777,8 +812,9 @@ static void vnet_vlan_init(const void *unused __unused) { + vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, + vlan_clone_create, vlan_clone_destroy); V_vlan_cloner = vlan_cloner; - if_clone_attach(&V_vlan_cloner); } VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_vlan_init, NULL); @@ -787,46 +823,39 @@ static void vnet_vlan_uninit(const void *unused __unused) { - if_clone_detach(&V_vlan_cloner); + if_clone_detach(V_vlan_cloner); } -VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, +VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_vlan_uninit, NULL); #endif +/* + * Check for <etherif>.<vlan> style interface names. 
+ */ static struct ifnet * -vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag) +vlan_clone_match_ethervid(const char *name, int *vidp) { - const char *cp; + char ifname[IFNAMSIZ]; + char *cp; struct ifnet *ifp; - int t; + int vid; - /* Check for <etherif>.<vlan> style interface names. */ - IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_link) { - /* - * We can handle non-ethernet hardware types as long as - * they handle the tagging and headers themselves. - */ - if (ifp->if_type != IFT_ETHER && - (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) - continue; - if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0) - continue; - cp = name + strlen(ifp->if_xname); - if (*cp++ != '.') - continue; - if (*cp == '\0') - continue; - t = 0; - for(; *cp >= '0' && *cp <= '9'; cp++) - t = (t * 10) + (*cp - '0'); - if (*cp != '\0') - continue; - if (tag != NULL) - *tag = t; - break; - } - IFNET_RUNLOCK_NOSLEEP(); + strlcpy(ifname, name, IFNAMSIZ); + if ((cp = strchr(ifname, '.')) == NULL) + return (NULL); + *cp = '\0'; + if ((ifp = ifunit(ifname)) == NULL) + return (NULL); + /* Parse VID. 
*/ + if (*++cp == '\0') + return (NULL); + vid = 0; + for(; *cp >= '0' && *cp <= '9'; cp++) + vid = (vid * 10) + (*cp - '0'); + if (*cp != '\0') + return (NULL); + if (vidp != NULL) + *vidp = vid; return (ifp); } @@ -836,10 +865,10 @@ vlan_clone_match(struct if_clone *ifc, const char *name) { const char *cp; - if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL) + if (vlan_clone_match_ethervid(name, NULL) != NULL) return (1); - if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0) + if (strncmp(vlanname, name, strlen(vlanname)) != 0) return (0); for (cp = name + 4; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') @@ -856,7 +885,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) int wildcard; int unit; int error; - int tag; + int vid; int ethertag; struct ifvlan *ifv; struct ifnet *ifp; @@ -873,7 +902,10 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) * o specify no parameters and get an unattached device that * must be configured separately. * The first technique is preferred; the latter two are - * supported for backwards compatibilty. + * supported for backwards compatibility. + * + * XXXRW: Note historic use of the word "tag" here. New ioctls may be + * called for. */ if (params) { error = copyin(params, &vlr, sizeof(vlr)); @@ -881,31 +913,18 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) return error; p = ifunit(vlr.vlr_parent); if (p == NULL) - return ENXIO; - /* - * Don't let the caller set up a VLAN tag with - * anything except VLID bits. 
- */ - if (vlr.vlr_tag & ~EVL_VLID_MASK) - return (EINVAL); + return (ENXIO); error = ifc_name2unit(name, &unit); if (error != 0) return (error); ethertag = 1; - tag = vlr.vlr_tag; + vid = vlr.vlr_tag; wildcard = (unit < 0); - } else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) { + } else if ((p = vlan_clone_match_ethervid(name, &vid)) != NULL) { ethertag = 1; unit = -1; wildcard = 0; - - /* - * Don't let the caller set up a VLAN tag with - * anything except VLID bits. - */ - if (tag & ~EVL_VLID_MASK) - return (EINVAL); } else { ethertag = 0; @@ -937,14 +956,13 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) return (ENOSPC); } SLIST_INIT(&ifv->vlan_mc_listhead); - ifp->if_softc = ifv; /* * Set the name manually rather than using if_initname because * we don't conform to the default naming convention for interfaces. */ strlcpy(ifp->if_xname, name, IFNAMSIZ); - ifp->if_dname = ifc->ifc_name; + ifp->if_dname = vlanname; ifp->if_dunit = unit; /* NB: flags are not set here */ ifp->if_linkmib = &ifv->ifv_mib; @@ -966,7 +984,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) sdl->sdl_type = IFT_L2VLAN; if (ethertag) { - error = vlan_config(ifv, p, tag); + error = vlan_config(ifv, p, vid); if (error != 0) { /* * Since we've partially failed, we need to back @@ -975,7 +993,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) */ ether_ifdetach(ifp); vlan_unconfig(ifp); - if_free_type(ifp, IFT_ETHER); + if_free(ifp); ifc_free_unit(ifc, unit); free(ifv, M_VLAN); @@ -997,7 +1015,7 @@ vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ - if_free_type(ifp, IFT_ETHER); + if_free(ifp); free(ifv, M_VLAN); ifc_free_unit(ifc, unit); @@ -1020,6 +1038,8 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) { struct ifvlan *ifv; struct 
ifnet *p; + struct m_tag *mtag; + uint16_t tag; int error, len, mcast; ifv = ifp->if_softc; @@ -1035,7 +1055,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) */ if (!UP_AND_RUNNING(p)) { m_freem(m); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENETDOWN); } @@ -1051,7 +1071,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) * devices that just discard such runts instead or mishandle * them somehow. */ - if (soft_pad && p->if_type == IFT_ETHER) { + if (V_soft_pad && p->if_type == IFT_ETHER) { static char pad[8]; /* just zeros */ int n; @@ -1062,7 +1082,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) if (n > 0) { if_printf(ifp, "cannot pad short frame\n"); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (0); } @@ -1075,14 +1095,19 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) * knows how to find the VLAN tag to use, so we attach a * packet tag that holds it. */ + if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q, + MTAG_8021Q_PCP_OUT, NULL)) != NULL) + tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0); + else + tag = ifv->ifv_tag; if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { - m->m_pkthdr.ether_vtag = ifv->ifv_tag; + m->m_pkthdr.ether_vtag = tag; m->m_flags |= M_VLANTAG; } else { - m = ether_vlanencap(m, ifv->ifv_tag); + m = ether_vlanencap(m, tag); if (m == NULL) { if_printf(ifp, "unable to prepend VLAN header\n"); - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (0); } } @@ -1091,12 +1116,12 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) * Send it, precisely as ether_output() would have. 
*/ error = (p->if_transmit)(p, m); - if (!error) { - ifp->if_opackets++; - ifp->if_omcasts += mcast; - ifp->if_obytes += len; + if (error == 0) { + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, len); + if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); } else - ifp->if_oerrors++; + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } @@ -1113,7 +1138,9 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk = ifp->if_vlantrunk; struct ifvlan *ifv; - uint16_t tag; + TRUNK_LOCK_READER; + struct m_tag *mtag; + uint16_t vid, tag; KASSERT(trunk != NULL, ("%s: no trunk", __func__)); @@ -1122,7 +1149,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) * Packet is tagged, but m contains a normal * Ethernet frame; the tag is stored out-of-band. */ - tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag); + tag = m->m_pkthdr.ether_vtag; m->m_flags &= ~M_VLANTAG; } else { struct ether_vlan_header *evl; @@ -1138,7 +1165,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) return; } evl = mtod(m, struct ether_vlan_header *); - tag = EVL_VLANOFTAG(ntohs(evl->evl_tag)); + tag = ntohs(evl->evl_tag); /* * Remove the 802.1q header by copying the Ethernet @@ -1157,43 +1184,75 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) __func__, ifp->if_xname, ifp->if_type); #endif m_freem(m); - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); return; } } + vid = EVL_VLANOFTAG(tag); + TRUNK_RLOCK(trunk); - ifv = vlan_gethash(trunk, tag); + ifv = vlan_gethash(trunk, vid); if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { TRUNK_RUNLOCK(trunk); m_freem(m); - ifp->if_noproto++; + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); return; } TRUNK_RUNLOCK(trunk); + if (vlan_mtag_pcp) { + /* + * While uncommon, it is possible that we will find a 802.1q + * packet encapsulated inside another packet that also had an + * 802.1q header. For example, ethernet tunneled over IPSEC + * arriving over ethernet. 
In that case, we replace the + * existing 802.1q PCP m_tag value. + */ + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); + if (mtag == NULL) { + mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, + sizeof(uint8_t), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + return; + } + m_tag_prepend(m, mtag); + } + *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); + } + m->m_pkthdr.rcvif = ifv->ifv_ifp; - ifv->ifv_ifp->if_ipackets++; + if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1); /* Pass it back through the parent's input routine. */ (*ifp->if_input)(ifv->ifv_ifp, m); } static int -vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag) +vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) { struct ifvlantrunk *trunk; struct ifnet *ifp; int error = 0; - /* VID numbers 0x0 and 0xFFF are reserved */ - if (tag == 0 || tag == 0xFFF) - return (EINVAL); + /* + * We can handle non-ethernet hardware types as long as + * they handle the tagging and headers themselves. + */ if (p->if_type != IFT_ETHER && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) return (EPROTONOSUPPORT); + /* + * Don't let the caller set up a VLAN VID with + * anything except VLID bits. + * VID numbers 0x0 and 0xFFF are reserved. + */ + if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK)) + return (EINVAL); if (ifv->ifv_trunk) return (EBUSY); @@ -1203,7 +1262,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag) vlan_inithash(trunk); VLAN_LOCK(); if (p->if_vlantrunk != NULL) { - /* A race that that is very unlikely to be hit. */ + /* A race that is very unlikely to be hit. 
*/ vlan_freehash(trunk); free(trunk, M_VLAN); goto exists; @@ -1219,7 +1278,9 @@ exists: TRUNK_LOCK(trunk); } - ifv->ifv_tag = tag; /* must set this before vlan_inshash() */ + ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ + ifv->ifv_pcp = 0; /* Default: best effort delivery. */ + vlan_tag_recalculate(ifv); error = vlan_inshash(trunk, ifv); if (error) goto done; @@ -1297,7 +1358,7 @@ exists: done: TRUNK_UNLOCK(trunk); if (error == 0) - EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_tag); + EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); VLAN_UNLOCK(); return (error); @@ -1366,7 +1427,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) * Check if we were the last. */ if (trunk->refcnt == 0) { - trunk->parent->if_vlantrunk = NULL; + parent->if_vlantrunk = NULL; /* * XXXGL: If some ithread has already entered * vlan_input() and is now blocked on the trunk @@ -1393,7 +1454,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) * to cleanup anyway. */ if (parent != NULL) - EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_tag); + EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid); } /* Handle a reference counted flag that should be set on the parent as well */ @@ -1494,7 +1555,7 @@ vlan_capabilities(struct ifvlan *ifv) p->if_capenable & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM; ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP | - CSUM_UDP | CSUM_SCTP | CSUM_FRAGMENT); + CSUM_UDP | CSUM_SCTP); } else { ifp->if_capenable = 0; ifp->if_hwassist = 0; @@ -1562,6 +1623,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ifreq *ifr; struct ifaddr *ifa; struct ifvlan *ifv; + struct ifvlantrunk *trunk; struct vlanreq vlr; int error = 0; @@ -1633,6 +1695,13 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCSETVLAN: #ifdef VIMAGE + /* + * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN + * interface to be delegated to a jail without allowing the + * jail to 
change what underlying interface/VID it is + * associated with. We are not entirely convinced that this + * is the right way to accomplish that policy goal. + */ if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; @@ -1650,14 +1719,6 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = ENOENT; break; } - /* - * Don't let the caller set up a VLAN tag with - * anything except VLID bits. - */ - if (vlr.vlr_tag & ~EVL_VLID_MASK) { - error = EINVAL; - break; - } error = vlan_config(ifv, p, vlr.vlr_tag); if (error) break; @@ -1678,7 +1739,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (TRUNK(ifv) != NULL) { strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); - vlr.vlr_tag = ifv->ifv_tag; + vlr.vlr_tag = ifv->ifv_vid; } VLAN_UNLOCK(); error = copyout(&vlr, ifr->ifr_data, sizeof(vlr)); @@ -1699,8 +1760,40 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) * If we don't have a parent, just remember the membership for * when we do. 
*/ - if (TRUNK(ifv) != NULL) + trunk = TRUNK(ifv); + if (trunk != NULL) { + TRUNK_LOCK(trunk); error = vlan_setmulti(ifp); + TRUNK_UNLOCK(trunk); + } + break; + + case SIOCGVLANPCP: +#ifdef VIMAGE + if (ifp->if_vnet != ifp->if_home_vnet) { + error = EPERM; + break; + } +#endif + ifr->ifr_vlan_pcp = ifv->ifv_pcp; + break; + + case SIOCSVLANPCP: +#ifdef VIMAGE + if (ifp->if_vnet != ifp->if_home_vnet) { + error = EPERM; + break; + } +#endif + error = priv_check(curthread, PRIV_NET_SETVLANPCP); + if (error) + break; + if (ifr->ifr_vlan_pcp > 7) { + error = EINVAL; + break; + } + ifv->ifv_pcp = ifr->ifr_vlan_pcp; + vlan_tag_recalculate(ifv); break; default: diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h index fd3fc4f3..6b20d142 100644 --- a/freebsd/sys/net/if_vlan_var.h +++ b/freebsd/sys/net/if_vlan_var.h @@ -32,22 +32,6 @@ #ifndef _NET_IF_VLAN_VAR_H_ #define _NET_IF_VLAN_VAR_H_ 1 -struct ether_vlan_header { - u_char evl_dhost[ETHER_ADDR_LEN]; - u_char evl_shost[ETHER_ADDR_LEN]; - u_int16_t evl_encap_proto; - u_int16_t evl_tag; - u_int16_t evl_proto; -}; - -#define EVL_VLID_MASK 0x0FFF -#define EVL_PRI_MASK 0xE000 -#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK) -#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7) -#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1) -#define EVL_MAKETAG(vlid, pri, cfi) \ - ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK)) - /* Set the VLAN ID in an mbuf packet header non-destructively. */ #define EVL_APPLY_VLID(m, vlid) \ do { \ @@ -89,6 +73,23 @@ struct vlanreq { #define SIOCSETVLAN SIOCSIFGENERIC #define SIOCGETVLAN SIOCGIFGENERIC +#define SIOCGVLANPCP _IOWR('i', 152, struct ifreq) /* Get VLAN PCP */ +#define SIOCSVLANPCP _IOW('i', 153, struct ifreq) /* Set VLAN PCP */ + +/* + * Names for 802.1q priorities ("802.1p"). Notice that in this scheme, + * (0 < 1), allowing default 0-tagged traffic to take priority over background + * tagged traffic. 
+ */ +#define IEEE8021Q_PCP_BK 1 /* Background (lowest) */ +#define IEEE8021Q_PCP_BE 0 /* Best effort (default) */ +#define IEEE8021Q_PCP_EE 2 /* Excellent effort */ +#define IEEE8021Q_PCP_CA 3 /* Critical applications */ +#define IEEE8021Q_PCP_VI 4 /* Video, < 100ms latency */ +#define IEEE8021Q_PCP_VO 5 /* Video, < 10ms latency */ +#define IEEE8021Q_PCP_IC 6 /* Internetwork control */ +#define IEEE8021Q_PCP_NC 7 /* Network control (highest) */ + #ifdef _KERNEL /* * Drivers that are capable of adding and removing the VLAN header @@ -108,7 +109,7 @@ struct vlanreq { * received VLAN tag (containing both vlan and priority information) * into the ether_vtag mbuf packet header field: * - * m->m_pkthdr.ether_vtag = vlan_id; // ntohs()? + * m->m_pkthdr.ether_vtag = vtag; // ntohs()? * m->m_flags |= M_VLANTAG; * * to mark the packet m with the specified VLAN tag. @@ -126,6 +127,16 @@ struct vlanreq { * if_capabilities. */ +/* + * The 802.1q code may also tag mbufs with the PCP (priority) field for use in + * other layers of the stack, in which case an m_tag will be used. This is + * semantically quite different from use of the ether_vtag field, which is + * defined only between the device driver and VLAN layer. + */ +#define MTAG_8021Q 1326104895 +#define MTAG_8021Q_PCP_IN 0 /* Input priority. */ +#define MTAG_8021Q_PCP_OUT 1 /* Output priority. */ + #define VLAN_CAPABILITIES(_ifp) do { \ if ((_ifp)->if_vlantrunk != NULL) \ (*vlan_trunk_cap_p)(_ifp); \ @@ -133,15 +144,15 @@ struct vlanreq { #define VLAN_TRUNKDEV(_ifp) \ (_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL -#define VLAN_TAG(_ifp, _tag) \ - (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_tag)) : EINVAL +#define VLAN_TAG(_ifp, _vid) \ + (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_vid)) : EINVAL #define VLAN_COOKIE(_ifp) \ (_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL #define VLAN_SETCOOKIE(_ifp, _cookie) \ (_ifp)->if_type == IFT_L2VLAN ? 
\ (*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL -#define VLAN_DEVAT(_ifp, _tag) \ - (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_tag)) : NULL +#define VLAN_DEVAT(_ifp, _vid) \ + (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_vid)) : NULL extern void (*vlan_trunk_cap_p)(struct ifnet *); extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); @@ -150,6 +161,14 @@ extern int (*vlan_tag_p)(struct ifnet *, uint16_t *); extern int (*vlan_setcookie_p)(struct ifnet *, void *); extern void *(*vlan_cookie_p)(struct ifnet *); +#ifdef _SYS_EVENTHANDLER_H_ +/* VLAN state change events */ +typedef void (*vlan_config_fn)(void *, struct ifnet *, uint16_t); +typedef void (*vlan_unconfig_fn)(void *, struct ifnet *, uint16_t); +EVENTHANDLER_DECLARE(vlan_config, vlan_config_fn); +EVENTHANDLER_DECLARE(vlan_unconfig, vlan_unconfig_fn); +#endif /* _SYS_EVENTHANDLER_H_ */ + #endif /* _KERNEL */ #endif /* _NET_IF_VLAN_VAR_H_ */ diff --git a/freebsd/sys/net/ifq.h b/freebsd/sys/net/ifq.h new file mode 100644 index 00000000..f0d206d8 --- /dev/null +++ b/freebsd/sys/net/ifq.h @@ -0,0 +1,484 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * From: @(#)if.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NET_IFQ_H_ +#define _NET_IFQ_H_ + +#ifdef _KERNEL +#include <sys/mbuf.h> /* ifqueue only? */ +#include <sys/buf_ring.h> +#include <net/vnet.h> +#endif /* _KERNEL */ +#include <rtems/bsd/sys/lock.h> /* XXX */ +#include <sys/mutex.h> /* struct ifqueue */ + +/* + * Couple of ugly extra definitions that are required since ifq.h + * is splitted from if_var.h. + */ +#define IF_DUNIT_NONE -1 + +#include <net/altq/if_altq.h> + +/* + * Structure defining a queue for a network interface. + */ +struct ifqueue { + struct mbuf *ifq_head; + struct mbuf *ifq_tail; + int ifq_len; + int ifq_maxlen; + struct mtx ifq_mtx; +}; + +#ifdef _KERNEL +/* + * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq) + * are queues of messages stored on ifqueue structures + * (defined above). Entries are added to and deleted from these structures + * by these macros. 
+ */ +#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx) +#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx) +#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED) +#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen) +#define _IF_QLEN(ifq) ((ifq)->ifq_len) + +#define _IF_ENQUEUE(ifq, m) do { \ + (m)->m_nextpkt = NULL; \ + if ((ifq)->ifq_tail == NULL) \ + (ifq)->ifq_head = m; \ + else \ + (ifq)->ifq_tail->m_nextpkt = m; \ + (ifq)->ifq_tail = m; \ + (ifq)->ifq_len++; \ +} while (0) + +#define IF_ENQUEUE(ifq, m) do { \ + IF_LOCK(ifq); \ + _IF_ENQUEUE(ifq, m); \ + IF_UNLOCK(ifq); \ +} while (0) + +#define _IF_PREPEND(ifq, m) do { \ + (m)->m_nextpkt = (ifq)->ifq_head; \ + if ((ifq)->ifq_tail == NULL) \ + (ifq)->ifq_tail = (m); \ + (ifq)->ifq_head = (m); \ + (ifq)->ifq_len++; \ +} while (0) + +#define IF_PREPEND(ifq, m) do { \ + IF_LOCK(ifq); \ + _IF_PREPEND(ifq, m); \ + IF_UNLOCK(ifq); \ +} while (0) + +#define _IF_DEQUEUE(ifq, m) do { \ + (m) = (ifq)->ifq_head; \ + if (m) { \ + if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \ + (ifq)->ifq_tail = NULL; \ + (m)->m_nextpkt = NULL; \ + (ifq)->ifq_len--; \ + } \ +} while (0) + +#define IF_DEQUEUE(ifq, m) do { \ + IF_LOCK(ifq); \ + _IF_DEQUEUE(ifq, m); \ + IF_UNLOCK(ifq); \ +} while (0) + +#define _IF_DEQUEUE_ALL(ifq, m) do { \ + (m) = (ifq)->ifq_head; \ + (ifq)->ifq_head = (ifq)->ifq_tail = NULL; \ + (ifq)->ifq_len = 0; \ +} while (0) + +#define IF_DEQUEUE_ALL(ifq, m) do { \ + IF_LOCK(ifq); \ + _IF_DEQUEUE_ALL(ifq, m); \ + IF_UNLOCK(ifq); \ +} while (0) + +#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head) +#define IF_POLL(ifq, m) _IF_POLL(ifq, m) + +#define _IF_DRAIN(ifq) do { \ + struct mbuf *m; \ + for (;;) { \ + _IF_DEQUEUE(ifq, m); \ + if (m == NULL) \ + break; \ + m_freem(m); \ + } \ +} while (0) + +#define IF_DRAIN(ifq) do { \ + IF_LOCK(ifq); \ + _IF_DRAIN(ifq); \ + IF_UNLOCK(ifq); \ +} while(0) + +int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, + int adjust); +#define 
IF_HANDOFF(ifq, m, ifp) \ + if_handoff((struct ifqueue *)ifq, m, ifp, 0) +#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \ + if_handoff((struct ifqueue *)ifq, m, ifp, adj) + +void if_start(struct ifnet *); + +#define IFQ_ENQUEUE(ifq, m, err) \ +do { \ + IF_LOCK(ifq); \ + if (ALTQ_IS_ENABLED(ifq)) \ + ALTQ_ENQUEUE(ifq, m, NULL, err); \ + else { \ + if (_IF_QFULL(ifq)) { \ + m_freem(m); \ + (err) = ENOBUFS; \ + } else { \ + _IF_ENQUEUE(ifq, m); \ + (err) = 0; \ + } \ + } \ + IF_UNLOCK(ifq); \ +} while (0) + +#define IFQ_DEQUEUE_NOLOCK(ifq, m) \ +do { \ + if (TBR_IS_ENABLED(ifq)) \ + (m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \ + else if (ALTQ_IS_ENABLED(ifq)) \ + ALTQ_DEQUEUE(ifq, m); \ + else \ + _IF_DEQUEUE(ifq, m); \ +} while (0) + +#define IFQ_DEQUEUE(ifq, m) \ +do { \ + IF_LOCK(ifq); \ + IFQ_DEQUEUE_NOLOCK(ifq, m); \ + IF_UNLOCK(ifq); \ +} while (0) + +#define IFQ_POLL_NOLOCK(ifq, m) \ +do { \ + if (TBR_IS_ENABLED(ifq)) \ + (m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \ + else if (ALTQ_IS_ENABLED(ifq)) \ + ALTQ_POLL(ifq, m); \ + else \ + _IF_POLL(ifq, m); \ +} while (0) + +#define IFQ_POLL(ifq, m) \ +do { \ + IF_LOCK(ifq); \ + IFQ_POLL_NOLOCK(ifq, m); \ + IF_UNLOCK(ifq); \ +} while (0) + +#define IFQ_PURGE_NOLOCK(ifq) \ +do { \ + if (ALTQ_IS_ENABLED(ifq)) { \ + ALTQ_PURGE(ifq); \ + } else \ + _IF_DRAIN(ifq); \ +} while (0) + +#define IFQ_PURGE(ifq) \ +do { \ + IF_LOCK(ifq); \ + IFQ_PURGE_NOLOCK(ifq); \ + IF_UNLOCK(ifq); \ +} while (0) + +#define IFQ_SET_READY(ifq) \ + do { ((ifq)->altq_flags |= ALTQF_READY); } while (0) + +#define IFQ_LOCK(ifq) IF_LOCK(ifq) +#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq) +#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq) +#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0) +#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++) +#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len) +#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len)) + +/* + * The IFF_DRV_OACTIVE test should really occur in the device driver, not in + * the handoff logic, as that flag is locked by 
the device driver. + */ +#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \ +do { \ + int len; \ + short mflags; \ + \ + len = (m)->m_pkthdr.len; \ + mflags = (m)->m_flags; \ + IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \ + if ((err) == 0) { \ + if_inc_counter((ifp), IFCOUNTER_OBYTES, len + (adj)); \ + if (mflags & M_MCAST) \ + if_inc_counter((ifp), IFCOUNTER_OMCASTS, 1); \ + if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \ + if_start(ifp); \ + } else \ + if_inc_counter((ifp), IFCOUNTER_OQDROPS, 1); \ +} while (0) + +#define IFQ_HANDOFF(ifp, m, err) \ + IFQ_HANDOFF_ADJ(ifp, m, 0, err) + +#define IFQ_DRV_DEQUEUE(ifq, m) \ +do { \ + (m) = (ifq)->ifq_drv_head; \ + if (m) { \ + if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \ + (ifq)->ifq_drv_tail = NULL; \ + (m)->m_nextpkt = NULL; \ + (ifq)->ifq_drv_len--; \ + } else { \ + IFQ_LOCK(ifq); \ + IFQ_DEQUEUE_NOLOCK(ifq, m); \ + while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \ + struct mbuf *m0; \ + IFQ_DEQUEUE_NOLOCK(ifq, m0); \ + if (m0 == NULL) \ + break; \ + m0->m_nextpkt = NULL; \ + if ((ifq)->ifq_drv_tail == NULL) \ + (ifq)->ifq_drv_head = m0; \ + else \ + (ifq)->ifq_drv_tail->m_nextpkt = m0; \ + (ifq)->ifq_drv_tail = m0; \ + (ifq)->ifq_drv_len++; \ + } \ + IFQ_UNLOCK(ifq); \ + } \ +} while (0) + +#define IFQ_DRV_PREPEND(ifq, m) \ +do { \ + (m)->m_nextpkt = (ifq)->ifq_drv_head; \ + if ((ifq)->ifq_drv_tail == NULL) \ + (ifq)->ifq_drv_tail = (m); \ + (ifq)->ifq_drv_head = (m); \ + (ifq)->ifq_drv_len++; \ +} while (0) + +#define IFQ_DRV_IS_EMPTY(ifq) \ + (((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0)) + +#define IFQ_DRV_PURGE(ifq) \ +do { \ + struct mbuf *m, *n = (ifq)->ifq_drv_head; \ + while((m = n) != NULL) { \ + n = m->m_nextpkt; \ + m_freem(m); \ + } \ + (ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \ + (ifq)->ifq_drv_len = 0; \ + IFQ_PURGE(ifq); \ +} while (0) + +static __inline int +drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m) +{ + int error = 0; + +#ifdef ALTQ + if 
(ALTQ_IS_ENABLED(&ifp->if_snd)) { + IFQ_ENQUEUE(&ifp->if_snd, m, error); + if (error) + if_inc_counter((ifp), IFCOUNTER_OQDROPS, 1); + return (error); + } +#endif + error = buf_ring_enqueue(br, m); + if (error) + m_freem(m); + + return (error); +} + +static __inline void +drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new) +{ + /* + * The top of the list needs to be swapped + * for this one. + */ +#ifdef ALTQ + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + /* + * Peek in altq case dequeued it + * so put it back. + */ + IFQ_DRV_PREPEND(&ifp->if_snd, new); + return; + } +#endif + buf_ring_putback_sc(br, new); +} + +static __inline struct mbuf * +drbr_peek(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + struct mbuf *m; + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + /* + * Pull it off like a dequeue + * since drbr_advance() does nothing + * for altq and drbr_putback() will + * use the old prepend function. + */ + IFQ_DEQUEUE(&ifp->if_snd, m); + return (m); + } +#endif + return(buf_ring_peek_clear_sc(br)); +} + +static __inline void +drbr_flush(struct ifnet *ifp, struct buf_ring *br) +{ + struct mbuf *m; + +#ifdef ALTQ + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) + IFQ_PURGE(&ifp->if_snd); +#endif + while ((m = buf_ring_dequeue_sc(br)) != NULL) + m_freem(m); +} + +static __inline void +drbr_free(struct buf_ring *br, struct malloc_type *type) +{ + + drbr_flush(NULL, br); + buf_ring_free(br, type); +} + +static __inline struct mbuf * +drbr_dequeue(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + struct mbuf *m; + + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + IFQ_DEQUEUE(&ifp->if_snd, m); + return (m); + } +#endif + return (buf_ring_dequeue_sc(br)); +} + +static __inline void +drbr_advance(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + /* Nothing to do here since peek dequeues in altq case */ + if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) + return; +#endif + return 
(buf_ring_advance_sc(br)); +} + + +static __inline struct mbuf * +drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br, + int (*func) (struct mbuf *, void *), void *arg) +{ + struct mbuf *m; +#ifdef ALTQ + if (ALTQ_IS_ENABLED(&ifp->if_snd)) { + IFQ_LOCK(&ifp->if_snd); + IFQ_POLL_NOLOCK(&ifp->if_snd, m); + if (m != NULL && func(m, arg) == 0) { + IFQ_UNLOCK(&ifp->if_snd); + return (NULL); + } + IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m); + IFQ_UNLOCK(&ifp->if_snd); + return (m); + } +#endif + m = buf_ring_peek(br); + if (m == NULL || func(m, arg) == 0) + return (NULL); + + return (buf_ring_dequeue_sc(br)); +} + +static __inline int +drbr_empty(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + if (ALTQ_IS_ENABLED(&ifp->if_snd)) + return (IFQ_IS_EMPTY(&ifp->if_snd)); +#endif + return (buf_ring_empty(br)); +} + +static __inline int +drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + if (ALTQ_IS_ENABLED(&ifp->if_snd)) + return (1); +#endif + return (!buf_ring_empty(br)); +} + +static __inline int +drbr_inuse(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + if (ALTQ_IS_ENABLED(&ifp->if_snd)) + return (ifp->if_snd.ifq_len); +#endif + return (buf_ring_count(br)); +} + +extern int ifqmaxlen; + +void if_qflush(struct ifnet *); +void ifq_init(struct ifaltq *, struct ifnet *ifp); +void ifq_delete(struct ifaltq *); + +#endif /* _KERNEL */ +#endif /* !_NET_IFQ_H_ */ diff --git a/freebsd/sys/net/iso88025.h b/freebsd/sys/net/iso88025.h index 6edd2e0b..11bd6ec4 100644 --- a/freebsd/sys/net/iso88025.h +++ b/freebsd/sys/net/iso88025.h @@ -162,11 +162,13 @@ struct iso88025_addr { #define ISO88025_BPF_UNSUPPORTED 0 #define ISO88025_BPF_SUPPORTED 1 +#ifdef _KERNEL void iso88025_ifattach (struct ifnet *, const u_int8_t *, int); void iso88025_ifdetach (struct ifnet *, int); int iso88025_ioctl (struct ifnet *, u_long, caddr_t ); -int iso88025_output (struct ifnet *, struct mbuf *, struct sockaddr *, - struct route *); +int iso88025_output (struct 
ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); void iso88025_input (struct ifnet *, struct mbuf *); +#endif /* _KERNEL */ -#endif +#endif /* !_NET_ISO88025_H_ */ diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c index f43cffa1..f14b2e95 100644 --- a/freebsd/sys/net/netisr.c +++ b/freebsd/sys/net/netisr.c @@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/kernel.h> #include <sys/kthread.h> +#include <sys/malloc.h> #include <sys/interrupt.h> #include <rtems/bsd/sys/lock.h> #include <sys/mbuf.h> @@ -131,7 +132,7 @@ static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr"); /*- * Three global direct dispatch policies are supported: * - * NETISR_DISPATCH_QUEUED: All work is deferred for a netisr, regardless of + * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of * context (may be overriden by protocols). * * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch, @@ -151,37 +152,25 @@ static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr"); #define NETISR_DISPATCH_POLICY_MAXSTR 20 /* Used for temporary buffers. */ static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT; static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS); -SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RW | - CTLFLAG_TUN, 0, 0, sysctl_netisr_dispatch_policy, "A", +SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN, + 0, 0, sysctl_netisr_dispatch_policy, "A", "netisr dispatch policy"); /* - * These sysctls were used in previous versions to control and export - * dispatch policy state. Now, we provide read-only export via them so that - * older netstat binaries work. At some point they can be garbage collected. 
- */ -static int netisr_direct_force; -SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RD, - &netisr_direct_force, 0, "compat: force direct dispatch"); - -static int netisr_direct; -SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RD, &netisr_direct, 0, - "compat: enable direct dispatch"); - -/* * Allow the administrator to limit the number of threads (CPUs) to use for * netisr. We don't check netisr_maxthreads before creating the thread for - * CPU 0, so in practice we ignore values <= 1. This must be set at boot. - * We will create at most one thread per CPU. + * CPU 0. This must be set at boot. We will create at most one thread per CPU. + * By default we initialize this to 1 which would assign just 1 cpu (cpu0) and + * therefore only 1 workstream. If set to -1, netisr would use all cpus + * (mp_ncpus) and therefore would have those many workstreams. One workstream + * per thread (CPU). */ -static int netisr_maxthreads = -1; /* Max number of threads. */ -TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads); +static int netisr_maxthreads = 1; /* Max number of threads. */ SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN, &netisr_maxthreads, 0, "Use at most this many CPUs for netisr processing"); static int netisr_bindthreads = 0; /* Bind threads to CPUs. 
*/ -TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads); SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN, &netisr_bindthreads, 0, "Bind netisr threads to CPUs."); @@ -192,7 +181,6 @@ SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN, */ #define NETISR_DEFAULT_MAXQLIMIT 10240 static u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT; -TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit); SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN, &netisr_maxqlimit, 0, "Maximum netisr per-protocol, per-CPU queue depth."); @@ -204,7 +192,6 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN, */ #define NETISR_DEFAULT_DEFAULTQLIMIT 256 static u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT; -TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit); SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN, &netisr_defaultqlimit, 0, "Default netisr per-protocol, per-CPU queue limit if not set by protocol"); @@ -225,6 +212,23 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD, */ static struct netisr_proto netisr_proto[NETISR_MAXPROT]; +#ifdef VIMAGE +/* + * The netisr_enable array describes a per-VNET flag for registered + * protocols on whether this netisr is active in this VNET or not. + * netisr_register() will automatically enable the netisr for the + * default VNET and all currently active instances. + * netisr_unregister() will disable all active VNETs, including vnet0. + * Individual network stack instances can be enabled/disabled by the + * netisr_(un)register _vnet() functions. + * With this we keep the one netisr_proto per protocol but add a + * mechanism to stop netisr processing for vnet teardown. + * Apart from that we expect a VNET to always be enabled. + */ +static VNET_DEFINE(u_int, netisr_enable[NETISR_MAXPROT]); +#define V_netisr_enable VNET(netisr_enable) +#endif + #ifndef __rtems__ /* * Per-CPU workstream data. See netisr_internal.h for more details. 
@@ -275,10 +279,7 @@ u_int netisr_get_cpuid(u_int cpunumber) { - KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber, - nws_count)); - - return (nws_array[cpunumber]); + return (nws_array[cpunumber % nws_count]); } /* @@ -308,8 +309,6 @@ static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = { { NETISR_DISPATCH_HYBRID, "hybrid" }, { NETISR_DISPATCH_DIRECT, "direct" }, }; -static const u_int netisr_dispatch_table_len = - (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0])); static void netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer, @@ -320,7 +319,7 @@ netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer, u_int i; str = "unknown"; - for (i = 0; i < netisr_dispatch_table_len; i++) { + for (i = 0; i < nitems(netisr_dispatch_table); i++) { ndtep = &netisr_dispatch_table[i]; if (ndtep->ndte_policy == dispatch_policy) { str = ndtep->ndte_policy_str; @@ -336,7 +335,7 @@ netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp) const struct netisr_dispatch_table_entry *ndtep; u_int i; - for (i = 0; i < netisr_dispatch_table_len; i++) { + for (i = 0; i < nitems(netisr_dispatch_table); i++) { ndtep = &netisr_dispatch_table[i]; if (strcmp(ndtep->ndte_policy_str, str) == 0) { *dispatch_policyp = ndtep->ndte_policy; @@ -346,32 +345,6 @@ netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp) return (EINVAL); } -static void -netisr_dispatch_policy_compat(void) -{ - - switch (netisr_dispatch_policy) { - case NETISR_DISPATCH_DEFERRED: - netisr_direct_force = 0; - netisr_direct = 0; - break; - - case NETISR_DISPATCH_HYBRID: - netisr_direct_force = 0; - netisr_direct = 1; - break; - - case NETISR_DISPATCH_DIRECT: - netisr_direct_force = 1; - netisr_direct = 1; - break; - - default: - panic("%s: unknown policy %u", __func__, - netisr_dispatch_policy); - } -} - static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS) { @@ -387,10 +360,8 @@ 
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS) &dispatch_policy); if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT) error = EINVAL; - if (error == 0) { + if (error == 0) netisr_dispatch_policy = dispatch_policy; - netisr_dispatch_policy_compat(); - } } return (error); } @@ -403,6 +374,7 @@ sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS) void netisr_register(const struct netisr_handler *nhp) { + VNET_ITERATOR_DECL(vnet_iter); struct netisr_work *npwp; const char *name; u_int i, proto; @@ -475,6 +447,22 @@ netisr_register(const struct netisr_handler *nhp) bzero(npwp, sizeof(*npwp)); npwp->nw_qlimit = netisr_proto[proto].np_qlimit; } + +#ifdef VIMAGE + /* + * Test that we are in vnet0 and have a curvnet set. + */ + KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__)); + KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p", + __func__, curvnet, vnet0)); + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + V_netisr_enable[proto] = 1; + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); +#endif NETISR_WUNLOCK(); } @@ -651,6 +639,7 @@ netisr_drain_proto(struct netisr_work *npwp) void netisr_unregister(const struct netisr_handler *nhp) { + VNET_ITERATOR_DECL(vnet_iter); struct netisr_work *npwp; #ifdef INVARIANTS const char *name; @@ -669,6 +658,16 @@ netisr_unregister(const struct netisr_handler *nhp) ("%s(%u): protocol not registered for %s", __func__, proto, name)); +#ifdef VIMAGE + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + V_netisr_enable[proto] = 0; + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); +#endif + netisr_proto[proto].np_name = NULL; netisr_proto[proto].np_handler = NULL; netisr_proto[proto].np_m2flow = NULL; @@ -687,6 +686,97 @@ netisr_unregister(const struct netisr_handler *nhp) NETISR_WUNLOCK(); } +#ifdef VIMAGE +void +netisr_register_vnet(const struct netisr_handler *nhp) +{ + u_int proto; + + proto = nhp->nh_proto; + + 
KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__)); + KASSERT(proto < NETISR_MAXPROT, + ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name)); + NETISR_WLOCK(); + KASSERT(netisr_proto[proto].np_handler != NULL, + ("%s(%u): protocol not registered for %s", __func__, proto, + nhp->nh_name)); + + V_netisr_enable[proto] = 1; + NETISR_WUNLOCK(); +} + +static void +netisr_drain_proto_vnet(struct vnet *vnet, u_int proto) +{ + struct netisr_workstream *nwsp; + struct netisr_work *npwp; + struct mbuf *m, *mp, *n, *ne; + u_int i; + + KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__)); + NETISR_LOCK_ASSERT(); + + CPU_FOREACH(i) { + nwsp = DPCPU_ID_PTR(i, nws); + if (nwsp->nws_intr_event == NULL) + continue; + npwp = &nwsp->nws_work[proto]; + NWS_LOCK(nwsp); + + /* + * Rather than dissecting and removing mbufs from the middle + * of the chain, we build a new chain if the packet stays and + * update the head and tail pointers at the end. All packets + * matching the given vnet are freed. + */ + m = npwp->nw_head; + n = ne = NULL; + while (m != NULL) { + mp = m; + m = m->m_nextpkt; + mp->m_nextpkt = NULL; + if (mp->m_pkthdr.rcvif->if_vnet != vnet) { + if (n == NULL) { + n = ne = mp; + } else { + ne->m_nextpkt = mp; + ne = mp; + } + continue; + } + /* This is a packet in the selected vnet. Free it. 
*/ + npwp->nw_len--; + m_freem(mp); + } + npwp->nw_head = n; + npwp->nw_tail = ne; + NWS_UNLOCK(nwsp); + } +} + +void +netisr_unregister_vnet(const struct netisr_handler *nhp) +{ + u_int proto; + + proto = nhp->nh_proto; + + KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__)); + KASSERT(proto < NETISR_MAXPROT, + ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name)); + NETISR_WLOCK(); + KASSERT(netisr_proto[proto].np_handler != NULL, + ("%s(%u): protocol not registered for %s", __func__, proto, + nhp->nh_name)); + + V_netisr_enable[proto] = 0; + + netisr_drain_proto_vnet(curvnet, proto); + NETISR_WUNLOCK(); +} +#endif + /* * Compose the global and per-protocol policies on dispatch, and return the * dispatch policy to use. @@ -746,22 +836,25 @@ netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy, * dispatch. In the queued case, fall back on the SOURCE * policy. */ - if (*cpuidp != NETISR_CPUID_NONE) + if (*cpuidp != NETISR_CPUID_NONE) { + *cpuidp = netisr_get_cpuid(*cpuidp); return (m); + } if (dispatch_policy == NETISR_DISPATCH_HYBRID) { - *cpuidp = curcpu; + *cpuidp = netisr_get_cpuid(curcpu); return (m); } policy = NETISR_POLICY_SOURCE; } if (policy == NETISR_POLICY_FLOW) { - if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) { + if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE && + npp->np_m2flow != NULL) { m = npp->np_m2flow(m, source); if (m == NULL) return (NULL); } - if (m->m_flags & M_FLOWID) { + if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { *cpuidp = netisr_default_flow2cpu(m->m_pkthdr.flowid); return (m); @@ -984,6 +1077,13 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m) KASSERT(netisr_proto[proto].np_handler != NULL, ("%s: invalid proto %u", __func__, proto)); +#ifdef VIMAGE + if (V_netisr_enable[proto] == 0) { + m_freem(m); + return (ENOPROTOOPT); + } +#endif + m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED, source, m, &cpuid); if (m != NULL) { @@ -1030,6 +1130,13 @@ 
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__, proto)); +#ifdef VIMAGE + if (V_netisr_enable[proto] == 0) { + m_freem(m); + return (ENOPROTOOPT); + } +#endif + dispatch_policy = netisr_get_dispatch(npp); if (dispatch_policy == NETISR_DISPATCH_DEFERRED) return (netisr_queue_src(proto, source, m)); @@ -1215,15 +1322,15 @@ netisr_start_swi(u_int cpuid, struct pcpu *pc) static void netisr_init(void *arg) { - char tmp[NETISR_DISPATCH_POLICY_MAXSTR]; - u_int dispatch_policy; - int error; - - KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__)); +#ifdef EARLY_AP_STARTUP + struct pcpu *pc; +#endif NETISR_LOCK_INIT(); - if (netisr_maxthreads < 1) - netisr_maxthreads = 1; + if (netisr_maxthreads == 0 || netisr_maxthreads < -1 ) + netisr_maxthreads = 1; /* default behavior */ + else if (netisr_maxthreads == -1) + netisr_maxthreads = mp_ncpus; /* use max cpus */ if (netisr_maxthreads > mp_ncpus) { printf("netisr_init: forcing maxthreads from %d to %d\n", netisr_maxthreads, mp_ncpus); @@ -1248,31 +1355,24 @@ netisr_init(void *arg) } #endif -#ifndef __rtems__ - if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) { - error = netisr_dispatch_policy_from_str(tmp, - &dispatch_policy); - if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT) - error = EINVAL; - if (error == 0) { - netisr_dispatch_policy = dispatch_policy; - netisr_dispatch_policy_compat(); - } else - printf( - "%s: invalid dispatch policy %s, using default\n", - __func__, tmp); +#ifdef EARLY_AP_STARTUP + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (nws_count >= netisr_maxthreads) + break; + netisr_start_swi(pc->pc_cpuid, pc); } -#endif /* __rtems__ */ - +#else #ifndef __rtems__ netisr_start_swi(curcpu, pcpu_find(curcpu)); #else /* __rtems__ */ netisr_start_swi(0, NULL); #endif /* __rtems__ */ +#endif } SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL); #ifndef __rtems__ +#ifndef 
EARLY_AP_STARTUP /* * Start worker threads for additional CPUs. No attempt to gracefully handle * work reassignment, we don't yet support dynamic reconfiguration. @@ -1285,9 +1385,6 @@ netisr_start(void *arg) STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (nws_count >= netisr_maxthreads) break; - /* XXXRW: Is skipping absent CPUs still required here? */ - if (CPU_ABSENT(pc->pc_cpuid)) - continue; /* Worker will already be present for boot CPU. */ if (pc->pc_netisr != NULL) continue; @@ -1295,6 +1392,7 @@ netisr_start(void *arg) } } SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL); +#endif #endif /* __rtems__ */ /* diff --git a/freebsd/sys/net/netisr.h b/freebsd/sys/net/netisr.h index 83bf9ce5..63764a74 100644 --- a/freebsd/sys/net/netisr.h +++ b/freebsd/sys/net/netisr.h @@ -52,15 +52,13 @@ #define NETISR_IP 1 #define NETISR_IGMP 2 /* IGMPv3 output queue */ #define NETISR_ROUTE 3 /* routing socket */ -#define NETISR_AARP 4 /* Appletalk ARP */ -#define NETISR_ATALK2 5 /* Appletalk phase 2 */ -#define NETISR_ATALK1 6 /* Appletalk phase 1 */ -#define NETISR_ARP 7 /* same as AF_LINK */ -#define NETISR_IPX 8 /* same as AF_IPX */ -#define NETISR_ETHER 9 /* ethernet input */ -#define NETISR_IPV6 10 -#define NETISR_NATM 11 -#define NETISR_EPAIR 12 /* if_epair(4) */ +#define NETISR_ARP 4 /* same as AF_LINK */ +#define NETISR_ETHER 5 /* ethernet input */ +#define NETISR_IPV6 6 +#define NETISR_NATM 7 +#define NETISR_EPAIR 8 /* if_epair(4) */ +#define NETISR_IP_DIRECT 9 /* direct-dispatch IPv4 */ +#define NETISR_IPV6_DIRECT 10 /* direct-dispatch IPv6 */ /* * Protocol ordering and affinity policy constants. 
See the detailed @@ -212,6 +210,10 @@ void netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp); void netisr_register(const struct netisr_handler *nhp); int netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit); void netisr_unregister(const struct netisr_handler *nhp); +#ifdef VIMAGE +void netisr_register_vnet(const struct netisr_handler *nhp); +void netisr_unregister_vnet(const struct netisr_handler *nhp); +#endif /* * Process a packet destined for a protocol, and attempt direct dispatch. diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c index 123d03c4..7fcecc88 100644 --- a/freebsd/sys/net/pfil.c +++ b/freebsd/sys/net/pfil.c @@ -47,6 +47,7 @@ #include <sys/queue.h> #include <net/if.h> +#include <net/if_var.h> #include <net/pfil.h> static struct mtx pfil_global_lock; @@ -54,18 +55,18 @@ static struct mtx pfil_global_lock; MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock", MTX_DEF); -static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int); - -static int pfil_list_remove(pfil_list_t *, - int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *), - void *); +static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *); +static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int); +static int pfil_chain_remove(pfil_chain_t *, pfil_func_t, void *); LIST_HEAD(pfilheadhead, pfil_head); VNET_DEFINE(struct pfilheadhead, pfil_head_list); #define V_pfil_head_list VNET(pfil_head_list) +VNET_DEFINE(struct rmlock, pfil_lock); +#define V_pfil_lock VNET(pfil_lock) /* - * pfil_run_hooks() runs the specified packet filter hooks. + * pfil_run_hooks() runs the specified packet filter hook chain. 
*/ int pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp, @@ -78,8 +79,8 @@ pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp, PFIL_RLOCK(ph, &rmpt); KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0")); - for (pfh = pfil_hook_get(dir, ph); pfh != NULL; - pfh = TAILQ_NEXT(pfh, pfil_link)) { + for (pfh = pfil_chain_get(dir, ph); pfh != NULL; + pfh = TAILQ_NEXT(pfh, pfil_chain)) { if (pfh->pfil_func != NULL) { rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir, inp); @@ -92,6 +93,80 @@ pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp, return (rv); } +static struct packet_filter_hook * +pfil_chain_get(int dir, struct pfil_head *ph) +{ + + if (dir == PFIL_IN) + return (TAILQ_FIRST(&ph->ph_in)); + else if (dir == PFIL_OUT) + return (TAILQ_FIRST(&ph->ph_out)); + else + return (NULL); +} + +/* + * pfil_try_rlock() acquires rm reader lock for specified head + * if this is immediately possible. + */ +int +pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker) +{ + + return (PFIL_TRY_RLOCK(ph, tracker)); +} + +/* + * pfil_rlock() acquires rm reader lock for specified head. + */ +void +pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker) +{ + + PFIL_RLOCK(ph, tracker); +} + +/* + * pfil_runlock() releases reader lock for specified head. + */ +void +pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker) +{ + + PFIL_RUNLOCK(ph, tracker); +} + +/* + * pfil_wlock() acquires writer lock for specified head. + */ +void +pfil_wlock(struct pfil_head *ph) +{ + + PFIL_WLOCK(ph); +} + +/* + * pfil_wunlock() releases writer lock for specified head. + */ +void +pfil_wunlock(struct pfil_head *ph) +{ + + PFIL_WUNLOCK(ph); +} + +/* + * pfil_wowned() returns a non-zero value if the current thread owns + * an exclusive lock. 
+ */ +int +pfil_wowned(struct pfil_head *ph) +{ + + return (PFIL_WOWNED(ph)); +} + /* * pfil_head_register() registers a pfil_head with the packet filter hook * mechanism. @@ -101,11 +176,11 @@ pfil_head_register(struct pfil_head *ph) { struct pfil_head *lph; - PFIL_LIST_LOCK(); + PFIL_HEADLIST_LOCK(); LIST_FOREACH(lph, &V_pfil_head_list, ph_list) { if (ph->ph_type == lph->ph_type && ph->ph_un.phu_val == lph->ph_un.phu_val) { - PFIL_LIST_UNLOCK(); + PFIL_HEADLIST_UNLOCK(); return (EEXIST); } } @@ -114,7 +189,7 @@ pfil_head_register(struct pfil_head *ph) TAILQ_INIT(&ph->ph_in); TAILQ_INIT(&ph->ph_out); LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list); - PFIL_LIST_UNLOCK(); + PFIL_HEADLIST_UNLOCK(); return (0); } @@ -128,12 +203,12 @@ pfil_head_unregister(struct pfil_head *ph) { struct packet_filter_hook *pfh, *pfnext; - PFIL_LIST_LOCK(); + PFIL_HEADLIST_LOCK(); LIST_REMOVE(ph, ph_list); - PFIL_LIST_UNLOCK(); - TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_link, pfnext) + PFIL_HEADLIST_UNLOCK(); + TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext) free(pfh, M_IFADDR); - TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_link, pfnext) + TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext) free(pfh, M_IFADDR); PFIL_LOCK_DESTROY(ph); return (0); @@ -147,11 +222,11 @@ pfil_head_get(int type, u_long val) { struct pfil_head *ph; - PFIL_LIST_LOCK(); + PFIL_HEADLIST_LOCK(); LIST_FOREACH(ph, &V_pfil_head_list, ph_list) if (ph->ph_type == type && ph->ph_un.phu_val == val) break; - PFIL_LIST_UNLOCK(); + PFIL_HEADLIST_UNLOCK(); return (ph); } @@ -164,8 +239,7 @@ pfil_head_get(int type, u_long val) * PFIL_WAITOK OK to call malloc with M_WAITOK. 
*/ int -pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *), void *arg, int flags, struct pfil_head *ph) +pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) { struct packet_filter_hook *pfh1 = NULL; struct packet_filter_hook *pfh2 = NULL; @@ -191,7 +265,7 @@ pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, if (flags & PFIL_IN) { pfh1->pfil_func = func; pfh1->pfil_arg = arg; - err = pfil_list_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT); + err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT); if (err) goto locked_error; ph->ph_nhooks++; @@ -199,10 +273,10 @@ pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, if (flags & PFIL_OUT) { pfh2->pfil_func = func; pfh2->pfil_arg = arg; - err = pfil_list_add(&ph->ph_out, pfh2, flags & ~PFIL_IN); + err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN); if (err) { if (flags & PFIL_IN) - pfil_list_remove(&ph->ph_in, func, arg); + pfil_chain_remove(&ph->ph_in, func, arg); goto locked_error; } ph->ph_nhooks++; @@ -221,22 +295,21 @@ error: /* * pfil_remove_hook removes a specific function from the packet filter hook - * list. + * chain. */ int -pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *), void *arg, int flags, struct pfil_head *ph) +pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) { int err = 0; PFIL_WLOCK(ph); if (flags & PFIL_IN) { - err = pfil_list_remove(&ph->ph_in, func, arg); + err = pfil_chain_remove(&ph->ph_in, func, arg); if (err == 0) ph->ph_nhooks--; } if ((err == 0) && (flags & PFIL_OUT)) { - err = pfil_list_remove(&ph->ph_out, func, arg); + err = pfil_chain_remove(&ph->ph_out, func, arg); if (err == 0) ph->ph_nhooks--; } @@ -244,15 +317,18 @@ pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, return (err); } +/* + * Internal: Add a new pfil hook into a hook chain. 
+ */ static int -pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags) +pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags) { struct packet_filter_hook *pfh; /* * First make sure the hook is not already there. */ - TAILQ_FOREACH(pfh, list, pfil_link) + TAILQ_FOREACH(pfh, chain, pfil_chain) if (pfh->pfil_func == pfh1->pfil_func && pfh->pfil_arg == pfh1->pfil_arg) return (EEXIST); @@ -262,26 +338,23 @@ pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags) * the same path is followed in or out of the kernel. */ if (flags & PFIL_IN) - TAILQ_INSERT_HEAD(list, pfh1, pfil_link); + TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain); else - TAILQ_INSERT_TAIL(list, pfh1, pfil_link); + TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain); return (0); } /* - * pfil_list_remove is an internal function that takes a function off the - * specified list. + * Internal: Remove a pfil hook from a hook chain. */ static int -pfil_list_remove(pfil_list_t *list, - int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *), - void *arg) +pfil_chain_remove(pfil_chain_t *chain, pfil_func_t func, void *arg) { struct packet_filter_hook *pfh; - TAILQ_FOREACH(pfh, list, pfil_link) + TAILQ_FOREACH(pfh, chain, pfil_chain) if (pfh->pfil_func == func && pfh->pfil_arg == arg) { - TAILQ_REMOVE(list, pfh, pfil_link); + TAILQ_REMOVE(chain, pfh, pfil_chain); free(pfh, M_IFADDR); return (0); } @@ -292,36 +365,34 @@ pfil_list_remove(pfil_list_t *list, * Stuff that must be initialized for every instance (including the first of * course). */ -static int -vnet_pfil_init(const void *unused) +static void +vnet_pfil_init(const void *unused __unused) { LIST_INIT(&V_pfil_head_list); - return (0); + PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared"); } /* * Called for the removal of each instance. 
*/ -static int -vnet_pfil_uninit(const void *unused) +static void +vnet_pfil_uninit(const void *unused __unused) { - /* XXX should panic if list is not empty */ - return (0); + KASSERT(LIST_EMPTY(&V_pfil_head_list), + ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list)); + PFIL_LOCK_DESTROY_REAL(&V_pfil_lock); } -/* Define startup order. */ -#define PFIL_SYSINIT_ORDER SI_SUB_PROTO_BEGIN -#define PFIL_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */ -#define PFIL_VNET_ORDER (PFIL_MODEVENT_ORDER + 2) /* Later still. */ - /* * Starting up. * * VNET_SYSINIT is called for each existing vnet and each new vnet. + * Make sure the pfil bits are first before any possible subsystem which + * might piggyback on the SI_SUB_PROTO_PFIL. */ -VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, +VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, vnet_pfil_init, NULL); /* @@ -329,5 +400,5 @@ VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, * * VNET_SYSUNINIT is called for each exiting vnet as it exits. */ -VNET_SYSUNINIT(vnet_pfil_uninit, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, +VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, vnet_pfil_uninit, NULL); diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h index da06dedf..b78023b7 100644 --- a/freebsd/sys/net/pfil.h +++ b/freebsd/sys/net/pfil.h @@ -43,15 +43,18 @@ struct mbuf; struct ifnet; struct inpcb; +typedef int (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int, + struct inpcb *); + /* * The packet filter hooks are designed for anything to call them to - * possibly intercept the packet. + * possibly intercept the packet. Multiple filter hooks are chained + * together and after each other in the specified order. 
*/ struct packet_filter_hook { - TAILQ_ENTRY(packet_filter_hook) pfil_link; - int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); - void *pfil_arg; + TAILQ_ENTRY(packet_filter_hook) pfil_chain; + pfil_func_t pfil_func; + void *pfil_arg; }; #define PFIL_IN 0x00000001 @@ -59,63 +62,87 @@ struct packet_filter_hook { #define PFIL_WAITOK 0x00000004 #define PFIL_ALL (PFIL_IN|PFIL_OUT) -typedef TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t; +typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t; #define PFIL_TYPE_AF 1 /* key is AF_* type */ #define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */ +#define PFIL_FLAG_PRIVATE_LOCK 0x01 /* Personal lock instead of global */ + +/* + * A pfil head is created by each protocol or packet intercept point. + * For packet is then run through the hook chain for inspection. + */ struct pfil_head { - pfil_list_t ph_in; - pfil_list_t ph_out; - int ph_type; - int ph_nhooks; + pfil_chain_t ph_in; + pfil_chain_t ph_out; + int ph_type; + int ph_nhooks; #if defined( __linux__ ) || defined( _WIN32 ) - rwlock_t ph_mtx; + rwlock_t ph_mtx; #else - struct rmlock ph_lock; + struct rmlock *ph_plock; /* Pointer to the used lock */ + struct rmlock ph_lock; /* Private lock storage */ + int flags; #endif union { - u_long phu_val; - void *phu_ptr; + u_long phu_val; + void *phu_ptr; } ph_un; -#define ph_af ph_un.phu_val -#define ph_ifnet ph_un.phu_ptr +#define ph_af ph_un.phu_val +#define ph_ifnet ph_un.phu_ptr LIST_ENTRY(pfil_head) ph_list; }; -int pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, - int, struct inpcb *), void *, int, struct pfil_head *); -int pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, - int, struct inpcb *), void *, int, struct pfil_head *); +/* Public functions for pfil hook management by packet filters. 
*/ +struct pfil_head *pfil_head_get(int, u_long); +int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *); +int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *); +#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0) + +/* Public functions to run the packet inspection by protocols. */ int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); +/* Public functions for pfil head management by protocols. */ int pfil_head_register(struct pfil_head *); int pfil_head_unregister(struct pfil_head *); -struct pfil_head *pfil_head_get(int, u_long); - -#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0) -#define PFIL_LOCK_INIT(p) \ - rm_init_flags(&(p)->ph_lock, "PFil hook read/write mutex", RM_RECURSE) -#define PFIL_LOCK_DESTROY(p) rm_destroy(&(p)->ph_lock) -#define PFIL_RLOCK(p, t) rm_rlock(&(p)->ph_lock, (t)) -#define PFIL_WLOCK(p) rm_wlock(&(p)->ph_lock) -#define PFIL_RUNLOCK(p, t) rm_runlock(&(p)->ph_lock, (t)) -#define PFIL_WUNLOCK(p) rm_wunlock(&(p)->ph_lock) -#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock) -#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock) - -static __inline struct packet_filter_hook * -pfil_hook_get(int dir, struct pfil_head *ph) -{ - - if (dir == PFIL_IN) - return (TAILQ_FIRST(&ph->ph_in)); - else if (dir == PFIL_OUT) - return (TAILQ_FIRST(&ph->ph_out)); - else - return (NULL); -} +/* Public pfil locking functions for self managed locks by packet filters. */ +struct rm_priotracker; /* Do not require including rmlock header */ +int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *); +void pfil_rlock(struct pfil_head *, struct rm_priotracker *); +void pfil_runlock(struct pfil_head *, struct rm_priotracker *); +void pfil_wlock(struct pfil_head *); +void pfil_wunlock(struct pfil_head *); +int pfil_wowned(struct pfil_head *ph); + +/* Internal pfil locking functions. 
*/ +#define PFIL_LOCK_INIT_REAL(l, t) \ + rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE) +#define PFIL_LOCK_DESTROY_REAL(l) \ + rm_destroy(l) +#define PFIL_LOCK_INIT(p) do { \ + if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \ + PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \ + (p)->ph_plock = &(p)->ph_lock; \ + } else \ + (p)->ph_plock = &V_pfil_lock; \ +} while (0) +#define PFIL_LOCK_DESTROY(p) do { \ + if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \ + PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \ +} while (0) + +#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t)) +#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t)) +#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock) +#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t)) +#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock) +#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock) + +/* Internal locking macros for global/vnet pfil_head_list. */ +#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock) +#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock) #endif /* _NET_PFIL_H_ */ diff --git a/freebsd/sys/net/pfkeyv2.h b/freebsd/sys/net/pfkeyv2.h index c45f8b05..c9b27695 100644 --- a/freebsd/sys/net/pfkeyv2.h +++ b/freebsd/sys/net/pfkeyv2.h @@ -218,7 +218,6 @@ struct sadb_x_sa2 { }; /* XXX Policy Extension */ -/* sizeof(struct sadb_x_policy) == 16 */ struct sadb_x_policy { u_int16_t sadb_x_policy_len; u_int16_t sadb_x_policy_exttype; @@ -226,8 +225,10 @@ struct sadb_x_policy { u_int8_t sadb_x_policy_dir; /* direction, see ipsec.h */ u_int8_t sadb_x_policy_reserved; u_int32_t sadb_x_policy_id; - u_int32_t sadb_x_policy_reserved2; + u_int32_t sadb_x_policy_priority; }; +_Static_assert(sizeof(struct sadb_x_policy) == 16, "struct size mismatch"); + /* * When policy_type == IPSEC, it is followed by some of * the ipsec policy request. @@ -256,31 +257,31 @@ struct sadb_x_ipsecrequest { }; /* NAT-Traversal type, see RFC 3948 (and drafts). 
*/ -/* sizeof(struct sadb_x_nat_t_type) == 8 */ struct sadb_x_nat_t_type { u_int16_t sadb_x_nat_t_type_len; u_int16_t sadb_x_nat_t_type_exttype; u_int8_t sadb_x_nat_t_type_type; u_int8_t sadb_x_nat_t_type_reserved[3]; }; +_Static_assert(sizeof(struct sadb_x_nat_t_type) == 8, "struct size mismatch"); /* NAT-Traversal source or destination port. */ -/* sizeof(struct sadb_x_nat_t_port) == 8 */ struct sadb_x_nat_t_port { u_int16_t sadb_x_nat_t_port_len; u_int16_t sadb_x_nat_t_port_exttype; u_int16_t sadb_x_nat_t_port_port; u_int16_t sadb_x_nat_t_port_reserved; }; +_Static_assert(sizeof(struct sadb_x_nat_t_port) == 8, "struct size mismatch"); /* ESP fragmentation size. */ -/* sizeof(struct sadb_x_nat_t_frag) == 8 */ struct sadb_x_nat_t_frag { u_int16_t sadb_x_nat_t_frag_len; u_int16_t sadb_x_nat_t_frag_exttype; u_int16_t sadb_x_nat_t_frag_fraglen; u_int16_t sadb_x_nat_t_frag_reserved; }; +_Static_assert(sizeof(struct sadb_x_nat_t_frag) == 8, "struct size mismatch"); #define SADB_EXT_RESERVED 0 @@ -332,39 +333,47 @@ struct sadb_x_nat_t_frag { #define SADB_SAFLAGS_PFS 1 -/* RFC2367 numbers - meets RFC2407 */ +/* + * Though some of these numbers (both _AALG and _EALG) appear to be + * IKEv2 numbers and others original IKE numbers, they have no meaning. + * These are constants that the various IKE daemons use to tell the kernel + * what cipher to use. + * + * Do not use these constants directly to decide which Transformation ID + * to send. You are responsible for mapping them yourself. 
+ */ #define SADB_AALG_NONE 0 #define SADB_AALG_MD5HMAC 2 #define SADB_AALG_SHA1HMAC 3 #define SADB_AALG_MAX 252 -/* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_AALG_SHA2_256 5 #define SADB_X_AALG_SHA2_384 6 #define SADB_X_AALG_SHA2_512 7 #define SADB_X_AALG_RIPEMD160HMAC 8 -#define SADB_X_AALG_AES_XCBC_MAC 9 /* draft-ietf-ipsec-ciph-aes-xcbc-mac-04 */ -/* private allocations should use 249-255 (RFC2407) */ +#define SADB_X_AALG_AES_XCBC_MAC 9 /* RFC3566 */ +#define SADB_X_AALG_AES128GMAC 11 /* RFC4543 + Errata1821 */ +#define SADB_X_AALG_AES192GMAC 12 +#define SADB_X_AALG_AES256GMAC 13 #define SADB_X_AALG_MD5 249 /* Keyed MD5 */ #define SADB_X_AALG_SHA 250 /* Keyed SHA */ #define SADB_X_AALG_NULL 251 /* null authentication */ #define SADB_X_AALG_TCP_MD5 252 /* Keyed TCP-MD5 (RFC2385) */ -/* RFC2367 numbers - meets RFC2407 */ #define SADB_EALG_NONE 0 #define SADB_EALG_DESCBC 2 #define SADB_EALG_3DESCBC 3 -#define SADB_EALG_NULL 11 -#define SADB_EALG_MAX 250 -/* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_EALG_CAST128CBC 6 #define SADB_X_EALG_BLOWFISHCBC 7 +#define SADB_EALG_NULL 11 #define SADB_X_EALG_RIJNDAELCBC 12 #define SADB_X_EALG_AES 12 -/* private allocations - based on RFC4312/IANA assignment */ -#define SADB_X_EALG_CAMELLIACBC 22 -/* private allocations should use 249-255 (RFC2407) */ -#define SADB_X_EALG_SKIPJACK 249 /*250*/ /* for IPSEC */ -#define SADB_X_EALG_AESCTR 250 /*249*/ /* draft-ietf-ipsec-ciph-aes-ctr-03 */ +#define SADB_X_EALG_AESCTR 13 +#define SADB_X_EALG_AESGCM8 18 /* RFC4106 */ +#define SADB_X_EALG_AESGCM12 19 +#define SADB_X_EALG_AESGCM16 20 +#define SADB_X_EALG_CAMELLIACBC 22 +#define SADB_X_EALG_AESGMAC 23 /* RFC4543 + Errata1821 */ +#define SADB_EALG_MAX 23 /* !!! keep updated !!! 
*/ /* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_CALG_NONE 0 diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h new file mode 100644 index 00000000..17768e96 --- /dev/null +++ b/freebsd/sys/net/pfvar.h @@ -0,0 +1,1757 @@ +/* + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ + * $FreeBSD$ + */ + +#ifndef _NET_PFVAR_H_ +#define _NET_PFVAR_H_ + +#include <rtems/bsd/sys/param.h> +#include <sys/queue.h> +#include <sys/counter.h> +#include <sys/malloc.h> +#include <sys/refcount.h> +#include <sys/tree.h> +#include <vm/uma.h> + +#include <net/radix.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <netpfil/pf/pf_mtag.h> + +struct pf_addr { + union { + struct in_addr v4; + struct in6_addr v6; + u_int8_t addr8[16]; + u_int16_t addr16[8]; + u_int32_t addr32[4]; + } pfa; /* 128-bit address */ +#define v4 pfa.v4 +#define v6 pfa.v6 +#define addr8 pfa.addr8 +#define addr16 pfa.addr16 +#define addr32 pfa.addr32 +}; + +#define PFI_AFLAG_NETWORK 0x01 +#define PFI_AFLAG_BROADCAST 0x02 +#define PFI_AFLAG_PEER 0x04 +#define PFI_AFLAG_MODEMASK 0x07 +#define PFI_AFLAG_NOALIAS 0x08 + +struct pf_addr_wrap { + union { + struct { + struct pf_addr addr; + struct pf_addr mask; + } a; + char ifname[IFNAMSIZ]; + char tblname[PF_TABLE_NAME_SIZE]; + } v; + union { + struct pfi_dynaddr *dyn; + struct pfr_ktable *tbl; + int dyncnt; + int tblcnt; + } p; + u_int8_t type; /* PF_ADDR_* */ + u_int8_t iflags; /* PFI_AFLAG_* */ +}; + +#ifdef _KERNEL + +struct pfi_dynaddr { + TAILQ_ENTRY(pfi_dynaddr) entry; + struct pf_addr pfid_addr4; + struct pf_addr pfid_mask4; + struct pf_addr pfid_addr6; + struct pf_addr pfid_mask6; + struct pfr_ktable *pfid_kt; + struct pfi_kif *pfid_kif; + int pfid_net; /* mask or 128 */ + int pfid_acnt4; /* address count IPv4 */ + int pfid_acnt6; /* address count IPv6 */ + sa_family_t pfid_af; /* rule af */ + u_int8_t pfid_iflags; /* PFI_AFLAG_* */ +}; + +/* + * Address manipulation macros + */ +#define HTONL(x) (x) = htonl((__uint32_t)(x)) +#define HTONS(x) (x) = htons((__uint16_t)(x)) +#define NTOHL(x) (x) = ntohl((__uint32_t)(x)) +#define NTOHS(x) (x) = ntohs((__uint16_t)(x)) + +#define PF_NAME "pf" + +#define PF_HASHROW_ASSERT(h) 
mtx_assert(&(h)->lock, MA_OWNED) +#define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock) +#define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock) + +#define PF_STATE_LOCK(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \ + PF_HASHROW_LOCK(_ih); \ + } while (0) + +#define PF_STATE_UNLOCK(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))]; \ + PF_HASHROW_UNLOCK(_ih); \ + } while (0) + +#ifdef INVARIANTS +#define PF_STATE_LOCK_ASSERT(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \ + PF_HASHROW_ASSERT(_ih); \ + } while (0) +#else /* !INVARIANTS */ +#define PF_STATE_LOCK_ASSERT(s) do {} while (0) +#endif /* INVARIANTS */ + +extern struct mtx pf_unlnkdrules_mtx; +#define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx) +#define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx) + +extern struct rwlock pf_rules_lock; +#define PF_RULES_RLOCK() rw_rlock(&pf_rules_lock) +#define PF_RULES_RUNLOCK() rw_runlock(&pf_rules_lock) +#define PF_RULES_WLOCK() rw_wlock(&pf_rules_lock) +#define PF_RULES_WUNLOCK() rw_wunlock(&pf_rules_lock) +#define PF_RULES_ASSERT() rw_assert(&pf_rules_lock, RA_LOCKED) +#define PF_RULES_RASSERT() rw_assert(&pf_rules_lock, RA_RLOCKED) +#define PF_RULES_WASSERT() rw_assert(&pf_rules_lock, RA_WLOCKED) + +#define PF_MODVER 1 +#define PFLOG_MODVER 1 +#define PFSYNC_MODVER 1 + +#define PFLOG_MINVER 1 +#define PFLOG_PREFVER PFLOG_MODVER +#define PFLOG_MAXVER 1 +#define PFSYNC_MINVER 1 +#define PFSYNC_PREFVER PFSYNC_MODVER +#define PFSYNC_MAXVER 1 + +#ifdef INET +#ifndef INET6 +#define PF_INET_ONLY +#endif /* ! INET6 */ +#endif /* INET */ + +#ifdef INET6 +#ifndef INET +#define PF_INET6_ONLY +#endif /* ! 
INET */ +#endif /* INET6 */ + +#ifdef INET +#ifdef INET6 +#define PF_INET_INET6 +#endif /* INET6 */ +#endif /* INET */ + +#else + +#define PF_INET_INET6 + +#endif /* _KERNEL */ + +/* Both IPv4 and IPv6 */ +#ifdef PF_INET_INET6 + +#define PF_AEQ(a, b, c) \ + ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \ + (c == AF_INET6 && (a)->addr32[3] == (b)->addr32[3] && \ + (a)->addr32[2] == (b)->addr32[2] && \ + (a)->addr32[1] == (b)->addr32[1] && \ + (a)->addr32[0] == (b)->addr32[0])) \ + +#define PF_ANEQ(a, b, c) \ + ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ + (c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \ + (a)->addr32[1] != (b)->addr32[1] || \ + (a)->addr32[2] != (b)->addr32[2] || \ + (a)->addr32[3] != (b)->addr32[3]))) \ + +#define PF_AZERO(a, c) \ + ((c == AF_INET && !(a)->addr32[0]) || \ + (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \ + !(a)->addr32[2] && !(a)->addr32[3] )) \ + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + pf_addrcpy(a, b, f) + +#define PF_AINC(a, f) \ + pf_addr_inc(a, f) + +#define PF_POOLMASK(a, b, c, d, f) \ + pf_poolmask(a, b, c, d, f) + +#else + +/* Just IPv6 */ + +#ifdef PF_INET6_ONLY + +#define PF_AEQ(a, b, c) \ + ((a)->addr32[3] == (b)->addr32[3] && \ + (a)->addr32[2] == (b)->addr32[2] && \ + (a)->addr32[1] == (b)->addr32[1] && \ + (a)->addr32[0] == (b)->addr32[0]) \ + +#define PF_ANEQ(a, b, c) \ + ((a)->addr32[3] != (b)->addr32[3] || \ + (a)->addr32[2] != (b)->addr32[2] || \ + (a)->addr32[1] != (b)->addr32[1] || \ + (a)->addr32[0] != (b)->addr32[0]) \ + +#define PF_AZERO(a, c) \ + (!(a)->addr32[0] && \ + !(a)->addr32[1] && \ + !(a)->addr32[2] && \ + !(a)->addr32[3] ) \ + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + pf_addrcpy(a, b, f) + +#define PF_AINC(a, f) \ + pf_addr_inc(a, f) + +#define PF_POOLMASK(a, b, c, d, f) \ + pf_poolmask(a, b, c, d, f) + +#else + +/* Just IPv4 */ +#ifdef 
PF_INET_ONLY + +#define PF_AEQ(a, b, c) \ + ((a)->addr32[0] == (b)->addr32[0]) + +#define PF_ANEQ(a, b, c) \ + ((a)->addr32[0] != (b)->addr32[0]) + +#define PF_AZERO(a, c) \ + (!(a)->addr32[0]) + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + (a)->v4.s_addr = (b)->v4.s_addr + +#define PF_AINC(a, f) \ + do { \ + (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ + } while (0) + +#define PF_POOLMASK(a, b, c, d, f) \ + do { \ + (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ + (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ + } while (0) + +#endif /* PF_INET_ONLY */ +#endif /* PF_INET6_ONLY */ +#endif /* PF_INET_INET6 */ + +/* + * XXX callers not FIB-aware in our version of pf yet. + * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio. + */ +#define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \ + ( \ + (((aw)->type == PF_ADDR_NOROUTE && \ + pf_routable((x), (af), NULL, (rtid))) || \ + (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ + pf_routable((x), (af), (ifp), (rtid))) || \ + ((aw)->type == PF_ADDR_TABLE && \ + !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ + ((aw)->type == PF_ADDR_DYNIFTL && \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_RANGE && \ + !pf_match_addr_range(&(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))) || \ + ((aw)->type == PF_ADDR_ADDRMASK && \ + !PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))))) != \ + (neg) \ + ) + + +struct pf_rule_uid { + uid_t uid[2]; + u_int8_t op; +}; + +struct pf_rule_gid { + uid_t gid[2]; + u_int8_t op; +}; + +struct pf_rule_addr { + struct pf_addr_wrap addr; + u_int16_t port[2]; + u_int8_t neg; + u_int8_t port_op; +}; + +struct pf_pooladdr { + struct pf_addr_wrap addr; + TAILQ_ENTRY(pf_pooladdr) entries; + char ifname[IFNAMSIZ]; + struct pfi_kif *kif; +}; + +TAILQ_HEAD(pf_palist, pf_pooladdr); + +struct pf_poolhashkey { + union { + u_int8_t key8[16]; + 
u_int16_t key16[8]; + u_int32_t key32[4]; + } pfk; /* 128-bit hash key */ +#define key8 pfk.key8 +#define key16 pfk.key16 +#define key32 pfk.key32 +}; + +struct pf_pool { + struct pf_palist list; + struct pf_pooladdr *cur; + struct pf_poolhashkey key; + struct pf_addr counter; + int tblidx; + u_int16_t proxy_port[2]; + u_int8_t opts; +}; + + +/* A packed Operating System description for fingerprinting */ +typedef u_int32_t pf_osfp_t; +#define PF_OSFP_ANY ((pf_osfp_t)0) +#define PF_OSFP_UNKNOWN ((pf_osfp_t)-1) +#define PF_OSFP_NOMATCH ((pf_osfp_t)-2) + +struct pf_osfp_entry { + SLIST_ENTRY(pf_osfp_entry) fp_entry; + pf_osfp_t fp_os; + int fp_enflags; +#define PF_OSFP_EXPANDED 0x001 /* expanded entry */ +#define PF_OSFP_GENERIC 0x002 /* generic signature */ +#define PF_OSFP_NODETAIL 0x004 /* no p0f details */ +#define PF_OSFP_LEN 32 + char fp_class_nm[PF_OSFP_LEN]; + char fp_version_nm[PF_OSFP_LEN]; + char fp_subtype_nm[PF_OSFP_LEN]; +}; +#define PF_OSFP_ENTRY_EQ(a, b) \ + ((a)->fp_os == (b)->fp_os && \ + memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \ + memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \ + memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0) + +/* handle pf_osfp_t packing */ +#define _FP_RESERVED_BIT 1 /* For the special negative #defines */ +#define _FP_UNUSED_BITS 1 +#define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */ +#define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */ +#define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */ +#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \ + (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \ + ((1 << _FP_CLASS_BITS) - 1); \ + (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \ + ((1 << _FP_VERSION_BITS) - 1);\ + (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \ +} while(0) +#define PF_OSFP_PACK(osfp, class, version, subtype) do { \ + (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \ 
+ + _FP_SUBTYPE_BITS); \ + (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \ + _FP_SUBTYPE_BITS; \ + (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \ +} while(0) + +/* the fingerprint of an OSes TCP SYN packet */ +typedef u_int64_t pf_tcpopts_t; +struct pf_os_fingerprint { + SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */ + pf_tcpopts_t fp_tcpopts; /* packed TCP options */ + u_int16_t fp_wsize; /* TCP window size */ + u_int16_t fp_psize; /* ip->ip_len */ + u_int16_t fp_mss; /* TCP MSS */ + u_int16_t fp_flags; +#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */ +#define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */ +#define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */ +#define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */ +#define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */ +#define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */ +#define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */ +#define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */ +#define PF_OSFP_WSCALE_DC 0x0100 /* TCP window scale dont-care */ +#define PF_OSFP_MSS 0x0200 /* TCP MSS */ +#define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */ +#define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ +#define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ +#define PF_OSFP_TS0 0x2000 /* Zero timestamp */ +#define PF_OSFP_INET6 0x4000 /* IPv6 */ + u_int8_t fp_optcnt; /* TCP option count */ + u_int8_t fp_wscale; /* TCP window scaling */ + u_int8_t fp_ttl; /* IPv4 TTL */ +#define PF_OSFP_MAXTTL_OFFSET 40 +/* TCP options packing */ +#define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */ +#define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */ +#define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */ +#define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */ +#define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */ +#define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */ +#define PF_OSFP_MAX_OPTS \ + (sizeof(((struct 
pf_os_fingerprint *)0)->fp_tcpopts) * 8) \ + / PF_OSFP_TCPOPT_BITS + + SLIST_ENTRY(pf_os_fingerprint) fp_next; +}; + +struct pf_osfp_ioctl { + struct pf_osfp_entry fp_os; + pf_tcpopts_t fp_tcpopts; /* packed TCP options */ + u_int16_t fp_wsize; /* TCP window size */ + u_int16_t fp_psize; /* ip->ip_len */ + u_int16_t fp_mss; /* TCP MSS */ + u_int16_t fp_flags; + u_int8_t fp_optcnt; /* TCP option count */ + u_int8_t fp_wscale; /* TCP window scaling */ + u_int8_t fp_ttl; /* IPv4 TTL */ + + int fp_getnum; /* DIOCOSFPGET number */ +}; + + +union pf_rule_ptr { + struct pf_rule *ptr; + u_int32_t nr; +}; + +#define PF_ANCHOR_NAME_SIZE 64 + +struct pf_rule { + struct pf_rule_addr src; + struct pf_rule_addr dst; +#define PF_SKIP_IFP 0 +#define PF_SKIP_DIR 1 +#define PF_SKIP_AF 2 +#define PF_SKIP_PROTO 3 +#define PF_SKIP_SRC_ADDR 4 +#define PF_SKIP_SRC_PORT 5 +#define PF_SKIP_DST_ADDR 6 +#define PF_SKIP_DST_PORT 7 +#define PF_SKIP_COUNT 8 + union pf_rule_ptr skip[PF_SKIP_COUNT]; +#define PF_RULE_LABEL_SIZE 64 + char label[PF_RULE_LABEL_SIZE]; + char ifname[IFNAMSIZ]; + char qname[PF_QNAME_SIZE]; + char pqname[PF_QNAME_SIZE]; +#define PF_TAG_NAME_SIZE 64 + char tagname[PF_TAG_NAME_SIZE]; + char match_tagname[PF_TAG_NAME_SIZE]; + + char overload_tblname[PF_TABLE_NAME_SIZE]; + + TAILQ_ENTRY(pf_rule) entries; + struct pf_pool rpool; + + u_int64_t evaluations; + u_int64_t packets[2]; + u_int64_t bytes[2]; + + struct pfi_kif *kif; + struct pf_anchor *anchor; + struct pfr_ktable *overload_tbl; + + pf_osfp_t os_fingerprint; + + int rtableid; + u_int32_t timeout[PFTM_MAX]; + u_int32_t max_states; + u_int32_t max_src_nodes; + u_int32_t max_src_states; + u_int32_t max_src_conn; + struct { + u_int32_t limit; + u_int32_t seconds; + } max_src_conn_rate; + u_int32_t qid; + u_int32_t pqid; + u_int32_t rt_listid; + u_int32_t nr; + u_int32_t prob; + uid_t cuid; + pid_t cpid; + + counter_u64_t states_cur; + counter_u64_t states_tot; + counter_u64_t src_nodes; + + u_int16_t return_icmp; + 
u_int16_t return_icmp6; + u_int16_t max_mss; + u_int16_t tag; + u_int16_t match_tag; + u_int16_t scrub_flags; + + struct pf_rule_uid uid; + struct pf_rule_gid gid; + + u_int32_t rule_flag; + u_int8_t action; + u_int8_t direction; + u_int8_t log; + u_int8_t logif; + u_int8_t quick; + u_int8_t ifnot; + u_int8_t match_tag_not; + u_int8_t natpass; + +#define PF_STATE_NORMAL 0x1 +#define PF_STATE_MODULATE 0x2 +#define PF_STATE_SYNPROXY 0x3 + u_int8_t keep_state; + sa_family_t af; + u_int8_t proto; + u_int8_t type; + u_int8_t code; + u_int8_t flags; + u_int8_t flagset; + u_int8_t min_ttl; + u_int8_t allow_opts; + u_int8_t rt; + u_int8_t return_ttl; + u_int8_t tos; + u_int8_t set_tos; + u_int8_t anchor_relative; + u_int8_t anchor_wildcard; + +#define PF_FLUSH 0x01 +#define PF_FLUSH_GLOBAL 0x02 + u_int8_t flush; +#define PF_PRIO_ZERO 0xff /* match "prio 0" packets */ +#define PF_PRIO_MAX 7 + u_int8_t prio; + u_int8_t set_prio[2]; + + struct { + struct pf_addr addr; + u_int16_t port; + } divert; + + uint64_t u_states_cur; + uint64_t u_states_tot; + uint64_t u_src_nodes; +}; + +/* rule flags */ +#define PFRULE_DROP 0x0000 +#define PFRULE_RETURNRST 0x0001 +#define PFRULE_FRAGMENT 0x0002 +#define PFRULE_RETURNICMP 0x0004 +#define PFRULE_RETURN 0x0008 +#define PFRULE_NOSYNC 0x0010 +#define PFRULE_SRCTRACK 0x0020 /* track source states */ +#define PFRULE_RULESRCTRACK 0x0040 /* per rule */ +#define PFRULE_REFS 0x0080 /* rule has references */ + +/* scrub flags */ +#define PFRULE_NODF 0x0100 +#define PFRULE_RANDOMID 0x0800 +#define PFRULE_REASSEMBLE_TCP 0x1000 +#define PFRULE_SET_TOS 0x2000 + +/* rule flags again */ +#define PFRULE_IFBOUND 0x00010000 /* if-bound */ +#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ + +#define PFSTATE_HIWAT 10000 /* default state table size */ +#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ +#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */ + + +struct pf_threshold { + u_int32_t limit; 
+#define PF_THRESHOLD_MULT 1000 +#define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT + u_int32_t seconds; + u_int32_t count; + u_int32_t last; +}; + +struct pf_src_node { + LIST_ENTRY(pf_src_node) entry; + struct pf_addr addr; + struct pf_addr raddr; + union pf_rule_ptr rule; + struct pfi_kif *kif; + u_int64_t bytes[2]; + u_int64_t packets[2]; + u_int32_t states; + u_int32_t conn; + struct pf_threshold conn_rate; + u_int32_t creation; + u_int32_t expire; + sa_family_t af; + u_int8_t ruletype; +}; + +#define PFSNODE_HIWAT 10000 /* default source node table size */ + +struct pf_state_scrub { + struct timeval pfss_last; /* time received last packet */ + u_int32_t pfss_tsecr; /* last echoed timestamp */ + u_int32_t pfss_tsval; /* largest timestamp */ + u_int32_t pfss_tsval0; /* original timestamp */ + u_int16_t pfss_flags; +#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ +#define PFSS_PAWS 0x0010 /* stricter PAWS checks */ +#define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */ +#define PFSS_DATA_TS 0x0040 /* timestamp on data packets */ +#define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */ + u_int8_t pfss_ttl; /* stashed TTL */ + u_int8_t pad; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +}; + +struct pf_state_host { + struct pf_addr addr; + u_int16_t port; + u_int16_t pad; +}; + +struct pf_state_peer { + struct pf_state_scrub *scrub; /* state is scrubbed */ + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ + u_int8_t pad[1]; +}; + +/* Keep synced with struct pf_state_key. 
*/ +struct pf_state_key_cmp { + struct pf_addr addr[2]; + u_int16_t port[2]; + sa_family_t af; + u_int8_t proto; + u_int8_t pad[2]; +}; + +struct pf_state_key { + struct pf_addr addr[2]; + u_int16_t port[2]; + sa_family_t af; + u_int8_t proto; + u_int8_t pad[2]; + + LIST_ENTRY(pf_state_key) entry; + TAILQ_HEAD(, pf_state) states[2]; +}; + +/* Keep synced with struct pf_state. */ +struct pf_state_cmp { + u_int64_t id; + u_int32_t creatorid; + u_int8_t direction; + u_int8_t pad[3]; +}; + +struct pf_state { + u_int64_t id; + u_int32_t creatorid; + u_int8_t direction; + u_int8_t pad[3]; + + u_int refs; + TAILQ_ENTRY(pf_state) sync_list; + TAILQ_ENTRY(pf_state) key_list[2]; + LIST_ENTRY(pf_state) entry; + struct pf_state_peer src; + struct pf_state_peer dst; + union pf_rule_ptr rule; + union pf_rule_ptr anchor; + union pf_rule_ptr nat_rule; + struct pf_addr rt_addr; + struct pf_state_key *key[2]; /* addresses stack and wire */ + struct pfi_kif *kif; + struct pfi_kif *rt_kif; + struct pf_src_node *src_node; + struct pf_src_node *nat_src_node; + u_int64_t packets[2]; + u_int64_t bytes[2]; + u_int32_t creation; + u_int32_t expire; + u_int32_t pfsync_time; + u_int16_t tag; + u_int8_t log; + u_int8_t state_flags; +#define PFSTATE_ALLOWOPTS 0x01 +#define PFSTATE_SLOPPY 0x02 +/* was PFSTATE_PFLOW 0x04 */ +#define PFSTATE_NOSYNC 0x08 +#define PFSTATE_ACK 0x10 +#define PFSTATE_SETPRIO 0x0200 +#define PFSTATE_SETMASK (PFSTATE_SETPRIO) + u_int8_t timeout; + u_int8_t sync_state; /* PFSYNC_S_x */ + + /* XXX */ + u_int8_t sync_updates; + u_int8_t _tail[3]; +}; + +/* + * Unified state structures for pulling states out of the kernel + * used by pfsync(4) and the pf(4) ioctl. 
+ */ +struct pfsync_state_scrub { + u_int16_t pfss_flags; + u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSYNC_SCRUB_FLAG_VALID 0x01 + u_int8_t scrub_flag; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +} __packed; + +struct pfsync_state_peer { + struct pfsync_state_scrub scrub; /* state is scrubbed */ + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int8_t pad[6]; +} __packed; + +struct pfsync_state_key { + struct pf_addr addr[2]; + u_int16_t port[2]; +}; + +struct pfsync_state { + u_int64_t id; + char ifname[IFNAMSIZ]; + struct pfsync_state_key key[2]; + struct pfsync_state_peer src; + struct pfsync_state_peer dst; + struct pf_addr rt_addr; + u_int32_t rule; + u_int32_t anchor; + u_int32_t nat_rule; + u_int32_t creation; + u_int32_t expire; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; + u_int32_t creatorid; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t __spare[2]; + u_int8_t log; + u_int8_t state_flags; + u_int8_t timeout; + u_int8_t sync_flags; + u_int8_t updates; +} __packed; + +#ifdef _KERNEL +/* pfsync */ +typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t); +typedef void pfsync_insert_state_t(struct pf_state *); +typedef void pfsync_update_state_t(struct pf_state *); +typedef void pfsync_delete_state_t(struct pf_state *); +typedef void pfsync_clear_states_t(u_int32_t, const char *); +typedef int pfsync_defer_t(struct pf_state *, struct mbuf *); + +extern pfsync_state_import_t *pfsync_state_import_ptr; +extern pfsync_insert_state_t *pfsync_insert_state_ptr; +extern pfsync_update_state_t *pfsync_update_state_ptr; +extern pfsync_delete_state_t *pfsync_delete_state_ptr; +extern pfsync_clear_states_t 
*pfsync_clear_states_ptr; +extern pfsync_defer_t *pfsync_defer_ptr; + +void pfsync_state_export(struct pfsync_state *, + struct pf_state *); + +/* pflog */ +struct pf_ruleset; +struct pf_pdesc; +typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, + u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, + struct pf_ruleset *, struct pf_pdesc *, int); +extern pflog_packet_t *pflog_packet_ptr; + +#endif /* _KERNEL */ + +#define PFSYNC_FLAG_SRCNODE 0x04 +#define PFSYNC_FLAG_NATSRCNODE 0x08 + +/* for copies to/from network byte order */ +/* ioctl interface also uses network byte order */ +#define pf_state_peer_hton(s,d) do { \ + (d)->seqlo = htonl((s)->seqlo); \ + (d)->seqhi = htonl((s)->seqhi); \ + (d)->seqdiff = htonl((s)->seqdiff); \ + (d)->max_win = htons((s)->max_win); \ + (d)->mss = htons((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ + if ((s)->scrub) { \ + (d)->scrub.pfss_flags = \ + htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ + (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ + (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ + (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ + } \ +} while (0) + +#define pf_state_peer_ntoh(s,d) do { \ + (d)->seqlo = ntohl((s)->seqlo); \ + (d)->seqhi = ntohl((s)->seqhi); \ + (d)->seqdiff = ntohl((s)->seqdiff); \ + (d)->max_win = ntohs((s)->max_win); \ + (d)->mss = ntohs((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ + if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ + (d)->scrub != NULL) { \ + (d)->scrub->pfss_flags = \ + ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ + (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ + (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ + } \ +} while (0) + +#define pf_state_counter_hton(s,d) do { \ + d[0] = htonl((s>>32)&0xffffffff); \ + d[1] = htonl(s&0xffffffff); \ +} while (0) + +#define pf_state_counter_from_pfsync(s) \ + (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1])) + +#define 
pf_state_counter_ntoh(s,d) do { \ + d = ntohl(s[0]); \ + d = d<<32; \ + d += ntohl(s[1]); \ +} while (0) + +TAILQ_HEAD(pf_rulequeue, pf_rule); + +struct pf_anchor; + +struct pf_ruleset { + struct { + struct pf_rulequeue queues[2]; + struct { + struct pf_rulequeue *ptr; + struct pf_rule **ptr_array; + u_int32_t rcount; + u_int32_t ticket; + int open; + } active, inactive; + } rules[PF_RULESET_MAX]; + struct pf_anchor *anchor; + u_int32_t tticket; + int tables; + int topen; +}; + +RB_HEAD(pf_anchor_global, pf_anchor); +RB_HEAD(pf_anchor_node, pf_anchor); +struct pf_anchor { + RB_ENTRY(pf_anchor) entry_global; + RB_ENTRY(pf_anchor) entry_node; + struct pf_anchor *parent; + struct pf_anchor_node children; + char name[PF_ANCHOR_NAME_SIZE]; + char path[MAXPATHLEN]; + struct pf_ruleset ruleset; + int refcnt; /* anchor rules */ + int match; /* XXX: used for pfctl black magic */ +}; +RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); + +#define PF_RESERVED_ANCHOR "_pf" + +#define PFR_TFLAG_PERSIST 0x00000001 +#define PFR_TFLAG_CONST 0x00000002 +#define PFR_TFLAG_ACTIVE 0x00000004 +#define PFR_TFLAG_INACTIVE 0x00000008 +#define PFR_TFLAG_REFERENCED 0x00000010 +#define PFR_TFLAG_REFDANCHOR 0x00000020 +#define PFR_TFLAG_COUNTERS 0x00000040 +/* Adjust masks below when adding flags. 
*/ +#define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \ + PFR_TFLAG_CONST | \ + PFR_TFLAG_COUNTERS) +#define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \ + PFR_TFLAG_INACTIVE | \ + PFR_TFLAG_REFERENCED | \ + PFR_TFLAG_REFDANCHOR) +#define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \ + PFR_TFLAG_CONST | \ + PFR_TFLAG_ACTIVE | \ + PFR_TFLAG_INACTIVE | \ + PFR_TFLAG_REFERENCED | \ + PFR_TFLAG_REFDANCHOR | \ + PFR_TFLAG_COUNTERS) + +struct pf_anchor_stackframe; + +struct pfr_table { + char pfrt_anchor[MAXPATHLEN]; + char pfrt_name[PF_TABLE_NAME_SIZE]; + u_int32_t pfrt_flags; + u_int8_t pfrt_fback; +}; + +enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, + PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, + PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX }; + +struct pfr_addr { + union { + struct in_addr _pfra_ip4addr; + struct in6_addr _pfra_ip6addr; + } pfra_u; + u_int8_t pfra_af; + u_int8_t pfra_net; + u_int8_t pfra_not; + u_int8_t pfra_fback; +}; +#define pfra_ip4addr pfra_u._pfra_ip4addr +#define pfra_ip6addr pfra_u._pfra_ip6addr + +enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX }; +enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX }; +#define PFR_OP_XPASS PFR_OP_ADDR_MAX + +struct pfr_astats { + struct pfr_addr pfras_a; + u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + long pfras_tzero; +}; + +enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX }; + +struct pfr_tstats { + struct pfr_table pfrts_t; + u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_match; + u_int64_t pfrts_nomatch; + long pfrts_tzero; + int pfrts_cnt; + int pfrts_refcnt[PFR_REFCNT_MAX]; +}; +#define pfrts_name pfrts_t.pfrt_name +#define pfrts_flags pfrts_t.pfrt_flags + +#ifndef _SOCKADDR_UNION_DEFINED +#define _SOCKADDR_UNION_DEFINED +union sockaddr_union { + struct sockaddr sa; + struct sockaddr_in sin; + struct 
sockaddr_in6 sin6; +}; +#endif /* _SOCKADDR_UNION_DEFINED */ + +struct pfr_kcounters { + u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; +}; + +SLIST_HEAD(pfr_kentryworkq, pfr_kentry); +struct pfr_kentry { + struct radix_node pfrke_node[2]; + union sockaddr_union pfrke_sa; + SLIST_ENTRY(pfr_kentry) pfrke_workq; + struct pfr_kcounters *pfrke_counters; + long pfrke_tzero; + u_int8_t pfrke_af; + u_int8_t pfrke_net; + u_int8_t pfrke_not; + u_int8_t pfrke_mark; +}; + +SLIST_HEAD(pfr_ktableworkq, pfr_ktable); +RB_HEAD(pfr_ktablehead, pfr_ktable); +struct pfr_ktable { + struct pfr_tstats pfrkt_ts; + RB_ENTRY(pfr_ktable) pfrkt_tree; + SLIST_ENTRY(pfr_ktable) pfrkt_workq; + struct radix_node_head *pfrkt_ip4; + struct radix_node_head *pfrkt_ip6; + struct pfr_ktable *pfrkt_shadow; + struct pfr_ktable *pfrkt_root; + struct pf_ruleset *pfrkt_rs; + long pfrkt_larg; + int pfrkt_nflags; +}; +#define pfrkt_t pfrkt_ts.pfrts_t +#define pfrkt_name pfrkt_t.pfrt_name +#define pfrkt_anchor pfrkt_t.pfrt_anchor +#define pfrkt_ruleset pfrkt_t.pfrt_ruleset +#define pfrkt_flags pfrkt_t.pfrt_flags +#define pfrkt_cnt pfrkt_ts.pfrts_cnt +#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt +#define pfrkt_packets pfrkt_ts.pfrts_packets +#define pfrkt_bytes pfrkt_ts.pfrts_bytes +#define pfrkt_match pfrkt_ts.pfrts_match +#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch +#define pfrkt_tzero pfrkt_ts.pfrts_tzero + +/* keep synced with pfi_kif, used in RB_FIND */ +struct pfi_kif_cmp { + char pfik_name[IFNAMSIZ]; +}; + +struct pfi_kif { + char pfik_name[IFNAMSIZ]; + union { + RB_ENTRY(pfi_kif) _pfik_tree; + LIST_ENTRY(pfi_kif) _pfik_list; + } _pfik_glue; +#define pfik_tree _pfik_glue._pfik_tree +#define pfik_list _pfik_glue._pfik_list + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int32_t pfik_tzero; + u_int pfik_flags; + struct ifnet *pfik_ifp; + struct ifg_group *pfik_group; + u_int pfik_rulerefs; + TAILQ_HEAD(, pfi_dynaddr) 
pfik_dynaddrs; +}; + +#define PFI_IFLAG_REFS 0x0001 /* has state references */ +#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ + +struct pf_pdesc { + struct { + int done; + uid_t uid; + gid_t gid; + } lookup; + u_int64_t tot_len; /* Make Mickey money */ + union { + struct tcphdr *tcp; + struct udphdr *udp; + struct icmp *icmp; +#ifdef INET6 + struct icmp6_hdr *icmp6; +#endif /* INET6 */ + void *any; + } hdr; + + struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ + struct pf_addr *src; /* src address */ + struct pf_addr *dst; /* dst address */ + u_int16_t *sport; + u_int16_t *dport; + struct pf_mtag *pf_mtag; + + u_int32_t p_len; /* total length of payload */ + + u_int16_t *ip_sum; + u_int16_t *proto_sum; + u_int16_t flags; /* Let SCRUB trigger behavior in + * state code. Easier than tags */ +#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ +#define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ + sa_family_t af; + u_int8_t proto; + u_int8_t tos; + u_int8_t dir; /* direction */ + u_int8_t sidx; /* key index for source */ + u_int8_t didx; /* key index for destination */ +}; + +/* flags for RDR options */ +#define PF_DPORT_RANGE 0x01 /* Dest port uses range */ +#define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */ + +/* UDP state enumeration */ +#define PFUDPS_NO_TRAFFIC 0 +#define PFUDPS_SINGLE 1 +#define PFUDPS_MULTIPLE 2 + +#define PFUDPS_NSTATES 3 /* number of state levels */ + +#define PFUDPS_NAMES { \ + "NO_TRAFFIC", \ + "SINGLE", \ + "MULTIPLE", \ + NULL \ +} + +/* Other protocol state enumeration */ +#define PFOTHERS_NO_TRAFFIC 0 +#define PFOTHERS_SINGLE 1 +#define PFOTHERS_MULTIPLE 2 + +#define PFOTHERS_NSTATES 3 /* number of state levels */ + +#define PFOTHERS_NAMES { \ + "NO_TRAFFIC", \ + "SINGLE", \ + "MULTIPLE", \ + NULL \ +} + +#define ACTION_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + } while (0) + +#define REASON_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); 
\ + if (x < PFRES_MAX) \ + counter_u64_add(V_pf_status.counters[x], 1); \ + } while (0) + +struct pf_kstatus { + counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */ + counter_u64_t lcounters[LCNT_MAX]; /* limit counters */ + counter_u64_t fcounters[FCNT_MAX]; /* state operation counters */ + counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */ + uint32_t states; + uint32_t src_nodes; + uint32_t running; + uint32_t since; + uint32_t debug; + uint32_t hostid; + char ifname[IFNAMSIZ]; + uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; +}; + +struct pf_divert { + union { + struct in_addr ipv4; + struct in6_addr ipv6; + } addr; + u_int16_t port; +}; + +#define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ +#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ + +/* + * ioctl parameter structures + */ + +struct pfioc_pooladdr { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + u_int32_t r_num; + u_int8_t r_action; + u_int8_t r_last; + u_int8_t af; + char anchor[MAXPATHLEN]; + struct pf_pooladdr addr; +}; + +struct pfioc_rule { + u_int32_t action; + u_int32_t ticket; + u_int32_t pool_ticket; + u_int32_t nr; + char anchor[MAXPATHLEN]; + char anchor_call[MAXPATHLEN]; + struct pf_rule rule; +}; + +struct pfioc_natlook { + struct pf_addr saddr; + struct pf_addr daddr; + struct pf_addr rsaddr; + struct pf_addr rdaddr; + u_int16_t sport; + u_int16_t dport; + u_int16_t rsport; + u_int16_t rdport; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; +}; + +struct pfioc_state { + struct pfsync_state state; +}; + +struct pfioc_src_node_kill { + sa_family_t psnk_af; + struct pf_rule_addr psnk_src; + struct pf_rule_addr psnk_dst; + u_int psnk_killed; +}; + +struct pfioc_state_kill { + struct pf_state_cmp psk_pfcmp; + sa_family_t psk_af; + int psk_proto; + struct pf_rule_addr psk_src; + struct pf_rule_addr psk_dst; + char psk_ifname[IFNAMSIZ]; + char psk_label[PF_RULE_LABEL_SIZE]; + u_int psk_killed; +}; + +struct pfioc_states { 
+ int ps_len; + union { + caddr_t psu_buf; + struct pfsync_state *psu_states; + } ps_u; +#define ps_buf ps_u.psu_buf +#define ps_states ps_u.psu_states +}; + +struct pfioc_src_nodes { + int psn_len; + union { + caddr_t psu_buf; + struct pf_src_node *psu_src_nodes; + } psn_u; +#define psn_buf psn_u.psu_buf +#define psn_src_nodes psn_u.psu_src_nodes +}; + +struct pfioc_if { + char ifname[IFNAMSIZ]; +}; + +struct pfioc_tm { + int timeout; + int seconds; +}; + +struct pfioc_limit { + int index; + unsigned limit; +}; + +struct pfioc_altq { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + struct pf_altq altq; +}; + +struct pfioc_qstats { + u_int32_t ticket; + u_int32_t nr; + void *buf; + int nbytes; + u_int8_t scheduler; +}; + +struct pfioc_ruleset { + u_int32_t nr; + char path[MAXPATHLEN]; + char name[PF_ANCHOR_NAME_SIZE]; +}; + +#define PF_RULESET_ALTQ (PF_RULESET_MAX) +#define PF_RULESET_TABLE (PF_RULESET_MAX+1) +struct pfioc_trans { + int size; /* number of elements */ + int esize; /* size of each element in bytes */ + struct pfioc_trans_e { + int rs_num; + char anchor[MAXPATHLEN]; + u_int32_t ticket; + } *array; +}; + +#define PFR_FLAG_ATOMIC 0x00000001 /* unused */ +#define PFR_FLAG_DUMMY 0x00000002 +#define PFR_FLAG_FEEDBACK 0x00000004 +#define PFR_FLAG_CLSTATS 0x00000008 +#define PFR_FLAG_ADDRSTOO 0x00000010 +#define PFR_FLAG_REPLACE 0x00000020 +#define PFR_FLAG_ALLRSETS 0x00000040 +#define PFR_FLAG_ALLMASK 0x0000007F +#ifdef _KERNEL +#define PFR_FLAG_USERIOCTL 0x10000000 +#endif + +struct pfioc_table { + struct pfr_table pfrio_table; + void *pfrio_buffer; + int pfrio_esize; + int pfrio_size; + int pfrio_size2; + int pfrio_nadd; + int pfrio_ndel; + int pfrio_nchange; + int pfrio_flags; + u_int32_t pfrio_ticket; +}; +#define pfrio_exists pfrio_nadd +#define pfrio_nzero pfrio_nadd +#define pfrio_nmatch pfrio_nadd +#define pfrio_naddr pfrio_size2 +#define pfrio_setflag pfrio_size2 +#define pfrio_clrflag pfrio_nadd + +struct pfioc_iface { + char 
pfiio_name[IFNAMSIZ]; + void *pfiio_buffer; + int pfiio_esize; + int pfiio_size; + int pfiio_nzero; + int pfiio_flags; +}; + + +/* + * ioctl operations + */ + +#define DIOCSTART _IO ('D', 1) +#define DIOCSTOP _IO ('D', 2) +#define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) +#define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) +#define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule) +/* XXX cut 8 - 17 */ +#define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill) +#define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state) +#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if) +#define DIOCGETSTATUS _IOWR('D', 21, struct pf_status) +#define DIOCCLRSTATUS _IO ('D', 22) +#define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook) +#define DIOCSETDEBUG _IOWR('D', 24, u_int32_t) +#define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states) +#define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule) +/* XXX cut 26 - 28 */ +#define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm) +#define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm) +#define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state) +#define DIOCCLRRULECTRS _IO ('D', 38) +#define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit) +#define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit) +#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) +#define DIOCSTARTALTQ _IO ('D', 42) +#define DIOCSTOPALTQ _IO ('D', 43) +#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq) +#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) +#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) +#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq) +#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats) +#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) +#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) +#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) +#define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) +#define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) +/* XXX cut 55 - 57 */ +#define 
DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) +#define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) +#define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) +#define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table) +#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) +#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) +#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) +#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) +#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) +#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) +#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) +#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) +#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) +#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) +#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) +#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) +#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) +#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) +#define DIOCOSFPFLUSH _IO('D', 78) +#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) +#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) +#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) +#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) +#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) +#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) +#define DIOCCLRSRCNODES _IO('D', 85) +#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) +#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) +#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) +#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) +#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) +struct pf_ifspeed { + char ifname[IFNAMSIZ]; + u_int32_t baudrate; +}; +#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) + +#ifdef _KERNEL +LIST_HEAD(pf_src_node_list, pf_src_node); +struct pf_srchash { + struct 
pf_src_node_list nodes; + struct mtx lock; +}; + +struct pf_keyhash { + LIST_HEAD(, pf_state_key) keys; + struct mtx lock; +}; + +struct pf_idhash { + LIST_HEAD(, pf_state) states; + struct mtx lock; +}; + +extern u_long pf_hashmask; +extern u_long pf_srchashmask; +#define PF_HASHSIZ (32768) +VNET_DECLARE(struct pf_keyhash *, pf_keyhash); +VNET_DECLARE(struct pf_idhash *, pf_idhash); +#define V_pf_keyhash VNET(pf_keyhash) +#define V_pf_idhash VNET(pf_idhash) +VNET_DECLARE(struct pf_srchash *, pf_srchash); +#define V_pf_srchash VNET(pf_srchash) + +#define PF_IDHASH(s) (be64toh((s)->id) % (pf_hashmask + 1)) + +VNET_DECLARE(void *, pf_swi_cookie); +#define V_pf_swi_cookie VNET(pf_swi_cookie) + +VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]); +#define V_pf_stateid VNET(pf_stateid) + +TAILQ_HEAD(pf_altqqueue, pf_altq); +VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]); +#define V_pf_altqs VNET(pf_altqs) +VNET_DECLARE(struct pf_palist, pf_pabuf); +#define V_pf_pabuf VNET(pf_pabuf) + +VNET_DECLARE(u_int32_t, ticket_altqs_active); +#define V_ticket_altqs_active VNET(ticket_altqs_active) +VNET_DECLARE(u_int32_t, ticket_altqs_inactive); +#define V_ticket_altqs_inactive VNET(ticket_altqs_inactive) +VNET_DECLARE(int, altqs_inactive_open); +#define V_altqs_inactive_open VNET(altqs_inactive_open) +VNET_DECLARE(u_int32_t, ticket_pabuf); +#define V_ticket_pabuf VNET(ticket_pabuf) +VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); +#define V_pf_altqs_active VNET(pf_altqs_active) +VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); +#define V_pf_altqs_inactive VNET(pf_altqs_inactive) + +VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules); +#define V_pf_unlinked_rules VNET(pf_unlinked_rules) + +void pf_initialize(void); +void pf_mtag_initialize(void); +void pf_mtag_cleanup(void); +void pf_cleanup(void); + +struct pf_mtag *pf_get_mtag(struct mbuf *); + +extern void pf_calc_skip_steps(struct pf_rulequeue *); +#ifdef ALTQ +extern void pf_altq_ifnet_event(struct ifnet *, int); +#endif 
+VNET_DECLARE(uma_zone_t, pf_state_z); +#define V_pf_state_z VNET(pf_state_z) +VNET_DECLARE(uma_zone_t, pf_state_key_z); +#define V_pf_state_key_z VNET(pf_state_key_z) +VNET_DECLARE(uma_zone_t, pf_state_scrub_z); +#define V_pf_state_scrub_z VNET(pf_state_scrub_z) + +extern void pf_purge_thread(void *); +extern void pf_unload_vnet_purge(void); +extern void pf_intr(void *); +extern void pf_purge_expired_src_nodes(void); + +extern int pf_unlink_state(struct pf_state *, u_int); +#define PF_ENTER_LOCKED 0x00000001 +#define PF_RETURN_LOCKED 0x00000002 +extern int pf_state_insert(struct pfi_kif *, + struct pf_state_key *, + struct pf_state_key *, + struct pf_state *); +extern void pf_free_state(struct pf_state *); + +static __inline void +pf_ref_state(struct pf_state *s) +{ + + refcount_acquire(&s->refs); +} + +static __inline int +pf_release_state(struct pf_state *s) +{ + + if (refcount_release(&s->refs)) { + pf_free_state(s); + return (1); + } else + return (0); +} + +extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t); +extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, + u_int, int *); +extern struct pf_src_node *pf_find_src_node(struct pf_addr *, + struct pf_rule *, sa_family_t, int); +extern void pf_unlink_src_node(struct pf_src_node *); +extern u_int pf_free_src_nodes(struct pf_src_node_list *); +extern void pf_print_state(struct pf_state *); +extern void pf_print_flags(u_int8_t); +extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, + u_int8_t); +extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t, + u_int16_t, u_int16_t, u_int8_t); + +VNET_DECLARE(struct ifnet *, sync_ifp); +#define V_sync_ifp VNET(sync_ifp); +VNET_DECLARE(struct pf_rule, pf_default_rule); +#define V_pf_default_rule VNET(pf_default_rule) +extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, + u_int8_t); +void pf_free_rule(struct pf_rule *); + +#ifdef INET +int pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *); +int 
pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, + struct pf_pdesc *); +#endif /* INET */ + +#ifdef INET6 +int pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *); +int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, + struct pf_pdesc *); +void pf_poolmask(struct pf_addr *, struct pf_addr*, + struct pf_addr *, struct pf_addr *, u_int8_t); +void pf_addr_inc(struct pf_addr *, sa_family_t); +int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *); +#endif /* INET6 */ + +u_int32_t pf_new_isn(struct pf_state *); +void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, + sa_family_t); +void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); +void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t, + u_int8_t); +void pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t); +void pf_send_deferred_syn(struct pf_state *); +int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, + struct pf_addr *, sa_family_t); +int pf_match_addr_range(struct pf_addr *, struct pf_addr *, + struct pf_addr *, sa_family_t); +int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); + +void pf_normalize_init(void); +void pf_normalize_cleanup(void); +int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *, + struct pf_pdesc *); +void pf_normalize_tcp_cleanup(struct pf_state *); +int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *); +int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, + u_short *, struct tcphdr *, struct pf_state *, + struct pf_state_peer *, struct pf_state_peer *, int *); +u_int32_t + pf_state_expires(const struct pf_state *); +void pf_purge_expired_fragments(void); +int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *, + int); +int pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *); +struct pf_state_key *pf_alloc_state_key(int); +void 
pfr_initialize(void); +void pfr_cleanup(void); +int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); +void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, + u_int64_t, int, int, int); +int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t); +void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); +struct pfr_ktable * + pfr_attach_table(struct pf_ruleset *, char *); +void pfr_detach_table(struct pfr_ktable *); +int pfr_clr_tables(struct pfr_table *, int *, int); +int pfr_add_tables(struct pfr_table *, int, int *, int); +int pfr_del_tables(struct pfr_table *, int, int *, int); +int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int); +int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); +int pfr_clr_tstats(struct pfr_table *, int, int *, int); +int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); +int pfr_clr_addrs(struct pfr_table *, int *, int); +int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long); +int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int *, int *, int *, int, u_int32_t); +int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); +int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); +int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int); +int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int); +int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); +int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, + int *, u_int32_t, int); + +MALLOC_DECLARE(PFI_MTYPE); +VNET_DECLARE(struct pfi_kif *, pfi_all); 
+#define V_pfi_all VNET(pfi_all) + +void pfi_initialize(void); +void pfi_initialize_vnet(void); +void pfi_cleanup(void); +void pfi_cleanup_vnet(void); +void pfi_kif_ref(struct pfi_kif *); +void pfi_kif_unref(struct pfi_kif *); +struct pfi_kif *pfi_kif_find(const char *); +struct pfi_kif *pfi_kif_attach(struct pfi_kif *, const char *); +int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); +void pfi_kif_purge(void); +int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, + sa_family_t); +int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); +void pfi_dynaddr_remove(struct pfi_dynaddr *); +void pfi_dynaddr_copyout(struct pf_addr_wrap *); +void pfi_update_status(const char *, struct pf_status *); +void pfi_get_ifaces(const char *, struct pfi_kif *, int *); +int pfi_set_flags(const char *, int); +int pfi_clear_flags(const char *, int); + +int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int); +int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int); +int pf_addr_cmp(struct pf_addr *, struct pf_addr *, + sa_family_t); +void pf_qid2qname(u_int32_t, char *); + +VNET_DECLARE(struct pf_kstatus, pf_status); +#define V_pf_status VNET(pf_status) + +struct pf_limit { + uma_zone_t zone; + u_int limit; +}; +VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); +#define V_pf_limits VNET(pf_limits) + +#endif /* _KERNEL */ + +#ifdef _KERNEL +VNET_DECLARE(struct pf_anchor_global, pf_anchors); +#define V_pf_anchors VNET(pf_anchors) +VNET_DECLARE(struct pf_anchor, pf_main_anchor); +#define V_pf_main_anchor VNET(pf_main_anchor) +#define pf_main_ruleset V_pf_main_anchor.ruleset +#endif + +/* these ruleset functions can be linked into userland programs (pfctl) */ +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); +void 
pf_remove_if_empty_ruleset(struct pf_ruleset *); +struct pf_ruleset *pf_find_ruleset(const char *); +struct pf_ruleset *pf_find_or_create_ruleset(const char *); +void pf_rs_initialize(void); + +/* The fingerprint functions can be linked into userland programs (tcpdump) */ +int pf_osfp_add(struct pf_osfp_ioctl *); +#ifdef _KERNEL +struct pf_osfp_enlist * + pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int, + const struct tcphdr *); +#endif /* _KERNEL */ +void pf_osfp_flush(void); +int pf_osfp_get(struct pf_osfp_ioctl *); +int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); + +#ifdef _KERNEL +void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); + +void pf_step_into_anchor(struct pf_anchor_stackframe *, int *, + struct pf_ruleset **, int, struct pf_rule **, + struct pf_rule **, int *); +int pf_step_out_of_anchor(struct pf_anchor_stackframe *, int *, + struct pf_ruleset **, int, struct pf_rule **, + struct pf_rule **, int *); + +int pf_map_addr(u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, + struct pf_addr *, struct pf_src_node **); +struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, struct pf_src_node **, + struct pf_state_key **, struct pf_state_key **, + struct pf_addr *, struct pf_addr *, + uint16_t, uint16_t, struct pf_anchor_stackframe *); + +struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, + struct pf_addr *, u_int16_t, u_int16_t); +struct pf_state_key *pf_state_key_clone(struct pf_state_key *); +#endif /* _KERNEL */ + +#endif /* _NET_PFVAR_H_ */ diff --git a/freebsd/sys/net/ppp_defs.h b/freebsd/sys/net/ppp_defs.h index 386a1763..5f6d4106 100644 --- a/freebsd/sys/net/ppp_defs.h +++ b/freebsd/sys/net/ppp_defs.h @@ -31,6 +31,8 @@ #ifndef _PPP_DEFS_H_ #define _PPP_DEFS_H_ +#include <sys/_types.h> + /* * The basic PPP frame. */ @@ -83,7 +85,7 @@ /* * Extended asyncmap - allows any character to be escaped. 
*/ -typedef u_int32_t ext_accm[8]; +typedef __uint32_t ext_accm[8]; /* * What to do with network protocol (NP) packets. @@ -143,8 +145,8 @@ struct ppp_comp_stats { * the last NP packet was sent or received. */ struct ppp_idle { - time_t xmit_idle; /* time since last NP packet sent */ - time_t recv_idle; /* time since last NP packet received */ + __time_t xmit_idle; /* time since last NP packet sent */ + __time_t recv_idle; /* time since last NP packet received */ }; #ifndef __P diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c index ba15eb51..2615de65 100644 --- a/freebsd/sys/net/radix.c +++ b/freebsd/sys/net/radix.c @@ -58,18 +58,15 @@ #include <net/radix.h> #endif /* !_KERNEL */ -static int rn_walktree_from(struct radix_node_head *h, void *a, void *m, - walktree_f_t *f, void *w); -static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); static struct radix_node - *rn_insert(void *, struct radix_node_head *, int *, + *rn_insert(void *, struct radix_head *, int *, struct radix_node [2]), *rn_newpair(void *, int, struct radix_node[2]), *rn_search(void *, struct radix_node *), *rn_search_m(void *, struct radix_node *, void *); +static struct radix_node *rn_addmask(void *, struct radix_mask_head *, int,int); -static void rn_detachhead_internal(void **head); -static int rn_inithead_internal(void **head, int off); +static void rn_detachhead_internal(struct radix_head *); #define RADIX_MAX_KEY_LEN 32 @@ -81,14 +78,6 @@ static char rn_ones[RADIX_MAX_KEY_LEN] = { -1, -1, -1, -1, -1, -1, -1, -1, }; -/* - * XXX: Compat stuff for old rn_addmask() users - */ -static struct radix_node_head *mask_rnhead_compat; -#ifdef _KERNEL -static struct mtx mask_mtx; -#endif - static int rn_lexobetter(void *m_arg, void *n_arg); static struct radix_mask * @@ -225,7 +214,7 @@ rn_refines(void *m_arg, void *n_arg) * from host routes. 
*/ struct radix_node * -rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head) +rn_lookup(void *v_arg, void *m_arg, struct radix_head *head) { struct radix_node *x; caddr_t netmask; @@ -234,7 +223,7 @@ rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head) /* * Most common case: search exact prefix/mask */ - x = rn_addmask_r(m_arg, head->rnh_masks, 1, + x = rn_addmask(m_arg, head->rnh_masks, 1, head->rnh_treetop->rn_offset); if (x == NULL) return (NULL); @@ -287,7 +276,7 @@ rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip) * Search for longest-prefix match in given @head */ struct radix_node * -rn_match(void *v_arg, struct radix_node_head *head) +rn_match(void *v_arg, struct radix_head *head) { caddr_t v = v_arg; struct radix_node *t = head->rnh_treetop, *x; @@ -436,7 +425,7 @@ rn_newpair(void *v, int b, struct radix_node nodes[2]) } static struct radix_node * -rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry, +rn_insert(void *v_arg, struct radix_head *head, int *dupentry, struct radix_node nodes[2]) { caddr_t v = v_arg; @@ -500,9 +489,9 @@ on1: } struct radix_node * -rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip) +rn_addmask(void *n_arg, struct radix_mask_head *maskhead, int search, int skip) { - unsigned char *netmask = arg; + unsigned char *netmask = n_arg; unsigned char *cp, *cplim; struct radix_node *x; int b = 0, mlen, j; @@ -515,7 +504,7 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip) if (skip == 0) skip = 1; if (mlen <= skip) - return (maskhead->rnh_nodes); + return (maskhead->mask_nodes); bzero(addmask_key, RADIX_MAX_KEY_LEN); if (skip > 1) @@ -528,22 +517,22 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip) cp--; mlen = cp - addmask_key; if (mlen <= skip) - return (maskhead->rnh_nodes); + return (maskhead->mask_nodes); *addmask_key = mlen; - x = rn_search(addmask_key, maskhead->rnh_treetop); + x = 
rn_search(addmask_key, maskhead->head.rnh_treetop); if (bcmp(addmask_key, x->rn_key, mlen) != 0) - x = 0; + x = NULL; if (x || search) return (x); R_Zalloc(x, struct radix_node *, RADIX_MAX_KEY_LEN + 2 * sizeof (*x)); - if ((saved_x = x) == 0) + if ((saved_x = x) == NULL) return (0); - netmask = cp = (caddr_t)(x + 2); + netmask = cp = (unsigned char *)(x + 2); bcopy(addmask_key, cp, mlen); - x = rn_insert(cp, maskhead, &maskduplicated, x); + x = rn_insert(cp, &maskhead->head, &maskduplicated, x); if (maskduplicated) { log(LOG_ERR, "rn_addmask: mask impossibly already in tree"); - Free(saved_x); + R_Free(saved_x); return (x); } /* @@ -571,23 +560,6 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip) return (x); } -struct radix_node * -rn_addmask(void *n_arg, int search, int skip) -{ - struct radix_node *tt; - -#ifdef _KERNEL - mtx_lock(&mask_mtx); -#endif - tt = rn_addmask_r(&mask_rnhead_compat, n_arg, search, skip); - -#ifdef _KERNEL - mtx_unlock(&mask_mtx); -#endif - - return (tt); -} - static int /* XXX: arbitrary ordering for non-contiguous masks */ rn_lexobetter(void *m_arg, void *n_arg) { @@ -625,11 +597,11 @@ rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next) } struct radix_node * -rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, +rn_addroute(void *v_arg, void *n_arg, struct radix_head *head, struct radix_node treenodes[2]) { caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; - struct radix_node *t, *x = 0, *tt; + struct radix_node *t, *x = NULL, *tt; struct radix_node *saved_tt, *top = head->rnh_treetop; short b = 0, b_leaf = 0; int keyduplicated; @@ -644,7 +616,7 @@ rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * nodes and possibly save time in calculating indices. 
*/ if (netmask) { - x = rn_addmask_r(netmask, head->rnh_masks, 0, top->rn_offset); + x = rn_addmask(netmask, head->rnh_masks, 0, top->rn_offset); if (x == NULL) return (0); b_leaf = x->rn_bit; @@ -752,7 +724,7 @@ rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) if (m->rm_bit >= b_leaf) break; - t->rn_mklist = m; *mp = 0; + t->rn_mklist = m; *mp = NULL; } on2: /* Add new route to highest possible ancestor's list */ @@ -799,7 +771,7 @@ on2: } struct radix_node * -rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head) +rn_delete(void *v_arg, void *netmask_arg, struct radix_head *head) { struct radix_node *t, *p, *x, *tt; struct radix_mask *m, *saved_m, **mp; @@ -815,22 +787,22 @@ rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head) vlen = LEN(v); saved_tt = tt; top = x; - if (tt == 0 || + if (tt == NULL || bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) return (0); /* * Delete our route from mask lists. 
*/ if (netmask) { - x = rn_addmask_r(netmask, head->rnh_masks, 1, head_off); + x = rn_addmask(netmask, head->rnh_masks, 1, head_off); if (x == NULL) return (0); netmask = x->rn_key; while (tt->rn_mask != netmask) - if ((tt = tt->rn_dupedkey) == 0) + if ((tt = tt->rn_dupedkey) == NULL) return (0); } - if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) + if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == NULL) goto on1; if (tt->rn_flags & RNF_NORMAL) { if (m->rm_leaf != tt || m->rm_refs > 0) { @@ -856,10 +828,10 @@ rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head) for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) if (m == saved_m) { *mp = m->rm_mklist; - Free(m); + R_Free(m); break; } - if (m == 0) { + if (m == NULL) { log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); if (tt->rn_flags & RNF_NORMAL) return (0); /* Dangling ref to us */ @@ -947,7 +919,7 @@ on1: struct radix_mask *mm = m->rm_mklist; x->rn_mklist = 0; if (--(m->rm_refs) < 0) - Free(m); + R_Free(m); m = mm; } if (m) @@ -986,8 +958,8 @@ out: * This is the same as rn_walktree() except for the parameters and the * exit. */ -static int -rn_walktree_from(struct radix_node_head *h, void *a, void *m, +int +rn_walktree_from(struct radix_head *h, void *a, void *m, walktree_f_t *f, void *w) { int error; @@ -998,6 +970,8 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m, int stopping = 0; int lastb; + KASSERT(m != NULL, ("%s: mask needs to be specified", __func__)); + /* * rn_search_m is sort-of-open-coded here. We cannot use the * function because we need to keep track of the last node seen. @@ -1021,11 +995,11 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m, /* * Two cases: either we stepped off the end of our mask, * in which case last == rn, or we reached a leaf, in which - * case we want to start from the last node we looked at. - * Either way, last is the node we want to start from. + * case we want to start from the leaf. 
*/ - rn = last; - lastb = rn->rn_bit; + if (rn->rn_bit >= 0) + rn = last; + lastb = last->rn_bit; /* printf("rn %p, lastb %d\n", rn, lastb);*/ @@ -1072,7 +1046,7 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m, rn = rn->rn_left; next = rn; /* Process leaves */ - while ((rn = base) != 0) { + while ((rn = base) != NULL) { base = rn->rn_dupedkey; /* printf("leaf %p\n", rn); */ if (!(rn->rn_flags & RNF_ROOT) @@ -1090,8 +1064,8 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m, return (0); } -static int -rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w) +int +rn_walktree(struct radix_head *h, walktree_f_t *f, void *w) { int error; struct radix_node *base, *next; @@ -1130,82 +1104,94 @@ rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w) } /* - * Allocate and initialize an empty tree. This has 3 nodes, which are - * part of the radix_node_head (in the order <left,root,right>) and are + * Initialize an empty tree. This has 3 nodes, which are passed + * via base_nodes (in the order <left,root,right>) and are * marked RNF_ROOT so they cannot be freed. * The leaves have all-zero and all-one keys, with significant * bits starting at 'off'. - * Return 1 on success, 0 on error. */ -static int -rn_inithead_internal(void **head, int off) +void +rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes, int off) { - struct radix_node_head *rnh; struct radix_node *t, *tt, *ttt; - if (*head) - return (1); - R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh)); - if (rnh == 0) - return (0); -#ifdef _KERNEL - RADIX_NODE_HEAD_LOCK_INIT(rnh); -#endif - *head = rnh; - t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); - ttt = rnh->rnh_nodes + 2; + + t = rn_newpair(rn_zeros, off, base_nodes); + ttt = base_nodes + 2; t->rn_right = ttt; t->rn_parent = t; - tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */ + tt = t->rn_left; /* ... 
which in turn is base_nodes */ tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; tt->rn_bit = -1 - off; *ttt = *tt; ttt->rn_key = rn_ones; - rnh->rnh_addaddr = rn_addroute; - rnh->rnh_deladdr = rn_delete; - rnh->rnh_matchaddr = rn_match; - rnh->rnh_lookup = rn_lookup; - rnh->rnh_walktree = rn_walktree; - rnh->rnh_walktree_from = rn_walktree_from; - rnh->rnh_treetop = t; - return (1); + + rh->rnh_treetop = t; } static void -rn_detachhead_internal(void **head) +rn_detachhead_internal(struct radix_head *head) { - struct radix_node_head *rnh; - KASSERT((head != NULL && *head != NULL), + KASSERT((head != NULL), ("%s: head already freed", __func__)); - rnh = *head; /* Free <left,root,right> nodes. */ - Free(rnh); - - *head = NULL; + R_Free(head); } +/* Functions used by 'struct radix_node_head' users */ + int rn_inithead(void **head, int off) { struct radix_node_head *rnh; + struct radix_mask_head *rmh; + + rnh = *head; + rmh = NULL; if (*head != NULL) return (1); - if (rn_inithead_internal(head, off) == 0) + R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh)); + R_Zalloc(rmh, struct radix_mask_head *, sizeof (*rmh)); + if (rnh == NULL || rmh == NULL) { + if (rnh != NULL) + R_Free(rnh); + if (rmh != NULL) + R_Free(rmh); return (0); + } - rnh = (struct radix_node_head *)(*head); + /* Init trees */ + rn_inithead_internal(&rnh->rh, rnh->rnh_nodes, off); + rn_inithead_internal(&rmh->head, rmh->mask_nodes, 0); + *head = rnh; + rnh->rh.rnh_masks = rmh; - if (rn_inithead_internal((void **)&rnh->rnh_masks, 0) == 0) { - rn_detachhead_internal(head); - return (0); - } + /* Finally, set base callbacks */ + rnh->rnh_addaddr = rn_addroute; + rnh->rnh_deladdr = rn_delete; + rnh->rnh_matchaddr = rn_match; + rnh->rnh_lookup = rn_lookup; + rnh->rnh_walktree = rn_walktree; + rnh->rnh_walktree_from = rn_walktree_from; return (1); } +static int +rn_freeentry(struct radix_node *rn, void *arg) +{ + struct radix_head * const rnh = arg; + struct radix_node *x; + + x = (struct radix_node 
*)rn_delete(rn + 2, NULL, rnh); + if (x != NULL) + R_Free(x); + return (0); +} + int rn_detachhead(void **head) { @@ -1214,29 +1200,14 @@ rn_detachhead(void **head) KASSERT((head != NULL && *head != NULL), ("%s: head already freed", __func__)); - rnh = *head; + rnh = (struct radix_node_head *)(*head); - rn_detachhead_internal((void **)&rnh->rnh_masks); - rn_detachhead_internal(head); - return (1); -} + rn_walktree(&rnh->rh.rnh_masks->head, rn_freeentry, rnh->rh.rnh_masks); + rn_detachhead_internal(&rnh->rh.rnh_masks->head); + rn_detachhead_internal(&rnh->rh); -void -rn_init(int maxk) -{ - if ((maxk <= 0) || (maxk > RADIX_MAX_KEY_LEN)) { - log(LOG_ERR, - "rn_init: max_keylen must be within 1..%d\n", - RADIX_MAX_KEY_LEN); - return; - } + *head = NULL; - /* - * XXX: Compat for old rn_addmask() users - */ - if (rn_inithead((void **)(void *)&mask_rnhead_compat, 0) == 0) - panic("rn_init 2"); -#ifdef _KERNEL - mtx_init(&mask_mtx, "radix_mask", NULL, MTX_DEF); -#endif + return (1); } + diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h index 3554c77c..69aad831 100644 --- a/freebsd/sys/net/radix.h +++ b/freebsd/sys/net/radix.h @@ -101,52 +101,61 @@ struct radix_mask { #define rm_mask rm_rmu.rmu_mask #define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */ +struct radix_head; + typedef int walktree_f_t(struct radix_node *, void *); +typedef struct radix_node *rn_matchaddr_f_t(void *v, + struct radix_head *head); +typedef struct radix_node *rn_addaddr_f_t(void *v, void *mask, + struct radix_head *head, struct radix_node nodes[]); +typedef struct radix_node *rn_deladdr_f_t(void *v, void *mask, + struct radix_head *head); +typedef struct radix_node *rn_lookup_f_t(void *v, void *mask, + struct radix_head *head); +typedef int rn_walktree_t(struct radix_head *head, walktree_f_t *f, + void *w); +typedef int rn_walktree_from_t(struct radix_head *head, + void *a, void *m, walktree_f_t *f, void *w); +typedef void rn_close_t(struct radix_node *rn, struct 
radix_head *head); + +struct radix_mask_head; + +struct radix_head { + struct radix_node *rnh_treetop; + struct radix_mask_head *rnh_masks; /* Storage for our masks */ +}; struct radix_node_head { - struct radix_node *rnh_treetop; - u_int rnh_gen; /* generation counter */ - int rnh_multipath; /* multipath capable ? */ - int rnh_addrsize; /* permit, but not require fixed keys */ - int rnh_pktsize; /* permit, but not require fixed keys */ - struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ - (void *v, void *mask, - struct radix_node_head *head, struct radix_node nodes[]); - struct radix_node *(*rnh_addpkt) /* add based on packet hdr */ - (void *v, void *mask, - struct radix_node_head *head, struct radix_node nodes[]); - struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */ - (void *v, void *mask, struct radix_node_head *head); - struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */ - (void *v, void *mask, struct radix_node_head *head); - struct radix_node *(*rnh_matchaddr) /* longest match for sockaddr */ - (void *v, struct radix_node_head *head); - struct radix_node *(*rnh_lookup) /*exact match for sockaddr*/ - (void *v, void *mask, struct radix_node_head *head); - struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */ - (void *v, struct radix_node_head *head); - int (*rnh_walktree) /* traverse tree */ - (struct radix_node_head *head, walktree_f_t *f, void *w); - int (*rnh_walktree_from) /* traverse tree below a */ - (struct radix_node_head *head, void *a, void *m, - walktree_f_t *f, void *w); - void (*rnh_close) /* do something when the last ref drops */ - (struct radix_node *rn, struct radix_node_head *head); + struct radix_head rh; + rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */ + rn_addaddr_f_t *rnh_addaddr; /* add based on sockaddr*/ + rn_deladdr_f_t *rnh_deladdr; /* remove based on sockaddr */ + rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */ + rn_walktree_t *rnh_walktree; /* 
traverse tree */ + rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */ + rn_close_t *rnh_close; /*do something when the last ref drops*/ struct radix_node rnh_nodes[3]; /* empty tree for common case */ #ifdef _KERNEL struct rwlock rnh_lock; /* locks entire radix tree */ #endif - struct radix_node_head *rnh_masks; /* Storage for our masks */ }; +struct radix_mask_head { + struct radix_head head; + struct radix_node mask_nodes[3]; +}; + +void rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes, + int off); + #ifndef _KERNEL #define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n))) #define R_Zalloc(p, t, n) (p = (t) calloc(1,(unsigned int)(n))) -#define Free(p) free((char *)p); +#define R_Free(p) free((char *)p); #else #define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT)) #define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO)) -#define Free(p) free((caddr_t)p, M_RTABLE); +#define R_Free(p) free((caddr_t)p, M_RTABLE); #define RADIX_NODE_HEAD_LOCK_INIT(rnh) \ rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0) @@ -162,18 +171,17 @@ struct radix_node_head { #define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED) #endif /* _KERNEL */ -void rn_init(int); int rn_inithead(void **, int); int rn_detachhead(void **); int rn_refines(void *, void *); -struct radix_node - *rn_addmask(void *, int, int), - *rn_addmask_r(void *, struct radix_node_head *, int, int), - *rn_addroute (void *, void *, struct radix_node_head *, - struct radix_node [2]), - *rn_delete(void *, void *, struct radix_node_head *), - *rn_lookup (void *v_arg, void *m_arg, - struct radix_node_head *head), - *rn_match(void *, struct radix_node_head *); +struct radix_node *rn_addroute(void *, void *, struct radix_head *, + struct radix_node[2]); +struct radix_node *rn_delete(void *, void *, struct radix_head *); +struct radix_node *rn_lookup (void *v_arg, void *m_arg, + struct 
radix_head *head); +struct radix_node *rn_match(void *, struct radix_head *); +int rn_walktree_from(struct radix_head *h, void *a, void *m, + walktree_f_t *f, void *w); +int rn_walktree(struct radix_head *, walktree_f_t *, void *); #endif /* _RADIX_H_ */ diff --git a/freebsd/sys/net/radix_mpath.c b/freebsd/sys/net/radix_mpath.c index 1bce388e..f5215205 100644 --- a/freebsd/sys/net/radix_mpath.c +++ b/freebsd/sys/net/radix_mpath.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <net/radix.h> #include <net/radix_mpath.h> #include <net/route.h> +#include <net/route_var.h> #include <net/if.h> #include <net/if_var.h> @@ -59,12 +60,19 @@ __FBSDID("$FreeBSD$"); static uint32_t hashjitter; int -rn_mpath_capable(struct radix_node_head *rnh) +rt_mpath_capable(struct rib_head *rnh) { return rnh->rnh_multipath; } +int +rn_mpath_capable(struct radix_head *rh) +{ + + return (rt_mpath_capable((struct rib_head *)rh)); +} + struct radix_node * rn_mpath_next(struct radix_node *rn) { @@ -91,7 +99,7 @@ rn_mpath_count(struct radix_node *rn) while (rn != NULL) { rt = (struct rtentry *)rn; - i += rt->rt_rmx.rmx_weight; + i += rt->rt_weight; rn = rn_mpath_next(rn); } return (i); @@ -165,14 +173,14 @@ rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt) * Assume @rt rt_key host bits are cleared according to @netmask */ int -rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt, +rt_mpath_conflict(struct rib_head *rnh, struct rtentry *rt, struct sockaddr *netmask) { struct radix_node *rn, *rn1; struct rtentry *rt1; rn = (struct radix_node *)rt; - rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh); + rn1 = rnh->rnh_lookup(rt_key(rt), netmask, &rnh->head); if (!rn1 || rn1->rn_flags & RNF_ROOT) return (0); @@ -203,18 +211,50 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt, return (0); } -void -#ifndef __rtems__ -rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) -#else /* __rtems__ */ -rtalloc_mpath_fib(struct route *ro, u_int32_t hash, 
u_int fibnum) -#endif /* __rtems__ */ +static struct rtentry * +rt_mpath_selectrte(struct rtentry *rte, uint32_t hash) { struct radix_node *rn0, *rn; - u_int32_t n; + uint32_t total_weight; struct rtentry *rt; int64_t weight; + /* beyond here, we use rn as the master copy */ + rn0 = rn = (struct radix_node *)rte; + rt = rte; + + /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */ + total_weight = rn_mpath_count(rn0); + hash += hashjitter; + hash %= total_weight; + for (weight = abs((int32_t)hash); + rt != NULL && weight >= rt->rt_weight; + weight -= (rt == NULL) ? 0 : rt->rt_weight) { + + /* stay within the multipath routes */ + if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) + break; + rn = rn->rn_dupedkey; + rt = (struct rtentry *)rn; + } + + return (rt); +} + +struct rtentry * +rt_mpath_select(struct rtentry *rte, uint32_t hash) +{ + if (rn_mpath_next((struct radix_node *)rte) == NULL) + return (rte); + + return (rt_mpath_selectrte(rte, hash)); +} + +void +rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) +{ + struct rtentry *rt; + /* * XXX we don't attempt to lookup cached route again; what should * be done for sendto(3) case? @@ -232,34 +272,18 @@ rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum) return; } - /* beyond here, we use rn as the master copy */ - rn0 = rn = (struct radix_node *)ro->ro_rt; - n = rn_mpath_count(rn0); - - /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? 
*/ - hash += hashjitter; - hash %= n; - for (weight = abs((int32_t)hash), rt = ro->ro_rt; - weight >= rt->rt_rmx.rmx_weight && rn; - weight -= rt->rt_rmx.rmx_weight) { - - /* stay within the multipath routes */ - if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) - break; - rn = rn->rn_dupedkey; - rt = (struct rtentry *)rn; - } + rt = rt_mpath_selectrte(ro->ro_rt, hash); /* XXX try filling rt_gwroute and avoid unreachable gw */ /* gw selection has failed - there must be only zero weight routes */ - if (!rn) { + if (!rt) { RT_UNLOCK(ro->ro_rt); ro->ro_rt = NULL; return; } if (ro->ro_rt != rt) { RTFREE_LOCKED(ro->ro_rt); - ro->ro_rt = (struct rtentry *)rn; + ro->ro_rt = rt; RT_LOCK(ro->ro_rt); RT_ADDREF(ro->ro_rt); @@ -274,11 +298,11 @@ extern int in_inithead(void **head, int off); int rn4_mpath_inithead(void **head, int off) { - struct radix_node_head *rnh; + struct rib_head *rnh; hashjitter = arc4random(); if (in_inithead(head, off) == 1) { - rnh = (struct radix_node_head *)*head; + rnh = (struct rib_head *)*head; rnh->rnh_multipath = 1; return 1; } else @@ -290,11 +314,11 @@ rn4_mpath_inithead(void **head, int off) int rn6_mpath_inithead(void **head, int off) { - struct radix_node_head *rnh; + struct rib_head *rnh; hashjitter = arc4random(); if (in6_inithead(head, off) == 1) { - rnh = (struct radix_node_head *)*head; + rnh = (struct rib_head *)*head; rnh->rnh_multipath = 1; return 1; } else diff --git a/freebsd/sys/net/radix_mpath.h b/freebsd/sys/net/radix_mpath.h index bcb210e3..2b0d442e 100644 --- a/freebsd/sys/net/radix_mpath.h +++ b/freebsd/sys/net/radix_mpath.h @@ -44,16 +44,16 @@ struct route; struct rtentry; struct sockaddr; -int rn_mpath_capable(struct radix_node_head *); +struct rib_head; +int rt_mpath_capable(struct rib_head *); +int rn_mpath_capable(struct radix_head *); struct radix_node *rn_mpath_next(struct radix_node *); u_int32_t rn_mpath_count(struct radix_node *); struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr 
*); -int rt_mpath_conflict(struct radix_node_head *, struct rtentry *, +int rt_mpath_conflict(struct rib_head *, struct rtentry *, struct sockaddr *); void rtalloc_mpath_fib(struct route *, u_int32_t, u_int); -#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0) -struct radix_node *rn_mpath_lookup(void *, void *, - struct radix_node_head *); +struct rtentry *rt_mpath_select(struct rtentry *, uint32_t); int rt_mpath_deldup(struct rtentry *, struct rtentry *); int rn4_mpath_inithead(void **, int); int rn6_mpath_inithead(void **, int); diff --git a/freebsd/sys/net/raw_cb.c b/freebsd/sys/net/raw_cb.c index 10db8bba..00a199f3 100644 --- a/freebsd/sys/net/raw_cb.c +++ b/freebsd/sys/net/raw_cb.c @@ -46,8 +46,8 @@ #include <sys/systm.h> #include <net/if.h> -#include <net/raw_cb.h> #include <net/vnet.h> +#include <net/raw_cb.h> /* * Routines to manage the raw protocol control blocks. diff --git a/freebsd/sys/net/raw_usrreq.c b/freebsd/sys/net/raw_usrreq.c index 1030526f..e170ad74 100644 --- a/freebsd/sys/net/raw_usrreq.c +++ b/freebsd/sys/net/raw_usrreq.c @@ -48,8 +48,8 @@ #include <sys/systm.h> #include <net/if.h> -#include <net/raw_cb.h> #include <net/vnet.h> +#include <net/raw_cb.h> MTX_SYSINIT(rawcb_mtx, &rawcb_mtx, "rawcb", MTX_DEF); @@ -85,7 +85,7 @@ raw_input_ext(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src, struct mbuf *m = m0; struct socket *last; - last = 0; + last = NULL; mtx_lock(&rawcb_mtx); LIST_FOREACH(rp, &V_rawcb_list, list) { if (rp->rcb_proto.sp_family != proto->sp_family) diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c index 781d8bb9..3eb05b94 100644 --- a/freebsd/sys/net/route.c +++ b/freebsd/sys/net/route.c @@ -45,7 +45,6 @@ #include <rtems/bsd/sys/param.h> #include <sys/systm.h> -#include <sys/syslog.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/socket.h> @@ -57,8 +56,10 @@ #include <sys/kernel.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include 
<net/route.h> +#include <net/route_var.h> #include <net/vnet.h> #include <net/flowtable.h> @@ -75,8 +76,7 @@ #include <sys/file.h> #endif /* __rtems__ */ -/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */ -#define RT_MAXFIBS 16 +#define RT_MAXFIBS UINT16_MAX /* Kernel config default option. */ #ifdef ROUTETABLES @@ -102,17 +102,7 @@ extern void sctp_addr_change(struct ifaddr *ifa, int cmd); /* This is read-only.. */ u_int rt_numfibs = RT_NUMFIBS; -SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); -/* - * Allow the boot code to allow LESS than RT_MAXFIBS to be used. - * We can't do more because storage is statically allocated for now. - * (for compatibility reasons.. this will change. When this changes, code should - * be refactored to protocol independent parts and protocol dependent parts, - * probably hanging of domain(9) specific storage to not need the full - * fib * af RNH allocation etc. but allow tuning the number of tables per - * address family). - */ -TUNABLE_INT("net.fibs", &rt_numfibs); +SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, ""); /* * By default add routes to all fibs for new interfaces. @@ -124,25 +114,20 @@ TUNABLE_INT("net.fibs", &rt_numfibs); * always work given the fib can be overridden and prefixes can be added * from the network stack context. 
*/ -u_int rt_add_addr_allfibs = 1; -SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, - &rt_add_addr_allfibs, 0, ""); -TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); +VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1; +SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET, + &VNET_NAME(rt_add_addr_allfibs), 0, ""); VNET_DEFINE(struct rtstat, rtstat); #define V_rtstat VNET(rtstat) -VNET_DEFINE(struct radix_node_head *, rt_tables); +VNET_DEFINE(struct rib_head *, rt_tables); #define V_rt_tables VNET(rt_tables) VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ #define V_rttrash VNET(rttrash) -/* compare two sockaddr structures */ -#define sa_equal(a1, a2) (((a1)->sa_len == (a2)->sa_len) && \ - (bcmp((a1), (a2), (a1)->sa_len) == 0)) - /* * Convert a 'struct radix_node *' to a 'struct rtentry *'. * The operation can be done safely (in this code) because a @@ -158,6 +143,28 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. 
*/ #define V_rtzone VNET(rtzone) +static int rtrequest1_fib_change(struct rib_head *, struct rt_addrinfo *, + struct rtentry **, u_int); +static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *); +static int rt_ifdelroute(const struct rtentry *rt, void *arg); +static struct rtentry *rt_unlinkrte(struct rib_head *rnh, + struct rt_addrinfo *info, int *perror); +static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); +#ifdef RADIX_MPATH +static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, + struct rt_addrinfo *info, struct rtentry *rto, int *perror); +#endif +static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, + int flags); + +struct if_mtuinfo +{ + struct ifnet *ifp; + int mtu; +}; + +static int if_updatemtu_cb(struct radix_node *, void *); + /* * handler for net.my_fibnum */ @@ -179,10 +186,10 @@ sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); -static __inline struct radix_node_head ** +static __inline struct rib_head ** rt_tables_get_rnh_ptr(int table, int fam) { - struct radix_node_head **rnh; + struct rib_head **rnh; KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", __func__)); @@ -190,20 +197,32 @@ rt_tables_get_rnh_ptr(int table, int fam) __func__)); /* rnh is [fib=0][af=0]. */ - rnh = (struct radix_node_head **)V_rt_tables; + rnh = (struct rib_head **)V_rt_tables; /* Get the offset to the requested table and fam. 
*/ rnh += table * (AF_MAX+1) + fam; return (rnh); } -struct radix_node_head * +struct rib_head * rt_tables_get_rnh(int table, int fam) { return (*rt_tables_get_rnh_ptr(table, fam)); } +u_int +rt_tables_get_gen(int table, int fam) +{ + struct rib_head *rnh; + + rnh = *rt_tables_get_rnh_ptr(table, fam); + KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d", + __func__, table, fam)); + return (rnh->rnh_gen); +} + + /* * route initialization must occur before ip6_init2(), which happenas at * SI_ORDER_MIDDLE. @@ -211,36 +230,72 @@ rt_tables_get_rnh(int table, int fam) static void route_init(void) { - struct domain *dom; - int max_keylen = 0; /* whack the tunable ints into line. */ if (rt_numfibs > RT_MAXFIBS) rt_numfibs = RT_MAXFIBS; if (rt_numfibs == 0) rt_numfibs = 1; +} +SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); - for (dom = domains; dom; dom = dom->dom_next) - if (dom->dom_maxrtkey > max_keylen) - max_keylen = dom->dom_maxrtkey; +static int +rtentry_zinit(void *mem, int size, int how) +{ + struct rtentry *rt = mem; + + rt->rt_pksent = counter_u64_alloc(how); + if (rt->rt_pksent == NULL) + return (ENOMEM); - rn_init(max_keylen); /* init all zeroes, all ones, mask table */ + RT_LOCK_INIT(rt); + + return (0); +} + +static void +rtentry_zfini(void *mem, int size) +{ + struct rtentry *rt = mem; + + RT_LOCK_DESTROY(rt); + counter_u64_free(rt->rt_pksent); +} + +static int +rtentry_ctor(void *mem, int size, void *arg, int how) +{ + struct rtentry *rt = mem; + + bzero(rt, offsetof(struct rtentry, rt_endzero)); + counter_u64_zero(rt->rt_pksent); + rt->rt_chain = NULL; + + return (0); +} + +static void +rtentry_dtor(void *mem, int size, void *arg) +{ + struct rtentry *rt = mem; + + RT_UNLOCK_COND(rt); } -SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); static void vnet_route_init(const void *unused __unused) { struct domain *dom; - struct radix_node_head **rnh; + struct rib_head **rnh; int table; int fam; 
V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * - sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); + sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO); - V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, 0); + V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), + rtentry_ctor, rtentry_dtor, + rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtattach == NULL) continue; @@ -250,15 +305,10 @@ vnet_route_init(const void *unused __unused) if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; - /* - * XXX MRT rtattach will be also called from - * vfs_export.c but the offset will be 0 (only for - * AF_INET and AF_INET6 which don't need it anyhow). - */ rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); - dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); + dom->dom_rtattach((void **)rnh, 0); } } } @@ -272,7 +322,7 @@ vnet_route_uninit(const void *unused __unused) int table; int fam; struct domain *dom; - struct radix_node_head **rnh; + struct rib_head **rnh; for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtdetach == NULL) @@ -287,14 +337,68 @@ vnet_route_uninit(const void *unused __unused) rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); - dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset); + dom->dom_rtdetach((void **)rnh, 0); } } + + free(V_rt_tables, M_RTABLE); + uma_zdestroy(V_rtzone); } -VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, +VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, vnet_route_uninit, 0); #endif +struct rib_head * +rt_table_init(int offset) +{ + struct rib_head *rh; + + rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO); + + /* TODO: These details should be hidded inside radix.c */ + /* Init masks tree */ + rn_inithead_internal(&rh->head, rh->rnh_nodes, 
offset); + rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0); + rh->head.rnh_masks = &rh->rmhead; + + /* Init locks */ + rw_init(&rh->rib_lock, "rib head lock"); + + /* Finally, set base callbacks */ + rh->rnh_addaddr = rn_addroute; + rh->rnh_deladdr = rn_delete; + rh->rnh_matchaddr = rn_match; + rh->rnh_lookup = rn_lookup; + rh->rnh_walktree = rn_walktree; + rh->rnh_walktree_from = rn_walktree_from; + + return (rh); +} + +static int +rt_freeentry(struct radix_node *rn, void *arg) +{ + struct radix_head * const rnh = arg; + struct radix_node *x; + + x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh); + if (x != NULL) + R_Free(x); + return (0); +} + +void +rt_table_destroy(struct rib_head *rh) +{ + + rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head); + + /* Assume table is already empty */ + rw_destroy(&rh->rib_lock); + free(rh, M_RTABLE); +} + + #ifndef _SYS_SYSPROTO_H_ struct setfib_args { int fibnum; @@ -335,35 +439,6 @@ setfib(int fibnum) * Packet routing routines. 
*/ void -rtalloc(struct route *ro) -{ - - rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); -} - -void -rtalloc_fib(struct route *ro, u_int fibnum) -{ - rtalloc_ign_fib(ro, 0UL, fibnum); -} - -void -rtalloc_ign(struct route *ro, u_long ignore) -{ - struct rtentry *rt; - - if ((rt = ro->ro_rt) != NULL) { - if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) - return; - RTFREE(rt); - ro->ro_rt = NULL; - } - ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); - if (ro->ro_rt) - RT_UNLOCK(ro->ro_rt); -} - -void rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) { struct rtentry *rt; @@ -396,49 +471,32 @@ struct rtentry * rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { - struct radix_node_head *rnh; + struct rib_head *rh; struct radix_node *rn; struct rtentry *newrt; struct rt_addrinfo info; int err = 0, msgtype = RTM_MISS; - int needlock; KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); - switch (dst->sa_family) { - case AF_INET6: - case AF_INET: - /* We support multiple FIBs. 
*/ - break; - default: - fibnum = RT_DEFAULT_FIB; - break; - } - rnh = rt_tables_get_rnh(fibnum, dst->sa_family); + rh = rt_tables_get_rnh(fibnum, dst->sa_family); newrt = NULL; - if (rnh == NULL) + if (rh == NULL) goto miss; /* * Look up the address in the table for that Address Family */ - needlock = !(ignflags & RTF_RNH_LOCKED); - if (needlock) - RADIX_NODE_HEAD_RLOCK(rnh); -#ifdef INVARIANTS - else - RADIX_NODE_HEAD_LOCK_ASSERT(rnh); -#endif - rn = rnh->rnh_matchaddr(dst, rnh); + RIB_RLOCK(rh); + rn = rh->rnh_matchaddr(dst, &rh->head); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { newrt = RNTORT(rn); RT_LOCK(newrt); RT_ADDREF(newrt); - if (needlock) - RADIX_NODE_HEAD_RUNLOCK(rnh); - goto done; + RIB_RUNLOCK(rh); + return (newrt); - } else if (needlock) - RADIX_NODE_HEAD_RUNLOCK(rnh); + } else + RIB_RUNLOCK(rh); /* * Either we hit the root or couldn't find any match, @@ -457,10 +515,7 @@ miss: bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; rt_missmsg_fib(msgtype, &info, 0, err, fibnum); - } -done: - if (newrt) - RT_LOCK_ASSERT(newrt); + } return (newrt); } @@ -471,7 +526,7 @@ done: void rtfree(struct rtentry *rt) { - struct radix_node_head *rnh; + struct rib_head *rnh; KASSERT(rt != NULL,("%s: NULL rt", __func__)); rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); @@ -499,7 +554,7 @@ rtfree(struct rtentry *rt) * on the entry so that the code below reclaims the storage. */ if (rt->rt_refcnt == 0 && rnh->rnh_close) - rnh->rnh_close((struct radix_node *)rt, rnh); + rnh->rnh_close((struct radix_node *)rt, &rnh->head); /* * If we are no longer "up" (and ref == 0) @@ -531,12 +586,11 @@ rtfree(struct rtentry *rt) * This also frees the gateway, as they are always malloc'd * together. */ - Free(rt_key(rt)); + R_Free(rt_key(rt)); /* * and the rtentry itself of course */ - RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); return; } @@ -552,17 +606,6 @@ done: * message from the network layer. 
*/ void -rtredirect(struct sockaddr *dst, - struct sockaddr *gateway, - struct sockaddr *netmask, - int flags, - struct sockaddr *src) -{ - - rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); -} - -void rtredirect_fib(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, @@ -570,12 +613,12 @@ rtredirect_fib(struct sockaddr *dst, struct sockaddr *src, u_int fibnum) { - struct rtentry *rt, *rt0 = NULL; + struct rtentry *rt; int error = 0; short *stat = NULL; struct rt_addrinfo info; struct ifaddr *ifa; - struct radix_node_head *rnh; + struct rib_head *rnh; ifa = NULL; rnh = rt_tables_get_rnh(fibnum, dst->sa_family); @@ -585,7 +628,7 @@ rtredirect_fib(struct sockaddr *dst, } /* verify the gateway is directly reachable */ - if ((ifa = ifa_ifwithnet_fib(gateway, 0, fibnum)) == NULL) { + if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) { error = ENETUNREACH; goto out; } @@ -596,13 +639,20 @@ rtredirect_fib(struct sockaddr *dst, * we have a routing loop, perhaps as a result of an interface * going down recently. */ - if (!(flags & RTF_DONE) && rt && - (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) - error = EINVAL; - else if (ifa_ifwithaddr_check(gateway)) + if (!(flags & RTF_DONE) && rt) { + if (!sa_equal(src, rt->rt_gateway)) { + error = EINVAL; + goto done; + } + if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) { + error = EINVAL; + goto done; + } + } + if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) { error = EHOSTUNREACH; - if (error) goto done; + } /* * Create a new entry if we just got back a wildcard entry * or the lookup failed. This is necessary for hosts @@ -622,36 +672,31 @@ rtredirect_fib(struct sockaddr *dst, * Create new route, rather than smashing route to net. 
*/ create: - rt0 = rt; - rt = NULL; + if (rt != NULL) + RTFREE_LOCKED(rt); - flags |= RTF_GATEWAY | RTF_DYNAMIC; + flags |= RTF_DYNAMIC; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; - if (rt0 != NULL) - RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); if (rt != NULL) { RT_LOCK(rt); - if (rt0 != NULL) - EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); flags = rt->rt_flags; } - if (rt0 != NULL) - RTFREE(rt0); stat = &V_rtstat.rts_dynamic; } else { - struct rtentry *gwrt; /* * Smash the current notion of the gateway to * this destination. Should check about netmask!!! */ + if ((flags & RTF_GATEWAY) == 0) + rt->rt_flags &= ~RTF_GATEWAY; rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &V_rtstat.rts_newgateway; @@ -659,13 +704,10 @@ rtredirect_fib(struct sockaddr *dst, * add the key and gateway (in one malloc'd chunk). */ RT_UNLOCK(rt); - RADIX_NODE_HEAD_LOCK(rnh); + RIB_WLOCK(rnh); RT_LOCK(rt); rt_setgate(rt, rt_key(rt), gateway); - gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); - RADIX_NODE_HEAD_UNLOCK(rnh); - EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); - RTFREE_LOCKED(gwrt); + RIB_WUNLOCK(rnh); } } else error = EHOSTUNREACH; @@ -687,13 +729,6 @@ out: ifa_free(ifa); } -int -rtioctl(u_long req, caddr_t data) -{ - - return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); -} - /* * Routing table ioctl interface. */ @@ -715,21 +750,11 @@ rtioctl_fib(u_long req, caddr_t data, u_int fibnum) #endif /* INET */ } -/* - * For both ifa_ifwithroute() routines, 'ifa' is returned referenced. 
- */ struct ifaddr * -ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) -{ - - return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB)); -} - -struct ifaddr * -ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, +ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, u_int fibnum) { - register struct ifaddr *ifa; + struct ifaddr *ifa; int not_found = 0; if ((flags & RTF_GATEWAY) == 0) { @@ -742,7 +767,7 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, */ ifa = NULL; if (flags & RTF_HOST) - ifa = ifa_ifwithdstaddr_fib(dst, fibnum); + ifa = ifa_ifwithdstaddr(dst, fibnum); if (ifa == NULL) ifa = ifa_ifwithaddr(gateway); } else { @@ -751,12 +776,12 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, * or host, the gateway may still be on the * other end of a pt to pt link. */ - ifa = ifa_ifwithdstaddr_fib(gateway, fibnum); + ifa = ifa_ifwithdstaddr(gateway, fibnum); } if (ifa == NULL) - ifa = ifa_ifwithnet_fib(gateway, 0, fibnum); + ifa = ifa_ifwithnet(gateway, 0, fibnum); if (ifa == NULL) { - struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); + struct rtentry *rt = rtalloc1_fib(gateway, 0, 0, fibnum); if (rt == NULL) return (NULL); /* @@ -800,19 +825,6 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, * all the bits of info needed */ int -rtrequest(int req, - struct sockaddr *dst, - struct sockaddr *gateway, - struct sockaddr *netmask, - int flags, - struct rtentry **ret_nrt) -{ - - return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, - RT_DEFAULT_FIB)); -} - -int rtrequest_fib(int req, struct sockaddr *dst, struct sockaddr *gateway, @@ -834,6 +846,443 @@ rtrequest_fib(int req, return rtrequest1_fib(req, &info, ret_nrt, fibnum); } + +/* + * Copy most of @rt data into @info. 
+ * + * If @flags contains NHR_COPY, copies dst,netmask and gw to the + * pointers specified by @info structure. Assume such pointers + * are zeroed sockaddr-like structures with sa_len field initialized + * to reflect size of the provided buffer. if no NHR_COPY is specified, + * point dst,netmask and gw @info fields to appropriate @rt values. + * + * if @flags contains NHR_REF, do refcouting on rt_ifp. + * + * Returns 0 on success. + */ +int +rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags) +{ + struct rt_metrics *rmx; + struct sockaddr *src, *dst; + int sa_len; + + if (flags & NHR_COPY) { + /* Copy destination if dst is non-zero */ + src = rt_key(rt); + dst = info->rti_info[RTAX_DST]; + sa_len = src->sa_len; + if (dst != NULL) { + if (src->sa_len > dst->sa_len) + return (ENOMEM); + memcpy(dst, src, src->sa_len); + info->rti_addrs |= RTA_DST; + } + + /* Copy mask if set && dst is non-zero */ + src = rt_mask(rt); + dst = info->rti_info[RTAX_NETMASK]; + if (src != NULL && dst != NULL) { + + /* + * Radix stores different value in sa_len, + * assume rt_mask() to have the same length + * as rt_key() + */ + if (sa_len > dst->sa_len) + return (ENOMEM); + memcpy(dst, src, src->sa_len); + info->rti_addrs |= RTA_NETMASK; + } + + /* Copy gateway is set && dst is non-zero */ + src = rt->rt_gateway; + dst = info->rti_info[RTAX_GATEWAY]; + if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){ + if (src->sa_len > dst->sa_len) + return (ENOMEM); + memcpy(dst, src, src->sa_len); + info->rti_addrs |= RTA_GATEWAY; + } + } else { + info->rti_info[RTAX_DST] = rt_key(rt); + info->rti_addrs |= RTA_DST; + if (rt_mask(rt) != NULL) { + info->rti_info[RTAX_NETMASK] = rt_mask(rt); + info->rti_addrs |= RTA_NETMASK; + } + if (rt->rt_flags & RTF_GATEWAY) { + info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info->rti_addrs |= RTA_GATEWAY; + } + } + + rmx = info->rti_rmx; + if (rmx != NULL) { + info->rti_mflags |= RTV_MTU; + rmx->rmx_mtu = rt->rt_mtu; + } + + 
info->rti_flags = rt->rt_flags; + info->rti_ifp = rt->rt_ifp; + info->rti_ifa = rt->rt_ifa; + + if (flags & NHR_REF) { + /* Do 'traditional' refcouting */ + if_ref(info->rti_ifp); + } + + return (0); +} + +/* + * Lookups up route entry for @dst in RIB database for fib @fibnum. + * Exports entry data to @info using rt_exportinfo(). + * + * if @flags contains NHR_REF, refcouting is performed on rt_ifp. + * All references can be released later by calling rib_free_info() + * + * Returns 0 on success. + * Returns ENOENT for lookup failure, ENOMEM for export failure. + */ +int +rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, + uint32_t flowid, struct rt_addrinfo *info) +{ + struct rib_head *rh; + struct radix_node *rn; + struct rtentry *rt; + int error; + + KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum")); + rh = rt_tables_get_rnh(fibnum, dst->sa_family); + if (rh == NULL) + return (ENOENT); + + RIB_RLOCK(rh); + rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head); + if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { + rt = RNTORT(rn); + /* Ensure route & ifp is UP */ + if (RT_LINK_IS_UP(rt->rt_ifp)) { + flags = (flags & NHR_REF) | NHR_COPY; + error = rt_exportinfo(rt, info, flags); + RIB_RUNLOCK(rh); + + return (error); + } + } + RIB_RUNLOCK(rh); + + return (ENOENT); +} + +/* + * Releases all references acquired by rib_lookup_info() when + * called with NHR_REF flags. + */ +void +rib_free_info(struct rt_addrinfo *info) +{ + + if_rele(info->rti_ifp); +} + +/* + * Iterates over all existing fibs in system calling + * @setwa_f function prior to traversing each fib. + * Calls @wa_f function for each element in current fib. + * If af is not AF_UNSPEC, iterates over fibs in particular + * address family. 
+ */ +void +rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f, + void *arg) +{ + struct rib_head *rnh; + uint32_t fibnum; + int i; + + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + /* Do we want some specific family? */ + if (af != AF_UNSPEC) { + rnh = rt_tables_get_rnh(fibnum, af); + if (rnh == NULL) + continue; + if (setwa_f != NULL) + setwa_f(rnh, fibnum, af, arg); + + RIB_WLOCK(rnh); + rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg); + RIB_WUNLOCK(rnh); + continue; + } + + for (i = 1; i <= AF_MAX; i++) { + rnh = rt_tables_get_rnh(fibnum, i); + if (rnh == NULL) + continue; + if (setwa_f != NULL) + setwa_f(rnh, fibnum, i, arg); + + RIB_WLOCK(rnh); + rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg); + RIB_WUNLOCK(rnh); + } + } +} + +struct rt_delinfo +{ + struct rt_addrinfo info; + struct rib_head *rnh; + struct rtentry *head; +}; + +/* + * Conditionally unlinks @rn from radix tree based + * on info data passed in @arg. + */ +static int +rt_checkdelroute(struct radix_node *rn, void *arg) +{ + struct rt_delinfo *di; + struct rt_addrinfo *info; + struct rtentry *rt; + int error; + + di = (struct rt_delinfo *)arg; + rt = (struct rtentry *)rn; + info = &di->info; + error = 0; + + info->rti_info[RTAX_DST] = rt_key(rt); + info->rti_info[RTAX_NETMASK] = rt_mask(rt); + info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; + + rt = rt_unlinkrte(di->rnh, info, &error); + if (rt == NULL) { + /* Either not allowed or not matched. Skip entry */ + return (0); + } + + /* Entry was unlinked. Add to the list and return */ + rt->rt_chain = di->head; + di->head = rt; + + return (0); +} + +/* + * Iterates over all existing fibs in system. + * Deletes each element for which @filter_f function returned + * non-zero value. + * If @af is not AF_UNSPEC, iterates over fibs in particular + * address family. 
+ */ +void +rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg) +{ + struct rib_head *rnh; + struct rt_delinfo di; + struct rtentry *rt; + uint32_t fibnum; + int i, start, end; + + bzero(&di, sizeof(di)); + di.info.rti_filter = filter_f; + di.info.rti_filterdata = arg; + + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + /* Do we want some specific family? */ + if (af != AF_UNSPEC) { + start = af; + end = af; + } else { + start = 1; + end = AF_MAX; + } + + for (i = start; i <= end; i++) { + rnh = rt_tables_get_rnh(fibnum, i); + if (rnh == NULL) + continue; + di.rnh = rnh; + + RIB_WLOCK(rnh); + rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); + RIB_WUNLOCK(rnh); + + if (di.head == NULL) + continue; + + /* We might have something to reclaim */ + while (di.head != NULL) { + rt = di.head; + di.head = rt->rt_chain; + rt->rt_chain = NULL; + + /* TODO std rt -> rt_addrinfo export */ + di.info.rti_info[RTAX_DST] = rt_key(rt); + di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); + + rt_notifydelete(rt, &di.info); + RTFREE_LOCKED(rt); + } + + } + } +} + +/* + * Delete Routes for a Network Interface + * + * Called for each routing entry via the rnh->rnh_walktree() call above + * to delete all route entries referencing a detaching network interface. + * + * Arguments: + * rt pointer to rtentry + * arg argument passed to rnh->rnh_walktree() - detaching interface + * + * Returns: + * 0 successful + * errno failed - reason indicated + */ +static int +rt_ifdelroute(const struct rtentry *rt, void *arg) +{ + struct ifnet *ifp = arg; + + if (rt->rt_ifp != ifp) + return (0); + + /* + * Protect (sorta) against walktree recursion problems + * with cloned routes + */ + if ((rt->rt_flags & RTF_UP) == 0) + return (0); + + return (1); +} + +/* + * Delete all remaining routes using this interface + * Unfortuneatly the only way to do this is to slog through + * the entire routing table looking for routes which point + * to this interface...oh well... 
+ */ +void +rt_flushifroutes_af(struct ifnet *ifp, int af) +{ + KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d", + __func__, af, AF_MAX)); + + rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp); +} + +void +rt_flushifroutes(struct ifnet *ifp) +{ + + rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp); +} + +/* + * Conditionally unlinks rtentry matching data inside @info from @rnh. + * Returns unlinked, locked and referenced @rtentry on success, + * Returns NULL and sets @perror to: + * ESRCH - if prefix was not found, + * EADDRINUSE - if trying to delete PINNED route without appropriate flag. + * ENOENT - if supplied filter function returned 0 (not matched). + */ +static struct rtentry * +rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) +{ + struct sockaddr *dst, *netmask; + struct rtentry *rt; + struct radix_node *rn; + + dst = info->rti_info[RTAX_DST]; + netmask = info->rti_info[RTAX_NETMASK]; + + rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); + if (rt == NULL) { + *perror = ESRCH; + return (NULL); + } + + if ((info->rti_flags & RTF_PINNED) == 0) { + /* Check if target route can be deleted */ + if (rt->rt_flags & RTF_PINNED) { + *perror = EADDRINUSE; + return (NULL); + } + } + + if (info->rti_filter != NULL) { + if (info->rti_filter(rt, info->rti_filterdata) == 0) { + /* Not matched */ + *perror = ENOENT; + return (NULL); + } + + /* + * Filter function requested rte deletion. + * Ease the caller work by filling in remaining info + * from that particular entry. + */ + info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; + } + + /* + * Remove the item from the tree and return it. + * Complain if it is not there and do no more processing. 
+ */ + *perror = ESRCH; +#ifdef RADIX_MPATH + if (rt_mpath_capable(rnh)) + rn = rt_mpath_unlink(rnh, info, rt, perror); + else +#endif + rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); + if (rn == NULL) + return (NULL); + + if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) + panic ("rtrequest delete"); + + rt = RNTORT(rn); + RT_LOCK(rt); + RT_ADDREF(rt); + rt->rt_flags &= ~RTF_UP; + + *perror = 0; + + return (rt); +} + +static void +rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) +{ + struct ifaddr *ifa; + + /* + * give the protocol a chance to keep things in sync. + */ + ifa = rt->rt_ifa; + if (ifa != NULL && ifa->ifa_rtrequest != NULL) + ifa->ifa_rtrequest(RTM_DELETE, rt, info); + + /* + * One more rtentry floating around that is not + * linked to the routing table. rttrash will be decremented + * when RTFREE(rt) is eventually called. + */ + V_rttrash++; +} + + /* * These (questionable) definitions of apparent local variables apply * to the next two functions. XXXXXX!!! @@ -845,13 +1294,6 @@ rtrequest_fib(int req, #define ifpaddr info->rti_info[RTAX_IFP] #define flags info->rti_flags -int -rt_getifa(struct rt_addrinfo *info) -{ - - return (rt_getifa_fib(info, RT_DEFAULT_FIB)); -} - /* * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, * it will be referenced so the caller must free it. 
@@ -868,7 +1310,7 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) */ if (info->rti_ifp == NULL && ifpaddr != NULL && ifpaddr->sa_family == AF_LINK && - (ifa = ifa_ifwithnet_fib(ifpaddr, 0, fibnum)) != NULL) { + (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) { info->rti_ifp = ifa->ifa_ifp; ifa_free(ifa); } @@ -882,10 +1324,10 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) if (sa != NULL && info->rti_ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); else if (dst != NULL && gateway != NULL) - info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, + info->rti_ifa = ifa_ifwithroute(flags, dst, gateway, fibnum); else if (sa != NULL) - info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, + info->rti_ifa = ifa_ifwithroute(flags, sa, sa, fibnum); } if ((ifa = info->rti_ifa) != NULL) { @@ -896,94 +1338,70 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) return (error); } -/* - * Expunges references to a route that's about to be reclaimed. - * The route must be locked. 
- */ -int -rtexpunge(struct rtentry *rt) +static int +if_updatemtu_cb(struct radix_node *rn, void *arg) { -#if !defined(RADIX_MPATH) - struct radix_node *rn; -#else - struct rt_addrinfo info; - int fib; - struct rtentry *rt0; -#endif - struct radix_node_head *rnh; - struct ifaddr *ifa; - int error = 0; - - /* - * Find the correct routing tree to use for this Address Family - */ - rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); - RT_LOCK_ASSERT(rt); - if (rnh == NULL) - return (EAFNOSUPPORT); - RADIX_NODE_HEAD_LOCK_ASSERT(rnh); + struct rtentry *rt; + struct if_mtuinfo *ifmtu; -#ifdef RADIX_MPATH - fib = rt->rt_fibnum; - bzero(&info, sizeof(info)); - info.rti_ifp = rt->rt_ifp; - info.rti_flags = RTF_RNH_LOCKED; - info.rti_info[RTAX_DST] = rt_key(rt); - info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr; + rt = (struct rtentry *)rn; + ifmtu = (struct if_mtuinfo *)arg; - RT_UNLOCK(rt); - error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib); + if (rt->rt_ifp != ifmtu->ifp) + return (0); - if (error == 0 && rt0 != NULL) { - rt = rt0; - RT_LOCK(rt); - } else if (error != 0) { - RT_LOCK(rt); - return (error); + if (rt->rt_mtu >= ifmtu->mtu) { + /* We have to decrease mtu regardless of flags */ + rt->rt_mtu = ifmtu->mtu; + return (0); } -#else + /* - * Remove the item from the tree; it should be there, - * but when callers invoke us blindly it may not (sigh). + * New MTU is bigger. 
Check if are allowed to alter it */ - rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh); - if (rn == NULL) { - error = ESRCH; - goto bad; + if ((rt->rt_flags & (RTF_FIXEDMTU | RTF_GATEWAY | RTF_HOST)) != 0) { + + /* + * Skip routes with user-supplied MTU and + * non-interface routes + */ + return (0); } - KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0, - ("unexpected flags 0x%x", rn->rn_flags)); - KASSERT(rt == RNTORT(rn), - ("lookup mismatch, rt %p rn %p", rt, rn)); -#endif /* RADIX_MPATH */ - rt->rt_flags &= ~RTF_UP; + /* We are safe to update route MTU */ + rt->rt_mtu = ifmtu->mtu; - /* - * Give the protocol a chance to keep things in sync. - */ - if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { - struct rt_addrinfo info; + return (0); +} - bzero((caddr_t)&info, sizeof(info)); - info.rti_flags = rt->rt_flags; - info.rti_info[RTAX_DST] = rt_key(rt); - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - ifa->ifa_rtrequest(RTM_DELETE, rt, &info); - } +void +rt_updatemtu(struct ifnet *ifp) +{ + struct if_mtuinfo ifmtu; + struct rib_head *rnh; + int i, j; + + ifmtu.ifp = ifp; /* - * one more rtentry floating around that is not - * linked to the routing table. + * Try to update rt_mtu for all routes using this interface + * Unfortunately the only way to do this is to traverse all + * routing tables in all fibs/domains. 
*/ - V_rttrash++; -#if !defined(RADIX_MPATH) -bad: -#endif - return (error); + for (i = 1; i <= AF_MAX; i++) { + ifmtu.mtu = if_getmtu_family(ifp, i); + for (j = 0; j < rt_numfibs; j++) { + rnh = rt_tables_get_rnh(j, i); + if (rnh == NULL) + continue; + RIB_WLOCK(rnh); + rnh->rnh_walktree(&rnh->head, if_updatemtu_cb, &ifmtu); + RIB_WUNLOCK(rnh); + } + } } + #if 0 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); int rt_print(char *buf, int buflen, struct rtentry *rt); @@ -1036,26 +1454,32 @@ rt_print(char *buf, int buflen, struct rtentry *rt) #endif #ifdef RADIX_MPATH -static int -rn_mpath_update(int req, struct rt_addrinfo *info, - struct radix_node_head *rnh, struct rtentry **ret_nrt) +/* + * Deletes key for single-path routes, unlinks rtentry with + * gateway specified in @info from multi-path routes. + * + * Returnes unlinked entry. In case of failure, returns NULL + * and sets @perror to ESRCH. + */ +static struct radix_node * +rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry *rto, int *perror) { /* * if we got multipath routes, we require users to specify * a matching RTAX_GATEWAY. */ - struct rtentry *rt, *rto = NULL; - register struct radix_node *rn; - int error = 0; + struct rtentry *rt; // *rto = NULL; + struct radix_node *rn; + struct sockaddr *gw; - rn = rnh->rnh_lookup(dst, netmask, rnh); - if (rn == NULL) - return (ESRCH); - rto = rt = RNTORT(rn); + gw = info->rti_info[RTAX_GATEWAY]; + rt = rt_mpath_matchgate(rto, gw); + if (rt == NULL) { + *perror = ESRCH; + return (NULL); + } - rt = rt_mpath_matchgate(rt, gateway); - if (rt == NULL) - return (ESRCH); /* * this is the first entry in the chain */ @@ -1078,67 +1502,95 @@ rn_mpath_update(int req, struct rt_addrinfo *info, * check the case when there is only * one route in the chain. 
*/ - if (gateway && - (rt->rt_gateway->sa_len != gateway->sa_len || - memcmp(rt->rt_gateway, gateway, gateway->sa_len))) - error = ESRCH; - else { - /* - * remove from tree before returning it - * to the caller - */ - rn = rnh->rnh_deladdr(dst, netmask, rnh); - KASSERT(rt == RNTORT(rn), ("radix node disappeared")); - goto gwdelete; + if (gw && + (rt->rt_gateway->sa_len != gw->sa_len || + memcmp(rt->rt_gateway, gw, gw->sa_len))) { + *perror = ESRCH; + return (NULL); } - } + /* * use the normal delete code to remove * the first entry */ - if (req != RTM_DELETE) - goto nondelete; - - error = ENOENT; - goto done; + rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); + *perror = 0; + return (rn); } /* * if the entry is 2nd and on up */ - if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) + if (rt_mpath_deldup(rto, rt) == 0) panic ("rtrequest1: rt_mpath_deldup"); -gwdelete: - RT_LOCK(rt); - RT_ADDREF(rt); - if (req == RTM_DELETE) { - rt->rt_flags &= ~RTF_UP; - /* - * One more rtentry floating around that is not - * linked to the routing table. rttrash will be decremented - * when RTFREE(rt) is eventually called. - */ - V_rttrash++; + *perror = 0; + rn = (struct radix_node *)rt; + return (rn); +} +#endif + +#ifdef FLOWTABLE +static struct rtentry * +rt_flowtable_check_route(struct rib_head *rnh, struct rt_addrinfo *info) +{ +#if defined(INET6) || defined(INET) + struct radix_node *rn; +#endif + struct rtentry *rt0; + + rt0 = NULL; + /* "flow-table" only supports IPv6 and IPv4 at the moment. 
*/ + switch (dst->sa_family) { +#ifdef INET6 + case AF_INET6: +#endif +#ifdef INET + case AF_INET: +#endif +#if defined(INET6) || defined(INET) + rn = rnh->rnh_matchaddr(dst, &rnh->head); + if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { + struct sockaddr *mask; + u_char *m, *n; + int len; + + /* + * compare mask to see if the new route is + * more specific than the existing one + */ + rt0 = RNTORT(rn); + RT_LOCK(rt0); + RT_ADDREF(rt0); + RT_UNLOCK(rt0); + /* + * A host route is already present, so + * leave the flow-table entries as is. + */ + if (rt0->rt_flags & RTF_HOST) { + RTFREE(rt0); + rt0 = NULL; + } else if (!(flags & RTF_HOST) && netmask) { + mask = rt_mask(rt0); + len = mask->sa_len; + m = (u_char *)mask; + n = (u_char *)netmask; + while (len-- > 0) { + if (*n != *m) + break; + n++; + m++; + } + if (len == 0 || (*n < *m)) { + RTFREE(rt0); + rt0 = NULL; + } + } + } +#endif/* INET6 || INET */ } - -nondelete: - if (req != RTM_DELETE) - panic("unrecognized request %d", req); - - /* - * If the caller wants it, then it can have it, - * but it's up to it to free the rtentry as we won't be - * doing it. 
- */ - if (ret_nrt) { - *ret_nrt = rt; - RT_UNLOCK(rt); - } else - RTFREE_LOCKED(rt); -done: - return (error); + return (rt0); } #endif @@ -1146,19 +1598,19 @@ int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { - int error = 0, needlock = 0; - register struct rtentry *rt; + int error = 0; + struct rtentry *rt, *rt_old; #ifdef FLOWTABLE - register struct rtentry *rt0; + struct rtentry *rt0; #endif - register struct radix_node *rn; - register struct radix_node_head *rnh; + struct radix_node *rn; + struct rib_head *rnh; struct ifaddr *ifa; struct sockaddr *ndst; struct sockaddr_storage mdst; -#define senderr(x) { error = x ; goto bad; } KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); + KASSERT((flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); switch (dst->sa_family) { case AF_INET6: case AF_INET: @@ -1175,12 +1627,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) return (EAFNOSUPPORT); - needlock = ((flags & RTF_RNH_LOCKED) == 0); - flags &= ~RTF_RNH_LOCKED; - if (needlock) - RADIX_NODE_HEAD_LOCK(rnh); - else - RADIX_NODE_HEAD_LOCK_ASSERT(rnh); + /* * If we are adding a host route then we don't want to put * a netmask in the tree, nor do we want to clone it. 
@@ -1194,52 +1641,14 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); dst = (struct sockaddr *)&mdst; } -#ifdef RADIX_MPATH - if (rn_mpath_capable(rnh)) { - error = rn_mpath_update(req, info, rnh, ret_nrt); - /* - * "bad" holds true for the success case - * as well - */ - if (error != ENOENT) - goto bad; - error = 0; - } -#endif - if ((flags & RTF_PINNED) == 0) { - /* Check if target route can be deleted */ - rt = (struct rtentry *)rnh->rnh_lookup(dst, - netmask, rnh); - if ((rt != NULL) && (rt->rt_flags & RTF_PINNED)) - senderr(EADDRINUSE); - } - /* - * Remove the item from the tree and return it. - * Complain if it is not there and do no more processing. - */ - rn = rnh->rnh_deladdr(dst, netmask, rnh); - if (rn == NULL) - senderr(ESRCH); - if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) - panic ("rtrequest delete"); - rt = RNTORT(rn); - RT_LOCK(rt); - RT_ADDREF(rt); - rt->rt_flags &= ~RTF_UP; + RIB_WLOCK(rnh); + rt = rt_unlinkrte(rnh, info, &error); + RIB_WUNLOCK(rnh); + if (error != 0) + return (error); - /* - * give the protocol a chance to keep things in sync. - */ - if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) - ifa->ifa_rtrequest(RTM_DELETE, rt, info); - - /* - * One more rtentry floating around that is not - * linked to the routing table. rttrash will be decremented - * when RTFREE(rt) is eventually called. 
- */ - V_rttrash++; + rt_notifydelete(rt, info); /* * If the caller wants it, then it can have it, @@ -1260,37 +1669,32 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, break; case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) - senderr(EINVAL); + return (EINVAL); if (dst && gateway && (dst->sa_family != gateway->sa_family) && (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) - senderr(EINVAL); + return (EINVAL); if (info->rti_ifa == NULL) { error = rt_getifa_fib(info, fibnum); if (error) - senderr(error); + return (error); } else ifa_ref(info->rti_ifa); ifa = info->rti_ifa; - rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); + rt = uma_zalloc(V_rtzone, M_NOWAIT); if (rt == NULL) { - if (ifa != NULL) - ifa_free(ifa); - senderr(ENOBUFS); + ifa_free(ifa); + return (ENOBUFS); } - RT_LOCK_INIT(rt); rt->rt_flags = RTF_UP | flags; rt->rt_fibnum = fibnum; /* * Add the gateway. Possibly re-malloc-ing the storage for it. */ - RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { - RT_LOCK_DESTROY(rt); - if (ifa != NULL) - ifa_free(ifa); + ifa_free(ifa); uma_zfree(V_rtzone, rt); - senderr(error); + return (error); } /* @@ -1313,111 +1717,81 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, */ rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; - rt->rt_rmx.rmx_weight = 1; + rt->rt_weight = 1; + rt_setmetrics(info, rt); + + RIB_WLOCK(rnh); + RT_LOCK(rt); #ifdef RADIX_MPATH /* do not permit exactly the same dst/mask/gw pair */ - if (rn_mpath_capable(rnh) && + if (rt_mpath_capable(rnh) && rt_mpath_conflict(rnh, rt, netmask)) { - if (rt->rt_ifa) { - ifa_free(rt->rt_ifa); - } - Free(rt_key(rt)); - RT_LOCK_DESTROY(rt); + RIB_WUNLOCK(rnh); + + ifa_free(rt->rt_ifa); + R_Free(rt_key(rt)); uma_zfree(V_rtzone, rt); - senderr(EEXIST); + return (EEXIST); } #endif #ifdef FLOWTABLE - rt0 = NULL; - /* "flow-table" only supports IPv6 and IPv4 at the moment. 
*/ - switch (dst->sa_family) { -#ifdef INET6 - case AF_INET6: -#endif -#ifdef INET - case AF_INET: -#endif -#if defined(INET6) || defined(INET) - rn = rnh->rnh_matchaddr(dst, rnh); - if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { - struct sockaddr *mask; - u_char *m, *n; - int len; - - /* - * compare mask to see if the new route is - * more specific than the existing one - */ - rt0 = RNTORT(rn); - RT_LOCK(rt0); - RT_ADDREF(rt0); - RT_UNLOCK(rt0); - /* - * A host route is already present, so - * leave the flow-table entries as is. - */ - if (rt0->rt_flags & RTF_HOST) { - RTFREE(rt0); - rt0 = NULL; - } else if (!(flags & RTF_HOST) && netmask) { - mask = rt_mask(rt0); - len = mask->sa_len; - m = (u_char *)mask; - n = (u_char *)netmask; - while (len-- > 0) { - if (*n != *m) - break; - n++; - m++; - } - if (len == 0 || (*n < *m)) { - RTFREE(rt0); - rt0 = NULL; - } - } - } -#endif/* INET6 || INET */ - } + rt0 = rt_flowtable_check_route(rnh, info); #endif /* FLOWTABLE */ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ - rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); + rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); + + rt_old = NULL; + if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { + + /* + * Force removal and re-try addition + * TODO: better multipath&pinned support + */ + struct sockaddr *info_dst = info->rti_info[RTAX_DST]; + info->rti_info[RTAX_DST] = ndst; + /* Do not delete existing PINNED(interface) routes */ + info->rti_flags &= ~RTF_PINNED; + rt_old = rt_unlinkrte(rnh, info, &error); + info->rti_flags |= RTF_PINNED; + info->rti_info[RTAX_DST] = info_dst; + if (rt_old != NULL) + rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, + rt->rt_nodes); + } + RIB_WUNLOCK(rnh); + + if (rt_old != NULL) + RT_UNLOCK(rt_old); + /* * If it still failed to go into the tree, * then un-make it (this should be a function) */ if (rn == NULL) { - if (rt->rt_ifa) - ifa_free(rt->rt_ifa); - Free(rt_key(rt)); - RT_LOCK_DESTROY(rt); + 
ifa_free(rt->rt_ifa); + R_Free(rt_key(rt)); uma_zfree(V_rtzone, rt); #ifdef FLOWTABLE if (rt0 != NULL) RTFREE(rt0); #endif - senderr(EEXIST); + return (EEXIST); } #ifdef FLOWTABLE else if (rt0 != NULL) { - switch (dst->sa_family) { -#ifdef INET6 - case AF_INET6: - flowtable_route_flush(V_ip6_ft, rt0); - break; -#endif -#ifdef INET - case AF_INET: - flowtable_route_flush(V_ip_ft, rt0); - break; -#endif - } + flowtable_route_flush(dst->sa_family, rt0); RTFREE(rt0); } #endif + if (rt_old != NULL) { + rt_notifydelete(rt_old, info); + RTFREE(rt_old); + } + /* * If this protocol has something to add to this then * allow it to do that as well. @@ -1433,16 +1807,19 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, *ret_nrt = rt; RT_ADDREF(rt); } + rnh->rnh_gen++; /* Routing table updated */ RT_UNLOCK(rt); break; + case RTM_CHANGE: + RIB_WLOCK(rnh); + error = rtrequest1_fib_change(rnh, info, ret_nrt, fibnum); + RIB_WUNLOCK(rnh); + break; default: error = EOPNOTSUPP; } -bad: - if (needlock) - RADIX_NODE_HEAD_UNLOCK(rnh); + return (error); -#undef senderr } #undef dst @@ -1452,20 +1829,147 @@ bad: #undef ifpaddr #undef flags +static int +rtrequest1_fib_change(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt, u_int fibnum) +{ + struct rtentry *rt = NULL; + int error = 0; + int free_ifa = 0; + int family, mtu; + struct if_mtuinfo ifmtu; + + rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], &rnh->head); + + if (rt == NULL) + return (ESRCH); + +#ifdef RADIX_MPATH + /* + * If we got multipath routes, + * we require users to specify a matching RTAX_GATEWAY. 
+ */ + if (rt_mpath_capable(rnh)) { + rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); + if (rt == NULL) + return (ESRCH); + } +#endif + + RT_LOCK(rt); + + rt_setmetrics(info, rt); + + /* + * New gateway could require new ifaddr, ifp; + * flags may also be different; ifp may be specified + * by ll sockaddr when protocol address is ambiguous + */ + if (((rt->rt_flags & RTF_GATEWAY) && + info->rti_info[RTAX_GATEWAY] != NULL) || + info->rti_info[RTAX_IFP] != NULL || + (info->rti_info[RTAX_IFA] != NULL && + !sa_equal(info->rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { + + error = rt_getifa_fib(info, fibnum); + if (info->rti_ifa != NULL) + free_ifa = 1; + + if (error != 0) + goto bad; + } + + /* Check if outgoing interface has changed */ + if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa && + rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL) { + rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, info); + ifa_free(rt->rt_ifa); + } + /* Update gateway address */ + if (info->rti_info[RTAX_GATEWAY] != NULL) { + error = rt_setgate(rt, rt_key(rt), info->rti_info[RTAX_GATEWAY]); + if (error != 0) + goto bad; + + rt->rt_flags &= ~RTF_GATEWAY; + rt->rt_flags |= (RTF_GATEWAY & info->rti_flags); + } + + if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa) { + ifa_ref(info->rti_ifa); + rt->rt_ifa = info->rti_ifa; + rt->rt_ifp = info->rti_ifp; + } + /* Allow some flags to be toggled on change. 
*/ + rt->rt_flags &= ~RTF_FMASK; + rt->rt_flags |= info->rti_flags & RTF_FMASK; + + if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL) + rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info); + + /* Alter route MTU if necessary */ + if (rt->rt_ifp != NULL) { + family = info->rti_info[RTAX_DST]->sa_family; + mtu = if_getmtu_family(rt->rt_ifp, family); + /* Set default MTU */ + if (rt->rt_mtu == 0) + rt->rt_mtu = mtu; + if (rt->rt_mtu != mtu) { + /* Check if we really need to update */ + ifmtu.ifp = rt->rt_ifp; + ifmtu.mtu = mtu; + if_updatemtu_cb(rt->rt_nodes, &ifmtu); + } + } + + if (ret_nrt) { + *ret_nrt = rt; + RT_ADDREF(rt); + } +bad: + RT_UNLOCK(rt); + if (free_ifa != 0) + ifa_free(info->rti_ifa); + return (error); +} + +static void +rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) +{ + + if (info->rti_mflags & RTV_MTU) { + if (info->rti_rmx->rmx_mtu != 0) { + + /* + * MTU was explicitly provided by user. + * Keep it. + */ + rt->rt_flags |= RTF_FIXEDMTU; + } else { + + /* + * User explicitly sets MTU to 0. + * Assume rollback to default. + */ + rt->rt_flags &= ~RTF_FIXEDMTU; + } + rt->rt_mtu = info->rti_rmx->rmx_mtu; + } + if (info->rti_mflags & RTV_WEIGHT) + rt->rt_weight = info->rti_rmx->rmx_weight; + /* Kernel -> userland timebase conversion. */ + if (info->rti_mflags & RTV_EXPIRE) + rt->rt_expire = info->rti_rmx->rmx_expire ? + info->rti_rmx->rmx_expire - time_second + time_uptime : 0; +} + int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { /* XXX dst may be overwritten, can we move this to below */ int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); -#ifdef INVARIANTS - struct radix_node_head *rnh; - rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); -#endif - - RT_LOCK_ASSERT(rt); - RADIX_NODE_HEAD_LOCK_ASSERT(rnh); - /* * Prepare to store the gateway in rt->rt_gateway. 
* Both dst and gateway are stored one after the other in the same @@ -1487,7 +1991,7 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) * Free()/free() handle a NULL argument just fine. */ bcopy(dst, new, dlen); - Free(rt_key(rt)); /* free old block, if any */ + R_Free(rt_key(rt)); /* free old block, if any */ rt_key(rt) = (struct sockaddr *)new; rt->rt_gateway = (struct sockaddr *)(new + dlen); } @@ -1503,9 +2007,9 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) { - register u_char *cp1 = (u_char *)src; - register u_char *cp2 = (u_char *)dst; - register u_char *cp3 = (u_char *)netmask; + u_char *cp1 = (u_char *)src; + u_char *cp2 = (u_char *)dst; + u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; @@ -1537,7 +2041,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) int didwork = 0; int a_failure = 0; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; - struct radix_node_head *rnh; + struct rib_head *rnh; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; @@ -1558,13 +2062,13 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) break; } if (fibnum == RT_ALL_FIBS) { - if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { + if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) #ifndef __rtems__ startfib = endfib = ifa->ifa_ifp->if_fib; #else /* __rtems__ */ startfib = endfib = BSD_DEFAULT_FIB; #endif /* __rtems__ */ - } else { + else { startfib = 0; endfib = rt_numfibs - 1; } @@ -1609,10 +2113,10 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) if (rnh == NULL) /* this table doesn't exist but others might */ continue; - RADIX_NODE_HEAD_RLOCK(rnh); - rn = rnh->rnh_lookup(dst, netmask, rnh); + RIB_RLOCK(rnh); + rn = rnh->rnh_lookup(dst, netmask, &rnh->head); #ifdef RADIX_MPATH - if (rn_mpath_capable(rnh)) { + if 
(rt_mpath_capable(rnh)) { if (rn == NULL) error = ESRCH; @@ -1635,7 +2139,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) error = (rn == NULL || (rn->rn_flags & RNF_ROOT) || RNTORT(rn)->rt_ifa != ifa); - RADIX_NODE_HEAD_RUNLOCK(rnh); + RIB_RUNLOCK(rnh); if (error) { /* this is only an error if bad on ALL tables */ continue; @@ -1660,32 +2164,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) info.rti_info[RTAX_NETMASK] = netmask; error = rtrequest1_fib(cmd, &info, &rt, fibnum); - if ((error == EEXIST) && (cmd == RTM_ADD)) { - /* - * Interface route addition failed. - * Atomically delete current prefix generating - * RTM_DELETE message, and retry adding - * interface prefix. - */ - rnh = rt_tables_get_rnh(fibnum, dst->sa_family); - RADIX_NODE_HEAD_LOCK(rnh); - - /* Delete old prefix */ - info.rti_ifa = NULL; - info.rti_flags = RTF_RNH_LOCKED; - - error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); - if (error == 0) { - info.rti_ifa = ifa; - info.rti_flags = flags | RTF_RNH_LOCKED | - (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; - error = rtrequest1_fib(cmd, &info, &rt, fibnum); - } - - RADIX_NODE_HEAD_UNLOCK(rnh); - } - - if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change @@ -1760,15 +2238,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) return (error); } -#ifndef BURN_BRIDGES -/* special one for inet internal use. may not use. */ -int -rtinit_fib(struct ifaddr *ifa, int cmd, int flags) -{ - return (rtinit1(ifa, cmd, flags, RT_ALL_FIBS)); -} -#endif - /* * Set up a routing table entry, normally * for an interface. diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h index 0baa9a4c..d44dc9d5 100644 --- a/freebsd/sys/net/route.h +++ b/freebsd/sys/net/route.h @@ -33,6 +33,9 @@ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ +#include <sys/counter.h> +#include <net/vnet.h> + /* * Kernel resident routing tables. 
* @@ -41,32 +44,39 @@ */ /* - * A route consists of a destination address, a reference - * to a routing entry, and a reference to an llentry. - * These are often held by protocols in their control - * blocks, e.g. inpcb. + * Struct route consiste of a destination address, + * a route entry pointer, link-layer prepend data pointer along + * with its length. */ struct route { struct rtentry *ro_rt; struct llentry *ro_lle; - struct in_ifaddr *ro_ia; - int ro_flags; + /* + * ro_prepend and ro_plen are only used for bpf to pass in a + * preformed header. They are not cacheable. + */ + char *ro_prepend; + uint16_t ro_plen; + uint16_t ro_flags; + uint16_t ro_mtu; /* saved ro_rt mtu */ + uint16_t spare; struct sockaddr ro_dst; }; +#define RT_L2_ME_BIT 2 /* dst L2 addr is our address */ +#define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */ +#define RT_HAS_HEADER_BIT 4 /* mbuf already have its header prepended */ + #define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */ #define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */ +#define RT_L2_ME (1 << RT_L2_ME_BIT) /* 0x0004 */ +#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) /* 0x0008 */ +#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) /* 0x0010 */ -/* - * These numbers are used by reliable protocols for determining - * retransmission behavior and are included in the routing structure. - */ -struct rt_metrics_lite { - u_long rmx_mtu; /* MTU for this path */ - u_long rmx_expire; /* lifetime for route, e.g. 
redirect */ - u_long rmx_pksent; /* packets sent using this route */ - u_long rmx_weight; /* absolute weight */ -}; +#define RT_REJECT 0x0020 /* Destination is reject */ +#define RT_BLACKHOLE 0x0040 /* Destination is blackhole */ +#define RT_HAS_GW 0x0080 /* Destination has GW */ +#define RT_LLE_CACHE 0x0100 /* Cache link layer */ struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ @@ -91,14 +101,24 @@ struct rt_metrics { #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) +/* lle state is exported in rmx_state rt_metrics field */ +#define rmx_state rmx_weight + +/* + * Keep a generation count of routing table, incremented on route addition, + * so we can invalidate caches. This is accessed without a lock, as precision + * is not required. + */ +typedef volatile u_int rt_gen_t; /* tree generation (for adds) */ +#define RT_GEN(fibnum, af) rt_tables_get_gen(fibnum, af) + #define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ #define RT_ALL_FIBS -1 /* Announce event for every fib */ +#ifdef _KERNEL extern u_int rt_numfibs; /* number of usable routing tables */ -extern u_int rt_add_addr_allfibs; /* Announce interfaces to all fibs */ -/* - * XXX kernel function pointer `rt_output' is visible to applications. 
- */ -struct mbuf; +VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ +#define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs) +#endif /* * We distinguish between routes to hosts and routes to networks, @@ -114,6 +134,8 @@ struct mbuf; #include <net/radix_mpath.h> #endif #endif + +#if defined(_KERNEL) || defined(_WANT_RTENTRY) struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* @@ -124,33 +146,20 @@ struct rtentry { #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) struct sockaddr *rt_gateway; /* value */ - int rt_flags; /* up/down?, host/net */ - int rt_refcnt; /* # held references */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ - struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */ - u_int rt_fibnum; /* which FIB */ -#ifdef _KERNEL - /* XXX ugly, user apps use this definition but don't have a mtx def */ - struct mtx rt_mtx; /* mutex for routing entry */ -#endif + int rt_flags; /* up/down?, host/net */ + int rt_refcnt; /* # held references */ + u_int rt_fibnum; /* which FIB */ + u_long rt_mtu; /* MTU for this path */ + u_long rt_weight; /* absolute weight */ + u_long rt_expire; /* lifetime for route, e.g. redirect */ +#define rt_endzero rt_pksent + counter_u64_t rt_pksent; /* packets sent using this route */ + struct mtx rt_mtx; /* mutex for routing entry */ + struct rtentry *rt_chain; /* pointer to next rtentry to delete */ }; - -/* - * Following structure necessary for 4.3 compatibility; - * We should eventually move it to a compat file. 
- */ -struct ortentry { - u_long rt_hash; /* to speed lookups */ - struct sockaddr rt_dst; /* key */ - struct sockaddr rt_gateway; /* value */ - short rt_flags; /* up/down?, host/net */ - short rt_refcnt; /* # held references */ - u_long rt_use; /* raw # packets forwarded */ - struct ifnet *rt_ifp; /* the answer: interface to use */ -}; - -#define rt_use rt_rmx.rmx_pksent +#endif /* _KERNEL || _WANT_RTENTRY */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ @@ -169,15 +178,10 @@ struct ortentry { #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ - -/* XXX: temporary to stay API/ABI compatible with userland */ -#ifndef _KERNEL -#define RTF_PRCLONING 0x10000 /* unused, for compatibility */ -#endif - +/* 0x10000 unused, was RTF_PRCLONING */ /* 0x20000 unused, was RTF_WASCLONED */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ -/* 0x80000 unused */ +#define RTF_FIXEDMTU 0x80000 /* MTU was explicitly specified */ #define RTF_PINNED 0x100000 /* route is immutable */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ @@ -185,7 +189,10 @@ struct ortentry { /* 0x8000000 and up unassigned */ #define RTF_STICKY 0x10000000 /* always route dst->src */ -#define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */ +#define RTF_RNH_LOCKED 0x40000000 /* unused */ + +#define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting + with existing routing apps */ /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */ #define RTF_FMASK \ @@ -193,6 +200,40 @@ struct ortentry { RTF_REJECT | RTF_STATIC | RTF_STICKY) /* + * fib_ nexthop API flags. 
+ */ + +/* Consumer-visible nexthop info flags */ +#define NHF_REJECT 0x0010 /* RTF_REJECT */ +#define NHF_BLACKHOLE 0x0020 /* RTF_BLACKHOLE */ +#define NHF_REDIRECT 0x0040 /* RTF_DYNAMIC|RTF_MODIFIED */ +#define NHF_DEFAULT 0x0080 /* Default route */ +#define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */ +#define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */ + +/* Nexthop request flags */ +#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */ +#define NHR_REF 0x02 /* For future use */ + +/* Control plane route request flags */ +#define NHR_COPY 0x100 /* Copy rte data */ + +#ifdef _KERNEL +/* rte<>ro_flags translation */ +static inline void +rt_update_ro_flags(struct route *ro) +{ + int rt_flags = ro->ro_rt->rt_flags; + + ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); + + ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0; + ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0; + ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0; +} +#endif + +/* * Routing statistics. */ struct rtstat { @@ -233,8 +274,8 @@ struct rt_msghdr { #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_LOCK 0x8 /* fix specified metrics */ -#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */ -#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */ + /* 0x9 */ + /* 0xa */ #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTM_DELADDR 0xd /* address being removed from iface */ @@ -282,12 +323,19 @@ struct rt_msghdr { #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_MAX 8 /* size of array to allocate */ +typedef int rt_filter_f_t(const struct rtentry *, void *); + struct rt_addrinfo { - int rti_addrs; - struct sockaddr *rti_info[RTAX_MAX]; - int rti_flags; - struct ifaddr *rti_ifa; - struct ifnet *rti_ifp; + int rti_addrs; /* Route RTF_ flags */ + int rti_flags; /* Route RTF_ flags */ + struct sockaddr 
*rti_info[RTAX_MAX]; /* Sockaddr data */ + struct ifaddr *rti_ifa; /* value of rt_ifa addr */ + struct ifnet *rti_ifp; /* route interface */ + rt_filter_f_t *rti_filter; /* filter function */ + void *rti_filterdata; /* filter paramenters */ + u_long rti_mflags; /* metrics RTV_ flags */ + u_long rti_spare; /* Will be used for fib */ + struct rt_metrics *rti_rmx; /* Pointer to route metrics */ }; /* @@ -302,17 +350,25 @@ struct rt_addrinfo { sizeof(long) : \ 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) ) +#define sa_equal(a, b) ( \ + (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \ + (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0)) + #ifdef _KERNEL #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \ || (ifp)->if_link_state == LINK_STATE_UP) #define RT_LOCK_INIT(_rt) \ - mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK) + mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW) #define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) #define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) #define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) #define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) +#define RT_UNLOCK_COND(_rt) do { \ + if (mtx_owned(&(_rt)->rt_mtx)) \ + mtx_unlock(&(_rt)->rt_mtx); \ +} while (0) #define RT_ADDREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ @@ -349,6 +405,7 @@ struct rt_addrinfo { if ((_ro)->ro_flags & RT_NORTREF) { \ (_ro)->ro_flags &= ~RT_NORTREF; \ (_ro)->ro_rt = NULL; \ + (_ro)->ro_lle = NULL; \ } else { \ RT_LOCK((_ro)->ro_rt); \ RTFREE_LOCKED((_ro)->ro_rt); \ @@ -356,9 +413,24 @@ struct rt_addrinfo { } \ } while (0) -struct radix_node_head *rt_tables_get_rnh(int, int); +/* + * Validate a cached route based on a supplied cookie. If there is an + * out-of-date cache, simply free it. 
Update the generation number + * for the new allocation + */ +#define RT_VALIDATE(ro, cookiep, fibnum) do { \ + rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family); \ + if (*(cookiep) != cookie) { \ + if ((ro)->ro_rt != NULL) { \ + RTFREE((ro)->ro_rt); \ + (ro)->ro_rt = NULL; \ + } \ + *(cookiep) = cookie; \ + } \ +} while (0) struct ifmultiaddr; +struct rib_head; void rt_ieee80211msg(struct ifnet *, int, void *, size_t); void rt_ifannouncemsg(struct ifnet *, int); @@ -372,6 +444,9 @@ int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); void rt_newmaddrmsg(int, struct ifmultiaddr *); int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); +struct rib_head *rt_table_init(int); +void rt_table_destroy(struct rib_head *); +u_int rt_tables_get_gen(int table, int fam); int rtsock_addrmsg(int, struct ifaddr *, int); int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); @@ -379,8 +454,6 @@ int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); /* * Note the following locking behavior: * - * rtalloc_ign() and rtalloc() return ro->ro_rt unlocked - * * rtalloc1() returns a locked rtentry * * rtfree() and RTFREE_LOCKED() require a locked rtentry @@ -388,27 +461,20 @@ int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); * RTFREE() uses an unlocked entry. 
*/ -int rtexpunge(struct rtentry *); void rtfree(struct rtentry *); -int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); +void rt_updatemtu(struct ifnet *); + +typedef int rt_walktree_f_t(struct rtentry *, void *); +typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *); +void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *); +void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg); +void rt_flushifroutes_af(struct ifnet *, int); +void rt_flushifroutes(struct ifnet *ifp); /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ /* Thes are used by old code not yet converted to use multiple FIBS */ -int rt_getifa(struct rt_addrinfo *); -void rtalloc_ign(struct route *ro, u_long ignflags); -void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */ struct rtentry *rtalloc1(struct sockaddr *, int, u_long); int rtinit(struct ifaddr *, int, int); -int rtioctl(u_long, caddr_t); -void rtredirect(struct sockaddr *, struct sockaddr *, - struct sockaddr *, int, struct sockaddr *); -int rtrequest(int, struct sockaddr *, - struct sockaddr *, struct sockaddr *, int, struct rtentry **); - -#ifndef BURN_BRIDGES -/* defaults to "all" FIBs */ -int rtinit_fib(struct ifaddr *, int, int); -#endif /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol indepedent versions are the same as the AF_INET ones @@ -416,7 +482,6 @@ int rtinit_fib(struct ifaddr *, int, int); */ int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum); void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum); -void rtalloc_fib(struct route *ro, u_int fibnum); struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); int rtioctl_fib(u_long, caddr_t, u_int); void rtredirect_fib(struct sockaddr *, struct sockaddr *, @@ -424,13 +489,10 @@ void rtredirect_fib(struct sockaddr *, struct sockaddr *, int rtrequest_fib(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); 
int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); +int rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t, + struct rt_addrinfo *); +void rib_free_info(struct rt_addrinfo *info); -#include <sys/eventhandler.h> -typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *); -typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); -/* route_arp_update_event is no longer generated; see arp_update_event */ -EVENTHANDLER_DECLARE(route_arp_update_event, rtevent_arp_update_fn); -EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn); #endif #endif diff --git a/freebsd/sys/net/route_var.h b/freebsd/sys/net/route_var.h new file mode 100644 index 00000000..a8ef56a5 --- /dev/null +++ b/freebsd/sys/net/route_var.h @@ -0,0 +1,76 @@ +/*- + * Copyright (c) 2015-2016 + * Alexander V. Chernikov <melifaro@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NET_ROUTE_VAR_H_ +#define _NET_ROUTE_VAR_H_ + +struct rib_head { + struct radix_head head; + rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */ + rn_addaddr_f_t *rnh_addaddr; /* add based on sockaddr*/ + rn_deladdr_f_t *rnh_deladdr; /* remove based on sockaddr */ + rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */ + rn_walktree_t *rnh_walktree; /* traverse tree */ + rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */ + rn_close_t *rnh_close; /*do something when the last ref drops*/ + rt_gen_t rnh_gen; /* generation counter */ + int rnh_multipath; /* multipath capable ? */ + struct radix_node rnh_nodes[3]; /* empty tree for common case */ + struct rwlock rib_lock; /* config/data path lock */ + struct radix_mask_head rmhead; /* masks radix head */ +}; + +#define RIB_RLOCK(rh) rw_rlock(&(rh)->rib_lock) +#define RIB_RUNLOCK(rh) rw_runlock(&(rh)->rib_lock) +#define RIB_WLOCK(rh) rw_wlock(&(rh)->rib_lock) +#define RIB_WUNLOCK(rh) rw_wunlock(&(rh)->rib_lock) +#define RIB_LOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_LOCKED) +#define RIB_WLOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_WLOCKED) + +struct rib_head *rt_tables_get_rnh(int fib, int family); + +/* rte<>nhop translation */ +static inline uint16_t +fib_rte_to_nh_flags(int rt_flags) +{ + uint16_t res; + + res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0; + res |= (rt_flags & RTF_BLACKHOLE) ? 
NHF_BLACKHOLE : 0; + res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0; + res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0; + res |= (rt_flags & RTF_GATEWAY) ? NHF_GATEWAY : 0; + + return (res); +} + + +#endif diff --git a/freebsd/sys/net/rss_config.h b/freebsd/sys/net/rss_config.h new file mode 100644 index 00000000..2ab32a43 --- /dev/null +++ b/freebsd/sys/net/rss_config.h @@ -0,0 +1,138 @@ +/*- + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * This software was developed by Robert N. M. Watson under contract + * to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NET_RSS_CONFIG_H_ +#define _NET_RSS_CONFIG_H_ + +#include <netinet/in.h> /* in_addr_t */ + +/* + * Supported RSS hash functions. + */ +#define RSS_HASH_NAIVE 0x00000001 /* Poor but fast hash. */ +#define RSS_HASH_TOEPLITZ 0x00000002 /* Required by RSS. */ +#define RSS_HASH_CRC32 0x00000004 /* Future; some NICs do it. */ + +#define RSS_HASH_MASK (RSS_HASH_NAIVE | RSS_HASH_TOEPLITZ) + +/* + * Instances of struct inpcbinfo declare an RSS hash type indicating what + * header fields are covered. + */ +#define RSS_HASHFIELDS_NONE 0 +#define RSS_HASHFIELDS_4TUPLE 1 +#define RSS_HASHFIELDS_2TUPLE 2 + +/* + * Define RSS representations of the M_HASHTYPE_* values, representing + * which particular bits are supported. The NICs can then use this to + * calculate which hash types to enable and which not to enable. + * + * The fact that these line up with M_HASHTYPE_* is not to be relied + * upon. + */ +#define RSS_HASHTYPE_RSS_IPV4 (1 << 1) /* IPv4 2-tuple */ +#define RSS_HASHTYPE_RSS_TCP_IPV4 (1 << 2) /* TCPv4 4-tuple */ +#define RSS_HASHTYPE_RSS_IPV6 (1 << 3) /* IPv6 2-tuple */ +#define RSS_HASHTYPE_RSS_TCP_IPV6 (1 << 4) /* TCPv6 4-tuple */ +#define RSS_HASHTYPE_RSS_IPV6_EX (1 << 5) /* IPv6 2-tuple + ext hdrs */ +#define RSS_HASHTYPE_RSS_TCP_IPV6_EX (1 << 6) /* TCPv6 4-tiple + ext hdrs */ +#define RSS_HASHTYPE_RSS_UDP_IPV4 (1 << 7) /* IPv4 UDP 4-tuple */ +#define RSS_HASHTYPE_RSS_UDP_IPV4_EX (1 << 8) /* IPv4 UDP 4-tuple + ext hdrs */ +#define RSS_HASHTYPE_RSS_UDP_IPV6 (1 << 9) /* IPv6 UDP 4-tuple */ +#define RSS_HASHTYPE_RSS_UDP_IPV6_EX (1 << 10) /* IPv6 UDP 4-tuple + ext hdrs */ + +/* + * Compile-time limits on the size of the indirection table. + */ +#define RSS_MAXBITS 7 +#define RSS_TABLE_MAXLEN (1 << RSS_MAXBITS) + +/* + * Maximum key size used throughout. It's OK for hardware to use only the + * first 16 bytes, which is all that's required for IPv4. 
+ */ +#define RSS_KEYSIZE 40 + +/* + * For RSS hash methods that do a software hash on an mbuf, the packet + * direction (ingress / egress) is required. + * + * The default direction (INGRESS) is the "receive into the NIC" - ie, + * what the hardware is hashing on. + */ +#define RSS_HASH_PKT_INGRESS 0 +#define RSS_HASH_PKT_EGRESS 1 + +/* + * Rate limited debugging routines. + */ +#define RSS_DEBUG(format, ...) do { \ + if (rss_debug) { \ + static struct timeval lastfail; \ + static int curfail; \ + if (ppsratecheck(&lastfail, &curfail, 5)) \ + printf("RSS (%s:%u): " format, __func__, __LINE__,\ + ##__VA_ARGS__); \ + } \ +} while (0) + +extern int rss_debug; + +/* + * Device driver interfaces to query RSS properties that must be programmed + * into hardware. + */ +u_int rss_getbits(void); +u_int rss_getbucket(u_int hash); +u_int rss_get_indirection_to_bucket(u_int index); +u_int rss_getcpu(u_int bucket); +void rss_getkey(uint8_t *key); +u_int rss_gethashalgo(void); +u_int rss_getnumbuckets(void); +u_int rss_getnumcpus(void); +u_int rss_gethashconfig(void); + +/* + * Hash calculation functions. + */ +uint32_t rss_hash(u_int datalen, const uint8_t *data); + +/* + * Network stack interface to query desired CPU affinity of a packet. 
+ */ +struct mbuf * rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid); +u_int rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type); +int rss_hash2bucket(uint32_t hash_val, uint32_t hash_type, + uint32_t *bucket_id); +int rss_m2bucket(struct mbuf *m, uint32_t *bucket_id); + +#endif /* !_NET_RSS_CONFIG_H_ */ diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c index e768e17b..1e69bcdf 100644 --- a/freebsd/sys/net/rtsock.c +++ b/freebsd/sys/net/rtsock.c @@ -54,17 +54,21 @@ #include <sys/systm.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include <net/if_llatbl.h> #include <net/if_types.h> #include <net/netisr.h> #include <net/raw_cb.h> #include <net/route.h> +#include <net/route_var.h> #include <net/vnet.h> #include <netinet/in.h> #include <netinet/if_ether.h> +#include <netinet/ip_carp.h> #ifdef INET6 +#include <netinet6/ip6_var.h> #include <netinet6/scope6_var.h> #endif @@ -72,34 +76,6 @@ #include <sys/mount.h> #include <compat/freebsd32/freebsd32.h> -struct if_data32 { - uint8_t ifi_type; - uint8_t ifi_physical; - uint8_t ifi_addrlen; - uint8_t ifi_hdrlen; - uint8_t ifi_link_state; - uint8_t ifi_spare_char1; - uint8_t ifi_spare_char2; - uint8_t ifi_datalen; - uint32_t ifi_mtu; - uint32_t ifi_metric; - uint32_t ifi_baudrate; - uint32_t ifi_ipackets; - uint32_t ifi_ierrors; - uint32_t ifi_opackets; - uint32_t ifi_oerrors; - uint32_t ifi_collisions; - uint32_t ifi_ibytes; - uint32_t ifi_obytes; - uint32_t ifi_imcasts; - uint32_t ifi_omcasts; - uint32_t ifi_iqdrops; - uint32_t ifi_noproto; - uint32_t ifi_hwassist; - int32_t ifi_epoch; - struct timeval32 ifi_lastchange; -}; - struct if_msghdr32 { uint16_t ifm_msglen; uint8_t ifm_version; @@ -107,7 +83,7 @@ struct if_msghdr32 { int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; - struct if_data32 ifm_data; + struct if_data ifm_data; }; struct if_msghdrl32 { @@ -120,7 +96,7 @@ struct if_msghdrl32 { uint16_t _ifm_spare1; uint16_t ifm_len; uint16_t ifm_data_off; - 
struct if_data32 ifm_data; + struct if_data ifm_data; }; struct ifa_msghdrl32 { @@ -134,7 +110,7 @@ struct ifa_msghdrl32 { uint16_t ifam_len; uint16_t ifam_data_off; int32_t ifam_metric; - struct if_data32 ifam_data; + struct if_data ifam_data; }; #endif /* COMPAT_FREEBSD32 */ @@ -144,18 +120,22 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; +/* These are external hooks for CARP. */ +int (*carp_get_vhid_p)(struct ifaddr *); + /* * Used by rtsock/raw_input callback code to decide whether to filter the update * notification to a socket bound to a particular FIB. */ #define RTS_FILTER_FIB M_PROTO8 -static struct { +typedef struct { int ip_count; /* attached w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ - int ipx_count; /* attached w/ AF_IPX */ int any_count; /* total attached */ -} route_cb; +} route_cb_t; +static VNET_DEFINE(route_cb_t, route_cb); +#define V_route_cb VNET(route_cb) struct mtx rtsock_mtx; MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); @@ -174,20 +154,19 @@ struct walkarg { }; static void rts_input(struct mbuf *m); -static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo); -static int rt_msg2(int type, struct rt_addrinfo *rtinfo, - caddr_t cp, struct walkarg *w); +static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo); +static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, + struct walkarg *w, int *plen); static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo); static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_ifmalist(int af, struct walkarg *w); -static int route_output(struct mbuf *m, struct socket *so); -static void rt_setmetrics(u_long which, const struct rt_metrics *in, - struct rt_metrics_lite *out); -static void rt_getmetrics(const struct 
rt_metrics_lite *in, - struct rt_metrics *out); +static int route_output(struct mbuf *m, struct socket *so, ...); +static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, sa_family_t); +static struct sockaddr *rtsock_fix_netmask(struct sockaddr *dst, + struct sockaddr *smask, struct sockaddr_storage *dmask); static struct netisr_handler rtsock_nh = { .nh_name = "rtsock", @@ -214,17 +193,35 @@ SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, "maximum routing socket dispatch queue length"); static void -rts_init(void) +vnet_rts_init(void) { int tmp; + if (IS_DEFAULT_VNET(curvnet)) { #ifndef __rtems__ - if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) - rtsock_nh.nh_qlimit = tmp; + if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) + rtsock_nh.nh_qlimit = tmp; +#endif /* __rtems__ */ + netisr_register(&rtsock_nh); + } +#ifdef VIMAGE + else + netisr_register_vnet(&rtsock_nh); #endif - netisr_register(&rtsock_nh); } -SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0); +VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, + vnet_rts_init, 0); + +#ifdef VIMAGE +static void +vnet_rts_uninit(void) +{ + + netisr_unregister_vnet(&rtsock_nh); +} +VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, + vnet_rts_uninit, 0); +#endif static int raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src, @@ -294,23 +291,13 @@ static int rts_attach(struct socket *so, int proto, struct thread *td) { struct rawcb *rp; - int s, error; + int error; KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL")); /* XXX */ rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO); - if (rp == NULL) - return ENOBUFS; - /* - * The splnet() is necessary to block protocols from sending - * error notifications (like RTM_REDIRECT or RTM_LOSING) while - * this PCB is extant but incompletely initialized. 
- * Probably we should try to do more of this work beforehand and - * eliminate the spl. - */ - s = splnet(); so->so_pcb = (caddr_t)rp; #ifndef __rtems__ so->so_fibnum = td->td_proc->p_fibnum; @@ -320,7 +307,6 @@ rts_attach(struct socket *so, int proto, struct thread *td) error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { - splx(s); so->so_pcb = NULL; free(rp, M_PCB); return error; @@ -328,20 +314,16 @@ rts_attach(struct socket *so, int proto, struct thread *td) RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: - route_cb.ip_count++; + V_route_cb.ip_count++; break; case AF_INET6: - route_cb.ip6_count++; - break; - case AF_IPX: - route_cb.ipx_count++; + V_route_cb.ip6_count++; break; } - route_cb.any_count++; + V_route_cb.any_count++; RTSOCK_UNLOCK(); soisconnected(so); so->so_options |= SO_USELOOPBACK; - splx(s); return 0; } @@ -372,16 +354,13 @@ rts_detach(struct socket *so) RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: - route_cb.ip_count--; + V_route_cb.ip_count--; break; case AF_INET6: - route_cb.ip6_count--; - break; - case AF_IPX: - route_cb.ipx_count--; + V_route_cb.ip6_count--; break; } - route_cb.any_count--; + V_route_cb.any_count--; RTSOCK_UNLOCK(); raw_usrreqs.pru_detach(so); } @@ -562,17 +541,25 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, /*ARGSUSED*/ static int -route_output(struct mbuf *m, struct socket *so) +route_output(struct mbuf *m, struct socket *so, ...) 
{ -#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; - struct radix_node_head *rnh; + struct rib_head *rnh; struct rt_addrinfo info; - int len, error = 0; + struct sockaddr_storage ss; +#ifdef INET6 + struct sockaddr_in6 *sin6; + int i, rti_need_deembed = 0; +#endif + int alloc_len = 0, len, error = 0, fibnum; struct ifnet *ifp = NULL; union sockaddr_union saun; sa_family_t saf = AF_UNSPEC; + struct rawcb *rp = NULL; + struct walkarg w; + + fibnum = so->so_fibnum; #define senderr(e) { error = e; goto flush;} if (m == NULL || ((m->m_len < sizeof(long)) && @@ -582,31 +569,53 @@ route_output(struct mbuf *m, struct socket *so) panic("route_output"); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || - len != mtod(m, struct rt_msghdr *)->rtm_msglen) { - info.rti_info[RTAX_DST] = NULL; + len != mtod(m, struct rt_msghdr *)->rtm_msglen) senderr(EINVAL); - } - R_Malloc(rtm, struct rt_msghdr *, len); - if (rtm == NULL) { - info.rti_info[RTAX_DST] = NULL; + + /* + * Most of current messages are in range 200-240 bytes, + * minimize possible re-allocation on reply using larger size + * buffer aligned on 1k boundaty. + */ + alloc_len = roundup2(len, 1024); + if ((rtm = malloc(alloc_len, M_TEMP, M_NOWAIT)) == NULL) senderr(ENOBUFS); - } + m_copydata(m, 0, len, (caddr_t)rtm); + bzero(&info, sizeof(info)); + bzero(&w, sizeof(w)); + if (rtm->rtm_version != RTM_VERSION) { - info.rti_info[RTAX_DST] = NULL; + /* Do not touch message since format is unknown */ + free(rtm, M_TEMP); + rtm = NULL; senderr(EPROTONOSUPPORT); } + + /* + * Starting from here, it is possible + * to alter original message and insert + * caller PID and error value. 
+ */ + #ifndef __rtems__ rtm->rtm_pid = curproc->p_pid; #else /* __rtems__ */ rtm->rtm_pid = BSD_DEFAULT_PID; #endif /* __rtems__ */ - bzero(&info, sizeof(info)); info.rti_addrs = rtm->rtm_addrs; - if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { - info.rti_info[RTAX_DST] = NULL; + + info.rti_mflags = rtm->rtm_inits; + info.rti_rmx = &rtm->rtm_rmx; + + /* + * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6 + * link-local address because rtrequest requires addresses with + * embedded scope id. + */ + if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) senderr(EINVAL); - } + info.rti_flags = rtm->rtm_flags; if (info.rti_info[RTAX_DST] == NULL || info.rti_info[RTAX_DST]->sa_family >= AF_MAX || @@ -634,11 +643,16 @@ route_output(struct mbuf *m, struct socket *so) */ if (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) { - struct route gw_ro; + struct rt_addrinfo ginfo; + struct sockaddr *gdst; + + bzero(&ginfo, sizeof(ginfo)); + bzero(&ss, sizeof(ss)); + ss.ss_len = sizeof(ss); + + ginfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&ss; + gdst = info.rti_info[RTAX_GATEWAY]; - bzero(&gw_ro, sizeof(gw_ro)); - gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY]; - rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum); /* * A host route through the loopback interface is * installed for each interface adddress. In pre 8.0 @@ -649,18 +663,21 @@ route_output(struct mbuf *m, struct socket *so) * AF_LINK sa_family type of the rt_gateway, and the * rt_ifp has the IFF_LOOPBACK flag set. 
*/ - if (gw_ro.ro_rt != NULL && - gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK && - gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) - info.rti_flags &= ~RTF_GATEWAY; - if (gw_ro.ro_rt != NULL) - RTFREE(gw_ro.ro_rt); + if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) { + if (ss.ss_family == AF_LINK && + ginfo.rti_ifp->if_flags & IFF_LOOPBACK) { + info.rti_flags &= ~RTF_GATEWAY; + info.rti_flags |= RTF_GWFLAG_COMPAT; + } + rib_free_info(&ginfo); + } } switch (rtm->rtm_type) { struct rtentry *saved_nrt; case RTM_ADD: + case RTM_CHANGE: if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; @@ -669,14 +686,19 @@ route_output(struct mbuf *m, struct socket *so) if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); +#ifdef INET6 + if (error == 0) + rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; +#endif break; } - error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt, - so->so_fibnum); - if (error == 0 && saved_nrt) { + error = rtrequest1_fib(rtm->rtm_type, &info, &saved_nrt, + fibnum); + if (error == 0 && saved_nrt != NULL) { +#ifdef INET6 + rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; +#endif RT_LOCK(saved_nrt); - rt_setmetrics(rtm->rtm_inits, - &rtm->rtm_rmx, &saved_nrt->rt_rmx); rtm->rtm_index = saved_nrt->rt_ifp->if_index; RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); @@ -690,26 +712,30 @@ route_output(struct mbuf *m, struct socket *so) (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); +#ifdef INET6 + if (error == 0) + rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; +#endif break; } - error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, - so->so_fibnum); + error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, fibnum); if (error == 0) { RT_LOCK(saved_nrt); rt = saved_nrt; goto report; } +#ifdef INET6 + /* rt_msg2() will not be used when RTM_DELETE fails. 
*/ + rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; +#endif break; case RTM_GET: - case RTM_CHANGE: - case RTM_LOCK: - rnh = rt_tables_get_rnh(so->so_fibnum, - info.rti_info[RTAX_DST]->sa_family); + rnh = rt_tables_get_rnh(fibnum, saf); if (rnh == NULL) senderr(EAFNOSUPPORT); - RADIX_NODE_HEAD_RLOCK(rnh); + RIB_RLOCK(rnh); if (info.rti_info[RTAX_NETMASK] == NULL && rtm->rtm_type == RTM_GET) { @@ -719,14 +745,14 @@ route_output(struct mbuf *m, struct socket *so) * 'route -n get addr' */ rt = (struct rtentry *) rnh->rnh_matchaddr( - info.rti_info[RTAX_DST], rnh); + info.rti_info[RTAX_DST], &rnh->head); } else rt = (struct rtentry *) rnh->rnh_lookup( info.rti_info[RTAX_DST], - info.rti_info[RTAX_NETMASK], rnh); + info.rti_info[RTAX_NETMASK], &rnh->head); if (rt == NULL) { - RADIX_NODE_HEAD_RUNLOCK(rnh); + RIB_RUNLOCK(rnh); senderr(ESRCH); } #ifdef RADIX_MPATH @@ -738,11 +764,11 @@ route_output(struct mbuf *m, struct socket *so) * if gate == NULL the first match is returned. * (no need to call rt_mpath_matchgate if gate == NULL) */ - if (rn_mpath_capable(rnh) && + if (rt_mpath_capable(rnh) && (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) { rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]); if (!rt) { - RADIX_NODE_HEAD_RUNLOCK(rnh); + RIB_RUNLOCK(rnh); senderr(ESRCH); } } @@ -760,7 +786,8 @@ route_output(struct mbuf *m, struct socket *so) rt->rt_ifp->if_type == IFT_PROPVIRTUAL) { struct ifaddr *ifa; - ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1); + ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1, + RT_ALL_FIBS); if (ifa != NULL) rt_maskedcopy(ifa->ifa_addr, &laddr, @@ -772,139 +799,81 @@ route_output(struct mbuf *m, struct socket *so) /* * refactor rt and no lock operation necessary */ - rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh); + rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, + &rnh->head); if (rt == NULL) { - RADIX_NODE_HEAD_RUNLOCK(rnh); + RIB_RUNLOCK(rnh); senderr(ESRCH); } } RT_LOCK(rt); RT_ADDREF(rt); - 
RADIX_NODE_HEAD_RUNLOCK(rnh); - - switch(rtm->rtm_type) { - - case RTM_GET: - report: - RT_LOCK_ASSERT(rt); - if ((rt->rt_flags & RTF_HOST) == 0 - ? jailed_without_vnet(curthread->td_ucred) - : prison_if(curthread->td_ucred, - rt_key(rt)) != 0) { - RT_UNLOCK(rt); - senderr(ESRCH); - } - info.rti_info[RTAX_DST] = rt_key(rt); - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - info.rti_info[RTAX_GENMASK] = 0; - if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { - ifp = rt->rt_ifp; - if (ifp) { - info.rti_info[RTAX_IFP] = - ifp->if_addr->ifa_addr; - error = rtm_get_jailed(&info, ifp, rt, - &saun, curthread->td_ucred); - if (error != 0) { - RT_UNLOCK(rt); - senderr(error); - } - if (ifp->if_flags & IFF_POINTOPOINT) - info.rti_info[RTAX_BRD] = - rt->rt_ifa->ifa_dstaddr; - rtm->rtm_index = ifp->if_index; - } else { - info.rti_info[RTAX_IFP] = NULL; - info.rti_info[RTAX_IFA] = NULL; - } - } else if ((ifp = rt->rt_ifp) != NULL) { - rtm->rtm_index = ifp->if_index; - } - len = rt_msg2(rtm->rtm_type, &info, NULL, NULL); - if (len > rtm->rtm_msglen) { - struct rt_msghdr *new_rtm; - R_Malloc(new_rtm, struct rt_msghdr *, len); - if (new_rtm == NULL) { - RT_UNLOCK(rt); - senderr(ENOBUFS); - } - bcopy(rtm, new_rtm, rtm->rtm_msglen); - Free(rtm); rtm = new_rtm; - } - (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL); - rtm->rtm_flags = rt->rt_flags; - rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); - rtm->rtm_addrs = info.rti_addrs; - break; - - case RTM_CHANGE: - /* - * New gateway could require new ifaddr, ifp; - * flags may also be different; ifp may be specified - * by ll sockaddr when protocol address is ambiguous - */ - if (((rt->rt_flags & RTF_GATEWAY) && - info.rti_info[RTAX_GATEWAY] != NULL) || - info.rti_info[RTAX_IFP] != NULL || - (info.rti_info[RTAX_IFA] != NULL && - !sa_equal(info.rti_info[RTAX_IFA], - rt->rt_ifa->ifa_addr))) { - RT_UNLOCK(rt); - RADIX_NODE_HEAD_LOCK(rnh); - error = rt_getifa_fib(&info, rt->rt_fibnum); - /* - * 
XXXRW: Really we should release this - * reference later, but this maintains - * historical behavior. - */ - if (info.rti_ifa != NULL) - ifa_free(info.rti_ifa); - RADIX_NODE_HEAD_UNLOCK(rnh); - if (error != 0) - senderr(error); - RT_LOCK(rt); - } - if (info.rti_ifa != NULL && - info.rti_ifa != rt->rt_ifa && - rt->rt_ifa != NULL && - rt->rt_ifa->ifa_rtrequest != NULL) { - rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, - &info); - ifa_free(rt->rt_ifa); - } - if (info.rti_info[RTAX_GATEWAY] != NULL) { - RT_UNLOCK(rt); - RADIX_NODE_HEAD_LOCK(rnh); - RT_LOCK(rt); - - error = rt_setgate(rt, rt_key(rt), - info.rti_info[RTAX_GATEWAY]); - RADIX_NODE_HEAD_UNLOCK(rnh); + RIB_RUNLOCK(rnh); + +report: + RT_LOCK_ASSERT(rt); + if ((rt->rt_flags & RTF_HOST) == 0 + ? jailed_without_vnet(curthread->td_ucred) + : prison_if(curthread->td_ucred, + rt_key(rt)) != 0) { + RT_UNLOCK(rt); + senderr(ESRCH); + } + info.rti_info[RTAX_DST] = rt_key(rt); + info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), + rt_mask(rt), &ss); + info.rti_info[RTAX_GENMASK] = 0; + if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { + ifp = rt->rt_ifp; + if (ifp) { + info.rti_info[RTAX_IFP] = + ifp->if_addr->ifa_addr; + error = rtm_get_jailed(&info, ifp, rt, + &saun, curthread->td_ucred); if (error != 0) { RT_UNLOCK(rt); senderr(error); } - rt->rt_flags |= (RTF_GATEWAY & info.rti_flags); + if (ifp->if_flags & IFF_POINTOPOINT) + info.rti_info[RTAX_BRD] = + rt->rt_ifa->ifa_dstaddr; + rtm->rtm_index = ifp->if_index; + } else { + info.rti_info[RTAX_IFP] = NULL; + info.rti_info[RTAX_IFA] = NULL; } - if (info.rti_ifa != NULL && - info.rti_ifa != rt->rt_ifa) { - ifa_ref(info.rti_ifa); - rt->rt_ifa = info.rti_ifa; - rt->rt_ifp = info.rti_ifp; + } else if ((ifp = rt->rt_ifp) != NULL) { + rtm->rtm_index = ifp->if_index; + } + + /* Check if we need to realloc storage */ + rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len); + if (len > alloc_len) { + struct rt_msghdr *new_rtm; + 
new_rtm = malloc(len, M_TEMP, M_NOWAIT); + if (new_rtm == NULL) { + RT_UNLOCK(rt); + senderr(ENOBUFS); } - /* Allow some flags to be toggled on change. */ - rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) | - (rtm->rtm_flags & RTF_FMASK); - rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, - &rt->rt_rmx); - rtm->rtm_index = rt->rt_ifp->if_index; - if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) - rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); - /* FALLTHROUGH */ - case RTM_LOCK: - /* We don't support locks anymore */ - break; + bcopy(rtm, new_rtm, rtm->rtm_msglen); + free(rtm, M_TEMP); + rtm = new_rtm; + alloc_len = len; } + + w.w_tmem = (caddr_t)rtm; + w.w_tmemsize = alloc_len; + rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len); + + if (rt->rt_flags & RTF_GWFLAG_COMPAT) + rtm->rtm_flags = RTF_GATEWAY | + (rt->rt_flags & ~RTF_GWFLAG_COMPAT); + else + rtm->rtm_flags = rt->rt_flags; + rt_getmetrics(rt, &rtm->rtm_rmx); + rtm->rtm_addrs = info.rti_addrs; + RT_UNLOCK(rt); break; @@ -913,39 +882,55 @@ route_output(struct mbuf *m, struct socket *so) } flush: - if (rtm) { - if (error) - rtm->rtm_errno = error; - else - rtm->rtm_flags |= RTF_DONE; - } - if (rt) /* XXX can this be true? */ + if (rt != NULL) RTFREE(rt); - { - struct rawcb *rp = NULL; /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { - if (route_cb.any_count <= 1) { - if (rtm) - Free(rtm); + if (V_route_cb.any_count <= 1) { + if (rtm != NULL) + free(rtm, M_TEMP); m_freem(m); return (error); } /* There is another listener, so construct message */ rp = sotorawcb(so); } - if (rtm) { + + if (rtm != NULL) { +#ifdef INET6 + if (rti_need_deembed) { + /* sin6_scope_id is recovered before sending rtm. 
*/ + sin6 = (struct sockaddr_in6 *)&ss; + for (i = 0; i < RTAX_MAX; i++) { + if (info.rti_info[i] == NULL) + continue; + if (info.rti_info[i]->sa_family != AF_INET6) + continue; + bcopy(info.rti_info[i], sin6, sizeof(*sin6)); + if (sa6_recoverscope(sin6) == 0) + bcopy(sin6, info.rti_info[i], + sizeof(*sin6)); + } + } +#endif + if (error != 0) + rtm->rtm_errno = error; + else + rtm->rtm_flags |= RTF_DONE; + m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); + + free(rtm, M_TEMP); } - if (m) { - M_SETFIB(m, so->so_fibnum); + if (m != NULL) { + M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; if (rp) { /* @@ -959,43 +944,21 @@ flush: } else rt_dispatch(m, saf); } - /* info.rti_info[RTAX_DST] (used above) can point inside of rtm */ - if (rtm) - Free(rtm); - } + return (error); -#undef sa_equal } static void -rt_setmetrics(u_long which, const struct rt_metrics *in, - struct rt_metrics_lite *out) +rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out) { -#define metric(f, e) if (which & (f)) out->e = in->e; - /* - * Only these are stored in the routing entry since introduction - * of tcp hostcache. The rest is ignored. - */ - metric(RTV_MTU, rmx_mtu); - metric(RTV_WEIGHT, rmx_weight); - /* Userland -> kernel timebase conversion. */ - if (which & RTV_EXPIRE) - out->rmx_expire = in->rmx_expire ? - in->rmx_expire - time_second + time_uptime : 0; -#undef metric -} -static void -rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out) -{ -#define metric(e) out->e = in->e; bzero(out, sizeof(*out)); - metric(rmx_mtu); - metric(rmx_weight); + out->rmx_mtu = rt->rt_mtu; + out->rmx_weight = rt->rt_weight; + out->rmx_pksent = counter_u64_fetch(rt->rt_pksent); /* Kernel -> userland timebase conversion. */ - out->rmx_expire = in->rmx_expire ? 
- in->rmx_expire - time_uptime + time_second : 0; -#undef metric + out->rmx_expire = rt->rt_expire ? + rt->rt_expire - time_uptime + time_second : 0; } /* @@ -1030,6 +993,11 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) return (0); /* should be EINVAL but for compat */ } /* accept it */ +#ifdef INET6 + if (sa->sa_family == AF_INET6) + sa6_embedscope((struct sockaddr_in6 *)sa, + V_ip6_use_defzone); +#endif rtinfo->rti_info[i] = sa; cp += SA_SIZE(sa); } @@ -1037,15 +1005,42 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) } /* - * Used by the routing socket. + * Fill in @dmask with valid netmask leaving original @smask + * intact. Mostly used with radix netmasks. + */ +static struct sockaddr * +rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask, + struct sockaddr_storage *dmask) +{ + if (dst == NULL || smask == NULL) + return (NULL); + + memset(dmask, 0, dst->sa_len); + memcpy(dmask, smask, smask->sa_len); + dmask->ss_len = dst->sa_len; + dmask->ss_family = dst->sa_family; + + return ((struct sockaddr *)dmask); +} + +/* + * Writes information related to @rtinfo object to newly-allocated mbuf. + * Assumes MCLBYTES is enough to construct any message. + * Used for OS notifications of vaious events (if/ifa announces,etc) + * + * Returns allocated mbuf or NULL on failure. 
*/ static struct mbuf * -rt_msg1(int type, struct rt_addrinfo *rtinfo) +rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm; struct mbuf *m; int i; struct sockaddr *sa; +#ifdef INET6 + struct sockaddr_storage ss; + struct sockaddr_in6 *sin6; +#endif int len, dlen; switch (type) { @@ -1072,20 +1067,17 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) default: len = sizeof(struct rt_msghdr); } - if (len > MCLBYTES) - panic("rt_msg1"); - m = m_gethdr(M_DONTWAIT, MT_DATA); - if (m && len > MHLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - m = NULL; - } - } + + /* XXXGL: can we use MJUMPAGESIZE cluster here? */ + KASSERT(len <= MCLBYTES, ("%s: message too big", __func__)); + if (len > MHLEN) + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + else + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (m); + m->m_pkthdr.len = m->m_len = len; - m->m_pkthdr.rcvif = NULL; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { @@ -1093,6 +1085,14 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); +#ifdef INET6 + if (V_deembed_scopeid && sa->sa_family == AF_INET6) { + sin6 = (struct sockaddr_in6 *)&ss; + bcopy(sa, sin6, sizeof(*sin6)); + if (sa6_recoverscope(sin6) == 0) + sa = (struct sockaddr *)sin6; + } +#endif m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } @@ -1107,17 +1107,26 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) } /* - * Used by the sysctl code and routing socket. + * Writes information related to @rtinfo object to preallocated buffer. + * Stores needed size in @plen. If @w is NULL, calculates size without + * writing. + * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation. + * + * Returns 0 on success. 
+ * */ static int -rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) +rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen) { int i; - int len, dlen, second_time = 0; - caddr_t cp0; + int len, buflen = 0, dlen; + caddr_t cp = NULL; + struct rt_msghdr *rtm = NULL; +#ifdef INET6 + struct sockaddr_storage ss; + struct sockaddr_in6 *sin6; +#endif - rtinfo->rti_addrs = 0; -again: switch (type) { case RTM_DELADDR: @@ -1156,9 +1165,14 @@ again: default: len = sizeof(struct rt_msghdr); } - cp0 = cp; - if (cp0) - cp += len; + + if (w != NULL) { + rtm = (struct rt_msghdr *)w->w_tmem; + buflen = w->w_tmemsize - len; + cp = (caddr_t)w->w_tmem + len; + } + + rtinfo->rti_addrs = 0; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; @@ -1166,45 +1180,56 @@ again: continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); - if (cp) { + if (cp != NULL && buflen >= dlen) { +#ifdef INET6 + if (V_deembed_scopeid && sa->sa_family == AF_INET6) { + sin6 = (struct sockaddr_in6 *)&ss; + bcopy(sa, sin6, sizeof(*sin6)); + if (sa6_recoverscope(sin6) == 0) + sa = (struct sockaddr *)sin6; + } +#endif bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; + buflen -= dlen; + } else if (cp != NULL) { + /* + * Buffer too small. Count needed size + * and return with error. 
+ */ + cp = NULL; } + len += dlen; } - len = ALIGN(len); - if (cp == NULL && w != NULL && !second_time) { - struct walkarg *rw = w; - - if (rw->w_req) { - if (rw->w_tmemsize < len) { - if (rw->w_tmem) - free(rw->w_tmem, M_RTABLE); - rw->w_tmem = (caddr_t) - malloc(len, M_RTABLE, M_NOWAIT); - if (rw->w_tmem) - rw->w_tmemsize = len; - } - if (rw->w_tmem) { - cp = rw->w_tmem; - second_time = 1; - goto again; - } - } + + if (cp != NULL) { + dlen = ALIGN(len) - len; + if (buflen < dlen) + cp = NULL; + else + buflen -= dlen; } - if (cp) { - struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; + len = ALIGN(len); + if (cp != NULL) { + /* fill header iff buffer is large enough */ rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } - return (len); + + *plen = len; + + if (w != NULL && cp == NULL) + return (ENOBUFS); + + return (0); } /* * This routine is called to generate a message from the routing - * socket indicating that a redirect has occured, a routing lookup + * socket indicating that a redirect has occurred, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. 
*/ @@ -1216,9 +1241,9 @@ rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error, struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; - if (route_cb.any_count == 0) + if (V_route_cb.any_count == 0) return; - m = rt_msg1(type, rtinfo); + m = rtsock_msg_mbuf(type, rtinfo); if (m == NULL) return; @@ -1254,16 +1279,16 @@ rt_ifmsg(struct ifnet *ifp) struct mbuf *m; struct rt_addrinfo info; - if (route_cb.any_count == 0) + if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); - m = rt_msg1(RTM_IFINFO, &info); + m = rtsock_msg_mbuf(RTM_IFINFO, &info); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; - ifm->ifm_data = ifp->if_data; + if_data_copy(ifp, &ifm->ifm_data); ifm->ifm_addrs = 0; rt_dispatch(m, AF_UNSPEC); } @@ -1283,8 +1308,9 @@ rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) struct mbuf *m; struct ifa_msghdr *ifam; struct ifnet *ifp = ifa->ifa_ifp; + struct sockaddr_storage ss; - if (route_cb.any_count == 0) + if (V_route_cb.any_count == 0) return (0); ncmd = cmd == RTM_ADD ? 
RTM_NEWADDR : RTM_DELADDR; @@ -1292,13 +1318,14 @@ rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; - info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; + info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask( + info.rti_info[RTAX_IFP], ifa->ifa_netmask, &ss); info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; - if ((m = rt_msg1(ncmd, &info)) == NULL) + if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL) return (ENOBUFS); ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; - ifam->ifam_metric = ifa->ifa_metric; + ifam->ifam_metric = ifa->ifa_ifp->if_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; @@ -1331,15 +1358,16 @@ rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, struct sockaddr *sa; struct mbuf *m; struct rt_msghdr *rtm; + struct sockaddr_storage ss; - if (route_cb.any_count == 0) + if (V_route_cb.any_count == 0) return (0); bzero((caddr_t)&info, sizeof(info)); - info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_DST] = sa = rt_key(rt); + info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(sa, rt_mask(rt), &ss); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - if ((m = rt_msg1(cmd, &info)) == NULL) + if ((m = rtsock_msg_mbuf(cmd, &info)) == NULL) return (ENOBUFS); rtm = mtod(m, struct rt_msghdr *); rtm->rtm_index = ifp->if_index; @@ -1370,7 +1398,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; - if (route_cb.any_count == 0) + if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); @@ -1381,7 +1409,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) * (similarly to how ARP entries, e.g., are presented). 
*/ info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; - m = rt_msg1(cmd, &info); + m = rtsock_msg_mbuf(cmd, &info); if (m == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); @@ -1399,10 +1427,10 @@ rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, struct if_announcemsghdr *ifan; struct mbuf *m; - if (route_cb.any_count == 0) + if (V_route_cb.any_count == 0) return NULL; bzero((caddr_t)info, sizeof(*info)); - m = rt_msg1(type, info); + m = rtsock_msg_mbuf(type, info); if (m != NULL) { ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; @@ -1509,6 +1537,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; + struct sockaddr_storage ss; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; @@ -1519,7 +1548,8 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); + info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), + rt_mask(rt), &ss); info.rti_info[RTAX_GENMASK] = 0; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; @@ -1527,16 +1557,17 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } - size = rt_msg2(RTM_GET, &info, NULL, w); + if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0) + return (error); if (w->w_req && w->w_tmem) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; - rtm->rtm_flags = rt->rt_flags; - /* - * let's be honest about this being a retarded hack - */ - rtm->rtm_fmask = rt->rt_rmx.rmx_pksent; - rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); + if (rt->rt_flags & RTF_GWFLAG_COMPAT) + rtm->rtm_flags = RTF_GATEWAY | + (rt->rt_flags & ~RTF_GWFLAG_COMPAT); + else + rtm->rtm_flags = rt->rt_flags; + 
rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; @@ -1546,70 +1577,40 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) return (error); } -#ifdef COMPAT_FREEBSD32 -static void -copy_ifdata32(struct if_data *src, struct if_data32 *dst) -{ - - bzero(dst, sizeof(*dst)); - CP(*src, *dst, ifi_type); - CP(*src, *dst, ifi_physical); - CP(*src, *dst, ifi_addrlen); - CP(*src, *dst, ifi_hdrlen); - CP(*src, *dst, ifi_link_state); - dst->ifi_datalen = sizeof(struct if_data32); - CP(*src, *dst, ifi_mtu); - CP(*src, *dst, ifi_metric); - CP(*src, *dst, ifi_baudrate); - CP(*src, *dst, ifi_ipackets); - CP(*src, *dst, ifi_ierrors); - CP(*src, *dst, ifi_opackets); - CP(*src, *dst, ifi_oerrors); - CP(*src, *dst, ifi_collisions); - CP(*src, *dst, ifi_ibytes); - CP(*src, *dst, ifi_obytes); - CP(*src, *dst, ifi_imcasts); - CP(*src, *dst, ifi_omcasts); - CP(*src, *dst, ifi_iqdrops); - CP(*src, *dst, ifi_noproto); - CP(*src, *dst, ifi_hwassist); - CP(*src, *dst, ifi_epoch); - TV_CP(*src, *dst, ifi_lastchange); -} -#endif - static int sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdrl *ifm; + struct if_data *ifd; + + ifm = (struct if_msghdrl *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdrl32 *ifm32; - ifm32 = (struct if_msghdrl32 *)w->w_tmem; + ifm32 = (struct if_msghdrl32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifm32->_ifm_spare1 = 0; ifm32->ifm_len = sizeof(*ifm32); ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data); - - copy_ifdata32(&ifp->if_data, &ifm32->ifm_data); - - return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len)); - } + ifd = &ifm32->ifm_data; + } else #endif - ifm = (struct if_msghdrl *)w->w_tmem; - ifm->ifm_addrs = info->rti_addrs; - ifm->ifm_flags = ifp->if_flags | 
ifp->if_drv_flags; - ifm->ifm_index = ifp->if_index; - ifm->_ifm_spare1 = 0; - ifm->ifm_len = sizeof(*ifm); - ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data); + { + ifm->ifm_addrs = info->rti_addrs; + ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; + ifm->ifm_index = ifp->if_index; + ifm->_ifm_spare1 = 0; + ifm->ifm_len = sizeof(*ifm); + ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data); + ifd = &ifm->ifm_data; + } - ifm->ifm_data = ifp->if_data; + if_data_copy(ifp, ifd); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } @@ -1619,27 +1620,29 @@ sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdr *ifm; + struct if_data *ifd; + + ifm = (struct if_msghdr *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdr32 *ifm32; - ifm32 = (struct if_msghdr32 *)w->w_tmem; + ifm32 = (struct if_msghdr32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; - - copy_ifdata32(&ifp->if_data, &ifm32->ifm_data); - - return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len)); - } + ifd = &ifm32->ifm_data; + } else #endif - ifm = (struct if_msghdr *)w->w_tmem; - ifm->ifm_addrs = info->rti_addrs; - ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; - ifm->ifm_index = ifp->if_index; + { + ifm->ifm_addrs = info->rti_addrs; + ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; + ifm->ifm_index = ifp->if_index; + ifd = &ifm->ifm_data; + } - ifm->ifm_data = ifp->if_data; + if_data_copy(ifp, ifd); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } @@ -1649,12 +1652,15 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdrl *ifam; + struct if_data *ifd; + + ifam = (struct ifa_msghdrl *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct ifa_msghdrl32 *ifam32; - ifam32 = (struct ifa_msghdrl32 *)w->w_tmem; + ifam32 = (struct 
ifa_msghdrl32 *)ifam; ifam32->ifam_addrs = info->rti_addrs; ifam32->ifam_flags = ifa->ifa_flags; ifam32->ifam_index = ifa->ifa_ifp->if_index; @@ -1662,24 +1668,31 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, ifam32->ifam_len = sizeof(*ifam32); ifam32->ifam_data_off = offsetof(struct ifa_msghdrl32, ifam_data); - ifam32->ifam_metric = ifa->ifa_metric; - - copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data); - - return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len)); - } + ifam32->ifam_metric = ifa->ifa_ifp->if_metric; + ifd = &ifam32->ifam_data; + } else #endif + { + ifam->ifam_addrs = info->rti_addrs; + ifam->ifam_flags = ifa->ifa_flags; + ifam->ifam_index = ifa->ifa_ifp->if_index; + ifam->_ifam_spare1 = 0; + ifam->ifam_len = sizeof(*ifam); + ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); + ifam->ifam_metric = ifa->ifa_ifp->if_metric; + ifd = &ifam->ifam_data; + } - ifam = (struct ifa_msghdrl *)w->w_tmem; - ifam->ifam_addrs = info->rti_addrs; - ifam->ifam_flags = ifa->ifa_flags; - ifam->ifam_index = ifa->ifa_ifp->if_index; - ifam->_ifam_spare1 = 0; - ifam->ifam_len = sizeof(*ifam); - ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); - ifam->ifam_metric = ifa->ifa_metric; + bzero(ifd, sizeof(*ifd)); + ifd->ifi_datalen = sizeof(struct if_data); + ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); + ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); + ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); + ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); - ifam->ifam_data = ifa->if_data; + /* Fixup if_data carp(4) vhid. 
*/ + if (carp_get_vhid_p != NULL) + ifd->ifi_vhid = (*carp_get_vhid_p)(ifa); return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } @@ -1694,7 +1707,7 @@ sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info, ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; - ifam->ifam_metric = ifa->ifa_metric; + ifam->ifam_metric = ifa->ifa_ifp->if_metric; return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } @@ -1706,16 +1719,19 @@ sysctl_iflist(int af, struct walkarg *w) struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; + struct sockaddr_storage ss; bzero((caddr_t)&info, sizeof(info)); - IFNET_RLOCK(); + IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; - len = rt_msg2(RTM_IFINFO, &info, NULL, w); + error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len); + if (error != 0) + goto done; info.rti_info[RTAX_IFP] = NULL; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) @@ -1732,9 +1748,12 @@ sysctl_iflist(int af, struct walkarg *w) ifa->ifa_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; - info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; + info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask( + ifa->ifa_addr, ifa->ifa_netmask, &ss); info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; - len = rt_msg2(RTM_NEWADDR, &info, NULL, w); + error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len); + if (error != 0) + goto done; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifaml(ifa, &info, @@ -1747,13 +1766,14 @@ sysctl_iflist(int af, struct walkarg *w) } } IF_ADDR_RUNLOCK(ifp); - info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = - info.rti_info[RTAX_BRD] = NULL; + info.rti_info[RTAX_IFA] = NULL; + info.rti_info[RTAX_NETMASK] = NULL; + info.rti_info[RTAX_BRD] = NULL; } done: if (ifp != NULL) IF_ADDR_RUNLOCK(ifp); - 
IFNET_RUNLOCK(); + IFNET_RUNLOCK_NOSLEEP(); return (error); } @@ -1767,7 +1787,7 @@ sysctl_ifmalist(int af, struct walkarg *w) struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); - IFNET_RLOCK(); + IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; @@ -1784,7 +1804,9 @@ sysctl_ifmalist(int af, struct walkarg *w) info.rti_info[RTAX_GATEWAY] = (ifma->ifma_addr->sa_family != AF_LINK) ? ifma->ifma_lladdr : NULL; - len = rt_msg2(RTM_NEWMADDR, &info, NULL, w); + error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len); + if (error != 0) + goto done; if (w->w_req && w->w_tmem) { struct ifma_msghdr *ifmam; @@ -1802,7 +1824,7 @@ sysctl_ifmalist(int af, struct walkarg *w) IF_ADDR_RUNLOCK(ifp); } done: - IFNET_RUNLOCK(); + IFNET_RUNLOCK_NOSLEEP(); return (error); } @@ -1811,7 +1833,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; - struct radix_node_head *rnh = NULL; /* silence compiler. */ + struct rib_head *rnh = NULL; /* silence compiler. */ int i, lim, error = EINVAL; int fib = 0; u_char af; @@ -1852,6 +1874,14 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) error = sysctl_wire_old_buffer(req, 0); if (error) return (error); + + /* + * Allocate reply buffer in advance. + * All rtsock messages has maximum length of u_short. 
+ */ + w.w_tmemsize = 65536; + w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK); + switch (w.w_op) { case NET_RT_DUMP: @@ -1880,10 +1910,10 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) for (error = 0; error == 0 && i <= lim; i++) { rnh = rt_tables_get_rnh(fib, i); if (rnh != NULL) { - RADIX_NODE_HEAD_RLOCK(rnh); - error = rnh->rnh_walktree(rnh, + RIB_RLOCK(rnh); + error = rnh->rnh_walktree(&rnh->head, sysctl_dumpentry, &w); - RADIX_NODE_HEAD_RUNLOCK(rnh); + RIB_RUNLOCK(rnh); } else if (af != 0) error = EAFNOSUPPORT; } @@ -1898,8 +1928,8 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) error = sysctl_ifmalist(af, &w); break; } - if (w.w_tmem) - free(w.w_tmem, M_RTABLE); + + free(w.w_tmem, M_TEMP); return (error); } @@ -1927,7 +1957,7 @@ static struct domain routedomain = { .dom_family = PF_ROUTE, .dom_name = "route", .dom_protosw = routesw, - .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])] + .dom_protoswNPROTOSW = &routesw[nitems(routesw)] }; VNET_DOMAIN_SET(route); diff --git a/freebsd/sys/net/sff8436.h b/freebsd/sys/net/sff8436.h new file mode 100644 index 00000000..3399cce5 --- /dev/null +++ b/freebsd/sys/net/sff8436.h @@ -0,0 +1,213 @@ +/*- + * Copyright (c) 2014 Yandex LLC. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * The following set of constants are from Document SFF-8436 + * "QSFP+ 10 Gbs 4X PLUGGABLE TRANSCEIVER" revision 4.8 dated October 31, 2013 + * + * This SFF standard defines the following QSFP+ memory address module: + * + * 1) 256-byte addressable block and 128-byte pages + * 2) Lower 128-bytes addresses always refer to the same page + * 3) Upper address space may refer to different pages depending on + * "page select" byte value. + * + * Map description: + * + * Serial address 0xA02: + * + * Lower bits + * 0-127 Monitoring data & page select byte + * 128-255: + * + * Page 00: + * 128-191 Base ID Fields + * 191-223 Extended ID + * 223-255 Vendor Specific ID + * + * Page 01 (optional): + * 128-255 App-specific data + * + * Page 02 (optional): + * 128-255 User EEPROM Data + * + * Page 03 (optional for Cable Assmeblies) + * 128-223 Thresholds + * 225-237 Vendor Specific + * 238-253 Channel Controls/Monitor + * 254-255 Reserverd + * + * All these values are read across an I2C (i squared C) bus. 
+ */ + +#define SFF_8436_BASE 0xA0 /* Base address for all requests */ + +/* Table 17 - Lower Memory Map */ +enum { + SFF_8436_MID = 0, /* Copy of SFF_8436_ID field */ + SFF_8436_STATUS = 1, /* 2-bytes status (Table 18) */ + SFF_8436_INTR_START = 3, /* Interrupt flags (Tables 19-21) */ + SFF_8436_INTR_END = 21, + SFF_8436_MODMON_START = 22, /* Module monitors (Table 22 */ + SFF_8436_TEMP = 22, /* Internally measured module temp */ + SFF_8436_VCC = 26, /* Internally mesasure module + * supplied voltage */ + SFF_8436_MODMON_END = 33, + SFF_8436_CHMON_START = 34, /* Channel monitors (Table 23) */ + SFF_8436_RX_CH1_MSB = 34, /* Internally measured RX input power */ + SFF_8436_RX_CH1_LSB = 35, /* for channel 1 */ + SFF_8436_RX_CH2_MSB = 36, /* Internally measured RX input power */ + SFF_8436_RX_CH2_LSB = 37, /* for channel 2 */ + SFF_8436_RX_CH3_MSB = 38, /* Internally measured RX input power */ + SFF_8436_RX_CH3_LSB = 39, /* for channel 3 */ + SFF_8436_RX_CH4_MSB = 40, /* Internally measured RX input power */ + SFF_8436_RX_CH4_LSB = 41, /* for channel 4 */ + SFF_8436_TX_CH1_MSB = 42, /* Internally measured TX bias */ + SFF_8436_TX_CH1_LSB = 43, /* for channel 1 */ + SFF_8436_TX_CH2_MSB = 44, /* Internally measured TX bias */ + SFF_8436_TX_CH2_LSB = 45, /* for channel 2 */ + SFF_8436_TX_CH3_MSB = 46, /* Internally measured TX bias */ + SFF_8436_TX_CH3_LSB = 47, /* for channel 3 */ + SFF_8436_TX_CH4_MSB = 48, /* Internally measured TX bias */ + SFF_8436_TX_CH4_LSB = 49, /* for channel 4 */ + SFF_8436_CHANMON_END = 81, + SFF_8436_CONTROL_START = 86, /* Control (Table 24) */ + SFF_8436_CONTROL_END = 97, + SFF_8436_MASKS_START = 100, /* Module/channel masks (Table 25) */ + SFF_8436_MASKS_END = 106, + SFF_8436_CHPASSWORD = 119, /* Password change entry (4 bytes) */ + SFF_8436_PASSWORD = 123, /* Password entry area (4 bytes) */ + SFF_8436_PAGESEL = 127, /* Page select byte */ +}; + +/* Table 18 - Status Indicators bits */ +/* Byte 1: all bits reserved */ + +/* Byte 2 bits */ 
+#define SFF_8436_STATUS_FLATMEM (1 << 2) /* Upper memory flat or paged + * 0 = paging, 1=Page 0 only */ +#define SFF_8436_STATUS_INTL (1 << 1) /* Digital state of the intL + * Interrupt output pin */ +#define SFF_8436_STATUS_NOTREADY 1 /* Module has not yet achieved + * power up and memory data is not + * ready. 0=data is ready */ +/* + * Upper page 0 definitions: + * Table 29 - Serial ID: Data fields. + * + * Note that this table is mostly the same as used in SFF-8472. + * The only differenee is address shift: +128 bytes. + */ +enum { + SFF_8436_ID = 128, /* Module Type (defined in sff8472.h) */ + SFF_8436_EXT_ID = 129, /* Extended transceiver type + * (Table 31) */ + SFF_8436_CONNECTOR = 130, /* Connector type (Table 32) */ + SFF_8436_TRANS_START = 131, /* Electric or Optical Compatibility + * (Table 33) */ + SFF_8436_CODE_E1040100G = 131, /* 10/40/100G Ethernet Compliance Code */ + SFF_8436_CODE_SONET = 132, /* SONET Compliance codes */ + SFF_8436_CODE_SATA = 133, /* SAS/SATA compliance codes */ + SFF_8436_CODE_E1G = 134, /* Gigabit Ethernet Compliant codes */ + SFF_8436_CODE_FC_START = 135, /* FC link/media/speed */ + SFF_8436_CODE_FC_END = 138, + SFF_8436_TRANS_END = 138, + SFF_8436_ENCODING = 139, /* Encoding Code for high speed + * serial encoding algorithm (see + * Table 34) */ + SFF_8436_BITRATE = 140, /* Nominal signaling rate, units + * of 100MBd. */ + SFF_8436_RATEID = 141, /* Extended RateSelect Compliance + * (see Table 35) */ + SFF_8436_LEN_SMF_KM = 142, /* Link length supported for single + * mode fiber, units of km */ + SFF_8436_LEN_OM3 = 143, /* Link length supported for 850nm + * 50um multimode fiber, units of 2 m */ + SFF_8436_LEN_OM2 = 144, /* Link length supported for 50 um + * OM2 fiber, units of 1 m */ + SFF_8436_LEN_OM1 = 145, /* Link length supported for 1310 nm + * 50um multi-mode fiber, units of 1m*/ + SFF_8436_LEN_ASM = 144, /* Link length of passive cable assembly + * Length is specified as in the INF + * 8074, units of 1m. 
0 means this is + * not value assembly. Value of 255 + * means thet the Module supports length + * greater than 254 m. */ + SFF_8436_DEV_TECH = 147, /* Device/transmitter technology, + * see Table 36/37 */ + SFF_8436_VENDOR_START = 148, /* Vendor name, 16 bytes, padded + * right with 0x20 */ + SFF_8436_VENDOR_END = 163, + SFF_8436_EXTMODCODE = 164, /* Extended module code, Table 164 */ + SFF_8436_VENDOR_OUI_START = 165 , /* Vendor OUI SFP vendor IEEE + * company ID */ + SFF_8436_VENDOR_OUI_END = 167, + SFF_8436_PN_START = 168, /* Vendor PN, padded right with 0x20 */ + SFF_8436_PN_END = 183, + SFF_8436_REV_START = 184, /* Vendor Revision, padded right 0x20 */ + SFF_8436_REV_END = 185, + SFF_8436_WAVELEN_START = 186, /* Wavelength Laser wavelength + * (Passive/Active Cable + * Specification Compliance) */ + SFF_8436_WAVELEN_END = 189, + SFF_8436_MAX_CASE_TEMP = 190, /* Allows to specify maximum temp + * above 70C. Maximum case temperature is + * an 8-bit value in Degrees C. A value + *of 0 implies the standard 70C rating.*/ + SFF_8436_CC_BASE = 191, /* CC_BASE Check code for Base ID + * Fields (first 63 bytes) */ + /* Extended ID fields */ + SFF_8436_OPTIONS_START = 192, /* Options Indicates which optional + * transceiver signals are + * implemented (see Table 39) */ + SFF_8436_OPTIONS_END = 195, + SFF_8436_SN_START = 196, /* Vendor SN, riwght padded with 0x20 */ + SFF_8436_SN_END = 211, + SFF_8436_DATE_START = 212, /* Vendor’s manufacturing date code + * (see Table 40) */ + SFF_8436_DATE_END = 219, + SFF_8436_DIAG_TYPE = 220, /* Diagnostic Monitoring Type + * Indicates which type of + * diagnostic monitoring is + * implemented (if any) in the + * transceiver (see Table 41) */ + + SFF_8436_ENHANCED = 221, /* Enhanced Options Indicates which + * optional features are implemented + * (if any) in the transceiver + * (see Table 42) */ + SFF_8636_BITRATE = 222, /* Nominal bit rate per channel, units + * of 250 Mbps */ + SFF_8436_CC_EXT = 223, /* Check code for the 
Extended ID + * Fields (bytes 192-222 incl) */ + SFF_8436_VENDOR_RSRVD_START = 224, + SFF_8436_VENDOR_RSRVD_END = 255, +}; + + diff --git a/freebsd/sys/net/sff8472.h b/freebsd/sys/net/sff8472.h new file mode 100644 index 00000000..5c50ea46 --- /dev/null +++ b/freebsd/sys/net/sff8472.h @@ -0,0 +1,508 @@ +/*- + * Copyright (c) 2013 George V. Neville-Neil + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +/* + * The following set of constants are from Document SFF-8472 + * "Diagnostic Monitoring Interface for Optical Transceivers" revision + * 11.3 published by the SFF Committee on June 11, 2013 + * + * The SFF standard defines two ranges of addresses, each 255 bytes + * long for the storage of data and diagnostics on cables, such as + * SFP+ optics and TwinAx cables. The ranges are defined in the + * following way: + * + * Base Address 0xa0 (Identification Data) + * 0-95 Serial ID Defined by SFP MSA + * 96-127 Vendor Specific Data + * 128-255 Reserved + * + * Base Address 0xa2 (Diagnostic Data) + * 0-55 Alarm and Warning Thresholds + * 56-95 Cal Constants + * 96-119 Real Time Diagnostic Interface + * 120-127 Vendor Specific + * 128-247 User Writable EEPROM + * 248-255 Vendor Specific + * + * Note that not all addresses are supported. Where support is + * optional this is noted and instructions for checking for the + * support are supplied. + * + * All these values are read across an I2C (i squared C) bus. Any + * device wishing to read these addresses must first have support for + * i2c calls. The Chelsio T4/T5 driver (dev/cxgbe) is one such + * driver. + */ + + +/* Table 3.1 Two-wire interface ID: Data Fields */ + +enum { + SFF_8472_BASE = 0xa0, /* Base address for all our queries. */ + SFF_8472_ID = 0, /* Transceiver Type (Table 3.2) */ + SFF_8472_EXT_ID = 1, /* Extended transceiver type (Table 3.3) */ + SFF_8472_CONNECTOR = 2, /* Connector type (Table 3.4) */ + SFF_8472_TRANS_START = 3, /* Elec or Optical Compatibility + * (Table 3.5) */ + SFF_8472_TRANS_END = 10, + SFF_8472_ENCODING = 11, /* Encoding Code for high speed + * serial encoding algorithm (see + * Table 3.6) */ + SFF_8472_BITRATE = 12, /* Nominal signaling rate, units + * of 100MBd. 
(see details for + * rates > 25.0Gb/s) */ + SFF_8472_RATEID = 13, /* Type of rate select + * functionality (see Table + * 3.6a) */ + SFF_8472_LEN_SMF_KM = 14, /* Link length supported for single + * mode fiber, units of km */ + SFF_8472_LEN_SMF = 15, /* Link length supported for single + * mode fiber, units of 100 m */ + SFF_8472_LEN_50UM = 16, /* Link length supported for 50 um + * OM2 fiber, units of 10 m */ + SFF_8472_LEN_625UM = 17, /* Link length supported for 62.5 + * um OM1 fiber, units of 10 m */ + SFF_8472_LEN_OM4 = 18, /* Link length supported for 50um + * OM4 fiber, units of 10m. + * Alternatively copper or direct + * attach cable, units of m */ + SFF_8472_LEN_OM3 = 19, /* Link length supported for 50 um OM3 fiber, units of 10 m */ + SFF_8472_VENDOR_START = 20, /* Vendor name [Address A0h, Bytes + * 20-35] */ + SFF_8472_VENDOR_END = 35, + SFF_8472_TRANS = 36, /* Transceiver Code for electronic + * or optical compatibility (see + * Table 3.5) */ + SFF_8472_VENDOR_OUI_START = 37, /* Vendor OUI SFP vendor IEEE + * company ID */ + SFF_8472_VENDOR_OUI_END = 39, + SFF_8472_PN_START = 40, /* Vendor PN */ + SFF_8472_PN_END = 55, + SFF_8472_REV_START = 56, /* Vendor Revision */ + SFF_8472_REV_END = 59, + SFF_8472_WAVELEN_START = 60, /* Wavelength Laser wavelength + * (Passive/Active Cable + * Specification Compliance) */ + SFF_8472_WAVELEN_END = 61, + SFF_8472_CC_BASE = 63, /* CC_BASE Check code for Base ID + * Fields (addresses 0 to 62) */ + +/* + * Extension Fields (optional) check the options before reading other + * addresses. 
+ */ + SFF_8472_OPTIONS_MSB = 64, /* Options Indicates which optional + * transceiver signals are + * implemented */ + SFF_8472_OPTIONS_LSB = 65, /* (see Table 3.7) */ + SFF_8472_BR_MAX = 66, /* BR max Upper bit rate margin, + * units of % (see details for + * rates > 25.0Gb/s) */ + SFF_8472_BR_MIN = 67, /* Lower bit rate margin, units of + * % (see details for rates > + * 25.0Gb/s) */ + SFF_8472_SN_START = 68, /* Vendor SN [Address A0h, Bytes 68-83] */ + SFF_8472_SN_END = 83, + SFF_8472_DATE_START = 84, /* Date code Vendor’s manufacturing + * date code (see Table 3.8) */ + SFF_8472_DATE_END = 91, + SFF_8472_DIAG_TYPE = 92, /* Diagnostic Monitoring Type + * Indicates which type of + * diagnostic monitoring is + * implemented (if any) in the + * transceiver (see Table 3.9) + */ + + SFF_8472_ENHANCED = 93, /* Enhanced Options Indicates which + * optional enhanced features are + * implemented (if any) in the + * transceiver (see Table 3.10) */ + SFF_8472_COMPLIANCE = 94, /* SFF-8472 Compliance Indicates + * which revision of SFF-8472 the + * transceiver complies with. (see + * Table 3.12)*/ + SFF_8472_CC_EXT = 95, /* Check code for the Extended ID + * Fields (addresses 64 to 94) + */ + + SFF_8472_VENDOR_RSRVD_START = 96, + SFF_8472_VENDOR_RSRVD_END = 127, + + SFF_8472_RESERVED_START = 128, + SFF_8472_RESERVED_END = 255 +}; + +#define SFF_8472_DIAG_IMPL (1 << 6) /* Required to be 1 */ +#define SFF_8472_DIAG_INTERNAL (1 << 5) /* Internal measurements. */ +#define SFF_8472_DIAG_EXTERNAL (1 << 4) /* External measurements. */ +#define SFF_8472_DIAG_POWER (1 << 3) /* Power measurement type */ +#define SFF_8472_DIAG_ADDR_CHG (1 << 2) /* Address change required. + * See SFF-8472 doc. */ + + /* + * Diagnostics are available at the two wire address 0xa2. All + * diagnostics are OPTIONAL so you should check 0xa0 registers 92 to + * see which, if any are supported. + */ + +enum {SFF_8472_DIAG = 0xa2}; /* Base address for diagnostics. 
*/ + + /* + * Table 3.15 Alarm and Warning Thresholds All values are 2 bytes + * and MUST be read in a single read operation starting at the MSB + */ + +enum { + SFF_8472_TEMP_HIGH_ALM = 0, /* Temp High Alarm */ + SFF_8472_TEMP_LOW_ALM = 2, /* Temp Low Alarm */ + SFF_8472_TEMP_HIGH_WARN = 4, /* Temp High Warning */ + SFF_8472_TEMP_LOW_WARN = 6, /* Temp Low Warning */ + SFF_8472_VOLTAGE_HIGH_ALM = 8, /* Voltage High Alarm */ + SFF_8472_VOLTAGE_LOW_ALM = 10, /* Voltage Low Alarm */ + SFF_8472_VOLTAGE_HIGH_WARN = 12, /* Voltage High Warning */ + SFF_8472_VOLTAGE_LOW_WARN = 14, /* Voltage Low Warning */ + SFF_8472_BIAS_HIGH_ALM = 16, /* Bias High Alarm */ + SFF_8472_BIAS_LOW_ALM = 18, /* Bias Low Alarm */ + SFF_8472_BIAS_HIGH_WARN = 20, /* Bias High Warning */ + SFF_8472_BIAS_LOW_WARN = 22, /* Bias Low Warning */ + SFF_8472_TX_POWER_HIGH_ALM = 24, /* TX Power High Alarm */ + SFF_8472_TX_POWER_LOW_ALM = 26, /* TX Power Low Alarm */ + SFF_8472_TX_POWER_HIGH_WARN = 28, /* TX Power High Warning */ + SFF_8472_TX_POWER_LOW_WARN = 30, /* TX Power Low Warning */ + SFF_8472_RX_POWER_HIGH_ALM = 32, /* RX Power High Alarm */ + SFF_8472_RX_POWER_LOW_ALM = 34, /* RX Power Low Alarm */ + SFF_8472_RX_POWER_HIGH_WARN = 36, /* RX Power High Warning */ + SFF_8472_RX_POWER_LOW_WARN = 38, /* RX Power Low Warning */ + + SFF_8472_RX_POWER4 = 56, /* Rx_PWR(4) Single precision + * floating point calibration data + * - Rx optical power. Bit 7 of + * byte 56 is MSB. Bit 0 of byte + * 59 is LSB. Rx_PWR(4) should be + * set to zero for “internally + * calibrated” devices. */ + SFF_8472_RX_POWER3 = 60, /* Rx_PWR(3) Single precision + * floating point calibration data + * - Rx optical power. Bit 7 of + * byte 60 is MSB. Bit 0 of byte 63 + * is LSB. Rx_PWR(3) should be set + * to zero for “internally + * calibrated” devices.*/ + SFF_8472_RX_POWER2 = 64, /* Rx_PWR(2) Single precision + * floating point calibration data, + * Rx optical power. Bit 7 of byte + * 64 is MSB, bit 0 of byte 67 is + * LSB. 
Rx_PWR(2) should be set to + * zero for “internally calibrated” + * devices. */ + SFF_8472_RX_POWER1 = 68, /* Rx_PWR(1) Single precision + * floating point calibration data, + * Rx optical power. Bit 7 of byte + * 68 is MSB, bit 0 of byte 71 is + * LSB. Rx_PWR(1) should be set to + * 1 for “internally calibrated” + * devices. */ + SFF_8472_RX_POWER0 = 72, /* Rx_PWR(0) Single precision + * floating point calibration data, + * Rx optical power. Bit 7 of byte + * 72 is MSB, bit 0 of byte 75 is + * LSB. Rx_PWR(0) should be set to + * zero for “internally calibrated” + * devices. */ + SFF_8472_TX_I_SLOPE = 76, /* Tx_I(Slope) Fixed decimal + * (unsigned) calibration data, + * laser bias current. Bit 7 of + * byte 76 is MSB, bit 0 of byte 77 + * is LSB. Tx_I(Slope) should be + * set to 1 for “internally + * calibrated” devices. */ + SFF_8472_TX_I_OFFSET = 78, /* Tx_I(Offset) Fixed decimal + * (signed two’s complement) + * calibration data, laser bias + * current. Bit 7 of byte 78 is + * MSB, bit 0 of byte 79 is + * LSB. Tx_I(Offset) should be set + * to zero for “internally + * calibrated” devices. */ + SFF_8472_TX_POWER_SLOPE = 80, /* Tx_PWR(Slope) Fixed decimal + * (unsigned) calibration data, + * transmitter coupled output + * power. Bit 7 of byte 80 is MSB, + * bit 0 of byte 81 is LSB. + * Tx_PWR(Slope) should be set to 1 + * for “internally calibrated” + * devices. */ + SFF_8472_TX_POWER_OFFSET = 82, /* Tx_PWR(Offset) Fixed decimal + * (signed two’s complement) + * calibration data, transmitter + * coupled output power. Bit 7 of + * byte 82 is MSB, bit 0 of byte 83 + * is LSB. Tx_PWR(Offset) should be + * set to zero for “internally + * calibrated” devices. */ + SFF_8472_T_SLOPE = 84, /* T (Slope) Fixed decimal + * (unsigned) calibration data, + * internal module temperature. Bit + * 7 of byte 84 is MSB, bit 0 of + * byte 85 is LSB. T(Slope) should + * be set to 1 for “internally + * calibrated” devices. 
*/ + SFF_8472_T_OFFSET = 86, /* T (Offset) Fixed decimal (signed + * two’s complement) calibration + * data, internal module + * temperature. Bit 7 of byte 86 is + * MSB, bit 0 of byte 87 is LSB. + * T(Offset) should be set to zero + * for “internally calibrated” + * devices. */ + SFF_8472_V_SLOPE = 88, /* V (Slope) Fixed decimal + * (unsigned) calibration data, + * internal module supply + * voltage. Bit 7 of byte 88 is + * MSB, bit 0 of byte 89 is + * LSB. V(Slope) should be set to 1 + * for “internally calibrated” + * devices. */ + SFF_8472_V_OFFSET = 90, /* V (Offset) Fixed decimal (signed + * two’s complement) calibration + * data, internal module supply + * voltage. Bit 7 of byte 90 is + * MSB. Bit 0 of byte 91 is + * LSB. V(Offset) should be set to + * zero for “internally calibrated” + * devices. */ + SFF_8472_CHECKSUM = 95, /* Checksum Byte 95 contains the + * low order 8 bits of the sum of + * bytes 0 – 94. */ + /* Internal measurements. */ + + SFF_8472_TEMP = 96, /* Internally measured module temperature. */ + SFF_8472_VCC = 98, /* Internally measured supply + * voltage in transceiver. + */ + SFF_8472_TX_BIAS = 100, /* Internally measured TX Bias Current. */ + SFF_8472_TX_POWER = 102, /* Measured TX output power. */ + SFF_8472_RX_POWER = 104, /* Measured RX input power. */ + + SFF_8472_STATUS = 110 /* See below */ +}; + /* Status Bits Described */ + +/* + * TX Disable State Digital state of the TX Disable Input Pin. Updated + * within 100ms of change on pin. + */ +#define SFF_8472_STATUS_TX_DISABLE (1 << 7) + +/* + * Select Read/write bit that allows software disable of + * laser. Writing ‘1’ disables laser. See Table 3.11 for + * enable/disable timing requirements. This bit is “OR”d with the hard + * TX_DISABLE pin value. Note, per SFP MSA TX_DISABLE pin is default + * enabled unless pulled low by hardware. If Soft TX Disable is not + * implemented, the transceiver ignores the value of this bit. Default + * power up value is zero/low. 
+ */ +#define SFF_8472_STATUS_SOFT_TX_DISABLE (1 << 6) + +/* + * RS(1) State Digital state of SFP input pin AS(1) per SFF-8079 or + * RS(1) per SFF-8431. Updated within 100ms of change on pin. See A2h + * Byte 118, Bit 3 for Soft RS(1) Select control information. + */ +#define SFF_8472_RS_STATE (1 << 5) + +/* + * Rate_Select State [aka. “RS(0)”] Digital state of the SFP + * Rate_Select Input Pin. Updated within 100ms of change on pin. Note: + * This pin is also known as AS(0) in SFF-8079 and RS(0) in SFF-8431. + */ +#define SFF_8472_STATUS_SELECT_STATE (1 << 4) + +/* + * Read/write bit that allows software rate select control. Writing + * ‘1’ selects full bandwidth operation. This bit is “OR’d with the + * hard Rate_Select, AS(0) or RS(0) pin value. See Table 3.11 for + * timing requirements. Default at power up is logic zero/low. If Soft + * Rate Select is not implemented, the transceiver ignores the value + * of this bit. Note: Specific transceiver behaviors of this bit are + * identified in Table 3.6a and referenced documents. See Table 3.18a, + * byte 118, bit 3 for Soft RS(1) Select. + */ +#define SFF_8472_STATUS_SOFT_RATE_SELECT (1 << 3) + +/* + * TX Fault State Digital state of the TX Fault Output Pin. Updated + * within 100ms of change on pin. + */ +#define SFF_8472_STATUS_TX_FAULT_STATE (1 << 2) + +/* + * Digital state of the RX_LOS Output Pin. Updated within 100ms of + * change on pin. + */ +#define SFF_8472_STATUS_RX_LOS (1 << 1) + +/* + * Indicates transceiver has achieved power up and data is ready. Bit + * remains high until data is ready to be read at which time the + * device sets the bit low. + */ +#define SFF_8472_STATUS_DATA_READY (1 << 0) + +/* + * Table 3.2 Identifier values. 
+ * Identifier constants has taken from SFF-8024 rev 2.9 table 4.1 + * (as referenced by table 3.2 footer) + * */ +enum { + SFF_8024_ID_UNKNOWN = 0x0, /* Unknown or unspecified */ + SFF_8024_ID_GBIC = 0x1, /* GBIC */ + SFF_8024_ID_SFF = 0x2, /* Module soldered to motherboard (ex: SFF)*/ + SFF_8024_ID_SFP = 0x3, /* SFP or SFP “Plus” */ + SFF_8024_ID_XBI = 0x4, /* 300 pin XBI */ + SFF_8024_ID_XENPAK = 0x5, /* Xenpak */ + SFF_8024_ID_XFP = 0x6, /* XFP */ + SFF_8024_ID_XFF = 0x7, /* XFF */ + SFF_8024_ID_XFPE = 0x8, /* XFP-E */ + SFF_8024_ID_XPAK = 0x9, /* XPAk */ + SFF_8024_ID_X2 = 0xA, /* X2 */ + SFF_8024_ID_DWDM_SFP = 0xB, /* DWDM-SFP */ + SFF_8024_ID_QSFP = 0xC, /* QSFP */ + SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ */ + SFF_8024_ID_CXP = 0xE, /* CXP */ + SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */ + SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */ + SFF_8024_ID_QSFP28 = 0x11, /* QSFP28 */ + SFF_8024_ID_CXP2 = 0x12, /* CXP2 (aka CXP28) */ + SFF_8024_ID_CDFP = 0x13, /* CDFP (Style 1/Style 2) */ + SFF_8024_ID_SMM4 = 0x14, /* Shielded Mini Multilate HD 4X Fanout */ + SFF_8024_ID_SMM8 = 0x15, /* Shielded Mini Multilate HD 8X Fanout */ + SFF_8024_ID_CDFP3 = 0x16, /* CDFP (Style3) */ + SFF_8024_ID_LAST = SFF_8024_ID_CDFP3 + }; + +static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {"Unknown", + "GBIC", + "SFF", + "SFP/SFP+/SFP28", + "XBI", + "Xenpak", + "XFP", + "XFF", + "XFP-E", + "XPAK", + "X2", + "DWDM-SFP/SFP+", + "QSFP", + "QSFP+", + "CXP", + "HD4X", + "HD8X", + "QSFP28", + "CXP2", + "CDFP", + "SMM4", + "SMM8", + "CDFP3"}; + +/* Keep compatibility with old definitions */ +#define SFF_8472_ID_UNKNOWN SFF_8024_ID_UNKNOWN +#define SFF_8472_ID_GBIC SFF_8024_ID_GBIC +#define SFF_8472_ID_SFF SFF_8024_ID_SFF +#define SFF_8472_ID_SFP SFF_8024_ID_SFP +#define SFF_8472_ID_XBI SFF_8024_ID_XBI +#define SFF_8472_ID_XENPAK SFF_8024_ID_XENPAK +#define SFF_8472_ID_XFP SFF_8024_ID_XFP +#define SFF_8472_ID_XFF SFF_8024_ID_XFF +#define SFF_8472_ID_XFPE 
SFF_8024_ID_XFPE +#define SFF_8472_ID_XPAK SFF_8024_ID_XPAK +#define SFF_8472_ID_X2 SFF_8024_ID_X2 +#define SFF_8472_ID_DWDM_SFP SFF_8024_ID_DWDM_SFP +#define SFF_8472_ID_QSFP SFF_8024_ID_QSFP +#define SFF_8472_ID_LAST SFF_8024_ID_LAST + +#define sff_8472_id sff_8024_id + +/* + * Table 3.9 Diagnostic Monitoring Type (byte 92) + * bits described. + */ + +/* + * Digital diagnostic monitoring implemented. + * Set to 1 for transceivers implementing DDM. + */ +#define SFF_8472_DDM_DONE (1 << 6) + +/* + * Measurements are internally calibrated. + */ +#define SFF_8472_DDM_INTERNAL (1 << 5) + +/* + * Measurements are externally calibrated. + */ +#define SFF_8472_DDM_EXTERNAL (1 << 4) + +/* + * Received power measurement type + * 0 = OMA, 1 = average power + */ +#define SFF_8472_DDM_PMTYPE (1 << 3) + +/* Table 3.13 and 3.14 Temperature Conversion Values */ +#define SFF_8472_TEMP_SIGN (1 << 15) +#define SFF_8472_TEMP_SHIFT 8 +#define SFF_8472_TEMP_MSK 0xEF00 +#define SFF_8472_TEMP_FRAC 0x00FF + +/* Internal Callibration Conversion factors */ + +/* + * Represented as a 16 bit unsigned integer with the voltage defined + * as the full 16 bit value (0 – 65535) with LSB equal to 100 uVolt, + * yielding a total range of 0 to +6.55 Volts. + */ +#define SFF_8472_VCC_FACTOR 10000.0 + +/* + * Represented as a 16 bit unsigned integer with the current defined + * as the full 16 bit value (0 – 65535) with LSB equal to 2 uA, + * yielding a total range of 0 to 131 mA. + */ + +#define SFF_8472_BIAS_FACTOR 2000.0 + +/* + * Represented as a 16 bit unsigned integer with the power defined as + * the full 16 bit value (0 – 65535) with LSB equal to 0.1 uW, + * yielding a total range of 0 to 6.5535 mW (~ -40 to +8.2 dBm). 
+ */ + +#define SFF_8472_POWER_FACTOR 10000.0 diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h index 01e26cdb..3e186c12 100644 --- a/freebsd/sys/net/vnet.h +++ b/freebsd/sys/net/vnet.h @@ -70,6 +70,7 @@ struct vnet { u_int vnet_magic_n; u_int vnet_ifcnt; u_int vnet_sockcnt; + u_int vnet_state; /* SI_SUB_* */ void *vnet_data_mem; uintptr_t vnet_data_base; }; @@ -85,6 +86,61 @@ struct vnet { #ifdef _KERNEL +#define VNET_PCPUSTAT_DECLARE(type, name) \ + VNET_DECLARE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)]) + +#define VNET_PCPUSTAT_DEFINE(type, name) \ + VNET_DEFINE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)]) + +#define VNET_PCPUSTAT_ALLOC(name, wait) \ + COUNTER_ARRAY_ALLOC(VNET(name), \ + sizeof(VNET(name)) / sizeof(counter_u64_t), (wait)) + +#define VNET_PCPUSTAT_FREE(name) \ + COUNTER_ARRAY_FREE(VNET(name), sizeof(VNET(name)) / sizeof(counter_u64_t)) + +#define VNET_PCPUSTAT_ADD(type, name, f, v) \ + counter_u64_add(VNET(name)[offsetof(type, f) / sizeof(uint64_t)], (v)) + +#define VNET_PCPUSTAT_FETCH(type, name, f) \ + counter_u64_fetch(VNET(name)[offsetof(type, f) / sizeof(uint64_t)]) + +#define VNET_PCPUSTAT_SYSINIT(name) \ +static void \ +vnet_##name##_init(const void *unused) \ +{ \ + VNET_PCPUSTAT_ALLOC(name, M_WAITOK); \ +} \ +VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_INIT_IF, \ + SI_ORDER_FIRST, vnet_ ## name ## _init, NULL) + +#define VNET_PCPUSTAT_SYSUNINIT(name) \ +static void \ +vnet_##name##_uninit(const void *unused) \ +{ \ + VNET_PCPUSTAT_FREE(name); \ +} \ +VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_INIT_IF, \ + SI_ORDER_FIRST, vnet_ ## name ## _uninit, NULL) + +#ifdef SYSCTL_OID +#define SYSCTL_VNET_PCPUSTAT(parent, nbr, name, type, array, desc) \ +static int \ +array##_sysctl(SYSCTL_HANDLER_ARGS) \ +{ \ + type s; \ + CTASSERT((sizeof(type) / sizeof(uint64_t)) == \ + (sizeof(VNET(array)) / sizeof(counter_u64_t))); \ + COUNTER_ARRAY_COPY(VNET(array), &s, sizeof(type) / sizeof(uint64_t));\ + if (req->newptr) \ 
+ COUNTER_ARRAY_ZERO(VNET(array), \ + sizeof(type) / sizeof(uint64_t)); \ + return (SYSCTL_OUT(req, &s, sizeof(type))); \ +} \ +SYSCTL_PROC(parent, nbr, name, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_RW, \ + NULL, 0, array ## _sysctl, "I", desc) +#endif /* SYSCTL_OID */ + #ifdef VIMAGE #include <rtems/bsd/sys/lock.h> #include <sys/proc.h> /* for struct thread */ @@ -233,53 +289,6 @@ void vnet_data_copy(void *start, int size); void vnet_data_free(void *start_arg, int size); /* - * Sysctl variants for vnet-virtualized global variables. Include - * <sys/sysctl.h> to expose these definitions. - * - * Note: SYSCTL_PROC() handler functions will need to resolve pointer - * arguments themselves, if required. - */ -#ifdef SYSCTL_OID -int vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS); -int vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS); -int vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS); -int vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS); - -#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \ - ptr, val, vnet_sysctl_handle_int, "I", descr) -#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \ - fmt, descr) \ - CTASSERT(((access) & CTLTYPE) != 0); \ - SYSCTL_OID(parent, nbr, name, CTLFLAG_VNET|(access), ptr, arg, \ - handler, fmt, descr) -#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \ - descr) \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, len, \ - vnet_sysctl_handle_opaque, fmt, descr) -#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_STRING|CTLFLAG_VNET|(access), \ - arg, len, vnet_sysctl_handle_string, "A", descr) -#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, \ - sizeof(struct type), vnet_sysctl_handle_opaque, "S," #type, 
\ - descr) -#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_UINT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \ - ptr, val, vnet_sysctl_handle_uint, "IU", descr) -#define VNET_SYSCTL_ARG(req, arg1) do { \ - if (arg1 != NULL) \ - arg1 = (void *)(TD_TO_VNET((req)->td)->vnet_data_base + \ - (uintptr_t)(arg1)); \ -} while (0) -#endif /* SYSCTL_OID */ - -/* * Virtual sysinit mechanism, allowing network stack components to declare * startup and shutdown methods to be run when virtual network stack * instances are created and destroyed. @@ -402,29 +411,6 @@ do { \ #define VNET(n) (n) /* - * When VIMAGE isn't compiled into the kernel, virtaulized SYSCTLs simply - * become normal SYSCTLs. - */ -#ifdef SYSCTL_OID -#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) -#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \ - fmt, descr) \ - SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, \ - descr) -#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \ - descr) \ - SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) -#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \ - SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) -#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \ - SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) -#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) -#define VNET_SYSCTL_ARG(req, arg1) -#endif /* SYSCTL_OID */ - -/* * When VIMAGE isn't compiled into the kernel, VNET_SYSINIT/VNET_SYSUNINIT * map into normal sysinits, which have the same ordering properties. */ |